From a03157850b787edb5c47f6676aaa678b3e72fe4e Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Fri, 3 Apr 2026 15:43:32 +0800 Subject: [PATCH 01/46] global_lru_cache --- src/ailego/buffer/buffer_pool.cc | 71 ++------------- src/ailego/buffer/lru_cache.cc | 65 ++++++++++++++ src/include/zvec/ailego/buffer/buffer_pool.h | 29 +----- src/include/zvec/ailego/buffer/lru_cache.h | 92 ++++++++++++++++++++ 4 files changed, 166 insertions(+), 91 deletions(-) create mode 100644 src/ailego/buffer/lru_cache.cc create mode 100644 src/include/zvec/ailego/buffer/lru_cache.h diff --git a/src/ailego/buffer/buffer_pool.cc b/src/ailego/buffer/buffer_pool.cc index 38f73f628..b35f51ff9 100644 --- a/src/ailego/buffer/buffer_pool.cc +++ b/src/ailego/buffer/buffer_pool.cc @@ -23,65 +23,6 @@ static ssize_t zvec_pread(int fd, void *buf, size_t count, size_t offset) { namespace zvec { namespace ailego { -int LRUCache::init(size_t block_size) { - block_size_ = block_size; - for (size_t i = 0; i < CATCH_QUEUE_NUM; i++) { - queues_.push_back(ConcurrentQueue(block_size)); - } - return 0; -} - -bool LRUCache::evict_single_block(BlockType &item) { - bool found = false; - for (size_t i = 0; i < CATCH_QUEUE_NUM; i++) { - found = queues_[i].try_dequeue(item); - if (found) { - break; - } - } - return found; -} - -bool LRUCache::add_single_block(const LPMap *lp_map, const BlockType &block, - int block_type) { - bool ok = queues_[block_type].enqueue(block); - if (!ok) { - LOG_ERROR("enqueue failed."); - return false; - } - evict_queue_insertions_.fetch_add(1, std::memory_order_relaxed); - if (evict_queue_insertions_ % block_size_ == 0) { - this->clear_dead_node(lp_map); - } - return true; -} - -void LRUCache::clear_dead_node(const LPMap *lp_map) { - for (size_t i = 0; i < CATCH_QUEUE_NUM; i++) { - size_t clear_size = block_size_ * 2; - if (queues_[i].size_approx() < clear_size * 4) { - continue; - } - size_t clear_count = 0; - ConcurrentQueue tmp(block_size_); - BlockType item; - while 
(queues_[i].try_dequeue(item) && (clear_count++ < clear_size)) { - if (!lp_map->isDeadBlock(item)) { - if (!tmp.enqueue(item)) { - LOG_ERROR("enqueue failed."); - } - } - } - while (tmp.try_dequeue(item)) { - if (!lp_map->isDeadBlock(item)) { - if (!queues_[i].enqueue(item)) { - LOG_ERROR("enqueue failed."); - } - } - } - } -} - void LPMap::init(size_t entry_num) { if (entries_) { delete[] entries_; @@ -93,7 +34,6 @@ void LPMap::init(size_t entry_num) { entries_[i].load_count.store(0); entries_[i].buffer = nullptr; } - cache_.init(entry_num * 4); } char *LPMap::acquire_block(block_id_t block_id, bool lru_mode) { @@ -125,9 +65,10 @@ void LPMap::release_block(block_id_t block_id) { if (entry.ref_count.fetch_sub(1, std::memory_order_release) == 1) { std::atomic_thread_fence(std::memory_order_acquire); LRUCache::BlockType block; - block.first = block_id; - block.second = entry.load_count.load(); - cache_.add_single_block(this, block, 0); + block.lp_map = this; + block.block.first = block_id; + block.block.second = entry.load_count.load(); + LRUCache::get_instance().add_single_block(block, 0); } } @@ -171,12 +112,12 @@ char *LPMap::set_block_acquired(block_id_t block_id, char *buffer) { void LPMap::recycle(moodycamel::ConcurrentQueue &free_buffers) { LRUCache::BlockType block; do { - bool ok = cache_.evict_single_block(block); + bool ok = LRUCache::get_instance().evict_single_block(block); if (!ok) { return; } } while (isDeadBlock(block)); - char *buffer = evict_block(block.first); + char *buffer = evict_block(block.block.first); if (buffer) { if (!free_buffers.enqueue(buffer)) { LOG_ERROR("recycle buffer enqueue failed."); diff --git a/src/ailego/buffer/lru_cache.cc b/src/ailego/buffer/lru_cache.cc new file mode 100644 index 000000000..0ae257a2a --- /dev/null +++ b/src/ailego/buffer/lru_cache.cc @@ -0,0 +1,65 @@ +#include +#include + +namespace zvec { +namespace ailego { + +int LRUCache::init() { + block_size_ = 512; + for (size_t i = 0; i < CATCH_QUEUE_NUM; i++) { + 
queues_.push_back(ConcurrentQueue()); + } + return 0; +} + +bool LRUCache::evict_single_block(BlockType &item) { + bool found = false; + for (size_t i = 0; i < CATCH_QUEUE_NUM; i++) { + found = queues_[i].try_dequeue(item); + if (found) { + break; + } + } + return found; +} + +bool LRUCache::add_single_block(const BlockType &block, int block_type) { + bool ok = queues_[block_type].enqueue(block); + if (!ok) { + LOG_ERROR("enqueue failed."); + return false; + } + evict_queue_insertions_.fetch_add(1, std::memory_order_relaxed); + if (evict_queue_insertions_ % block_size_ == 0) { + this->clear_dead_node(block.lp_map); + } + return true; +} + +void LRUCache::clear_dead_node(const LPMap *lp_map) { + for (size_t i = 0; i < CATCH_QUEUE_NUM; i++) { + size_t clear_size = block_size_ * 2; + if (queues_[i].size_approx() < clear_size * 4) { + continue; + } + size_t clear_count = 0; + ConcurrentQueue tmp(block_size_); + BlockType item; + while (queues_[i].try_dequeue(item) && (clear_count++ < clear_size)) { + if (!lp_map->isDeadBlock(item)) { + if (!tmp.enqueue(item)) { + LOG_ERROR("enqueue failed."); + } + } + } + while (tmp.try_dequeue(item)) { + if (!lp_map->isDeadBlock(item)) { + if (!queues_[i].enqueue(item)) { + LOG_ERROR("enqueue failed."); + } + } + } + } +} +} // namespace ailego +} // namespace zvec \ No newline at end of file diff --git a/src/include/zvec/ailego/buffer/buffer_pool.h b/src/include/zvec/ailego/buffer/buffer_pool.h index 69a01b2fc..4167b4644 100644 --- a/src/include/zvec/ailego/buffer/buffer_pool.h +++ b/src/include/zvec/ailego/buffer/buffer_pool.h @@ -18,6 +18,7 @@ #include #include #include "concurrentqueue.h" +#include "lru_cache.h" #if defined(_MSC_VER) #include @@ -29,29 +30,6 @@ namespace ailego { using block_id_t = size_t; using version_t = size_t; -class LPMap; - -class LRUCache { - public: - typedef std::pair BlockType; - typedef moodycamel::ConcurrentQueue ConcurrentQueue; - - int init(size_t block_size); - - bool evict_single_block(BlockType 
&item); - - bool add_single_block(const LPMap *lp_map, const BlockType &block, - int block_type); - - void clear_dead_node(const LPMap *lp_map); - - private: - constexpr static size_t CATCH_QUEUE_NUM = 3; - size_t block_size_{0}; - std::vector queues_; - alignas(64) std::atomic evict_queue_insertions_{0}; -}; - class LPMap { struct Entry { alignas(64) std::atomic ref_count; @@ -82,14 +60,13 @@ class LPMap { } inline bool isDeadBlock(LRUCache::BlockType block) const { - Entry &entry = entries_[block.first]; - return block.second != entry.load_count.load(); + Entry &entry = entries_[block.block.first]; + return block.block.second != entry.load_count.load(); } private: size_t entry_num_{0}; Entry *entries_{nullptr}; - LRUCache cache_; }; class VecBufferPoolHandle; diff --git a/src/include/zvec/ailego/buffer/lru_cache.h b/src/include/zvec/ailego/buffer/lru_cache.h new file mode 100644 index 000000000..af403fb60 --- /dev/null +++ b/src/include/zvec/ailego/buffer/lru_cache.h @@ -0,0 +1,92 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "buffer_pool.h" +#include "concurrentqueue.h" + +#if defined(_MSC_VER) +#include +#endif + +namespace zvec { +namespace ailego { + +class LPMap; + +using block_id_t = size_t; +using version_t = size_t; + +class LRUCache { + public: + struct BlockType { + std::pair block; + LPMap *lp_map; + }; + typedef moodycamel::ConcurrentQueue ConcurrentQueue; + + static LRUCache &get_instance() { + static LRUCache instance; + return instance; + } + LRUCache(const LRUCache &) = delete; + LRUCache &operator=(const LRUCache &) = delete; + LRUCache(LRUCache &&) = delete; + LRUCache &operator=(LRUCache &&) = delete; + + int init(); + + bool evict_single_block(BlockType &item); + + bool add_single_block(const BlockType &block, int block_type); + + void clear_dead_node(const LPMap *lp_map); + + private: + 
LRUCache() { + init(); + } + + private: + constexpr static size_t CATCH_QUEUE_NUM = 3; + size_t block_size_{0}; + std::vector queues_; + alignas(64) std::atomic evict_queue_insertions_{0}; +}; + +// class MemoryPool { +// public: +// int init(size_t pool_size) { +// return 0; +// } + +// char *acquire_buffer(size_t size) { +// return nullptr; +// } + +// void release_buffer(char *buffer, size_t buffer_size) { +// delete[] buffer; +// } + + +// private: +// std::atomic pool_size_{0}, used_size_{0}; +// }; + +} // namespace ailego +} // namespace zvec \ No newline at end of file From ef8194bfd930487fb4ed15ef1eb340944b1c1613 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Fri, 3 Apr 2026 17:06:07 +0800 Subject: [PATCH 02/46] add MemoryLimitPool --- src/ailego/buffer/buffer_pool.cc | 77 ++++++-------------- src/ailego/buffer/lru_cache.cc | 11 +++ src/include/zvec/ailego/buffer/buffer_pool.h | 18 +---- src/include/zvec/ailego/buffer/lru_cache.h | 57 +++++++++++---- 4 files changed, 82 insertions(+), 81 deletions(-) diff --git a/src/ailego/buffer/buffer_pool.cc b/src/ailego/buffer/buffer_pool.cc index b35f51ff9..782ef2336 100644 --- a/src/ailego/buffer/buffer_pool.cc +++ b/src/ailego/buffer/buffer_pool.cc @@ -36,12 +36,9 @@ void LPMap::init(size_t entry_num) { } } -char *LPMap::acquire_block(block_id_t block_id, bool lru_mode) { +char *LPMap::acquire_block(block_id_t block_id) { assert(block_id < entry_num_); Entry &entry = entries_[block_id]; - if (!lru_mode) { - return entry.buffer; - } while (true) { int current_count = entry.ref_count.load(std::memory_order_acquire); if (current_count < 0) { @@ -68,7 +65,7 @@ void LPMap::release_block(block_id_t block_id) { block.lp_map = this; block.block.first = block_id; block.block.second = entry.load_count.load(); - LRUCache::get_instance().add_single_block(block, 0); + LRUCache::get_instance().add_single_block(block, entry.size); } } @@ -86,9 +83,11 @@ char *LPMap::evict_block(block_id_t block_id) { } } -char 
*LPMap::set_block_acquired(block_id_t block_id, char *buffer) { +char *LPMap::set_block_acquired(block_id_t block_id, char *buffer, + size_t size) { assert(block_id < entry_num_); Entry &entry = entries_[block_id]; + entry.size = size; while (true) { int current_count = entry.ref_count.load(std::memory_order_relaxed); if (current_count >= 0) { @@ -109,20 +108,14 @@ char *LPMap::set_block_acquired(block_id_t block_id, char *buffer) { } } -void LPMap::recycle(moodycamel::ConcurrentQueue &free_buffers) { +void LPMap::recycle() { LRUCache::BlockType block; - do { - bool ok = LRUCache::get_instance().evict_single_block(block); - if (!ok) { - return; - } - } while (isDeadBlock(block)); + if (!LRUCache::get_instance().evict_block(block)) { + return; + } char *buffer = evict_block(block.block.first); if (buffer) { - if (!free_buffers.enqueue(buffer)) { - LOG_ERROR("recycle buffer enqueue failed."); - ailego_free(buffer); - } + MemoryLimitPool::get_instance().release_buffer(buffer, 0); } } @@ -149,39 +142,19 @@ VecBufferPool::VecBufferPool(const std::string &filename) { file_size_ = st.st_size; } -int VecBufferPool::init(size_t pool_capacity, size_t block_size, +int VecBufferPool::init(size_t /*pool_capacity*/, size_t block_size, size_t segment_count) { if (block_size == 0) { LOG_ERROR("block_size must not be 0"); return -1; } - pool_capacity_ = pool_capacity; - size_t buffer_num = pool_capacity_ / block_size + 10; size_t block_num = segment_count + 10; lp_map_.init(block_num); mutex_vec_.reserve(block_num); for (int i = 0; i < block_num; i++) { mutex_vec_.emplace_back(std::make_unique()); } - for (size_t i = 0; i < buffer_num; i++) { - char *buffer = (char *)ailego_malloc(block_size); - if (buffer != nullptr) { - if (!free_buffers_.enqueue(buffer)) { - LOG_ERROR("recycle buffer enqueue failed."); - ailego_free(buffer); - return -1; - } - } else { - LOG_ERROR("aligned_alloc %zu(size: %zu) failed", i, block_size); - return -1; - } - } - LOG_DEBUG("Buffer pool num: %zu, entry 
num: %zu", buffer_num, - lp_map_.entry_num()); - no_lru_mode_ = false; - if (lp_map_.entry_num() <= buffer_num) { - no_lru_mode_ = true; - } + LOG_DEBUG("entry num: %zu", lp_map_.entry_num()); return 0; } @@ -191,21 +164,23 @@ VecBufferPoolHandle VecBufferPool::get_handle() { char *VecBufferPool::acquire_buffer(block_id_t block_id, size_t offset, size_t size, int retry) { - char *buffer = lp_map_.acquire_block(block_id, !no_lru_mode()); + char *buffer = lp_map_.acquire_block(block_id); if (buffer) { return buffer; } std::lock_guard lock(*mutex_vec_[block_id]); - buffer = lp_map_.acquire_block(block_id, !no_lru_mode()); + buffer = lp_map_.acquire_block(block_id); if (buffer) { return buffer; } { - bool found = free_buffers_.try_dequeue(buffer); - if (!found && !no_lru_mode_) { + bool found = + MemoryLimitPool::get_instance().try_acquire_buffer(size, buffer); + if (!found) { for (int i = 0; i < retry; i++) { - lp_map_.recycle(free_buffers_); - found = free_buffers_.try_dequeue(buffer); + lp_map_.recycle(); + found = + MemoryLimitPool::get_instance().try_acquire_buffer(size, buffer); if (found) { break; } @@ -224,10 +199,10 @@ char *VecBufferPool::acquire_buffer(block_id_t block_id, size_t offset, #endif if (read_bytes != static_cast(size)) { LOG_ERROR("Buffer pool failed to read file at offset: %zu", offset); - free_buffers_.enqueue(buffer); + MemoryLimitPool::get_instance().release_buffer(buffer, size); return nullptr; } - return lp_map_.set_block_acquired(block_id, buffer); + return lp_map_.set_block_acquired(block_id, buffer, size); } int VecBufferPool::get_meta(size_t offset, size_t length, char *buffer) { @@ -254,15 +229,11 @@ int VecBufferPoolHandle::get_meta(size_t offset, size_t length, char *buffer) { } void VecBufferPoolHandle::release_one(block_id_t block_id) { - if (!pool_.no_lru_mode()) { - pool_.lp_map_.release_block(block_id); - } + pool_.lp_map_.release_block(block_id); } void VecBufferPoolHandle::acquire_one(block_id_t block_id) { - if 
(!pool_.no_lru_mode()) { - pool_.lp_map_.acquire_block(block_id, true); - } + pool_.lp_map_.acquire_block(block_id); } } // namespace ailego diff --git a/src/ailego/buffer/lru_cache.cc b/src/ailego/buffer/lru_cache.cc index 0ae257a2a..cece0dd5a 100644 --- a/src/ailego/buffer/lru_cache.cc +++ b/src/ailego/buffer/lru_cache.cc @@ -23,6 +23,17 @@ bool LRUCache::evict_single_block(BlockType &item) { return found; } +bool LRUCache::evict_block(BlockType &item) { + bool ok = false; + do { + ok = LRUCache::get_instance().evict_single_block(item); + if (!ok) { + return false; + } + } while (item.lp_map->isDeadBlock(item)); + return ok; +} + bool LRUCache::add_single_block(const BlockType &block, int block_type) { bool ok = queues_[block_type].enqueue(block); if (!ok) { diff --git a/src/include/zvec/ailego/buffer/buffer_pool.h b/src/include/zvec/ailego/buffer/buffer_pool.h index 4167b4644..0920363a5 100644 --- a/src/include/zvec/ailego/buffer/buffer_pool.h +++ b/src/include/zvec/ailego/buffer/buffer_pool.h @@ -35,6 +35,7 @@ class LPMap { alignas(64) std::atomic ref_count; alignas(64) std::atomic load_count; char *buffer; + size_t size; }; public: @@ -45,15 +46,15 @@ class LPMap { void init(size_t entry_num); - char *acquire_block(block_id_t block_id, bool lru_mode); + char *acquire_block(block_id_t block_id); void release_block(block_id_t block_id); char *evict_block(block_id_t block_id); - char *set_block_acquired(block_id_t block_id, char *buffer); + char *set_block_acquired(block_id_t block_id, char *buffer, size_t size); - void recycle(moodycamel::ConcurrentQueue &free_buffers); + void recycle(); size_t entry_num() const { return entry_num_; @@ -77,11 +78,6 @@ class VecBufferPool { VecBufferPool(const std::string &filename); ~VecBufferPool() { - // Free all buffers in the free list - char *buf = nullptr; - while (free_buffers_.try_dequeue(buf)) { - ailego_free(buf); - } // Free any buffers still pinned in the map for (size_t i = 0; i < lp_map_.entry_num(); ++i) { char *b 
= lp_map_.evict_block(i); @@ -107,22 +103,16 @@ class VecBufferPool { return file_size_; } - bool no_lru_mode() { - return no_lru_mode_; - } - private: int fd_; size_t file_size_; size_t pool_capacity_; - bool no_lru_mode_; public: LPMap lp_map_; private: std::vector> mutex_vec_; - moodycamel::ConcurrentQueue free_buffers_; }; class VecBufferPoolHandle { diff --git a/src/include/zvec/ailego/buffer/lru_cache.h b/src/include/zvec/ailego/buffer/lru_cache.h index af403fb60..5df9938a6 100644 --- a/src/include/zvec/ailego/buffer/lru_cache.h +++ b/src/include/zvec/ailego/buffer/lru_cache.h @@ -53,6 +53,8 @@ class LRUCache { bool evict_single_block(BlockType &item); + bool evict_block(BlockType &item); + bool add_single_block(const BlockType &block, int block_type); void clear_dead_node(const LPMap *lp_map); @@ -69,24 +71,51 @@ class LRUCache { alignas(64) std::atomic evict_queue_insertions_{0}; }; -// class MemoryPool { -// public: -// int init(size_t pool_size) { -// return 0; -// } +class MemoryLimitPool { + public: + static MemoryLimitPool &get_instance() { + static MemoryLimitPool instance; + return instance; + } + MemoryLimitPool(const MemoryLimitPool &) = delete; + MemoryLimitPool &operator=(const MemoryLimitPool &) = delete; + MemoryLimitPool(MemoryLimitPool &&) = delete; + MemoryLimitPool &operator=(MemoryLimitPool &&) = delete; + + int init(size_t pool_size) { + pool_size_ = pool_size; + return 0; + } -// char *acquire_buffer(size_t size) { -// return nullptr; -// } + bool try_acquire_buffer(const size_t buffer_size, char *&buffer) { + size_t expected, desired; + do { + expected = used_size_.load(); + if (expected >= pool_size_) { + return false; + } + desired = expected + buffer_size; + } while (!used_size_.compare_exchange_weak(expected, desired)); + buffer = (char *)ailego_malloc(buffer_size); + return true; + } -// void release_buffer(char *buffer, size_t buffer_size) { -// delete[] buffer; -// } + void release_buffer(const char *buffer, const size_t 
buffer_size) { + size_t expected, desired; + do { + expected = used_size_.load(); + desired = expected - buffer_size; + } while (!used_size_.compare_exchange_weak(expected, desired)); + delete[] buffer; + } + private: + MemoryLimitPool() = default; -// private: -// std::atomic pool_size_{0}, used_size_{0}; -// }; + private: + size_t pool_size_{0}; + std::atomic used_size_{0}; +}; } // namespace ailego } // namespace zvec \ No newline at end of file From e2d5a0b89d9e93957a65f356bc096e5a9198a856 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Fri, 3 Apr 2026 17:35:55 +0800 Subject: [PATCH 03/46] upd --- src/ailego/buffer/buffer_pool.cc | 16 +++------------- src/ailego/buffer/lru_cache.cc | 8 ++++++++ src/include/zvec/ailego/buffer/buffer_pool.h | 2 -- src/include/zvec/ailego/buffer/lru_cache.h | 12 ++++++++++-- 4 files changed, 21 insertions(+), 17 deletions(-) diff --git a/src/ailego/buffer/buffer_pool.cc b/src/ailego/buffer/buffer_pool.cc index 782ef2336..6a706bb9d 100644 --- a/src/ailego/buffer/buffer_pool.cc +++ b/src/ailego/buffer/buffer_pool.cc @@ -65,7 +65,7 @@ void LPMap::release_block(block_id_t block_id) { block.lp_map = this; block.block.first = block_id; block.block.second = entry.load_count.load(); - LRUCache::get_instance().add_single_block(block, entry.size); + LRUCache::get_instance().add_single_block(block, 0); } } @@ -76,6 +76,7 @@ char *LPMap::evict_block(block_id_t block_id) { if (entry.ref_count.compare_exchange_strong( expected, std::numeric_limits::min())) { char *buffer = entry.buffer; + MemoryLimitPool::get_instance().release_buffer(buffer, entry.size); entry.buffer = nullptr; return buffer; } else { @@ -108,17 +109,6 @@ char *LPMap::set_block_acquired(block_id_t block_id, char *buffer, } } -void LPMap::recycle() { - LRUCache::BlockType block; - if (!LRUCache::get_instance().evict_block(block)) { - return; - } - char *buffer = evict_block(block.block.first); - if (buffer) { - MemoryLimitPool::get_instance().release_buffer(buffer, 0); - } -} - 
VecBufferPool::VecBufferPool(const std::string &filename) { #if defined(_MSC_VER) fd_ = _open(filename.c_str(), O_RDONLY | _O_BINARY); @@ -178,7 +168,7 @@ char *VecBufferPool::acquire_buffer(block_id_t block_id, size_t offset, MemoryLimitPool::get_instance().try_acquire_buffer(size, buffer); if (!found) { for (int i = 0; i < retry; i++) { - lp_map_.recycle(); + LRUCache::get_instance().recycle(); found = MemoryLimitPool::get_instance().try_acquire_buffer(size, buffer); if (found) { diff --git a/src/ailego/buffer/lru_cache.cc b/src/ailego/buffer/lru_cache.cc index cece0dd5a..06093c6fb 100644 --- a/src/ailego/buffer/lru_cache.cc +++ b/src/ailego/buffer/lru_cache.cc @@ -34,6 +34,14 @@ bool LRUCache::evict_block(BlockType &item) { return ok; } +bool LRUCache::recycle() { + BlockType item; + while (MemoryLimitPool::get_instance().is_full() && evict_block(item)) { + item.lp_map->evict_block(item.block.first); + } + return MemoryLimitPool::get_instance().is_full(); +} + bool LRUCache::add_single_block(const BlockType &block, int block_type) { bool ok = queues_[block_type].enqueue(block); if (!ok) { diff --git a/src/include/zvec/ailego/buffer/buffer_pool.h b/src/include/zvec/ailego/buffer/buffer_pool.h index 0920363a5..c6b2f12a1 100644 --- a/src/include/zvec/ailego/buffer/buffer_pool.h +++ b/src/include/zvec/ailego/buffer/buffer_pool.h @@ -54,8 +54,6 @@ class LPMap { char *set_block_acquired(block_id_t block_id, char *buffer, size_t size); - void recycle(); - size_t entry_num() const { return entry_num_; } diff --git a/src/include/zvec/ailego/buffer/lru_cache.h b/src/include/zvec/ailego/buffer/lru_cache.h index 5df9938a6..3806f79a7 100644 --- a/src/include/zvec/ailego/buffer/lru_cache.h +++ b/src/include/zvec/ailego/buffer/lru_cache.h @@ -19,6 +19,7 @@ #include #include "buffer_pool.h" #include "concurrentqueue.h" +#include #if defined(_MSC_VER) #include @@ -59,6 +60,8 @@ class LRUCache { void clear_dead_node(const LPMap *lp_map); + bool recycle(); + private: LRUCache() { 
init(); @@ -92,6 +95,7 @@ class MemoryLimitPool { do { expected = used_size_.load(); if (expected >= pool_size_) { + LOG_ERROR("expected: %lu, pool_size: %lu", expected, pool_size_); return false; } desired = expected + buffer_size; @@ -100,13 +104,17 @@ class MemoryLimitPool { return true; } - void release_buffer(const char *buffer, const size_t buffer_size) { + void release_buffer(char *buffer, const size_t buffer_size) { size_t expected, desired; do { expected = used_size_.load(); desired = expected - buffer_size; } while (!used_size_.compare_exchange_weak(expected, desired)); - delete[] buffer; + ailego_free(buffer); + } + + bool is_full() { + return used_size_.load() >= pool_size_; } private: From 1deed5e4d6eb499456f8baaa17fb3d85985da1a2 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Fri, 3 Apr 2026 17:55:44 +0800 Subject: [PATCH 04/46] fix memory_block --- src/ailego/buffer/buffer_pool.cc | 6 ++++-- src/include/zvec/ailego/buffer/buffer_pool.h | 1 - src/include/zvec/ailego/buffer/lru_cache.h | 3 ++- tests/core/algorithm/flat/flat_streamer_buffer_test.cc | 2 ++ 4 files changed, 8 insertions(+), 4 deletions(-) diff --git a/src/ailego/buffer/buffer_pool.cc b/src/ailego/buffer/buffer_pool.cc index 6a706bb9d..72d7c0338 100644 --- a/src/ailego/buffer/buffer_pool.cc +++ b/src/ailego/buffer/buffer_pool.cc @@ -76,8 +76,10 @@ char *LPMap::evict_block(block_id_t block_id) { if (entry.ref_count.compare_exchange_strong( expected, std::numeric_limits::min())) { char *buffer = entry.buffer; - MemoryLimitPool::get_instance().release_buffer(buffer, entry.size); - entry.buffer = nullptr; + if (buffer) { + MemoryLimitPool::get_instance().release_buffer(buffer, entry.size); + entry.buffer = nullptr; + } return buffer; } else { return nullptr; diff --git a/src/include/zvec/ailego/buffer/buffer_pool.h b/src/include/zvec/ailego/buffer/buffer_pool.h index c6b2f12a1..04e1cc593 100644 --- a/src/include/zvec/ailego/buffer/buffer_pool.h +++ 
b/src/include/zvec/ailego/buffer/buffer_pool.h @@ -79,7 +79,6 @@ class VecBufferPool { // Free any buffers still pinned in the map for (size_t i = 0; i < lp_map_.entry_num(); ++i) { char *b = lp_map_.evict_block(i); - if (b) ailego_free(b); } #if defined(_MSC_VER) _close(fd_); diff --git a/src/include/zvec/ailego/buffer/lru_cache.h b/src/include/zvec/ailego/buffer/lru_cache.h index 3806f79a7..2c0d5138f 100644 --- a/src/include/zvec/ailego/buffer/lru_cache.h +++ b/src/include/zvec/ailego/buffer/lru_cache.h @@ -87,6 +87,7 @@ class MemoryLimitPool { int init(size_t pool_size) { pool_size_ = pool_size; + used_size_ = 0; return 0; } @@ -95,7 +96,7 @@ class MemoryLimitPool { do { expected = used_size_.load(); if (expected >= pool_size_) { - LOG_ERROR("expected: %lu, pool_size: %lu", expected, pool_size_); + // LOG_ERROR("expected: %lu, pool_size: %lu", expected, pool_size_); return false; } desired = expected + buffer_size; diff --git a/tests/core/algorithm/flat/flat_streamer_buffer_test.cc b/tests/core/algorithm/flat/flat_streamer_buffer_test.cc index 396e57616..10308da9e 100644 --- a/tests/core/algorithm/flat/flat_streamer_buffer_test.cc +++ b/tests/core/algorithm/flat/flat_streamer_buffer_test.cc @@ -47,6 +47,7 @@ void FlatStreamerTest::TearDown(void) { } TEST_F(FlatStreamerTest, TestLinearSearch) { + MemoryLimitPool::get_instance().init(2 * 1024UL * 1024UL * 1024UL); IndexStreamer::Pointer write_streamer = IndexFactory::CreateStreamer("FlatStreamer"); ASSERT_TRUE(write_streamer != nullptr); @@ -168,6 +169,7 @@ TEST_F(FlatStreamerTest, TestLinearSearch) { } TEST_F(FlatStreamerTest, TestLinearSearchWithLRU) { + MemoryLimitPool::get_instance().init(2 * 1024UL * 1024UL * 1024UL); constexpr size_t static dim = 1600; IndexStreamer::Pointer write_streamer = IndexFactory::CreateStreamer("FlatStreamer"); From a122b2cf8ceefb7fca77d4e825874a4850bd1f09 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Sun, 5 Apr 2026 11:12:22 +0800 Subject: [PATCH 05/46] fix ut --- 
src/include/zvec/ailego/buffer/buffer_pool.h | 2 +- tests/core/algorithm/flat/flat_streamer_buffer_time_test.cc | 1 + tests/core/algorithm/hnsw/hnsw_streamer_buffer_test.cc | 1 + tests/core/interface/index_interface_test.cc | 4 +++- tests/db/index/column/vector_column_indexer_test.cc | 2 ++ tests/db/index/segment/segment_test.cc | 2 ++ 6 files changed, 10 insertions(+), 2 deletions(-) diff --git a/src/include/zvec/ailego/buffer/buffer_pool.h b/src/include/zvec/ailego/buffer/buffer_pool.h index 04e1cc593..91e6ee00d 100644 --- a/src/include/zvec/ailego/buffer/buffer_pool.h +++ b/src/include/zvec/ailego/buffer/buffer_pool.h @@ -78,7 +78,7 @@ class VecBufferPool { ~VecBufferPool() { // Free any buffers still pinned in the map for (size_t i = 0; i < lp_map_.entry_num(); ++i) { - char *b = lp_map_.evict_block(i); + lp_map_.evict_block(i); } #if defined(_MSC_VER) _close(fd_); diff --git a/tests/core/algorithm/flat/flat_streamer_buffer_time_test.cc b/tests/core/algorithm/flat/flat_streamer_buffer_time_test.cc index 37d28ecd6..b10278ff8 100644 --- a/tests/core/algorithm/flat/flat_streamer_buffer_time_test.cc +++ b/tests/core/algorithm/flat/flat_streamer_buffer_time_test.cc @@ -140,6 +140,7 @@ TEST_F(FlatStreamerTest, TestLinearSearchMMap) { } TEST_F(FlatStreamerTest, TestLinearSearchBuffer) { + MemoryLimitPool::get_instance().init(2 * 1024UL * 1024UL * 1024UL); IndexStreamer::Pointer write_streamer = IndexFactory::CreateStreamer("FlatStreamer"); ASSERT_TRUE(write_streamer != nullptr); diff --git a/tests/core/algorithm/hnsw/hnsw_streamer_buffer_test.cc b/tests/core/algorithm/hnsw/hnsw_streamer_buffer_test.cc index 6f111a4bf..30f9d7cbb 100644 --- a/tests/core/algorithm/hnsw/hnsw_streamer_buffer_test.cc +++ b/tests/core/algorithm/hnsw/hnsw_streamer_buffer_test.cc @@ -48,6 +48,7 @@ void HnswStreamerTest::TearDown(void) { } TEST_F(HnswStreamerTest, TestHnswSearch) { + MemoryLimitPool::get_instance().init(2 * 1024UL * 1024UL * 1024UL); IndexStreamer::Pointer write_streamer = 
IndexFactory::CreateStreamer("HnswStreamer"); ASSERT_TRUE(write_streamer != nullptr); diff --git a/tests/core/interface/index_interface_test.cc b/tests/core/interface/index_interface_test.cc index bba80121f..f5f473dba 100644 --- a/tests/core/interface/index_interface_test.cc +++ b/tests/core/interface/index_interface_test.cc @@ -27,6 +27,7 @@ #include "zvec/core/interface/index_factory.h" #include "zvec/core/interface/index_param.h" #include "zvec/core/interface/index_param_builders.h" +#include #if defined(__GNUC__) || defined(__GNUG__) #pragma GCC diagnostic push @@ -155,6 +156,7 @@ TEST(IndexInterface, General) { } TEST(IndexInterface, BufferGeneral) { + zvec::ailego::MemoryLimitPool::get_instance().init(100 * 1024 * 1024); constexpr uint32_t kDimension = 64; const std::string index_name{"test.index"}; @@ -261,7 +263,7 @@ TEST(IndexInterface, BufferGeneral) { .with_fetch_vector(true) .with_ef_search(20) .build()); - zvec::ailego::BufferManager::Instance().cleanup(); + // zvec::ailego::BufferManager::Instance().cleanup(); } diff --git a/tests/db/index/column/vector_column_indexer_test.cc b/tests/db/index/column/vector_column_indexer_test.cc index cbaf2d502..b16c5cea1 100644 --- a/tests/db/index/column/vector_column_indexer_test.cc +++ b/tests/db/index/column/vector_column_indexer_test.cc @@ -17,6 +17,7 @@ #include #include #include +#include #include "db/index/column/vector_column/vector_column_params.h" #include "tests/test_util.h" #include "zvec/ailego/utility/float_helper.h" @@ -2136,6 +2137,7 @@ TEST(VectorColumnIndexerTest, Failure) { // Test case 10: use_mmap = false { + zvec::ailego::MemoryLimitPool::get_instance().init(10 * 1024UL * 1024UL); auto indexer = std::make_shared( index_file_path, FieldSchema("test", DataType::VECTOR_FP32, 3, false, diff --git a/tests/db/index/segment/segment_test.cc b/tests/db/index/segment/segment_test.cc index 9530b8cf1..422a61b24 100644 --- a/tests/db/index/segment/segment_test.cc +++ b/tests/db/index/segment/segment_test.cc 
@@ -38,6 +38,7 @@ #include "db/index/storage/wal/wal_file.h" #include "utils/utils.h" #include "zvec/db/options.h" +#include using namespace zvec; @@ -50,6 +51,7 @@ class SegmentTest : public testing::TestWithParam { FileHelper::CreateDirectory(col_path); ailego::BufferManager::Instance().init(MIN_MEMORY_LIMIT_BYTES, 1); + zvec::ailego::MemoryLimitPool::get_instance().init(MIN_MEMORY_LIMIT_BYTES); std::string idmap_path = FileHelper::MakeFilePath(col_path, FileID::ID_FILE, 0); From 3c9451c8fa4c835d8799799916db23bf4ccffdfc Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Sun, 5 Apr 2026 16:34:46 +0800 Subject: [PATCH 06/46] upd --- src/ailego/buffer/lru_cache.cc | 34 ++++++++++++++++++++++ src/include/zvec/ailego/buffer/lru_cache.h | 33 +++------------------ 2 files changed, 38 insertions(+), 29 deletions(-) diff --git a/src/ailego/buffer/lru_cache.cc b/src/ailego/buffer/lru_cache.cc index 06093c6fb..435c47764 100644 --- a/src/ailego/buffer/lru_cache.cc +++ b/src/ailego/buffer/lru_cache.cc @@ -80,5 +80,39 @@ void LRUCache::clear_dead_node(const LPMap *lp_map) { } } } + +int MemoryLimitPool::init(size_t pool_size) { + pool_size_ = pool_size; + used_size_ = 0; + return 0; +} + +bool MemoryLimitPool::try_acquire_buffer(const size_t buffer_size, char *&buffer) { + size_t expected, desired; + do { + expected = used_size_.load(); + if (expected >= pool_size_) { + // LOG_ERROR("expected: %lu, pool_size: %lu", expected, pool_size_); + return false; + } + desired = expected + buffer_size; + } while (!used_size_.compare_exchange_weak(expected, desired)); + buffer = (char *)ailego_malloc(buffer_size); + return true; +} + +void MemoryLimitPool::release_buffer(char *buffer, const size_t buffer_size) { + size_t expected, desired; + do { + expected = used_size_.load(); + desired = expected - buffer_size; + } while (!used_size_.compare_exchange_weak(expected, desired)); + ailego_free(buffer); +} + +bool MemoryLimitPool::is_full() { + return used_size_.load() >= pool_size_; +} + } // 
namespace ailego } // namespace zvec \ No newline at end of file diff --git a/src/include/zvec/ailego/buffer/lru_cache.h b/src/include/zvec/ailego/buffer/lru_cache.h index 2c0d5138f..0767a1eec 100644 --- a/src/include/zvec/ailego/buffer/lru_cache.h +++ b/src/include/zvec/ailego/buffer/lru_cache.h @@ -85,38 +85,13 @@ class MemoryLimitPool { MemoryLimitPool(MemoryLimitPool &&) = delete; MemoryLimitPool &operator=(MemoryLimitPool &&) = delete; - int init(size_t pool_size) { - pool_size_ = pool_size; - used_size_ = 0; - return 0; - } + int init(size_t pool_size); - bool try_acquire_buffer(const size_t buffer_size, char *&buffer) { - size_t expected, desired; - do { - expected = used_size_.load(); - if (expected >= pool_size_) { - // LOG_ERROR("expected: %lu, pool_size: %lu", expected, pool_size_); - return false; - } - desired = expected + buffer_size; - } while (!used_size_.compare_exchange_weak(expected, desired)); - buffer = (char *)ailego_malloc(buffer_size); - return true; - } + bool try_acquire_buffer(const size_t buffer_size, char *&buffer); - void release_buffer(char *buffer, const size_t buffer_size) { - size_t expected, desired; - do { - expected = used_size_.load(); - desired = expected - buffer_size; - } while (!used_size_.compare_exchange_weak(expected, desired)); - ailego_free(buffer); - } + void release_buffer(char *buffer, const size_t buffer_size); - bool is_full() { - return used_size_.load() >= pool_size_; - } + bool is_full(); private: MemoryLimitPool() = default; From deac22323482d53f91050ed959ac584a35d7e31c Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Mon, 6 Apr 2026 22:20:23 +0800 Subject: [PATCH 07/46] upd --- src/ailego/buffer/lru_cache.cc | 36 ++- .../index/storage/bufferpool_forward_store.cc | 19 +- .../index/storage/lazy_record_batch_reader.h | 11 +- src/include/zvec/ailego/buffer/buffer_pool.h | 5 +- src/include/zvec/ailego/buffer/lru_cache.h | 28 +- .../zvec/ailego/buffer/parquet_buffer_pool.h | 287 ++++++++++++++++++ 6 files changed, 
365 insertions(+), 21 deletions(-) create mode 100644 src/include/zvec/ailego/buffer/parquet_buffer_pool.h diff --git a/src/ailego/buffer/lru_cache.cc b/src/ailego/buffer/lru_cache.cc index 435c47764..552aee195 100644 --- a/src/ailego/buffer/lru_cache.cc +++ b/src/ailego/buffer/lru_cache.cc @@ -30,7 +30,7 @@ bool LRUCache::evict_block(BlockType &item) { if (!ok) { return false; } - } while (item.lp_map->isDeadBlock(item)); + } while (!is_valid(item.lp_map) || item.lp_map->isDeadBlock(item)); return ok; } @@ -50,29 +50,29 @@ bool LRUCache::add_single_block(const BlockType &block, int block_type) { } evict_queue_insertions_.fetch_add(1, std::memory_order_relaxed); if (evict_queue_insertions_ % block_size_ == 0) { - this->clear_dead_node(block.lp_map); + this->clear_dead_node(); } return true; } -void LRUCache::clear_dead_node(const LPMap *lp_map) { +void LRUCache::clear_dead_node() { for (size_t i = 0; i < CATCH_QUEUE_NUM; i++) { size_t clear_size = block_size_ * 2; if (queues_[i].size_approx() < clear_size * 4) { continue; } size_t clear_count = 0; - ConcurrentQueue tmp(block_size_); + ConcurrentQueue tmp; BlockType item; while (queues_[i].try_dequeue(item) && (clear_count++ < clear_size)) { - if (!lp_map->isDeadBlock(item)) { + if (is_valid(item.lp_map) && !item.lp_map->isDeadBlock(item)) { if (!tmp.enqueue(item)) { LOG_ERROR("enqueue failed."); } } } while (tmp.try_dequeue(item)) { - if (!lp_map->isDeadBlock(item)) { + if (is_valid(item.lp_map) && !item.lp_map->isDeadBlock(item)) { if (!queues_[i].enqueue(item)) { LOG_ERROR("enqueue failed."); } @@ -87,7 +87,8 @@ int MemoryLimitPool::init(size_t pool_size) { return 0; } -bool MemoryLimitPool::try_acquire_buffer(const size_t buffer_size, char *&buffer) { +bool MemoryLimitPool::try_acquire_buffer(const size_t buffer_size, + char *&buffer) { size_t expected, desired; do { expected = used_size_.load(); @@ -101,6 +102,19 @@ bool MemoryLimitPool::try_acquire_buffer(const size_t buffer_size, char *&buffer return true; } 
+bool MemoryLimitPool::try_acquire_parquet(const size_t buffer_size) { + size_t expected, desired; + do { + expected = used_size_.load(); + if (expected >= pool_size_) { + // LOG_ERROR("expected: %lu, pool_size: %lu", expected, pool_size_); + return false; + } + desired = expected + buffer_size; + } while (!used_size_.compare_exchange_weak(expected, desired)); + return true; +} + void MemoryLimitPool::release_buffer(char *buffer, const size_t buffer_size) { size_t expected, desired; do { @@ -110,6 +124,14 @@ void MemoryLimitPool::release_buffer(char *buffer, const size_t buffer_size) { ailego_free(buffer); } +void MemoryLimitPool::release_parquet(const size_t buffer_size) { + size_t expected, desired; + do { + expected = used_size_.load(); + desired = expected - buffer_size; + } while (!used_size_.compare_exchange_weak(expected, desired)); +} + bool MemoryLimitPool::is_full() { return used_size_.load() >= pool_size_; } diff --git a/src/db/index/storage/bufferpool_forward_store.cc b/src/db/index/storage/bufferpool_forward_store.cc index a8cbaee3f..1557e0740 100644 --- a/src/db/index/storage/bufferpool_forward_store.cc +++ b/src/db/index/storage/bufferpool_forward_store.cc @@ -22,6 +22,7 @@ #include #include #include +#include #include #include "db/index/storage/store_helper.h" #include "lazy_record_batch_reader.h" @@ -192,9 +193,12 @@ TablePtr BufferPoolForwardStore::fetch(const std::vector &columns, for (const auto &[rg_id, pairs] : rg_to_local) { for (size_t i = 0; i < col_indices.size(); ++i) { int col_idx = col_indices[i]; - auto buffer_id = ailego::BufferID::ParquetID(file_path_, col_idx, rg_id); - auto buffer_handle = buf_mgr.acquire(buffer_id); - auto col_chunked_array = buffer_handle.pin_parquet_data(); + auto buffer_id = ailego::ParquetBufferID(file_path_, col_idx, rg_id); + // ailego::BufferID::ParquetID(file_path_, col_idx, rg_id); + // auto buffer_handle = buf_mgr.acquire(buffer_id); + auto col_chunked_array = + 
ailego::ParquetBufferPool::get_instance().acquire(buffer_id); + // buffer_handle.pin_parquet_data(); if (!col_chunked_array) { LOG_ERROR( @@ -318,9 +322,12 @@ ExecBatchPtr BufferPoolForwardStore::fetch( auto &buf_mgr = ailego::BufferManager::Instance(); for (size_t i = 0; i < col_indices.size(); ++i) { int col_idx = col_indices[i]; - auto buffer_id = ailego::BufferID::ParquetID(file_path_, col_idx, rg_id); - auto buffer_handle = buf_mgr.acquire(buffer_id); - auto col_chunked_array = buffer_handle.pin_parquet_data(); + auto buffer_id = ailego::ParquetBufferID(file_path_, col_idx, rg_id); + // ailego::BufferID::ParquetID(file_path_, col_idx, rg_id); + // auto buffer_handle = buf_mgr.acquire(buffer_id); + auto col_chunked_array = + ailego::ParquetBufferPool::get_instance().acquire(buffer_id); + // buffer_handle.pin_parquet_data(); if (!col_chunked_array) { LOG_ERROR( diff --git a/src/db/index/storage/lazy_record_batch_reader.h b/src/db/index/storage/lazy_record_batch_reader.h index c9e124c5c..525f79615 100644 --- a/src/db/index/storage/lazy_record_batch_reader.h +++ b/src/db/index/storage/lazy_record_batch_reader.h @@ -17,6 +17,7 @@ #include #include #include +#include #include "db/common/constants.h" @@ -128,10 +129,12 @@ class ParquetRecordBatchReader : public arrow::RecordBatchReader { if (with_cache_) { auto &buf_mgr = ailego::BufferManager::Instance(); for (size_t col_idx = 0; col_idx < col_indices_.size(); ++col_idx) { - auto buffer_id = ailego::BufferID::ParquetID( - file_path_, col_indices_[col_idx], rg_id); - auto buffer_handle = buf_mgr.acquire(buffer_id); - auto col_chunked_array = buffer_handle.pin_parquet_data(); + // auto buffer_id = ailego::BufferID::ParquetID( + // file_path_, col_indices_[col_idx], rg_id); + // auto buffer_handle = buf_mgr.acquire(buffer_id); + // auto col_chunked_array = buffer_handle.pin_parquet_data(); + auto buffer_id = ailego::ParquetBufferID(file_path_, col_indices_[col_idx], rg_id); + auto col_chunked_array = 
ailego::ParquetBufferPool::get_instance().acquire(buffer_id); if (col_chunked_array) { std::shared_ptr concat; auto concat_result = arrow::Concatenate(col_chunked_array->chunks(), diff --git a/src/include/zvec/ailego/buffer/buffer_pool.h b/src/include/zvec/ailego/buffer/buffer_pool.h index 91e6ee00d..f814e9b34 100644 --- a/src/include/zvec/ailego/buffer/buffer_pool.h +++ b/src/include/zvec/ailego/buffer/buffer_pool.h @@ -39,9 +39,12 @@ class LPMap { }; public: - LPMap() : entry_num_(0), entries_(nullptr) {} + LPMap() : entry_num_(0), entries_(nullptr) { + LRUCache::get_instance().set_valid(this); + } ~LPMap() { delete[] entries_; + LRUCache::get_instance().set_invalid(this); } void init(size_t entry_num); diff --git a/src/include/zvec/ailego/buffer/lru_cache.h b/src/include/zvec/ailego/buffer/lru_cache.h index 0767a1eec..79e03b693 100644 --- a/src/include/zvec/ailego/buffer/lru_cache.h +++ b/src/include/zvec/ailego/buffer/lru_cache.h @@ -13,13 +13,14 @@ #include #include #include +#include #include #include #include +#include #include -#include "buffer_pool.h" -#include "concurrentqueue.h" #include +#include "concurrentqueue.h" #if defined(_MSC_VER) #include @@ -58,7 +59,22 @@ class LRUCache { bool add_single_block(const BlockType &block, int block_type); - void clear_dead_node(const LPMap *lp_map); + void clear_dead_node(); + + bool is_valid(LPMap *lp_map) { + std::shared_lock lock(valid_lp_maps_mutex_); + return valid_lp_maps_.find(lp_map) != valid_lp_maps_.end(); + } + + void set_valid(LPMap *lp_map) { + std::unique_lock lock(valid_lp_maps_mutex_); + valid_lp_maps_.insert(lp_map); + } + + void set_invalid(LPMap *lp_map) { + std::unique_lock lock(valid_lp_maps_mutex_); + valid_lp_maps_.erase(lp_map); + } bool recycle(); @@ -72,6 +88,8 @@ class LRUCache { size_t block_size_{0}; std::vector queues_; alignas(64) std::atomic evict_queue_insertions_{0}; + std::unordered_set valid_lp_maps_; + std::shared_mutex valid_lp_maps_mutex_; }; class MemoryLimitPool { @@ -89,8 
+107,12 @@ class MemoryLimitPool { bool try_acquire_buffer(const size_t buffer_size, char *&buffer); + bool try_acquire_parquet(const size_t buffer_size); + void release_buffer(char *buffer, const size_t buffer_size); + void release_parquet(const size_t buffer_size); + bool is_full(); private: diff --git a/src/include/zvec/ailego/buffer/parquet_buffer_pool.h b/src/include/zvec/ailego/buffer/parquet_buffer_pool.h new file mode 100644 index 000000000..a5b67dd64 --- /dev/null +++ b/src/include/zvec/ailego/buffer/parquet_buffer_pool.h @@ -0,0 +1,287 @@ +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include "lru_cache.h" + +namespace arrow { +class ChunkedArray; +class Array; +class DataType; +class Scalar; +template +class Result; +class Status; +class Buffer; +} // namespace arrow + +namespace zvec { +namespace ailego { + +using block_id_t = size_t; +using version_t = size_t; + +class LRUCache; + +struct ParquetBufferID { + std::string filename; + int column; + int row_group; + ParquetBufferID(std::string &filename, int column, int row_group) + : filename(filename), column(column), row_group(row_group) {} +}; + +struct IDHash { + size_t operator()(const ParquetBufferID &buffer_id) const { + struct stat file_stat; + uint64_t file_id; + if (stat(buffer_id.filename.c_str(), &file_stat) == 0) { + file_id = file_stat.st_ino; + } + size_t hash = 1; + hash = hash ^ (std::hash{}(file_id)); + hash = hash * 31 + std::hash{}(buffer_id.column); + hash = hash * 31 + std::hash{}(buffer_id.row_group); + return hash; + } +}; + +struct IDEqual { + bool operator()(const ParquetBufferID &a, const ParquetBufferID &b) const { + if (a.filename != b.filename) { + return false; + } + return a.column == b.column && a.row_group == b.row_group; + } +}; + + +class ParquetBufferPool { + public: + typedef std::shared_ptr Pointer; + + struct ParquetBufferContext { + // A shared pointer to the buffers 
allocated for arrow parquet data + std::shared_ptr arrow{nullptr}; + + // Guard original arrow buffers to prevent premature deletion + std::vector> arrow_refs{}; + + size_t size; + alignas(64) std::atomic ref_count{std::numeric_limits::min()}; + alignas(64) std::atomic load_count{0}; + }; + + struct ArrowBufferDeleter { + explicit ArrowBufferDeleter(ParquetBufferPool *c, ParquetBufferID i) + : pool(c), id(i) {} + ParquetBufferPool *pool; + ParquetBufferID id; + // Only reduces the reference count but does not actually release the + // buffer, since the buffer memory is managed by the BufferManager. + void operator()(arrow::Buffer *) { + pool->release(id); + } + }; + + using Table = std::unordered_map; + + arrow::Status readable_open( + std::shared_ptr &input, + const std::string &file_name) { + ARROW_ASSIGN_OR_RAISE(input, arrow::io::ReadableFile::Open(file_name)); + return arrow::Status::OK(); + } + arrow::Status file_open(std::unique_ptr &reader, + std::shared_ptr &input, + arrow::MemoryPool *mem_pool) { + ARROW_ASSIGN_OR_RAISE(reader, parquet::arrow::OpenFile(input, mem_pool)); + return arrow::Status::OK(); + } + bool acquire(ParquetBufferID buffer_id, ParquetBufferContext &context) { + // TODO: file handler and memory pool can be optimized + arrow::MemoryPool *mem_pool = arrow::default_memory_pool(); + + // Open file + std::shared_ptr input; + const auto &file_name = buffer_id.filename; + if (!readable_open(input, file_name).ok()) { + LOG_ERROR("Failed to open parquet file[%s]", file_name.c_str()); + return false; + } + + // Open reader + std::unique_ptr reader; + if (!file_open(reader, input, mem_pool).ok()) { + LOG_ERROR("Failed to open parquet file[%s]", file_name.c_str()); + return false; + } + + // Perform read + int row_group = buffer_id.row_group; + int column = buffer_id.column; + auto s = reader->RowGroup(row_group)->Column(column)->Read(&context.arrow); + if (!s.ok()) { + LOG_ERROR("Failed to read parquet file[%s]", file_name.c_str()); + context.arrow 
= nullptr; + return false; + } + + size_t size = 0; + // Compute the memory usage and hijack Arrow's buffers with our + // implementation + for (auto &array : context.arrow->chunks()) { + auto &buffers = array->data()->buffers; + for (size_t buf_idx = 0; buf_idx < buffers.size(); ++buf_idx) { + if (buffers[buf_idx] == nullptr) { + continue; + } + // Keep references to original buffers to prevent premature deletion + context.arrow_refs.emplace_back(buffers[buf_idx]); + size += buffers[buf_idx]->capacity(); + // Create hijacked buffer with custom deleter that notifies us when + // Arrow is finished with the buffer + std::shared_ptr hijacked_buffer( + buffers[buf_idx].get(), ArrowBufferDeleter(this, buffer_id)); + buffers[buf_idx] = hijacked_buffer; + } + } + context.size = size; + + return true; + } + + bool acquire_buffer(ParquetBufferID buffer_id, + std::shared_ptr &arrow) { + { + std::shared_lock lock(table_mutex_); + auto iter = table_.find(buffer_id); + if (iter != table_.end()) { + arrow = acquire(buffer_id); + return true; + } + } + { + std::unique_lock lock(table_mutex_); + { + bool found = MemoryLimitPool::get_instance().try_acquire_parquet(0); + if (!found) { + for (int i = 0; i < 5; i++) { + LRUCache::get_instance().recycle(); + found = MemoryLimitPool::get_instance().try_acquire_parquet(0); + if (found) { + break; + } + } + } + } + if (acquire(buffer_id, table_[buffer_id])) { + arrow = set_block_acquired(buffer_id); + return true; + } else { + LOG_ERROR("Failed to acquire parquet buffer"); + return false; + } + } + } + + bool evict_buffer(ParquetBufferID buffer_id) { + std::unique_lock lock(table_mutex_); + return table_.erase(buffer_id); + } + + std::shared_ptr set_block_acquired( + ParquetBufferID buffer_id) { + std::shared_lock lock(table_mutex_); + ParquetBufferContext &context = table_[buffer_id]; + while (true) { + int current_count = context.ref_count.load(std::memory_order_relaxed); + if (current_count >= 0) { + if 
(context.ref_count.compare_exchange_weak( + current_count, current_count + context.arrow_refs.size(), + std::memory_order_acq_rel, std::memory_order_acquire)) { + return context.arrow; + } + } else { + if (context.ref_count.compare_exchange_weak( + current_count, context.arrow_refs.size(), + std::memory_order_acq_rel, std::memory_order_acquire)) { + context.load_count.fetch_add(1, std::memory_order_relaxed); + return context.arrow; + } + } + } + } + std::shared_ptr acquire(ParquetBufferID buffer_id) { + std::shared_lock lock(table_mutex_); + ParquetBufferContext &context = table_[buffer_id]; + while (true) { + int current_count = context.ref_count.load(std::memory_order_acquire); + if (current_count < 0) { + return nullptr; + } + if (context.ref_count.compare_exchange_weak( + current_count, current_count + 1, std::memory_order_acq_rel, + std::memory_order_acquire)) { + if (current_count == 0) { + context.load_count.fetch_add(1, std::memory_order_relaxed); + } + return context.arrow; + } + } + } + + void release(ParquetBufferID buffer_id) { + std::shared_lock lock(table_mutex_); + ParquetBufferContext &context = table_[buffer_id]; + if (context.ref_count.fetch_sub(1, std::memory_order_release) == 1) { + std::atomic_thread_fence(std::memory_order_acquire); + LRUCache::BlockType block; + // TODO: set block + LRUCache::get_instance().add_single_block(block, 0); + } + } + + void evict(ParquetBufferID buffer_id) { + std::shared_lock lock(table_mutex_); + ParquetBufferContext &context = table_[buffer_id]; + int expected = 0; + if (context.ref_count.compare_exchange_strong( + expected, std::numeric_limits::min())) { + MemoryLimitPool::get_instance().release_parquet(context.size); + evict_buffer(buffer_id); + } + } + + + static ParquetBufferPool &get_instance() { + static ParquetBufferPool instance; + return instance; + } + + ParquetBufferPool(const ParquetBufferPool &) = delete; + ParquetBufferPool &operator=(const ParquetBufferPool &) = delete; + 
ParquetBufferPool(ParquetBufferPool &&) = delete; + ParquetBufferPool &operator=(ParquetBufferPool &&) = delete; + + private: + ParquetBufferPool() = default; + + private: + Table table_; + std::shared_mutex table_mutex_; +}; + +} // namespace ailego +} // namespace zvec \ No newline at end of file From 9f03d8766c11e83cdc8c1ca4ceba2f3df7af970f Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Mon, 6 Apr 2026 22:39:23 +0800 Subject: [PATCH 08/46] upd --- src/db/index/storage/bufferpool_forward_store.cc | 14 ++++++++++---- src/db/index/storage/lazy_record_batch_reader.h | 5 ++++- 2 files changed, 14 insertions(+), 5 deletions(-) diff --git a/src/db/index/storage/bufferpool_forward_store.cc b/src/db/index/storage/bufferpool_forward_store.cc index 1557e0740..bdbe4e7d0 100644 --- a/src/db/index/storage/bufferpool_forward_store.cc +++ b/src/db/index/storage/bufferpool_forward_store.cc @@ -196,9 +196,12 @@ TablePtr BufferPoolForwardStore::fetch(const std::vector &columns, auto buffer_id = ailego::ParquetBufferID(file_path_, col_idx, rg_id); // ailego::BufferID::ParquetID(file_path_, col_idx, rg_id); // auto buffer_handle = buf_mgr.acquire(buffer_id); - auto col_chunked_array = - ailego::ParquetBufferPool::get_instance().acquire(buffer_id); // buffer_handle.pin_parquet_data(); + std::shared_ptr col_chunked_array{nullptr}; + if (ailego::ParquetBufferPool::get_instance().acquire_buffer(buffer_id, col_chunked_array)) { + LOG_ERROR("Failed to acquire parquet buffer"); + return nullptr; + } if (!col_chunked_array) { LOG_ERROR( @@ -323,10 +326,13 @@ ExecBatchPtr BufferPoolForwardStore::fetch( for (size_t i = 0; i < col_indices.size(); ++i) { int col_idx = col_indices[i]; auto buffer_id = ailego::ParquetBufferID(file_path_, col_idx, rg_id); + std::shared_ptr col_chunked_array{nullptr}; + if (ailego::ParquetBufferPool::get_instance().acquire_buffer(buffer_id, col_chunked_array)) { + LOG_ERROR("Failed to acquire parquet buffer"); + return nullptr; + } // 
ailego::BufferID::ParquetID(file_path_, col_idx, rg_id); // auto buffer_handle = buf_mgr.acquire(buffer_id); - auto col_chunked_array = - ailego::ParquetBufferPool::get_instance().acquire(buffer_id); // buffer_handle.pin_parquet_data(); if (!col_chunked_array) { diff --git a/src/db/index/storage/lazy_record_batch_reader.h b/src/db/index/storage/lazy_record_batch_reader.h index 525f79615..9fadf92e4 100644 --- a/src/db/index/storage/lazy_record_batch_reader.h +++ b/src/db/index/storage/lazy_record_batch_reader.h @@ -134,7 +134,10 @@ class ParquetRecordBatchReader : public arrow::RecordBatchReader { // auto buffer_handle = buf_mgr.acquire(buffer_id); // auto col_chunked_array = buffer_handle.pin_parquet_data(); auto buffer_id = ailego::ParquetBufferID(file_path_, col_indices_[col_idx], rg_id); - auto col_chunked_array = ailego::ParquetBufferPool::get_instance().acquire(buffer_id); + std::shared_ptr col_chunked_array{nullptr}; + if (ailego::ParquetBufferPool::get_instance().acquire_buffer(buffer_id, col_chunked_array)) { + return arrow::Status::Invalid("Failed to acquire parquet buffer"); + } if (col_chunked_array) { std::shared_ptr concat; auto concat_result = arrow::Concatenate(col_chunked_array->chunks(), From 200e8401daf87c4bd1f884e160373dcfda3a4839 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Mon, 6 Apr 2026 23:01:28 +0800 Subject: [PATCH 09/46] fix --- src/db/index/storage/bufferpool_forward_store.cc | 4 ++-- src/db/index/storage/lazy_record_batch_reader.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/db/index/storage/bufferpool_forward_store.cc b/src/db/index/storage/bufferpool_forward_store.cc index bdbe4e7d0..ae96316ef 100644 --- a/src/db/index/storage/bufferpool_forward_store.cc +++ b/src/db/index/storage/bufferpool_forward_store.cc @@ -198,7 +198,7 @@ TablePtr BufferPoolForwardStore::fetch(const std::vector &columns, // auto buffer_handle = buf_mgr.acquire(buffer_id); // buffer_handle.pin_parquet_data(); std::shared_ptr 
col_chunked_array{nullptr}; - if (ailego::ParquetBufferPool::get_instance().acquire_buffer(buffer_id, col_chunked_array)) { + if (!ailego::ParquetBufferPool::get_instance().acquire_buffer(buffer_id, col_chunked_array)) { LOG_ERROR("Failed to acquire parquet buffer"); return nullptr; } @@ -327,7 +327,7 @@ ExecBatchPtr BufferPoolForwardStore::fetch( int col_idx = col_indices[i]; auto buffer_id = ailego::ParquetBufferID(file_path_, col_idx, rg_id); std::shared_ptr col_chunked_array{nullptr}; - if (ailego::ParquetBufferPool::get_instance().acquire_buffer(buffer_id, col_chunked_array)) { + if (!ailego::ParquetBufferPool::get_instance().acquire_buffer(buffer_id, col_chunked_array)) { LOG_ERROR("Failed to acquire parquet buffer"); return nullptr; } diff --git a/src/db/index/storage/lazy_record_batch_reader.h b/src/db/index/storage/lazy_record_batch_reader.h index 9fadf92e4..5a074323f 100644 --- a/src/db/index/storage/lazy_record_batch_reader.h +++ b/src/db/index/storage/lazy_record_batch_reader.h @@ -135,7 +135,7 @@ class ParquetRecordBatchReader : public arrow::RecordBatchReader { // auto col_chunked_array = buffer_handle.pin_parquet_data(); auto buffer_id = ailego::ParquetBufferID(file_path_, col_indices_[col_idx], rg_id); std::shared_ptr col_chunked_array{nullptr}; - if (ailego::ParquetBufferPool::get_instance().acquire_buffer(buffer_id, col_chunked_array)) { + if (!ailego::ParquetBufferPool::get_instance().acquire_buffer(buffer_id, col_chunked_array)) { return arrow::Status::Invalid("Failed to acquire parquet buffer"); } if (col_chunked_array) { From ebb7678d88b4b91329331812baf9a86f60b5bde8 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Mon, 6 Apr 2026 23:15:56 +0800 Subject: [PATCH 10/46] fix --- src/include/zvec/ailego/buffer/parquet_buffer_pool.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/include/zvec/ailego/buffer/parquet_buffer_pool.h b/src/include/zvec/ailego/buffer/parquet_buffer_pool.h index a5b67dd64..b29909661 100644 --- 
a/src/include/zvec/ailego/buffer/parquet_buffer_pool.h +++ b/src/include/zvec/ailego/buffer/parquet_buffer_pool.h @@ -158,7 +158,7 @@ class ParquetBufferPool { } } context.size = size; - + return true; } @@ -185,6 +185,10 @@ class ParquetBufferPool { } } } + if (!found) { + LOG_ERROR("Failed to acquire parquet buffer"); + return false; + } } if (acquire(buffer_id, table_[buffer_id])) { arrow = set_block_acquired(buffer_id); @@ -269,7 +273,7 @@ class ParquetBufferPool { static ParquetBufferPool instance; return instance; } - + ParquetBufferPool(const ParquetBufferPool &) = delete; ParquetBufferPool &operator=(const ParquetBufferPool &) = delete; ParquetBufferPool(ParquetBufferPool &&) = delete; From 2e677f58686bb56e54b4a0918cab4b72710ac2de Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Mon, 6 Apr 2026 23:26:07 +0800 Subject: [PATCH 11/46] fix --- src/include/zvec/ailego/buffer/parquet_buffer_pool.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/include/zvec/ailego/buffer/parquet_buffer_pool.h b/src/include/zvec/ailego/buffer/parquet_buffer_pool.h index b29909661..8f5e658eb 100644 --- a/src/include/zvec/ailego/buffer/parquet_buffer_pool.h +++ b/src/include/zvec/ailego/buffer/parquet_buffer_pool.h @@ -229,6 +229,10 @@ class ParquetBufferPool { } std::shared_ptr acquire(ParquetBufferID buffer_id) { std::shared_lock lock(table_mutex_); + auto iter = table_.find(buffer_id); + if (iter == table_.end()) { + return nullptr; + } ParquetBufferContext &context = table_[buffer_id]; while (true) { int current_count = context.ref_count.load(std::memory_order_acquire); @@ -248,6 +252,10 @@ class ParquetBufferPool { void release(ParquetBufferID buffer_id) { std::shared_lock lock(table_mutex_); + auto iter = table_.find(buffer_id); + if (iter == table_.end()) { + return; + } ParquetBufferContext &context = table_[buffer_id]; if (context.ref_count.fetch_sub(1, std::memory_order_release) == 1) { std::atomic_thread_fence(std::memory_order_acquire); @@ -259,6 
+267,10 @@ class ParquetBufferPool { void evict(ParquetBufferID buffer_id) { std::shared_lock lock(table_mutex_); + auto iter = table_.find(buffer_id); + if (iter == table_.end()) { + return; + } ParquetBufferContext &context = table_[buffer_id]; int expected = 0; if (context.ref_count.compare_exchange_strong( From 9d0d6612365077f8dd8d3e28fb30bacfaa2a1cd7 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Tue, 7 Apr 2026 00:07:56 +0800 Subject: [PATCH 12/46] fix --- src/include/zvec/ailego/buffer/parquet_buffer_pool.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/include/zvec/ailego/buffer/parquet_buffer_pool.h b/src/include/zvec/ailego/buffer/parquet_buffer_pool.h index 8f5e658eb..e2a993b31 100644 --- a/src/include/zvec/ailego/buffer/parquet_buffer_pool.h +++ b/src/include/zvec/ailego/buffer/parquet_buffer_pool.h @@ -169,7 +169,9 @@ class ParquetBufferPool { auto iter = table_.find(buffer_id); if (iter != table_.end()) { arrow = acquire(buffer_id); - return true; + if (arrow != nullptr) { + return true; + } } } { @@ -207,7 +209,6 @@ class ParquetBufferPool { std::shared_ptr set_block_acquired( ParquetBufferID buffer_id) { - std::shared_lock lock(table_mutex_); ParquetBufferContext &context = table_[buffer_id]; while (true) { int current_count = context.ref_count.load(std::memory_order_relaxed); @@ -228,7 +229,6 @@ class ParquetBufferPool { } } std::shared_ptr acquire(ParquetBufferID buffer_id) { - std::shared_lock lock(table_mutex_); auto iter = table_.find(buffer_id); if (iter == table_.end()) { return nullptr; From b2651bce593456d1366b001dcaffb06d4ea4bc8f Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Tue, 7 Apr 2026 01:06:22 +0800 Subject: [PATCH 13/46] fix --- .../zvec/ailego/buffer/parquet_buffer_pool.h | 27 +++++++++---------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/src/include/zvec/ailego/buffer/parquet_buffer_pool.h b/src/include/zvec/ailego/buffer/parquet_buffer_pool.h index e2a993b31..a8535c26a 100644 
--- a/src/include/zvec/ailego/buffer/parquet_buffer_pool.h +++ b/src/include/zvec/ailego/buffer/parquet_buffer_pool.h @@ -37,19 +37,24 @@ struct ParquetBufferID { std::string filename; int column; int row_group; + uint64_t file_id; ParquetBufferID(std::string &filename, int column, int row_group) - : filename(filename), column(column), row_group(row_group) {} + : filename(filename), column(column), row_group(row_group) { + struct stat file_stat; + if (stat(filename.c_str(), &file_stat) == 0) { + // file_stat.st_ino contains the inode number + // file_stat.st_dev contains the device ID + // Together they uniquely identify a file + file_id = file_stat.st_ino; + } + } }; struct IDHash { size_t operator()(const ParquetBufferID &buffer_id) const { struct stat file_stat; - uint64_t file_id; - if (stat(buffer_id.filename.c_str(), &file_stat) == 0) { - file_id = file_stat.st_ino; - } size_t hash = 1; - hash = hash ^ (std::hash{}(file_id)); + hash = hash ^ (std::hash{}(buffer_id.file_id)); hash = hash * 31 + std::hash{}(buffer_id.column); hash = hash * 31 + std::hash{}(buffer_id.row_group); return hash; @@ -158,7 +163,6 @@ class ParquetBufferPool { } } context.size = size; - return true; } @@ -202,11 +206,6 @@ class ParquetBufferPool { } } - bool evict_buffer(ParquetBufferID buffer_id) { - std::unique_lock lock(table_mutex_); - return table_.erase(buffer_id); - } - std::shared_ptr set_block_acquired( ParquetBufferID buffer_id) { ParquetBufferContext &context = table_[buffer_id]; @@ -266,7 +265,7 @@ class ParquetBufferPool { } void evict(ParquetBufferID buffer_id) { - std::shared_lock lock(table_mutex_); + std::unique_lock lock(table_mutex_); auto iter = table_.find(buffer_id); if (iter == table_.end()) { return; @@ -276,7 +275,7 @@ class ParquetBufferPool { if (context.ref_count.compare_exchange_strong( expected, std::numeric_limits::min())) { MemoryLimitPool::get_instance().release_parquet(context.size); - evict_buffer(buffer_id); + table_.erase(buffer_id); } } From 
0f0cf51ae630ef88023bb9807837231084bb4508 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Tue, 7 Apr 2026 01:19:05 +0800 Subject: [PATCH 14/46] fix --- src/include/zvec/ailego/buffer/parquet_buffer_pool.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/include/zvec/ailego/buffer/parquet_buffer_pool.h b/src/include/zvec/ailego/buffer/parquet_buffer_pool.h index a8535c26a..e934d2e31 100644 --- a/src/include/zvec/ailego/buffer/parquet_buffer_pool.h +++ b/src/include/zvec/ailego/buffer/parquet_buffer_pool.h @@ -95,7 +95,7 @@ class ParquetBufferPool { // Only reduces the reference count but does not actually release the // buffer, since the buffer memory is managed by the BufferManager. void operator()(arrow::Buffer *) { - pool->release(id); + return; } }; From e64dd153f4cc8c58a2bfb75ac7df104098f42757 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Tue, 7 Apr 2026 01:48:14 +0800 Subject: [PATCH 15/46] upd --- .../zvec/ailego/buffer/parquet_buffer_pool.h | 38 ++++++------------- 1 file changed, 11 insertions(+), 27 deletions(-) diff --git a/src/include/zvec/ailego/buffer/parquet_buffer_pool.h b/src/include/zvec/ailego/buffer/parquet_buffer_pool.h index e934d2e31..a05021ea0 100644 --- a/src/include/zvec/ailego/buffer/parquet_buffer_pool.h +++ b/src/include/zvec/ailego/buffer/parquet_buffer_pool.h @@ -102,36 +102,19 @@ class ParquetBufferPool { using Table = std::unordered_map; - arrow::Status readable_open( - std::shared_ptr &input, - const std::string &file_name) { - ARROW_ASSIGN_OR_RAISE(input, arrow::io::ReadableFile::Open(file_name)); - return arrow::Status::OK(); - } - arrow::Status file_open(std::unique_ptr &reader, - std::shared_ptr &input, - arrow::MemoryPool *mem_pool) { - ARROW_ASSIGN_OR_RAISE(reader, parquet::arrow::OpenFile(input, mem_pool)); - return arrow::Status::OK(); - } - bool acquire(ParquetBufferID buffer_id, ParquetBufferContext &context) { + arrow::Status acquire(ParquetBufferID buffer_id, + ParquetBufferContext &context) { // 
TODO: file handler and memory pool can be optimized arrow::MemoryPool *mem_pool = arrow::default_memory_pool(); // Open file std::shared_ptr input; const auto &file_name = buffer_id.filename; - if (!readable_open(input, file_name).ok()) { - LOG_ERROR("Failed to open parquet file[%s]", file_name.c_str()); - return false; - } + ARROW_ASSIGN_OR_RAISE(input, arrow::io::ReadableFile::Open(file_name)); // Open reader std::unique_ptr reader; - if (!file_open(reader, input, mem_pool).ok()) { - LOG_ERROR("Failed to open parquet file[%s]", file_name.c_str()); - return false; - } + ARROW_ASSIGN_OR_RAISE(reader, parquet::arrow::OpenFile(input, mem_pool)); // Perform read int row_group = buffer_id.row_group; @@ -140,10 +123,11 @@ class ParquetBufferPool { if (!s.ok()) { LOG_ERROR("Failed to read parquet file[%s]", file_name.c_str()); context.arrow = nullptr; - return false; + return s; } - size_t size = 0; + context.size = 0; + context.arrow_refs.clear(); // Compute the memory usage and hijack Arrow's buffers with our // implementation for (auto &array : context.arrow->chunks()) { @@ -154,7 +138,7 @@ class ParquetBufferPool { } // Keep references to original buffers to prevent premature deletion context.arrow_refs.emplace_back(buffers[buf_idx]); - size += buffers[buf_idx]->capacity(); + context.size += buffers[buf_idx]->capacity(); // Create hijacked buffer with custom deleter that notifies us when // Arrow is finished with the buffer std::shared_ptr hijacked_buffer( @@ -162,8 +146,8 @@ class ParquetBufferPool { buffers[buf_idx] = hijacked_buffer; } } - context.size = size; - return true; + + return arrow::Status::OK(); } bool acquire_buffer(ParquetBufferID buffer_id, @@ -196,7 +180,7 @@ class ParquetBufferPool { return false; } } - if (acquire(buffer_id, table_[buffer_id])) { + if (acquire(buffer_id, table_[buffer_id]).ok()) { arrow = set_block_acquired(buffer_id); return true; } else { From 61a53346afba962836677ea078b6446ece83a93c Mon Sep 17 00:00:00 2001 From: Zefeng Yin 
Date: Tue, 7 Apr 2026 02:05:13 +0800 Subject: [PATCH 16/46] fix --- src/include/zvec/ailego/buffer/parquet_buffer_pool.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/include/zvec/ailego/buffer/parquet_buffer_pool.h b/src/include/zvec/ailego/buffer/parquet_buffer_pool.h index a05021ea0..5f24ca16c 100644 --- a/src/include/zvec/ailego/buffer/parquet_buffer_pool.h +++ b/src/include/zvec/ailego/buffer/parquet_buffer_pool.h @@ -197,14 +197,14 @@ class ParquetBufferPool { int current_count = context.ref_count.load(std::memory_order_relaxed); if (current_count >= 0) { if (context.ref_count.compare_exchange_weak( - current_count, current_count + context.arrow_refs.size(), - std::memory_order_acq_rel, std::memory_order_acquire)) { + current_count, current_count + 1, std::memory_order_acq_rel, + std::memory_order_acquire)) { return context.arrow; } } else { if (context.ref_count.compare_exchange_weak( - current_count, context.arrow_refs.size(), - std::memory_order_acq_rel, std::memory_order_acquire)) { + current_count, 1, std::memory_order_acq_rel, + std::memory_order_acquire)) { context.load_count.fetch_add(1, std::memory_order_relaxed); return context.arrow; } From 2fdf3dea27e5478ed36d176936b3185f2f1fcbd0 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Tue, 7 Apr 2026 10:58:51 +0800 Subject: [PATCH 17/46] fix --- src/include/zvec/ailego/buffer/parquet_buffer_pool.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/include/zvec/ailego/buffer/parquet_buffer_pool.h b/src/include/zvec/ailego/buffer/parquet_buffer_pool.h index 5f24ca16c..b387ef642 100644 --- a/src/include/zvec/ailego/buffer/parquet_buffer_pool.h +++ b/src/include/zvec/ailego/buffer/parquet_buffer_pool.h @@ -66,6 +66,9 @@ struct IDEqual { if (a.filename != b.filename) { return false; } + if (a.file_id != b.file_id) { + return false; + } return a.column == b.column && a.row_group == b.row_group; } }; From 0e478d4027582bc579a4f43ed283acf16f8e9236 Mon Sep 17 00:00:00 2001 
From: Zefeng Yin Date: Tue, 7 Apr 2026 11:34:15 +0800 Subject: [PATCH 18/46] fix --- src/include/zvec/ailego/buffer/parquet_buffer_pool.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/include/zvec/ailego/buffer/parquet_buffer_pool.h b/src/include/zvec/ailego/buffer/parquet_buffer_pool.h index b387ef642..a7f57958c 100644 --- a/src/include/zvec/ailego/buffer/parquet_buffer_pool.h +++ b/src/include/zvec/ailego/buffer/parquet_buffer_pool.h @@ -38,6 +38,7 @@ struct ParquetBufferID { int column; int row_group; uint64_t file_id; + long mtime; ParquetBufferID(std::string &filename, int column, int row_group) : filename(filename), column(column), row_group(row_group) { struct stat file_stat; @@ -46,14 +47,16 @@ struct ParquetBufferID { // file_stat.st_dev contains the device ID // Together they uniquely identify a file file_id = file_stat.st_ino; + std::filesystem::path p(filename); + auto ftime = std::filesystem::last_write_time(p); + mtime = static_cast(ftime.time_since_epoch().count()); } } }; struct IDHash { size_t operator()(const ParquetBufferID &buffer_id) const { - struct stat file_stat; - size_t hash = 1; + size_t hash = std::hash{}(1); hash = hash ^ (std::hash{}(buffer_id.file_id)); hash = hash * 31 + std::hash{}(buffer_id.column); hash = hash * 31 + std::hash{}(buffer_id.row_group); @@ -69,6 +72,9 @@ struct IDEqual { if (a.file_id != b.file_id) { return false; } + if (a.mtime != b.mtime) { + return false; + } return a.column == b.column && a.row_group == b.row_group; } }; From f197ecec145460ad29f1ef093d4815070b0be154 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Tue, 7 Apr 2026 12:52:59 +0800 Subject: [PATCH 19/46] upd --- src/ailego/buffer/parquet_buffer_pool.cc | 246 ++++++++++++++++++ .../index/storage/bufferpool_forward_store.cc | 25 +- .../index/storage/lazy_record_batch_reader.h | 14 +- .../zvec/ailego/buffer/parquet_buffer_pool.h | 222 +++------------- tests/db/index/segment/segment_test.cc | 1 - 5 files changed, 302 
insertions(+), 206 deletions(-) create mode 100644 src/ailego/buffer/parquet_buffer_pool.cc diff --git a/src/ailego/buffer/parquet_buffer_pool.cc b/src/ailego/buffer/parquet_buffer_pool.cc new file mode 100644 index 000000000..69db539bb --- /dev/null +++ b/src/ailego/buffer/parquet_buffer_pool.cc @@ -0,0 +1,246 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include +#include +#include + +namespace zvec { +namespace ailego { + +ParquetBufferID::ParquetBufferID(std::string &filename, int column, + int row_group) + : filename(filename), column(column), row_group(row_group) { + struct stat file_stat; + if (stat(filename.c_str(), &file_stat) == 0) { + // file_stat.st_ino contains the inode number + // file_stat.st_dev contains the device ID + // Together they uniquely identify a file + file_id = file_stat.st_ino; + std::filesystem::path p(filename); + auto ftime = std::filesystem::last_write_time(p); + mtime = static_cast(ftime.time_since_epoch().count()); + } +} + +ParquetBufferContextHandle::ParquetBufferContextHandle( + const ParquetBufferContextHandle &handle_) + : buffer_id_(handle_.buffer_id_), arrow_(handle_.arrow_) { + if (arrow_) { + ParquetBufferPool::get_instance().acquire_one(buffer_id_); + } +} + +ParquetBufferContextHandle::~ParquetBufferContextHandle() { + if (arrow_) { + ParquetBufferPool::get_instance().release(buffer_id_); + } +} + 
+arrow::Status ParquetBufferPool::acquire(ParquetBufferID buffer_id, + ParquetBufferContext &context) { + // TODO: file handler and memory pool can be optimized + arrow::MemoryPool *mem_pool = arrow::default_memory_pool(); + + // Open file + std::shared_ptr input; + const auto &file_name = buffer_id.filename; + ARROW_ASSIGN_OR_RAISE(input, arrow::io::ReadableFile::Open(file_name)); + + // Open reader + std::unique_ptr reader; + ARROW_ASSIGN_OR_RAISE(reader, parquet::arrow::OpenFile(input, mem_pool)); + + // Perform read + int row_group = buffer_id.row_group; + int column = buffer_id.column; + auto s = reader->RowGroup(row_group)->Column(column)->Read(&context.arrow); + if (!s.ok()) { + LOG_ERROR("Failed to read parquet file[%s]", file_name.c_str()); + context.arrow = nullptr; + return s; + } + + context.size = 0; + context.arrow_refs.clear(); + // Compute the memory usage and hijack Arrow's buffers with our + // implementation + for (auto &array : context.arrow->chunks()) { + auto &buffers = array->data()->buffers; + for (size_t buf_idx = 0; buf_idx < buffers.size(); ++buf_idx) { + if (buffers[buf_idx] == nullptr) { + continue; + } + // Keep references to original buffers to prevent premature deletion + context.arrow_refs.emplace_back(buffers[buf_idx]); + context.size += buffers[buf_idx]->capacity(); + // Create hijacked buffer with custom deleter that notifies us when + // Arrow is finished with the buffer + std::shared_ptr hijacked_buffer( + buffers[buf_idx].get(), ArrowBufferDeleter(this, buffer_id)); + buffers[buf_idx] = hijacked_buffer; + } + } + + return arrow::Status::OK(); +} + +ParquetBufferContextHandle ParquetBufferPool::acquire_buffer( + ParquetBufferID buffer_id) { + std::shared_ptr arrow{nullptr}; + { + std::shared_lock lock(table_mutex_); + auto iter = table_.find(buffer_id); + if (iter != table_.end()) { + arrow = acquire(buffer_id); + if (arrow != nullptr) { + return ParquetBufferContextHandle(buffer_id, arrow); + } + } + } + { + std::unique_lock 
lock(table_mutex_); + { + bool found = MemoryLimitPool::get_instance().try_acquire_parquet(0); + if (!found) { + for (int i = 0; i < 5; i++) { + LRUCache::get_instance().recycle(); + found = MemoryLimitPool::get_instance().try_acquire_parquet(0); + if (found) { + break; + } + } + } + if (!found) { + LOG_ERROR("Failed to acquire parquet buffer"); + return ParquetBufferContextHandle(); + } + } + if (acquire(buffer_id, table_[buffer_id]).ok()) { + arrow = set_block_acquired(buffer_id); + return ParquetBufferContextHandle(buffer_id, arrow); + } else { + LOG_ERROR("Failed to acquire parquet buffer"); + return ParquetBufferContextHandle(); + } + } +} + +std::shared_ptr ParquetBufferPool::set_block_acquired( + ParquetBufferID buffer_id) { + ParquetBufferContext &context = table_[buffer_id]; + while (true) { + int current_count = context.ref_count.load(std::memory_order_relaxed); + if (current_count >= 0) { + if (context.ref_count.compare_exchange_weak( + current_count, current_count + 1, std::memory_order_acq_rel, + std::memory_order_acquire)) { + return context.arrow; + } + } else { + if (context.ref_count.compare_exchange_weak(current_count, 1, + std::memory_order_acq_rel, + std::memory_order_acquire)) { + context.load_count.fetch_add(1, std::memory_order_relaxed); + return context.arrow; + } + } + } +} + +std::shared_ptr ParquetBufferPool::acquire( + ParquetBufferID buffer_id) { + auto iter = table_.find(buffer_id); + if (iter == table_.end()) { + return nullptr; + } + ParquetBufferContext &context = table_[buffer_id]; + while (true) { + int current_count = context.ref_count.load(std::memory_order_acquire); + if (current_count < 0) { + return nullptr; + } + if (context.ref_count.compare_exchange_weak( + current_count, current_count + 1, std::memory_order_acq_rel, + std::memory_order_acquire)) { + if (current_count == 0) { + context.load_count.fetch_add(1, std::memory_order_relaxed); + } + return context.arrow; + } + } + return nullptr; +} + +std::shared_ptr 
ParquetBufferPool::acquire_one( + ParquetBufferID buffer_id) { + std::shared_lock lock(table_mutex_); + auto iter = table_.find(buffer_id); + if (iter == table_.end()) { + return nullptr; + } + ParquetBufferContext &context = table_[buffer_id]; + while (true) { + int current_count = context.ref_count.load(std::memory_order_acquire); + if (current_count < 0) { + return nullptr; + } + if (context.ref_count.compare_exchange_weak( + current_count, current_count + 1, std::memory_order_acq_rel, + std::memory_order_acquire)) { + if (current_count == 0) { + context.load_count.fetch_add(1, std::memory_order_relaxed); + } + return context.arrow; + } + } +} + +void ParquetBufferPool::release(ParquetBufferID buffer_id) { + std::shared_lock lock(table_mutex_); + auto iter = table_.find(buffer_id); + if (iter == table_.end()) { + return; + } + ParquetBufferContext &context = table_[buffer_id]; + if (context.ref_count.fetch_sub(1, std::memory_order_release) == 1) { + std::atomic_thread_fence(std::memory_order_acquire); + LRUCache::BlockType block; + // TODO: set block + LRUCache::get_instance().add_single_block(block, 0); + } +} + +void ParquetBufferPool::evict(ParquetBufferID buffer_id) { + std::unique_lock lock(table_mutex_); + auto iter = table_.find(buffer_id); + if (iter == table_.end()) { + return; + } + ParquetBufferContext &context = table_[buffer_id]; + int expected = 0; + if (context.ref_count.compare_exchange_strong( + expected, std::numeric_limits::min())) { + MemoryLimitPool::get_instance().release_parquet(context.size); + table_.erase(buffer_id); + } +} + +} // namespace ailego +} // namespace zvec \ No newline at end of file diff --git a/src/db/index/storage/bufferpool_forward_store.cc b/src/db/index/storage/bufferpool_forward_store.cc index ae96316ef..6e2ef4851 100644 --- a/src/db/index/storage/bufferpool_forward_store.cc +++ b/src/db/index/storage/bufferpool_forward_store.cc @@ -194,15 +194,10 @@ TablePtr BufferPoolForwardStore::fetch(const std::vector &columns, 
for (size_t i = 0; i < col_indices.size(); ++i) { int col_idx = col_indices[i]; auto buffer_id = ailego::ParquetBufferID(file_path_, col_idx, rg_id); - // ailego::BufferID::ParquetID(file_path_, col_idx, rg_id); - // auto buffer_handle = buf_mgr.acquire(buffer_id); - // buffer_handle.pin_parquet_data(); - std::shared_ptr col_chunked_array{nullptr}; - if (!ailego::ParquetBufferPool::get_instance().acquire_buffer(buffer_id, col_chunked_array)) { - LOG_ERROR("Failed to acquire parquet buffer"); - return nullptr; - } - + auto buffer_handle = + ailego::ParquetBufferPool::get_instance().acquire_buffer(buffer_id); + std::shared_ptr col_chunked_array = + buffer_handle.data(); if (!col_chunked_array) { LOG_ERROR( "Failed to pin parquet data for file: %s, column: %d, row_group: " @@ -326,14 +321,10 @@ ExecBatchPtr BufferPoolForwardStore::fetch( for (size_t i = 0; i < col_indices.size(); ++i) { int col_idx = col_indices[i]; auto buffer_id = ailego::ParquetBufferID(file_path_, col_idx, rg_id); - std::shared_ptr col_chunked_array{nullptr}; - if (!ailego::ParquetBufferPool::get_instance().acquire_buffer(buffer_id, col_chunked_array)) { - LOG_ERROR("Failed to acquire parquet buffer"); - return nullptr; - } - // ailego::BufferID::ParquetID(file_path_, col_idx, rg_id); - // auto buffer_handle = buf_mgr.acquire(buffer_id); - // buffer_handle.pin_parquet_data(); + auto buffer_handle = + ailego::ParquetBufferPool::get_instance().acquire_buffer(buffer_id); + std::shared_ptr col_chunked_array = + buffer_handle.data(); if (!col_chunked_array) { LOG_ERROR( diff --git a/src/db/index/storage/lazy_record_batch_reader.h b/src/db/index/storage/lazy_record_batch_reader.h index 5a074323f..baccc1409 100644 --- a/src/db/index/storage/lazy_record_batch_reader.h +++ b/src/db/index/storage/lazy_record_batch_reader.h @@ -129,15 +129,11 @@ class ParquetRecordBatchReader : public arrow::RecordBatchReader { if (with_cache_) { auto &buf_mgr = ailego::BufferManager::Instance(); for (size_t col_idx = 0; 
col_idx < col_indices_.size(); ++col_idx) { - // auto buffer_id = ailego::BufferID::ParquetID( - // file_path_, col_indices_[col_idx], rg_id); - // auto buffer_handle = buf_mgr.acquire(buffer_id); - // auto col_chunked_array = buffer_handle.pin_parquet_data(); - auto buffer_id = ailego::ParquetBufferID(file_path_, col_indices_[col_idx], rg_id); - std::shared_ptr col_chunked_array{nullptr}; - if (!ailego::ParquetBufferPool::get_instance().acquire_buffer(buffer_id, col_chunked_array)) { - return arrow::Status::Invalid("Failed to acquire parquet buffer"); - } + auto buffer_id = ailego::ParquetBufferID(file_path_, col_idx, rg_id); + auto buffer_handle = + ailego::ParquetBufferPool::get_instance().acquire_buffer(buffer_id); + std::shared_ptr col_chunked_array = + buffer_handle.data(); if (col_chunked_array) { std::shared_ptr concat; auto concat_result = arrow::Concatenate(col_chunked_array->chunks(), diff --git a/src/include/zvec/ailego/buffer/parquet_buffer_pool.h b/src/include/zvec/ailego/buffer/parquet_buffer_pool.h index a7f57958c..ef8c18e25 100644 --- a/src/include/zvec/ailego/buffer/parquet_buffer_pool.h +++ b/src/include/zvec/ailego/buffer/parquet_buffer_pool.h @@ -39,19 +39,8 @@ struct ParquetBufferID { int row_group; uint64_t file_id; long mtime; - ParquetBufferID(std::string &filename, int column, int row_group) - : filename(filename), column(column), row_group(row_group) { - struct stat file_stat; - if (stat(filename.c_str(), &file_stat) == 0) { - // file_stat.st_ino contains the inode number - // file_stat.st_dev contains the device ID - // Together they uniquely identify a file - file_id = file_stat.st_ino; - std::filesystem::path p(filename); - auto ftime = std::filesystem::last_write_time(p); - mtime = static_cast(ftime.time_since_epoch().count()); - } - } + ParquetBufferID() {} + ParquetBufferID(std::string &filename, int column, int row_group); }; struct IDHash { @@ -79,22 +68,43 @@ struct IDEqual { } }; +struct ParquetBufferContext { + // A shared 
pointer to the buffers allocated for arrow parquet data + std::shared_ptr arrow{nullptr}; -class ParquetBufferPool { - public: - typedef std::shared_ptr Pointer; + // Guard original arrow buffers to prevent premature deletion + std::vector> arrow_refs{}; - struct ParquetBufferContext { - // A shared pointer to the buffers allocated for arrow parquet data - std::shared_ptr arrow{nullptr}; + size_t size; + alignas(64) std::atomic ref_count{std::numeric_limits::min()}; + alignas(64) std::atomic load_count{0}; +}; - // Guard original arrow buffers to prevent premature deletion - std::vector> arrow_refs{}; +class ParquetBufferContextHandle { + public: + ParquetBufferContextHandle() {} + ParquetBufferContextHandle(ParquetBufferID &buffer_id, + std::shared_ptr arrow) + : buffer_id_(buffer_id), arrow_(arrow) {} + ParquetBufferContextHandle(const ParquetBufferContextHandle &handle_); + ParquetBufferContextHandle(ParquetBufferContextHandle &&handle_) + : buffer_id_(std::move(handle_.buffer_id_)), + arrow_(std::move(handle_.arrow_)) {} + + ~ParquetBufferContextHandle(); + + std::shared_ptr data() { + return arrow_; + } - size_t size; - alignas(64) std::atomic ref_count{std::numeric_limits::min()}; - alignas(64) std::atomic load_count{0}; - }; + private: + ParquetBufferID buffer_id_; + std::shared_ptr arrow_{nullptr}; +}; + +class ParquetBufferPool { + public: + typedef std::shared_ptr Pointer; struct ArrowBufferDeleter { explicit ArrowBufferDeleter(ParquetBufferPool *c, ParquetBufferID i) @@ -112,166 +122,20 @@ class ParquetBufferPool { IDHash, IDEqual>; arrow::Status acquire(ParquetBufferID buffer_id, - ParquetBufferContext &context) { - // TODO: file handler and memory pool can be optimized - arrow::MemoryPool *mem_pool = arrow::default_memory_pool(); - - // Open file - std::shared_ptr input; - const auto &file_name = buffer_id.filename; - ARROW_ASSIGN_OR_RAISE(input, arrow::io::ReadableFile::Open(file_name)); - - // Open reader - std::unique_ptr reader; - 
ARROW_ASSIGN_OR_RAISE(reader, parquet::arrow::OpenFile(input, mem_pool)); - - // Perform read - int row_group = buffer_id.row_group; - int column = buffer_id.column; - auto s = reader->RowGroup(row_group)->Column(column)->Read(&context.arrow); - if (!s.ok()) { - LOG_ERROR("Failed to read parquet file[%s]", file_name.c_str()); - context.arrow = nullptr; - return s; - } - - context.size = 0; - context.arrow_refs.clear(); - // Compute the memory usage and hijack Arrow's buffers with our - // implementation - for (auto &array : context.arrow->chunks()) { - auto &buffers = array->data()->buffers; - for (size_t buf_idx = 0; buf_idx < buffers.size(); ++buf_idx) { - if (buffers[buf_idx] == nullptr) { - continue; - } - // Keep references to original buffers to prevent premature deletion - context.arrow_refs.emplace_back(buffers[buf_idx]); - context.size += buffers[buf_idx]->capacity(); - // Create hijacked buffer with custom deleter that notifies us when - // Arrow is finished with the buffer - std::shared_ptr hijacked_buffer( - buffers[buf_idx].get(), ArrowBufferDeleter(this, buffer_id)); - buffers[buf_idx] = hijacked_buffer; - } - } - - return arrow::Status::OK(); - } + ParquetBufferContext &context); - bool acquire_buffer(ParquetBufferID buffer_id, - std::shared_ptr &arrow) { - { - std::shared_lock lock(table_mutex_); - auto iter = table_.find(buffer_id); - if (iter != table_.end()) { - arrow = acquire(buffer_id); - if (arrow != nullptr) { - return true; - } - } - } - { - std::unique_lock lock(table_mutex_); - { - bool found = MemoryLimitPool::get_instance().try_acquire_parquet(0); - if (!found) { - for (int i = 0; i < 5; i++) { - LRUCache::get_instance().recycle(); - found = MemoryLimitPool::get_instance().try_acquire_parquet(0); - if (found) { - break; - } - } - } - if (!found) { - LOG_ERROR("Failed to acquire parquet buffer"); - return false; - } - } - if (acquire(buffer_id, table_[buffer_id]).ok()) { - arrow = set_block_acquired(buffer_id); - return true; - } else { 
- LOG_ERROR("Failed to acquire parquet buffer"); - return false; - } - } - } + ParquetBufferContextHandle acquire_buffer(ParquetBufferID buffer_id); std::shared_ptr set_block_acquired( - ParquetBufferID buffer_id) { - ParquetBufferContext &context = table_[buffer_id]; - while (true) { - int current_count = context.ref_count.load(std::memory_order_relaxed); - if (current_count >= 0) { - if (context.ref_count.compare_exchange_weak( - current_count, current_count + 1, std::memory_order_acq_rel, - std::memory_order_acquire)) { - return context.arrow; - } - } else { - if (context.ref_count.compare_exchange_weak( - current_count, 1, std::memory_order_acq_rel, - std::memory_order_acquire)) { - context.load_count.fetch_add(1, std::memory_order_relaxed); - return context.arrow; - } - } - } - } - std::shared_ptr acquire(ParquetBufferID buffer_id) { - auto iter = table_.find(buffer_id); - if (iter == table_.end()) { - return nullptr; - } - ParquetBufferContext &context = table_[buffer_id]; - while (true) { - int current_count = context.ref_count.load(std::memory_order_acquire); - if (current_count < 0) { - return nullptr; - } - if (context.ref_count.compare_exchange_weak( - current_count, current_count + 1, std::memory_order_acq_rel, - std::memory_order_acquire)) { - if (current_count == 0) { - context.load_count.fetch_add(1, std::memory_order_relaxed); - } - return context.arrow; - } - } - } + ParquetBufferID buffer_id); - void release(ParquetBufferID buffer_id) { - std::shared_lock lock(table_mutex_); - auto iter = table_.find(buffer_id); - if (iter == table_.end()) { - return; - } - ParquetBufferContext &context = table_[buffer_id]; - if (context.ref_count.fetch_sub(1, std::memory_order_release) == 1) { - std::atomic_thread_fence(std::memory_order_acquire); - LRUCache::BlockType block; - // TODO: set block - LRUCache::get_instance().add_single_block(block, 0); - } - } + std::shared_ptr acquire(ParquetBufferID buffer_id); - void evict(ParquetBufferID buffer_id) { - 
std::unique_lock lock(table_mutex_); - auto iter = table_.find(buffer_id); - if (iter == table_.end()) { - return; - } - ParquetBufferContext &context = table_[buffer_id]; - int expected = 0; - if (context.ref_count.compare_exchange_strong( - expected, std::numeric_limits::min())) { - MemoryLimitPool::get_instance().release_parquet(context.size); - table_.erase(buffer_id); - } - } + std::shared_ptr acquire_one(ParquetBufferID buffer_id); + + void release(ParquetBufferID buffer_id); + void evict(ParquetBufferID buffer_id); static ParquetBufferPool &get_instance() { static ParquetBufferPool instance; diff --git a/tests/db/index/segment/segment_test.cc b/tests/db/index/segment/segment_test.cc index 422a61b24..a3267fd9e 100644 --- a/tests/db/index/segment/segment_test.cc +++ b/tests/db/index/segment/segment_test.cc @@ -50,7 +50,6 @@ class SegmentTest : public testing::TestWithParam { FileHelper::RemoveDirectory(col_path); FileHelper::CreateDirectory(col_path); - ailego::BufferManager::Instance().init(MIN_MEMORY_LIMIT_BYTES, 1); zvec::ailego::MemoryLimitPool::get_instance().init(MIN_MEMORY_LIMIT_BYTES); std::string idmap_path = From e5febfa3d6c597a0a0dbf5c7e21f6e58e6ac7319 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Tue, 7 Apr 2026 13:13:08 +0800 Subject: [PATCH 20/46] fix ut --- tests/db/index/storage/bufferpool_store_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/db/index/storage/bufferpool_store_test.cc b/tests/db/index/storage/bufferpool_store_test.cc index 9d4ba1881..3ea9024c1 100644 --- a/tests/db/index/storage/bufferpool_store_test.cc +++ b/tests/db/index/storage/bufferpool_store_test.cc @@ -34,7 +34,7 @@ class BufferPoolStoreTest : public testing::Test { std::cout << "err: " << s.message() << std::endl; exit(1); } - ailego::BufferManager::Instance().init(10 * 1024 * 1024, 1); + zvec::ailego::MemoryLimitPool::get_instance().init(10 * 1024 * 1024); } void TearDown() override { From 629dc6b6720382746880624f0bd7a8f15118310b Mon 
Sep 17 00:00:00 2001 From: Zefeng Yin Date: Tue, 7 Apr 2026 14:18:37 +0800 Subject: [PATCH 21/46] upd --- src/ailego/buffer/lru_cache.cc | 23 ++++++++++++++++--- src/ailego/buffer/parquet_buffer_pool.cc | 11 +++++++++ src/db/common/global_resource.cc | 5 ++-- src/db/index/segment/segment.cc | 8 +++---- src/include/zvec/ailego/buffer/lru_cache.h | 11 +++++++++ .../zvec/ailego/buffer/parquet_buffer_pool.h | 12 ++-------- 6 files changed, 51 insertions(+), 19 deletions(-) diff --git a/src/ailego/buffer/lru_cache.cc b/src/ailego/buffer/lru_cache.cc index 552aee195..8937512d0 100644 --- a/src/ailego/buffer/lru_cache.cc +++ b/src/ailego/buffer/lru_cache.cc @@ -1,4 +1,5 @@ #include +#include #include namespace zvec { @@ -37,7 +38,11 @@ bool LRUCache::evict_block(BlockType &item) { bool LRUCache::recycle() { BlockType item; while (MemoryLimitPool::get_instance().is_full() && evict_block(item)) { - item.lp_map->evict_block(item.block.first); + if (item.lp_map) { + item.lp_map->evict_block(item.block.first); + } else { + ParquetBufferPool::get_instance().evict(item.parquet_buffer_block.first); + } } return MemoryLimitPool::get_instance().is_full(); } @@ -65,14 +70,26 @@ void LRUCache::clear_dead_node() { ConcurrentQueue tmp; BlockType item; while (queues_[i].try_dequeue(item) && (clear_count++ < clear_size)) { - if (is_valid(item.lp_map) && !item.lp_map->isDeadBlock(item)) { + if (item.lp_map == nullptr) { + if (ParquetBufferPool::get_instance().is_dead_node(item)) { + if (!tmp.enqueue(item)) { + LOG_ERROR("enqueue failed."); + } + } + } else if (is_valid(item.lp_map) && !item.lp_map->isDeadBlock(item)) { if (!tmp.enqueue(item)) { LOG_ERROR("enqueue failed."); } } } while (tmp.try_dequeue(item)) { - if (is_valid(item.lp_map) && !item.lp_map->isDeadBlock(item)) { + if (item.lp_map == nullptr) { + if (ParquetBufferPool::get_instance().is_dead_node(item)) { + if (!tmp.enqueue(item)) { + LOG_ERROR("enqueue failed."); + } + } + } else if (is_valid(item.lp_map) && 
!item.lp_map->isDeadBlock(item)) { if (!queues_[i].enqueue(item)) { LOG_ERROR("enqueue failed."); } diff --git a/src/ailego/buffer/parquet_buffer_pool.cc b/src/ailego/buffer/parquet_buffer_pool.cc index 69db539bb..995427f71 100644 --- a/src/ailego/buffer/parquet_buffer_pool.cc +++ b/src/ailego/buffer/parquet_buffer_pool.cc @@ -222,6 +222,8 @@ void ParquetBufferPool::release(ParquetBufferID buffer_id) { if (context.ref_count.fetch_sub(1, std::memory_order_release) == 1) { std::atomic_thread_fence(std::memory_order_acquire); LRUCache::BlockType block; + block.parquet_buffer_block.first = buffer_id; + block.parquet_buffer_block.second = context.load_count.load(); // TODO: set block LRUCache::get_instance().add_single_block(block, 0); } @@ -242,5 +244,14 @@ void ParquetBufferPool::evict(ParquetBufferID buffer_id) { } } +bool ParquetBufferPool::is_dead_node(LRUCache::BlockType &block) { + std::unique_lock lock(table_mutex_); + auto iter = table_.find(block.parquet_buffer_block.first); + if (iter == table_.end()) { + return true; + } + return iter->second.load_count.load() != block.parquet_buffer_block.second; +} + } // namespace ailego } // namespace zvec \ No newline at end of file diff --git a/src/db/common/global_resource.cc b/src/db/common/global_resource.cc index 2f4ad1ca7..d0baf38c3 100644 --- a/src/db/common/global_resource.cc +++ b/src/db/common/global_resource.cc @@ -14,6 +14,7 @@ #include "db/common/global_resource.h" #include #include +#include #include namespace zvec { @@ -25,8 +26,8 @@ void GlobalResource::initialize() { new ailego::ThreadPool(GlobalConfig::Instance().query_thread_count())); this->optimize_thread_pool_.reset(new ailego::ThreadPool( GlobalConfig::Instance().optimize_thread_count())); - ailego::BufferManager::Instance().init( - GlobalConfig::Instance().memory_limit_bytes(), 1); + zvec::ailego::MemoryLimitPool::get_instance().init( + GlobalConfig::Instance().memory_limit_bytes()); }); } diff --git a/src/db/index/segment/segment.cc 
b/src/db/index/segment/segment.cc index 821d236e3..34894d18d 100644 --- a/src/db/index/segment/segment.cc +++ b/src/db/index/segment/segment.cc @@ -3415,8 +3415,8 @@ Status SegmentImpl::alter_column(const std::string &column_name, } if (!options_.enable_mmap_) { - ailego::BufferManager::Instance().init( - GlobalConfig::Instance().memory_limit_bytes(), 1); + zvec::ailego::MemoryLimitPool::get_instance().init( + GlobalConfig::Instance().memory_limit_bytes()); } // delete single column store file @@ -3510,8 +3510,8 @@ Status SegmentImpl::drop_column(const std::string &column_name) { } if (!options_.enable_mmap_) { - ailego::BufferManager::Instance().init( - GlobalConfig::Instance().memory_limit_bytes(), 1); + zvec::ailego::MemoryLimitPool::get_instance().init( + GlobalConfig::Instance().memory_limit_bytes()); } // delete single column store file diff --git a/src/include/zvec/ailego/buffer/lru_cache.h b/src/include/zvec/ailego/buffer/lru_cache.h index 79e03b693..160f93391 100644 --- a/src/include/zvec/ailego/buffer/lru_cache.h +++ b/src/include/zvec/ailego/buffer/lru_cache.h @@ -34,10 +34,21 @@ class LPMap; using block_id_t = size_t; using version_t = size_t; +struct ParquetBufferID { + std::string filename; + int column; + int row_group; + uint64_t file_id; + long mtime; + ParquetBufferID() {} + ParquetBufferID(std::string &filename, int column, int row_group); +}; + class LRUCache { public: struct BlockType { std::pair block; + std::pair parquet_buffer_block; LPMap *lp_map; }; typedef moodycamel::ConcurrentQueue ConcurrentQueue; diff --git a/src/include/zvec/ailego/buffer/parquet_buffer_pool.h b/src/include/zvec/ailego/buffer/parquet_buffer_pool.h index ef8c18e25..877c25e2b 100644 --- a/src/include/zvec/ailego/buffer/parquet_buffer_pool.h +++ b/src/include/zvec/ailego/buffer/parquet_buffer_pool.h @@ -33,16 +33,6 @@ using version_t = size_t; class LRUCache; -struct ParquetBufferID { - std::string filename; - int column; - int row_group; - uint64_t file_id; - long 
mtime; - ParquetBufferID() {} - ParquetBufferID(std::string &filename, int column, int row_group); -}; - struct IDHash { size_t operator()(const ParquetBufferID &buffer_id) const { size_t hash = std::hash{}(1); @@ -137,6 +127,8 @@ class ParquetBufferPool { void evict(ParquetBufferID buffer_id); + bool is_dead_node(LRUCache::BlockType &block); + static ParquetBufferPool &get_instance() { static ParquetBufferPool instance; return instance; From e70840205be59fcb8ec825118bf0bbf42de288f3 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Tue, 7 Apr 2026 14:36:40 +0800 Subject: [PATCH 22/46] upd --- src/include/zvec/ailego/container/heap.h | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/include/zvec/ailego/container/heap.h b/src/include/zvec/ailego/container/heap.h index fce03674d..33f4cb410 100644 --- a/src/include/zvec/ailego/container/heap.h +++ b/src/include/zvec/ailego/container/heap.h @@ -91,6 +91,9 @@ class Heap : public TBase { //! Pop the front element void pop(void) { + if (TBase::empty()) { + return; + } if (TBase::size() > 1) { auto last = TBase::end() - 1; this->replace_heap(TBase::begin(), last, std::move(*last)); From eeb55ad7ce2f8f03db44270d3572ac2d5d2ef01d Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Tue, 7 Apr 2026 16:41:30 +0800 Subject: [PATCH 23/46] upd --- src/ailego/buffer/lru_cache.cc | 17 ++++++----- src/ailego/buffer/parquet_buffer_pool.cc | 33 +++++++++++----------- src/core/utility/buffer_storage.cc | 2 +- src/include/zvec/ailego/buffer/lru_cache.h | 4 +-- 4 files changed, 29 insertions(+), 27 deletions(-) diff --git a/src/ailego/buffer/lru_cache.cc b/src/ailego/buffer/lru_cache.cc index 8937512d0..837619fc9 100644 --- a/src/ailego/buffer/lru_cache.cc +++ b/src/ailego/buffer/lru_cache.cc @@ -31,6 +31,13 @@ bool LRUCache::evict_block(BlockType &item) { if (!ok) { return false; } + if (item.lp_map == nullptr) { + if (!ParquetBufferPool::get_instance().is_dead_node(item)) { + break; + } else { + continue; + } + } } while 
(!is_valid(item.lp_map) || item.lp_map->isDeadBlock(item)); return ok; } @@ -99,8 +106,9 @@ void LRUCache::clear_dead_node() { } int MemoryLimitPool::init(size_t pool_size) { + pool_size_ = 0; + LRUCache::get_instance().recycle(); pool_size_ = pool_size; - used_size_ = 0; return 0; } @@ -119,17 +127,12 @@ bool MemoryLimitPool::try_acquire_buffer(const size_t buffer_size, return true; } -bool MemoryLimitPool::try_acquire_parquet(const size_t buffer_size) { +void MemoryLimitPool::acquire_parquet(const size_t buffer_size) { size_t expected, desired; do { expected = used_size_.load(); - if (expected >= pool_size_) { - // LOG_ERROR("expected: %lu, pool_size: %lu", expected, pool_size_); - return false; - } desired = expected + buffer_size; } while (!used_size_.compare_exchange_weak(expected, desired)); - return true; } void MemoryLimitPool::release_buffer(char *buffer, const size_t buffer_size) { diff --git a/src/ailego/buffer/parquet_buffer_pool.cc b/src/ailego/buffer/parquet_buffer_pool.cc index 995427f71..e1b03e2d8 100644 --- a/src/ailego/buffer/parquet_buffer_pool.cc +++ b/src/ailego/buffer/parquet_buffer_pool.cc @@ -115,24 +115,23 @@ ParquetBufferContextHandle ParquetBufferPool::acquire_buffer( } } { - std::unique_lock lock(table_mutex_); - { - bool found = MemoryLimitPool::get_instance().try_acquire_parquet(0); - if (!found) { - for (int i = 0; i < 5; i++) { - LRUCache::get_instance().recycle(); - found = MemoryLimitPool::get_instance().try_acquire_parquet(0); - if (found) { - break; - } + bool found = !MemoryLimitPool::get_instance().is_full(); + if (!found) { + for (int i = 0; i < 5; i++) { + LRUCache::get_instance().recycle(); + found = !MemoryLimitPool::get_instance().is_full(); + if (found) { + break; } } - if (!found) { - LOG_ERROR("Failed to acquire parquet buffer"); - return ParquetBufferContextHandle(); - } } + if (!found) { + LOG_ERROR("Failed to acquire parquet buffer"); + return ParquetBufferContextHandle(); + } + std::unique_lock lock(table_mutex_); 
if (acquire(buffer_id, table_[buffer_id]).ok()) { + MemoryLimitPool::get_instance().acquire_parquet(table_[buffer_id].size); arrow = set_block_acquired(buffer_id); return ParquetBufferContextHandle(buffer_id, arrow); } else { @@ -224,7 +223,6 @@ void ParquetBufferPool::release(ParquetBufferID buffer_id) { LRUCache::BlockType block; block.parquet_buffer_block.first = buffer_id; block.parquet_buffer_block.second = context.load_count.load(); - // TODO: set block LRUCache::get_instance().add_single_block(block, 0); } } @@ -240,12 +238,13 @@ void ParquetBufferPool::evict(ParquetBufferID buffer_id) { if (context.ref_count.compare_exchange_strong( expected, std::numeric_limits::min())) { MemoryLimitPool::get_instance().release_parquet(context.size); - table_.erase(buffer_id); + context.arrow = nullptr; + context.arrow_refs.clear(); } } bool ParquetBufferPool::is_dead_node(LRUCache::BlockType &block) { - std::unique_lock lock(table_mutex_); + std::shared_lock lock(table_mutex_); auto iter = table_.find(block.parquet_buffer_block.first); if (iter == table_.end()) { return true; diff --git a/src/core/utility/buffer_storage.cc b/src/core/utility/buffer_storage.cc index a20a03160..348ada996 100644 --- a/src/core/utility/buffer_storage.cc +++ b/src/core/utility/buffer_storage.cc @@ -176,7 +176,7 @@ class BufferStorage : public IndexStorage { //! 
Initialize storage int init(const ailego::Params ¶ms) override { params.get(BUFFER_STORAGE_MEMORY_SIZE, &buffer_size_); - LOG_INFO("buffer size: %lu", buffer_size_); + // LOG_INFO("buffer size: %lu", buffer_size_); return 0; } diff --git a/src/include/zvec/ailego/buffer/lru_cache.h b/src/include/zvec/ailego/buffer/lru_cache.h index 160f93391..7687339c3 100644 --- a/src/include/zvec/ailego/buffer/lru_cache.h +++ b/src/include/zvec/ailego/buffer/lru_cache.h @@ -49,7 +49,7 @@ class LRUCache { struct BlockType { std::pair block; std::pair parquet_buffer_block; - LPMap *lp_map; + LPMap *lp_map{nullptr}; }; typedef moodycamel::ConcurrentQueue ConcurrentQueue; @@ -118,7 +118,7 @@ class MemoryLimitPool { bool try_acquire_buffer(const size_t buffer_size, char *&buffer); - bool try_acquire_parquet(const size_t buffer_size); + void acquire_parquet(const size_t buffer_size); void release_buffer(char *buffer, const size_t buffer_size); From 4d627940dc5f683a1f017cd3f10a61191f18d313 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Tue, 7 Apr 2026 17:03:07 +0800 Subject: [PATCH 24/46] fix --- src/ailego/buffer/lru_cache.cc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/ailego/buffer/lru_cache.cc b/src/ailego/buffer/lru_cache.cc index 837619fc9..220faba2c 100644 --- a/src/ailego/buffer/lru_cache.cc +++ b/src/ailego/buffer/lru_cache.cc @@ -78,7 +78,7 @@ void LRUCache::clear_dead_node() { BlockType item; while (queues_[i].try_dequeue(item) && (clear_count++ < clear_size)) { if (item.lp_map == nullptr) { - if (ParquetBufferPool::get_instance().is_dead_node(item)) { + if (!ParquetBufferPool::get_instance().is_dead_node(item)) { if (!tmp.enqueue(item)) { LOG_ERROR("enqueue failed."); } @@ -91,8 +91,8 @@ void LRUCache::clear_dead_node() { } while (tmp.try_dequeue(item)) { if (item.lp_map == nullptr) { - if (ParquetBufferPool::get_instance().is_dead_node(item)) { - if (!tmp.enqueue(item)) { + if (!ParquetBufferPool::get_instance().is_dead_node(item)) { + if 
(!queues_[i].enqueue(item)) { LOG_ERROR("enqueue failed."); } } From e2190cc7825e87064ff76700d6da5a5fddf6c262 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Tue, 7 Apr 2026 20:07:27 +0800 Subject: [PATCH 25/46] fix --- src/ailego/buffer/lru_cache.cc | 26 +++++--------------------- 1 file changed, 5 insertions(+), 21 deletions(-) diff --git a/src/ailego/buffer/lru_cache.cc b/src/ailego/buffer/lru_cache.cc index 220faba2c..594c69b5e 100644 --- a/src/ailego/buffer/lru_cache.cc +++ b/src/ailego/buffer/lru_cache.cc @@ -8,7 +8,7 @@ namespace ailego { int LRUCache::init() { block_size_ = 512; for (size_t i = 0; i < CATCH_QUEUE_NUM; i++) { - queues_.push_back(ConcurrentQueue()); + queues_.push_back(ConcurrentQueue(block_size_ * 20)); } return 0; } @@ -74,32 +74,16 @@ void LRUCache::clear_dead_node() { continue; } size_t clear_count = 0; - ConcurrentQueue tmp; BlockType item; while (queues_[i].try_dequeue(item) && (clear_count++ < clear_size)) { if (item.lp_map == nullptr) { if (!ParquetBufferPool::get_instance().is_dead_node(item)) { - if (!tmp.enqueue(item)) { - LOG_ERROR("enqueue failed."); - } + queues_[i].enqueue(item); + break; } } else if (is_valid(item.lp_map) && !item.lp_map->isDeadBlock(item)) { - if (!tmp.enqueue(item)) { - LOG_ERROR("enqueue failed."); - } - } - } - while (tmp.try_dequeue(item)) { - if (item.lp_map == nullptr) { - if (!ParquetBufferPool::get_instance().is_dead_node(item)) { - if (!queues_[i].enqueue(item)) { - LOG_ERROR("enqueue failed."); - } - } - } else if (is_valid(item.lp_map) && !item.lp_map->isDeadBlock(item)) { - if (!queues_[i].enqueue(item)) { - LOG_ERROR("enqueue failed."); - } + queues_[i].enqueue(item); + break; } } } From 8ebc279aac46ec9feacf2783810a0b147baacc7b Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Tue, 7 Apr 2026 22:24:39 +0800 Subject: [PATCH 26/46] upd --- src/ailego/buffer/buffer_pool.cc | 35 +++++++++++++++++--- src/ailego/buffer/lru_cache.cc | 32 +++++++++++++----- src/include/zvec/ailego/buffer/buffer_pool.h | 1 
+ src/include/zvec/ailego/buffer/lru_cache.h | 7 +++- 4 files changed, 60 insertions(+), 15 deletions(-) diff --git a/src/ailego/buffer/buffer_pool.cc b/src/ailego/buffer/buffer_pool.cc index 72d7c0338..593c7f6e5 100644 --- a/src/ailego/buffer/buffer_pool.cc +++ b/src/ailego/buffer/buffer_pool.cc @@ -32,6 +32,7 @@ void LPMap::init(size_t entry_num) { for (size_t i = 0; i < entry_num_; i++) { entries_[i].ref_count.store(std::numeric_limits::min()); entries_[i].load_count.store(0); + entries_[i].in_lru_version.store(0); entries_[i].buffer = nullptr; } } @@ -53,6 +54,27 @@ char *LPMap::acquire_block(block_id_t block_id) { return entry.buffer; } } + if (MemoryLimitPool::get_instance().is_hot_level2()) { + for (int i = 0; i < entry_num_; i++) { + Entry &entry_hot = entries_[i]; + while (true) { + int current = entry_hot.in_lru_version.load(std::memory_order_relaxed); + int expected = entry_hot.load_count.load(std::memory_order_relaxed); + if (current == expected) { + break; + } + if (entry_hot.ref_count.compare_exchange_weak( + current, expected, std::memory_order_acq_rel, + std::memory_order_acquire)) { + LRUCache::BlockType block; + block.lp_map = this; + block.block.first = i; + block.block.second = expected; + LRUCache::get_instance().add_single_block(block, 0); + } + } + } + } } void LPMap::release_block(block_id_t block_id) { @@ -61,11 +83,14 @@ void LPMap::release_block(block_id_t block_id) { if (entry.ref_count.fetch_sub(1, std::memory_order_release) == 1) { std::atomic_thread_fence(std::memory_order_acquire); - LRUCache::BlockType block; - block.lp_map = this; - block.block.first = block_id; - block.block.second = entry.load_count.load(); - LRUCache::get_instance().add_single_block(block, 0); + if (MemoryLimitPool::get_instance().is_hot_level1()) { + LRUCache::BlockType block; + block.lp_map = this; + block.block.first = block_id; + block.block.second = entry.load_count.load(); + entry.in_lru_version = entry.load_count.load(); + 
LRUCache::get_instance().add_single_block(block, 0); + } } } diff --git a/src/ailego/buffer/lru_cache.cc b/src/ailego/buffer/lru_cache.cc index 594c69b5e..e7857dac9 100644 --- a/src/ailego/buffer/lru_cache.cc +++ b/src/ailego/buffer/lru_cache.cc @@ -8,7 +8,7 @@ namespace ailego { int LRUCache::init() { block_size_ = 512; for (size_t i = 0; i < CATCH_QUEUE_NUM; i++) { - queues_.push_back(ConcurrentQueue(block_size_ * 20)); + queues_.push_back(ConcurrentQueue(block_size_ * 200)); } return 0; } @@ -60,32 +60,38 @@ bool LRUCache::add_single_block(const BlockType &block, int block_type) { LOG_ERROR("enqueue failed."); return false; } - evict_queue_insertions_.fetch_add(1, std::memory_order_relaxed); - if (evict_queue_insertions_ % block_size_ == 0) { + static thread_local int evict_queue_insertions = 0; + if (evict_queue_insertions++ > block_size_) { this->clear_dead_node(); + evict_queue_insertions = 0; } return true; } void LRUCache::clear_dead_node() { for (size_t i = 0; i < CATCH_QUEUE_NUM; i++) { - size_t clear_size = block_size_ * 2; - if (queues_[i].size_approx() < clear_size * 4) { + size_t clear_size = block_size_; + if (queues_[i].size_approx() < block_size_) { continue; } + if (queues_[i].size_approx() > block_size_ * 8) { + clear_size *= 2; + } size_t clear_count = 0; BlockType item; + ConcurrentQueue tmp_queue(block_size_ * 200); while (queues_[i].try_dequeue(item) && (clear_count++ < clear_size)) { if (item.lp_map == nullptr) { if (!ParquetBufferPool::get_instance().is_dead_node(item)) { - queues_[i].enqueue(item); - break; + tmp_queue.enqueue(item); } } else if (is_valid(item.lp_map) && !item.lp_map->isDeadBlock(item)) { - queues_[i].enqueue(item); - break; + tmp_queue.enqueue(item); } } + while (tmp_queue.try_dequeue(item)) { + queues_[i].enqueue(item); + } } } @@ -140,5 +146,13 @@ bool MemoryLimitPool::is_full() { return used_size_.load() >= pool_size_; } +bool MemoryLimitPool::is_hot_level1() { + return used_size_.load() >= pool_size_ * 3 / 5; +} + 
+bool MemoryLimitPool::is_hot_level2() { + return used_size_.load() >= pool_size_ * 4 / 5; +} + } // namespace ailego } // namespace zvec \ No newline at end of file diff --git a/src/include/zvec/ailego/buffer/buffer_pool.h b/src/include/zvec/ailego/buffer/buffer_pool.h index f814e9b34..d073e5d9c 100644 --- a/src/include/zvec/ailego/buffer/buffer_pool.h +++ b/src/include/zvec/ailego/buffer/buffer_pool.h @@ -34,6 +34,7 @@ class LPMap { struct Entry { alignas(64) std::atomic ref_count; alignas(64) std::atomic load_count; + alignas(64) std::atomic in_lru_version; char *buffer; size_t size; }; diff --git a/src/include/zvec/ailego/buffer/lru_cache.h b/src/include/zvec/ailego/buffer/lru_cache.h index 7687339c3..6edcbe7c0 100644 --- a/src/include/zvec/ailego/buffer/lru_cache.h +++ b/src/include/zvec/ailego/buffer/lru_cache.h @@ -98,7 +98,6 @@ class LRUCache { constexpr static size_t CATCH_QUEUE_NUM = 3; size_t block_size_{0}; std::vector queues_; - alignas(64) std::atomic evict_queue_insertions_{0}; std::unordered_set valid_lp_maps_; std::shared_mutex valid_lp_maps_mutex_; }; @@ -126,6 +125,12 @@ class MemoryLimitPool { bool is_full(); + bool is_hot(); + + bool is_hot_level1(); + + bool is_hot_level2(); + private: MemoryLimitPool() = default; From 7a464853b54c0db8965c562710828a5d384f2aa7 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Tue, 7 Apr 2026 22:26:58 +0800 Subject: [PATCH 27/46] clang format --- src/ailego/buffer/lru_cache.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ailego/buffer/lru_cache.cc b/src/ailego/buffer/lru_cache.cc index e7857dac9..5ffa67cc4 100644 --- a/src/ailego/buffer/lru_cache.cc +++ b/src/ailego/buffer/lru_cache.cc @@ -147,7 +147,7 @@ bool MemoryLimitPool::is_full() { } bool MemoryLimitPool::is_hot_level1() { - return used_size_.load() >= pool_size_ * 3 / 5; + return used_size_.load() >= pool_size_ * 3 / 5; } bool MemoryLimitPool::is_hot_level2() { From 16f170b2dd37fc24c2d2f4f097f809a758c21164 Mon Sep 17 00:00:00 2001 
From: Zefeng Yin Date: Tue, 7 Apr 2026 22:36:03 +0800 Subject: [PATCH 28/46] upd --- src/ailego/buffer/buffer_pool.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/ailego/buffer/buffer_pool.cc b/src/ailego/buffer/buffer_pool.cc index 593c7f6e5..3ee80b5f0 100644 --- a/src/ailego/buffer/buffer_pool.cc +++ b/src/ailego/buffer/buffer_pool.cc @@ -57,6 +57,9 @@ char *LPMap::acquire_block(block_id_t block_id) { if (MemoryLimitPool::get_instance().is_hot_level2()) { for (int i = 0; i < entry_num_; i++) { Entry &entry_hot = entries_[i]; + if (entry_hot.ref_count.load() != 0) { + continue; + } while (true) { int current = entry_hot.in_lru_version.load(std::memory_order_relaxed); int expected = entry_hot.load_count.load(std::memory_order_relaxed); From 8a00602ee005d211e5d1ce0c0fcd703044d598f6 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Wed, 8 Apr 2026 16:12:46 +0800 Subject: [PATCH 29/46] decrease memory in FlatStreamerTest.TestLinearSearchWithLRU --- tests/core/algorithm/flat/flat_streamer_buffer_test.cc | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/core/algorithm/flat/flat_streamer_buffer_test.cc b/tests/core/algorithm/flat/flat_streamer_buffer_test.cc index 10308da9e..d74c277e6 100644 --- a/tests/core/algorithm/flat/flat_streamer_buffer_test.cc +++ b/tests/core/algorithm/flat/flat_streamer_buffer_test.cc @@ -169,7 +169,7 @@ TEST_F(FlatStreamerTest, TestLinearSearch) { } TEST_F(FlatStreamerTest, TestLinearSearchWithLRU) { - MemoryLimitPool::get_instance().init(2 * 1024UL * 1024UL * 1024UL); + MemoryLimitPool::get_instance().init(100 * 1024UL * 1024UL); constexpr size_t static dim = 1600; IndexStreamer::Pointer write_streamer = IndexFactory::CreateStreamer("FlatStreamer"); @@ -189,7 +189,7 @@ TEST_F(FlatStreamerTest, TestLinearSearchWithLRU) { auto ctx = write_streamer->create_context(); ASSERT_TRUE(!!ctx); - size_t cnt = 1000000UL; + size_t cnt = 50000UL; IndexQueryMeta qmeta(IndexMeta::DT_FP32, dim); for (size_t i = 0; i < cnt; 
i++) { NumericalVector vec(dim); From 56603c50f3c6bc1823731b3675ddf8d151fbf3fb Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Wed, 8 Apr 2026 16:19:36 +0800 Subject: [PATCH 30/46] fix for pr comments --- src/ailego/buffer/lru_cache.cc | 2 +- .../buffer/{buffer_pool.cc => vector_buffer_pool.cc} | 12 ++++++------ src/core/utility/buffer_storage.cc | 4 ++-- src/include/zvec/ailego/buffer/lru_cache.h | 12 ++++++------ .../buffer/{buffer_pool.h => vector_buffer_pool.h} | 8 ++++---- src/include/zvec/core/framework/index_storage.h | 2 +- 6 files changed, 20 insertions(+), 20 deletions(-) rename src/ailego/buffer/{buffer_pool.cc => vector_buffer_pool.cc} (95%) rename src/include/zvec/ailego/buffer/{buffer_pool.h => vector_buffer_pool.h} (95%) diff --git a/src/ailego/buffer/lru_cache.cc b/src/ailego/buffer/lru_cache.cc index 5ffa67cc4..9c140721f 100644 --- a/src/ailego/buffer/lru_cache.cc +++ b/src/ailego/buffer/lru_cache.cc @@ -1,4 +1,4 @@ -#include +#include #include #include diff --git a/src/ailego/buffer/buffer_pool.cc b/src/ailego/buffer/vector_buffer_pool.cc similarity index 95% rename from src/ailego/buffer/buffer_pool.cc rename to src/ailego/buffer/vector_buffer_pool.cc index 3ee80b5f0..1e07e2ba4 100644 --- a/src/ailego/buffer/buffer_pool.cc +++ b/src/ailego/buffer/vector_buffer_pool.cc @@ -1,4 +1,4 @@ -#include +#include #include #if defined(_MSC_VER) @@ -23,7 +23,7 @@ static ssize_t zvec_pread(int fd, void *buf, size_t count, size_t offset) { namespace zvec { namespace ailego { -void LPMap::init(size_t entry_num) { +void VectorPageTable::init(size_t entry_num) { if (entries_) { delete[] entries_; } @@ -37,7 +37,7 @@ void LPMap::init(size_t entry_num) { } } -char *LPMap::acquire_block(block_id_t block_id) { +char *VectorPageTable::acquire_block(block_id_t block_id) { assert(block_id < entry_num_); Entry &entry = entries_[block_id]; while (true) { @@ -80,7 +80,7 @@ char *LPMap::acquire_block(block_id_t block_id) { } } -void LPMap::release_block(block_id_t block_id) 
{ +void VectorPageTable::release_block(block_id_t block_id) { assert(block_id < entry_num_); Entry &entry = entries_[block_id]; @@ -97,7 +97,7 @@ void LPMap::release_block(block_id_t block_id) { } } -char *LPMap::evict_block(block_id_t block_id) { +char *VectorPageTable::evict_block(block_id_t block_id) { assert(block_id < entry_num_); Entry &entry = entries_[block_id]; int expected = 0; @@ -114,7 +114,7 @@ char *LPMap::evict_block(block_id_t block_id) { } } -char *LPMap::set_block_acquired(block_id_t block_id, char *buffer, +char *VectorPageTable::set_block_acquired(block_id_t block_id, char *buffer, size_t size) { assert(block_id < entry_num_); Entry &entry = entries_[block_id]; diff --git a/src/core/utility/buffer_storage.cc b/src/core/utility/buffer_storage.cc index 348ada996..fed61af2d 100644 --- a/src/core/utility/buffer_storage.cc +++ b/src/core/utility/buffer_storage.cc @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include #include @@ -176,7 +176,7 @@ class BufferStorage : public IndexStorage { //! 
Initialize storage int init(const ailego::Params ¶ms) override { params.get(BUFFER_STORAGE_MEMORY_SIZE, &buffer_size_); - // LOG_INFO("buffer size: %lu", buffer_size_); + // LOG_DEBUG("buffer size: %lu", buffer_size_); return 0; } diff --git a/src/include/zvec/ailego/buffer/lru_cache.h b/src/include/zvec/ailego/buffer/lru_cache.h index 6edcbe7c0..a13e702a3 100644 --- a/src/include/zvec/ailego/buffer/lru_cache.h +++ b/src/include/zvec/ailego/buffer/lru_cache.h @@ -29,7 +29,7 @@ namespace zvec { namespace ailego { -class LPMap; +class VectorPageTable; using block_id_t = size_t; using version_t = size_t; @@ -49,7 +49,7 @@ class LRUCache { struct BlockType { std::pair block; std::pair parquet_buffer_block; - LPMap *lp_map{nullptr}; + VectorPageTable *lp_map{nullptr}; }; typedef moodycamel::ConcurrentQueue ConcurrentQueue; @@ -72,17 +72,17 @@ class LRUCache { void clear_dead_node(); - bool is_valid(LPMap *lp_map) { + bool is_valid(VectorPageTable *lp_map) { std::shared_lock lock(valid_lp_maps_mutex_); return valid_lp_maps_.find(lp_map) != valid_lp_maps_.end(); } - void set_valid(LPMap *lp_map) { + void set_valid(VectorPageTable *lp_map) { std::unique_lock lock(valid_lp_maps_mutex_); valid_lp_maps_.insert(lp_map); } - void set_invalid(LPMap *lp_map) { + void set_invalid(VectorPageTable *lp_map) { std::unique_lock lock(valid_lp_maps_mutex_); valid_lp_maps_.erase(lp_map); } @@ -98,7 +98,7 @@ class LRUCache { constexpr static size_t CATCH_QUEUE_NUM = 3; size_t block_size_{0}; std::vector queues_; - std::unordered_set valid_lp_maps_; + std::unordered_set valid_lp_maps_; std::shared_mutex valid_lp_maps_mutex_; }; diff --git a/src/include/zvec/ailego/buffer/buffer_pool.h b/src/include/zvec/ailego/buffer/vector_buffer_pool.h similarity index 95% rename from src/include/zvec/ailego/buffer/buffer_pool.h rename to src/include/zvec/ailego/buffer/vector_buffer_pool.h index d073e5d9c..a1a18fa8d 100644 --- a/src/include/zvec/ailego/buffer/buffer_pool.h +++ 
b/src/include/zvec/ailego/buffer/vector_buffer_pool.h @@ -30,7 +30,7 @@ namespace ailego { using block_id_t = size_t; using version_t = size_t; -class LPMap { +class VectorPageTable { struct Entry { alignas(64) std::atomic ref_count; alignas(64) std::atomic load_count; @@ -40,10 +40,10 @@ class LPMap { }; public: - LPMap() : entry_num_(0), entries_(nullptr) { + VectorPageTable() : entry_num_(0), entries_(nullptr) { LRUCache::get_instance().set_valid(this); } - ~LPMap() { + ~VectorPageTable() { delete[] entries_; LRUCache::get_instance().set_invalid(this); } @@ -110,7 +110,7 @@ class VecBufferPool { size_t pool_capacity_; public: - LPMap lp_map_; + VectorPageTable lp_map_; private: std::vector> mutex_vec_; diff --git a/src/include/zvec/core/framework/index_storage.h b/src/include/zvec/core/framework/index_storage.h index 8273004a3..18ae1ddcf 100644 --- a/src/include/zvec/core/framework/index_storage.h +++ b/src/include/zvec/core/framework/index_storage.h @@ -14,7 +14,7 @@ #pragma once -#include +#include #include #include #include From f8cf1948c0665e97e533f11efbce2b175c0b0647 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Wed, 8 Apr 2026 16:21:58 +0800 Subject: [PATCH 31/46] clang format --- src/ailego/buffer/lru_cache.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ailego/buffer/lru_cache.cc b/src/ailego/buffer/lru_cache.cc index 9c140721f..a4a49933c 100644 --- a/src/ailego/buffer/lru_cache.cc +++ b/src/ailego/buffer/lru_cache.cc @@ -1,5 +1,5 @@ -#include #include +#include #include namespace zvec { From 9ddb4fa020ce6ae7912e9221159bf48f53dcdb50 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Wed, 8 Apr 2026 16:24:05 +0800 Subject: [PATCH 32/46] clang format --- src/ailego/buffer/vector_buffer_pool.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/ailego/buffer/vector_buffer_pool.cc b/src/ailego/buffer/vector_buffer_pool.cc index 1e07e2ba4..83d92d582 100644 --- a/src/ailego/buffer/vector_buffer_pool.cc +++ 
b/src/ailego/buffer/vector_buffer_pool.cc @@ -115,7 +115,7 @@ char *VectorPageTable::evict_block(block_id_t block_id) { } char *VectorPageTable::set_block_acquired(block_id_t block_id, char *buffer, - size_t size) { + size_t size) { assert(block_id < entry_num_); Entry &entry = entries_[block_id]; entry.size = size; From 04f8b91a6a5a88db9d54f35d2d112f544be1ab39 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Thu, 9 Apr 2026 21:10:48 +0800 Subject: [PATCH 33/46] add TODO --- src/include/zvec/ailego/buffer/lru_cache.h | 1 + 1 file changed, 1 insertion(+) diff --git a/src/include/zvec/ailego/buffer/lru_cache.h b/src/include/zvec/ailego/buffer/lru_cache.h index a13e702a3..9de83b520 100644 --- a/src/include/zvec/ailego/buffer/lru_cache.h +++ b/src/include/zvec/ailego/buffer/lru_cache.h @@ -47,6 +47,7 @@ struct ParquetBufferID { class LRUCache { public: struct BlockType { + // TODO: lp_map & block std::pair block; std::pair parquet_buffer_block; VectorPageTable *lp_map{nullptr}; From ec4666d8a3577da4c9cdbd151b52398d7247b6d2 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Fri, 10 Apr 2026 17:52:50 +0800 Subject: [PATCH 34/46] rename --- src/ailego/buffer/lru_cache.cc | 48 +++++++++---------- src/ailego/buffer/vector_buffer_pool.cc | 46 +++++++++--------- src/include/zvec/ailego/buffer/lru_cache.h | 36 +++++++------- .../zvec/ailego/buffer/vector_buffer_pool.h | 18 +++---- 4 files changed, 74 insertions(+), 74 deletions(-) diff --git a/src/ailego/buffer/lru_cache.cc b/src/ailego/buffer/lru_cache.cc index a4a49933c..9e2d736d1 100644 --- a/src/ailego/buffer/lru_cache.cc +++ b/src/ailego/buffer/lru_cache.cc @@ -6,17 +6,17 @@ namespace zvec { namespace ailego { int LRUCache::init() { - block_size_ = 512; - for (size_t i = 0; i < CATCH_QUEUE_NUM; i++) { - queues_.push_back(ConcurrentQueue(block_size_ * 200)); + evict_batch_size_ = 512; + for (size_t i = 0; i < CACHE_QUEUE_NUM; i++) { + evict_queues_.push_back(ConcurrentQueue(evict_batch_size_ * 200)); } return 0; } bool 
LRUCache::evict_single_block(BlockType &item) { bool found = false; - for (size_t i = 0; i < CATCH_QUEUE_NUM; i++) { - found = queues_[i].try_dequeue(item); + for (size_t i = 0; i < CACHE_QUEUE_NUM; i++) { + found = evict_queues_[i].try_dequeue(item); if (found) { break; } @@ -31,22 +31,22 @@ bool LRUCache::evict_block(BlockType &item) { if (!ok) { return false; } - if (item.lp_map == nullptr) { + if (item.page_table == nullptr) { if (!ParquetBufferPool::get_instance().is_dead_node(item)) { break; } else { continue; } } - } while (!is_valid(item.lp_map) || item.lp_map->isDeadBlock(item)); + } while (!is_valid(item.page_table) || item.page_table->is_dead_block(item)); return ok; } bool LRUCache::recycle() { BlockType item; while (MemoryLimitPool::get_instance().is_full() && evict_block(item)) { - if (item.lp_map) { - item.lp_map->evict_block(item.block.first); + if (item.page_table) { + item.page_table->evict_block(item.vector_block.first); } else { ParquetBufferPool::get_instance().evict(item.parquet_buffer_block.first); } @@ -54,14 +54,14 @@ bool LRUCache::recycle() { return MemoryLimitPool::get_instance().is_full(); } -bool LRUCache::add_single_block(const BlockType &block, int block_type) { - bool ok = queues_[block_type].enqueue(block); +bool LRUCache::add_single_block(const BlockType &block, int queue_index) { + bool ok = evict_queues_[queue_index].enqueue(block); if (!ok) { LOG_ERROR("enqueue failed."); return false; } static thread_local int evict_queue_insertions = 0; - if (evict_queue_insertions++ > block_size_) { + if (evict_queue_insertions++ > evict_batch_size_) { this->clear_dead_node(); evict_queue_insertions = 0; } @@ -69,28 +69,28 @@ bool LRUCache::add_single_block(const BlockType &block, int block_type) { } void LRUCache::clear_dead_node() { - for (size_t i = 0; i < CATCH_QUEUE_NUM; i++) { - size_t clear_size = block_size_; - if (queues_[i].size_approx() < block_size_) { + for (size_t i = 0; i < CACHE_QUEUE_NUM; i++) { + size_t clear_size = 
evict_batch_size_; + if (evict_queues_[i].size_approx() < evict_batch_size_) { continue; } - if (queues_[i].size_approx() > block_size_ * 8) { + if (evict_queues_[i].size_approx() > evict_batch_size_ * 8) { clear_size *= 2; } size_t clear_count = 0; BlockType item; - ConcurrentQueue tmp_queue(block_size_ * 200); - while (queues_[i].try_dequeue(item) && (clear_count++ < clear_size)) { - if (item.lp_map == nullptr) { + ConcurrentQueue live_blocks_queue(evict_batch_size_ * 200); + while (evict_queues_[i].try_dequeue(item) && (clear_count++ < clear_size)) { + if (item.page_table == nullptr) { if (!ParquetBufferPool::get_instance().is_dead_node(item)) { - tmp_queue.enqueue(item); + live_blocks_queue.enqueue(item); } - } else if (is_valid(item.lp_map) && !item.lp_map->isDeadBlock(item)) { - tmp_queue.enqueue(item); + } else if (is_valid(item.page_table) && !item.page_table->is_dead_block(item)) { + live_blocks_queue.enqueue(item); } } - while (tmp_queue.try_dequeue(item)) { - queues_[i].enqueue(item); + while (live_blocks_queue.try_dequeue(item)) { + evict_queues_[i].enqueue(item); } } } diff --git a/src/ailego/buffer/vector_buffer_pool.cc b/src/ailego/buffer/vector_buffer_pool.cc index 83d92d582..69e370b86 100644 --- a/src/ailego/buffer/vector_buffer_pool.cc +++ b/src/ailego/buffer/vector_buffer_pool.cc @@ -32,7 +32,7 @@ void VectorPageTable::init(size_t entry_num) { for (size_t i = 0; i < entry_num_; i++) { entries_[i].ref_count.store(std::numeric_limits::min()); entries_[i].load_count.store(0); - entries_[i].in_lru_version.store(0); + entries_[i].lru_version.store(0); entries_[i].buffer = nullptr; } } @@ -56,23 +56,23 @@ char *VectorPageTable::acquire_block(block_id_t block_id) { } if (MemoryLimitPool::get_instance().is_hot_level2()) { for (int i = 0; i < entry_num_; i++) { - Entry &entry_hot = entries_[i]; - if (entry_hot.ref_count.load() != 0) { + Entry &hot_entry = entries_[i]; + if (hot_entry.ref_count.load() != 0) { continue; } while (true) { - int current = 
entry_hot.in_lru_version.load(std::memory_order_relaxed); - int expected = entry_hot.load_count.load(std::memory_order_relaxed); + int current = hot_entry.lru_version.load(std::memory_order_relaxed); + int expected = hot_entry.load_count.load(std::memory_order_relaxed); if (current == expected) { break; } - if (entry_hot.ref_count.compare_exchange_weak( + if (hot_entry.ref_count.compare_exchange_weak( current, expected, std::memory_order_acq_rel, std::memory_order_acquire)) { LRUCache::BlockType block; - block.lp_map = this; - block.block.first = i; - block.block.second = expected; + block.page_table = this; + block.vector_block.first = i; + block.vector_block.second = expected; LRUCache::get_instance().add_single_block(block, 0); } } @@ -88,10 +88,10 @@ void VectorPageTable::release_block(block_id_t block_id) { std::atomic_thread_fence(std::memory_order_acquire); if (MemoryLimitPool::get_instance().is_hot_level1()) { LRUCache::BlockType block; - block.lp_map = this; - block.block.first = block_id; - block.block.second = entry.load_count.load(); - entry.in_lru_version = entry.load_count.load(); + block.page_table = this; + block.vector_block.first = block_id; + block.vector_block.second = entry.load_count.load(); + entry.lru_version = entry.load_count.load(); LRUCache::get_instance().add_single_block(block, 0); } } @@ -169,12 +169,12 @@ int VecBufferPool::init(size_t /*pool_capacity*/, size_t block_size, return -1; } size_t block_num = segment_count + 10; - lp_map_.init(block_num); - mutex_vec_.reserve(block_num); + page_table_.init(block_num); + block_mutexes_.reserve(block_num); for (int i = 0; i < block_num; i++) { - mutex_vec_.emplace_back(std::make_unique()); + block_mutexes_.emplace_back(std::make_unique()); } - LOG_DEBUG("entry num: %zu", lp_map_.entry_num()); + LOG_DEBUG("entry num: %zu", page_table_.entry_num()); return 0; } @@ -184,12 +184,12 @@ VecBufferPoolHandle VecBufferPool::get_handle() { char *VecBufferPool::acquire_buffer(block_id_t block_id, 
size_t offset, size_t size, int retry) { - char *buffer = lp_map_.acquire_block(block_id); + char *buffer = page_table_.acquire_block(block_id); if (buffer) { return buffer; } - std::lock_guard lock(*mutex_vec_[block_id]); - buffer = lp_map_.acquire_block(block_id); + std::lock_guard lock(*block_mutexes_[block_id]); + buffer = page_table_.acquire_block(block_id); if (buffer) { return buffer; } @@ -222,7 +222,7 @@ char *VecBufferPool::acquire_buffer(block_id_t block_id, size_t offset, MemoryLimitPool::get_instance().release_buffer(buffer, size); return nullptr; } - return lp_map_.set_block_acquired(block_id, buffer, size); + return page_table_.set_block_acquired(block_id, buffer, size); } int VecBufferPool::get_meta(size_t offset, size_t length, char *buffer) { @@ -249,11 +249,11 @@ int VecBufferPoolHandle::get_meta(size_t offset, size_t length, char *buffer) { } void VecBufferPoolHandle::release_one(block_id_t block_id) { - pool_.lp_map_.release_block(block_id); + pool_.page_table_.release_block(block_id); } void VecBufferPoolHandle::acquire_one(block_id_t block_id) { - pool_.lp_map_.acquire_block(block_id); + pool_.page_table_.acquire_block(block_id); } } // namespace ailego diff --git a/src/include/zvec/ailego/buffer/lru_cache.h b/src/include/zvec/ailego/buffer/lru_cache.h index 9de83b520..83299a7df 100644 --- a/src/include/zvec/ailego/buffer/lru_cache.h +++ b/src/include/zvec/ailego/buffer/lru_cache.h @@ -47,10 +47,10 @@ struct ParquetBufferID { class LRUCache { public: struct BlockType { - // TODO: lp_map & block - std::pair block; + // TODO: page_table & vector_block + std::pair vector_block; std::pair parquet_buffer_block; - VectorPageTable *lp_map{nullptr}; + VectorPageTable *page_table{nullptr}; }; typedef moodycamel::ConcurrentQueue ConcurrentQueue; @@ -69,23 +69,23 @@ class LRUCache { bool evict_block(BlockType &item); - bool add_single_block(const BlockType &block, int block_type); + bool add_single_block(const BlockType &block, int queue_index); void 
clear_dead_node(); - bool is_valid(VectorPageTable *lp_map) { - std::shared_lock lock(valid_lp_maps_mutex_); - return valid_lp_maps_.find(lp_map) != valid_lp_maps_.end(); + bool is_valid(VectorPageTable *page_table) { + std::shared_lock lock(valid_page_tables_mutex_); + return valid_page_tables_.find(page_table) != valid_page_tables_.end(); } - void set_valid(VectorPageTable *lp_map) { - std::unique_lock lock(valid_lp_maps_mutex_); - valid_lp_maps_.insert(lp_map); + void set_valid(VectorPageTable *page_table) { + std::unique_lock lock(valid_page_tables_mutex_); + valid_page_tables_.insert(page_table); } - void set_invalid(VectorPageTable *lp_map) { - std::unique_lock lock(valid_lp_maps_mutex_); - valid_lp_maps_.erase(lp_map); + void set_invalid(VectorPageTable *page_table) { + std::unique_lock lock(valid_page_tables_mutex_); + valid_page_tables_.erase(page_table); } bool recycle(); @@ -96,11 +96,11 @@ class LRUCache { } private: - constexpr static size_t CATCH_QUEUE_NUM = 3; - size_t block_size_{0}; - std::vector queues_; - std::unordered_set valid_lp_maps_; - std::shared_mutex valid_lp_maps_mutex_; + constexpr static size_t CACHE_QUEUE_NUM = 3; + size_t evict_batch_size_{0}; + std::vector evict_queues_; + std::unordered_set valid_page_tables_; + std::shared_mutex valid_page_tables_mutex_; }; class MemoryLimitPool { diff --git a/src/include/zvec/ailego/buffer/vector_buffer_pool.h b/src/include/zvec/ailego/buffer/vector_buffer_pool.h index a1a18fa8d..d964ea0d9 100644 --- a/src/include/zvec/ailego/buffer/vector_buffer_pool.h +++ b/src/include/zvec/ailego/buffer/vector_buffer_pool.h @@ -34,7 +34,7 @@ class VectorPageTable { struct Entry { alignas(64) std::atomic ref_count; alignas(64) std::atomic load_count; - alignas(64) std::atomic in_lru_version; + alignas(64) std::atomic lru_version; char *buffer; size_t size; }; @@ -62,9 +62,9 @@ class VectorPageTable { return entry_num_; } - inline bool isDeadBlock(LRUCache::BlockType block) const { - Entry &entry = 
entries_[block.block.first]; - return block.block.second != entry.load_count.load(); + inline bool is_dead_block(LRUCache::BlockType block) const { + Entry &entry = entries_[block.vector_block.first]; + return block.vector_block.second != entry.load_count.load(); } private: @@ -80,9 +80,9 @@ class VecBufferPool { VecBufferPool(const std::string &filename); ~VecBufferPool() { - // Free any buffers still pinned in the map - for (size_t i = 0; i < lp_map_.entry_num(); ++i) { - lp_map_.evict_block(i); + // Free any buffers still pinned in the page table + for (size_t i = 0; i < page_table_.entry_num(); ++i) { + page_table_.evict_block(i); } #if defined(_MSC_VER) _close(fd_); @@ -110,10 +110,10 @@ class VecBufferPool { size_t pool_capacity_; public: - VectorPageTable lp_map_; + VectorPageTable page_table_; private: - std::vector> mutex_vec_; + std::vector> block_mutexes_; }; class VecBufferPoolHandle { From 9c61f9b5dac58cd6ea4c240634d7b69566c31d2d Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Fri, 10 Apr 2026 17:55:14 +0800 Subject: [PATCH 35/46] clang format --- src/ailego/buffer/lru_cache.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ailego/buffer/lru_cache.cc b/src/ailego/buffer/lru_cache.cc index 9e2d736d1..a2a5d7eb4 100644 --- a/src/ailego/buffer/lru_cache.cc +++ b/src/ailego/buffer/lru_cache.cc @@ -85,7 +85,8 @@ void LRUCache::clear_dead_node() { if (!ParquetBufferPool::get_instance().is_dead_node(item)) { live_blocks_queue.enqueue(item); } - } else if (is_valid(item.page_table) && !item.page_table->is_dead_block(item)) { + } else if (is_valid(item.page_table) && + !item.page_table->is_dead_block(item)) { live_blocks_queue.enqueue(item); } } From 8af44e659d9310e3431094559eab646b92a53eb9 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Fri, 10 Apr 2026 18:00:57 +0800 Subject: [PATCH 36/46] fix --- src/ailego/buffer/vector_buffer_pool.cc | 28 ++++++++++++------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git 
a/src/ailego/buffer/vector_buffer_pool.cc b/src/ailego/buffer/vector_buffer_pool.cc index 69e370b86..53688599f 100644 --- a/src/ailego/buffer/vector_buffer_pool.cc +++ b/src/ailego/buffer/vector_buffer_pool.cc @@ -40,20 +40,6 @@ void VectorPageTable::init(size_t entry_num) { char *VectorPageTable::acquire_block(block_id_t block_id) { assert(block_id < entry_num_); Entry &entry = entries_[block_id]; - while (true) { - int current_count = entry.ref_count.load(std::memory_order_acquire); - if (current_count < 0) { - return nullptr; - } - if (entry.ref_count.compare_exchange_weak(current_count, current_count + 1, - std::memory_order_acq_rel, - std::memory_order_acquire)) { - if (current_count == 0) { - entry.load_count.fetch_add(1, std::memory_order_relaxed); - } - return entry.buffer; - } - } if (MemoryLimitPool::get_instance().is_hot_level2()) { for (int i = 0; i < entry_num_; i++) { Entry &hot_entry = entries_[i]; @@ -78,6 +64,20 @@ char *VectorPageTable::acquire_block(block_id_t block_id) { } } } + while (true) { + int current_count = entry.ref_count.load(std::memory_order_acquire); + if (current_count < 0) { + return nullptr; + } + if (entry.ref_count.compare_exchange_weak(current_count, current_count + 1, + std::memory_order_acq_rel, + std::memory_order_acquire)) { + if (current_count == 0) { + entry.load_count.fetch_add(1, std::memory_order_relaxed); + } + return entry.buffer; + } + } } void VectorPageTable::release_block(block_id_t block_id) { From 0dea74f1990d8c2d3ddd5ccb3915e5c467ebf562 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Fri, 10 Apr 2026 18:05:34 +0800 Subject: [PATCH 37/46] fix --- src/ailego/buffer/vector_buffer_pool.cc | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/src/ailego/buffer/vector_buffer_pool.cc b/src/ailego/buffer/vector_buffer_pool.cc index 53688599f..d8187a7f5 100644 --- a/src/ailego/buffer/vector_buffer_pool.cc +++ b/src/ailego/buffer/vector_buffer_pool.cc @@ -122,12 +122,16 @@ char 
*VectorPageTable::set_block_acquired(block_id_t block_id, char *buffer, while (true) { int current_count = entry.ref_count.load(std::memory_order_relaxed); if (current_count >= 0) { + // Another thread has already loaded this block. Release the buffer we + // allocated since it won't be used, then pin the existing entry. if (entry.ref_count.compare_exchange_weak( current_count, current_count + 1, std::memory_order_acq_rel, std::memory_order_acquire)) { + MemoryLimitPool::get_instance().release_buffer(buffer, size); return entry.buffer; } } else { + // Block is unloaded (ref_count < 0). Take ownership of buffer. if (entry.ref_count.compare_exchange_weak(current_count, 1, std::memory_order_acq_rel, std::memory_order_acquire)) { From 02bd7b9158d0f75ea9c4a8b88e8d3a61a5cd980b Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Mon, 13 Apr 2026 16:03:53 +0800 Subject: [PATCH 38/46] add header --- src/ailego/buffer/lru_cache.cc | 14 ++++++++++++++ src/ailego/buffer/vector_buffer_pool.cc | 14 ++++++++++++++ src/include/zvec/ailego/buffer/lru_cache.h | 15 +++++++++++++++ .../zvec/ailego/buffer/parquet_buffer_pool.h | 15 +++++++++++++++ .../zvec/ailego/buffer/vector_buffer_pool.h | 15 +++++++++++++++ 5 files changed, 73 insertions(+) diff --git a/src/ailego/buffer/lru_cache.cc b/src/ailego/buffer/lru_cache.cc index a2a5d7eb4..56372ffce 100644 --- a/src/ailego/buffer/lru_cache.cc +++ b/src/ailego/buffer/lru_cache.cc @@ -1,3 +1,17 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + #include #include #include diff --git a/src/ailego/buffer/vector_buffer_pool.cc b/src/ailego/buffer/vector_buffer_pool.cc index d8187a7f5..658c8611f 100644 --- a/src/ailego/buffer/vector_buffer_pool.cc +++ b/src/ailego/buffer/vector_buffer_pool.cc @@ -1,3 +1,17 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + #include #include diff --git a/src/include/zvec/ailego/buffer/lru_cache.h b/src/include/zvec/ailego/buffer/lru_cache.h index 83299a7df..8a588d5f5 100644 --- a/src/include/zvec/ailego/buffer/lru_cache.h +++ b/src/include/zvec/ailego/buffer/lru_cache.h @@ -1,3 +1,18 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ + #pragma once #include diff --git a/src/include/zvec/ailego/buffer/parquet_buffer_pool.h b/src/include/zvec/ailego/buffer/parquet_buffer_pool.h index 877c25e2b..c734d76b1 100644 --- a/src/include/zvec/ailego/buffer/parquet_buffer_pool.h +++ b/src/include/zvec/ailego/buffer/parquet_buffer_pool.h @@ -1,3 +1,18 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + #pragma once #include diff --git a/src/include/zvec/ailego/buffer/vector_buffer_pool.h b/src/include/zvec/ailego/buffer/vector_buffer_pool.h index d964ea0d9..6718c3529 100644 --- a/src/include/zvec/ailego/buffer/vector_buffer_pool.h +++ b/src/include/zvec/ailego/buffer/vector_buffer_pool.h @@ -1,3 +1,18 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ + #pragma once #include From 17422a4a5e58627e0e2e4bb890b4220b5eb228e2 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Mon, 13 Apr 2026 17:32:31 +0800 Subject: [PATCH 39/46] fix --- src/ailego/buffer/lru_cache.cc | 4 + src/ailego/buffer/vector_buffer_pool.cc | 79 +++++++++++-------- .../zvec/ailego/buffer/vector_buffer_pool.h | 1 + 3 files changed, 49 insertions(+), 35 deletions(-) diff --git a/src/ailego/buffer/lru_cache.cc b/src/ailego/buffer/lru_cache.cc index 56372ffce..b2f429acf 100644 --- a/src/ailego/buffer/lru_cache.cc +++ b/src/ailego/buffer/lru_cache.cc @@ -129,6 +129,10 @@ bool MemoryLimitPool::try_acquire_buffer(const size_t buffer_size, desired = expected + buffer_size; } while (!used_size_.compare_exchange_weak(expected, desired)); buffer = (char *)ailego_malloc(buffer_size); + if (!buffer) { + used_size_.fetch_sub(buffer_size); + return false; + } return true; } diff --git a/src/ailego/buffer/vector_buffer_pool.cc b/src/ailego/buffer/vector_buffer_pool.cc index 658c8611f..cc88fbbeb 100644 --- a/src/ailego/buffer/vector_buffer_pool.cc +++ b/src/ailego/buffer/vector_buffer_pool.cc @@ -54,30 +54,6 @@ void VectorPageTable::init(size_t entry_num) { char *VectorPageTable::acquire_block(block_id_t block_id) { assert(block_id < entry_num_); Entry &entry = entries_[block_id]; - if (MemoryLimitPool::get_instance().is_hot_level2()) { - for (int i = 0; i < entry_num_; i++) { - Entry &hot_entry = entries_[i]; - if (hot_entry.ref_count.load() != 0) { - continue; - } - while (true) { - int current = hot_entry.lru_version.load(std::memory_order_relaxed); - int expected = hot_entry.load_count.load(std::memory_order_relaxed); - if (current == expected) { - break; - } - if (hot_entry.ref_count.compare_exchange_weak( - current, expected, std::memory_order_acq_rel, - std::memory_order_acquire)) { - LRUCache::BlockType block; - block.page_table = this; - block.vector_block.first = i; - block.vector_block.second = expected; - 
LRUCache::get_instance().add_single_block(block, 0); - } - } - } - } while (true) { int current_count = entry.ref_count.load(std::memory_order_acquire); if (current_count < 0) { @@ -107,6 +83,10 @@ void VectorPageTable::release_block(block_id_t block_id) { block.vector_block.second = entry.load_count.load(); entry.lru_version = entry.load_count.load(); LRUCache::get_instance().add_single_block(block, 0); + } else { + if (entry.lru_version.load(std::memory_order_relaxed) + 1 == entry.load_count.load(std::memory_order_relaxed)) { + evict_cache_.enqueue(block_id); + } } } } @@ -133,11 +113,41 @@ char *VectorPageTable::set_block_acquired(block_id_t block_id, char *buffer, assert(block_id < entry_num_); Entry &entry = entries_[block_id]; entry.size = size; + if (MemoryLimitPool::get_instance().is_hot_level2()) { + size_t evict_block_id = 0; + while(evict_cache_.try_dequeue(evict_block_id)) { + Entry &hot_entry = entries_[evict_block_id]; + if (hot_entry.ref_count.load() != 0) { + continue; + } + while (true) { + version_t current = hot_entry.lru_version.load(std::memory_order_relaxed); + version_t expected = hot_entry.load_count.load(std::memory_order_relaxed); + if (current == expected) { + break; + } + if (hot_entry.lru_version.compare_exchange_weak( + current, expected, std::memory_order_acq_rel, + std::memory_order_acquire)) { + LRUCache::BlockType block; + block.page_table = this; + block.vector_block.first = evict_block_id; + block.vector_block.second = expected; + LRUCache::get_instance().add_single_block(block, 0); + } + } + } + } while (true) { int current_count = entry.ref_count.load(std::memory_order_relaxed); if (current_count >= 0) { - // Another thread has already loaded this block. Release the buffer we - // allocated since it won't be used, then pin the existing entry. + // Defensive branch: in practice this path should never be reached. 
+ // set_block_acquired() is always called under block_mutexes_[block_id], + // and the caller (acquire_buffer) re-checks acquire_block() inside the + // same lock before invoking this function. Therefore, if we get here, + // ref_count must still be negative (unloaded). This branch is retained + // as a safety net in case the locking contract is violated in the future, + // e.g. if set_block_acquired is called from an unlocked context. if (entry.ref_count.compare_exchange_weak( current_count, current_count + 1, std::memory_order_acq_rel, std::memory_order_acquire)) { @@ -145,14 +155,10 @@ char *VectorPageTable::set_block_acquired(block_id_t block_id, char *buffer, return entry.buffer; } } else { - // Block is unloaded (ref_count < 0). Take ownership of buffer. - if (entry.ref_count.compare_exchange_weak(current_count, 1, - std::memory_order_acq_rel, - std::memory_order_acquire)) { - entry.buffer = buffer; - entry.load_count.fetch_add(1, std::memory_order_relaxed); - return entry.buffer; - } + entry.buffer = buffer; + entry.load_count.fetch_add(1, std::memory_order_relaxed); + entry.ref_count.store(1, std::memory_order_release); + return entry.buffer; } } } @@ -189,7 +195,7 @@ int VecBufferPool::init(size_t /*pool_capacity*/, size_t block_size, size_t block_num = segment_count + 10; page_table_.init(block_num); block_mutexes_.reserve(block_num); - for (int i = 0; i < block_num; i++) { + for (size_t i = 0; i < block_num; i++) { block_mutexes_.emplace_back(std::make_unique()); } LOG_DEBUG("entry num: %zu", page_table_.entry_num()); @@ -271,6 +277,9 @@ void VecBufferPoolHandle::release_one(block_id_t block_id) { } void VecBufferPoolHandle::acquire_one(block_id_t block_id) { + // The caller must guarantee the block is already loaded before calling + // acquire_one(). The return value of acquire_block() is intentionally + // ignored here, as a null return would indicate a contract violation. 
pool_.page_table_.acquire_block(block_id); } diff --git a/src/include/zvec/ailego/buffer/vector_buffer_pool.h b/src/include/zvec/ailego/buffer/vector_buffer_pool.h index 6718c3529..2028304b3 100644 --- a/src/include/zvec/ailego/buffer/vector_buffer_pool.h +++ b/src/include/zvec/ailego/buffer/vector_buffer_pool.h @@ -85,6 +85,7 @@ class VectorPageTable { private: size_t entry_num_{0}; Entry *entries_{nullptr}; + moodycamel::ConcurrentQueue evict_cache_; }; class VecBufferPoolHandle; From 5aad18f50a54b05a31a2c2227ff4bb63377aa2fe Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Mon, 13 Apr 2026 17:38:38 +0800 Subject: [PATCH 40/46] fix --- src/ailego/buffer/vector_buffer_pool.cc | 14 ++++++++------ .../zvec/ailego/buffer/vector_buffer_pool.h | 3 +-- 2 files changed, 9 insertions(+), 8 deletions(-) diff --git a/src/ailego/buffer/vector_buffer_pool.cc b/src/ailego/buffer/vector_buffer_pool.cc index cc88fbbeb..3c9690261 100644 --- a/src/ailego/buffer/vector_buffer_pool.cc +++ b/src/ailego/buffer/vector_buffer_pool.cc @@ -14,6 +14,7 @@ #include #include +#include #if defined(_MSC_VER) #ifndef NOMINMAX @@ -80,8 +81,9 @@ void VectorPageTable::release_block(block_id_t block_id) { LRUCache::BlockType block; block.page_table = this; block.vector_block.first = block_id; - block.vector_block.second = entry.load_count.load(); - entry.lru_version = entry.load_count.load(); + version_t v = entry.load_count.load(std::memory_order_relaxed); + block.vector_block.second = v; + entry.lru_version.store(v, std::memory_order_relaxed); LRUCache::get_instance().add_single_block(block, 0); } else { if (entry.lru_version.load(std::memory_order_relaxed) + 1 == entry.load_count.load(std::memory_order_relaxed)) { @@ -122,17 +124,17 @@ char *VectorPageTable::set_block_acquired(block_id_t block_id, char *buffer, } while (true) { version_t current = hot_entry.lru_version.load(std::memory_order_relaxed); - version_t expected = hot_entry.load_count.load(std::memory_order_relaxed); - if (current == 
expected) { + version_t desired = hot_entry.load_count.load(std::memory_order_relaxed); + if (current == desired) { break; } if (hot_entry.lru_version.compare_exchange_weak( - current, expected, std::memory_order_acq_rel, + current, desired, std::memory_order_acq_rel, std::memory_order_acquire)) { LRUCache::BlockType block; block.page_table = this; block.vector_block.first = evict_block_id; - block.vector_block.second = expected; + block.vector_block.second = desired; LRUCache::get_instance().add_single_block(block, 0); } } diff --git a/src/include/zvec/ailego/buffer/vector_buffer_pool.h b/src/include/zvec/ailego/buffer/vector_buffer_pool.h index 2028304b3..669a53c5c 100644 --- a/src/include/zvec/ailego/buffer/vector_buffer_pool.h +++ b/src/include/zvec/ailego/buffer/vector_buffer_pool.h @@ -59,8 +59,8 @@ class VectorPageTable { LRUCache::get_instance().set_valid(this); } ~VectorPageTable() { - delete[] entries_; LRUCache::get_instance().set_invalid(this); + delete[] entries_; } void init(size_t entry_num); @@ -123,7 +123,6 @@ class VecBufferPool { private: int fd_; size_t file_size_; - size_t pool_capacity_; public: VectorPageTable page_table_; From 2769649c7ce0e32f3e3d190b53e6c7d67592cc5f Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Mon, 13 Apr 2026 17:50:45 +0800 Subject: [PATCH 41/46] fix --- src/ailego/buffer/lru_cache.cc | 23 ++++++++++++++++++---- src/ailego/buffer/vector_buffer_pool.cc | 14 +++++++++---- src/include/zvec/ailego/buffer/lru_cache.h | 5 +++++ 3 files changed, 34 insertions(+), 8 deletions(-) diff --git a/src/ailego/buffer/lru_cache.cc b/src/ailego/buffer/lru_cache.cc index b2f429acf..99b126eb4 100644 --- a/src/ailego/buffer/lru_cache.cc +++ b/src/ailego/buffer/lru_cache.cc @@ -38,6 +38,17 @@ bool LRUCache::evict_single_block(BlockType &item) { return found; } +bool LRUCache::is_valid_and_alive(const BlockType &item) { + std::shared_lock lock(valid_page_tables_mutex_); + if (valid_page_tables_.find(item.page_table) == 
valid_page_tables_.end()) { + return false; + } + // is_dead_block accesses entries_ under the same shared lock, so the + // VectorPageTable destructor (which holds the unique lock via set_invalid) + // cannot free entries_ while this check is in progress. + return !item.page_table->is_dead_block(item); +} + bool LRUCache::evict_block(BlockType &item) { bool ok = false; do { @@ -52,7 +63,7 @@ bool LRUCache::evict_block(BlockType &item) { continue; } } - } while (!is_valid(item.page_table) || item.page_table->is_dead_block(item)); + } while (!is_valid_and_alive(item)); return ok; } @@ -60,7 +71,12 @@ bool LRUCache::recycle() { BlockType item; while (MemoryLimitPool::get_instance().is_full() && evict_block(item)) { if (item.page_table) { - item.page_table->evict_block(item.vector_block.first); + // Hold the shared lock across the eviction call to prevent + // use-after-free if the VectorPageTable is concurrently destroyed. + std::shared_lock lock(valid_page_tables_mutex_); + if (valid_page_tables_.find(item.page_table) != valid_page_tables_.end()) { + item.page_table->evict_block(item.vector_block.first); + } } else { ParquetBufferPool::get_instance().evict(item.parquet_buffer_block.first); } @@ -99,8 +115,7 @@ void LRUCache::clear_dead_node() { if (!ParquetBufferPool::get_instance().is_dead_node(item)) { live_blocks_queue.enqueue(item); } - } else if (is_valid(item.page_table) && - !item.page_table->is_dead_block(item)) { + } else if (is_valid_and_alive(item)) { live_blocks_queue.enqueue(item); } } diff --git a/src/ailego/buffer/vector_buffer_pool.cc b/src/ailego/buffer/vector_buffer_pool.cc index 3c9690261..f90a21b61 100644 --- a/src/ailego/buffer/vector_buffer_pool.cc +++ b/src/ailego/buffer/vector_buffer_pool.cc @@ -14,7 +14,10 @@ #include #include + +#if !defined(_MSC_VER) #include +#endif #if defined(_MSC_VER) #ifndef NOMINMAX @@ -86,7 +89,8 @@ void VectorPageTable::release_block(block_id_t block_id) { entry.lru_version.store(v, std::memory_order_relaxed); 
LRUCache::get_instance().add_single_block(block, 0); } else { - if (entry.lru_version.load(std::memory_order_relaxed) + 1 == entry.load_count.load(std::memory_order_relaxed)) { + if (entry.lru_version.load(std::memory_order_relaxed) + 1 == + entry.load_count.load(std::memory_order_relaxed)) { evict_cache_.enqueue(block_id); } } @@ -117,14 +121,16 @@ char *VectorPageTable::set_block_acquired(block_id_t block_id, char *buffer, entry.size = size; if (MemoryLimitPool::get_instance().is_hot_level2()) { size_t evict_block_id = 0; - while(evict_cache_.try_dequeue(evict_block_id)) { + while (evict_cache_.try_dequeue(evict_block_id)) { Entry &hot_entry = entries_[evict_block_id]; if (hot_entry.ref_count.load() != 0) { continue; } while (true) { - version_t current = hot_entry.lru_version.load(std::memory_order_relaxed); - version_t desired = hot_entry.load_count.load(std::memory_order_relaxed); + version_t current = + hot_entry.lru_version.load(std::memory_order_relaxed); + version_t desired = + hot_entry.load_count.load(std::memory_order_relaxed); if (current == desired) { break; } diff --git a/src/include/zvec/ailego/buffer/lru_cache.h b/src/include/zvec/ailego/buffer/lru_cache.h index 8a588d5f5..4682ed881 100644 --- a/src/include/zvec/ailego/buffer/lru_cache.h +++ b/src/include/zvec/ailego/buffer/lru_cache.h @@ -103,6 +103,11 @@ class LRUCache { valid_page_tables_.erase(page_table); } + // Atomically checks under the shared lock that the page table is still valid + // AND the block version has not been superseded, preventing TOCTOU races + // when a VectorPageTable is concurrently destroyed. 
+ bool is_valid_and_alive(const BlockType &item); + bool recycle(); private: From 105cf60e250536d983a649efea0d9a8c145fcd45 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Mon, 13 Apr 2026 20:09:43 +0800 Subject: [PATCH 42/46] fix --- src/ailego/buffer/lru_cache.cc | 2 ++ src/ailego/buffer/vector_buffer_pool.cc | 23 ++++++++++--------- .../zvec/ailego/buffer/vector_buffer_pool.h | 12 +++++++++- 3 files changed, 25 insertions(+), 12 deletions(-) diff --git a/src/ailego/buffer/lru_cache.cc b/src/ailego/buffer/lru_cache.cc index 99b126eb4..d61a43cae 100644 --- a/src/ailego/buffer/lru_cache.cc +++ b/src/ailego/buffer/lru_cache.cc @@ -164,6 +164,7 @@ void MemoryLimitPool::release_buffer(char *buffer, const size_t buffer_size) { do { expected = used_size_.load(); desired = expected - buffer_size; + assert(expected >= buffer_size); } while (!used_size_.compare_exchange_weak(expected, desired)); ailego_free(buffer); } @@ -173,6 +174,7 @@ void MemoryLimitPool::release_parquet(const size_t buffer_size) { do { expected = used_size_.load(); desired = expected - buffer_size; + assert(expected >= buffer_size); } while (!used_size_.compare_exchange_weak(expected, desired)); } diff --git a/src/ailego/buffer/vector_buffer_pool.cc b/src/ailego/buffer/vector_buffer_pool.cc index f90a21b61..0cc9fd51e 100644 --- a/src/ailego/buffer/vector_buffer_pool.cc +++ b/src/ailego/buffer/vector_buffer_pool.cc @@ -106,7 +106,6 @@ char *VectorPageTable::evict_block(block_id_t block_id) { char *buffer = entry.buffer; if (buffer) { MemoryLimitPool::get_instance().release_buffer(buffer, entry.size); - entry.buffer = nullptr; } return buffer; } else { @@ -118,7 +117,6 @@ char *VectorPageTable::set_block_acquired(block_id_t block_id, char *buffer, size_t size) { assert(block_id < entry_num_); Entry &entry = entries_[block_id]; - entry.size = size; if (MemoryLimitPool::get_instance().is_hot_level2()) { size_t evict_block_id = 0; while (evict_cache_.try_dequeue(evict_block_id)) { @@ -126,15 +124,17 @@ char 
*VectorPageTable::set_block_acquired(block_id_t block_id, char *buffer, if (hot_entry.ref_count.load() != 0) { continue; } - while (true) { - version_t current = - hot_entry.lru_version.load(std::memory_order_relaxed); - version_t desired = - hot_entry.load_count.load(std::memory_order_relaxed); - if (current == desired) { - break; - } - if (hot_entry.lru_version.compare_exchange_weak( + // Snapshot load_count once. We only need to advance lru_version to this + // snapshot version; chasing subsequent increments is unnecessary and can + // cause unbounded spinning under high concurrency. + // If the CAS fails, another thread has already advanced lru_version (to + // at least this version), so the block is already queued in LRU. + version_t desired = + hot_entry.load_count.load(std::memory_order_relaxed); + version_t current = + hot_entry.lru_version.load(std::memory_order_relaxed); + if (current != desired) { + if (hot_entry.lru_version.compare_exchange_strong( current, desired, std::memory_order_acq_rel, std::memory_order_acquire)) { LRUCache::BlockType block; @@ -164,6 +164,7 @@ char *VectorPageTable::set_block_acquired(block_id_t block_id, char *buffer, } } else { entry.buffer = buffer; + entry.size = size; entry.load_count.fetch_add(1, std::memory_order_relaxed); entry.ref_count.store(1, std::memory_order_release); return entry.buffer; diff --git a/src/include/zvec/ailego/buffer/vector_buffer_pool.h b/src/include/zvec/ailego/buffer/vector_buffer_pool.h index 669a53c5c..675a8f2f9 100644 --- a/src/include/zvec/ailego/buffer/vector_buffer_pool.h +++ b/src/include/zvec/ailego/buffer/vector_buffer_pool.h @@ -77,6 +77,13 @@ class VectorPageTable { return entry_num_; } + // Returns true if the block has no active references (ref_count <= 0). + // Used by VecBufferPool destructor to assert all handles are released. 
+ bool is_released(block_id_t block_id) const { + assert(block_id < entry_num_); + return entries_[block_id].ref_count.load(std::memory_order_relaxed) <= 0; + } + inline bool is_dead_block(LRUCache::BlockType block) const { Entry &entry = entries_[block.vector_block.first]; return block.vector_block.second != entry.load_count.load(); @@ -96,8 +103,11 @@ class VecBufferPool { VecBufferPool(const std::string &filename); ~VecBufferPool() { - // Free any buffers still pinned in the page table for (size_t i = 0; i < page_table_.entry_num(); ++i) { + // A positive ref_count means a VecBufferPoolHandle is still alive, + // which is a contract violation: all handles must be destroyed before + // the pool itself is destroyed. + assert(page_table_.is_released(i)); page_table_.evict_block(i); } #if defined(_MSC_VER) From 0f7cf9409d96bc16f9e380cf8bdf8ae0bf6ee692 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Mon, 13 Apr 2026 20:13:10 +0800 Subject: [PATCH 43/46] clang format --- src/ailego/buffer/lru_cache.cc | 3 ++- src/ailego/buffer/vector_buffer_pool.cc | 6 ++---- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/src/ailego/buffer/lru_cache.cc b/src/ailego/buffer/lru_cache.cc index d61a43cae..df10cacbd 100644 --- a/src/ailego/buffer/lru_cache.cc +++ b/src/ailego/buffer/lru_cache.cc @@ -74,7 +74,8 @@ bool LRUCache::recycle() { // Hold the shared lock across the eviction call to prevent // use-after-free if the VectorPageTable is concurrently destroyed. 
std::shared_lock lock(valid_page_tables_mutex_); - if (valid_page_tables_.find(item.page_table) != valid_page_tables_.end()) { + if (valid_page_tables_.find(item.page_table) != + valid_page_tables_.end()) { item.page_table->evict_block(item.vector_block.first); } } else { diff --git a/src/ailego/buffer/vector_buffer_pool.cc b/src/ailego/buffer/vector_buffer_pool.cc index 0cc9fd51e..46263bc2e 100644 --- a/src/ailego/buffer/vector_buffer_pool.cc +++ b/src/ailego/buffer/vector_buffer_pool.cc @@ -129,10 +129,8 @@ char *VectorPageTable::set_block_acquired(block_id_t block_id, char *buffer, // cause unbounded spinning under high concurrency. // If the CAS fails, another thread has already advanced lru_version (to // at least this version), so the block is already queued in LRU. - version_t desired = - hot_entry.load_count.load(std::memory_order_relaxed); - version_t current = - hot_entry.lru_version.load(std::memory_order_relaxed); + version_t desired = hot_entry.load_count.load(std::memory_order_relaxed); + version_t current = hot_entry.lru_version.load(std::memory_order_relaxed); if (current != desired) { if (hot_entry.lru_version.compare_exchange_strong( current, desired, std::memory_order_acq_rel, From d5f478b5eef1c7a28747d1fce06ef542fa8329e6 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Tue, 14 Apr 2026 11:35:26 +0800 Subject: [PATCH 44/46] fix --- src/ailego/buffer/lru_cache.cc | 5 ++--- src/ailego/buffer/vector_buffer_pool.cc | 17 +++++++++++------ src/include/zvec/ailego/buffer/lru_cache.h | 4 +--- .../zvec/ailego/buffer/vector_buffer_pool.h | 7 ++++++- 4 files changed, 20 insertions(+), 13 deletions(-) diff --git a/src/ailego/buffer/lru_cache.cc b/src/ailego/buffer/lru_cache.cc index df10cacbd..611e7982a 100644 --- a/src/ailego/buffer/lru_cache.cc +++ b/src/ailego/buffer/lru_cache.cc @@ -52,7 +52,7 @@ bool LRUCache::is_valid_and_alive(const BlockType &item) { bool LRUCache::evict_block(BlockType &item) { bool ok = false; do { - ok = 
LRUCache::get_instance().evict_single_block(item); + ok = evict_single_block(item); if (!ok) { return false; } @@ -67,7 +67,7 @@ bool LRUCache::evict_block(BlockType &item) { return ok; } -bool LRUCache::recycle() { +void LRUCache::recycle() { BlockType item; while (MemoryLimitPool::get_instance().is_full() && evict_block(item)) { if (item.page_table) { @@ -82,7 +82,6 @@ bool LRUCache::recycle() { ParquetBufferPool::get_instance().evict(item.parquet_buffer_block.first); } } - return MemoryLimitPool::get_instance().is_full(); } bool LRUCache::add_single_block(const BlockType &block, int queue_index) { diff --git a/src/ailego/buffer/vector_buffer_pool.cc b/src/ailego/buffer/vector_buffer_pool.cc index 46263bc2e..3af77c71d 100644 --- a/src/ailego/buffer/vector_buffer_pool.cc +++ b/src/ailego/buffer/vector_buffer_pool.cc @@ -89,6 +89,11 @@ void VectorPageTable::release_block(block_id_t block_id) { entry.lru_version.store(v, std::memory_order_relaxed); LRUCache::get_instance().add_single_block(block, 0); } else { + // Two separate relaxed loads: a concurrent acquire_block may increment + // load_count between the two reads, making the condition transiently + // false (missed enqueue). This is benign: the block will satisfy the + // condition again on the next release cycle, and hot_level1 pressure + // will add it to LRU directly regardless. 
if (entry.lru_version.load(std::memory_order_relaxed) + 1 == entry.load_count.load(std::memory_order_relaxed)) { evict_cache_.enqueue(block_id); @@ -97,19 +102,17 @@ void VectorPageTable::release_block(block_id_t block_id) { } } -char *VectorPageTable::evict_block(block_id_t block_id) { +void VectorPageTable::evict_block(block_id_t block_id) { assert(block_id < entry_num_); Entry &entry = entries_[block_id]; + char *buffer = entry.buffer; + size_t size = entry.size; int expected = 0; if (entry.ref_count.compare_exchange_strong( expected, std::numeric_limits::min())) { - char *buffer = entry.buffer; if (buffer) { - MemoryLimitPool::get_instance().release_buffer(buffer, entry.size); + MemoryLimitPool::get_instance().release_buffer(buffer, size); } - return buffer; - } else { - return nullptr; } } @@ -201,6 +204,7 @@ int VecBufferPool::init(size_t /*pool_capacity*/, size_t block_size, } size_t block_num = segment_count + 10; page_table_.init(block_num); + block_mutexes_.clear(); block_mutexes_.reserve(block_num); for (size_t i = 0; i < block_num; i++) { block_mutexes_.emplace_back(std::make_unique()); @@ -215,6 +219,7 @@ VecBufferPoolHandle VecBufferPool::get_handle() { char *VecBufferPool::acquire_buffer(block_id_t block_id, size_t offset, size_t size, int retry) { + assert(block_id < block_mutexes_.size()); char *buffer = page_table_.acquire_block(block_id); if (buffer) { return buffer; diff --git a/src/include/zvec/ailego/buffer/lru_cache.h b/src/include/zvec/ailego/buffer/lru_cache.h index 4682ed881..68c6d3d16 100644 --- a/src/include/zvec/ailego/buffer/lru_cache.h +++ b/src/include/zvec/ailego/buffer/lru_cache.h @@ -108,7 +108,7 @@ class LRUCache { // when a VectorPageTable is concurrently destroyed. 
bool is_valid_and_alive(const BlockType &item); - bool recycle(); + void recycle(); private: LRUCache() { @@ -146,8 +146,6 @@ class MemoryLimitPool { bool is_full(); - bool is_hot(); - bool is_hot_level1(); bool is_hot_level2(); diff --git a/src/include/zvec/ailego/buffer/vector_buffer_pool.h b/src/include/zvec/ailego/buffer/vector_buffer_pool.h index 675a8f2f9..f0c592334 100644 --- a/src/include/zvec/ailego/buffer/vector_buffer_pool.h +++ b/src/include/zvec/ailego/buffer/vector_buffer_pool.h @@ -63,13 +63,18 @@ class VectorPageTable { delete[] entries_; } + VectorPageTable(const VectorPageTable &) = delete; + VectorPageTable &operator=(const VectorPageTable &) = delete; + VectorPageTable(VectorPageTable &&) = delete; + VectorPageTable &operator=(VectorPageTable &&) = delete; + void init(size_t entry_num); char *acquire_block(block_id_t block_id); void release_block(block_id_t block_id); - char *evict_block(block_id_t block_id); + void evict_block(block_id_t block_id); char *set_block_acquired(block_id_t block_id, char *buffer, size_t size); From 5d8164b2e2d07a3f6ff0127fbd16356451800c54 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Tue, 14 Apr 2026 15:04:13 +0800 Subject: [PATCH 45/46] upd --- src/ailego/buffer/lru_cache.cc | 4 ++-- .../buffer/{parquet_buffer_pool.cc => parquet_hash_table.cc} | 2 +- .../buffer/{vector_buffer_pool.cc => vector_page_table.cc} | 2 +- src/core/utility/buffer_storage.cc | 2 +- src/db/index/storage/bufferpool_forward_store.cc | 2 +- src/db/index/storage/lazy_record_batch_reader.h | 2 +- .../buffer/{parquet_buffer_pool.h => parquet_hash_table.h} | 0 .../buffer/{vector_buffer_pool.h => vector_page_table.h} | 0 src/include/zvec/core/framework/index_storage.h | 2 +- 9 files changed, 8 insertions(+), 8 deletions(-) rename src/ailego/buffer/{parquet_buffer_pool.cc => parquet_hash_table.cc} (99%) rename src/ailego/buffer/{vector_buffer_pool.cc => vector_page_table.cc} (99%) rename src/include/zvec/ailego/buffer/{parquet_buffer_pool.h => 
parquet_hash_table.h} (100%) rename src/include/zvec/ailego/buffer/{vector_buffer_pool.h => vector_page_table.h} (100%) diff --git a/src/ailego/buffer/lru_cache.cc b/src/ailego/buffer/lru_cache.cc index 611e7982a..1075d514e 100644 --- a/src/ailego/buffer/lru_cache.cc +++ b/src/ailego/buffer/lru_cache.cc @@ -12,8 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include -#include +#include +#include #include namespace zvec { diff --git a/src/ailego/buffer/parquet_buffer_pool.cc b/src/ailego/buffer/parquet_hash_table.cc similarity index 99% rename from src/ailego/buffer/parquet_buffer_pool.cc rename to src/ailego/buffer/parquet_hash_table.cc index e1b03e2d8..e2f88cf52 100644 --- a/src/ailego/buffer/parquet_buffer_pool.cc +++ b/src/ailego/buffer/parquet_hash_table.cc @@ -19,7 +19,7 @@ #include #include #include -#include +#include namespace zvec { namespace ailego { diff --git a/src/ailego/buffer/vector_buffer_pool.cc b/src/ailego/buffer/vector_page_table.cc similarity index 99% rename from src/ailego/buffer/vector_buffer_pool.cc rename to src/ailego/buffer/vector_page_table.cc index 3af77c71d..bef47b194 100644 --- a/src/ailego/buffer/vector_buffer_pool.cc +++ b/src/ailego/buffer/vector_page_table.cc @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include +#include #include #if !defined(_MSC_VER) diff --git a/src/core/utility/buffer_storage.cc b/src/core/utility/buffer_storage.cc index fed61af2d..90e3f2547 100644 --- a/src/core/utility/buffer_storage.cc +++ b/src/core/utility/buffer_storage.cc @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include #include diff --git a/src/db/index/storage/bufferpool_forward_store.cc b/src/db/index/storage/bufferpool_forward_store.cc index 6e2ef4851..4d2b2f6e2 100644 --- a/src/db/index/storage/bufferpool_forward_store.cc +++ b/src/db/index/storage/bufferpool_forward_store.cc @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include "db/index/storage/store_helper.h" #include "lazy_record_batch_reader.h" diff --git a/src/db/index/storage/lazy_record_batch_reader.h b/src/db/index/storage/lazy_record_batch_reader.h index baccc1409..422708ed9 100644 --- a/src/db/index/storage/lazy_record_batch_reader.h +++ b/src/db/index/storage/lazy_record_batch_reader.h @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include "db/common/constants.h" diff --git a/src/include/zvec/ailego/buffer/parquet_buffer_pool.h b/src/include/zvec/ailego/buffer/parquet_hash_table.h similarity index 100% rename from src/include/zvec/ailego/buffer/parquet_buffer_pool.h rename to src/include/zvec/ailego/buffer/parquet_hash_table.h diff --git a/src/include/zvec/ailego/buffer/vector_buffer_pool.h b/src/include/zvec/ailego/buffer/vector_page_table.h similarity index 100% rename from src/include/zvec/ailego/buffer/vector_buffer_pool.h rename to src/include/zvec/ailego/buffer/vector_page_table.h diff --git a/src/include/zvec/core/framework/index_storage.h b/src/include/zvec/core/framework/index_storage.h index 18ae1ddcf..677838ca8 100644 --- a/src/include/zvec/core/framework/index_storage.h +++ b/src/include/zvec/core/framework/index_storage.h @@ -14,7 +14,7 @@ #pragma once -#include +#include #include #include #include From 
a39163b82dccedb2356fef5334d4744d0a3fb2d3 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Wed, 15 Apr 2026 11:36:52 +0800 Subject: [PATCH 46/46] add log info --- src/ailego/buffer/lru_cache.cc | 2 +- src/core/utility/buffer_storage.cc | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/src/ailego/buffer/lru_cache.cc b/src/ailego/buffer/lru_cache.cc index 1075d514e..86489e750 100644 --- a/src/ailego/buffer/lru_cache.cc +++ b/src/ailego/buffer/lru_cache.cc @@ -129,6 +129,7 @@ int MemoryLimitPool::init(size_t pool_size) { pool_size_ = 0; LRUCache::get_instance().recycle(); pool_size_ = pool_size; + LOG_INFO("MemoryLimitPool initialized with pool size: %lu", pool_size_); return 0; } @@ -138,7 +139,6 @@ bool MemoryLimitPool::try_acquire_buffer(const size_t buffer_size, do { expected = used_size_.load(); if (expected >= pool_size_) { - // LOG_ERROR("expected: %lu, pool_size: %lu", expected, pool_size_); return false; } desired = expected + buffer_size; diff --git a/src/core/utility/buffer_storage.cc b/src/core/utility/buffer_storage.cc index 90e3f2547..da37e1d31 100644 --- a/src/core/utility/buffer_storage.cc +++ b/src/core/utility/buffer_storage.cc @@ -176,6 +176,7 @@ class BufferStorage : public IndexStorage { //! Initialize storage int init(const ailego::Params &params) override { params.get(BUFFER_STORAGE_MEMORY_SIZE, &buffer_size_); + LOG_INFO("buffer storage initialized"); // LOG_DEBUG("buffer size: %lu", buffer_size_); return 0; }