From 85f89dce2cbd4823b75cba59731e487fe7b3b744 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Wed, 20 May 2026 16:59:09 +0800 Subject: [PATCH 1/3] fix --- src/core/algorithm/flat/CMakeLists.txt | 7 +++++++ src/core/algorithm/flat_sparse/CMakeLists.txt | 9 +++++++++ src/core/algorithm/hnsw/CMakeLists.txt | 6 ++++++ src/core/algorithm/hnsw_rabitq/CMakeLists.txt | 6 ++++++ src/core/algorithm/hnsw_sparse/CMakeLists.txt | 6 ++++++ src/core/algorithm/ivf/CMakeLists.txt | 6 ++++++ src/core/algorithm/vamana/CMakeLists.txt | 6 ++++++ src/core/metric/CMakeLists.txt | 6 ++++++ src/core/mixed_reducer/CMakeLists.txt | 6 ++++++ src/core/quantizer/CMakeLists.txt | 6 ++++++ src/core/utility/CMakeLists.txt | 6 ++++++ 11 files changed, 70 insertions(+) diff --git a/src/core/algorithm/flat/CMakeLists.txt b/src/core/algorithm/flat/CMakeLists.txt index 4564d8ef0..60814960e 100644 --- a/src/core/algorithm/flat/CMakeLists.txt +++ b/src/core/algorithm/flat/CMakeLists.txt @@ -1,11 +1,18 @@ include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake) include(${PROJECT_ROOT_DIR}/cmake/option.cmake) #message(STATUS "PROJECT_ROOT_DIR = ${PROJECT_ROOT_DIR}") + +if(NOT APPLE) + set(CORE_KNN_FLAT_LDFLAGS + "-Wl,--exclude-libs,libparquet.a:libarrow.a:libarrow_bundled_dependencies.a") +endif() + cc_library( NAME core_knn_flat STATIC SHARED STRICT ALWAYS_LINK SRCS *.cc LIBS core_framework INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm ${PROJECT_ROOT_DIR}/src/core/framework + LDFLAGS "${CORE_KNN_FLAT_LDFLAGS}" VERSION "${PROXIMA_ZVEC_VERSION}" ) diff --git a/src/core/algorithm/flat_sparse/CMakeLists.txt b/src/core/algorithm/flat_sparse/CMakeLists.txt index e27d2d3ee..44766138d 100644 --- a/src/core/algorithm/flat_sparse/CMakeLists.txt +++ b/src/core/algorithm/flat_sparse/CMakeLists.txt @@ -1,11 +1,20 @@ include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake) include(${PROJECT_ROOT_DIR}/cmake/option.cmake) +# --exclude-libs is GNU ld / LLVM lld only; Apple ld does not support it. 
+# On macOS (Mach-O), symbol interposition works differently and the +# Arrow/Parquet double-free issue does not apply. +if(NOT APPLE) + set(CORE_KNN_FLAT_SPARSE_LDFLAGS + "-Wl,--exclude-libs,libparquet.a:libarrow.a:libarrow_bundled_dependencies.a") +endif() + cc_library( NAME core_knn_flat_sparse STATIC SHARED STRICT ALWAYS_LINK SRCS *.cc LIBS core_framework INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm + LDFLAGS "${CORE_KNN_FLAT_SPARSE_LDFLAGS}" VERSION "${PROXIMA_ZVEC_VERSION}" ) diff --git a/src/core/algorithm/hnsw/CMakeLists.txt b/src/core/algorithm/hnsw/CMakeLists.txt index f4a105402..cfd1147f4 100644 --- a/src/core/algorithm/hnsw/CMakeLists.txt +++ b/src/core/algorithm/hnsw/CMakeLists.txt @@ -1,11 +1,17 @@ include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake) include(${PROJECT_ROOT_DIR}/cmake/option.cmake) +if(NOT APPLE) + set(CORE_KNN_HNSW_LDFLAGS + "-Wl,--exclude-libs,libparquet.a:libarrow.a:libarrow_bundled_dependencies.a") +endif() + cc_library( NAME core_knn_hnsw STATIC SHARED STRICT ALWAYS_LINK SRCS *.cc LIBS core_framework sparsehash INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm + LDFLAGS "${CORE_KNN_HNSW_LDFLAGS}" VERSION "${PROXIMA_ZVEC_VERSION}" ) diff --git a/src/core/algorithm/hnsw_rabitq/CMakeLists.txt b/src/core/algorithm/hnsw_rabitq/CMakeLists.txt index ed547dc76..09ce72f55 100644 --- a/src/core/algorithm/hnsw_rabitq/CMakeLists.txt +++ b/src/core/algorithm/hnsw_rabitq/CMakeLists.txt @@ -11,11 +11,17 @@ if(AUTO_DETECT_ARCH) endforeach() endif() +if(NOT APPLE) + set(CORE_KNN_HNSW_RABITQ_LDFLAGS + "-Wl,--exclude-libs,libparquet.a:libarrow.a:libarrow_bundled_dependencies.a") +endif() + cc_library( NAME core_knn_hnsw_rabitq STATIC SHARED STRICT ALWAYS_LINK SRCS *.cc LIBS core_framework rabitqlib sparsehash INCS . 
${PROJECT_ROOT_DIR}/src ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm + LDFLAGS "${CORE_KNN_HNSW_RABITQ_LDFLAGS}" VERSION "${PROXIMA_ZVEC_VERSION}" ) \ No newline at end of file diff --git a/src/core/algorithm/hnsw_sparse/CMakeLists.txt b/src/core/algorithm/hnsw_sparse/CMakeLists.txt index fe26d10e1..15295b485 100644 --- a/src/core/algorithm/hnsw_sparse/CMakeLists.txt +++ b/src/core/algorithm/hnsw_sparse/CMakeLists.txt @@ -1,11 +1,17 @@ include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake) include(${PROJECT_ROOT_DIR}/cmake/option.cmake) +if(NOT APPLE) + set(CORE_KNN_HNSW_SPARSE_LDFLAGS + "-Wl,--exclude-libs,libparquet.a:libarrow.a:libarrow_bundled_dependencies.a") +endif() + cc_library( NAME core_knn_hnsw_sparse STATIC SHARED STRICT ALWAYS_LINK SRCS *.cc LIBS core_framework sparsehash INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm + LDFLAGS "${CORE_KNN_HNSW_SPARSE_LDFLAGS}" VERSION "${PROXIMA_ZVEC_VERSION}" ) diff --git a/src/core/algorithm/ivf/CMakeLists.txt b/src/core/algorithm/ivf/CMakeLists.txt index ffcf30949..8e3872f31 100644 --- a/src/core/algorithm/ivf/CMakeLists.txt +++ b/src/core/algorithm/ivf/CMakeLists.txt @@ -1,10 +1,16 @@ include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake) include(${PROJECT_ROOT_DIR}/cmake/option.cmake) +if(NOT APPLE) + set(CORE_KNN_IVF_LDFLAGS + "-Wl,--exclude-libs,libparquet.a:libarrow.a:libarrow_bundled_dependencies.a") +endif() + cc_library( NAME core_knn_ivf STATIC SHARED STRICT ALWAYS_LINK SRCS *.cc LIBS zvec_ailego core_framework core_knn_cluster INCS . 
${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm + LDFLAGS "${CORE_KNN_IVF_LDFLAGS}" VERSION "${PROXIMA_ZVEC_VERSION}" ) diff --git a/src/core/algorithm/vamana/CMakeLists.txt b/src/core/algorithm/vamana/CMakeLists.txt index 8e5bbda1e..b2feaf9c1 100644 --- a/src/core/algorithm/vamana/CMakeLists.txt +++ b/src/core/algorithm/vamana/CMakeLists.txt @@ -1,11 +1,17 @@ include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake) include(${PROJECT_ROOT_DIR}/cmake/option.cmake) +if(NOT APPLE) + set(CORE_KNN_VAMANA_LDFLAGS + "-Wl,--exclude-libs,libparquet.a:libarrow.a:libarrow_bundled_dependencies.a") +endif() + cc_library( NAME core_knn_vamana STATIC SHARED STRICT ALWAYS_LINK SRCS *.cc LIBS core_framework core_knn_hnsw sparsehash INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm + LDFLAGS "${CORE_KNN_VAMANA_LDFLAGS}" VERSION "${PROXIMA_ZVEC_VERSION}" ) diff --git a/src/core/metric/CMakeLists.txt b/src/core/metric/CMakeLists.txt index 55dfc901e..2918b909b 100644 --- a/src/core/metric/CMakeLists.txt +++ b/src/core/metric/CMakeLists.txt @@ -1,11 +1,17 @@ include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake) include(${PROJECT_ROOT_DIR}/cmake/option.cmake) +if(NOT APPLE) + set(CORE_METRIC_LDFLAGS + "-Wl,--exclude-libs,libparquet.a:libarrow.a:libarrow_bundled_dependencies.a") +endif() + cc_library( NAME core_metric STATIC SHARED STRICT ALWAYS_LINK SRCS *.cc LIBS zvec_ailego zvec_turbo core_framework INCS . 
${PROJECT_ROOT_DIR}/src/core + LDFLAGS "${CORE_METRIC_LDFLAGS}" VERSION "${PROXIMA_ZVEC_VERSION}" ) diff --git a/src/core/mixed_reducer/CMakeLists.txt b/src/core/mixed_reducer/CMakeLists.txt index e9566456e..e7204f0f7 100644 --- a/src/core/mixed_reducer/CMakeLists.txt +++ b/src/core/mixed_reducer/CMakeLists.txt @@ -1,10 +1,16 @@ include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake) include(${PROJECT_ROOT_DIR}/cmake/option.cmake) +if(NOT APPLE) + set(CORE_MIX_REDUCER_LDFLAGS + "-Wl,--exclude-libs,libparquet.a:libarrow.a:libarrow_bundled_dependencies.a") +endif() + cc_library( NAME core_mix_reducer STATIC SHARED STRICT ALWAYS_LINK SRCS *.cc LIBS zvec_ailego core_framework INCS . ${PROJECT_ROOT_DIR}/src/core + LDFLAGS "${CORE_MIX_REDUCER_LDFLAGS}" VERSION "${PROXIMA_ZVEC_VERSION}" ) diff --git a/src/core/quantizer/CMakeLists.txt b/src/core/quantizer/CMakeLists.txt index 21a03e449..80b4f612a 100644 --- a/src/core/quantizer/CMakeLists.txt +++ b/src/core/quantizer/CMakeLists.txt @@ -1,11 +1,17 @@ include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake) include(${PROJECT_ROOT_DIR}/cmake/option.cmake) +if(NOT APPLE) + set(CORE_QUANTIZER_LDFLAGS + "-Wl,--exclude-libs,libparquet.a:libarrow.a:libarrow_bundled_dependencies.a") +endif() + cc_library( NAME core_quantizer STATIC SHARED STRICT ALWAYS_LINK SRCS *.cc LIBS zvec_ailego core_framework INCS . ${PROJECT_ROOT_DIR}/src/core + LDFLAGS "${CORE_QUANTIZER_LDFLAGS}" VERSION "${PROXIMA_ZVEC_VERSION}" ) diff --git a/src/core/utility/CMakeLists.txt b/src/core/utility/CMakeLists.txt index 99cf87ca2..7c3adf702 100644 --- a/src/core/utility/CMakeLists.txt +++ b/src/core/utility/CMakeLists.txt @@ -1,11 +1,17 @@ include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake) include(${PROJECT_ROOT_DIR}/cmake/option.cmake) +if(NOT APPLE) + set(CORE_UTILITY_LDFLAGS + "-Wl,--exclude-libs,libparquet.a:libarrow.a:libarrow_bundled_dependencies.a") +endif() + cc_library( NAME core_utility STATIC SHARED STRICT ALWAYS_LINK SRCS *.cc LIBS zvec_ailego core_framework INCS . 
${PROJECT_ROOT_DIR}/src/core + LDFLAGS "${CORE_UTILITY_LDFLAGS}" VERSION "${PROXIMA_ZVEC_VERSION}" ) From 7da12289a9c01d0ea22ad2c33a9421c1b7bbaf52 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Mon, 1 Jun 2026 21:29:24 +0800 Subject: [PATCH 2/3] buffer read storage --- src/core/interface/indexes/ivf_index.cc | 15 +- src/core/utility/buffer_read_storage.cc | 409 ++++++++++++++++++++++++ src/core/utility/utility_params.h | 8 + 3 files changed, 424 insertions(+), 8 deletions(-) create mode 100644 src/core/utility/buffer_read_storage.cc diff --git a/src/core/interface/indexes/ivf_index.cc b/src/core/interface/indexes/ivf_index.cc index 1b91eebea..5fb71faec 100644 --- a/src/core/interface/indexes/ivf_index.cc +++ b/src/core/interface/indexes/ivf_index.cc @@ -84,20 +84,19 @@ int IVFIndex::Open(const std::string &file_path, break; } case StorageOptions::StorageType::kBufferPool: { - // NOTE: IVF index is dumped via FileDumper (plain binary file), which is - // not compatible with BufferStorage's IndexFormat layout (header/footer - // chain). Until IVF gains a BufferStorage-aware dump path, fall back to - // MMapFileReadStorage so the freshly-dumped file can be reopened. - storage_ = core::IndexFactory::CreateStorage("MMapFileReadStorage"); + // IVF index is dumped via FileDumper (FileDumper container layout). + // BufferReadStorage parses that layout through IndexUnpacker (same as + // MMapFileReadStorage) but serves reads through a VecBufferPool, so the + // freshly-dumped file can be reopened with buffer-pool memory control. 
+ storage_ = core::IndexFactory::CreateStorage("BufferReadStorage"); if (storage_ == nullptr) { - LOG_ERROR( - "Failed to create MMapFileReadStorage (IVF buffer-pool fallback)"); + LOG_ERROR("Failed to create BufferReadStorage (IVF buffer-pool)"); return core::IndexError_Runtime; } int ret = storage_->init(storage_params); if (ret != 0) { LOG_ERROR( - "Failed to init MMapFileReadStorage (IVF buffer-pool fallback), " + "Failed to init BufferReadStorage (IVF buffer-pool), " "path: %s, err: %s", file_path_.c_str(), core::IndexError::What(ret)); return ret; diff --git a/src/core/utility/buffer_read_storage.cc b/src/core/utility/buffer_read_storage.cc new file mode 100644 index 000000000..b1d68b69e --- /dev/null +++ b/src/core/utility/buffer_read_storage.cc @@ -0,0 +1,409 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// BufferReadStorage is a read-only IndexStorage that mirrors the structure of +// MMapFileReadStorage (it parses the FileDumper container layout through +// IndexUnpacker and exposes segment-based access), but instead of mmap-ing the +// file it reads through a VecBufferPool. This lets IVF / DiskANN(Vamana) +// indexes -- which are dumped via FileDumper -- benefit from the buffer-pool's +// paged cache + LRU eviction + memory-budget control, while keeping the same +// Segment interface that those indexes already consume. 
+#include +#include +#include +#include +#include +#include +#include +#include "utility_params.h" + +namespace zvec { +namespace core { + +/*! Buffer Read Storage (backed by VecBufferPool) + */ +class BufferReadStorage : public IndexStorage { + public: + /*! Buffer Read Storage Segment + * + * Each segment keeps the owning VecBufferPool / VecBufferPoolHandle alive + * (shared_ptr) so that pages it reads remain valid for the segment's + * lifetime. Reads go through the pool's paged cache: + * - fetch() -> read_range into the caller's buffer + * - read(const void**) -> read_range into a per-segment buffer (stable + * pointer, never pins a page) + * - read(MemoryBlock&) -> single page: zero-copy pin tied to the + * MemoryBlock lifecycle; cross page: owned copy + * - read(SegmentData*) -> read_range into the per-segment buffer + */ + class Segment : public IndexStorage::Segment, + public std::enable_shared_from_this { + public: + //! Index Storage Pointer + typedef std::shared_ptr Pointer; + + //! Constructor + Segment(const std::shared_ptr &pool, + const std::shared_ptr &handle, + size_t index_offset, const IndexUnpacker::SegmentMeta &segment) + : data_offset_(index_offset + segment.data_offset()), + data_size_(segment.data_size()), + padding_size_(segment.padding_size()), + region_size_(segment.data_size() + segment.padding_size()), + data_crc_(segment.data_crc()), + pool_(pool), + handle_(handle) {} + + //! Constructor (clone) + Segment(const Segment &rhs) + : data_offset_(rhs.data_offset_), + data_size_(rhs.data_size_), + padding_size_(rhs.padding_size_), + region_size_(rhs.region_size_), + data_crc_(rhs.data_crc_), + pool_(rhs.pool_), + handle_(rhs.handle_) {} + + //! Destructor + ~Segment(void) override {} + + //! Retrieve size of data + size_t data_size(void) const override { + return data_size_; + } + + //! Retrieve crc of data + uint32_t data_crc(void) const override { + return data_crc_; + } + + //! 
Retrieve size of padding + size_t padding_size(void) const override { + return padding_size_; + } + + //! Retrieve capacity of segment + size_t capacity(void) const override { + return region_size_; + } + + //! Fetch data from segment (copies into the caller-owned buffer) + size_t fetch(size_t offset, void *buf, size_t len) const override { + if (ailego_unlikely(offset + len > region_size_)) { + if (offset > region_size_) { + offset = region_size_; + } + len = region_size_ - offset; + } + if (len == 0) { + return 0; + } + if (!handle_->read_range(data_offset_ + offset, len, + static_cast(buf))) { + LOG_ERROR( + "BufferReadStorage::Segment::fetch: read_range failed, " + "abs_offset=%zu, len=%zu", + data_offset_ + offset, len); + return 0; + } + return len; + } + + //! Read data from segment (buffer_ copy; valid until next read) + size_t read(size_t offset, const void **data, size_t len) override { + if (ailego_unlikely(offset + len > region_size_)) { + if (offset > region_size_) { + offset = region_size_; + } + len = region_size_ - offset; + } + if (len == 0) { + *data = buffer_.data(); + return 0; + } + buffer_.resize(len); + if (!handle_->read_range(data_offset_ + offset, len, + reinterpret_cast(buffer_.data()))) { + LOG_ERROR( + "BufferReadStorage::Segment::read: read_range failed, " + "abs_offset=%zu, len=%zu", + data_offset_ + offset, len); + *data = nullptr; + return 0; + } + *data = buffer_.data(); + return len; + } + + //! Read data from segment into a MemoryBlock + size_t read(size_t offset, MemoryBlock &data, size_t len) override { + if (ailego_unlikely(offset + len > region_size_)) { + if (offset > region_size_) { + offset = region_size_; + } + len = region_size_ - offset; + } + size_t abs_offset = data_offset_ + offset; + size_t first_page = abs_offset / ailego::kVectorPageSize; + size_t last_page = (len == 0) + ? 
first_page + : (abs_offset + len - 1) / ailego::kVectorPageSize; + if (first_page == last_page) { + // Single-page: zero-copy pin whose release is tied to the + // MemoryBlock lifecycle (release_one on destruction). + size_t page_id = 0; + char *raw = handle_->get_single_page(abs_offset, len, page_id); + if (!raw) { + LOG_ERROR( + "BufferReadStorage::Segment::read(MemoryBlock&): single-page " + "acquire failed, abs_offset=%zu, len=%zu", + abs_offset, len); + return 0; + } + data.reset(handle_.get(), page_id, raw); + return len; + } + // Cross-page: copy into a freshly-allocated 4K-aligned buffer that the + // MemoryBlock owns (freed via ailego_free on destruction). + static constexpr size_t kAlign = 4096UL; + size_t alloc_size = (len + (kAlign - 1UL)) & ~(kAlign - 1UL); + char *tmp = + static_cast(ailego_aligned_malloc(alloc_size, kAlign)); + if (!tmp) { + LOG_ERROR( + "BufferReadStorage::Segment::read(MemoryBlock&): cross-page alloc " + "failed, abs_offset=%zu, len=%zu", + abs_offset, len); + return 0; + } + if (!handle_->read_range(abs_offset, len, tmp)) { + ailego_free(tmp); + LOG_ERROR( + "BufferReadStorage::Segment::read(MemoryBlock&): cross-page " + "read_range failed, abs_offset=%zu, len=%zu", + abs_offset, len); + return 0; + } + data = MemoryBlock::MakeOwned(tmp, len); + return len; + } + + //! 
Read scattered data from segment (stable pointers via per-segment buf) + bool read(SegmentData *iovec, size_t count) override { + size_t total = 0u; + for (auto *it = iovec, *end = iovec + count; it != end; ++it) { + ailego_false_if_false(it->offset + it->length <= region_size_); + total += it->length; + } + ailego_false_if_false(total != 0); + + buffer_.reserve(total); + uint8_t *buf = buffer_.data(); + for (auto *it = iovec, *end = iovec + count; it != end; ++it) { + ailego_false_if_false( + handle_->read_range(data_offset_ + it->offset, it->length, + reinterpret_cast(buf))); + it->data = buf; + buf += it->length; + } + return true; + } + + size_t write(size_t, const void *, size_t) override { + return IndexError_NotImplemented; + } + + size_t resize(size_t) override { + return IndexError_NotImplemented; + } + + void update_data_crc(uint32_t) override { + return; + } + + //! Clone the segment + IndexStorage::Segment::Pointer clone(void) override { + return std::make_shared(*this); + } + + //! No stable base pointer: data lives in an evictable paged cache. + const uint8_t *base_data(void) const override { + return nullptr; + } + + private: + size_t data_offset_{0u}; + size_t data_size_{0u}; + size_t padding_size_{0u}; + size_t region_size_{0u}; + uint32_t data_crc_{0u}; + std::vector buffer_{}; + std::shared_ptr pool_{nullptr}; + std::shared_ptr handle_{nullptr}; + }; + + //! Destructor + ~BufferReadStorage(void) override {} + + //! 
Initialize container + int init(const ailego::Params &params) override { + params.get(BUFFER_READ_STORAGE_CHECKSUM_VALIDATION, &checksum_validation_); + params.get(BUFFER_READ_STORAGE_HEADER_OFFSET, &header_offset_); + params.get(BUFFER_READ_STORAGE_FOOTER_OFFSET, &footer_offset_); + return 0; + } + + int flush(void) override { + return 0; + } + + int append(const std::string &, size_t) override { + return IndexError_NotImplemented; + } + + void refresh(uint64_t) override { + return; + } + + uint64_t check_point(void) const override { + return 0; + } + + //! Cleanup container + int cleanup(void) override { + return this->close(); + } + + //! Load an index file into the container + int open(const std::string &path, bool) override { + // Read-only buffer pool over the freshly-dumped FileDumper container. + buffer_pool_ = std::make_shared(path, + /*writable=*/false); + if (!buffer_pool_) { + LOG_ERROR("Failed to create VecBufferPool, path: %s", path.c_str()); + return IndexError_NoMemory; + } + handle_ = std::make_shared( + buffer_pool_->get_handle()); + + size_t file_size = buffer_pool_->file_size(); + index_offset_ = (header_offset_ >= 0 ? 0 : file_size) + header_offset_; + size_t end_offset = (footer_offset_ > 0 ? 0 : file_size) + footer_offset_; + size_t size = end_offset > index_offset_ ? end_offset - index_offset_ : 0; + + // read_data for IndexUnpacker: provide a stable pointer by copying the + // requested range into a reused scratch buffer via get_meta (direct + // pread, valid before buffer_pool_->init()). 
+ auto read_data = [this, end_offset](size_t offset, const void **data, + size_t len) -> size_t { + size_t off = offset + index_offset_; + if (off + len > end_offset) { + if (off > end_offset) { + off = end_offset; + } + len = end_offset - off; + } + scratch_.resize(len); + *data = scratch_.data(); + if (len == 0) { + return 0; + } + if (handle_->get_meta(off, len, + reinterpret_cast(scratch_.data())) != 0) { + return 0; + } + return len; + }; + + IndexUnpacker unpacker; + if (!unpacker.unpack(read_data, size, checksum_validation_)) { + LOG_ERROR("Failed to unpack file: %s", path.c_str()); + return IndexError_UnpackIndex; + } + segments_ = std::move(*unpacker.mutable_segments()); + magic_ = unpacker.magic(); + + // Allocate the page table now that the layout is known. + int ret = buffer_pool_->init(); + if (ret != 0) { + LOG_ERROR("Failed to init VecBufferPool, path: %s", path.c_str()); + return IndexError_Runtime; + } + return 0; + } + + int close(void) override { + segments_.clear(); + handle_ = nullptr; + buffer_pool_ = nullptr; + return 0; + } + + //! Retrieve a segment by id + IndexStorage::Segment::Pointer get(const std::string &id, int) override { + if (!buffer_pool_ || !handle_) { + return IndexStorage::Segment::Pointer(); + } + auto it = segments_.find(id); + if (it == segments_.end()) { + return IndexStorage::Segment::Pointer(); + } + return std::make_shared( + buffer_pool_, handle_, index_offset_, it->second); + } + + std::map get_all( + void) const override { + std::map result; + if (buffer_pool_ && handle_) { + for (const auto &it : segments_) { + result.emplace(it.first, + std::make_shared( + buffer_pool_, handle_, index_offset_, it.second)); + } + } + return result; + } + + //! Test if a segment exists + bool has(const std::string &id) const override { + return (segments_.find(id) != segments_.end()); + } + + //! Retrieve magic number of index + uint32_t magic(void) const override { + return magic_; + } + + //! 
Reads go through the VecBufferPool paged cache. + MemoryBlock::MemoryBlockType memory_block_type(void) const override { + return MemoryBlock::MBT_BUFFERPOOL; + } + + private: + bool checksum_validation_{false}; + int64_t header_offset_{0}; + int64_t footer_offset_{0}; + size_t index_offset_{0}; + uint32_t magic_{0}; + std::vector scratch_{}; + std::map segments_{}; + std::shared_ptr buffer_pool_{nullptr}; + std::shared_ptr handle_{nullptr}; +}; + +INDEX_FACTORY_REGISTER_STORAGE(BufferReadStorage); + +} // namespace core +} // namespace zvec diff --git a/src/core/utility/utility_params.h b/src/core/utility/utility_params.h index c57e6e980..ebd08c504 100644 --- a/src/core/utility/utility_params.h +++ b/src/core/utility/utility_params.h @@ -60,6 +60,14 @@ static const std::string MMAPFILE_READ_STORAGE_HEADER_OFFSET = static const std::string MMAPFILE_READ_STORAGE_FOOTER_OFFSET = "proxima.mmap_file.container.footer_offset"; +//! BufferReadStorage (read-only storage backed by VecBufferPool) +static const std::string BUFFER_READ_STORAGE_CHECKSUM_VALIDATION = + "proxima.buffer.read_storage.checksum_validation"; +static const std::string BUFFER_READ_STORAGE_HEADER_OFFSET = + "proxima.buffer.read_storage.header_offset"; +static const std::string BUFFER_READ_STORAGE_FOOTER_OFFSET = + "proxima.buffer.read_storage.footer_offset"; + //! 
MMapFileStorage static const std::string MMAPFILE_STORAGE_MEMORY_LOCKED = "proxima.mmap_file.storage.memory_locked"; From ef0b1b393608d59eb41052f7103bb3fd192f5089 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Tue, 16 Jun 2026 14:29:47 +0800 Subject: [PATCH 3/3] fix --- src/core/utility/buffer_read_storage.cc | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/core/utility/buffer_read_storage.cc b/src/core/utility/buffer_read_storage.cc index b1d68b69e..bdf5227b2 100644 --- a/src/core/utility/buffer_read_storage.cc +++ b/src/core/utility/buffer_read_storage.cc @@ -67,7 +67,8 @@ class BufferReadStorage : public IndexStorage { //! Constructor (clone) Segment(const Segment &rhs) - : data_offset_(rhs.data_offset_), + : std::enable_shared_from_this(), + data_offset_(rhs.data_offset_), data_size_(rhs.data_size_), padding_size_(rhs.padding_size_), region_size_(rhs.region_size_),