From 85f89dce2cbd4823b75cba59731e487fe7b3b744 Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Wed, 20 May 2026 16:59:09 +0800 Subject: [PATCH 1/4] fix --- src/core/algorithm/flat/CMakeLists.txt | 7 +++++++ src/core/algorithm/flat_sparse/CMakeLists.txt | 9 +++++++++ src/core/algorithm/hnsw/CMakeLists.txt | 6 ++++++ src/core/algorithm/hnsw_rabitq/CMakeLists.txt | 6 ++++++ src/core/algorithm/hnsw_sparse/CMakeLists.txt | 6 ++++++ src/core/algorithm/ivf/CMakeLists.txt | 6 ++++++ src/core/algorithm/vamana/CMakeLists.txt | 6 ++++++ src/core/metric/CMakeLists.txt | 6 ++++++ src/core/mixed_reducer/CMakeLists.txt | 6 ++++++ src/core/quantizer/CMakeLists.txt | 6 ++++++ src/core/utility/CMakeLists.txt | 6 ++++++ 11 files changed, 70 insertions(+) diff --git a/src/core/algorithm/flat/CMakeLists.txt b/src/core/algorithm/flat/CMakeLists.txt index 4564d8ef0..60814960e 100644 --- a/src/core/algorithm/flat/CMakeLists.txt +++ b/src/core/algorithm/flat/CMakeLists.txt @@ -1,11 +1,18 @@ include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake) include(${PROJECT_ROOT_DIR}/cmake/option.cmake) #message(STATUS "PROJECT_ROOT_DIR = ${PROJECT_ROOT_DIR}") + +if(NOT APPLE) + set(CORE_KNN_FLAT_LDFLAGS + "-Wl,--exclude-libs,libparquet.a:libarrow.a:libarrow_bundled_dependencies.a") +endif() + cc_library( NAME core_knn_flat STATIC SHARED STRICT ALWAYS_LINK SRCS *.cc LIBS core_framework INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm ${PROJECT_ROOT_DIR}/src/core/framework + LDFLAGS "${CORE_KNN_FLAT_LDFLAGS}" VERSION "${PROXIMA_ZVEC_VERSION}" ) diff --git a/src/core/algorithm/flat_sparse/CMakeLists.txt b/src/core/algorithm/flat_sparse/CMakeLists.txt index e27d2d3ee..44766138d 100644 --- a/src/core/algorithm/flat_sparse/CMakeLists.txt +++ b/src/core/algorithm/flat_sparse/CMakeLists.txt @@ -1,11 +1,20 @@ include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake) include(${PROJECT_ROOT_DIR}/cmake/option.cmake) +# --exclude-libs is GNU ld / LLVM lld only; Apple ld does not support it. +# On macOS (Mach-O), symbol interposition works differently and the +# Arrow/Parquet double-free issue does not apply. +if(NOT APPLE) + set(CORE_KNN_FLAT_SPARSE_LDFLAGS + "-Wl,--exclude-libs,libparquet.a:libarrow.a:libarrow_bundled_dependencies.a") +endif() + cc_library( NAME core_knn_flat_sparse STATIC SHARED STRICT ALWAYS_LINK SRCS *.cc LIBS core_framework INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm + LDFLAGS "${CORE_KNN_FLAT_SPARSE_LDFLAGS}" VERSION "${PROXIMA_ZVEC_VERSION}" ) diff --git a/src/core/algorithm/hnsw/CMakeLists.txt b/src/core/algorithm/hnsw/CMakeLists.txt index f4a105402..cfd1147f4 100644 --- a/src/core/algorithm/hnsw/CMakeLists.txt +++ b/src/core/algorithm/hnsw/CMakeLists.txt @@ -1,11 +1,17 @@ include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake) include(${PROJECT_ROOT_DIR}/cmake/option.cmake) +if(NOT APPLE) + set(CORE_KNN_HNSW_LDFLAGS + "-Wl,--exclude-libs,libparquet.a:libarrow.a:libarrow_bundled_dependencies.a") +endif() + cc_library( NAME core_knn_hnsw STATIC SHARED STRICT ALWAYS_LINK SRCS *.cc LIBS core_framework sparsehash INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm + LDFLAGS "${CORE_KNN_HNSW_LDFLAGS}" VERSION "${PROXIMA_ZVEC_VERSION}" ) diff --git a/src/core/algorithm/hnsw_rabitq/CMakeLists.txt b/src/core/algorithm/hnsw_rabitq/CMakeLists.txt index ed547dc76..09ce72f55 100644 --- a/src/core/algorithm/hnsw_rabitq/CMakeLists.txt +++ b/src/core/algorithm/hnsw_rabitq/CMakeLists.txt @@ -11,11 +11,17 @@ if(AUTO_DETECT_ARCH) endforeach() endif() +if(NOT APPLE) + set(CORE_KNN_HNSW_RABITQ_LDFLAGS + "-Wl,--exclude-libs,libparquet.a:libarrow.a:libarrow_bundled_dependencies.a") +endif() + cc_library( NAME core_knn_hnsw_rabitq STATIC SHARED STRICT ALWAYS_LINK SRCS *.cc LIBS core_framework rabitqlib sparsehash INCS . ${PROJECT_ROOT_DIR}/src ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm + LDFLAGS "${CORE_KNN_HNSW_RABITQ_LDFLAGS}" VERSION "${PROXIMA_ZVEC_VERSION}" ) \ No newline at end of file diff --git a/src/core/algorithm/hnsw_sparse/CMakeLists.txt b/src/core/algorithm/hnsw_sparse/CMakeLists.txt index fe26d10e1..15295b485 100644 --- a/src/core/algorithm/hnsw_sparse/CMakeLists.txt +++ b/src/core/algorithm/hnsw_sparse/CMakeLists.txt @@ -1,11 +1,17 @@ include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake) include(${PROJECT_ROOT_DIR}/cmake/option.cmake) +if(NOT APPLE) + set(CORE_KNN_HNSW_SPARSE_LDFLAGS + "-Wl,--exclude-libs,libparquet.a:libarrow.a:libarrow_bundled_dependencies.a") +endif() + cc_library( NAME core_knn_hnsw_sparse STATIC SHARED STRICT ALWAYS_LINK SRCS *.cc LIBS core_framework sparsehash INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm + LDFLAGS "${CORE_KNN_HNSW_SPARSE_LDFLAGS}" VERSION "${PROXIMA_ZVEC_VERSION}" ) diff --git a/src/core/algorithm/ivf/CMakeLists.txt b/src/core/algorithm/ivf/CMakeLists.txt index ffcf30949..8e3872f31 100644 --- a/src/core/algorithm/ivf/CMakeLists.txt +++ b/src/core/algorithm/ivf/CMakeLists.txt @@ -1,10 +1,16 @@ include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake) include(${PROJECT_ROOT_DIR}/cmake/option.cmake) +if(NOT APPLE) + set(CORE_KNN_IVF_LDFLAGS + "-Wl,--exclude-libs,libparquet.a:libarrow.a:libarrow_bundled_dependencies.a") +endif() + cc_library( NAME core_knn_ivf STATIC SHARED STRICT ALWAYS_LINK SRCS *.cc LIBS zvec_ailego core_framework core_knn_cluster INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm + LDFLAGS "${CORE_KNN_IVF_LDFLAGS}" VERSION "${PROXIMA_ZVEC_VERSION}" ) diff --git a/src/core/algorithm/vamana/CMakeLists.txt b/src/core/algorithm/vamana/CMakeLists.txt index 8e5bbda1e..b2feaf9c1 100644 --- a/src/core/algorithm/vamana/CMakeLists.txt +++ b/src/core/algorithm/vamana/CMakeLists.txt @@ -1,11 +1,17 @@ include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake) include(${PROJECT_ROOT_DIR}/cmake/option.cmake) +if(NOT APPLE) + set(CORE_KNN_VAMANA_LDFLAGS + "-Wl,--exclude-libs,libparquet.a:libarrow.a:libarrow_bundled_dependencies.a") +endif() + cc_library( NAME core_knn_vamana STATIC SHARED STRICT ALWAYS_LINK SRCS *.cc LIBS core_framework core_knn_hnsw sparsehash INCS . ${PROJECT_ROOT_DIR}/src/core ${PROJECT_ROOT_DIR}/src/core/algorithm + LDFLAGS "${CORE_KNN_VAMANA_LDFLAGS}" VERSION "${PROXIMA_ZVEC_VERSION}" ) diff --git a/src/core/metric/CMakeLists.txt b/src/core/metric/CMakeLists.txt index 55dfc901e..2918b909b 100644 --- a/src/core/metric/CMakeLists.txt +++ b/src/core/metric/CMakeLists.txt @@ -1,11 +1,17 @@ include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake) include(${PROJECT_ROOT_DIR}/cmake/option.cmake) +if(NOT APPLE) + set(CORE_METRIC_LDFLAGS + "-Wl,--exclude-libs,libparquet.a:libarrow.a:libarrow_bundled_dependencies.a") +endif() + cc_library( NAME core_metric STATIC SHARED STRICT ALWAYS_LINK SRCS *.cc LIBS zvec_ailego zvec_turbo core_framework INCS . ${PROJECT_ROOT_DIR}/src/core + LDFLAGS "${CORE_METRIC_LDFLAGS}" VERSION "${PROXIMA_ZVEC_VERSION}" ) diff --git a/src/core/mixed_reducer/CMakeLists.txt b/src/core/mixed_reducer/CMakeLists.txt index e9566456e..e7204f0f7 100644 --- a/src/core/mixed_reducer/CMakeLists.txt +++ b/src/core/mixed_reducer/CMakeLists.txt @@ -1,10 +1,16 @@ include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake) include(${PROJECT_ROOT_DIR}/cmake/option.cmake) +if(NOT APPLE) + set(CORE_MIX_REDUCER_LDFLAGS + "-Wl,--exclude-libs,libparquet.a:libarrow.a:libarrow_bundled_dependencies.a") +endif() + cc_library( NAME core_mix_reducer STATIC SHARED STRICT ALWAYS_LINK SRCS *.cc LIBS zvec_ailego core_framework INCS . ${PROJECT_ROOT_DIR}/src/core + LDFLAGS "${CORE_MIX_REDUCER_LDFLAGS}" VERSION "${PROXIMA_ZVEC_VERSION}" ) diff --git a/src/core/quantizer/CMakeLists.txt b/src/core/quantizer/CMakeLists.txt index 21a03e449..80b4f612a 100644 --- a/src/core/quantizer/CMakeLists.txt +++ b/src/core/quantizer/CMakeLists.txt @@ -1,11 +1,17 @@ include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake) include(${PROJECT_ROOT_DIR}/cmake/option.cmake) +if(NOT APPLE) + set(CORE_QUANTIZER_LDFLAGS + "-Wl,--exclude-libs,libparquet.a:libarrow.a:libarrow_bundled_dependencies.a") +endif() + cc_library( NAME core_quantizer STATIC SHARED STRICT ALWAYS_LINK SRCS *.cc LIBS zvec_ailego core_framework INCS . ${PROJECT_ROOT_DIR}/src/core + LDFLAGS "${CORE_QUANTIZER_LDFLAGS}" VERSION "${PROXIMA_ZVEC_VERSION}" ) diff --git a/src/core/utility/CMakeLists.txt b/src/core/utility/CMakeLists.txt index 99cf87ca2..7c3adf702 100644 --- a/src/core/utility/CMakeLists.txt +++ b/src/core/utility/CMakeLists.txt @@ -1,11 +1,17 @@ include(${PROJECT_ROOT_DIR}/cmake/bazel.cmake) include(${PROJECT_ROOT_DIR}/cmake/option.cmake) +if(NOT APPLE) + set(CORE_UTILITY_LDFLAGS + "-Wl,--exclude-libs,libparquet.a:libarrow.a:libarrow_bundled_dependencies.a") +endif() + cc_library( NAME core_utility STATIC SHARED STRICT ALWAYS_LINK SRCS *.cc LIBS zvec_ailego core_framework INCS . ${PROJECT_ROOT_DIR}/src/core + LDFLAGS "${CORE_UTILITY_LDFLAGS}" VERSION "${PROXIMA_ZVEC_VERSION}" ) From 5e6f16c10a87647c9d8fa25b309d6b4b8c9ce67b Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Tue, 2 Jun 2026 14:22:48 +0800 Subject: [PATCH 2/4] fix --- src/ailego/buffer/parquet_hash_table.cc | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/ailego/buffer/parquet_hash_table.cc b/src/ailego/buffer/parquet_hash_table.cc index ab519843e..f21d788b0 100644 --- a/src/ailego/buffer/parquet_hash_table.cc +++ b/src/ailego/buffer/parquet_hash_table.cc @@ -181,7 +181,7 @@ std::shared_ptr ParquetBufferPool::acquire( if (iter == table_.end()) { return nullptr; } - ParquetBufferContext &context = table_[buffer_id]; + ParquetBufferContext &context = iter->second; while (true) { int current_count = context.ref_count.load(std::memory_order_acquire); if (current_count < 0) { @@ -211,7 +211,7 @@ void ParquetBufferPool::release(ParquetBufferID buffer_id) { if (iter == table_.end()) { return; } - ParquetBufferContext &context = table_[buffer_id]; + ParquetBufferContext &context = iter->second; if (context.ref_count.fetch_sub(1, std::memory_order_release) == 1) { std::atomic_thread_fence(std::memory_order_acquire); BlockEvictionQueue::BlockType block; @@ -227,13 +227,14 @@ void ParquetBufferPool::evict(ParquetBufferID buffer_id) { if (iter == table_.end()) { return; } - ParquetBufferContext &context = table_[buffer_id]; + ParquetBufferContext &context = iter->second; int expected = 0; if (context.ref_count.compare_exchange_strong( expected, std::numeric_limits::min())) { MemoryLimitPool::get_instance().release_parquet(context.size); context.arrow = nullptr; context.arrow_refs.clear(); + table_.erase(iter); } } From a590368040c590a66212162b15ec09a95273e05d Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Tue, 2 Jun 2026 14:38:40 +0800 Subject: [PATCH 3/4] fix --- src/ailego/buffer/parquet_hash_table.cc | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/ailego/buffer/parquet_hash_table.cc b/src/ailego/buffer/parquet_hash_table.cc index f21d788b0..8b50d726b 100644 --- a/src/ailego/buffer/parquet_hash_table.cc +++ b/src/ailego/buffer/parquet_hash_table.cc @@ -141,11 +141,17 @@ ParquetBufferContextHandle ParquetBufferPool::acquire_buffer( return ParquetBufferContextHandle(); } std::unique_lock lock(table_mutex_); + if (table_.find(buffer_id) != table_.end()) { + arrow = set_block_acquired(buffer_id); + return ParquetBufferContextHandle(buffer_id, arrow); + } if (acquire(buffer_id, table_[buffer_id]).ok()) { MemoryLimitPool::get_instance().acquire_parquet(table_[buffer_id].size); arrow = set_block_acquired(buffer_id); return ParquetBufferContextHandle(buffer_id, arrow); } else { + // Drop the empty entry inserted by operator[] on the failed load path. + table_.erase(buffer_id); LOG_ERROR("Failed to acquire parquet buffer: %s", buffer_id.to_string().c_str()); return ParquetBufferContextHandle(); @@ -162,6 +168,9 @@ std::shared_ptr ParquetBufferPool::set_block_acquired( if (context.ref_count.compare_exchange_weak( current_count, current_count + 1, std::memory_order_acq_rel, std::memory_order_acquire)) { + if (current_count == 0) { + context.load_count.fetch_add(1, std::memory_order_relaxed); + } return context.arrow; } } else { @@ -196,7 +205,6 @@ std::shared_ptr ParquetBufferPool::acquire( return context.arrow; } } - return nullptr; } std::shared_ptr ParquetBufferPool::acquire_locked( From 00ff7b80cd0031dfc86a48cec579834dd506ec9e Mon Sep 17 00:00:00 2001 From: Zefeng Yin Date: Mon, 15 Jun 2026 20:04:24 +0800 Subject: [PATCH 4/4] fix --- src/ailego/buffer/parquet_hash_table.cc | 43 +++++++------------ .../zvec/ailego/buffer/parquet_hash_table.h | 28 ++++++------ 2 files changed, 30 insertions(+), 41 deletions(-) diff --git a/src/ailego/buffer/parquet_hash_table.cc b/src/ailego/buffer/parquet_hash_table.cc index 8b50d726b..be8833976 100644 --- a/src/ailego/buffer/parquet_hash_table.cc +++ b/src/ailego/buffer/parquet_hash_table.cc @@ -141,17 +141,18 @@ ParquetBufferContextHandle ParquetBufferPool::acquire_buffer( return ParquetBufferContextHandle(); } std::unique_lock lock(table_mutex_); - if (table_.find(buffer_id) != table_.end()) { - arrow = set_block_acquired(buffer_id); + auto [iter, inserted] = table_.try_emplace(buffer_id); + ParquetBufferContext &context = iter->second; + if (!inserted) { + arrow = set_block_acquired(context); return ParquetBufferContextHandle(buffer_id, arrow); } - if (acquire(buffer_id, table_[buffer_id]).ok()) { - MemoryLimitPool::get_instance().acquire_parquet(table_[buffer_id].size); - arrow = set_block_acquired(buffer_id); + if (acquire(buffer_id, context).ok()) { + MemoryLimitPool::get_instance().acquire_parquet(context.size); + arrow = set_block_acquired(context); return ParquetBufferContextHandle(buffer_id, arrow); } else { - // Drop the empty entry inserted by operator[] on the failed load path. - table_.erase(buffer_id); + table_.erase(iter); LOG_ERROR("Failed to acquire parquet buffer: %s", buffer_id.to_string().c_str()); return ParquetBufferContextHandle(); @@ -160,28 +161,14 @@ ParquetBufferContextHandle ParquetBufferPool::acquire_buffer( } std::shared_ptr ParquetBufferPool::set_block_acquired( - ParquetBufferID buffer_id) { - ParquetBufferContext &context = table_[buffer_id]; - while (true) { - int current_count = context.ref_count.load(std::memory_order_relaxed); - if (current_count >= 0) { - if (context.ref_count.compare_exchange_weak( - current_count, current_count + 1, std::memory_order_acq_rel, - std::memory_order_acquire)) { - if (current_count == 0) { - context.load_count.fetch_add(1, std::memory_order_relaxed); - } - return context.arrow; - } - } else { - if (context.ref_count.compare_exchange_weak(current_count, 1, - std::memory_order_acq_rel, - std::memory_order_acquire)) { - context.load_count.fetch_add(1, std::memory_order_relaxed); - return context.arrow; - } - } + ParquetBufferContext &context) { + int current_count = context.ref_count.load(std::memory_order_relaxed); + if (current_count <= 0) { + context.load_count.fetch_add(1, std::memory_order_relaxed); + current_count = 0; } + context.ref_count.store(current_count + 1, std::memory_order_release); + return context.arrow; } std::shared_ptr ParquetBufferPool::acquire( diff --git a/src/include/zvec/ailego/buffer/parquet_hash_table.h b/src/include/zvec/ailego/buffer/parquet_hash_table.h index 4db1a8f3d..bc99b2c3e 100644 --- a/src/include/zvec/ailego/buffer/parquet_hash_table.h +++ b/src/include/zvec/ailego/buffer/parquet_hash_table.h @@ -105,6 +105,8 @@ class ParquetBufferContextHandle { }; class ParquetBufferPool { + friend class ParquetBufferContextHandle; + public: typedef std::shared_ptr Pointer; @@ -123,21 +125,8 @@ class ParquetBufferPool { using Table = std::unordered_map; - arrow::Status acquire(ParquetBufferID buffer_id, - ParquetBufferContext &context); - ParquetBufferContextHandle acquire_buffer(ParquetBufferID buffer_id); - std::shared_ptr set_block_acquired( - ParquetBufferID buffer_id); - - std::shared_ptr acquire(ParquetBufferID buffer_id); - - std::shared_ptr acquire_locked( - ParquetBufferID buffer_id); - - void release(ParquetBufferID buffer_id); - void evict(ParquetBufferID buffer_id); bool is_dead_node(BlockEvictionQueue::BlockType &block); @@ -155,6 +144,19 @@ class ParquetBufferPool { private: ParquetBufferPool() = default; + std::shared_ptr acquire_locked( + ParquetBufferID buffer_id); + + void release(ParquetBufferID buffer_id); + + arrow::Status acquire(ParquetBufferID buffer_id, + ParquetBufferContext &context); + + std::shared_ptr acquire(ParquetBufferID buffer_id); + + std::shared_ptr set_block_acquired( + ParquetBufferContext &context); + private: Table table_; std::shared_mutex table_mutex_;