Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
61 commits
Select commit Hold shift + click to select a range
49000e1
initial add for diskann
richyreachy Feb 9, 2026
7dda45c
update code
richyreachy Feb 9, 2026
5902548
make it compile on macos
richyreachy Feb 9, 2026
4558237
fix: merge cmake config
Mar 21, 2026
303bf1b
fix: fix header include
Mar 21, 2026
1307e28
fix: fix clang-format
Mar 22, 2026
7959f13
fix: fix clang-format
Mar 22, 2026
fd26a15
fix: fix clang-format
Mar 22, 2026
f90f9b3
refactor: remove shuffle
Mar 22, 2026
29e22b4
refactor: update
Mar 22, 2026
e27e856
refactor: remove trivial codes
Mar 22, 2026
8033f22
refactor: refactor code
Mar 22, 2026
1263894
feat: lib aio
richyreachy Mar 23, 2026
29c6cf1
feat: aio
richyreachy Mar 23, 2026
dc29f87
feat: add aio
richyreachy Mar 23, 2026
d785f08
feat: update cmake config
richyreachy Mar 23, 2026
1bb41db
fix: fix logger format
richyreachy Mar 23, 2026
2782665
fix: format
richyreachy Mar 23, 2026
e405f69
fix: logger format
richyreachy Mar 24, 2026
cefab1e
feat: add cosine ut
Mar 28, 2026
29ea6ca
fix: fix ut
richyreachy Mar 29, 2026
cbe452b
feat: support cosine
richyreachy Mar 30, 2026
d959784
feat: support cosine
richyreachy Mar 30, 2026
30b5799
feat: add cosine
richyreachy Mar 30, 2026
92733fe
feat: add fp16 cosine
richyreachy Mar 30, 2026
cf50671
feat: support cosine fp16
richyreachy Mar 30, 2026
a7138e8
feat: remove aio
richyreachy Apr 3, 2026
7e63b4b
feat: remove search in mem
richyreachy Apr 3, 2026
4029813
feat: update code
richyreachy Apr 3, 2026
7a50ae4
feat: compile only on x86
richyreachy Apr 3, 2026
752e813
feat: add libaio-dev
richyreachy Apr 3, 2026
c607f83
feat: change makefile
richyreachy Apr 3, 2026
1218229
feat: add db layer support
richyreachy Apr 3, 2026
d7914f6
Merge branch 'main' into feat/diskann
Apr 5, 2026
701433d
feat: add diskann to db
richyreachy Apr 7, 2026
804d5b1
feat: add diskann to db
richyreachy Apr 7, 2026
170714e
feat: add diskann to db
richyreachy Apr 8, 2026
71fbab6
feat: add diskann to db
richyreachy Apr 8, 2026
f6b6f4c
feat: diskann python
richyreachy Apr 9, 2026
481cb17
feat: add to protocal buf
richyreachy Apr 10, 2026
07c7320
feat: support python
richyreachy Apr 10, 2026
39843d4
fix: fix python
richyreachy Apr 10, 2026
8c8856a
fix: fix tool
richyreachy Apr 13, 2026
51a4511
feat: support db and python
richyreachy Apr 13, 2026
072a1be
fix: disable on apple
richyreachy Apr 13, 2026
9c97b5d
Merge branch 'main' into feat/diskann
richyreachy Apr 13, 2026
b7f34bf
fix: github runner
richyreachy Apr 13, 2026
07eaaf4
fix: github runner
richyreachy Apr 13, 2026
a983270
fix: github runner
richyreachy Apr 13, 2026
495194a
fix: github runner
richyreachy Apr 13, 2026
1f4680f
fix: fix ci
richyreachy Apr 13, 2026
1e8c578
fix: fix ci
richyreachy Apr 13, 2026
de5e84c
fix: fix ci
richyreachy Apr 14, 2026
2c58253
fix: fix windows
richyreachy Apr 14, 2026
cb0145a
fix: fix resource leaks in DiskAnn indexer and file reader
richyreachy Apr 14, 2026
5991458
fix: add header
richyreachy Apr 14, 2026
e04290a
fix: add some error handling
richyreachy Apr 15, 2026
dd48042
fix: fix core dump
richyreachy Apr 15, 2026
72db91b
Merge branch 'feat/diskann' of github.com:richyreachy/zvec into feat/…
richyreachy Apr 15, 2026
01bb3dc
fix: fix initialization
richyreachy Apr 15, 2026
070adb1
fix: fix ut
richyreachy Apr 16, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 8 additions & 1 deletion .github/workflows/03-macos-linux-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,14 @@ jobs:
sudo apt-get install -y clang libomp-dev
shell: bash

# Install the libaio development package; DiskAnn requires libaio and is only
# enabled on Linux x86 builds, hence the Linux + X64 runner condition below.
- name: Install AIO
if: runner.os == 'Linux' && runner.arch == 'X64'
run: |
sudo apt-get update
sudo apt-get install -y --no-install-recommends \
libaio-dev
shell: bash

- name: Print CPU info
if: runner.os == 'Linux'
run: lscpu
Expand Down Expand Up @@ -89,7 +97,6 @@ jobs:
pytest \
scikit-build-core \
setuptools_scm
shell: bash

- name: Build from source
run: |
Expand Down
3 changes: 3 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -43,3 +43,6 @@
[submodule "thirdparty/RaBitQ-Library/RaBitQ-Library-0.1"]
path = thirdparty/RaBitQ-Library/RaBitQ-Library-0.1
url = https://github.com/VectorDB-NTU/RaBitQ-Library.git
[submodule "thirdparty/aio/libaio-0.3"]
path = thirdparty/aio/libaio-0.3
url = https://github.com/yugabyte/libaio.git
11 changes: 11 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,17 @@ else()
endif()
message(STATUS "RABITQ_ARCH_FLAG: ${RABITQ_ARCH_FLAG}")

# DiskAnn support (requires libaio).
#
# Enabled only when targeting Linux on an x86 processor: CMAKE_SYSTEM_PROCESSOR
# must match x86_64, i686 or i386, and the target must be neither Android nor
# iOS. The result is published in two ways:
#   * the CMake variable DISKANN_SUPPORTED (ON/OFF), for use in CMake logic;
#   * the preprocessor macro DISKANN_SUPPORTED (1/0), which is always defined
#     so sources can test it with `#if DISKANN_SUPPORTED`.
if(CMAKE_SYSTEM_NAME STREQUAL "Linux"
   AND CMAKE_SYSTEM_PROCESSOR MATCHES "x86_64|i686|i386"
   AND NOT ANDROID
   AND NOT IOS)
  set(DISKANN_SUPPORTED ON)
  add_compile_definitions(DISKANN_SUPPORTED=1)
else()
  set(DISKANN_SUPPORTED OFF)
  add_compile_definitions(DISKANN_SUPPORTED=0)
  # The message names the architectures the condition above actually accepts.
  message(STATUS "DiskAnn support disabled - only supported on Linux x86 (x86_64/i686/i386)")
endif()
message(STATUS "DISKANN_SUPPORTED: ${DISKANN_SUPPORTED}")

option(USE_OSS_MIRROR "Use OSS mirror for faster third-party downloads" OFF)
if(DEFINED ENV{USE_OSS_MIRROR} AND NOT "$ENV{USE_OSS_MIRROR}" STREQUAL "")
set(USE_OSS_MIRROR "$ENV{USE_OSS_MIRROR}" CACHE BOOL "Use OSS mirror for faster third-party downloads" FORCE)
Expand Down
14 changes: 14 additions & 0 deletions python/tests/detail/fixture_helper.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,13 @@
import pytest
import logging
import platform

# True only when the tests run on Linux and platform.machine() reports one of
# the x86 names below; used to skip DiskAnn tests everywhere else.
DISKANN_SUPPORTED = (
    platform.machine() in {"x86_64", "AMD64", "i686", "i386"}
    if platform.system() == "Linux"
    else False
)

from typing import Any, Generator
from zvec.typing import DataType, StatusCode, MetricType, QuantizeType
Expand Down Expand Up @@ -97,6 +105,12 @@ def full_schema_new(request) -> CollectionSchema:
else:
nullable, has_index, vector_index = True, False, HnswIndexParam()

# Skip DiskAnn tests on unsupported platforms
from zvec.model.param import DiskAnnIndexParam

if isinstance(vector_index, DiskAnnIndexParam) and not DISKANN_SUPPORTED:
pytest.skip("DiskAnn only supported on Linux x86_64")

scalar_index_param = None
vector_index_param = None
if has_index:
Expand Down
101 changes: 85 additions & 16 deletions python/tests/detail/test_collection_recall.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,10 @@
HnswIndexParam,
FlatIndexParam,
IVFIndexParam,
DiskAnnIndexParam,
HnswQueryParam,
IVFQueryParam,
DiskAnnQueryParam,
)

from zvec.model.schema import FieldSchema, VectorSchema
Expand Down Expand Up @@ -179,10 +181,24 @@ def get_ground_truth_map(collection, test_docs, query_vectors_map, metric_type,
for field_name, query_vectors in query_vectors_map.items():
ground_truth_map[field_name] = {}

# Support per-field metric type: metric_type can be a dict mapping
# field_name -> MetricType, or a single MetricType applied to all fields.
if isinstance(metric_type, dict):
field_metric = metric_type.get(field_name, MetricType.IP)
else:
field_metric = metric_type

for i, query_vector in enumerate(query_vectors):
# Get the ground truth for this query
relevant_doc_ids_scores = get_ground_truth_for_vector_query(
collection, query_vector, field_name, test_docs, i, metric_type, k, True
collection,
query_vector,
field_name,
test_docs,
i,
field_metric,
k,
True,
)
ground_truth_map[field_name][i] = relevant_doc_ids_scores

Expand Down Expand Up @@ -292,6 +308,7 @@ class TestRecall:
[
(True, True, HnswIndexParam()),
(False, True, IVFIndexParam()),
(False, True, DiskAnnIndexParam()),
(False, True, FlatIndexParam()), # ——ok
(
True,
Expand Down Expand Up @@ -371,6 +388,24 @@ class TestRecall:
use_soar=False,
),
),
(
True,
True,
DiskAnnIndexParam(
metric_type=MetricType.IP,
max_degree=32,
),
),
(
True,
True,
DiskAnnIndexParam(metric_type=MetricType.L2, max_degree=32),
),
(
True,
True,
DiskAnnIndexParam(metric_type=MetricType.COSINE, max_degree=32),
),
],
indirect=True,
)
Expand All @@ -388,10 +423,16 @@ def test_recall_with_single_vector_valid_500(
):
full_schema_params = request.getfixturevalue("full_schema_new")

# Build per-field metric type map so ground truth uses each field's
# actual index metric (fields may fall back to HnswIndexParam/IP).
field_metric_map = {}
for vector_para in full_schema_params.vectors:
if vector_para.name == "vector_fp32_field":
metric_type = vector_para.index_param.metric_type
break
if vector_para.index_param is not None:
field_metric_map[vector_para.name] = vector_para.index_param.metric_type
else:
field_metric_map[vector_para.name] = MetricType.IP

metric_type = field_metric_map.get("vector_fp32_field", MetricType.IP)

multiple_docs = [
generate_doc_recall(i, full_collection_new.schema) for i in range(doc_num)
Expand Down Expand Up @@ -438,9 +479,13 @@ def test_recall_with_single_vector_valid_500(
multiple_docs[i].vectors[field_name] for i in range(query_num)
]

# Get ground truth mapping
# Get ground truth mapping (pass per-field metric map)
ground_truth_map = get_ground_truth_map(
full_collection_new, multiple_docs, query_vectors_map, metric_type, top_k
full_collection_new,
multiple_docs,
query_vectors_map,
field_metric_map,
top_k,
)

# Validate ground truth mapping structure
Expand Down Expand Up @@ -479,8 +524,8 @@ def test_recall_with_single_vector_valid_500(

print("(recall_at_k_stats:\n")
print(recall_at_k_stats)
print("metric_type:")
print(metric_type)
print("field_metric_map:")
print(field_metric_map)
# Print Recall@K statistics
print(f"Recall@{top_k} using Ground Truth:")
for field_name, stats in recall_at_k_stats.items():
Expand Down Expand Up @@ -552,7 +597,21 @@ def test_recall_with_single_vector_valid_500(
use_soar=True,
),
),
# (True, True, IVFIndexParam(metric_type=MetricType.COSINE, n_list=150, n_iters=15, use_soar=False, )),
(
True,
True,
DiskAnnIndexParam(metric_type=MetricType.IP, max_degree=32),
),
(
True,
True,
DiskAnnIndexParam(metric_type=MetricType.L2, max_degree=32),
),
(
True,
True,
DiskAnnIndexParam(metric_type=MetricType.COSINE, max_degree=32),
),
],
indirect=True,
)
Expand All @@ -571,10 +630,16 @@ def test_recall_with_single_vector_valid_2000(
):
full_schema_params = request.getfixturevalue("full_schema_new")

# Build per-field metric type map so ground truth uses each field's
# actual index metric (fields may fall back to HnswIndexParam/IP).
field_metric_map = {}
for vector_para in full_schema_params.vectors:
if vector_para.name == "vector_fp32_field":
metric_type = vector_para.index_param.metric_type
break
if vector_para.index_param is not None:
field_metric_map[vector_para.name] = vector_para.index_param.metric_type
else:
field_metric_map[vector_para.name] = MetricType.IP

metric_type = field_metric_map.get("vector_fp32_field", MetricType.IP)

multiple_docs = [
generate_doc_recall(i, full_collection_new.schema) for i in range(doc_num)
Expand Down Expand Up @@ -621,9 +686,13 @@ def test_recall_with_single_vector_valid_2000(
multiple_docs[i].vectors[field_name] for i in range(query_num)
]

# Get ground truth mapping
# Get ground truth mapping (pass per-field metric map)
ground_truth_map = get_ground_truth_map(
full_collection_new, multiple_docs, query_vectors_map, metric_type, top_k
full_collection_new,
multiple_docs,
query_vectors_map,
field_metric_map,
top_k,
)

# Validate ground truth mapping structure
Expand Down Expand Up @@ -662,8 +731,8 @@ def test_recall_with_single_vector_valid_2000(

print("(recall_at_k_stats:\n")
print(recall_at_k_stats)
print("metric_type:")
print(metric_type)
print("field_metric_map:")
print(field_metric_map)
# Print Recall@K statistics
print(f"Recall@{top_k} using Ground Truth:")
for field_name, stats in recall_at_k_stats.items():
Expand Down
4 changes: 4 additions & 0 deletions python/zvec/model/param/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
AddColumnOption,
AlterColumnOption,
CollectionOption,
DiskAnnIndexParam,
DiskAnnQueryParam,
FlatIndexParam,
HnswIndexParam,
HnswQueryParam,
Expand All @@ -33,6 +35,8 @@
"AddColumnOption",
"AlterColumnOption",
"CollectionOption",
"DiskAnnIndexParam",
"DiskAnnQueryParam",
"FlatIndexParam",
"HnswIndexParam",
"HnswQueryParam",
Expand Down
5 changes: 5 additions & 0 deletions src/binding/c/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,11 @@ set_target_properties(zvec_c_api PROPERTIES
VISIBILITY_INLINES_HIDDEN ON
)

# Windows builds (WIN32 target platform or an MSVC toolchain) compile the
# library with ZVEC_BUILD_SHARED defined, so that c_api.h uses
# __declspec(dllexport).
if(WIN32 OR MSVC)
  target_compile_definitions(zvec_c_api
    PRIVATE
      ZVEC_BUILD_SHARED
  )
endif()

find_package(Threads REQUIRED)

# Static linking of C++ standard library is handled in platform-specific sections
Expand Down
62 changes: 43 additions & 19 deletions src/binding/python/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,25 +19,49 @@ set(SRC_LISTS
pybind11_add_module(_zvec ${SRC_LISTS})

if (CMAKE_SYSTEM_NAME STREQUAL "Linux")
target_link_libraries(_zvec PRIVATE
-Wl,--whole-archive
$<TARGET_FILE:core_knn_flat_static>
$<TARGET_FILE:core_knn_flat_sparse_static>
$<TARGET_FILE:core_knn_hnsw_static>
$<TARGET_FILE:core_knn_hnsw_rabitq_static>
$<TARGET_FILE:core_knn_hnsw_sparse_static>
$<TARGET_FILE:core_knn_ivf_static>
$<TARGET_FILE:core_knn_cluster_static>
$<TARGET_FILE:core_mix_reducer_static>
$<TARGET_FILE:core_metric_static>
$<TARGET_FILE:core_utility_static>
$<TARGET_FILE:core_quantizer_static>
-Wl,--no-whole-archive
zvec_db
)
target_link_options(_zvec PRIVATE
"LINKER:--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exports.map"
)
# Linux link line for the _zvec extension module.
#
# The core static archives are wrapped in --whole-archive/--no-whole-archive so
# that all of their objects are linked into the module. The DiskAnn archive and
# libaio are added only when DISKANN_SUPPORTED is ON. That variable is set by
# the top-level CMakeLists.txt, which also defines the DISKANN_SUPPORTED
# preprocessor macro, so the link line follows the same platform gate as the
# sources instead of repeating a separate processor check here.
# NOTE(review): assumes core_knn_diskann_static exists whenever
# DISKANN_SUPPORTED is ON - confirm against the core library's CMake files.
set(zvec_py_diskann_archive "")
set(zvec_py_diskann_libs "")
if (DISKANN_SUPPORTED)
  set(zvec_py_diskann_archive $<TARGET_FILE:core_knn_diskann_static>)
  set(zvec_py_diskann_libs aio)
endif()

# The two DiskAnn variables expand to nothing when DiskAnn is disabled.
target_link_libraries(_zvec PRIVATE
  -Wl,--whole-archive
  $<TARGET_FILE:core_knn_flat_static>
  $<TARGET_FILE:core_knn_flat_sparse_static>
  $<TARGET_FILE:core_knn_hnsw_static>
  $<TARGET_FILE:core_knn_hnsw_rabitq_static>
  $<TARGET_FILE:core_knn_hnsw_sparse_static>
  $<TARGET_FILE:core_knn_ivf_static>
  ${zvec_py_diskann_archive}
  $<TARGET_FILE:core_knn_cluster_static>
  $<TARGET_FILE:core_mix_reducer_static>
  $<TARGET_FILE:core_metric_static>
  $<TARGET_FILE:core_utility_static>
  $<TARGET_FILE:core_quantizer_static>
  -Wl,--no-whole-archive
  zvec_db
  ${zvec_py_diskann_libs}
)

# The version script is the same for every Linux architecture.
target_link_options(_zvec PRIVATE
  "LINKER:--version-script=${CMAKE_CURRENT_SOURCE_DIR}/exports.map"
)
elseif (APPLE)
target_link_libraries(_zvec PRIVATE
-Wl,-force_load,$<TARGET_FILE:core_knn_flat_static>
Expand Down
Loading
Loading