From cdb6af9adda743aec1e22e269e8fc6ee50d0db49 Mon Sep 17 00:00:00 2001 From: lc285652 Date: Mon, 15 Jun 2026 11:41:50 +0800 Subject: [PATCH 1/2] feat(c_api): add DiskANN index type support to C API Add complete DiskANN C API interfaces following existing HNSW/FTS patterns: - c_api.h: ZVEC_INDEX_TYPE_DISKANN constant, index params setter/getters, query params opaque type with CRUD (list_size/radius/is_linear/ is_using_refiner), query wiring for vector_query/group_by_vector_query/ sub_query, ZVEC_DISKANN_PARAMS convenience macro - c_api.cc: factory switch case, all function implementations with dynamic_cast type safety, to_string support - c_api_test.c: 3 standalone test functions + augmentations to 4 existing tests covering create/set/get/destroy, type-mismatch errors, NULL handling - examples/c/diskann_example.c: end-to-end example demonstrating DiskANN collection creation, batch insert, flush/index build, and search with list_size tuning Co-Authored-By: Claude --- examples/c/CMakeLists.txt | 11 ++ examples/c/diskann_example.c | 345 +++++++++++++++++++++++++++++++++++ src/binding/c/c_api.cc | 252 +++++++++++++++++++++++++ src/include/zvec/c_api.h | 195 ++++++++++++++++++++ tests/c/c_api_test.c | 260 ++++++++++++++++++++++++++ 5 files changed, 1063 insertions(+) create mode 100644 examples/c/diskann_example.c diff --git a/examples/c/CMakeLists.txt b/examples/c/CMakeLists.txt index 5d06f02b6..736200c82 100644 --- a/examples/c/CMakeLists.txt +++ b/examples/c/CMakeLists.txt @@ -93,6 +93,12 @@ target_link_libraries(c_api_optimized_example PRIVATE zvec-c-api ) +# DiskANN example +add_executable(c_api_diskann_example diskann_example.c) +target_link_libraries(c_api_diskann_example PRIVATE + zvec-c-api +) + # Strip symbols to reduce executable size if(CMAKE_BUILD_TYPE STREQUAL "Release" AND (ANDROID OR (CMAKE_SYSTEM_NAME STREQUAL "Linux"))) add_custom_command(TARGET c_api_basic_example POST_BUILD @@ -113,14 +119,19 @@ if(CMAKE_BUILD_TYPE STREQUAL "Release" AND (ANDROID OR 
(CMAKE_SYSTEM_NAME STREQU add_custom_command(TARGET c_api_optimized_example POST_BUILD COMMAND ${CMAKE_STRIP} "$" COMMENT "Stripping symbols from c_api_optimized_example") + add_custom_command(TARGET c_api_diskann_example POST_BUILD + COMMAND ${CMAKE_STRIP} "$" + COMMENT "Stripping symbols from c_api_diskann_example") endif() # Optimize for size if(CMAKE_BUILD_TYPE STREQUAL "Release" AND ANDROID) set_property(TARGET c_api_basic_example c_api_collection_schema_example c_api_doc_example c_api_index_example c_api_field_schema_example c_api_optimized_example + c_api_diskann_example PROPERTY COMPILE_FLAGS "-Os") set_property(TARGET c_api_basic_example c_api_collection_schema_example c_api_doc_example c_api_index_example c_api_field_schema_example c_api_optimized_example + c_api_diskann_example PROPERTY INTERPROCEDURAL_OPTIMIZATION TRUE) endif() diff --git a/examples/c/diskann_example.c b/examples/c/diskann_example.c new file mode 100644 index 000000000..02434a51c --- /dev/null +++ b/examples/c/diskann_example.c @@ -0,0 +1,345 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +/** + * @file diskann_example.c + * @brief End-to-end example demonstrating DiskANN index usage via the C API. + * + * DiskANN is a disk-based approximate nearest neighbor search algorithm + * optimized for large-scale datasets that exceed available memory. 
It uses + * a Vamana graph structure combined with product quantization (PQ) to + * achieve high recall with efficient disk I/O. + * + * NOTE: DiskANN requires Linux x86_64 with libaio. On other platforms the + * example will compile but the runtime plugin will fail to load. + * + * Workflow demonstrated: + * 1. Create collection schema with DiskANN-indexed vector field + * 2. Insert documents with high-dimensional vectors + * 3. Flush collection (triggers PQ training + graph build) + * 4. Search using DiskANN query parameters (list_size controls recall) + * 5. Clean up all resources + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "zvec/c_api.h" + +/* -------------------------------------------------------------------------- + * Helpers: handle_error() reports a failed call on stderr, returns the code + * -------------------------------------------------------------------------- */ + +static zvec_error_code_t handle_error(zvec_error_code_t error, + const char *context) { + if (error != ZVEC_OK) { + char *error_msg = NULL; + zvec_get_last_error(&error_msg); + fprintf(stderr, "Error in %s: %d - %s\n", context, error, + error_msg ? 
error_msg : "Unknown error"); + zvec_free(error_msg); + } + return error; +} + +#define VECTOR_DIM 64 +#define NUM_DOCS 100 +#define COLLECTION_DIR "./diskann_example_collection" + +/* -------------------------------------------------------------------------- + * Main + * -------------------------------------------------------------------------- */ + +int main(void) { + printf("=== ZVec DiskANN Index Example ===\n\n"); + + zvec_error_code_t error; + int i; + zvec_index_params_t *diskann_params = NULL; /* read at cleanup_schema */ + + /* ------------------------------------------------------------------ + * Step 1: Create collection schema + * ------------------------------------------------------------------ */ + printf("[Step 1] Creating collection schema...\n"); + + zvec_collection_schema_t *schema = + zvec_collection_schema_create("diskann_example"); + if (!schema) { + fprintf(stderr, "Failed to create schema\n"); + return 1; + } + + /* Scalar field with inverted index (for primary key / filtering) */ + zvec_index_params_t *invert_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_INVERT); + zvec_index_params_set_invert_params(invert_params, true, false); + + zvec_field_schema_t *id_field = + zvec_field_schema_create("id", ZVEC_DATA_TYPE_STRING, false, 0); + zvec_field_schema_set_index_params(id_field, invert_params); + error = zvec_collection_schema_add_field(schema, id_field); + if (handle_error(error, "adding id field") != ZVEC_OK) { + goto cleanup_schema; + } + printf(" + id field (STRING, inverted index)\n"); + + /* Vector field with DiskANN index */ + diskann_params = zvec_index_params_create(ZVEC_INDEX_TYPE_DISKANN); + if (!diskann_params) { + fprintf(stderr, "Failed to create DiskANN index parameters\n"); + goto cleanup_schema; + } + zvec_index_params_set_metric_type(diskann_params, ZVEC_METRIC_TYPE_L2); + zvec_index_params_set_diskann_params(diskann_params, + 64, /* max_degree: graph connectivity */ + 100, /* list_size: build-time candidates */ + 8); /* pq_chunk_num: PQ chunks (0=auto) */ + + printf(" 
DiskANN index params: max_degree=%d, list_size=%d, pq_chunk_num=%d\n", + zvec_index_params_get_diskann_max_degree(diskann_params), + zvec_index_params_get_diskann_list_size(diskann_params), + zvec_index_params_get_diskann_pq_chunk_num(diskann_params)); + + zvec_field_schema_t *embedding_field = zvec_field_schema_create( + "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, VECTOR_DIM); + zvec_field_schema_set_index_params(embedding_field, diskann_params); + error = zvec_collection_schema_add_field(schema, embedding_field); + if (handle_error(error, "adding embedding field") != ZVEC_OK) { + goto cleanup_schema; + } + printf(" + embedding field (VECTOR_FP32, %dD, DiskANN index)\n", VECTOR_DIM); + + /* Index params are copied into field schemas; safe to destroy now */ + zvec_index_params_destroy(invert_params); + zvec_index_params_destroy(diskann_params); + invert_params = NULL; + diskann_params = NULL; + + /* ------------------------------------------------------------------ + * Step 2: Create and open collection + * ------------------------------------------------------------------ */ + printf("\n[Step 2] Creating collection...\n"); + + zvec_collection_options_t *options = zvec_collection_options_create(); + zvec_collection_t *collection = NULL; + error = zvec_collection_create_and_open(COLLECTION_DIR, schema, options, + &collection); + zvec_collection_options_destroy(options); + if (handle_error(error, "creating collection") != ZVEC_OK) { + goto cleanup_schema; + } + printf(" Collection created at %s\n", COLLECTION_DIR); + + /* ------------------------------------------------------------------ + * Step 3: Generate and insert documents + * ------------------------------------------------------------------ */ + printf("\n[Step 3] Inserting %d documents with %dD vectors...\n", + NUM_DOCS, VECTOR_DIM); + + /* Allocate vector storage */ + float (*vectors)[VECTOR_DIM] = + (float (*)[VECTOR_DIM])malloc(NUM_DOCS * VECTOR_DIM * sizeof(float)); + if (!vectors) { + 
fprintf(stderr, "Failed to allocate vector storage\n"); + goto cleanup_collection; + } + + /* Generate deterministic vector data */ + for (i = 0; i < NUM_DOCS; i++) { + for (int d = 0; d < VECTOR_DIM; d++) { + vectors[i][d] = (float)((i * VECTOR_DIM + d) % 1000) / 1000.0f; + } + } + + /* Insert in batches */ + int batch_size = 20; + size_t total_success = 0, total_error = 0; + + for (int batch_start = 0; batch_start < NUM_DOCS; + batch_start += batch_size) { + int count = batch_start + batch_size > NUM_DOCS + ? NUM_DOCS - batch_start + : batch_size; + + zvec_doc_t **docs = + (zvec_doc_t **)malloc((size_t)count * sizeof(zvec_doc_t *)); + for (i = 0; i < count; i++) { + int idx = batch_start + i; + docs[i] = zvec_doc_create(); + + char pk[32]; + snprintf(pk, sizeof(pk), "doc_%04d", idx); + zvec_doc_set_pk(docs[i], pk); + + zvec_doc_add_field_by_value(docs[i], "id", ZVEC_DATA_TYPE_STRING, pk, + strlen(pk)); + zvec_doc_add_field_by_value(docs[i], "embedding", + ZVEC_DATA_TYPE_VECTOR_FP32, vectors[idx], + VECTOR_DIM * sizeof(float)); + } + + size_t success_count = 0, error_count = 0; + error = zvec_collection_insert(collection, (const zvec_doc_t **)docs, + (size_t)count, &success_count, &error_count); + if (error != ZVEC_OK) { + handle_error(error, "inserting batch"); + } + total_success += success_count; + total_error += error_count; + + for (i = 0; i < count; i++) { + zvec_doc_destroy(docs[i]); + } + free(docs); + } + printf(" Inserted: %zu succeeded, %zu failed\n", total_success, + total_error); + + /* ------------------------------------------------------------------ + * Step 4: Flush to trigger index build (PQ training + graph construction) + * ------------------------------------------------------------------ */ + printf("\n[Step 4] Flushing collection (triggers DiskANN index build)...\n"); + + error = zvec_collection_flush(collection); + if (handle_error(error, "flushing collection") != ZVEC_OK) { + goto cleanup_vectors; + } + + zvec_collection_stats_t *stats = 
NULL; + error = zvec_collection_get_stats(collection, &stats); + if (error == ZVEC_OK && stats) { + printf(" Document count after flush: %llu\n", + (unsigned long long)zvec_collection_stats_get_doc_count(stats)); + zvec_collection_stats_destroy(stats); + } + + /* ------------------------------------------------------------------ + * Step 5: Search with DiskANN query parameters + * ------------------------------------------------------------------ */ + printf("\n[Step 5] Searching with DiskANN query parameters...\n"); + + /* Create DiskANN query params — list_size controls the search frontier + * (beam width). Larger values improve recall at the cost of latency. */ + zvec_diskann_query_params_t *da_qp = + zvec_query_params_diskann_create(200); + if (!da_qp) { + fprintf(stderr, "Failed to create DiskANN query params\n"); + goto cleanup_vectors; + } + printf(" DiskANN query params: list_size=%d\n", + zvec_query_params_diskann_get_list_size(da_qp)); + + /* Build the vector query */ + zvec_vector_query_t *query = zvec_vector_query_create(); + zvec_vector_query_set_field_name(query, "embedding"); + zvec_vector_query_set_query_vector(query, vectors[0], + VECTOR_DIM * sizeof(float)); + zvec_vector_query_set_topk(query, 10); + zvec_vector_query_set_include_vector(query, false); + zvec_vector_query_set_include_doc_id(query, true); + + /* Attach params; on success the query owns da_qp — do NOT destroy it */ + error = zvec_vector_query_set_diskann_params(query, da_qp); + if (handle_error(error, "setting DiskANN query params") != ZVEC_OK) { + zvec_query_params_diskann_destroy(da_qp); /* not adopted on failure */ + zvec_vector_query_destroy(query); + goto cleanup_vectors; + } + + /* Execute the query */ + zvec_doc_t **results = NULL; + size_t result_count = 0; + error = zvec_collection_query(collection, + (const zvec_vector_query_t *)query, + &results, &result_count); + if (error != ZVEC_OK) { + handle_error(error, "executing DiskANN query"); + printf(" (This is expected on non-Linux 
platforms — DiskANN requires " + "libaio)\n"); + } else { + printf(" Query returned %zu results:\n", result_count); + for (size_t r = 0; r < result_count && r < 5; r++) { + const char *pk = zvec_doc_get_pk_copy(results[r]); + printf(" [%zu] pk=%s doc_id=%llu score=%.6f\n", r + 1, + pk ? pk : "NULL", + (unsigned long long)zvec_doc_get_doc_id(results[r]), + zvec_doc_get_score(results[r])); + if (pk) { + zvec_free((void *)pk); + } + } + if (result_count > 5) { + printf(" ... and %zu more\n", result_count - 5); + } + zvec_docs_free(results, result_count); + } + zvec_vector_query_destroy(query); + + /* ------------------------------------------------------------------ + * Step 6: Demonstrate list_size tuning (higher recall vs. lower latency) + * ------------------------------------------------------------------ */ + printf("\n[Step 6] Tuning list_size for recall/latency trade-off...\n"); + + int list_sizes[] = {50, 100, 300}; + for (int li = 0; li < 3; li++) { + zvec_diskann_query_params_t *tune_qp = + zvec_query_params_diskann_create(list_sizes[li]); + + zvec_vector_query_t *tune_query = zvec_vector_query_create(); + zvec_vector_query_set_field_name(tune_query, "embedding"); + zvec_vector_query_set_query_vector(tune_query, vectors[0], + VECTOR_DIM * sizeof(float)); + zvec_vector_query_set_topk(tune_query, 10); + zvec_vector_query_set_include_doc_id(tune_query, true); + zvec_vector_query_set_diskann_params(tune_query, tune_qp); + + zvec_doc_t **tune_results = NULL; + size_t tune_count = 0; + error = zvec_collection_query( + collection, (const zvec_vector_query_t *)tune_query, + &tune_results, &tune_count); + if (error == ZVEC_OK) { + printf(" list_size=%3d -> %zu results returned\n", + list_sizes[li], tune_count); + zvec_docs_free(tune_results, tune_count); + } else { + printf(" list_size=%3d -> query failed (expected on non-Linux)\n", + list_sizes[li]); + } + zvec_vector_query_destroy(tune_query); + } + + /* 
------------------------------------------------------------------ + * Cleanup + * ------------------------------------------------------------------ */ +cleanup_vectors: + free(vectors); + +cleanup_collection: + zvec_collection_destroy(collection); + +cleanup_schema: + zvec_collection_schema_destroy(schema); + if (invert_params) { + zvec_index_params_destroy(invert_params); + } + if (diskann_params) { + zvec_index_params_destroy(diskann_params); + } + + printf("\n DiskANN index type string: %s\n", + zvec_index_type_to_string(ZVEC_INDEX_TYPE_DISKANN)); + printf("=== Example completed ===\n"); + return 0; +} diff --git a/src/binding/c/c_api.cc b/src/binding/c/c_api.cc index a81cc3864..e9c695f6b 100644 --- a/src/binding/c/c_api.cc +++ b/src/binding/c/c_api.cc @@ -1361,6 +1361,15 @@ zvec_index_params_t *zvec_index_params_create(zvec_index_type_t index_type) { new zvec::FlatIndexParams(zvec::MetricType::L2, // metric_type zvec::QuantizeType::UNDEFINED); break; + case ZVEC_INDEX_TYPE_DISKANN: + cpp_params = + new zvec::DiskAnnIndexParams( + zvec::MetricType::L2, // metric_type + 100, // max_degree (default) + 50, // list_size (default) + 0, // pq_chunk_num (default) + zvec::QuantizeType::UNDEFINED); + break; } // Return as opaque pointer (raw pointer) @@ -1607,6 +1616,104 @@ zvec_error_code_t zvec_index_params_get_vamana_params( return ZVEC_OK; } +/** + * @brief Set DiskANN-specific parameters + * @param params Index parameters (must be DiskANN type) + * @param max_degree Graph connectivity (max degree of Vamana graph) + * @param list_size Build-time list size + * @param pq_chunk_num PQ chunk count + * @return ZVEC_OK on success, error code on failure + */ +zvec_error_code_t zvec_index_params_set_diskann_params( + zvec_index_params_t *params, int max_degree, int list_size, + int pq_chunk_num) { + if (!params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Invalid params or not DiskANN index type"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + auto *cpp_params = 
reinterpret_cast(params); + auto *diskann_params = dynamic_cast(cpp_params); + if (!diskann_params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Invalid params or not DiskANN index type"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + diskann_params->set_max_degree(max_degree); + diskann_params->set_list_size(list_size); + diskann_params->set_pq_chunk_num(pq_chunk_num); + return ZVEC_OK; +} + +/** + * @brief Get DiskANN max_degree parameter + * @param params Index parameters (must be DiskANN type) + * @return max_degree parameter value, or 0 on error + */ +int zvec_index_params_get_diskann_max_degree( + const zvec_index_params_t *params) { + if (!params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Invalid params or not DiskANN index type"); + return 0; + } + auto *cpp_params = reinterpret_cast(params); + auto *diskann_params = + dynamic_cast(cpp_params); + if (!diskann_params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Invalid params or not DiskANN index type"); + return 0; + } + return diskann_params->max_degree(); +} + +/** + * @brief Get DiskANN list_size parameter + * @param params Index parameters (must be DiskANN type) + * @return list_size parameter value, or 0 on error + */ +int zvec_index_params_get_diskann_list_size( + const zvec_index_params_t *params) { + if (!params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Invalid params or not DiskANN index type"); + return 0; + } + auto *cpp_params = reinterpret_cast(params); + auto *diskann_params = + dynamic_cast(cpp_params); + if (!diskann_params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Invalid params or not DiskANN index type"); + return 0; + } + return diskann_params->list_size(); +} + +/** + * @brief Get DiskANN pq_chunk_num parameter + * @param params Index parameters (must be DiskANN type) + * @return pq_chunk_num parameter value, or 0 on error + */ +int zvec_index_params_get_diskann_pq_chunk_num( + const zvec_index_params_t *params) { + if (!params) { + 
SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Invalid params or not DiskANN index type"); + return 0; + } + auto *cpp_params = reinterpret_cast(params); + auto *diskann_params = + dynamic_cast(cpp_params); + if (!diskann_params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Invalid params or not DiskANN index type"); + return 0; + } + return diskann_params->pq_chunk_num(); +} + /** * @brief Set IVF-specific parameters * @param params Index parameters (must be IVF type) @@ -2643,6 +2750,8 @@ const char *zvec_index_type_to_string(zvec_index_type_t index_type) { return "INVERT"; case ZVEC_INDEX_TYPE_FTS: return "FTS"; + case ZVEC_INDEX_TYPE_DISKANN: + return "DiskANN"; default: return "UNKNOWN_INDEX_TYPE"; } @@ -4794,6 +4903,100 @@ bool zvec_query_params_hnsw_get_is_using_refiner( return ptr->is_using_refiner(); } +// ============================================================================= +// DiskAnnQueryParams implementation - wrapper around zvec::DiskAnnQueryParams +// ============================================================================= + +zvec_diskann_query_params_t *zvec_query_params_diskann_create(int list_size) { + ZVEC_TRY_RETURN_NULL( + "Failed to create DiskAnnQueryParams", + auto *params = new zvec::DiskAnnQueryParams(list_size); + return reinterpret_cast(params);) + return nullptr; +} + +void zvec_query_params_diskann_destroy(zvec_diskann_query_params_t *params) { + if (params) { + delete reinterpret_cast(params); + } +} + +zvec_error_code_t zvec_query_params_diskann_set_list_size( + zvec_diskann_query_params_t *params, int list_size) { + if (!params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "DiskANN query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + auto *ptr = reinterpret_cast(params); + ptr->set_list_size(list_size); + return ZVEC_OK; +} + +int zvec_query_params_diskann_get_list_size( + const zvec_diskann_query_params_t *params) { + if (!params) return 300; // DiskAnnQueryParams default + auto *ptr = 
reinterpret_cast(params); + return ptr->list_size(); +} + +zvec_error_code_t zvec_query_params_diskann_set_radius( + zvec_diskann_query_params_t *params, float radius) { + if (!params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "DiskANN query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + auto *ptr = reinterpret_cast(params); + ptr->set_radius(radius); + return ZVEC_OK; +} + +float zvec_query_params_diskann_get_radius( + const zvec_diskann_query_params_t *params) { + if (!params) return 0.0f; + auto *ptr = reinterpret_cast(params); + return ptr->radius(); +} + +zvec_error_code_t zvec_query_params_diskann_set_is_linear( + zvec_diskann_query_params_t *params, bool is_linear) { + if (!params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "DiskANN query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + auto *ptr = reinterpret_cast(params); + ptr->set_is_linear(is_linear); + return ZVEC_OK; +} + +bool zvec_query_params_diskann_get_is_linear( + const zvec_diskann_query_params_t *params) { + if (!params) return false; + auto *ptr = reinterpret_cast(params); + return ptr->is_linear(); +} + +zvec_error_code_t zvec_query_params_diskann_set_is_using_refiner( + zvec_diskann_query_params_t *params, bool is_using_refiner) { + if (!params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "DiskANN query params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + auto *ptr = reinterpret_cast(params); + ptr->set_is_using_refiner(is_using_refiner); + return ZVEC_OK; +} + +bool zvec_query_params_diskann_get_is_using_refiner( + const zvec_diskann_query_params_t *params) { + if (!params) return false; + auto *ptr = reinterpret_cast(params); + return ptr->is_using_refiner(); +} + // ============================================================================= // IVFQueryParams implementation - wrapper around zvec::IVFQueryParams // ============================================================================= @@ -5411,6 
+5614,23 @@ zvec_error_code_t zvec_vector_query_set_vamana_params( return ZVEC_OK; } +zvec_error_code_t zvec_vector_query_set_diskann_params( + zvec_vector_query_t *query, zvec_diskann_query_params_t *diskann_params) { + if (!query || !diskann_params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Query or DiskANN params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + auto *query_ptr = reinterpret_cast(query); + auto *params_ptr = + reinterpret_cast(diskann_params); + + query_ptr->target_.query_params_.reset(params_ptr); + + return ZVEC_OK; +} + // ============================================================================= // Fts payload implementation - wrapper around zvec::FtsClause (value type) // ============================================================================= @@ -5766,6 +5986,24 @@ zvec_error_code_t zvec_group_by_vector_query_set_vamana_params( return ZVEC_OK; } +zvec_error_code_t zvec_group_by_vector_query_set_diskann_params( + zvec_group_by_vector_query_t *query, + zvec_diskann_query_params_t *diskann_params) { + if (!query || !diskann_params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Query or DiskANN params pointer is null"); + return ZVEC_ERROR_INVALID_ARGUMENT; + } + + auto *query_ptr = reinterpret_cast(query); + auto *params_ptr = + reinterpret_cast(diskann_params); + + query_ptr->target_.query_params_.reset(params_ptr); + + return ZVEC_OK; +} + // ============================================================================= // Reranker Implementation // ============================================================================= @@ -6122,6 +6360,20 @@ zvec_error_code_t zvec_sub_query_set_vamana_params( return ZVEC_OK; } +zvec_error_code_t zvec_sub_query_set_diskann_params( + zvec_sub_query_t *query, zvec_diskann_query_params_t *diskann_params) { + if (!query || !diskann_params) { + SET_LAST_ERROR(ZVEC_ERROR_INVALID_ARGUMENT, + "Sub-vector query or DiskANN params pointer is null"); + return 
ZVEC_ERROR_INVALID_ARGUMENT; + } + auto *ptr = reinterpret_cast(query); + auto *params_ptr = + reinterpret_cast(diskann_params); + ptr->target_.query_params_.reset(params_ptr); + return ZVEC_OK; +} + // ============================================================================= // Index Interface Implementation // ============================================================================= diff --git a/src/include/zvec/c_api.h b/src/include/zvec/c_api.h index d02335cb3..e8d54199b 100644 --- a/src/include/zvec/c_api.h +++ b/src/include/zvec/c_api.h @@ -827,6 +827,7 @@ typedef uint32_t zvec_index_type_t; #define ZVEC_INDEX_TYPE_IVF 2 #define ZVEC_INDEX_TYPE_FLAT 3 #define ZVEC_INDEX_TYPE_VAMANA 6 +#define ZVEC_INDEX_TYPE_DISKANN 5 #define ZVEC_INDEX_TYPE_INVERT 10 #define ZVEC_INDEX_TYPE_FTS 11 @@ -1018,6 +1019,42 @@ ZVEC_EXPORT zvec_error_code_t ZVEC_CALL zvec_index_params_get_vamana_params( int *out_search_list_size, float *out_alpha, bool *out_saturate_graph, bool *out_use_contiguous_memory); +/** + * @brief Set DiskANN specific parameters + * @param params Index parameters (must be DiskANN type) + * @param max_degree Graph connectivity (max degree of Vamana graph) + * @param list_size Build-time list size (candidate list during construction) + * @param pq_chunk_num PQ chunk count (0 disables PQ) + * @return ZVEC_OK on success, error code on failure + */ +ZVEC_EXPORT zvec_error_code_t ZVEC_CALL zvec_index_params_set_diskann_params( + zvec_index_params_t *params, int max_degree, int list_size, + int pq_chunk_num); + +/** + * @brief Get DiskANN max_degree parameter + * @param params Index parameters (must not be NULL) + * @return max_degree parameter + */ +ZVEC_EXPORT int ZVEC_CALL +zvec_index_params_get_diskann_max_degree(const zvec_index_params_t *params); + +/** + * @brief Get DiskANN list_size parameter + * @param params Index parameters (must not be NULL) + * @return list_size parameter + */ +ZVEC_EXPORT int ZVEC_CALL 
+zvec_index_params_get_diskann_list_size(const zvec_index_params_t *params); + +/** + * @brief Get DiskANN pq_chunk_num parameter + * @param params Index parameters (must not be NULL) + * @return pq_chunk_num parameter + */ +ZVEC_EXPORT int ZVEC_CALL +zvec_index_params_get_diskann_pq_chunk_num(const zvec_index_params_t *params); + /** * @brief Set IVF specific parameters * @param params Index parameters (must be IVF type) @@ -1144,6 +1181,16 @@ typedef struct zvec_fts_query_params_t zvec_fts_query_params_t; */ typedef struct zvec_vamana_query_params_t zvec_vamana_query_params_t; +/** + * @brief DiskANN query parameters handle (opaque pointer) + * + * Internally maps to zvec::DiskAnnQueryParams* (raw pointer). + * Created by zvec_query_params_diskann_create() and destroyed by + * zvec_query_params_diskann_destroy(). Caller owns the pointer and must + * explicitly destroy it. + */ +typedef struct zvec_diskann_query_params_t zvec_diskann_query_params_t; + // ============================================================================= // Query Structures (Opaque Pointer Pattern) @@ -1625,6 +1672,100 @@ zvec_query_params_vamana_set_is_using_refiner( ZVEC_EXPORT bool ZVEC_CALL zvec_query_params_vamana_get_is_using_refiner( const zvec_vamana_query_params_t *params); +// ----------------------------------------------------------------------------- +// zvec_diskann_query_params_t (DiskANN Query Parameters) +// ----------------------------------------------------------------------------- + +/** + * @brief Create DiskANN query parameters + * @param list_size Search frontier size (default: 300) + * @return zvec_diskann_query_params_t* Pointer to the newly created DiskANN + * query parameters + */ +ZVEC_EXPORT zvec_diskann_query_params_t *ZVEC_CALL +zvec_query_params_diskann_create(int list_size); + +/** + * @brief Destroy DiskANN query parameters + * @param params DiskANN query parameters pointer + */ +ZVEC_EXPORT void ZVEC_CALL 
+zvec_query_params_diskann_destroy(zvec_diskann_query_params_t *params); + +/** + * @brief Set search frontier size + * @param params DiskANN query parameters pointer + * @param list_size Search frontier size + * @return zvec_error_code_t Error code + */ +ZVEC_EXPORT zvec_error_code_t ZVEC_CALL +zvec_query_params_diskann_set_list_size(zvec_diskann_query_params_t *params, + int list_size); + +/** + * @brief Get search frontier size + * @param params DiskANN query parameters pointer + * @return int Search frontier size + */ +ZVEC_EXPORT int ZVEC_CALL +zvec_query_params_diskann_get_list_size( + const zvec_diskann_query_params_t *params); + +/** + * @brief Set search radius (common parameter from QueryParams base) + * @param params DiskANN query parameters pointer + * @param radius Search radius + * @return zvec_error_code_t Error code + */ +ZVEC_EXPORT zvec_error_code_t ZVEC_CALL zvec_query_params_diskann_set_radius( + zvec_diskann_query_params_t *params, float radius); + +/** + * @brief Get search radius (common parameter from QueryParams base) + * @param params DiskANN query parameters pointer + * @return float Search radius + */ +ZVEC_EXPORT float ZVEC_CALL +zvec_query_params_diskann_get_radius( + const zvec_diskann_query_params_t *params); + +/** + * @brief Set linear search mode (common parameter from QueryParams base) + * @param params DiskANN query parameters pointer + * @param is_linear Whether linear search + * @return zvec_error_code_t Error code + */ +ZVEC_EXPORT zvec_error_code_t ZVEC_CALL +zvec_query_params_diskann_set_is_linear( + zvec_diskann_query_params_t *params, bool is_linear); + +/** + * @brief Get linear search mode (common parameter from QueryParams base) + * @param params DiskANN query parameters pointer + * @return bool Whether linear search + */ +ZVEC_EXPORT bool ZVEC_CALL +zvec_query_params_diskann_get_is_linear( + const zvec_diskann_query_params_t *params); + +/** + * @brief Set whether to use refiner (common parameter from QueryParams 
base) + * @param params DiskANN query parameters pointer + * @param is_using_refiner Whether to use refiner + * @return zvec_error_code_t Error code + */ +ZVEC_EXPORT zvec_error_code_t ZVEC_CALL +zvec_query_params_diskann_set_is_using_refiner( + zvec_diskann_query_params_t *params, bool is_using_refiner); + +/** + * @brief Get whether to use refiner (common parameter from QueryParams base) + * @param params DiskANN query parameters pointer + * @return bool Whether to use refiner + */ +ZVEC_EXPORT bool ZVEC_CALL zvec_query_params_diskann_get_is_using_refiner( + const zvec_diskann_query_params_t *params); + // ----------------------------------------------------------------------------- // zvec_vector_query_t (Vector Query) // ----------------------------------------------------------------------------- @@ -1816,6 +1957,15 @@ ZVEC_EXPORT zvec_error_code_t ZVEC_CALL zvec_vector_query_set_fts_params( ZVEC_EXPORT zvec_error_code_t ZVEC_CALL zvec_vector_query_set_vamana_params( zvec_vector_query_t *query, zvec_vamana_query_params_t *vamana_params); +/** + * @brief Set DiskANN query parameters (takes ownership) + * @param query Vector query pointer + * @param diskann_params DiskANN query parameters pointer + * @return zvec_error_code_t Error code + */ +ZVEC_EXPORT zvec_error_code_t ZVEC_CALL zvec_vector_query_set_diskann_params( + zvec_vector_query_t *query, zvec_diskann_query_params_t *diskann_params); + // ----------------------------------------------------------------------------- // zvec_fts_t (FTS query payload) // ----------------------------------------------------------------------------- @@ -2099,6 +2249,17 @@ zvec_group_by_vector_query_set_vamana_params( zvec_group_by_vector_query_t *query, zvec_vamana_query_params_t *vamana_params); +/** + * @brief Set DiskANN query parameters (takes ownership) + * @param query Group by vector query pointer + * @param diskann_params DiskANN query parameters pointer + * @return zvec_error_code_t Error code + */ +ZVEC_EXPORT 
zvec_error_code_t ZVEC_CALL +zvec_group_by_vector_query_set_diskann_params( + zvec_group_by_vector_query_t *query, + zvec_diskann_query_params_t *diskann_params); + // ----------------------------------------------------------------------------- // Rerank Strategy (set on MultiQuery) // ----------------------------------------------------------------------------- @@ -2359,6 +2520,15 @@ ZVEC_EXPORT zvec_error_code_t ZVEC_CALL zvec_sub_query_set_flat_params( ZVEC_EXPORT zvec_error_code_t ZVEC_CALL zvec_sub_query_set_vamana_params( zvec_sub_query_t *query, zvec_vamana_query_params_t *vamana_params); +/** + * @brief Set DiskANN query parameters (takes ownership) + * @param query Sub-query pointer + * @param diskann_params DiskANN query parameters pointer + * @return zvec_error_code_t Error code + */ +ZVEC_EXPORT zvec_error_code_t ZVEC_CALL zvec_sub_query_set_diskann_params( + zvec_sub_query_t *query, zvec_diskann_query_params_t *diskann_params); + // ============================================================================= // Collection Options and Statistics (Opaque Pointer Pattern) // ============================================================================= @@ -3824,6 +3994,31 @@ const char *zvec_metric_type_to_string(zvec_metric_type_t metric_type); .ivf.n_probe = (_nprobe) }) // clang-format on +/** + * @brief Simplified DiskANN index parameters initialization macro + * @param _metric Distance metric type + * @param _max_degree Graph connectivity (max degree) + * @param _list_size Build-time list size + * @param _pq_chunk_num PQ chunk count + * @param _quant Quantization type + * + * Usage example: + * @code + * zvec_index_params_t params = ZVEC_DISKANN_PARAMS( + * ZVEC_METRIC_TYPE_L2, 100, 50, 16, ZVEC_QUANTIZE_TYPE_UNDEFINED); + * @endcode + */ +// clang-format off +#define ZVEC_DISKANN_PARAMS(_metric, _max_degree, _list_size, _pq_chunk_num, _quant) \ + ((zvec_index_params_t){ \ + .index_type = ZVEC_INDEX_TYPE_DISKANN, \ + .metric_type = (_metric), \ + 
.quantize_type = (_quant), \ + .diskann.max_degree = (_max_degree), \ + .diskann.list_size = (_list_size), \ + .diskann.pq_chunk_num = (_pq_chunk_num) }) +// clang-format on + /** * @brief Simplified string initialization macro * @param str String content diff --git a/tests/c/c_api_test.c b/tests/c/c_api_test.c index 8670ff845..e62f469a9 100644 --- a/tests/c/c_api_test.c +++ b/tests/c/c_api_test.c @@ -3482,11 +3482,22 @@ void test_index_params_functions(void) { TEST_ASSERT(n_iters == 10); TEST_ASSERT(use_soar == false); // Default is false + // Test DiskANN index params + zvec_index_params_t *diskann_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_DISKANN); + TEST_ASSERT(diskann_params != NULL); + TEST_ASSERT(zvec_index_params_get_type(diskann_params) == + ZVEC_INDEX_TYPE_DISKANN); + TEST_ASSERT(zvec_index_params_get_diskann_max_degree(diskann_params) == 100); + TEST_ASSERT(zvec_index_params_get_diskann_list_size(diskann_params) == 50); + TEST_ASSERT(zvec_index_params_get_diskann_pq_chunk_num(diskann_params) == 0); + // Cleanup zvec_index_params_destroy(hnsw_params); zvec_index_params_destroy(invert_params); zvec_index_params_destroy(flat_params); zvec_index_params_destroy(ivf_params); + zvec_index_params_destroy(diskann_params); TEST_END(); } @@ -3556,11 +3567,27 @@ void test_index_params_api_functions(void) { TEST_ASSERT(zvec_index_params_get_metric_type(flat_params) == ZVEC_METRIC_TYPE_IP); + // Test zvec_index_params_create for DiskANN + zvec_index_params_t *diskann_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_DISKANN); + TEST_ASSERT(diskann_params != NULL); + TEST_ASSERT(zvec_index_params_get_type(diskann_params) == + ZVEC_INDEX_TYPE_DISKANN); + TEST_ASSERT(zvec_index_params_get_metric_type(diskann_params) == + ZVEC_METRIC_TYPE_L2); + + // Test zvec_index_params_set_diskann_params + zvec_index_params_set_diskann_params(diskann_params, 200, 100, 8); + TEST_ASSERT(zvec_index_params_get_diskann_max_degree(diskann_params) == 200); + 
TEST_ASSERT(zvec_index_params_get_diskann_list_size(diskann_params) == 100); + TEST_ASSERT(zvec_index_params_get_diskann_pq_chunk_num(diskann_params) == 8); + // Cleanup zvec_index_params_destroy(hnsw_params); zvec_index_params_destroy(ivf_params); zvec_index_params_destroy(invert_params); zvec_index_params_destroy(flat_params); + zvec_index_params_destroy(diskann_params); TEST_END(); } @@ -3628,6 +3655,11 @@ void test_query_params_functions(void) { zvec_query_params_flat_create(false, 2.0f); TEST_ASSERT(flat_params != NULL); + // Test DiskANN query parameters + zvec_diskann_query_params_t *diskann_params = + zvec_query_params_diskann_create(500); + TEST_ASSERT(diskann_params != NULL); + zvec_error_code_t err; // Test HNSW-specific parameters @@ -3724,17 +3756,44 @@ void test_query_params_functions(void) { TEST_ASSERT(zvec_query_params_vamana_get_is_using_refiner(vamana_params) == false); + // Test DiskANN-specific parameters + TEST_ASSERT(zvec_query_params_diskann_get_list_size(diskann_params) == 500); + err = zvec_query_params_diskann_set_list_size(diskann_params, 800); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(zvec_query_params_diskann_get_list_size(diskann_params) == 800); + + // Test DiskANN common parameters (radius, is_linear, is_using_refiner) + err = zvec_query_params_diskann_set_radius(diskann_params, 1.2f); + TEST_ASSERT(err == ZVEC_OK); + radius = zvec_query_params_diskann_get_radius(diskann_params); + TEST_ASSERT(radius == 1.2f); + + err = zvec_query_params_diskann_set_is_linear(diskann_params, true); + TEST_ASSERT(err == ZVEC_OK); + is_linear = zvec_query_params_diskann_get_is_linear(diskann_params); + TEST_ASSERT(is_linear == true); + + err = zvec_query_params_diskann_set_is_using_refiner(diskann_params, true); + TEST_ASSERT(err == ZVEC_OK); + is_using_refiner = + zvec_query_params_diskann_get_is_using_refiner(diskann_params); + TEST_ASSERT(is_using_refiner == true); + // Test destruction of valid parameters 
zvec_query_params_hnsw_destroy(hnsw_params); zvec_query_params_ivf_destroy(ivf_params); zvec_query_params_flat_destroy(flat_params); zvec_query_params_vamana_destroy(vamana_params); + zvec_query_params_diskann_destroy(diskann_params); + + // Test boundary cases - null pointer handling zvec_query_params_hnsw_destroy(NULL); zvec_query_params_ivf_destroy(NULL); zvec_query_params_flat_destroy(NULL); zvec_query_params_vamana_destroy(NULL); + zvec_query_params_diskann_destroy(NULL); // Test null pointer handling for setters err = zvec_query_params_hnsw_set_radius(NULL, 0.5f); @@ -3745,14 +3804,20 @@ void test_query_params_functions(void) { TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); err = zvec_query_params_vamana_set_ef_search(NULL, 100); TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + err = zvec_query_params_diskann_set_radius(NULL, 0.5f); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + err = zvec_query_params_diskann_set_list_size(NULL, 100); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); // Test default values for getters with NULL TEST_ASSERT(zvec_query_params_hnsw_get_radius(NULL) == 0.0f); TEST_ASSERT(zvec_query_params_ivf_get_radius(NULL) == 0.0f); TEST_ASSERT(zvec_query_params_flat_get_radius(NULL) == 0.0f); + TEST_ASSERT(zvec_query_params_diskann_get_radius(NULL) == 0.0f); TEST_ASSERT(zvec_query_params_hnsw_get_is_linear(NULL) == false); TEST_ASSERT(zvec_query_params_ivf_get_is_linear(NULL) == false); TEST_ASSERT(zvec_query_params_flat_get_is_linear(NULL) == false); + TEST_ASSERT(zvec_query_params_diskann_get_is_linear(NULL) == false); TEST_ASSERT(zvec_query_params_hnsw_get_is_using_refiner(NULL) == false); TEST_ASSERT(zvec_query_params_ivf_get_is_using_refiner(NULL) == false); TEST_ASSERT(zvec_query_params_flat_get_is_using_refiner(NULL) == false); @@ -3760,6 +3825,8 @@ void test_query_params_functions(void) { TEST_ASSERT(zvec_query_params_vamana_get_radius(NULL) == 0.0f); TEST_ASSERT(zvec_query_params_vamana_get_is_linear(NULL) == false); 
TEST_ASSERT(zvec_query_params_vamana_get_is_using_refiner(NULL) == false); + TEST_ASSERT(zvec_query_params_diskann_get_is_using_refiner(NULL) == false); + TEST_ASSERT(zvec_query_params_diskann_get_list_size(NULL) == 300); TEST_END(); } @@ -5046,12 +5113,25 @@ void test_index_params_creation_functions(void) { false); TEST_ASSERT(verr == ZVEC_ERROR_INVALID_ARGUMENT); + // Test DiskANN parameters using new API + zvec_index_params_t *diskann_params = + zvec_index_params_create(ZVEC_INDEX_TYPE_DISKANN); + TEST_ASSERT(diskann_params != NULL); + TEST_ASSERT(zvec_index_params_get_type(diskann_params) == + ZVEC_INDEX_TYPE_DISKANN); + zvec_index_params_set_metric_type(diskann_params, ZVEC_METRIC_TYPE_COSINE); + zvec_index_params_set_diskann_params(diskann_params, 64, 25, 4); + TEST_ASSERT(zvec_index_params_get_diskann_max_degree(diskann_params) == 64); + TEST_ASSERT(zvec_index_params_get_diskann_list_size(diskann_params) == 25); + TEST_ASSERT(zvec_index_params_get_diskann_pq_chunk_num(diskann_params) == 4); + // Cleanup zvec_index_params_destroy(hnsw_params); zvec_index_params_destroy(ivf_params); zvec_index_params_destroy(flat_params); zvec_index_params_destroy(invert_params); zvec_index_params_destroy(vamana_params); + zvec_index_params_destroy(diskann_params); TEST_END(); } @@ -5880,6 +5960,181 @@ void test_collection_schema_getters(void) { TEST_END(); } +// ============================================================================= +// DiskANN Tests +// ============================================================================= + +void test_diskann_index_params_functions(void) { + TEST_START(); + + // Create DiskANN index params with defaults + zvec_index_params_t *params = + zvec_index_params_create(ZVEC_INDEX_TYPE_DISKANN); + TEST_ASSERT(params != NULL); + TEST_ASSERT(zvec_index_params_get_type(params) == ZVEC_INDEX_TYPE_DISKANN); + + // Check defaults: max_degree=100, list_size=50, pq_chunk_num=0 + // (aligned with DiskAnnIndexParams constructor defaults) + 
TEST_ASSERT(zvec_index_params_get_diskann_max_degree(params) == 100); + TEST_ASSERT(zvec_index_params_get_diskann_list_size(params) == 50); + TEST_ASSERT(zvec_index_params_get_diskann_pq_chunk_num(params) == 0); + + // Default metric type is L2 + TEST_ASSERT(zvec_index_params_get_metric_type(params) == + ZVEC_METRIC_TYPE_L2); + + // Set and verify custom values + zvec_index_params_set_metric_type(params, ZVEC_METRIC_TYPE_COSINE); + TEST_ASSERT(zvec_index_params_get_metric_type(params) == + ZVEC_METRIC_TYPE_COSINE); + + zvec_error_code_t err = + zvec_index_params_set_diskann_params(params, 200, 100, 8); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(zvec_index_params_get_diskann_max_degree(params) == 200); + TEST_ASSERT(zvec_index_params_get_diskann_list_size(params) == 100); + TEST_ASSERT(zvec_index_params_get_diskann_pq_chunk_num(params) == 8); + + // Type-mismatch error path: HNSW params must not accept DiskANN setter + zvec_index_params_t *hnsw = + zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + TEST_ASSERT(hnsw != NULL); + err = zvec_index_params_set_diskann_params(hnsw, 100, 50, 0); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + zvec_index_params_destroy(hnsw); + + // NULL pointer handling + err = zvec_index_params_set_diskann_params(NULL, 100, 50, 0); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + TEST_ASSERT(zvec_index_params_get_diskann_max_degree(NULL) == 0); + TEST_ASSERT(zvec_index_params_get_diskann_list_size(NULL) == 0); + TEST_ASSERT(zvec_index_params_get_diskann_pq_chunk_num(NULL) == 0); + + // to_string should report DiskANN + const char *type_str = zvec_index_type_to_string(ZVEC_INDEX_TYPE_DISKANN); + TEST_ASSERT(type_str != NULL && strcmp(type_str, "DiskANN") == 0); + + zvec_index_params_destroy(params); + TEST_END(); +} + +void test_diskann_query_params_functions(void) { + TEST_START(); + + // Create with default list_size + zvec_diskann_query_params_t *p_default = + zvec_query_params_diskann_create(300); + TEST_ASSERT(p_default != 
NULL); + TEST_ASSERT(zvec_query_params_diskann_get_list_size(p_default) == 300); + zvec_query_params_diskann_destroy(p_default); + + // Create with custom list_size + zvec_diskann_query_params_t *p = zvec_query_params_diskann_create(500); + TEST_ASSERT(p != NULL); + TEST_ASSERT(zvec_query_params_diskann_get_list_size(p) == 500); + + // Set/get list_size + zvec_error_code_t err = zvec_query_params_diskann_set_list_size(p, 1000); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(zvec_query_params_diskann_get_list_size(p) == 1000); + + // Common params: radius + err = zvec_query_params_diskann_set_radius(p, 1.5f); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(zvec_query_params_diskann_get_radius(p) == 1.5f); + + // Common params: is_linear + err = zvec_query_params_diskann_set_is_linear(p, true); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(zvec_query_params_diskann_get_is_linear(p) == true); + + // Common params: is_using_refiner + err = zvec_query_params_diskann_set_is_using_refiner(p, true); + TEST_ASSERT(err == ZVEC_OK); + TEST_ASSERT(zvec_query_params_diskann_get_is_using_refiner(p) == true); + + zvec_query_params_diskann_destroy(p); + + // NULL pointer handling: destroy + zvec_query_params_diskann_destroy(NULL); + + // NULL pointer handling: setters return error + err = zvec_query_params_diskann_set_list_size(NULL, 100); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + err = zvec_query_params_diskann_set_radius(NULL, 0.5f); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + err = zvec_query_params_diskann_set_is_linear(NULL, false); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + err = zvec_query_params_diskann_set_is_using_refiner(NULL, false); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + // NULL pointer handling: getters return safe defaults + TEST_ASSERT(zvec_query_params_diskann_get_list_size(NULL) == 300); + TEST_ASSERT(zvec_query_params_diskann_get_radius(NULL) == 0.0f); + TEST_ASSERT(zvec_query_params_diskann_get_is_linear(NULL) == false); 
+ TEST_ASSERT(zvec_query_params_diskann_get_is_using_refiner(NULL) == false); + + TEST_END(); +} + +void test_diskann_wiring_on_vector_query(void) { + TEST_START(); + + zvec_error_code_t err; + + // Test wiring on zvec_vector_query_t + zvec_vector_query_t *vq = zvec_vector_query_create(); + TEST_ASSERT(vq != NULL); + + zvec_diskann_query_params_t *dp1 = zvec_query_params_diskann_create(400); + TEST_ASSERT(dp1 != NULL); + err = zvec_vector_query_set_diskann_params(vq, dp1); + TEST_ASSERT(err == ZVEC_OK); + + // NULL handling + zvec_diskann_query_params_t *dp_null = + zvec_query_params_diskann_create(100); + err = zvec_vector_query_set_diskann_params(NULL, dp_null); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + zvec_query_params_diskann_destroy(dp_null); + + err = zvec_vector_query_set_diskann_params(vq, NULL); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + zvec_vector_query_destroy(vq); + + // Test wiring on zvec_group_by_vector_query_t + zvec_group_by_vector_query_t *gbq = + zvec_group_by_vector_query_create(); + TEST_ASSERT(gbq != NULL); + + zvec_diskann_query_params_t *dp2 = zvec_query_params_diskann_create(200); + TEST_ASSERT(dp2 != NULL); + err = zvec_group_by_vector_query_set_diskann_params(gbq, dp2); + TEST_ASSERT(err == ZVEC_OK); + + err = zvec_group_by_vector_query_set_diskann_params(NULL, dp2); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + zvec_group_by_vector_query_destroy(gbq); + + // Test wiring on zvec_sub_query_t + zvec_sub_query_t *sq = zvec_sub_query_create(); + TEST_ASSERT(sq != NULL); + + zvec_diskann_query_params_t *dp3 = zvec_query_params_diskann_create(150); + TEST_ASSERT(dp3 != NULL); + err = zvec_sub_query_set_diskann_params(sq, dp3); + TEST_ASSERT(err == ZVEC_OK); + + err = zvec_sub_query_set_diskann_params(NULL, dp3); + TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); + + zvec_sub_query_destroy(sq); + + TEST_END(); +} + // ============================================================================= // Main function 
// ============================================================================= @@ -5966,6 +6221,11 @@ int main(void) { test_fts_wiring_on_vector_query(); test_fts_end_to_end(); + // DiskANN tests + test_diskann_index_params_functions(); + test_diskann_query_params_functions(); + test_diskann_wiring_on_vector_query(); + test_multi_vector_query_with_rrf_reranker(); test_multi_vector_query_with_weighted_reranker(); // Performance tests From eb8d05624f132a6af9532bdc57d5644754eaf159 Mon Sep 17 00:00:00 2001 From: lc285652 Date: Mon, 15 Jun 2026 15:52:06 +0800 Subject: [PATCH 2/2] style: fix clang-format violations in DiskANN additions Co-Authored-By: Claude --- examples/c/diskann_example.c | 57 +++++++++++++++++------------------- src/include/zvec/c_api.h | 17 ++++------- tests/c/c_api_test.c | 13 +++----- 3 files changed, 37 insertions(+), 50 deletions(-) diff --git a/examples/c/diskann_example.c b/examples/c/diskann_example.c index 02434a51c..235196d93 100644 --- a/examples/c/diskann_example.c +++ b/examples/c/diskann_example.c @@ -101,15 +101,16 @@ int main(void) { goto cleanup_schema; } zvec_index_params_set_metric_type(diskann_params, ZVEC_METRIC_TYPE_L2); - zvec_index_params_set_diskann_params(diskann_params, - 64, /* max_degree: graph connectivity */ - 100, /* list_size: build-time candidates */ - 8); /* pq_chunk_num: PQ chunks (0=auto) */ + zvec_index_params_set_diskann_params( + diskann_params, 64, /* max_degree: graph connectivity */ + 100, /* list_size: build-time candidates */ + 8); /* pq_chunk_num: PQ chunks (0=auto) */ - printf(" DiskANN index params: max_degree=%d, list_size=%d, pq_chunk_num=%d\n", - zvec_index_params_get_diskann_max_degree(diskann_params), - zvec_index_params_get_diskann_list_size(diskann_params), - zvec_index_params_get_diskann_pq_chunk_num(diskann_params)); + printf( + " DiskANN index params: max_degree=%d, list_size=%d, pq_chunk_num=%d\n", + zvec_index_params_get_diskann_max_degree(diskann_params), + 
zvec_index_params_get_diskann_list_size(diskann_params), + zvec_index_params_get_diskann_pq_chunk_num(diskann_params)); zvec_field_schema_t *embedding_field = zvec_field_schema_create( "embedding", ZVEC_DATA_TYPE_VECTOR_FP32, false, VECTOR_DIM); @@ -144,12 +145,12 @@ int main(void) { /* ------------------------------------------------------------------ * Step 3: Generate and insert documents * ------------------------------------------------------------------ */ - printf("\n[Step 3] Inserting %d documents with %dD vectors...\n", - NUM_DOCS, VECTOR_DIM); + printf("\n[Step 3] Inserting %d documents with %dD vectors...\n", NUM_DOCS, + VECTOR_DIM); /* Allocate vector storage */ - float (*vectors)[VECTOR_DIM] = - (float (*)[VECTOR_DIM])malloc(NUM_DOCS * VECTOR_DIM * sizeof(float)); + float(*vectors)[VECTOR_DIM] = + (float(*)[VECTOR_DIM])malloc(NUM_DOCS * VECTOR_DIM * sizeof(float)); if (!vectors) { fprintf(stderr, "Failed to allocate vector storage\n"); goto cleanup_collection; @@ -166,11 +167,9 @@ int main(void) { int batch_size = 20; size_t total_success = 0, total_error = 0; - for (int batch_start = 0; batch_start < NUM_DOCS; - batch_start += batch_size) { - int count = batch_start + batch_size > NUM_DOCS - ? NUM_DOCS - batch_start - : batch_size; + for (int batch_start = 0; batch_start < NUM_DOCS; batch_start += batch_size) { + int count = batch_start + batch_size > NUM_DOCS ? 
NUM_DOCS - batch_start + : batch_size; zvec_doc_t **docs = (zvec_doc_t **)malloc((size_t)count * sizeof(zvec_doc_t *)); @@ -203,8 +202,7 @@ int main(void) { } free(docs); } - printf(" Inserted: %zu succeeded, %zu failed\n", total_success, - total_error); + printf(" Inserted: %zu succeeded, %zu failed\n", total_success, total_error); /* ------------------------------------------------------------------ * Step 4: Flush to trigger index build (PQ training + graph construction) @@ -231,8 +229,7 @@ int main(void) { /* Create DiskANN query params — list_size controls the search frontier * (beam width). Larger values improve recall at the cost of latency. */ - zvec_diskann_query_params_t *da_qp = - zvec_query_params_diskann_create(200); + zvec_diskann_query_params_t *da_qp = zvec_query_params_diskann_create(200); if (!da_qp) { fprintf(stderr, "Failed to create DiskANN query params\n"); goto cleanup_vectors; @@ -260,13 +257,13 @@ int main(void) { /* Execute the query */ zvec_doc_t **results = NULL; size_t result_count = 0; - error = zvec_collection_query(collection, - (const zvec_vector_query_t *)query, + error = zvec_collection_query(collection, (const zvec_vector_query_t *)query, &results, &result_count); if (error != ZVEC_OK) { handle_error(error, "executing DiskANN query"); - printf(" (This is expected on non-Linux platforms — DiskANN requires " - "libaio)\n"); + printf( + " (This is expected on non-Linux platforms — DiskANN requires " + "libaio)\n"); } else { printf(" Query returned %zu results:\n", result_count); for (size_t r = 0; r < result_count && r < 5; r++) { @@ -306,12 +303,12 @@ int main(void) { zvec_doc_t **tune_results = NULL; size_t tune_count = 0; - error = zvec_collection_query( - collection, (const zvec_vector_query_t *)tune_query, - &tune_results, &tune_count); + error = zvec_collection_query(collection, + (const zvec_vector_query_t *)tune_query, + &tune_results, &tune_count); if (error == ZVEC_OK) { - printf(" list_size=%3d -> %zu results returned\n", 
- list_sizes[li], tune_count); + printf(" list_size=%3d -> %zu results returned\n", list_sizes[li], + tune_count); zvec_docs_free(tune_results, tune_count); } else { printf(" list_size=%3d -> query failed (expected on non-Linux)\n", diff --git a/src/include/zvec/c_api.h b/src/include/zvec/c_api.h index e8d54199b..09caf1b84 100644 --- a/src/include/zvec/c_api.h +++ b/src/include/zvec/c_api.h @@ -1698,17 +1698,15 @@ zvec_query_params_diskann_destroy(zvec_diskann_query_params_t *params); * @param list_size Search frontier size * @return zvec_error_code_t Error code */ -ZVEC_EXPORT zvec_error_code_t ZVEC_CALL -zvec_query_params_diskann_set_list_size(zvec_diskann_query_params_t *params, - int list_size); +ZVEC_EXPORT zvec_error_code_t ZVEC_CALL zvec_query_params_diskann_set_list_size( + zvec_diskann_query_params_t *params, int list_size); /** * @brief Get search frontier size * @param params DiskANN query parameters pointer * @return int Search frontier size */ -ZVEC_EXPORT int ZVEC_CALL -zvec_query_params_diskann_get_list_size( +ZVEC_EXPORT int ZVEC_CALL zvec_query_params_diskann_get_list_size( const zvec_diskann_query_params_t *params); /** @@ -1726,8 +1724,7 @@ ZVEC_EXPORT zvec_error_code_t ZVEC_CALL zvec_query_params_diskann_set_radius( * @return float Search radius */ ZVEC_EXPORT float ZVEC_CALL -zvec_query_params_diskann_get_radius( - const zvec_diskann_query_params_t *params); +zvec_query_params_diskann_get_radius(const zvec_diskann_query_params_t *params); /** * @brief Set linear search mode (common parameter from QueryParams base) @@ -1735,8 +1732,7 @@ zvec_query_params_diskann_get_radius( * @param is_linear Whether linear search * @return zvec_error_code_t Error code */ -ZVEC_EXPORT zvec_error_code_t ZVEC_CALL -zvec_query_params_diskann_set_is_linear( +ZVEC_EXPORT zvec_error_code_t ZVEC_CALL zvec_query_params_diskann_set_is_linear( zvec_diskann_query_params_t *params, bool is_linear); /** @@ -1744,8 +1740,7 @@ zvec_query_params_diskann_set_is_linear( * @param 
params DiskANN query parameters pointer * @return bool Whether linear search */ -ZVEC_EXPORT bool ZVEC_CALL -zvec_query_params_diskann_get_is_linear( +ZVEC_EXPORT bool ZVEC_CALL zvec_query_params_diskann_get_is_linear( const zvec_diskann_query_params_t *params); /** diff --git a/tests/c/c_api_test.c b/tests/c/c_api_test.c index e62f469a9..ee25ddaa9 100644 --- a/tests/c/c_api_test.c +++ b/tests/c/c_api_test.c @@ -3787,7 +3787,6 @@ void test_query_params_functions(void) { zvec_query_params_diskann_destroy(diskann_params); - // Test boundary cases - null pointer handling zvec_query_params_hnsw_destroy(NULL); zvec_query_params_ivf_destroy(NULL); @@ -5980,8 +5979,7 @@ void test_diskann_index_params_functions(void) { TEST_ASSERT(zvec_index_params_get_diskann_pq_chunk_num(params) == 0); // Default metric type is L2 - TEST_ASSERT(zvec_index_params_get_metric_type(params) == - ZVEC_METRIC_TYPE_L2); + TEST_ASSERT(zvec_index_params_get_metric_type(params) == ZVEC_METRIC_TYPE_L2); // Set and verify custom values zvec_index_params_set_metric_type(params, ZVEC_METRIC_TYPE_COSINE); @@ -5996,8 +5994,7 @@ void test_diskann_index_params_functions(void) { TEST_ASSERT(zvec_index_params_get_diskann_pq_chunk_num(params) == 8); // Type-mismatch error path: HNSW params must not accept DiskANN setter - zvec_index_params_t *hnsw = - zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); + zvec_index_params_t *hnsw = zvec_index_params_create(ZVEC_INDEX_TYPE_HNSW); TEST_ASSERT(hnsw != NULL); err = zvec_index_params_set_diskann_params(hnsw, 100, 50, 0); TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); @@ -6092,8 +6089,7 @@ void test_diskann_wiring_on_vector_query(void) { TEST_ASSERT(err == ZVEC_OK); // NULL handling - zvec_diskann_query_params_t *dp_null = - zvec_query_params_diskann_create(100); + zvec_diskann_query_params_t *dp_null = zvec_query_params_diskann_create(100); err = zvec_vector_query_set_diskann_params(NULL, dp_null); TEST_ASSERT(err == ZVEC_ERROR_INVALID_ARGUMENT); 
zvec_query_params_diskann_destroy(dp_null); @@ -6104,8 +6100,7 @@ void test_diskann_wiring_on_vector_query(void) { zvec_vector_query_destroy(vq); // Test wiring on zvec_group_by_vector_query_t - zvec_group_by_vector_query_t *gbq = - zvec_group_by_vector_query_create(); + zvec_group_by_vector_query_t *gbq = zvec_group_by_vector_query_create(); TEST_ASSERT(gbq != NULL); zvec_diskann_query_params_t *dp2 = zvec_query_params_diskann_create(200);