From 4cfe76eb5ceec143b3acdf719eeb21f7c30443a1 Mon Sep 17 00:00:00 2001 From: "jiliang.ljl" Date: Mon, 15 Jun 2026 21:55:27 +0800 Subject: [PATCH] fix(python): add missing FTS type annotations to stubs and wrappers - Add FtsIndexParam/FtsQueryParam class stubs to model/param/__init__.pyi - Add Fts/FtsIndexParam/FtsQueryParam to zvec/__init__.pyi exports - Extend Query.param type to include FtsQueryParam - Update Query._validate to allow fts + FtsQueryParam combination - Extend FieldSchema.index_param type to support FtsIndexParam - Extend Collection.create_index type to support FtsIndexParam - Reject query params that are not FtsQueryParam in SQLEngineImpl::parse_fts_query --- python/zvec/__init__.pyi | 7 +- python/zvec/model/collection.py | 5 +- python/zvec/model/param/__init__.pyi | 103 +++++++++++++++++++++++ python/zvec/model/param/query.py | 22 +++-- python/zvec/model/schema/field_schema.py | 22 +++-- src/db/sqlengine/sqlengine_impl.cc | 5 ++ 6 files changed, 147 insertions(+), 17 deletions(-) diff --git a/python/zvec/__init__.pyi b/python/zvec/__init__.pyi index dd468cae1..cefa15b01 100644 --- a/python/zvec/__init__.pyi +++ b/python/zvec/__init__.pyi @@ -19,6 +19,8 @@ from .model.param import ( DiskAnnIndexParam, DiskAnnQueryParam, FlatIndexParam, + FtsIndexParam, + FtsQueryParam, HnswIndexParam, HnswQueryParam, HnswRabitqIndexParam, @@ -31,7 +33,7 @@ from .model.param import ( VamanaIndexParam, VamanaQueryParam, ) -from .model.param.query import Query, VectorQuery +from .model.param.query import Fts, Query, VectorQuery from .model.schema import CollectionSchema, CollectionStats, FieldSchema, VectorSchema from .tool import require_module from .typing import ( @@ -60,6 +62,9 @@ __all__: list = [ "DocList", "FieldSchema", "FlatIndexParam", + "Fts", + "FtsIndexParam", + "FtsQueryParam", "HnswIndexParam", "HnswQueryParam", "HnswRabitqIndexParam", diff --git a/python/zvec/model/collection.py b/python/zvec/model/collection.py index de16753a6..3d1ae3834 100644 --- a/python/zvec/model/collection.py +++ b/python/zvec/model/collection.py @@ -28,6
+28,7 @@ AlterColumnOption, CollectionOption, FlatIndexParam, + FtsIndexParam, HnswIndexParam, HnswRabitqIndexParam, IndexOption, @@ -112,6 +113,7 @@ def create_index( IVFIndexParam, FlatIndexParam, InvertIndexParam, + FtsIndexParam, ], option: IndexOption = IndexOption(), ) -> None: @@ -119,10 +121,11 @@ def create_index( Vector index types (HNSW, IVF, FLAT) can only be applied to vector fields. Inverted index (`InvertIndexParam`) is for scalar fields. + FTS index (`FtsIndexParam`) is for full-text search on STRING fields. Args: field_name (str): Name of the field to index. - index_param (Union[HnswIndexParam, HnswRabitqIndexParam, IVFIndexParam, FlatIndexParam, InvertIndexParam]): + index_param (Union[HnswIndexParam, HnswRabitqIndexParam, IVFIndexParam, FlatIndexParam, InvertIndexParam, FtsIndexParam]): Index configuration. option (Optional[IndexOption], optional): Index creation options. Defaults to ``IndexOption()``. diff --git a/python/zvec/model/param/__init__.pyi b/python/zvec/model/param/__init__.pyi index 759b41348..c1d227280 100644 --- a/python/zvec/model/param/__init__.pyi +++ b/python/zvec/model/param/__init__.pyi @@ -14,6 +14,8 @@ __all__: list[str] = [ "AlterColumnOption", "CollectionOption", "FlatIndexParam", + "FtsIndexParam", + "FtsQueryParam", "HnswIndexParam", "HnswQueryParam", "HnswRabitqIndexParam", @@ -677,6 +679,107 @@ class VamanaQueryParam(QueryParam): def prefetch_lines(self) -> int: """int: Override of prefetch cache lines per vector (0=auto).""" +class FtsIndexParam(IndexParam): + """ + + Parameters for configuring a full-text search (FTS) index. + + Controls the tokenizer pipeline used during indexing and querying. + + Attributes: + type (IndexType): Always ``IndexType.FTS``. + tokenizer_name (str): Name of the tokenizer (e.g., "standard", "jieba"). + Default is "standard". + filters (list[str]): List of token filter names applied after tokenization. + Default is ["lowercase"]. 
+ extra_params (str): Additional parameters passed to the tokenizer. + Default is "". + + Examples: + >>> params = FtsIndexParam(tokenizer_name="jieba", filters=["lowercase"]) + >>> print(params.tokenizer_name) + jieba + """ + + def __getstate__(self) -> tuple: ... + def __init__( + self, + tokenizer_name: str = "standard", + filters: list[str] = ..., + extra_params: str = "", + ) -> None: + """ + Constructs an FtsIndexParam instance. + + Args: + tokenizer_name (str, optional): Tokenizer name. Defaults to "standard". + filters (list[str], optional): Token filter names. Defaults to ["lowercase"]. + extra_params (str, optional): Extra tokenizer parameters. Defaults to "". + """ + + def __repr__(self) -> str: ... + def __setstate__(self, arg0: tuple) -> None: ... + def to_dict(self) -> dict: + """ + Convert to dictionary with all fields + """ + + @property + def tokenizer_name(self) -> str: + """ + str: Name of the tokenizer. + """ + + @property + def filters(self) -> list[str]: + """ + list[str]: Token filter names. + """ + + @property + def extra_params(self) -> str: + """ + str: Additional tokenizer parameters. + """ + +class FtsQueryParam(QueryParam): + """ + + Query parameters for full-text search (FTS) index. + + Controls the default boolean operator used to combine adjacent bare terms + in a query string. + + Attributes: + type (IndexType): Always ``IndexType.FTS``. + default_operator (str): Default boolean operator for adjacent bare terms. + Supported values (case-insensitive): "OR" (default), "AND". + + Examples: + >>> params = FtsQueryParam(default_operator="AND") + >>> print(params.default_operator) + AND + """ + def __getstate__(self) -> tuple: ... + def __init__( + self, + default_operator: str = "", + ) -> None: + """ + Constructs an FtsQueryParam instance. + + Args: + default_operator (str, optional): Default boolean operator for adjacent + bare terms. Supported: "OR", "AND". Defaults to "" (uses engine default). + """ + def __repr__(self) -> str: ... 
+ def __setstate__(self, arg0: tuple) -> None: ... + @property + def default_operator(self) -> str: + """ + str: Default boolean operator for bare terms. + """ + class IndexOption: """ diff --git a/python/zvec/model/param/query.py b/python/zvec/model/param/query.py index f2c15ecd2..17b63d6d2 100644 --- a/python/zvec/model/param/query.py +++ b/python/zvec/model/param/query.py @@ -18,7 +18,7 @@ from typing import Optional, Union from ...common import VectorType -from . import HnswQueryParam, HnswRabitqQueryParam, IVFQueryParam +from . import FtsQueryParam, HnswQueryParam, HnswRabitqQueryParam, IVFQueryParam __all__ = ["Fts", "Query", "VectorQuery"] @@ -53,8 +53,8 @@ class Query: field_name (str): Name of the field to query. id (Optional[str], optional): Document ID to fetch vector from. Default is None. vector (VectorType, optional): Explicit query vector. Default is None. - param (Optional[Union[HnswQueryParam, IVFQueryParam]], optional): - Index-specific query parameters for vector search. Default is None. + param (Optional[Union[HnswQueryParam, HnswRabitqQueryParam, IVFQueryParam, FtsQueryParam]], optional): + Index-specific query parameters. Default is None. fts (Optional[Fts], optional): Full-text search parameters. Default is None. Examples: @@ -72,12 +72,20 @@ class Query: ... field_name="content", ... fts=Fts(match_string="machine learning") ... ) + >>> # FTS query with custom operator + >>> q4 = zvec.Query( + ... field_name="content", + ... fts=Fts(match_string="machine learning"), + ... param=FtsQueryParam(default_operator="AND") + ... 
) """ field_name: str id: Optional[str] = None vector: VectorType = None - param: Optional[Union[HnswQueryParam, HnswRabitqQueryParam, IVFQueryParam]] = None + param: Optional[ + Union[HnswQueryParam, HnswRabitqQueryParam, IVFQueryParam, FtsQueryParam] + ] = None fts: Optional[Fts] = None def has_id(self) -> bool: @@ -111,11 +119,9 @@ def _validate(self) -> None: raise ValueError("Field name cannot be empty") if self.id and self.vector: raise ValueError("Cannot provide both id and vector") - if self.has_fts() and ( - self.has_vector() or self.has_id() or self.param is not None - ): + if self.has_fts() and (self.has_vector() or self.has_id()): raise ValueError( - "Cannot combine fts with vector search fields (id/vector/param) in a single Query" + "Cannot combine fts with vector search fields (id/vector) in a single Query" ) if self.fts is not None and self.fts.query_string and self.fts.match_string: raise ValueError( diff --git a/python/zvec/model/schema/field_schema.py b/python/zvec/model/schema/field_schema.py index 1af00b5da..ff1099710 100644 --- a/python/zvec/model/schema/field_schema.py +++ b/python/zvec/model/schema/field_schema.py @@ -20,6 +20,7 @@ from zvec.model.param import ( FlatIndexParam, + FtsIndexParam, HnswIndexParam, HnswRabitqIndexParam, InvertIndexParam, @@ -72,19 +73,26 @@ class FieldSchema: data_type (DataType): Data type of the field (e.g., INT64, STRING). nullable (bool, optional): Whether the field can contain null values. Defaults to False. - index_param (Optional[InvertIndexParam], optional): Inverted index - parameters for this field. Only applicable to fields that support - indexing (e.g., scalar fields used in filtering). Defaults to None. + index_param (Optional[Union[InvertIndexParam, FtsIndexParam]], optional): + Index parameters for this field. Use ``InvertIndexParam`` for scalar + inverted indexing, or ``FtsIndexParam`` for full-text search indexing + on STRING fields. Defaults to None. 
Examples: >>> from zvec.typing import DataType - >>> from zvec.model.param import InvertIndexParam + >>> from zvec.model.param import InvertIndexParam, FtsIndexParam >>> id_field = FieldSchema( ... name="id", ... data_type=DataType.INT64, ... nullable=False, ... index_param=InvertIndexParam(enable_range_optimization=True) ... ) + >>> content_field = FieldSchema( + ... name="content", + ... data_type=DataType.STRING, + ... nullable=False, + ... index_param=FtsIndexParam(tokenizer_name="standard") + ... ) """ def __init__( @@ -92,7 +100,7 @@ def __init__( name: str, data_type: DataType, nullable: bool = False, - index_param: Optional[InvertIndexParam] = None, + index_param: Optional[Union[InvertIndexParam, FtsIndexParam]] = None, ): if name is None or not isinstance(name, str): raise ValueError( @@ -141,8 +149,8 @@ def nullable(self) -> bool: return self._cpp_obj.nullable @property - def index_param(self) -> Optional[InvertIndexParam]: - """Optional[InvertIndexParam]: Inverted index configuration, if any.""" + def index_param(self) -> Optional[Union[InvertIndexParam, FtsIndexParam]]: + """Optional[Union[InvertIndexParam, FtsIndexParam]]: Index configuration, if any.""" return self._cpp_obj.index_param def __dict__(self) -> dict[str, Any]: diff --git a/src/db/sqlengine/sqlengine_impl.cc b/src/db/sqlengine/sqlengine_impl.cc index 69d8c7fe7..c0489ff35 100644 --- a/src/db/sqlengine/sqlengine_impl.cc +++ b/src/db/sqlengine/sqlengine_impl.cc @@ -171,6 +171,11 @@ Result SQLEngineImpl::parse_fts_query( } auto *fts_query_param = dynamic_cast(query_params.get()); + if (query_params && !fts_query_param) { + return tl::make_unexpected(Status::InvalidArgument( + "FTS query only accepts FtsQueryParam, got incompatible query param " + "type")); + } // Determine default operator once, shared by both query_string and // match_string paths. Accept "and"/"or" case-insensitively, empty means OR;