Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion python/zvec/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ from .model.param import (
DiskAnnIndexParam,
DiskAnnQueryParam,
FlatIndexParam,
FtsIndexParam,
FtsQueryParam,
HnswIndexParam,
HnswQueryParam,
HnswRabitqIndexParam,
Expand All @@ -31,7 +33,7 @@ from .model.param import (
VamanaIndexParam,
VamanaQueryParam,
)
from .model.param.query import Query, VectorQuery
from .model.param.query import Fts, Query, VectorQuery
from .model.schema import CollectionSchema, CollectionStats, FieldSchema, VectorSchema
from .tool import require_module
from .typing import (
Expand Down Expand Up @@ -60,6 +62,9 @@ __all__: list = [
"DocList",
"FieldSchema",
"FlatIndexParam",
"Fts",
"FtsIndexParam",
"FtsQueryParam",
"HnswIndexParam",
"HnswQueryParam",
"HnswRabitqIndexParam",
Expand Down
5 changes: 4 additions & 1 deletion python/zvec/model/collection.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
AlterColumnOption,
CollectionOption,
FlatIndexParam,
FtsIndexParam,
HnswIndexParam,
HnswRabitqIndexParam,
IndexOption,
Expand Down Expand Up @@ -112,17 +113,19 @@ def create_index(
IVFIndexParam,
FlatIndexParam,
InvertIndexParam,
FtsIndexParam,
],
option: IndexOption = IndexOption(),
) -> None:
"""Create an index on a field.

Vector index types (HNSW, IVF, FLAT) can only be applied to vector fields.
Inverted index (`InvertIndexParam`) is for scalar fields.
FTS index (`FtsIndexParam`) is for full-text search on STRING fields.

Args:
field_name (str): Name of the field to index.
index_param (Union[HnswIndexParam, HnswRabitqIndexParam, IVFIndexParam, FlatIndexParam, InvertIndexParam]):
index_param (Union[HnswIndexParam, HnswRabitqIndexParam, IVFIndexParam, FlatIndexParam, InvertIndexParam, FtsIndexParam]):
Index configuration.
option (Optional[IndexOption], optional): Index creation options.
Defaults to ``IndexOption()``.
Expand Down
103 changes: 103 additions & 0 deletions python/zvec/model/param/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ __all__: list[str] = [
"AlterColumnOption",
"CollectionOption",
"FlatIndexParam",
"FtsIndexParam",
"FtsQueryParam",
"HnswIndexParam",
"HnswQueryParam",
"HnswRabitqIndexParam",
Expand Down Expand Up @@ -677,6 +679,107 @@ class VamanaQueryParam(QueryParam):
def prefetch_lines(self) -> int:
"""int: Override of prefetch cache lines per vector (0=auto)."""

class FtsIndexParam(IndexParam):
    """
    Configuration for a full-text search (FTS) index.

    Describes the tokenizer pipeline that is applied both when documents are
    indexed and when queries are evaluated.

    Attributes:
        type (IndexType): Always ``IndexType.FTS``.
        tokenizer_name (str): Tokenizer to use (e.g., "standard", "jieba").
            Defaults to "standard".
        filters (list[str]): Names of the token filters that run after
            tokenization. Defaults to ["lowercase"].
        extra_params (str): Extra parameters forwarded to the tokenizer.
            Defaults to "".

    Examples:
        >>> params = FtsIndexParam(tokenizer_name="jieba", filters=["lowercase"])
        >>> print(params.tokenizer_name)
        jieba
    """

    def __getstate__(self) -> tuple: ...

    def __init__(
        self,
        tokenizer_name: str = "standard",
        filters: list[str] = ...,
        extra_params: str = "",
    ) -> None:
        """
        Build an FtsIndexParam.

        Args:
            tokenizer_name (str, optional): Name of the tokenizer.
                Defaults to "standard".
            filters (list[str], optional): Names of the token filters.
                Defaults to ["lowercase"].
            extra_params (str, optional): Additional tokenizer parameters.
                Defaults to "".
        """

    def __repr__(self) -> str: ...

    def __setstate__(self, arg0: tuple) -> None: ...

    def to_dict(self) -> dict:
        """
        Return every field of this parameter object as a dictionary.
        """

    @property
    def tokenizer_name(self) -> str:
        """
        str: The tokenizer's name.
        """

    @property
    def filters(self) -> list[str]:
        """
        list[str]: Names of the token filters.
        """

    @property
    def extra_params(self) -> str:
        """
        str: Extra parameters for the tokenizer.
        """

class FtsQueryParam(QueryParam):
    """
    Query-time parameters for a full-text search (FTS) index.

    Selects the boolean operator that joins adjacent bare terms of a query
    string when no operator is written between them.

    Attributes:
        type (IndexType): Always ``IndexType.FTS``.
        default_operator (str): Boolean operator applied between adjacent bare
            terms. Supported values (case-insensitive): "OR" (default), "AND".

    Examples:
        >>> params = FtsQueryParam(default_operator="AND")
        >>> print(params.default_operator)
        AND
    """

    def __getstate__(self) -> tuple: ...

    def __init__(
        self,
        default_operator: str = "",
    ) -> None:
        """
        Build an FtsQueryParam.

        Args:
            default_operator (str, optional): Boolean operator applied between
                adjacent bare terms. Supported: "OR", "AND". Defaults to ""
                (uses engine default).
        """

    def __repr__(self) -> str: ...

    def __setstate__(self, arg0: tuple) -> None: ...

    @property
    def default_operator(self) -> str:
        """
        str: Boolean operator applied between bare terms by default.
        """

class IndexOption:
"""

Expand Down
22 changes: 14 additions & 8 deletions python/zvec/model/param/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from typing import Optional, Union

from ...common import VectorType
from . import HnswQueryParam, HnswRabitqQueryParam, IVFQueryParam
from . import FtsQueryParam, HnswQueryParam, HnswRabitqQueryParam, IVFQueryParam

__all__ = ["Fts", "Query", "VectorQuery"]

Expand Down Expand Up @@ -53,8 +53,8 @@ class Query:
field_name (str): Name of the field to query.
id (Optional[str], optional): Document ID to fetch vector from. Default is None.
vector (VectorType, optional): Explicit query vector. Default is None.
param (Optional[Union[HnswQueryParam, IVFQueryParam]], optional):
Index-specific query parameters for vector search. Default is None.
param (Optional[Union[HnswQueryParam, HnswRabitqQueryParam, IVFQueryParam, FtsQueryParam]], optional):
Index-specific query parameters. Default is None.
fts (Optional[Fts], optional): Full-text search parameters. Default is None.

Examples:
Expand All @@ -72,12 +72,20 @@ class Query:
... field_name="content",
... fts=Fts(match_string="machine learning")
... )
>>> # FTS query with custom operator
>>> q4 = zvec.Query(
... field_name="content",
... fts=Fts(match_string="machine learning"),
... param=FtsQueryParam(default_operator="AND")
... )
"""

field_name: str
id: Optional[str] = None
vector: VectorType = None
param: Optional[Union[HnswQueryParam, HnswRabitqQueryParam, IVFQueryParam]] = None
param: Optional[
Union[HnswQueryParam, HnswRabitqQueryParam, IVFQueryParam, FtsQueryParam]
] = None
fts: Optional[Fts] = None

def has_id(self) -> bool:
Expand Down Expand Up @@ -111,11 +119,9 @@ def _validate(self) -> None:
raise ValueError("Field name cannot be empty")
if self.id and self.vector:
raise ValueError("Cannot provide both id and vector")
if self.has_fts() and (
self.has_vector() or self.has_id() or self.param is not None
):
if self.has_fts() and (self.has_vector() or self.has_id()):
raise ValueError(
"Cannot combine fts with vector search fields (id/vector/param) in a single Query"
"Cannot combine fts with vector search fields (id/vector) in a single Query"
)
if self.fts is not None and self.fts.query_string and self.fts.match_string:
raise ValueError(
Expand Down
22 changes: 15 additions & 7 deletions python/zvec/model/schema/field_schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@

from zvec.model.param import (
FlatIndexParam,
FtsIndexParam,
HnswIndexParam,
HnswRabitqIndexParam,
InvertIndexParam,
Expand Down Expand Up @@ -72,27 +73,34 @@ class FieldSchema:
data_type (DataType): Data type of the field (e.g., INT64, STRING).
nullable (bool, optional): Whether the field can contain null values.
Defaults to False.
index_param (Optional[InvertIndexParam], optional): Inverted index
parameters for this field. Only applicable to fields that support
indexing (e.g., scalar fields used in filtering). Defaults to None.
index_param (Optional[Union[InvertIndexParam, FtsIndexParam]], optional):
Index parameters for this field. Use ``InvertIndexParam`` for scalar
inverted indexing, or ``FtsIndexParam`` for full-text search indexing
on STRING fields. Defaults to None.

Examples:
>>> from zvec.typing import DataType
>>> from zvec.model.param import InvertIndexParam
>>> from zvec.model.param import InvertIndexParam, FtsIndexParam
>>> id_field = FieldSchema(
... name="id",
... data_type=DataType.INT64,
... nullable=False,
... index_param=InvertIndexParam(enable_range_optimization=True)
... )
>>> content_field = FieldSchema(
... name="content",
... data_type=DataType.STRING,
... nullable=False,
... index_param=FtsIndexParam(tokenizer_name="standard")
... )
"""

def __init__(
self,
name: str,
data_type: DataType,
nullable: bool = False,
index_param: Optional[InvertIndexParam] = None,
index_param: Optional[Union[InvertIndexParam, FtsIndexParam]] = None,
):
if name is None or not isinstance(name, str):
raise ValueError(
Expand Down Expand Up @@ -141,8 +149,8 @@ def nullable(self) -> bool:
return self._cpp_obj.nullable

@property
def index_param(self) -> Optional[InvertIndexParam]:
"""Optional[InvertIndexParam]: Inverted index configuration, if any."""
def index_param(self) -> Optional[Union[InvertIndexParam, FtsIndexParam]]:
"""Optional[Union[InvertIndexParam, FtsIndexParam]]: Index configuration, if any."""
return self._cpp_obj.index_param

def __dict__(self) -> dict[str, Any]:
Expand Down
5 changes: 5 additions & 0 deletions src/db/sqlengine/sqlengine_impl.cc
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,11 @@ Result<FtsCondInfo::Ptr> SQLEngineImpl::parse_fts_query(
}

auto *fts_query_param = dynamic_cast<FtsQueryParams *>(query_params.get());
if (query_params && !fts_query_param) {
return tl::make_unexpected(Status::InvalidArgument(
"FTS query only accepts FtsQueryParam, got incompatible query param "
"type"));
}

// Determine default operator once, shared by both query_string and
// match_string paths. Accept "and"/"or" case-insensitively, empty means OR;
Expand Down
Loading