diff --git a/README.md b/README.md
index 8ef72ea..73b5a33 100644
--- a/README.md
+++ b/README.md
@@ -25,14 +25,14 @@ Endee is a Java client for a local vector database designed for maximum speed an
 <dependency>
     <groupId>io.endee</groupId>
     <artifactId>endee-java-client</artifactId>
-    <version>1.0.0</version>
+    <version>0.1.1</version>
 </dependency>
 ```
 
 ### Gradle
 
 ```groovy
-implementation 'io.endee:endee-java-client:1.0.0'
+implementation 'io.endee:endee-java-client:0.1.1'
 ```
 
 ## Quick Start
diff --git a/src/main/java/io/endee/client/Index.java b/src/main/java/io/endee/client/Index.java
index 4335be2..756c929 100644
--- a/src/main/java/io/endee/client/Index.java
+++ b/src/main/java/io/endee/client/Index.java
@@ -219,6 +219,12 @@ public List query(QueryOptions options) {
         if (options.getEf() > MAX_EF) {
             throw new IllegalArgumentException("ef search cannot be greater than " + MAX_EF);
         }
+        if (options.getPrefilterCardinalityThreshold() < 1_000 || options.getPrefilterCardinalityThreshold() > 1_000_000) {
+            throw new IllegalArgumentException("prefilterCardinalityThreshold must be between 1,000 and 1,000,000");
+        }
+        if (options.getFilterBoostPercentage() < 0 || options.getFilterBoostPercentage() > 100) {
+            throw new IllegalArgumentException("filterBoostPercentage must be between 0 and 100");
+        }
 
         boolean hasSparse = options.getSparseIndices() != null && options.getSparseIndices().length > 0
                 && options.getSparseValues() != null && options.getSparseValues().length > 0;
@@ -258,6 +264,11 @@ public List query(QueryOptions options) {
             data.put("filter", JsonUtils.toJson(options.getFilter()));
         }
 
+        Map filterParams = new HashMap<>();
+        filterParams.put("prefilter_cardinality_threshold", options.getPrefilterCardinalityThreshold());
+        filterParams.put("filter_boost_percentage", options.getFilterBoostPercentage());
+        data.put("filter_params", filterParams);
+
         try {
             String jsonBody = JsonUtils.toJson(data);
             HttpRequest request = buildPostJsonRequest("/index/" + name + "/search", jsonBody);
diff --git a/src/main/java/io/endee/client/types/QueryOptions.java b/src/main/java/io/endee/client/types/QueryOptions.java
index 002d70d..b687fb1 100644
--- a/src/main/java/io/endee/client/types/QueryOptions.java
+++ b/src/main/java/io/endee/client/types/QueryOptions.java
@@ -19,6 +19,8 @@
  * }
  */
 public class QueryOptions {
+    private static final int DEFAULT_PREFILTER_CARDINALITY_THRESHOLD = 10_000;
+
     private double[] vector;
     private int topK;
     private List> filter;
@@ -26,6 +28,8 @@ public class QueryOptions {
     private boolean includeVectors = false;
     private int[] sparseIndices;
     private double[] sparseValues;
+    private int prefilterCardinalityThreshold = DEFAULT_PREFILTER_CARDINALITY_THRESHOLD;
+    private int filterBoostPercentage = 0;
 
     private QueryOptions() {}
 
@@ -40,6 +44,8 @@ public static Builder builder() {
     public boolean isIncludeVectors() { return includeVectors; }
     public int[] getSparseIndices() { return sparseIndices; }
     public double[] getSparseValues() { return sparseValues; }
+    public int getPrefilterCardinalityThreshold() { return prefilterCardinalityThreshold; }
+    public int getFilterBoostPercentage() { return filterBoostPercentage; }
 
     public static class Builder {
         private final QueryOptions options = new QueryOptions();
@@ -86,6 +92,25 @@ public Builder sparseValues(double[] sparseValues) {
             return this;
         }
 
+        /**
+         * Sets the prefilter cardinality threshold. When the estimated number of
+         * matching vectors exceeds this value, postfiltering is used instead.
+         * Must be between 1,000 and 1,000,000. Default: 10,000.
+         */
+        public Builder prefilterCardinalityThreshold(int prefilterCardinalityThreshold) {
+            options.prefilterCardinalityThreshold = prefilterCardinalityThreshold;
+            return this;
+        }
+
+        /**
+         * Sets the filter boost percentage (0-100). Higher values bias results
+         * toward filter matches. Default: 0.
+         */
+        public Builder filterBoostPercentage(int filterBoostPercentage) {
+            options.filterBoostPercentage = filterBoostPercentage;
+            return this;
+        }
+
         public QueryOptions build() {
             return options;
         }