diff --git a/.changeset/index-optimization-partial-and-or.md b/.changeset/index-optimization-partial-and-or.md new file mode 100644 index 000000000..469975ad8 --- /dev/null +++ b/.changeset/index-optimization-partial-and-or.md @@ -0,0 +1,12 @@ +--- +'@tanstack/db': patch +--- + +Fix incorrect results from index-optimized `where` clauses that combine indexed and non-indexed conditions. + +- `OR` expressions are now only served from indexes when every disjunct can use an index; otherwise the query falls back to a full scan. Previously, rows matched only by a non-indexed disjunct were missing from the result. +- `AND` expressions still use indexes for the conditions that have them, but the remaining conditions are now enforced by re-checking each candidate row against the full expression. Previously, non-indexed conditions were silently dropped, returning rows that did not match the query. +- Compound range conditions (e.g. `age > 5 AND age < 10`) combined with conditions on other fields no longer ignore those other conditions. +- Compound range conditions sharing the same boundary value (e.g. `age >= 5 AND age > 5`) now apply the strictest bound regardless of the order the conditions appear in, using the same value comparison semantics as the indexes (dates, locale strings, ...). +- Compound range conditions that only bound one side (e.g. `age > 5 AND age >= 8`) no longer return an empty result. +- Strict range comparisons (`gt`/`lt`) on BTree-indexed fields holding normalized values such as dates now correctly exclude the boundary value. diff --git a/packages/db/src/collection/change-events.ts b/packages/db/src/collection/change-events.ts index 6afac412c..3f4977b7b 100644 --- a/packages/db/src/collection/change-events.ts +++ b/packages/db/src/collection/change-events.ts @@ -138,11 +138,16 @@ export function currentStateAsChanges< ) if (optimizationResult.canOptimize) { - // Use index optimization + // Use index optimization. When the index lookup is inexact, the keys + // are a superset of the true result (some conditions could not be + // served by an index), so re-check each row against the full expression. + const filterFn = optimizationResult.isExact + ? undefined + : createFilterFunctionFromExpression(expression) const result: Array, TKey>> = [] for (const key of optimizationResult.matchingKeys) { const value = collection.get(key) - if (value !== undefined) { + if (value !== undefined && (filterFn?.(value) ?? true)) { result.push({ type: `insert`, key, diff --git a/packages/db/src/indexes/btree-index.ts b/packages/db/src/indexes/btree-index.ts index 17608950a..1d04d928d 100644 --- a/packages/db/src/indexes/btree-index.ts +++ b/packages/db/src/indexes/btree-index.ts @@ -247,7 +247,10 @@ export class BTreeIndex< toKey, toInclusive, (indexedValue, _) => { - if (!fromInclusive && this.compareFn(indexedValue, from) === 0) { + // Compare against the normalized key: indexed values are stored + // normalized (e.g. dates as timestamps), the raw `from` would + // never compare equal to them + if (!fromInclusive && this.compareFn(indexedValue, fromKey) === 0) { // the B+ tree `forRange` method does not support exclusive lower bounds // so we need to exclude it manually return diff --git a/packages/db/src/utils/index-optimization.ts b/packages/db/src/utils/index-optimization.ts index 81b111af5..7d88ddc1e 100644 --- a/packages/db/src/utils/index-optimization.ts +++ b/packages/db/src/utils/index-optimization.ts @@ -18,6 +18,7 @@ import { DEFAULT_COMPARE_OPTIONS } from '../utils.js' import { ReverseIndex } from '../indexes/reverse-index.js' import { hasVirtualPropPath } from '../virtual-props.js' +import { makeComparator } from './comparison.js' import type { CompareOptions } from '../query/builder/types.js' import type { IndexInterface, IndexOperation } from '../indexes/base-index.js' import type { BasicExpression } from '../query/ir.js' @@ -29,6 +30,13 @@ import type { CollectionLike } from '../types.js' export interface OptimizationResult { canOptimize: boolean matchingKeys: Set + /** + * Whether `matchingKeys` is exactly the set of keys matching the expression. + * When `false`, the keys are a superset of the true result (some conditions + * could not be served by an index) and each row must be re-checked against + * the full expression before being included in the result. + */ + isExact: boolean } /** @@ -134,7 +142,7 @@ function optimizeQueryRecursive( } } - return { canOptimize: false, matchingKeys: new Set() } + return { canOptimize: false, matchingKeys: new Set(), isExact: false } } /** @@ -167,6 +175,14 @@ export function canOptimizeExpression< return false } +/** + * Result of compound range optimization, including which AND arguments + * were covered by the range query so the caller can process the rest. + */ +interface CompoundRangeResult extends OptimizationResult { + coveredArgIndices: Set +} + /** * Optimizes compound range queries on the same field * Example: WHERE age > 5 AND age < 10 @@ -177,9 +193,14 @@ function optimizeCompoundRangeQuery< >( expression: BasicExpression, collection: CollectionLike, -): OptimizationResult { +): CompoundRangeResult { if (expression.type !== `func` || expression.args.length < 2) { - return { canOptimize: false, matchingKeys: new Set() } + return { + canOptimize: false, + matchingKeys: new Set(), + isExact: false, + coveredArgIndices: new Set(), + } } // Group range operations by field @@ -188,11 +209,12 @@ function optimizeCompoundRangeQuery< Array<{ operation: `gt` | `gte` | `lt` | `lte` value: any + argIndex: number }> >() // Collect all range operations from AND arguments - for (const arg of expression.args) { + for (const [argIndex, arg] of expression.args.entries()) { if (arg.type === `func` && [`gt`, `gte`, `lt`, `lte`].includes(arg.name)) { const rangeOp = arg as any if (rangeOp.args.length === 2) { @@ -238,7 +260,7 @@ function optimizeCompoundRangeQuery< if (!fieldOperations.has(fieldKey)) { fieldOperations.set(fieldKey, []) } - fieldOperations.get(fieldKey)!.push({ operation, value }) + fieldOperations.get(fieldKey)!.push({ operation, value, argIndex }) } } } @@ -251,7 +273,18 @@ function optimizeCompoundRangeQuery< const index = findIndexForField(collection, fieldPath) if (index && index.supports(`gt`) && index.supports(`lt`)) { - // Build range query options + // Compare values with the same semantics the index uses (dates, + // locale strings, ...), in ascending order since bounds are about + // value order regardless of the index direction + const compare = makeComparator({ + ...DEFAULT_COMPARE_OPTIONS, + ...collection.compareOptions, + direction: `asc`, + }) + + // Build range query options, keeping the strictest bound on each + // side: a larger lower bound (or smaller upper bound) wins, and at + // equal values the exclusive operation wins over the inclusive one let from: any = undefined let to: any = undefined let fromInclusive = true @@ -260,45 +293,59 @@ function optimizeCompoundRangeQuery< for (const { operation, value } of operations) { switch (operation) { case `gt`: - if (from === undefined || value > from) { + case `gte`: { + const cmp = from === undefined ? 1 : compare(value, from) + if (cmp > 0) { from = value + fromInclusive = operation === `gte` + } else if (cmp === 0 && operation === `gt`) { fromInclusive = false } break - case `gte`: - if (from === undefined || value > from) { - from = value - fromInclusive = true - } - break + } case `lt`: - if (to === undefined || value < to) { + case `lte`: { + const cmp = to === undefined ? -1 : compare(value, to) + if (cmp < 0) { to = value + toInclusive = operation === `lte` + } else if (cmp === 0 && operation === `lt`) { toInclusive = false } break - case `lte`: - if (to === undefined || value < to) { - to = value - toInclusive = true - } - break + } } } - const matchingKeys = (index as any).rangeQuery({ - from, - to, - fromInclusive, - toInclusive, - }) + // Only pass the bounds that exist: rangeQuery distinguishes an + // absent bound (open-ended) from an explicit undefined value + const rangeOptions: Record = {} + if (from !== undefined) { + rangeOptions.from = from + rangeOptions.fromInclusive = fromInclusive + } + if (to !== undefined) { + rangeOptions.to = to + rangeOptions.toInclusive = toInclusive + } + const matchingKeys = (index as any).rangeQuery(rangeOptions) - return { canOptimize: true, matchingKeys } + return { + canOptimize: true, + matchingKeys, + isExact: true, + coveredArgIndices: new Set(operations.map((op) => op.argIndex)), + } } } } - return { canOptimize: false, matchingKeys: new Set() } + return { + canOptimize: false, + matchingKeys: new Set(), + isExact: false, + coveredArgIndices: new Set(), + } } /** @@ -312,7 +359,7 @@ function optimizeSimpleComparison< collection: CollectionLike, ): OptimizationResult { if (expression.type !== `func` || expression.args.length !== 2) { - return { canOptimize: false, matchingKeys: new Set() } + return { canOptimize: false, matchingKeys: new Set(), isExact: false } } const leftArg = expression.args[0]! @@ -362,15 +409,15 @@ function optimizeSimpleComparison< // Check if the index supports this operation if (!index.supports(indexOperation)) { - return { canOptimize: false, matchingKeys: new Set() } + return { canOptimize: false, matchingKeys: new Set(), isExact: false } } const matchingKeys = index.lookup(indexOperation, queryValue) - return { canOptimize: true, matchingKeys } + return { canOptimize: true, matchingKeys, isExact: true } } } - return { canOptimize: false, matchingKeys: new Set() } + return { canOptimize: false, matchingKeys: new Set(), isExact: false } } /** @@ -412,22 +459,38 @@ function optimizeAndExpression( collection: CollectionLike, ): OptimizationResult { if (expression.type !== `func` || expression.args.length < 2) { - return { canOptimize: false, matchingKeys: new Set() } + return { canOptimize: false, matchingKeys: new Set(), isExact: false } } // First, try to optimize compound range queries on the same field + // (e.g. age > 5 AND age < 10 becomes a single range query) const compoundRangeResult = optimizeCompoundRangeQuery(expression, collection) - if (compoundRangeResult.canOptimize) { - return compoundRangeResult - } + const coveredArgIndices = compoundRangeResult.canOptimize + ? compoundRangeResult.coveredArgIndices + : new Set() const results: Array> = [] + if (compoundRangeResult.canOptimize) { + results.push(compoundRangeResult) + } - // Try to optimize each part, keep the optimizable ones - for (const arg of expression.args) { + // Try to optimize the remaining conjuncts, keep the optimizable ones. + // Conjuncts that cannot use an index make the result inexact: the + // intersection is then a superset of the true result and must be + // re-filtered against the full expression by the caller. + let allConjunctsExact = true + for (const [argIndex, arg] of expression.args.entries()) { + if (coveredArgIndices.has(argIndex)) { + continue + } const result = optimizeQueryRecursive(arg, collection) if (result.canOptimize) { results.push(result) + if (!result.isExact) { + allConjunctsExact = false + } + } else { + allConjunctsExact = false } } @@ -435,10 +498,14 @@ function optimizeAndExpression( // Use intersectSets utility for AND logic const allMatchingSets = results.map((r) => r.matchingKeys) const intersectedKeys = intersectSets(allMatchingSets) - return { canOptimize: true, matchingKeys: intersectedKeys } + return { + canOptimize: true, + matchingKeys: intersectedKeys, + isExact: allConjunctsExact, + } } - return { canOptimize: false, matchingKeys: new Set() } + return { canOptimize: false, matchingKeys: new Set(), isExact: false } } /** @@ -464,27 +531,31 @@ function optimizeOrExpression( collection: CollectionLike, ): OptimizationResult { if (expression.type !== `func` || expression.args.length < 2) { - return { canOptimize: false, matchingKeys: new Set() } + return { canOptimize: false, matchingKeys: new Set(), isExact: false } } const results: Array> = [] - // Try to optimize each part, keep the optimizable ones + // Every disjunct must be optimizable: rows matched only by a disjunct + // that cannot use an index would be missing from the union, and no + // post-filtering can recover them. In that case fall back to a full scan. for (const arg of expression.args) { const result = optimizeQueryRecursive(arg, collection) - if (result.canOptimize) { - results.push(result) + if (!result.canOptimize) { + return { canOptimize: false, matchingKeys: new Set(), isExact: false } } + results.push(result) } - if (results.length > 0) { - // Use unionSets utility for OR logic - const allMatchingSets = results.map((r) => r.matchingKeys) - const unionedKeys = unionSets(allMatchingSets) - return { canOptimize: true, matchingKeys: unionedKeys } + // Use unionSets utility for OR logic + const allMatchingSets = results.map((r) => r.matchingKeys) + const unionedKeys = unionSets(allMatchingSets) + return { + canOptimize: true, + matchingKeys: unionedKeys, + // An inexact (superset) disjunct makes the union a superset as well + isExact: results.every((r) => r.isExact), } - - return { canOptimize: false, matchingKeys: new Set() } } /** @@ -498,8 +569,9 @@ function canOptimizeOrExpression< return false } - // If any argument can be optimized, we can gain some speedup - return expression.args.some((arg) => canOptimizeExpression(arg, collection)) + // Every disjunct must be optimizable, otherwise the union would miss + // rows matched only by the non-optimizable disjuncts + return expression.args.every((arg) => canOptimizeExpression(arg, collection)) } /** @@ -513,7 +585,7 @@ function optimizeInArrayExpression< collection: CollectionLike, ): OptimizationResult { if (expression.type !== `func` || expression.args.length !== 2) { - return { canOptimize: false, matchingKeys: new Set() } + return { canOptimize: false, matchingKeys: new Set(), isExact: false } } const fieldArg = expression.args[0]! @@ -532,7 +604,7 @@ function optimizeInArrayExpression< // Check if the index supports IN operation if (index.supports(`in`)) { const matchingKeys = index.lookup(`in`, values) - return { canOptimize: true, matchingKeys } + return { canOptimize: true, matchingKeys, isExact: true } } else if (index.supports(`eq`)) { // Fallback to multiple equality lookups const matchingKeys = new Set() @@ -542,12 +614,12 @@ function optimizeInArrayExpression< matchingKeys.add(key) } } - return { canOptimize: true, matchingKeys } + return { canOptimize: true, matchingKeys, isExact: true } } } } - return { canOptimize: false, matchingKeys: new Set() } + return { canOptimize: false, matchingKeys: new Set(), isExact: false } } /**