From cde2fd89a8e955d8e1fc221473eb243b700b19b7 Mon Sep 17 00:00:00 2001 From: Kevin De Porre Date: Wed, 10 Jun 2026 14:31:51 +0200 Subject: [PATCH 1/2] fix: enforce all where conditions when index optimization is partial OR expressions now require every disjunct to be index-optimizable; otherwise the query falls back to a full scan, since rows matched only by a non-optimizable disjunct cannot be recovered from index lookups. AND expressions keep partial index optimization but the optimizer now reports whether the matching keys are exact. When they are a superset (some conjuncts could not use an index, or a compound range was combined with other conditions), currentStateAsChanges re-checks each candidate row against the full where expression. Co-Authored-By: Claude Fable 5 --- .../index-optimization-partial-and-or.md | 9 ++ packages/db/src/collection/change-events.ts | 9 +- packages/db/src/utils/index-optimization.ts | 128 +++++++++++++----- 3 files changed, 108 insertions(+), 38 deletions(-) create mode 100644 .changeset/index-optimization-partial-and-or.md diff --git a/.changeset/index-optimization-partial-and-or.md b/.changeset/index-optimization-partial-and-or.md new file mode 100644 index 000000000..93296a50d --- /dev/null +++ b/.changeset/index-optimization-partial-and-or.md @@ -0,0 +1,9 @@ +--- +'@tanstack/db': patch +--- + +Fix incorrect results from index-optimized `where` clauses that combine indexed and non-indexed conditions. + +- `OR` expressions are now only served from indexes when every disjunct can use an index; otherwise the query falls back to a full scan. Previously, rows matched only by a non-indexed disjunct were missing from the result. +- `AND` expressions still use indexes for the conditions that have them, but the remaining conditions are now enforced by re-checking each candidate row against the full expression. Previously, non-indexed conditions were silently dropped, returning rows that did not match the query. +- Compound range conditions (e.g. `age > 5 AND age < 10`) combined with conditions on other fields no longer ignore those other conditions. diff --git a/packages/db/src/collection/change-events.ts b/packages/db/src/collection/change-events.ts index 6afac412c..3f4977b7b 100644 --- a/packages/db/src/collection/change-events.ts +++ b/packages/db/src/collection/change-events.ts @@ -138,11 +138,16 @@ export function currentStateAsChanges< ) if (optimizationResult.canOptimize) { - // Use index optimization + // Use index optimization. When the index lookup is inexact, the keys + // are a superset of the true result (some conditions could not be + // served by an index), so re-check each row against the full expression. + const filterFn = optimizationResult.isExact + ? undefined + : createFilterFunctionFromExpression(expression) const result: Array, TKey>> = [] for (const key of optimizationResult.matchingKeys) { const value = collection.get(key) - if (value !== undefined) { + if (value !== undefined && (filterFn?.(value) ?? true)) { result.push({ type: `insert`, key, diff --git a/packages/db/src/utils/index-optimization.ts b/packages/db/src/utils/index-optimization.ts index 81b111af5..7bb98ad47 100644 --- a/packages/db/src/utils/index-optimization.ts +++ b/packages/db/src/utils/index-optimization.ts @@ -29,6 +29,13 @@ import type { CollectionLike } from '../types.js' export interface OptimizationResult { canOptimize: boolean matchingKeys: Set + /** + * Whether `matchingKeys` is exactly the set of keys matching the expression. + * When `false`, the keys are a superset of the true result (some conditions + * could not be served by an index) and each row must be re-checked against + * the full expression before being included in the result. + */ + isExact: boolean } /** @@ -134,7 +141,7 @@ function optimizeQueryRecursive( } } - return { canOptimize: false, matchingKeys: new Set() } + return { canOptimize: false, matchingKeys: new Set(), isExact: false } } /** @@ -167,6 +174,14 @@ export function canOptimizeExpression< return false } +/** + * Result of compound range optimization, including which AND arguments + * were covered by the range query so the caller can process the rest. + */ +interface CompoundRangeResult extends OptimizationResult { + coveredArgIndices: Set +} + /** * Optimizes compound range queries on the same field * Example: WHERE age > 5 AND age < 10 @@ -177,9 +192,14 @@ function optimizeCompoundRangeQuery< >( expression: BasicExpression, collection: CollectionLike, -): OptimizationResult { +): CompoundRangeResult { if (expression.type !== `func` || expression.args.length < 2) { - return { canOptimize: false, matchingKeys: new Set() } + return { + canOptimize: false, + matchingKeys: new Set(), + isExact: false, + coveredArgIndices: new Set(), + } } // Group range operations by field @@ -188,11 +208,12 @@ function optimizeCompoundRangeQuery< Array<{ operation: `gt` | `gte` | `lt` | `lte` value: any + argIndex: number }> >() // Collect all range operations from AND arguments - for (const arg of expression.args) { + for (const [argIndex, arg] of expression.args.entries()) { if (arg.type === `func` && [`gt`, `gte`, `lt`, `lte`].includes(arg.name)) { const rangeOp = arg as any if (rangeOp.args.length === 2) { @@ -238,7 +259,7 @@ function optimizeCompoundRangeQuery< if (!fieldOperations.has(fieldKey)) { fieldOperations.set(fieldKey, []) } - fieldOperations.get(fieldKey)!.push({ operation, value }) + fieldOperations.get(fieldKey)!.push({ operation, value, argIndex }) } } } @@ -293,12 +314,22 @@ function optimizeCompoundRangeQuery< toInclusive, }) - return { canOptimize: true, matchingKeys } + return { + canOptimize: true, + matchingKeys, + isExact: true, + coveredArgIndices: new Set(operations.map((op) => op.argIndex)), + } } } } - return { canOptimize: false, matchingKeys: new Set() } + return { + canOptimize: false, + matchingKeys: new Set(), + isExact: false, + coveredArgIndices: new Set(), + } } /** @@ -312,7 +343,7 @@ function optimizeSimpleComparison< collection: CollectionLike, ): OptimizationResult { if (expression.type !== `func` || expression.args.length !== 2) { - return { canOptimize: false, matchingKeys: new Set() } + return { canOptimize: false, matchingKeys: new Set(), isExact: false } } const leftArg = expression.args[0]! @@ -362,15 +393,15 @@ function optimizeSimpleComparison< // Check if the index supports this operation if (!index.supports(indexOperation)) { - return { canOptimize: false, matchingKeys: new Set() } + return { canOptimize: false, matchingKeys: new Set(), isExact: false } } const matchingKeys = index.lookup(indexOperation, queryValue) - return { canOptimize: true, matchingKeys } + return { canOptimize: true, matchingKeys, isExact: true } } } - return { canOptimize: false, matchingKeys: new Set() } + return { canOptimize: false, matchingKeys: new Set(), isExact: false } } /** @@ -412,22 +443,38 @@ function optimizeAndExpression( collection: CollectionLike, ): OptimizationResult { if (expression.type !== `func` || expression.args.length < 2) { - return { canOptimize: false, matchingKeys: new Set() } + return { canOptimize: false, matchingKeys: new Set(), isExact: false } } // First, try to optimize compound range queries on the same field + // (e.g. age > 5 AND age < 10 becomes a single range query) const compoundRangeResult = optimizeCompoundRangeQuery(expression, collection) - if (compoundRangeResult.canOptimize) { - return compoundRangeResult - } + const coveredArgIndices = compoundRangeResult.canOptimize + ? compoundRangeResult.coveredArgIndices + : new Set() const results: Array> = [] + if (compoundRangeResult.canOptimize) { + results.push(compoundRangeResult) + } - // Try to optimize each part, keep the optimizable ones - for (const arg of expression.args) { + // Try to optimize the remaining conjuncts, keep the optimizable ones. + // Conjuncts that cannot use an index make the result inexact: the + // intersection is then a superset of the true result and must be + // re-filtered against the full expression by the caller. + let allConjunctsExact = true + for (const [argIndex, arg] of expression.args.entries()) { + if (coveredArgIndices.has(argIndex)) { + continue + } const result = optimizeQueryRecursive(arg, collection) if (result.canOptimize) { results.push(result) + if (!result.isExact) { + allConjunctsExact = false + } + } else { + allConjunctsExact = false } } @@ -435,10 +482,14 @@ function optimizeAndExpression( // Use intersectSets utility for AND logic const allMatchingSets = results.map((r) => r.matchingKeys) const intersectedKeys = intersectSets(allMatchingSets) - return { canOptimize: true, matchingKeys: intersectedKeys } + return { + canOptimize: true, + matchingKeys: intersectedKeys, + isExact: allConjunctsExact, + } } - return { canOptimize: false, matchingKeys: new Set() } + return { canOptimize: false, matchingKeys: new Set(), isExact: false } } /** @@ -464,27 +515,31 @@ function optimizeOrExpression( collection: CollectionLike, ): OptimizationResult { if (expression.type !== `func` || expression.args.length < 2) { - return { canOptimize: false, matchingKeys: new Set() } + return { canOptimize: false, matchingKeys: new Set(), isExact: false } } const results: Array> = [] - // Try to optimize each part, keep the optimizable ones + // Every disjunct must be optimizable: rows matched only by a disjunct + // that cannot use an index would be missing from the union, and no + // post-filtering can recover them. In that case fall back to a full scan. for (const arg of expression.args) { const result = optimizeQueryRecursive(arg, collection) - if (result.canOptimize) { - results.push(result) + if (!result.canOptimize) { + return { canOptimize: false, matchingKeys: new Set(), isExact: false } } + results.push(result) } - if (results.length > 0) { - // Use unionSets utility for OR logic - const allMatchingSets = results.map((r) => r.matchingKeys) - const unionedKeys = unionSets(allMatchingSets) - return { canOptimize: true, matchingKeys: unionedKeys } + // Use unionSets utility for OR logic + const allMatchingSets = results.map((r) => r.matchingKeys) + const unionedKeys = unionSets(allMatchingSets) + return { + canOptimize: true, + matchingKeys: unionedKeys, + // An inexact (superset) disjunct makes the union a superset as well + isExact: results.every((r) => r.isExact), } - - return { canOptimize: false, matchingKeys: new Set() } } /** @@ -498,8 +553,9 @@ function canOptimizeOrExpression< return false } - // If any argument can be optimized, we can gain some speedup - return expression.args.some((arg) => canOptimizeExpression(arg, collection)) + // Every disjunct must be optimizable, otherwise the union would miss + // rows matched only by the non-optimizable disjuncts + return expression.args.every((arg) => canOptimizeExpression(arg, collection)) } /** @@ -513,7 +569,7 @@ function optimizeInArrayExpression< collection: CollectionLike, ): OptimizationResult { if (expression.type !== `func` || expression.args.length !== 2) { - return { canOptimize: false, matchingKeys: new Set() } + return { canOptimize: false, matchingKeys: new Set(), isExact: false } } const fieldArg = expression.args[0]! @@ -532,7 +588,7 @@ function optimizeInArrayExpression< // Check if the index supports IN operation if (index.supports(`in`)) { const matchingKeys = index.lookup(`in`, values) - return { canOptimize: true, matchingKeys } + return { canOptimize: true, matchingKeys, isExact: true } } else if (index.supports(`eq`)) { // Fallback to multiple equality lookups const matchingKeys = new Set() @@ -542,12 +598,12 @@ function optimizeInArrayExpression< matchingKeys.add(key) } } - return { canOptimize: true, matchingKeys } + return { canOptimize: true, matchingKeys, isExact: true } } } } - return { canOptimize: false, matchingKeys: new Set() } + return { canOptimize: false, matchingKeys: new Set(), isExact: false } } /** From 559a10df0894fc39f3776229d4723e4edcb61e71 Mon Sep 17 00:00:00 2001 From: Kevin De Porre Date: Wed, 10 Jun 2026 14:31:56 +0200 Subject: [PATCH 2/2] fix: apply strictest bound in compound range queries and fix related range edge cases Compound range conditions sharing a boundary value (gte(x,5) AND gt(x,5)) now keep the strict bound regardless of argument order. Bound values are compared with the same comparator the indexes use so dates and locale strings behave correctly. Two further issues surfaced by the regression tests: - One-sided compound ranges passed an explicit undefined bound to rangeQuery, which treats present-but-undefined as the undefined sentinel and returned an empty result. Bounds are now only passed when they exist. - BTreeIndex's exclusive lower bound check compared the normalized indexed value against the raw query value, so gt on date fields included the boundary row. It now compares against the normalized key. Co-Authored-By: Claude Fable 5 --- .../index-optimization-partial-and-or.md | 3 + packages/db/src/indexes/btree-index.ts | 5 +- packages/db/src/utils/index-optimization.ts | 58 ++++++++++++------- 3 files changed, 44 insertions(+), 22 deletions(-) diff --git a/.changeset/index-optimization-partial-and-or.md b/.changeset/index-optimization-partial-and-or.md index 93296a50d..469975ad8 100644 --- a/.changeset/index-optimization-partial-and-or.md +++ b/.changeset/index-optimization-partial-and-or.md @@ -7,3 +7,6 @@ Fix incorrect results from index-optimized `where` clauses that combine indexed - `OR` expressions are now only served from indexes when every disjunct can use an index; otherwise the query falls back to a full scan. Previously, rows matched only by a non-indexed disjunct were missing from the result. - `AND` expressions still use indexes for the conditions that have them, but the remaining conditions are now enforced by re-checking each candidate row against the full expression. Previously, non-indexed conditions were silently dropped, returning rows that did not match the query. - Compound range conditions (e.g. `age > 5 AND age < 10`) combined with conditions on other fields no longer ignore those other conditions. +- Compound range conditions sharing the same boundary value (e.g. `age >= 5 AND age > 5`) now apply the strictest bound regardless of the order the conditions appear in, using the same value comparison semantics as the indexes (dates, locale strings, ...). +- Compound range conditions that only bound one side (e.g. `age > 5 AND age >= 8`) no longer return an empty result. +- Strict range comparisons (`gt`/`lt`) on BTree-indexed fields holding normalized values such as dates now correctly exclude the boundary value. diff --git a/packages/db/src/indexes/btree-index.ts b/packages/db/src/indexes/btree-index.ts index 17608950a..1d04d928d 100644 --- a/packages/db/src/indexes/btree-index.ts +++ b/packages/db/src/indexes/btree-index.ts @@ -247,7 +247,10 @@ export class BTreeIndex< toKey, toInclusive, (indexedValue, _) => { - if (!fromInclusive && this.compareFn(indexedValue, from) === 0) { + // Compare against the normalized key: indexed values are stored + // normalized (e.g. dates as timestamps), the raw `from` would + // never compare equal to them + if (!fromInclusive && this.compareFn(indexedValue, fromKey) === 0) { // the B+ tree `forRange` method does not support exclusive lower bounds // so we need to exclude it manually return diff --git a/packages/db/src/utils/index-optimization.ts b/packages/db/src/utils/index-optimization.ts index 7bb98ad47..7d88ddc1e 100644 --- a/packages/db/src/utils/index-optimization.ts +++ b/packages/db/src/utils/index-optimization.ts @@ -18,6 +18,7 @@ import { DEFAULT_COMPARE_OPTIONS } from '../utils.js' import { ReverseIndex } from '../indexes/reverse-index.js' import { hasVirtualPropPath } from '../virtual-props.js' +import { makeComparator } from './comparison.js' import type { CompareOptions } from '../query/builder/types.js' import type { IndexInterface, IndexOperation } from '../indexes/base-index.js' import type { BasicExpression } from '../query/ir.js' @@ -272,7 +273,18 @@ function optimizeCompoundRangeQuery< const index = findIndexForField(collection, fieldPath) if (index && index.supports(`gt`) && index.supports(`lt`)) { - // Build range query options + // Compare values with the same semantics the index uses (dates, + // locale strings, ...), in ascending order since bounds are about + // value order regardless of the index direction + const compare = makeComparator({ + ...DEFAULT_COMPARE_OPTIONS, + ...collection.compareOptions, + direction: `asc`, + }) + + // Build range query options, keeping the strictest bound on each + // side: a larger lower bound (or smaller upper bound) wins, and at + // equal values the exclusive operation wins over the inclusive one let from: any = undefined let to: any = undefined let fromInclusive = true @@ -281,38 +293,42 @@ function optimizeCompoundRangeQuery< for (const { operation, value } of operations) { switch (operation) { case `gt`: - if (from === undefined || value > from) { + case `gte`: { + const cmp = from === undefined ? 1 : compare(value, from) + if (cmp > 0) { from = value + fromInclusive = operation === `gte` + } else if (cmp === 0 && operation === `gt`) { fromInclusive = false } break - case `gte`: - if (from === undefined || value > from) { - from = value - fromInclusive = true - } - break + } case `lt`: - if (to === undefined || value < to) { + case `lte`: { + const cmp = to === undefined ? -1 : compare(value, to) + if (cmp < 0) { to = value + toInclusive = operation === `lte` + } else if (cmp === 0 && operation === `lt`) { toInclusive = false } break - case `lte`: - if (to === undefined || value < to) { - to = value - toInclusive = true - } - break + } } } - const matchingKeys = (index as any).rangeQuery({ - from, - to, - fromInclusive, - toInclusive, - }) + // Only pass the bounds that exist: rangeQuery distinguishes an + // absent bound (open-ended) from an explicit undefined value + const rangeOptions: Record = {} + if (from !== undefined) { + rangeOptions.from = from + rangeOptions.fromInclusive = fromInclusive + } + if (to !== undefined) { + rangeOptions.to = to + rangeOptions.toInclusive = toInclusive + } + const matchingKeys = (index as any).rangeQuery(rangeOptions) return { canOptimize: true,