From 0ad06235ecfe646df74bb3ad1d5faea7a3d98391 Mon Sep 17 00:00:00 2001 From: "jiliang.ljl" Date: Mon, 1 Jun 2026 22:42:11 +0800 Subject: [PATCH] perf: add block-max skip and score early-exit to ConjunctionIterator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Block-max skip: skip entire non-competitive blocks (128 docs) in do_next() by checking block_max_info_for() score upper bounds - Score early-exit: short-circuit score() accumulation when remaining upper bound cannot beat the threshold - Phrase forwarding: PhraseDocIterator propagates min_competitive_score to its inner conjunction, enabling the above optimizations for phrase queries - optIsRequired: when must-only block_max < threshold, promote should clauses to required so docs without any should match are skipped - DisjunctionIterator::advance() bypasses the WAND loop to avoid incorrectly pruning target docs when used as a should clause Benchmark (500k docs, quora dataset, BitPacked mode): AND queries: 22-38% faster (topk=10: 3.65s→2.86s, topk=3: 3.63s→2.26s) Phrase queries: 33% faster (205s→137s) --- .../iterator/fts_conjunction_iterator.cc | 98 ++++++ .../iterator/fts_conjunction_iterator.h | 11 + .../iterator/fts_disjunction_iterator.cc | 24 +- .../fts_column/iterator/fts_phrase_iterator.h | 4 + .../fts_column/fts_conjunction_opt_test.cc | 286 ++++++++++++++++++ 5 files changed, 422 insertions(+), 1 deletion(-) create mode 100644 tests/db/index/column/fts_column/fts_conjunction_opt_test.cc diff --git a/src/db/index/column/fts_column/iterator/fts_conjunction_iterator.cc b/src/db/index/column/fts_column/iterator/fts_conjunction_iterator.cc index dacd2e1c6..2f9a6bb9d 100644 --- a/src/db/index/column/fts_column/iterator/fts_conjunction_iterator.cc +++ b/src/db/index/column/fts_column/iterator/fts_conjunction_iterator.cc @@ -104,6 +104,59 @@ uint32_t ConjunctionIterator::advance(uint32_t target) { return cached_doc_id_; } +uint32_t ConjunctionIterator::skip_non_competitive_blocks(uint32_t candidate) { + while (true) { + float block_max_sum = 0.0f; + float must_only_sum = 0.0f; + uint32_t min_block_end = NO_MORE_DOCS; + + for (auto &iter : must_iterators_) { + auto info = iter->block_max_info_for(candidate); + block_max_sum += info.block_max_score; + must_only_sum += info.block_max_score; + if (info.block_last_doc < min_block_end) { + min_block_end = info.block_last_doc; + } + } + for (auto &iter : should_iterators_) { + auto info = iter->block_max_info_for(candidate); + block_max_sum += info.block_max_score; + if (info.block_last_doc < min_block_end) { + min_block_end = info.block_last_doc; + } + } + + // All iterators returned NO_MORE_DOCS — no block info, pass through + if (min_block_end == NO_MORE_DOCS) { + block_max_up_to_ = NO_MORE_DOCS; + must_block_max_sum_ = must_only_sum; + opt_is_required_ = (!should_iterators_.empty() && + must_block_max_sum_ < min_competitive_score_); + return candidate; + } + + if (block_max_sum >= min_competitive_score_) { + // Current block is competitive + block_max_up_to_ = min_block_end; + must_block_max_sum_ = must_only_sum; + opt_is_required_ = (!should_iterators_.empty() && + must_block_max_sum_ < min_competitive_score_); + return candidate; + } + + // Current block is non-competitive, skip to the next block + uint32_t next_block_start = min_block_end + 1; + if (next_block_start == 0) { + // overflow: min_block_end was MAX → 已耗尽 + return NO_MORE_DOCS; + } + candidate = must_iterators_[0]->advance(next_block_start); + if (candidate == NO_MORE_DOCS) { + return NO_MORE_DOCS; + } + } +} + uint32_t ConjunctionIterator::do_next(uint32_t candidate) { if (candidate == NO_MORE_DOCS) { return NO_MORE_DOCS; @@ -130,9 +183,39 @@ uint32_t ConjunctionIterator::do_next(uint32_t candidate) { } if (all_match) { + // Block-Max: skip non-competitive blocks before must_not check + if (min_competitive_score_ > 0.0f && candidate > block_max_up_to_) { + uint32_t orig = candidate; + candidate = skip_non_competitive_blocks(candidate); + if (candidate == NO_MORE_DOCS) { + return NO_MORE_DOCS; + } + if (candidate != orig) { + continue; + } + } + // All must iterators agree on this candidate // Check must_not exclusion if (!is_excluded(candidate)) { + // optIsRequired: should clauses promoted to required + if (opt_is_required_) { + bool any_should_match = false; + for (auto &iter : should_iterators_) { + uint32_t doc = iter->advance(candidate); + if (doc == candidate && iter->matches()) { + any_should_match = true; + break; + } + } + if (!any_should_match) { + candidate = must_iterators_[0]->next_doc(); + if (candidate == NO_MORE_DOCS) { + return NO_MORE_DOCS; + } + continue; + } + } return candidate; } // Excluded by must_not, advance lead to next doc @@ -167,15 +250,30 @@ bool ConjunctionIterator::matches() { float ConjunctionIterator::score() { float total = 0.0f; + float remaining_max = cached_max_score_; + for (auto &iter : must_iterators_) { + remaining_max -= iter->cached_max_score_; total += iter->score(); + // accumulated + remaining upper bound < threshold — cannot compete + if (min_competitive_score_ > 0.0f && + total + remaining_max < min_competitive_score_) { + return total; + } } + for (auto &iter : should_iterators_) { + remaining_max -= iter->cached_max_score_; uint32_t doc = iter->advance(cached_doc_id_); if (doc == cached_doc_id_ && iter->matches()) { total += iter->score(); } + if (min_competitive_score_ > 0.0f && + total + remaining_max < min_competitive_score_) { + return total; + } } + return total; } diff --git a/src/db/index/column/fts_column/iterator/fts_conjunction_iterator.h b/src/db/index/column/fts_column/iterator/fts_conjunction_iterator.h index 1c3ba26ce..9ec9a7854 100644 --- a/src/db/index/column/fts_column/iterator/fts_conjunction_iterator.h +++ b/src/db/index/column/fts_column/iterator/fts_conjunction_iterator.h @@ -62,12 +62,23 @@ class ConjunctionIterator : public DocIterator { // Check if candidate doc_id is excluded by any must_not iterator bool is_excluded(uint32_t candidate); + // Block-Max: skip blocks whose score upper bound cannot compete. + // Returns the first candidate whose block can potentially compete, + // or NO_MORE_DOCS if exhausted. + uint32_t skip_non_competitive_blocks(uint32_t candidate); + private: // must_iterators_[0] is the lead (lowest cost) std::vector must_iterators_; std::vector must_not_iterators_; std::vector should_iterators_; float min_competitive_score_{0.0f}; + // Block-Max: upper bound of doc_id range already verified as competitive + uint32_t block_max_up_to_{0}; + // optIsRequired: must-only block_max sum for current block + float must_block_max_sum_{0.0f}; + // optIsRequired: whether should iterators are upgraded to required + bool opt_is_required_{false}; }; } // namespace zvec::fts diff --git a/src/db/index/column/fts_column/iterator/fts_disjunction_iterator.cc b/src/db/index/column/fts_column/iterator/fts_disjunction_iterator.cc index 785f7f0fd..371ec8715 100644 --- a/src/db/index/column/fts_column/iterator/fts_disjunction_iterator.cc +++ b/src/db/index/column/fts_column/iterator/fts_disjunction_iterator.cc @@ -223,7 +223,29 @@ uint32_t DisjunctionIterator::advance(uint32_t target) { iter->advance(target); } } - return next_doc(); + + // Bypass the WAND loop — advance() is a seek operation that must not + // apply competitive-score pruning. When this DisjunctionIterator serves + // as a should clause, the caller (ConjunctionIterator::score()) relies on + // advance() returning target if it exists; WAND block-max pruning would + // incorrectly skip target and silently lose the should score. + resort_postings(); + + if (postings_.empty() || postings_[0]->cached_doc_id_ == NO_MORE_DOCS) { + cached_doc_id_ = NO_MORE_DOCS; + return NO_MORE_DOCS; + } + + uint32_t doc = postings_[0]->cached_doc_id_; + for (size_t i = 0; i < postings_.size(); ++i) { + if (postings_[i]->cached_doc_id_ == doc) { + matching_iterators_.push_back(postings_[i]); + } else { + break; + } + } + cached_doc_id_ = doc; + return doc; } bool DisjunctionIterator::matches() { diff --git a/src/db/index/column/fts_column/iterator/fts_phrase_iterator.h b/src/db/index/column/fts_column/iterator/fts_phrase_iterator.h index 6af4df2c4..c1cabe903 100644 --- a/src/db/index/column/fts_column/iterator/fts_phrase_iterator.h +++ b/src/db/index/column/fts_column/iterator/fts_phrase_iterator.h @@ -56,6 +56,10 @@ class PhraseDocIterator : public DocIterator { uint64_t cost() const override; float max_score() const override; + void set_min_competitive_score(float min_score) override { + conjunction_->set_min_competitive_score(min_score); + } + private: // Verify that terms appear at consecutive positions in the document. // Issues a single MultiGet across the unique terms in the phrase, decodes diff --git a/tests/db/index/column/fts_column/fts_conjunction_opt_test.cc b/tests/db/index/column/fts_column/fts_conjunction_opt_test.cc new file mode 100644 index 000000000..9c88461eb --- /dev/null +++ b/tests/db/index/column/fts_column/fts_conjunction_opt_test.cc @@ -0,0 +1,286 @@ +// Copyright 2025-present the zvec project +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include +#include +#include +#include "db/index/column/fts_column/iterator/fts_conjunction_iterator.h" +#include "db/index/column/fts_column/iterator/fts_disjunction_iterator.h" +#include "db/index/column/fts_column/iterator/fts_doc_iterator.h" + +using zvec::fts::ConjunctionIterator; +using zvec::fts::DisjunctionIterator; +using zvec::fts::DocIterator; +using zvec::fts::DocIteratorPtr; + +namespace { + +constexpr uint32_t kNoMore = DocIterator::NO_MORE_DOCS; + +// --------------------------------------------------------------------------- +// FakeDocIterator: controllable posting list with per-doc scores and +// per-block max score metadata. +// --------------------------------------------------------------------------- +class FakeDocIterator : public DocIterator { + public: + // Each entry: {doc_id, score} + // block_maxes: {block_last_doc -> block_max_score} + FakeDocIterator(std::vector> entries, + float max_score_val, + std::map block_maxes = {}) + : entries_(std::move(entries)), + max_score_val_(max_score_val), + block_maxes_(std::move(block_maxes)) { + cached_max_score_ = max_score_val_; + } + + uint32_t next_doc() override { + ++pos_; + if (pos_ >= entries_.size()) { + cached_doc_id_ = kNoMore; + return kNoMore; + } + cached_doc_id_ = entries_[pos_].first; + return cached_doc_id_; + } + + uint32_t advance(uint32_t target) override { + while (pos_ + 1 < entries_.size() && entries_[pos_ + 1].first < target) { + ++pos_; + } + return next_doc(); + } + + float score() override { + ++score_call_count_; + if (pos_ < entries_.size()) { + return entries_[pos_].second; + } + return 0.0f; + } + + uint64_t cost() const override { + return entries_.size(); + } + + float max_score() const override { + return max_score_val_; + } + + BlockMaxInfo block_max_info_for(uint32_t target) const override { + if (block_maxes_.empty()) { + return {max_score_val_, kNoMore}; + } + // Find the first block whose last_doc >= target + for (const auto &[last_doc, bm_score] : block_maxes_) { + if (last_doc >= target) { + return {bm_score, last_doc}; + } + } + return {0.0f, kNoMore}; + } + + int score_call_count() const { + return score_call_count_; + } + + private: + std::vector> entries_; + float max_score_val_; + std::map block_maxes_; + size_t pos_{SIZE_MAX}; // before first element + int score_call_count_{0}; +}; + +// Collect all doc_ids from an iterator +std::vector collect_docs(DocIterator *iter) { + std::vector docs; + uint32_t doc = iter->next_doc(); + while (doc != kNoMore) { + if (iter->matches()) { + docs.push_back(doc); + } + doc = iter->next_doc(); + } + return docs; +} + +} // namespace + +// ============================================================ +// Optimization 1: Block-Max skip +// ============================================================ + +// Block 0 [0..127] block_max sum = 1.0 < threshold 2.0 → skipped +// Block 1 [128..255] block_max sum = 10.0 >= 2.0 → kept +TEST(ConjunctionOptTest, BlockMaxSkipNonCompetitiveBlocks) { + std::vector> list1 = { + {10, 0.3f}, {50, 0.4f}, {130, 4.0f}, {200, 3.5f}}; + std::vector> list2 = { + {10, 0.2f}, {50, 0.3f}, {130, 5.0f}, {200, 4.0f}}; + + std::map bm1 = {{127, 0.5f}, {255, 5.0f}}; + std::map bm2 = {{127, 0.5f}, {255, 5.0f}}; + + std::vector musts; + musts.push_back(std::make_unique(list1, 5.0f, bm1)); + musts.push_back(std::make_unique(list2, 5.0f, bm2)); + + ConjunctionIterator conj(std::move(musts), std::vector{}); + conj.set_min_competitive_score(2.0f); + + auto docs = collect_docs(&conj); + ASSERT_EQ(docs.size(), 2u); + EXPECT_EQ(docs[0], 130u); + EXPECT_EQ(docs[1], 200u); +} + +// 3 blocks, threshold skips block 0 and block 1, only block 2 survives +TEST(ConjunctionOptTest, BlockMaxSkipMultipleBlocks) { + std::vector> list1 = { + {10, 0.1f}, {130, 1.5f}, {260, 4.0f}}; + std::vector> list2 = { + {10, 0.1f}, {130, 1.0f}, {260, 4.5f}}; + + std::map bm1 = {{127, 0.5f}, {255, 2.0f}, {383, 5.0f}}; + std::map bm2 = {{127, 0.5f}, {255, 2.0f}, {383, 5.0f}}; + + std::vector musts; + musts.push_back(std::make_unique(list1, 5.0f, bm1)); + musts.push_back(std::make_unique(list2, 5.0f, bm2)); + + ConjunctionIterator conj(std::move(musts), std::vector{}); + // block 0 sum=1.0, block 1 sum=4.0, block 2 sum=10.0; threshold=5.0 + conj.set_min_competitive_score(5.0f); + + auto docs = collect_docs(&conj); + ASSERT_EQ(docs.size(), 1u); + EXPECT_EQ(docs[0], 260u); +} + +// ============================================================ +// Optimization 3: Score early-exit +// ============================================================ + +// 3 must iterators sorted by cost (ascending). After scoring the first +// (lowest-cost) iterator, accumulated + remaining_max < threshold → +// score() exits early, skipping the remaining two score() calls. +TEST(ConjunctionOptTest, ScoreEarlyExitReducesScoreCalls) { + // iter0: cost=1, max_score=2.0, actual_score=0.1 + // iter1: cost=2, max_score=2.0, actual_score=1.0 + // iter2: cost=3, max_score=2.0, actual_score=1.5 + // cached_max_score_ = 6.0, threshold = 5.5 + // After iter0: remaining_max=4.0, total=0.1. 0.1+4.0=4.1 < 5.5 → exit + auto *raw0 = new FakeDocIterator({{1, 0.1f}}, 2.0f); + auto *raw1 = new FakeDocIterator({{1, 1.0f}}, 2.0f); + auto *raw2 = new FakeDocIterator({{1, 1.5f}}, 2.0f); + + std::vector musts; + musts.emplace_back(raw0); + musts.emplace_back(raw1); + musts.emplace_back(raw2); + + ConjunctionIterator conj(std::move(musts), std::vector{}); + conj.set_min_competitive_score(5.5f); + + uint32_t doc = conj.next_doc(); + ASSERT_EQ(doc, 1u); + conj.score(); + + // Only the first iterator's score() should have been called; + // the other two were skipped by early-exit. + EXPECT_EQ(raw0->score_call_count(), 1); + EXPECT_EQ(raw1->score_call_count(), 0); + EXPECT_EQ(raw2->score_call_count(), 0); +} + +// When scores are competitive, all iterators' score() are called +TEST(ConjunctionOptTest, ScoreNoEarlyExitCallsAll) { + auto *raw0 = new FakeDocIterator({{1, 3.0f}}, 3.0f); + auto *raw1 = new FakeDocIterator({{1, 3.0f}}, 3.0f); + + std::vector musts; + musts.emplace_back(raw0); + musts.emplace_back(raw1); + + ConjunctionIterator conj(std::move(musts), std::vector{}); + conj.set_min_competitive_score(5.0f); + + uint32_t doc = conj.next_doc(); + ASSERT_EQ(doc, 1u); + float s = conj.score(); + + EXPECT_FLOAT_EQ(s, 6.0f); + EXPECT_EQ(raw0->score_call_count(), 1); + EXPECT_EQ(raw1->score_call_count(), 1); +} + +// ============================================================ +// Optimization 4: Phrase threshold forwarding +// ============================================================ + +// set_min_competitive_score propagated into inner conjunction triggers +// block-max skip; without forwarding all docs would be returned. +TEST(ConjunctionOptTest, PhraseForwardingBlockMaxSkip) { + std::vector> list1 = {{10, 0.2f}, {130, 4.0f}}; + std::vector> list2 = {{10, 0.2f}, {130, 5.0f}}; + + std::map bm1 = {{127, 0.3f}, {255, 5.0f}}; + std::map bm2 = {{127, 0.3f}, {255, 5.0f}}; + + std::vector musts; + musts.push_back(std::make_unique(list1, 5.0f, bm1)); + musts.push_back(std::make_unique(list2, 5.0f, bm2)); + + auto inner = std::make_unique( + std::move(musts), std::vector{}); + + // Forward threshold as PhraseDocIterator would + inner->set_min_competitive_score(2.0f); + + auto docs = collect_docs(inner.get()); + // Block 0 (sum 0.6 < 2.0) skipped + ASSERT_EQ(docs.size(), 1u); + EXPECT_EQ(docs[0], 130u); +} + +// ============================================================ +// DisjunctionIterator::advance() bypass WAND fix +// ============================================================ + +// advance() must faithfully return target even with high min_competitive_score. +// Without the fix, advance() delegates to next_doc() which triggers WAND +// pruning and returns NO_MORE_DOCS. +TEST(ConjunctionOptTest, DisjunctionAdvanceBypassesWand) { + std::vector sub_iters; + sub_iters.push_back(std::make_unique( + std::vector>{{1, 0.1f}, {5, 0.1f}, {10, 0.1f}}, + 1.0f)); + sub_iters.push_back(std::make_unique( + std::vector>{{3, 0.2f}, {5, 0.2f}, {20, 0.2f}}, + 1.0f)); + + DisjunctionIterator disj(std::move(sub_iters)); + disj.set_min_competitive_score(100.0f); + + uint32_t doc = disj.advance(5); + EXPECT_EQ(doc, 5u); + EXPECT_TRUE(disj.matches()); + + doc = disj.advance(10); + EXPECT_EQ(doc, 10u); +}