Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,59 @@ uint32_t ConjunctionIterator::advance(uint32_t target) {
return cached_doc_id_;
}

uint32_t ConjunctionIterator::skip_non_competitive_blocks(uint32_t candidate) {
while (true) {
float block_max_sum = 0.0f;
float must_only_sum = 0.0f;
uint32_t min_block_end = NO_MORE_DOCS;

for (auto &iter : must_iterators_) {
auto info = iter->block_max_info_for(candidate);
block_max_sum += info.block_max_score;
must_only_sum += info.block_max_score;
if (info.block_last_doc < min_block_end) {
min_block_end = info.block_last_doc;
}
}
for (auto &iter : should_iterators_) {
auto info = iter->block_max_info_for(candidate);
block_max_sum += info.block_max_score;
if (info.block_last_doc < min_block_end) {
min_block_end = info.block_last_doc;
}
}

// All iterators returned NO_MORE_DOCS — no block info, pass through
if (min_block_end == NO_MORE_DOCS) {
block_max_up_to_ = NO_MORE_DOCS;
must_block_max_sum_ = must_only_sum;
opt_is_required_ = (!should_iterators_.empty() &&
must_block_max_sum_ < min_competitive_score_);
return candidate;
}

if (block_max_sum >= min_competitive_score_) {
// Current block is competitive
block_max_up_to_ = min_block_end;
must_block_max_sum_ = must_only_sum;
opt_is_required_ = (!should_iterators_.empty() &&
must_block_max_sum_ < min_competitive_score_);
return candidate;
}

// Current block is non-competitive, skip to the next block
uint32_t next_block_start = min_block_end + 1;
if (next_block_start == 0) {
// overflow: min_block_end was MAX → 已耗尽
return NO_MORE_DOCS;
}
candidate = must_iterators_[0]->advance(next_block_start);
if (candidate == NO_MORE_DOCS) {
return NO_MORE_DOCS;
}
}
}

uint32_t ConjunctionIterator::do_next(uint32_t candidate) {
if (candidate == NO_MORE_DOCS) {
return NO_MORE_DOCS;
Expand All @@ -130,9 +183,39 @@ uint32_t ConjunctionIterator::do_next(uint32_t candidate) {
}

if (all_match) {
// Block-Max: skip non-competitive blocks before must_not check
if (min_competitive_score_ > 0.0f && candidate > block_max_up_to_) {
uint32_t orig = candidate;
candidate = skip_non_competitive_blocks(candidate);
if (candidate == NO_MORE_DOCS) {
return NO_MORE_DOCS;
}
if (candidate != orig) {
continue;
}
}

// All must iterators agree on this candidate
// Check must_not exclusion
if (!is_excluded(candidate)) {
// optIsRequired: should clauses promoted to required
if (opt_is_required_) {
bool any_should_match = false;
for (auto &iter : should_iterators_) {
uint32_t doc = iter->advance(candidate);
if (doc == candidate && iter->matches()) {
any_should_match = true;
break;
}
}
if (!any_should_match) {
candidate = must_iterators_[0]->next_doc();
if (candidate == NO_MORE_DOCS) {
return NO_MORE_DOCS;
}
continue;
}
}
return candidate;
}
// Excluded by must_not, advance lead to next doc
Expand Down Expand Up @@ -167,15 +250,30 @@ bool ConjunctionIterator::matches() {

// Scores the current document (cached_doc_id_) by summing the scores of all
// must iterators plus every should iterator that lands on and matches the
// document.
//
// While min_competitive_score_ is positive, scoring stops early as soon as
// the score gathered so far plus the remaining upper bound drops below the
// threshold; the partial sum accumulated up to that point is returned.
float ConjunctionIterator::score() {
  float accumulated = 0.0f;
  // Starts from this iterator's cached_max_score_ and has each child's
  // cached_max_score_ subtracted as that child is visited.
  float upper_bound_left = cached_max_score_;

  // True when pruning is active and the document can no longer reach the
  // competitive threshold. Reads the current values at every call.
  auto cannot_compete = [&]() {
    return min_competitive_score_ > 0.0f &&
           accumulated + upper_bound_left < min_competitive_score_;
  };

  for (auto &must : must_iterators_) {
    upper_bound_left -= must->cached_max_score_;
    accumulated += must->score();
    if (cannot_compete()) {
      return accumulated;
    }
  }

  for (auto &should : should_iterators_) {
    upper_bound_left -= should->cached_max_score_;
    // A should clause contributes only if it can be positioned exactly on the
    // current document and reports a match there.
    const uint32_t landed = should->advance(cached_doc_id_);
    if (landed == cached_doc_id_ && should->matches()) {
      accumulated += should->score();
    }
    if (cannot_compete()) {
      return accumulated;
    }
  }

  return accumulated;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -62,12 +62,23 @@ class ConjunctionIterator : public DocIterator {
// Check if candidate doc_id is excluded by any must_not iterator
bool is_excluded(uint32_t candidate);

// Block-Max: skip blocks whose score upper bound cannot compete.
// Returns the first candidate whose block can potentially compete,
// or NO_MORE_DOCS if exhausted.
uint32_t skip_non_competitive_blocks(uint32_t candidate);

private:
// Required clauses. must_iterators_[0] is the lead (lowest cost)
std::vector<DocIteratorPtr> must_iterators_;
// Exclusion clauses, consulted through is_excluded().
std::vector<DocIteratorPtr> must_not_iterators_;
// Optional clauses; they add to the score when they match the current doc.
std::vector<DocIteratorPtr> should_iterators_;
// Score threshold used for pruning. Pruning is only applied while this is
// greater than zero.
float min_competitive_score_{0.0f};
// Block-Max: upper bound of doc_id range already verified as competitive.
// Candidates above this value trigger a new skip_non_competitive_blocks()
// pass; it is set to NO_MORE_DOCS when no iterator reports a block end.
uint32_t block_max_up_to_{0};
// optIsRequired: must-only block_max sum for current block
float must_block_max_sum_{0.0f};
// optIsRequired: whether should iterators are upgraded to required, i.e.
// should clauses exist and must_block_max_sum_ is below
// min_competitive_score_ for the current block.
bool opt_is_required_{false};
};

} // namespace zvec::fts
Original file line number Diff line number Diff line change
Expand Up @@ -223,7 +223,29 @@ uint32_t DisjunctionIterator::advance(uint32_t target) {
iter->advance(target);
}
}
return next_doc();

// Bypass the WAND loop — advance() is a seek operation that must not
// apply competitive-score pruning. When this DisjunctionIterator serves
// as a should clause, the caller (ConjunctionIterator::score()) relies on
// advance() returning target if it exists; WAND block-max pruning would
// incorrectly skip target and silently lose the should score.
resort_postings();

if (postings_.empty() || postings_[0]->cached_doc_id_ == NO_MORE_DOCS) {
cached_doc_id_ = NO_MORE_DOCS;
return NO_MORE_DOCS;
}

uint32_t doc = postings_[0]->cached_doc_id_;
for (size_t i = 0; i < postings_.size(); ++i) {
if (postings_[i]->cached_doc_id_ == doc) {
matching_iterators_.push_back(postings_[i]);
} else {
break;
}
}
cached_doc_id_ = doc;
return doc;
}

bool DisjunctionIterator::matches() {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,10 @@ class PhraseDocIterator : public DocIterator {
uint64_t cost() const override;
float max_score() const override;

// Forwards the minimum competitive score to the wrapped conjunction_ iterator.
// NOTE(review): conjunction_ is dereferenced without a null check — confirm it
// is always set before this can be called.
void set_min_competitive_score(float min_score) override {
conjunction_->set_min_competitive_score(min_score);
}

private:
// Verify that terms appear at consecutive positions in the document.
// Issues a single MultiGet across the unique terms in the phrase, decodes
Expand Down
Loading
Loading