Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions crates/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ members = [
"khive-query",
"khive-gate",
"khive-gate-rego",
"khive-fusion",
"khive-runtime",
"khive-request",
"khive-pack-kg",
Expand Down
15 changes: 15 additions & 0 deletions crates/khive-fusion/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
[package]
name = "khive-fusion"
version.workspace = true
edition.workspace = true
authors.workspace = true
license.workspace = true
repository.workspace = true
homepage.workspace = true
keywords.workspace = true
categories.workspace = true
description = "Rank fusion strategies (RRF, Weighted, Union) with deterministic scoring"

[dependencies]
khive-score = { version = "0.2.0", path = "../khive-score" }
serde = { workspace = true }
172 changes: 172 additions & 0 deletions crates/khive-fusion/src/fuse.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,172 @@
//! Main fusion entry point.

use khive_score::DeterministicScore;
use std::hash::Hash;

use super::rrf::reciprocal_rank_fusion;
use super::strategy::FusionStrategy;
use super::union::union_fusion;
use super::weighted::weighted_fusion;

/// Combine several ranked result lists into one ranked list.
///
/// This is the crate's single dispatch point: it picks the fusion routine that
/// matches `strategy`, runs it over `sources`, and keeps at most `top_k` of the
/// entries it produces, in the order the routine returned them.
///
/// # Arguments
///
/// * `sources` - One result list per retriever. Every list holds
///   `(Id, DeterministicScore)` pairs and is expected to be ordered best-first
///   (score descending).
/// * `strategy` - Which fusion strategy to apply.
/// * `top_k` - Upper bound on the number of entries returned.
///
/// # Returns
///
/// Up to `top_k` `(Id, DeterministicScore)` pairs taken from the front of the
/// selected strategy's output. The result is empty when `sources` is empty or
/// when `top_k` is `0`; in that case no strategy is invoked.
///
/// # Type Parameters
///
/// * `Id` - The identifier type. Must implement `Eq`, `Hash`, `Clone`, and `Ord`
///   (for example `EmbeddingId`, `DocumentId`, `String`, `Uuid`). `Ord` is
///   required so that ties between equal scores can be broken deterministically.
///
/// # Example
///
/// ```rust
/// use khive_fusion::{fuse, FusionStrategy};
/// use khive_score::DeterministicScore;
///
/// let sources = vec![
///     vec![("a", DeterministicScore::from_f64(0.9))],
///     vec![("a", DeterministicScore::from_f64(0.8))],
/// ];
///
/// let results = fuse(sources, &FusionStrategy::default(), 10);
/// assert_eq!(results.len(), 1);
/// ```
pub fn fuse<Id: Eq + Hash + Clone + Ord>(
    sources: Vec<Vec<(Id, DeterministicScore)>>,
    strategy: &FusionStrategy,
    top_k: usize,
) -> Vec<(Id, DeterministicScore)> {
    // Nothing requested or nothing supplied: skip the strategy call entirely.
    if top_k == 0 || sources.is_empty() {
        return Vec::new();
    }

    let ranked = match strategy {
        FusionStrategy::Rrf { k } => reciprocal_rank_fusion(sources, *k),
        FusionStrategy::Weighted { weights } => weighted_fusion(sources, weights),
        // `VectorOnly` / `KeywordOnly` share the `Union` path: the caller is
        // responsible for passing only the relevant source list, and a union
        // (max-score per ID) over a single source changes nothing.
        FusionStrategy::Union | FusionStrategy::VectorOnly | FusionStrategy::KeywordOnly => {
            union_fusion(sources)
        }
    };

    // Keep only the leading `top_k` entries.
    ranked.into_iter().take(top_k).collect()
}

#[cfg(test)]
mod tests {
    use super::*;

    /// Turn `(id, f64)` pairs into `(id, DeterministicScore)` pairs, keeping order.
    fn make_results<Id: Clone>(items: Vec<(Id, f64)>) -> Vec<(Id, DeterministicScore)> {
        let mut scored = Vec::with_capacity(items.len());
        for (id, raw) in items {
            scored.push((id, DeterministicScore::from_f64(raw)));
        }
        scored
    }

    /// RRF over one two-item source yields two entries.
    #[test]
    fn test_fuse_rrf_strategy() {
        let sources = vec![make_results(vec![("doc_a", 0.9), ("doc_b", 0.8)])];
        let out = fuse(sources, &FusionStrategy::rrf(), 10);

        assert_eq!(out.len(), 2);
    }

    /// Weighted fusion over one single-item source yields one entry.
    #[test]
    fn test_fuse_weighted_strategy() {
        let sources = vec![make_results(vec![("doc_a", 1.0)])];
        let out = fuse(sources, &FusionStrategy::weighted(vec![1.0]), 10);

        assert_eq!(out.len(), 1);
    }

    /// Union fusion over one single-item source yields one entry.
    #[test]
    fn test_fuse_union_strategy() {
        let sources = vec![make_results(vec![("doc_a", 0.9)])];
        let out = fuse(sources, &FusionStrategy::union(), 10);

        assert_eq!(out.len(), 1);
    }

    /// `top_k` smaller than the input keeps only the leading entries, in order.
    #[test]
    fn test_fuse_top_k_truncation() {
        let ranked_input = make_results(vec![
            ("doc_a", 0.9),
            ("doc_b", 0.8),
            ("doc_c", 0.7),
            ("doc_d", 0.6),
            ("doc_e", 0.5),
        ]);

        let out = fuse(vec![ranked_input], &FusionStrategy::rrf(), 3);
        let ids: Vec<&str> = out.iter().map(|(id, _)| *id).collect();

        // Comparing the whole ID list checks both the length (3) and the order.
        assert_eq!(ids, vec!["doc_a", "doc_b", "doc_c"]);
    }

    /// `top_k == 0` always produces an empty result.
    #[test]
    fn test_fuse_top_k_zero() {
        let sources = vec![make_results(vec![("doc_a", 0.9)])];
        let out = fuse(sources, &FusionStrategy::rrf(), 0);

        assert!(out.is_empty());
    }

    /// No sources at all produces an empty result.
    #[test]
    fn test_fuse_empty_sources() {
        let out = fuse::<&str>(Vec::new(), &FusionStrategy::rrf(), 10);

        assert!(out.is_empty());
    }

    /// `top_k` larger than the input returns everything that is available.
    #[test]
    fn test_fuse_top_k_larger_than_results() {
        let sources = vec![make_results(vec![("doc_a", 0.9), ("doc_b", 0.8)])];
        let out = fuse(sources, &FusionStrategy::rrf(), 100);

        assert_eq!(out.len(), 2);
    }

    /// Owned `String` identifiers are accepted.
    #[test]
    fn test_fuse_with_string_ids() {
        let ranked_input: Vec<(String, DeterministicScore)> = vec![
            (String::from("doc_a"), DeterministicScore::from_f64(0.9)),
            (String::from("doc_b"), DeterministicScore::from_f64(0.8)),
        ];

        let out = fuse(vec![ranked_input], &FusionStrategy::rrf(), 10);

        assert_eq!(out.len(), 2);
        assert_eq!(out[0].0.as_str(), "doc_a");
    }

    /// Integer identifiers are accepted.
    #[test]
    fn test_fuse_with_integer_ids() {
        let ranked_input: Vec<(u64, DeterministicScore)> = vec![
            (1, DeterministicScore::from_f64(0.9)),
            (2, DeterministicScore::from_f64(0.8)),
        ];

        let out = fuse(vec![ranked_input], &FusionStrategy::rrf(), 10);

        assert_eq!(out.len(), 2);
        assert_eq!(out[0].0, 1u64);
    }
}
68 changes: 68 additions & 0 deletions crates/khive-fusion/src/lib.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
//! Fusion algorithms for combining retrieval results.
//!
//! This module implements rank fusion strategies for hybrid search, combining
//! results from multiple retrieval sources (e.g., vector search, keyword search).
//!
//! # Supported Strategies
//!
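//! - **RRF (Reciprocal Rank Fusion)**: Default and recommended. Uses only ranks,
//!   making it robust to score distribution differences.
//! - **Weighted**: Linear combination of scores with configurable weights.
//! - **Union**: Takes the maximum score per ID across sources.
//! - **VectorOnly / KeywordOnly**: Single-source modes. `fuse` handles them the
//!   same way as Union, so the caller must pass only the relevant source list.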
//!
//! # Algorithm
//!
//! RRF formula:
//! ```text
//! score(d) = Σ 1/(k + rank_i(d))
//! ```
//! where:
//! - k = 60 (standard, dampens high-rank dominance)
//! - rank_i(d) = position of d in retriever i's results (1-indexed)
//! - If d not in retriever i, contribution = 0
//!
//! # Example
//!
//! ```rust
//! use khive_fusion::{fuse, FusionStrategy, reciprocal_rank_fusion};
//! use khive_score::DeterministicScore;
//!
//! // Two retrieval sources with different rankings
//! let vector_results = vec![
//! ("doc_a", DeterministicScore::from_f64(0.95)),
//! ("doc_b", DeterministicScore::from_f64(0.90)),
//! ("doc_c", DeterministicScore::from_f64(0.85)),
//! ];
//!
//! let keyword_results = vec![
//! ("doc_b", DeterministicScore::from_f64(0.88)),
//! ("doc_c", DeterministicScore::from_f64(0.75)),
//! ("doc_d", DeterministicScore::from_f64(0.70)),
//! ];
//!
//! // Fuse using RRF with k=60
//! let fused = fuse(
//! vec![vector_results, keyword_results],
//! &FusionStrategy::Rrf { k: 60 },
//! 5,
//! );
//!
//! // doc_b appears in both sources, so it gets highest RRF score
//! assert_eq!(fused[0].0, "doc_b");
//! ```

mod fuse;
mod rrf;
mod strategy;
mod union;
mod weighted;

#[cfg(test)]
mod tests;

// Re-export public types and functions
pub use fuse::fuse;
pub use rrf::reciprocal_rank_fusion;
pub use strategy::{FusionStrategy, DEFAULT_RRF_K};
pub use union::union_fusion;
pub use weighted::{normalize_weights, weighted_fusion, weights_are_normalized};
Loading
Loading