Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions crates/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ members = [
"khive-gate-rego",
"khive-fusion",
"khive-bm25",
"khive-hnsw",
"khive-runtime",
"khive-request",
"khive-pack-kg",
Expand Down
8 changes: 6 additions & 2 deletions crates/khive-bm25/src/index/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,9 @@ fn default_postings_epoch() -> u64 {
// TODO(port): was generated by `khive_types::transparent_string_newtype!` macro
// which does not yet exist in khive-types. Expanded here manually until the macro
// lands in khive-types and is re-adopted.
#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, serde::Serialize, serde::Deserialize)]
#[derive(
Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, serde::Serialize, serde::Deserialize,
)]
#[serde(transparent)]
pub struct DocumentId(String);

Expand Down Expand Up @@ -639,7 +641,9 @@ impl Bm25Index {
return id;
}
let id = self.next_internal_id;
self.next_internal_id = self.next_internal_id.checked_add(1)
self.next_internal_id = self
.next_internal_id
.checked_add(1)
.expect("internal document ID space exhausted (u32::MAX)");
self.id_to_internal.insert(doc_id.clone(), id);
if id as usize >= self.internal_to_id.len() {
Expand Down
12 changes: 6 additions & 6 deletions crates/khive-bm25/src/index/search.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1450,11 +1450,11 @@ mod tests_simd_scoring {
let tfs_4: [u8; 4] = [0, 0, 0, 0];
let dls_4: [f32; 4] = [10.0, 20.0, 5.0, 1.0];
let result = score_batch_4(&tfs_4, &dls_4, TEST_IDF, TEST_K1P1, TEST_BASE, TEST_DL_FAC);
for i in 0..4 {
for val in &result {
assert!(
result[i].abs() < 1e-10,
val.abs() < 1e-10,
"tf=0 should produce ~0 score, got {}",
result[i]
val
);
}

Expand All @@ -1467,11 +1467,11 @@ mod tests_simd_scoring {
let result = unsafe {
score_batch_avx2(&tfs_8, &dls_8, TEST_IDF, TEST_K1P1, TEST_BASE, TEST_DL_FAC)
};
for i in 0..8 {
for val in &result {
assert!(
result[i].abs() < 1e-10,
val.abs() < 1e-10,
"avx2 tf=0 should produce ~0 score, got {}",
result[i]
val
);
}
}
Expand Down
13 changes: 10 additions & 3 deletions crates/khive-bm25/src/metrics.rs
Original file line number Diff line number Diff line change
Expand Up @@ -63,13 +63,20 @@ pub struct RecordingSink {
}

#[cfg(test)]
impl RecordingSink {
/// Create an empty recording sink.
pub fn new() -> Self {
impl Default for RecordingSink {
fn default() -> Self {
Self {
events: Mutex::new(Vec::new()),
}
}
}

#[cfg(test)]
impl RecordingSink {
/// Create an empty recording sink.
pub fn new() -> Self {
Self::default()
}

/// Return a snapshot of all recorded events.
pub fn events(&self) -> Vec<MetricEvent> {
Expand Down
7 changes: 3 additions & 4 deletions crates/khive-bm25/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -813,8 +813,8 @@ mod golden_tests {
/// Memory budget enforcement tests for BM25.
#[cfg(test)]
mod memory_budget_tests {
use crate::{Bm25Config, Bm25Index};
use crate::error::{ErrorKind, RetrievalError};
use crate::{Bm25Config, Bm25Index};

#[test]
fn test_no_budget_allows_unlimited_indexing() {
Expand Down Expand Up @@ -844,9 +844,8 @@ mod memory_budget_tests {
format!("doc{i}"),
&format!("some content words for document number {i} with extra text"),
);
if result.is_err() {
if let Err(err) = result {
rejected = true;
let err = result.unwrap_err();
assert!(
matches!(err, RetrievalError::BudgetExceeded { .. }),
"Expected BudgetExceeded, got: {err:?}"
Expand Down Expand Up @@ -1019,8 +1018,8 @@ mod memory_budget_tests {

#[cfg(test)]
mod metrics_tests {
use crate::{Bm25Config, Bm25Index};
use crate::metrics::{names, MetricValue, RecordingSink};
use crate::{Bm25Config, Bm25Index};
use std::sync::Arc;

#[test]
Expand Down
6 changes: 4 additions & 2 deletions crates/khive-bm25/src/tokenizer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -247,8 +247,10 @@ mod tests {

#[test]
fn test_stop_words_disabled() {
let mut tokenizer = SimpleTokenizer::default();
tokenizer.filter_stop_words = false;
let tokenizer = SimpleTokenizer {
filter_stop_words: false,
..Default::default()
};
let tokens = tokenizer.tokenize("The Quick, Brown FOX!");
assert_eq!(tokens, vec!["the", "quick", "brown", "fox"]);
}
Expand Down
16 changes: 4 additions & 12 deletions crates/khive-fusion/src/tests.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,19 +221,11 @@ mod property_tests {
/// Verifies the `present_gt_absent` property from RRF.lean.
#[test]
fn prop_rrf_more_sources_higher_score() {
let source1: Vec<(String, DeterministicScore)> = vec![(
"doc_common".to_string(),
DeterministicScore::from_f64(0.9),
)];
let source1: Vec<(String, DeterministicScore)> =
vec![("doc_common".to_string(), DeterministicScore::from_f64(0.9))];
let source2: Vec<(String, DeterministicScore)> = vec![
(
"doc_common".to_string(),
DeterministicScore::from_f64(0.9),
),
(
"doc_single".to_string(),
DeterministicScore::from_f64(0.8),
),
("doc_common".to_string(), DeterministicScore::from_f64(0.9)),
("doc_single".to_string(), DeterministicScore::from_f64(0.8)),
];

let fused = reciprocal_rank_fusion(vec![source1, source2], 60);
Expand Down
30 changes: 30 additions & 0 deletions crates/khive-hnsw/Cargo.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Cargo manifest for the `khive-hnsw` crate.
[package]
name = "khive-hnsw"
# Every `*.workspace = true` key below inherits its value from the
# `[workspace.package]` table of the workspace root manifest, so shared
# metadata is maintained in one place.
version.workspace = true
edition.workspace = true
authors.workspace = true
license.workspace = true
repository.workspace = true
homepage.workspace = true
keywords.workspace = true
categories.workspace = true
description = "HNSW (Hierarchical Navigable Small World) vector index with INT8 quantized two-phase search — formally verified in Lean4"

[dependencies]
# Sibling crates. With both `path` and `version` given, Cargo builds against
# the local path and uses the version requirement when the crate is published.
khive-score = { version = "0.2.0", path = "../khive-score" }
khive-types = { version = "0.2.0", path = "../khive-types" }
# Dependencies whose version (and any shared settings) come from the
# `[workspace.dependencies]` table of the workspace root manifest.
lattice-embed = { workspace = true }
serde = { workspace = true }
serde_json = { workspace = true }
thiserror = { workspace = true }
parking_lot = { workspace = true }
tokio = { workspace = true }
# Dependencies declared directly in this crate instead of being inherited
# from the workspace.
# NOTE(review): if other workspace members use these too, consider moving
# them to `[workspace.dependencies]` to keep versions aligned — confirm.
rand = "0.8"
rayon = "1.10"
ulid = "1.1"

# Only used when building tests, examples, and benchmarks.
[dev-dependencies]
proptest = "1"

[features]
# Feature flag that enables no optional dependencies or other features on
# its own.
# NOTE(review): presumably gates code behind `cfg(feature = "checkpoint")` —
# confirm against the crate sources.
checkpoint = []
Loading
Loading