From b27e2ac351b7d6b2d5dd93a279f9dda193797e08 Mon Sep 17 00:00:00 2001 From: OceanLi <122793010+ohdearquant@users.noreply.github.com> Date: Mon, 25 May 2026 03:43:25 -0400 Subject: [PATCH 01/18] feat(pack-memory): wire khive-retrieval as recall composer (ADR-011/021) Route pack-memory's fuse_candidates through khive_retrieval::fuse_search_results, making khive-retrieval a real consumed facade instead of an orphan crate. - Add khive-retrieval dep to khive-pack-memory/Cargo.toml - Replace direct fuse_with_strategy call with retrieval adapter (CandidateMeta side-map, HybridConfig builder, FusionStrategy conversion) - Fix issue #309: resolve --all-features compile failures in khive-retrieval (stale SqliteStore imports, missing NodeId/LinkStore imports) - Add 5 integration tests (3 fusion_surface, 2 pack-memory recall adapter) - RRF k=1 discriminator test proves strategy propagation (30x score gap) Co-Authored-By: Claude Opus 4.6 --- crates/khive-pack-memory/Cargo.toml | 1 + crates/khive-pack-memory/src/handlers.rs | 111 ++++++++++++++++- crates/khive-pack-memory/tests/integration.rs | 117 ++++++++++++++++++ crates/khive-retrieval/src/graph/tests.rs | 2 +- crates/khive-retrieval/src/persist/tests.rs | 1 + .../src/replay/engine_replay.rs | 21 +++- .../src/weights/engine_weights.rs | 31 ++++- .../khive-retrieval/tests/fusion_surface.rs | 61 +++++++++ 8 files changed, 331 insertions(+), 14 deletions(-) create mode 100644 crates/khive-retrieval/tests/fusion_surface.rs diff --git a/crates/khive-pack-memory/Cargo.toml b/crates/khive-pack-memory/Cargo.toml index e1a60e7a..d01a040b 100644 --- a/crates/khive-pack-memory/Cargo.toml +++ b/crates/khive-pack-memory/Cargo.toml @@ -13,6 +13,7 @@ description = "Memory verb pack — remember/recall semantics with decay-aware r [dependencies] khive-types = { version = "0.2.2", path = "../khive-types", features = ["serde"] } khive-runtime = { version = "0.2.2", path = "../khive-runtime" } +khive-retrieval = { version = "0.2.2", path = 
"../khive-retrieval" } khive-pack-brain = { version = "0.2.2", path = "../khive-pack-brain" } inventory = { workspace = true } khive-storage = { version = "0.2.2", path = "../khive-storage" } diff --git a/crates/khive-pack-memory/src/handlers.rs b/crates/khive-pack-memory/src/handlers.rs index 6667a7f8..3ab1c759 100644 --- a/crates/khive-pack-memory/src/handlers.rs +++ b/crates/khive-pack-memory/src/handlers.rs @@ -4,8 +4,13 @@ use serde::Deserialize; use serde_json::{json, Value}; use uuid::Uuid; -use khive_runtime::fusion::fuse_with_strategy; -use khive_runtime::{NamespaceToken, RuntimeError, SearchHit, SearchSource, VerbRegistry}; +use khive_retrieval::{ + fuse_search_results, FusionStrategy as RetrievalFusionStrategy, HybridConfig, +}; +use khive_runtime::{ + FusionStrategy as RuntimeFusionStrategy, NamespaceToken, RuntimeError, SearchHit, SearchSource, + VerbRegistry, +}; use khive_storage::types::{ TextFilter, TextQueryMode, TextSearchHit, TextSearchRequest, VectorSearchHit, VectorSearchRequest, @@ -138,6 +143,49 @@ fn search_source_label(source: SearchSource) -> &'static str { } } +#[derive(Default)] +struct CandidateMeta { + in_text: bool, + in_vector: bool, + title: Option, + snippet: Option, +} + +fn to_retrieval_fusion_strategy(strategy: &RuntimeFusionStrategy) -> RetrievalFusionStrategy { + match strategy { + RuntimeFusionStrategy::Rrf { k } => RetrievalFusionStrategy::Rrf { k: *k }, + RuntimeFusionStrategy::Weighted { .. 
} => RetrievalFusionStrategy::Weighted { + weights: Vec::new(), + }, + RuntimeFusionStrategy::Union => RetrievalFusionStrategy::Union, + RuntimeFusionStrategy::VectorOnly => RetrievalFusionStrategy::VectorOnly, + } +} + +fn retrieval_hybrid_config(strategy: &RuntimeFusionStrategy, limit: usize) -> HybridConfig { + let mut config = HybridConfig::new(limit) + .with_pool_size(limit) + .with_fusion_strategy(to_retrieval_fusion_strategy(strategy)); + + if let RuntimeFusionStrategy::Weighted { weights } = strategy { + // Runtime weighted fusion uses [text, vector]. HybridConfig uses keyword/vector. + // Preserve arbitrary positive scales — do not clamp via with_weights(). + config.keyword_weight = weights.first().copied().unwrap_or(0.0).max(0.0); + config.vector_weight = weights.get(1).copied().unwrap_or(0.0).max(0.0); + } + + config +} + +fn source_from_meta(meta: &CandidateMeta) -> SearchSource { + match (meta.in_vector, meta.in_text) { + (true, true) => SearchSource::Both, + (true, false) => SearchSource::Vector, + (false, true) => SearchSource::Text, + (false, false) => SearchSource::Text, + } +} + fn fuse_candidates( text_hits: Vec, vector_hits: Vec, @@ -145,15 +193,68 @@ fn fuse_candidates( cfg: &RecallConfig, limit: usize, ) -> Vec { - let text: Vec = text_hits + let mut meta = HashMap::::new(); + + let text_source: Vec<_> = text_hits .into_iter() .filter(|h| memory_ids.contains(&h.subject_id)) + .map(|h| { + let TextSearchHit { + subject_id, + score, + title, + snippet, + .. 
+ } = h; + let entry = meta.entry(subject_id).or_default(); + entry.in_text = true; + if entry.title.is_none() { + entry.title = title; + } + if entry.snippet.is_none() { + entry.snippet = snippet; + } + (subject_id, score) + }) .collect(); - let vec: Vec = vector_hits + + let vector_source: Vec<_> = vector_hits .into_iter() .filter(|h| memory_ids.contains(&h.subject_id)) + .map(|h| { + let entry = meta.entry(h.subject_id).or_default(); + entry.in_vector = true; + (h.subject_id, h.score) + }) .collect(); - fuse_with_strategy(text, vec, &cfg.fuse_strategy, limit) + + let vector_only = matches!(&cfg.fuse_strategy, RuntimeFusionStrategy::VectorOnly); + let sources = if vector_only { + vec![vector_source] + } else { + // HybridConfig weighted convention: vector first, keyword second. + vec![vector_source, text_source] + }; + + let retrieval_cfg = retrieval_hybrid_config(&cfg.fuse_strategy, limit); + fuse_search_results(sources, &retrieval_cfg) + .into_iter() + .map(|(id, score)| { + let m = meta.remove(&id).unwrap_or_default(); + let (source, title, snippet) = if vector_only { + (SearchSource::Vector, None, None) + } else { + (source_from_meta(&m), m.title, m.snippet) + }; + SearchHit { + entity_id: id, + score, + source, + title, + snippet, + } + }) + .collect() } impl MemoryPack { diff --git a/crates/khive-pack-memory/tests/integration.rs b/crates/khive-pack-memory/tests/integration.rs index 946856c7..1a29307a 100644 --- a/crates/khive-pack-memory/tests/integration.rs +++ b/crates/khive-pack-memory/tests/integration.rs @@ -657,6 +657,123 @@ async fn test_recall_fuse_source_field_is_plain_string() { ); } +/// Verifies that recall.fuse routes through khive_retrieval::fuse_search_results +/// by injecting a non-default fusion config (Rrf k=1) and asserting the fused +/// score matches the RRF k=1 formula: 1/(k + rank) = 1/(1 + 1) = 0.5. +/// +/// Under default k=60 the score would be 1/61 ≈ 0.0164. 
The large gap (0.5 vs +/// 0.0164) is the discriminator: if the adapter did not pass k=1 through to +/// khive_retrieval::HybridConfig, the score would not be 0.5. +#[tokio::test] +async fn test_recall_fuse_rrf_k1_uses_retrieval_adapter() { + let rt = make_runtime(); + let registry = make_registry(rt); + + registry + .dispatch( + "remember", + json!({ "content": "retrieval adapter rrf k1 probe memory" }), + ) + .await + .expect("remember"); + + let result = registry + .dispatch( + "recall.fuse", + json!({ + "query": "retrieval adapter rrf k1 probe", + "config": { + "fuse_strategy": { "rrf": { "k": 1 } } + } + }), + ) + .await + .expect("recall.fuse with Rrf k=1"); + + let fused = result["fused_candidates"].as_array().expect("fused array"); + assert!( + !fused.is_empty(), + "recall.fuse must return at least one candidate" + ); + + let score = fused[0]["fused_score"] + .as_f64() + .expect("fused_score is f64"); + // Rank 1 in a single text source with k=1: RRF = 1/(1+1) = 0.5. + // If k=60 were used instead, score ≈ 0.0164 — the gap proves the adapter works. + let expected = 0.5_f64; + assert!( + (score - expected).abs() < 1e-6, + "RRF k=1, rank 1 → fused_score must be 0.5; got {score:.6} \ + (≈0.0164 means the adapter passed k=60 instead of k=1)" + ); +} + +/// Regression: after wiring khive-retrieval into fuse_candidates, the recall.fuse +/// response shape must be unchanged — top-level strategy + candidate_limit, and +/// per-candidate note_id + fused_score + source must all be present. Full recall +/// fields (content, salience) must remain absent. 
+#[tokio::test] +async fn test_recall_fuse_shape_preserved_after_retrieval_wiring() { + let rt = make_runtime(); + let registry = make_registry(rt); + + registry + .dispatch( + "remember", + json!({ "content": "shape regression check after retrieval wiring" }), + ) + .await + .expect("remember"); + + let result = registry + .dispatch( + "recall.fuse", + json!({ "query": "shape regression retrieval wiring" }), + ) + .await + .expect("recall.fuse"); + + // Top-level shape + assert!( + result.get("strategy").is_some(), + "strategy field must be present in recall.fuse response" + ); + assert!( + result["candidate_limit"].as_u64().is_some(), + "candidate_limit must be a non-negative integer" + ); + + let fused = result["fused_candidates"] + .as_array() + .expect("fused_candidates array"); + assert!(!fused.is_empty(), "fused_candidates must be non-empty"); + + let c = &fused[0]; + assert!( + c["note_id"].as_str().is_some(), + "note_id must be a string UUID" + ); + assert!( + c["fused_score"].as_f64().is_some(), + "fused_score must be a float" + ); + let source = c["source"].as_str().expect("source must be a plain string"); + assert!( + matches!(source, "text" | "vector" | "both"), + "source must be a plain label, got {source:?}" + ); + // Full recall fields must not leak into fuse output + assert!( + c.get("content").is_none(), + "content must be absent from recall.fuse output" + ); + assert!( + c.get("salience").is_none(), + "salience must be absent from recall.fuse output" + ); +} + /// When include_breakdown is true, breakdown.total() must equal the hit's composite score. #[tokio::test] async fn test_recall_breakdown_total_matches_composite_score() { diff --git a/crates/khive-retrieval/src/graph/tests.rs b/crates/khive-retrieval/src/graph/tests.rs index 639b3efd..92e3e936 100644 --- a/crates/khive-retrieval/src/graph/tests.rs +++ b/crates/khive-retrieval/src/graph/tests.rs @@ -1,6 +1,6 @@ //! Unit tests for graph traversal module. 
-use super::compat::{test_context, EntityRef, MockLinkStore}; +use super::compat::{test_context, EntityRef, LinkStore, MockLinkStore}; use crate::graph::types::{ Direction, PathNode, TraversalOptions, MAX_TRAVERSAL_DEPTH, MAX_TRAVERSAL_RESULTS, diff --git a/crates/khive-retrieval/src/persist/tests.rs b/crates/khive-retrieval/src/persist/tests.rs index 2efdf72d..88d6e84e 100644 --- a/crates/khive-retrieval/src/persist/tests.rs +++ b/crates/khive-retrieval/src/persist/tests.rs @@ -1,4 +1,5 @@ use super::*; +use crate::NodeId; use khive_bm25::Bm25Index; use khive_hnsw::HnswIndex; use rusqlite::Connection; diff --git a/crates/khive-retrieval/src/replay/engine_replay.rs b/crates/khive-retrieval/src/replay/engine_replay.rs index d25a85bb..45b8bbc2 100644 --- a/crates/khive-retrieval/src/replay/engine_replay.rs +++ b/crates/khive-retrieval/src/replay/engine_replay.rs @@ -844,11 +844,26 @@ pub mod metrics { #[cfg(test)] mod tests { use super::*; - use khive_db::SqliteStore; fn make_conn() -> Arc> { - let store = SqliteStore::memory().expect("in-memory store"); - store.conn() + let conn = Connection::open_in_memory().expect("open in-memory db"); + conn.execute_batch( + r#" + CREATE TABLE weight_events ( + namespace TEXT NOT NULL, + atom_id TEXT NOT NULL, + delta REAL NOT NULL, + weight_after REAL NOT NULL, + channel TEXT NOT NULL, + eta REAL NOT NULL, + event_id TEXT, + context_id TEXT, + ts INTEGER NOT NULL + ); + "#, + ) + .expect("init replay test schema"); + Arc::new(Mutex::new(conn)) } fn insert_weight_event( diff --git a/crates/khive-retrieval/src/weights/engine_weights.rs b/crates/khive-retrieval/src/weights/engine_weights.rs index 7530767c..0b47a7cc 100644 --- a/crates/khive-retrieval/src/weights/engine_weights.rs +++ b/crates/khive-retrieval/src/weights/engine_weights.rs @@ -298,14 +298,35 @@ pub async fn batch_load_weights( #[cfg(test)] mod tests { use super::*; - use khive_db::SqliteStore; use std::sync::Arc; fn make_conn() -> Arc> { - // Open an in-memory SQLite 
DB and run migrations so atom_weights and - // weight_events tables exist. - let store = SqliteStore::memory().expect("in-memory store"); - store.conn() + let conn = Connection::open_in_memory().expect("open in-memory db"); + conn.execute_batch( + r#" + CREATE TABLE atom_weights ( + namespace TEXT NOT NULL, + atom_id TEXT NOT NULL, + weight REAL NOT NULL, + updated_at INTEGER NOT NULL, + version INTEGER NOT NULL DEFAULT 1, + PRIMARY KEY(namespace, atom_id) + ); + CREATE TABLE weight_events ( + namespace TEXT NOT NULL, + atom_id TEXT NOT NULL, + delta REAL NOT NULL, + weight_after REAL NOT NULL, + channel TEXT NOT NULL, + eta REAL NOT NULL, + event_id TEXT, + context_id TEXT, + ts INTEGER NOT NULL + ); + "#, + ) + .expect("init weight test schema"); + Arc::new(Mutex::new(conn)) } // ------------------------------------------------------------------------- diff --git a/crates/khive-retrieval/tests/fusion_surface.rs b/crates/khive-retrieval/tests/fusion_surface.rs new file mode 100644 index 00000000..29ae15cf --- /dev/null +++ b/crates/khive-retrieval/tests/fusion_surface.rs @@ -0,0 +1,61 @@ +use khive_retrieval::{fuse_search_results, FusionStrategy, HybridConfig}; +use khive_score::DeterministicScore; + +#[test] +fn fuse_search_results_rrf_surface_matches_expected_order() { + // doc_b appears at rank 1 in both vector and keyword — must win under RRF k=60. 
+ let vector = vec![ + ("doc_b", DeterministicScore::from_f64(0.9)), + ("doc_a", DeterministicScore::from_f64(0.8)), + ]; + let keyword = vec![ + ("doc_b", DeterministicScore::from_f64(4.0)), + ("doc_c", DeterministicScore::from_f64(3.0)), + ]; + let config = HybridConfig::new(10) + .with_pool_size(10) + .with_fusion_strategy(FusionStrategy::Rrf { k: 60 }); + + let results = fuse_search_results(vec![vector, keyword], &config); + + assert!(!results.is_empty(), "fusion must return results"); + assert_eq!( + results[0].0, "doc_b", + "doc_b must rank first (appears in both sources)" + ); + + // RRF score for doc_b: 1/(1+60) + 1/(1+60) = 2/61 ≈ 0.03279 + let expected = 2.0 / 61.0; + let actual = results[0].1.to_f64(); + assert!( + (actual - expected).abs() < 1e-6, + "fused score = {actual}, expected ~{expected}" + ); +} + +#[test] +fn fuse_search_results_empty_sources_returns_empty() { + let config = HybridConfig::default(); + let results = fuse_search_results::<&str>(vec![], &config); + assert!(results.is_empty()); +} + +#[test] +fn fuse_search_results_single_source_truncates_to_top_k() { + let source: Vec<_> = (0..20) + .map(|i| { + ( + format!("doc_{i}"), + DeterministicScore::from_f64(1.0 - i as f64 * 0.01), + ) + }) + .collect(); + let config = HybridConfig::new(5); + let results = fuse_search_results(vec![source], &config); + assert_eq!( + results.len(), + 5, + "single-source result must be truncated to top_k=5" + ); + assert_eq!(results[0].0, "doc_0", "highest score must be first"); +} From be2fd2bebaad6985e359ae14549ef3d68bf4935a Mon Sep 17 00:00:00 2001 From: OceanLi <122793010+ohdearquant@users.noreply.github.com> Date: Mon, 25 May 2026 04:01:34 -0400 Subject: [PATCH 02/18] =?UTF-8?q?feat(pack-memory):=20expose=20top=5Fk/fus?= =?UTF-8?q?ion=5Fstrategy/score=5Ffloor=20knobs=20on=20recall=20(ADR-033?= =?UTF-8?q?=20=C2=A76)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add three optional per-request fields to 
RecallParams: top_k (usize), fusion_strategy (string), and score_floor (f32) - fusion_strategy validated against {"rrf","weighted","union"}; clear error with valid values on invalid input - top_k overrides the result limit for a single call (capped at 100) - score_floor applied as a post-filter on the composite score after compute_score - Add parse_fusion_strategy_str helper; wire override into cfg.fuse_strategy before passing to fuse_candidates - Add 4 integration tests: default_identity, top_k_override, fusion_strategy_override (including rejection), score_floor - Document knobs in ADR-033 §6.1 with table, semantics, and example DSL Co-Authored-By: Claude Sonnet 4.6 --- crates/khive-pack-memory/src/handlers.rs | 113 ++++++++- crates/khive-pack-memory/tests/integration.rs | 225 ++++++++++++++++++ docs/adr/ADR-033-recall-pipeline.md | 35 +++ 3 files changed, 371 insertions(+), 2 deletions(-) diff --git a/crates/khive-pack-memory/src/handlers.rs b/crates/khive-pack-memory/src/handlers.rs index 3ab1c759..3bcb84cc 100644 --- a/crates/khive-pack-memory/src/handlers.rs +++ b/crates/khive-pack-memory/src/handlers.rs @@ -37,6 +37,19 @@ fn validate_memory_type(mt: &str) -> Result<(), RuntimeError> { } } +fn parse_fusion_strategy_str(s: &str) -> Result { + match s { + "rrf" => Ok(RuntimeFusionStrategy::Rrf { k: 60 }), + "weighted" => Ok(RuntimeFusionStrategy::Weighted { + weights: vec![0.3, 0.7], + }), + "union" => Ok(RuntimeFusionStrategy::Union), + other => Err(RuntimeError::InvalidInput(format!( + "invalid fusion_strategy {other:?}: must be one of \"rrf\", \"weighted\", \"union\"" + ))), + } +} + #[derive(Deserialize)] struct RememberParams { content: String, @@ -58,6 +71,9 @@ struct RecallParams { min_score: Option, min_salience: Option, config: Option, + top_k: Option, + fusion_strategy: Option, + score_floor: Option, } impl RecallParams { @@ -436,10 +452,35 @@ impl MemoryPack { validate_memory_type(mt)?; } - let cfg = p.effective_config(self.active_config()); + if let 
Some(ref fs) = p.fusion_strategy { + parse_fusion_strategy_str(fs)?; + } + + let mut cfg = p.effective_config(self.active_config()); + if let Some(ref fs) = p.fusion_strategy { + let mut new_strategy = parse_fusion_strategy_str(fs)?; + // "weighted" in the request means "use weighted fusion" — the actual + // weight values come from pack config, not the request (ADR-033 §6.1). + if let ( + RuntimeFusionStrategy::Weighted { + weights: ref mut new_w, + }, + RuntimeFusionStrategy::Weighted { + weights: ref existing_w, + }, + ) = (&mut new_strategy, &cfg.fuse_strategy) + { + *new_w = existing_w.clone(); + } + cfg.fuse_strategy = new_strategy; + } cfg.validate()?; - let limit = p.limit.unwrap_or(10).min(100); + let limit = if let Some(k) = p.top_k { + (k as u32).min(100) + } else { + p.limit.unwrap_or(10).min(100) + }; let candidate_limit = recall_candidate_count(&cfg, limit); let candidates = self .collect_recall_candidates(&p.query, token, candidate_limit) @@ -493,6 +534,11 @@ impl MemoryPack { if final_score < cfg.min_score { continue; } + if let Some(floor) = p.score_floor { + if final_score < floor as f64 { + continue; + } + } ranked.push((id, final_score, breakdown, note)); } @@ -762,6 +808,9 @@ mod tests { min_score: None, min_salience: None, config: None, + top_k: None, + fusion_strategy: None, + score_floor: None, }; let cfg = p.effective_config(RecallConfig::default()); assert!((cfg.relevance_weight - 0.70).abs() < 1e-12); @@ -778,6 +827,9 @@ mod tests { min_score: Some(0.5), min_salience: Some(0.3), config: None, + top_k: None, + fusion_strategy: None, + score_floor: None, }; let cfg = p.effective_config(RecallConfig::default()); assert!((cfg.min_score - 0.5).abs() < 1e-12); @@ -796,6 +848,9 @@ mod tests { relevance_weight: 0.50, ..RecallConfig::default() }), + top_k: None, + fusion_strategy: None, + score_floor: None, }; let cfg = p.effective_config(RecallConfig::default()); assert!((cfg.relevance_weight - 0.50).abs() < 1e-12); @@ -803,6 +858,60 @@ mod tests 
{ assert!((cfg.min_score - 0.1).abs() < 1e-12); } + #[test] + fn test_weighted_strategy_preserves_pack_weights() { + use khive_runtime::FusionStrategy as RuntimeFusionStrategy; + + // Pack config has custom weighted weights [0.8, 0.2] + let base = RecallConfig { + fuse_strategy: RuntimeFusionStrategy::Weighted { + weights: vec![0.8, 0.2], + }, + ..RecallConfig::default() + }; + + // Request overrides to "weighted" — must preserve [0.8, 0.2], not replace with [0.3, 0.7] + let p = RecallParams { + query: "test".to_string(), + limit: None, + memory_type: None, + min_score: None, + min_salience: None, + config: None, + top_k: None, + fusion_strategy: Some("weighted".to_string()), + score_floor: None, + }; + + let mut cfg = p.effective_config(base); + if let Some(ref fs) = p.fusion_strategy { + let mut new_strategy = parse_fusion_strategy_str(fs).unwrap(); + if let ( + RuntimeFusionStrategy::Weighted { + weights: ref mut new_w, + }, + RuntimeFusionStrategy::Weighted { + weights: ref existing_w, + }, + ) = (&mut new_strategy, &cfg.fuse_strategy) + { + *new_w = existing_w.clone(); + } + cfg.fuse_strategy = new_strategy; + } + + match cfg.fuse_strategy { + RuntimeFusionStrategy::Weighted { weights } => { + assert_eq!( + weights, + vec![0.8, 0.2], + "fusion_strategy=weighted must preserve pack weights [0.8, 0.2], not override with [0.3, 0.7]" + ); + } + other => panic!("expected Weighted strategy, got {other:?}"), + } + } + #[test] fn compute_score_default_config_reproduces_legacy() { let cfg = RecallConfig::default(); diff --git a/crates/khive-pack-memory/tests/integration.rs b/crates/khive-pack-memory/tests/integration.rs index 1a29307a..f613506d 100644 --- a/crates/khive-pack-memory/tests/integration.rs +++ b/crates/khive-pack-memory/tests/integration.rs @@ -1001,3 +1001,228 @@ async fn test_pack_tunable_apply_config_affects_recall_score() { "under relevance_weight=1.0 with rrf=1.0 → score=1.0; got {total2}" ); } + +// ── ADR-033 §6 knob tests 
────────────────────────────────────────────────── + +#[tokio::test] +async fn test_recall_default_identity() { + let rt = make_runtime(); + let registry = make_registry(rt.clone()); + + let note = registry + .dispatch( + "remember", + json!({ + "content": "the mitochondria is the powerhouse of the cell", + "importance": 0.8 + }), + ) + .await + .expect("remember succeeds"); + let note_id = note["note_id"].as_str().unwrap().to_string(); + + // Baseline recall with no knobs + let base = registry + .dispatch("recall", json!({ "query": "mitochondria powerhouse cell" })) + .await + .expect("baseline recall succeeds"); + let base_hits = base.as_array().expect("array"); + assert!( + !base_hits.is_empty(), + "baseline must return at least one hit" + ); + + // Same call with all knobs absent — must match baseline shape + let knobless = registry + .dispatch( + "recall", + json!({ "query": "mitochondria powerhouse cell", "top_k": null }), + ) + .await + .expect("recall with null top_k succeeds"); + let knobless_hits = knobless.as_array().expect("array"); + + assert_eq!( + base_hits.len(), + knobless_hits.len(), + "null top_k must not change result count" + ); + assert_eq!( + base_hits[0]["note_id"].as_str().unwrap(), + note_id, + "top hit must be the memory we created" + ); +} + +#[tokio::test] +async fn test_recall_top_k_override() { + let rt = make_runtime(); + let registry = make_registry(rt.clone()); + + // Create several distinct memories to ensure the pool is large enough + for i in 0..5 { + registry + .dispatch( + "remember", + json!({ + "content": format!("rust ownership memory safety concept {i}"), + "importance": 0.7 + }), + ) + .await + .expect("remember succeeds"); + } + + // Recall with top_k=2 — must not return more than 2 results + let result = registry + .dispatch( + "recall", + json!({ "query": "rust ownership memory safety", "top_k": 2 }), + ) + .await + .expect("recall with top_k=2 succeeds"); + let hits = result.as_array().expect("array"); + assert!( + 
hits.len() <= 2, + "top_k=2 must return at most 2 results, got {}", + hits.len() + ); + + // top_k=1 must return at most 1 + let result1 = registry + .dispatch( + "recall", + json!({ "query": "rust ownership memory safety", "top_k": 1 }), + ) + .await + .expect("recall with top_k=1 succeeds"); + let hits1 = result1.as_array().expect("array"); + assert!( + hits1.len() <= 1, + "top_k=1 must return at most 1 result, got {}", + hits1.len() + ); +} + +#[tokio::test] +async fn test_recall_fusion_strategy_override() { + let rt = make_runtime(); + let registry = make_registry(rt.clone()); + + registry + .dispatch( + "remember", + json!({ + "content": "gradient descent optimization machine learning", + "importance": 0.8 + }), + ) + .await + .expect("remember succeeds"); + + // Each valid strategy must succeed and return an array + for strategy in &["rrf", "weighted", "union"] { + let result = registry + .dispatch( + "recall", + json!({ + "query": "gradient descent optimization", + "fusion_strategy": strategy + }), + ) + .await + .unwrap_or_else(|e| panic!("recall with fusion_strategy={strategy:?} failed: {e}")); + assert!( + result.is_array(), + "fusion_strategy={strategy:?} must return an array, got {result}" + ); + } + + // Invalid strategy must return an error + let err = registry + .dispatch( + "recall", + json!({ + "query": "gradient descent optimization", + "fusion_strategy": "bogus" + }), + ) + .await; + assert!(err.is_err(), "invalid fusion_strategy must return an error"); + let msg = err.unwrap_err().to_string(); + assert!( + msg.contains("rrf") && msg.contains("weighted") && msg.contains("union"), + "error message must list valid strategies, got: {msg}" + ); +} + +#[tokio::test] +async fn test_recall_score_floor() { + let rt = make_runtime(); + let registry = make_registry(rt.clone()); + + registry + .dispatch( + "remember", + json!({ + "content": "backpropagation neural network training algorithm", + "importance": 0.6 + }), + ) + .await + .expect("remember 
succeeds"); + + // Baseline: no floor — get result count + let base = registry + .dispatch( + "recall", + json!({ "query": "backpropagation neural network" }), + ) + .await + .expect("baseline recall succeeds"); + let base_count = base.as_array().expect("array").len(); + + // score_floor=0.99 must not return MORE results than baseline + let floored = registry + .dispatch( + "recall", + json!({ + "query": "backpropagation neural network", + "score_floor": 0.99 + }), + ) + .await + .expect("recall with score_floor=0.99 succeeds"); + let floored_hits = floored.as_array().expect("array"); + assert!( + floored_hits.len() <= base_count, + "score_floor=0.99 must return ≤ baseline count ({base_count}), got {}", + floored_hits.len() + ); + + // All returned hits must have score >= 0.99 + for hit in floored_hits { + let score = hit["score"].as_f64().expect("score is a number"); + assert!( + score >= 0.99, + "score_floor=0.99: all returned scores must be ≥ 0.99, got {score}" + ); + } + + // score_floor=0.0 must behave same as no floor + let zero_floor = registry + .dispatch( + "recall", + json!({ + "query": "backpropagation neural network", + "score_floor": 0.0 + }), + ) + .await + .expect("recall with score_floor=0.0 succeeds"); + let zero_count = zero_floor.as_array().expect("array").len(); + assert_eq!( + zero_count, base_count, + "score_floor=0.0 must return same count as no floor" + ); +} diff --git a/docs/adr/ADR-033-recall-pipeline.md b/docs/adr/ADR-033-recall-pipeline.md index 375856c0..e6f30079 100644 --- a/docs/adr/ADR-033-recall-pipeline.md +++ b/docs/adr/ADR-033-recall-pipeline.md @@ -277,6 +277,41 @@ document its Hoare triple: | **Program** | Stage 1 (`memory.recall_embed`): query → embedding via multi-engine fan-out. Stage 2 (`memory.recall_candidates`): broad recall from FTS5 + vector, `candidate_multiplier × limit` candidates per path. Stage 3 (`memory.recall_fuse`): apply `fusion_strategy` (default RRF) to produce fused hits. 
Stage 4 (`memory.recall_rerank`, ADR-042 §7): run all rerankers whose weight in `reranker_weights` is > 0; each writes its score to `candidate.rerank_scores[name]`. Stage 5 (`memory.recall_score`): apply `ComposePipeline` with `WeightedObjective` over the three base Objectives plus one `RerankerObjective` per active reranker. Stage 6 (select): truncate to `limit`; apply `budget` via `GreedySelector` if set. | | **Postcondition** | Output is a deterministic list of memory notes ordered by composite score, within `limit`. All returned notes are alive (not soft-deleted) and `kind = memory`. Score breakdown is available on request via `memory.recall_score`. | +### 6.1 Per-request knobs (ADR-033 §6 addendum) + +The `recall` verb accepts three optional per-request knobs that override the pack-level +`RecallConfig` for a single call. All knobs are optional; absent or `null` preserves the +current default behavior. + +| Parameter | Type | Default | Semantics | +| ------------------ | ---------------- | ------------------ | ---------------------------------------------------------------------------------------------------------------------------------------- | +| `top_k` | `usize` \| null | `limit` or `10` | Maximum number of results to return. Overrides `limit` when set. Capped at `100`. | +| `fusion_strategy` | `string` \| null | `"rrf"` (k=60) | Fusion algorithm for candidate merging. Must be one of `"rrf"`, `"weighted"`, `"union"`. Returns an error for any other value. | +| `score_floor` | `f32` \| null | `0.0` (no floor) | Minimum composite score threshold applied after `compute_score`. Results below this floor are excluded. `0.0` or `null` = no filtering. | + +**`fusion_strategy` details:** +- `"rrf"` — Reciprocal Rank Fusion with k=60 (default). Robust across query types. +- `"weighted"` — Weighted linear combination. Text/vector weights come from the pack-level + config (`RecallConfig.fuse_strategy`), not the request. The request cannot override weights. 
+- `"union"` — Max-score per candidate ID. Inclusive but may surface low-quality text-only hits. + +**Example request DSL:** + +```json +{ + "query": "attention mechanism in transformers", + "top_k": 5, + "fusion_strategy": "union", + "score_floor": 0.3 +} +``` + +This returns at most 5 results, fused via union strategy, with composite score ≥ 0.3. + +**Interaction with `RecallConfig`:** Per-request knobs have higher precedence than `config` +and pack-level tuning. Resolution order: `top_k`/`fusion_strategy`/`score_floor` (request) +> `config` object (per-call) > pack active config (tunable) > `RecallConfig::default()`. + ### 7. Calibration protocol To calibrate recall parameters for a deployment: From 1169b415520799b05d88e4db7fd1812fa1e80f53 Mon Sep 17 00:00:00 2001 From: OceanLi <122793010+ohdearquant@users.noreply.github.com> Date: Mon, 25 May 2026 06:39:30 -0400 Subject: [PATCH 03/18] style(adr-033): deno fmt re-pad recall knob table (post-merge cleanup) --- docs/adr/ADR-033-recall-pipeline.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/docs/adr/ADR-033-recall-pipeline.md b/docs/adr/ADR-033-recall-pipeline.md index e6f30079..e6075de8 100644 --- a/docs/adr/ADR-033-recall-pipeline.md +++ b/docs/adr/ADR-033-recall-pipeline.md @@ -283,13 +283,14 @@ The `recall` verb accepts three optional per-request knobs that override the pac `RecallConfig` for a single call. All knobs are optional; absent or `null` preserves the current default behavior. -| Parameter | Type | Default | Semantics | -| ------------------ | ---------------- | ------------------ | ---------------------------------------------------------------------------------------------------------------------------------------- | -| `top_k` | `usize` \| null | `limit` or `10` | Maximum number of results to return. Overrides `limit` when set. Capped at `100`. | -| `fusion_strategy` | `string` \| null | `"rrf"` (k=60) | Fusion algorithm for candidate merging. 
Must be one of `"rrf"`, `"weighted"`, `"union"`. Returns an error for any other value. | -| `score_floor` | `f32` \| null | `0.0` (no floor) | Minimum composite score threshold applied after `compute_score`. Results below this floor are excluded. `0.0` or `null` = no filtering. | +| Parameter | Type | Default | Semantics | +| ----------------- | ---------------- | ---------------- | --------------------------------------------------------------------------------------------------------------------------------------- | +| `top_k` | `usize` \| null | `limit` or `10` | Maximum number of results to return. Overrides `limit` when set. Capped at `100`. | +| `fusion_strategy` | `string` \| null | `"rrf"` (k=60) | Fusion algorithm for candidate merging. Must be one of `"rrf"`, `"weighted"`, `"union"`. Returns an error for any other value. | +| `score_floor` | `f32` \| null | `0.0` (no floor) | Minimum composite score threshold applied after `compute_score`. Results below this floor are excluded. `0.0` or `null` = no filtering. | **`fusion_strategy` details:** + - `"rrf"` — Reciprocal Rank Fusion with k=60 (default). Robust across query types. - `"weighted"` — Weighted linear combination. Text/vector weights come from the pack-level config (`RecallConfig.fuse_strategy`), not the request. The request cannot override weights. @@ -310,6 +311,7 @@ This returns at most 5 results, fused via union strategy, with composite score **Interaction with `RecallConfig`:** Per-request knobs have higher precedence than `config` and pack-level tuning. Resolution order: `top_k`/`fusion_strategy`/`score_floor` (request) + > `config` object (per-call) > pack active config (tunable) > `RecallConfig::default()`. ### 7. 
Calibration protocol From 7402dda0132e771873996d2bc901e97f8b96609f Mon Sep 17 00:00:00 2001 From: OceanLi <122793010+ohdearquant@users.noreply.github.com> Date: Mon, 25 May 2026 05:27:53 -0400 Subject: [PATCH 04/18] feat(embedding): dual-model registry (MiniLM + paraphrase) per ADR-043 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Multi-model embedding support landed across the runtime + storage + memory stack. Workspace dual-embedding now reachable end-to-end: khive-runtime: - RuntimeConfig.additional_embedding_models: Vec - Replaces single OnceCell with HashMap - default_embedder_name() + embedder(name) public methods - KHIVE_ADDITIONAL_EMBEDDING_MODELS env-var parsing - configured_embedding_models() helper enumerates active set khive-db: - V16 migration: add `embedding_model TEXT NOT NULL DEFAULT 'all-minilm-l6-v2'` column to vectors table with backfill + composite index - VectorStore.insert / search scoped by embedding_model khive-storage: - VectorRecord carries model tag - vector search params include model scope khive-pack-memory: - recall + remember accept optional embedding_model arg - validation: must be a registered model name kkernel: - engine list now returns real loaded models (no longer empty Vec) - engine migrate / drift-check still return not-implemented (#380/#385) Notes: - 17 files changed, +634/-159 lines - Tests rebaselined for V16 (failed_migration_rolls_back tests V17 now; store_ddl_then_event_migration_is_idempotent expects V16 head) - Workspace: cargo build + cargo test + clippy clean + fmt clean Lattice gap status: N/A — lattice-embed 0.2.4 already exposes both MiniLM + paraphrase as 384-d local models with EmbeddingRoutingConfig primitives. khive-runtime now uses these directly.
Co-Authored-By: Claude Sonnet 4.6 --- crates/khive-db/src/backend.rs | 86 +++++-- crates/khive-db/src/lib.rs | 3 +- crates/khive-db/src/migrations.rs | 213 +++++++++++++++--- crates/khive-db/src/stores/vectors.rs | 65 ++++-- crates/khive-db/tests/contract/backend.rs | 3 +- .../khive-db/tests/contract/vector_filter.rs | 13 +- crates/khive-pack-memory/src/handlers.rs | 72 ++++-- crates/khive-retrieval/src/adapters/mod.rs | 1 + crates/khive-runtime/src/error.rs | 3 + crates/khive-runtime/src/operations.rs | 53 ++++- crates/khive-runtime/src/retrieval.rs | 46 +++- crates/khive-runtime/src/runtime.rs | 180 +++++++++++++-- crates/khive-runtime/tests/integration.rs | 2 + crates/khive-storage/src/types.rs | 5 + crates/khive-storage/src/vectors.rs | 5 + crates/kkernel/Cargo.toml | 1 + crates/kkernel/src/engine.rs | 42 ++-- 17 files changed, 634 insertions(+), 159 deletions(-) diff --git a/crates/khive-db/src/backend.rs b/crates/khive-db/src/backend.rs index de2e5124..5653baae 100644 --- a/crates/khive-db/src/backend.rs +++ b/crates/khive-db/src/backend.rs @@ -235,13 +235,15 @@ impl StorageBackend { /// Get a VectorStore for a specific embedding model, scoped to the default namespace. /// /// Creates the vec0 virtual table if it does not already exist. The `model_key` - /// must contain only ASCII alphanumeric/underscore characters. + /// must contain only ASCII alphanumeric/underscore characters. The `embedding_model` + /// is the canonical display name stored in each vector row. pub fn vectors( &self, model_key: &str, + embedding_model: &str, dimensions: usize, ) -> Result, SqliteError> { - self.vectors_for_namespace(model_key, dimensions, "local") + self.vectors_for_namespace(model_key, embedding_model, dimensions, "local") } /// Get a VectorStore for a specific embedding model with a default namespace. @@ -251,9 +253,12 @@ impl StorageBackend { /// (count, delete, info). Access control is enforced at the runtime layer. 
/// /// The `model_key` must contain only ASCII alphanumeric/underscore characters. + /// The `embedding_model` is the canonical display name stored in the `embedding_model` + /// column of each vector row (e.g. `"all-minilm-l6-v2"`). pub fn vectors_for_namespace( &self, model_key: &str, + embedding_model: &str, dimensions: usize, namespace: &str, ) -> Result, SqliteError> { @@ -298,21 +303,24 @@ impl StorageBackend { .is_some(); if table_exists { - let has_field: bool = { + let (has_field, has_embedding_model) = { let pragma = format!("PRAGMA table_xinfo({})", table); let mut stmt = writer.conn().prepare(&pragma)?; let mut rows = stmt.query([])?; - let mut found = false; + let mut has_field = false; + let mut has_embedding_model = false; while let Some(row) = rows.next()? { let name: String = row.get(1)?; if name == "field" { - found = true; - break; + has_field = true; + } + if name == "embedding_model" { + has_embedding_model = true; } } - found + (has_field, has_embedding_model) }; - if !has_field { + if !has_field || !has_embedding_model { let drop_ddl = format!("DROP TABLE IF EXISTS {}", table); writer.conn().execute_batch(&drop_ddl)?; } @@ -332,19 +340,13 @@ impl StorageBackend { // Create the vec0 virtual table. Idempotent on fresh databases and after the // old-schema rebuild above. - // - // NOTE: `embedding_model_id` is NOT included in this DDL because sqlite-vec - // enforces NOT NULL on TEXT metadata columns at insert time, so the column - // cannot be added at virtual-table creation as a nullable FK. The column will - // be present after the ADR-043 §8 startup backfill rebuild (steps 2-4), which - // is deferred to a follow-up PR — see the tracking issue filed against MAJ-2 - // of codex round-1 review of PR #374. 
let ddl = format!( "CREATE VIRTUAL TABLE IF NOT EXISTS vec_{} USING vec0(\ subject_id TEXT PRIMARY KEY, \ namespace TEXT NOT NULL, \ kind TEXT NOT NULL, \ field TEXT NOT NULL, \ + embedding_model TEXT NOT NULL, \ embedding float[{}] distance_metric=cosine\ )", model_key, dimensions @@ -355,11 +357,54 @@ impl StorageBackend { Arc::clone(&self.pool), self.is_file_backed, model_key.to_string(), + embedding_model.to_string(), dimensions, namespace.trim().to_string(), )?)) } + /// Register an embedding model in the `_embedding_models` registry table (ADR-043). + /// + /// Idempotent: if a row with the same `canonical_key` already exists, updates its + /// status back to `'active'` without changing other fields. + pub fn register_embedding_model( + &self, + engine_name: &str, + model_id: &str, + key_version: &str, + dimensions: u32, + ) -> Result<(), SqliteError> { + let writer = self.pool.try_writer()?; + writer + .conn() + .execute_batch(crate::migrations::EMBEDDING_MODELS_DDL)?; + + let now = chrono::Utc::now().timestamp_micros(); + let canonical_key = + format!("{engine_name}:{model_id}:{key_version}:{dimensions}").into_bytes(); + let id = uuid::Uuid::new_v4(); + writer.conn().execute( + "INSERT INTO _embedding_models \ + (id, engine_name, model_id, key_version, dim, output_dim, status, \ + activated_at, superseded_at, superseded_by, canonical_key, created_at) \ + VALUES (?1, ?2, ?3, ?4, ?5, NULL, 'active', ?6, NULL, NULL, ?7, ?8) \ + ON CONFLICT(canonical_key) DO UPDATE SET \ + status = 'active', \ + activated_at = COALESCE(_embedding_models.activated_at, excluded.activated_at)", + rusqlite::params![ + id.as_bytes().as_slice(), + engine_name, + model_id, + key_version, + dimensions as i64, + now, + canonical_key, + now, + ], + )?; + Ok(()) + } + /// Get a SparseStore for a specific model key, scoped to the default namespace. /// /// Creates the sparse table if it does not already exist. 
@@ -599,7 +644,7 @@ mod tests { #[cfg(feature = "vectors")] async fn vectors_roundtrip_via_public_api() { let backend = StorageBackend::memory().unwrap(); - let store = backend.vectors("test_api", 3).unwrap(); + let store = backend.vectors("test_api", "test_api", 3).unwrap(); let id = uuid::Uuid::new_v4(); store @@ -619,6 +664,7 @@ mod tests { top_k: 1, namespace: None, kind: None, + embedding_model: None, filter: None, backend_hints: None, }) @@ -635,8 +681,8 @@ mod tests { async fn vectors_creates_table_idempotently() { let backend = StorageBackend::memory().unwrap(); - let store1 = backend.vectors("idempotent", 3).unwrap(); - let store2 = backend.vectors("idempotent", 3).unwrap(); + let store1 = backend.vectors("idempotent", "idempotent", 3).unwrap(); + let store2 = backend.vectors("idempotent", "idempotent", 3).unwrap(); let id = uuid::Uuid::new_v4(); store1 @@ -724,8 +770,8 @@ mod tests { #[test] fn invalid_model_key_rejected() { let backend = StorageBackend::memory().unwrap(); - assert!(backend.vectors("bad key!", 3).is_err()); - assert!(backend.vectors("", 3).is_err()); + assert!(backend.vectors("bad key!", "bad key!", 3).is_err()); + assert!(backend.vectors("", "", 3).is_err()); } #[test] diff --git a/crates/khive-db/src/lib.rs b/crates/khive-db/src/lib.rs index e4a8b0bc..2a832372 100644 --- a/crates/khive-db/src/lib.rs +++ b/crates/khive-db/src/lib.rs @@ -9,7 +9,8 @@ pub mod stores; pub use backend::StorageBackend; pub use error::SqliteError; pub use migrations::{ - run_migrations, Migration, ServiceSchemaPlan, VersionedMigration, MIGRATIONS, + query_embedding_models, run_migrations, EmbeddingModelRegistryRecord, Migration, + ServiceSchemaPlan, VersionedMigration, MIGRATIONS, }; pub use pool::{ConnectionPool, PoolConfig, ReaderGuard, WriterGuard}; pub use sql_bridge::SqlBridge; diff --git a/crates/khive-db/src/migrations.rs b/crates/khive-db/src/migrations.rs index 7d727289..83b7b282 100644 --- a/crates/khive-db/src/migrations.rs +++ 
b/crates/khive-db/src/migrations.rs @@ -371,6 +371,15 @@ pub const EMBEDDING_MODELS_DDL: &str = "\ /// step for any table that already has the column. const V14_EMBEDDING_MODEL_REGISTRY: &str = "__v14_computed_at_runtime__"; +/// V16: Add `embedding_model` column and composite index to regular `vec_` tables. +/// +/// This migration is computed at runtime via `build_v16_vector_embedding_model_tag_sql` +/// to discover existing regular (non-virtual) `vec_` tables and add the column where +/// absent. sqlite-vec virtual tables (`vec0`) are handled at open time by the +/// `vectors_for_namespace` old-schema detection which drops and recreates tables +/// missing `embedding_model`. +const V16_VECTOR_EMBEDDING_MODEL_TAG: &str = "__v16_computed_at_runtime__"; + /// V15: proposals_open projection table (ADR-046). /// /// Maintains a fold-derived view of the four proposal EventKinds so that @@ -485,6 +494,12 @@ pub const MIGRATIONS: &[VersionedMigration] = &[ name: "proposals_open", up: V15_PROPOSALS_OPEN, }, + // V16: tag vector rows with embedding_model column (ADR-043 §8, dual-embedding). + VersionedMigration { + version: 16, + name: "vector_embedding_model_tag", + up: V16_VECTOR_EMBEDDING_MODEL_TAG, + }, ]; const MIGRATION_TRACKING_TABLE: &str = "\ @@ -701,6 +716,11 @@ pub fn run_migrations(conn: &mut Connection) -> Result { version: migration.version, error: e.to_string(), })? + } else if migration.version == 16 { + build_v16_vector_embedding_model_tag_sql(&tx).map_err(|e| SqliteError::Migration { + version: migration.version, + error: e.to_string(), + })? 
} else { migration.up.to_string() }; @@ -876,6 +896,129 @@ fn build_v14_embedding_model_registry_sql(conn: &Connection) -> Result Result { + let mut stmt = conn.prepare( + "SELECT name FROM sqlite_master \ + WHERE type = 'table' \ + AND name LIKE 'vec_%' \ + AND sql NOT LIKE '%VIRTUAL%' \ + AND sql NOT LIKE '%vec0%' \ + AND name NOT LIKE '%\\_chunks' ESCAPE '\\' \ + AND name NOT LIKE '%\\_rowids' ESCAPE '\\' \ + AND name NOT LIKE '%\\_info' ESCAPE '\\' \ + AND name NOT LIKE '%\\_vector\\_chunks%' ESCAPE '\\'", + )?; + let vec_tables: Vec = stmt + .query_map([], |row| row.get(0))? + .filter_map(|r| r.ok()) + .collect(); + + let mut sql = String::new(); + for table in vec_tables { + let valid = table.starts_with("vec_") + && table[4..] + .chars() + .all(|c| c.is_ascii_alphanumeric() || c == '_'); + if !valid { + continue; + } + let col_exists: bool = conn + .query_row( + "SELECT COUNT(*) > 0 FROM pragma_table_info(?1) WHERE name = 'embedding_model'", + rusqlite::params![&table], + |row| row.get(0), + ) + .unwrap_or(false); + if col_exists { + continue; + } + sql.push_str(&format!( + "ALTER TABLE {t} ADD COLUMN embedding_model TEXT NOT NULL DEFAULT 'all-minilm-l6-v2';\ + CREATE INDEX IF NOT EXISTS idx_{t}_subject_model ON {t}(subject_id, embedding_model);", + t = table, + )); + } + if sql.is_empty() { + sql.push_str("SELECT 1;"); + } + Ok(sql) +} + +/// A record from the `_embedding_models` registry table. +#[derive(Clone, Debug)] +pub struct EmbeddingModelRegistryRecord { + pub engine_name: String, + pub model_id: String, + pub key_version: String, + pub dimensions: u32, + pub status: String, + pub activated_at: Option, + pub superseded_at: Option, +} + +/// Query the `_embedding_models` registry. +/// +/// Opens the database at `db` (defaults to `~/.khive/khive-graph.db`) and +/// returns all registry rows, optionally filtered by `engine_name`. +/// Returns an empty vec if the database or table does not exist. 
+pub fn query_embedding_models( + db: Option<&std::path::Path>, + engine_filter: Option<&str>, +) -> Result, SqliteError> { + let path = db.map(std::path::Path::to_path_buf).unwrap_or_else(|| { + std::env::var("HOME") + .map(std::path::PathBuf::from) + .unwrap_or_else(|_| std::path::PathBuf::from(".")) + .join(".khive/khive-graph.db") + }); + if !path.exists() { + return Ok(Vec::new()); + } + + let conn = Connection::open(path)?; + let exists: bool = conn.query_row( + "SELECT COUNT(*) > 0 FROM sqlite_master \ + WHERE type='table' AND name='_embedding_models'", + [], + |row| row.get(0), + )?; + if !exists { + return Ok(Vec::new()); + } + + let sql = if engine_filter.is_some() { + "SELECT engine_name, model_id, key_version, dim, status, activated_at, superseded_at \ + FROM _embedding_models WHERE engine_name = ?1 \ + ORDER BY engine_name, activated_at IS NULL, activated_at" + } else { + "SELECT engine_name, model_id, key_version, dim, status, activated_at, superseded_at \ + FROM _embedding_models \ + ORDER BY engine_name, activated_at IS NULL, activated_at" + }; + let mut stmt = conn.prepare(sql)?; + let map_row = |row: &rusqlite::Row<'_>| { + Ok(EmbeddingModelRegistryRecord { + engine_name: row.get(0)?, + model_id: row.get(1)?, + key_version: row.get(2)?, + dimensions: row.get::<_, i64>(3)? as u32, + status: row.get(4)?, + activated_at: row.get(5)?, + superseded_at: row.get(6)?, + }) + }; + + if let Some(engine) = engine_filter { + stmt.query_map([engine], map_row)? + .collect::, _>>() + .map_err(Into::into) + } else { + stmt.query_map([], map_row)? 
+ .collect::, _>>() + .map_err(Into::into) + } +} + // ============================================================================= // Tests // ============================================================================= @@ -892,17 +1035,17 @@ mod tests { fn fresh_db_migrates_to_latest() { let mut conn = open_memory(); let version = run_migrations(&mut conn).expect("migrations should succeed"); - assert_eq!(version, 15); + assert_eq!(version, 16); - // Verify the tracking table has rows for V1 through V15. + // Verify the tracking table has rows for V1 through V16. let count: i64 = conn .query_row( - "SELECT COUNT(*) FROM _schema_migrations WHERE version IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)", + "SELECT COUNT(*) FROM _schema_migrations WHERE version IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)", [], |row| row.get(0), ) .unwrap(); - assert_eq!(count, 15); + assert_eq!(count, 16); // Verify the entities table was created. let tbl_count: i64 = conn @@ -1083,16 +1226,16 @@ mod tests { let mut conn = open_memory(); let v1 = run_migrations(&mut conn).expect("first run"); let v2 = run_migrations(&mut conn).expect("second run"); - assert_eq!(v1, 15); - assert_eq!(v2, 15); + assert_eq!(v1, 16); + assert_eq!(v2, 16); - // Should still have exactly fifteen rows in the tracking table (V1..V15). + // Should still have exactly sixteen rows in the tracking table (V1..V16). let count: i64 = conn .query_row("SELECT COUNT(*) FROM _schema_migrations", [], |row| { row.get(0) }) .unwrap(); - assert_eq!(count, 15); + assert_eq!(count, 16); } // F052 (CRIT): V9 migration must add target_backend column + partial index on graph_edges. 
@@ -1102,8 +1245,8 @@ mod tests { let mut conn = open_memory(); let version = run_migrations(&mut conn).expect("migrations should succeed"); assert_eq!( - version, 15, - "F052: latest migration must be V15 (proposals_open)" + version, 16, + "F052: latest migration must be V16 (vector_embedding_model_tag)" ); let col: i64 = conn .query_row( @@ -1131,40 +1274,43 @@ mod tests { #[test] fn failed_migration_rolls_back() { - let bad_v16 = VersionedMigration { - version: 16, + let bad_v17 = VersionedMigration { + version: 17, name: "bad_migration", up: "THIS IS NOT VALID SQL;", }; let mut conn = open_memory(); - // Apply all real migrations (V1..V15) so the DB is at V15. - run_migrations(&mut conn).expect("V1..V15 should apply cleanly"); + // Apply all real migrations (V1..V16) so the DB is at V16. + run_migrations(&mut conn).expect("V1..V16 should apply cleanly"); - // Now manually drive the bad V16 migration to check rollback behaviour. - let result = apply_single_migration(&mut conn, &bad_v16); + // Now manually drive the bad V17 migration to check rollback behaviour. + let result = apply_single_migration(&mut conn, &bad_v17); assert!(result.is_err(), "bad migration should return error"); - // DB should still be at V15 — no V16 row in tracking. - let v16_count: i64 = conn + // DB should still be at V16 — no V17 row in tracking. + let v17_count: i64 = conn .query_row( - "SELECT COUNT(*) FROM _schema_migrations WHERE version = 16", + "SELECT COUNT(*) FROM _schema_migrations WHERE version = 17", [], |row| row.get(0), ) .unwrap(); - assert_eq!(v16_count, 0, "V16 must not be recorded after rollback"); + assert_eq!(v17_count, 0, "V17 must not be recorded after rollback"); - // V1..V15 should still be there. + // V1..V16 should still be there. 
let applied_count: i64 = conn .query_row( - "SELECT COUNT(*) FROM _schema_migrations WHERE version IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)", + "SELECT COUNT(*) FROM _schema_migrations WHERE version IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)", [], |row| row.get(0), ) .unwrap(); - assert_eq!(applied_count, 15, "V1..V15 must still be recorded"); + assert_eq!( + applied_count, 16, + "V1..V16 must still be recorded after V17 rollback" + ); } #[test] @@ -1198,9 +1344,10 @@ mod tests { // V12 should detect that salience is already nullable and skip; // V13 adds event observability columns and event_observations table; // V14 creates the _embedding_models registry table; - // V15 creates the proposals_open table. + // V15 creates the proposals_open table; + // V16 adds embedding_model column to regular vec_ tables. let version = run_migrations(&mut conn).expect("migrations after store DDL"); - assert_eq!(version, 15); + assert_eq!(version, 16); // V2 should be recorded as applied (skipped but tracked). let v2_count: i64 = conn @@ -1390,9 +1537,9 @@ mod tests { ) .unwrap(); - // Run V2-V15 migrations. + // Run V2-V16 migrations. let version = run_migrations(&mut conn).expect("migrations should succeed"); - assert_eq!(version, 15); + assert_eq!(version, 16); // After V12, salience must be nullable (notnull=0). 
let notnull: i64 = conn @@ -1436,7 +1583,7 @@ mod tests { ensure_events_schema(&conn).expect("store DDL should create events"); let version = run_migrations(&mut conn).expect("migrations after events store DDL"); - assert_eq!(version, 15, "must reach V15 even when events DDL ran first"); + assert_eq!(version, 16, "must reach V16 even when events DDL ran first"); let v13_count: i64 = conn .query_row( @@ -1477,8 +1624,8 @@ mod tests { let mut conn = open_memory(); let version = run_migrations(&mut conn).expect("migrations should succeed"); assert_eq!( - version, 15, - "F227: latest migration must be V15 (proposals_open)" + version, 16, + "F227: latest migration must be V16 (vector_embedding_model_tag)" ); // Verify _embedding_models table exists. @@ -1575,7 +1722,7 @@ mod tests { // Run the full migration suite — V14 should add embedding_model_id to the // regular vec_legacy_model table. let version = run_migrations(&mut conn).expect("migrations should succeed"); - assert_eq!(version, 15); + assert_eq!(version, 16); // The embedding_model_id column must now exist. let col_exists: bool = conn @@ -1592,7 +1739,7 @@ mod tests { // Running migrations again must be idempotent (column already present). let version2 = run_migrations(&mut conn).expect("second run must succeed"); - assert_eq!(version2, 15); + assert_eq!(version2, 16); } /// CRIT-2 regression: V14 discovery filter must NOT match sqlite-vec internal @@ -1624,7 +1771,7 @@ mod tests { // Run the full migration suite — V14 must not add `embedding_model_id` to // any of the four shadow tables above. 
let version = run_migrations(&mut conn).expect("migrations should succeed"); - assert_eq!(version, 15); + assert_eq!(version, 16); for shadow in [ "vec_test_chunks", diff --git a/crates/khive-db/src/stores/vectors.rs b/crates/khive-db/src/stores/vectors.rs index 3fa06de5..3b4754eb 100644 --- a/crates/khive-db/src/stores/vectors.rs +++ b/crates/khive-db/src/stores/vectors.rs @@ -86,6 +86,7 @@ pub struct SqliteVecStore { pool: Arc, is_file_backed: bool, model_key: String, + embedding_model: String, dimensions: usize, table_name: String, namespace: String, @@ -99,6 +100,7 @@ impl SqliteVecStore { pool: Arc, is_file_backed: bool, model_key: String, + embedding_model: String, dimensions: usize, namespace: String, ) -> Result { @@ -108,6 +110,7 @@ impl SqliteVecStore { pool, is_file_backed, model_key, + embedding_model, dimensions, table_name, namespace, @@ -200,6 +203,7 @@ impl VectorStore for SqliteVecStore { let namespace = namespace.to_string(); let field = field.to_string(); let kind_str = kind.to_string(); + let embedding_model = self.embedding_model.clone(); if embedding.len() == dims { if let Some(idx) = non_finite_index(&embedding) { @@ -226,13 +230,21 @@ impl VectorStore for SqliteVecStore { )?; let ins_sql = format!( - "INSERT INTO {} (subject_id, namespace, kind, field, embedding) VALUES (?1, ?2, ?3, ?4, ?5)", + "INSERT INTO {} (subject_id, namespace, kind, field, embedding_model, embedding) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6)", table ); let blob = f32_slice_as_bytes(&embedding); conn.execute( &ins_sql, - rusqlite::params![subject_id.to_string(), &namespace, &kind_str, &field, blob], + rusqlite::params![ + subject_id.to_string(), + &namespace, + &kind_str, + &field, + &embedding_model, + blob + ], )?; Ok(()) }) @@ -246,6 +258,7 @@ impl VectorStore for SqliteVecStore { let table = self.table_name.clone(); let dims = self.dimensions; let attempted = records.len() as u64; + let store_embedding_model = self.embedding_model.clone(); 
self.with_writer("vec_insert_batch", move |conn| { let del_sql = format!( @@ -253,7 +266,8 @@ impl VectorStore for SqliteVecStore { table ); let ins_sql = format!( - "INSERT INTO {} (subject_id, namespace, kind, field, embedding) VALUES (?1, ?2, ?3, ?4, ?5)", + "INSERT INTO {} (subject_id, namespace, kind, field, embedding_model, embedding) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6)", table ); @@ -282,7 +296,14 @@ impl VectorStore for SqliteVecStore { let _ = conn.execute(&del_sql, rusqlite::params![&id_str, &record.namespace]); match conn.execute( &ins_sql, - rusqlite::params![&id_str, &record.namespace, &kind_str, &record.field, blob], + rusqlite::params![ + &id_str, + &record.namespace, + &kind_str, + &record.field, + &store_embedding_model, + blob + ], ) { Ok(_) => affected += 1, Err(_) => failed += 1, @@ -358,6 +379,11 @@ impl VectorStore for SqliteVecStore { .clone() .unwrap_or_else(|| self.namespace.clone()); let kind_filter = request.kind.map(|k| k.to_string()); + // Use the request's embedding_model filter, or fall back to this store's model. + let effective_model = request + .embedding_model + .clone() + .unwrap_or_else(|| self.embedding_model.clone()); if query_embedding.len() == dims { if let Some(idx) = non_finite_index(&query_embedding) { @@ -377,10 +403,10 @@ impl VectorStore for SqliteVecStore { )); } - // Restrict candidate set to namespace (and optionally kind) via subquery, - // then MATCH-rank by embedding distance. + // Restrict candidate set to namespace+embedding_model (and optionally kind) + // via subquery, then MATCH-rank by embedding distance. 
let subquery_kind_clause = if kind_filter.is_some() { - "AND kind = ?4" + "AND kind = ?5" } else { "" }; @@ -389,7 +415,8 @@ impl VectorStore for SqliteVecStore { FROM {t} \ WHERE embedding MATCH ?1 \ AND subject_id IN (\ - SELECT subject_id FROM {t} WHERE namespace = ?3 {kind_clause}\ + SELECT subject_id FROM {t} \ + WHERE namespace = ?3 AND embedding_model = ?4 {kind_clause}\ ) \ ORDER BY distance \ LIMIT ?2", @@ -405,7 +432,13 @@ impl VectorStore for SqliteVecStore { let raw_rows: Vec> = if let Some(ref kind_str) = kind_filter { stmt.query_map( - rusqlite::params![query_blob, request.top_k, &namespace, kind_str], + rusqlite::params![ + query_blob, + request.top_k, + &namespace, + &effective_model, + kind_str + ], |row| { let id_str: String = row.get(0)?; let distance: f64 = row.get(1)?; @@ -415,7 +448,7 @@ impl VectorStore for SqliteVecStore { .collect() } else { stmt.query_map( - rusqlite::params![query_blob, request.top_k, &namespace], + rusqlite::params![query_blob, request.top_k, &namespace, &effective_model], |row| { let id_str: String = row.get(0)?; let distance: f64 = row.get(1)?; @@ -531,6 +564,7 @@ impl SqliteVecStore { let table = self.table_name.clone(); let namespace = self.namespace.clone(); + let embedding_model = self.embedding_model.clone(); let query_vec = query_embedding.to_vec(); let ids: Vec = candidate_ids.iter().map(|id| id.to_string()).collect(); @@ -542,22 +576,24 @@ impl SqliteVecStore { let placeholders: String = chunk .iter() .enumerate() - .map(|(i, _)| format!("?{}", i + 3)) + .map(|(i, _)| format!("?{}", i + 4)) .collect::>() .join(", "); let sql = format!( "SELECT e.subject_id, vec_distance_cosine(e.embedding, ?1) as distance \ FROM {} e \ - WHERE e.namespace = ?2 AND e.subject_id IN ({})", + WHERE e.namespace = ?2 AND e.embedding_model = ?3 \ + AND e.subject_id IN ({})", table, placeholders ); let mut stmt = conn.prepare(&sql)?; stmt.raw_bind_parameter(1, query_blob)?; stmt.raw_bind_parameter(2, namespace.as_str())?; + 
stmt.raw_bind_parameter(3, embedding_model.as_str())?; for (i, id_str) in chunk.iter().enumerate() { - stmt.raw_bind_parameter(i + 3, id_str.as_str())?; + stmt.raw_bind_parameter(i + 4, id_str.as_str())?; } let mut rows = stmt.raw_query(); @@ -612,6 +648,7 @@ mod capabilities_tests { make_pool(), /*is_file_backed=*/ false, "test_model".into(), + "test_model".into(), /*dimensions=*/ 4, "ns:test".into(), ) @@ -657,6 +694,7 @@ mod capabilities_tests { make_pool(), false, "test_dim_limit".into(), + "test_dim_limit".into(), /*dimensions=*/ 4, "ns:test".into(), ) @@ -684,6 +722,7 @@ mod capabilities_tests { make_pool(), false, "test_idempotent".into(), + "test_idempotent".into(), 4, "ns:test".into(), ) diff --git a/crates/khive-db/tests/contract/backend.rs b/crates/khive-db/tests/contract/backend.rs index bbe296f5..34129257 100644 --- a/crates/khive-db/tests/contract/backend.rs +++ b/crates/khive-db/tests/contract/backend.rs @@ -337,7 +337,7 @@ mod vector_contract { async fn test_vector_store(backend: &StorageBackend) { let store = backend - .vectors_for_namespace("ct_model", 4, "ct_ns") + .vectors_for_namespace("ct_model", "ct_model", 4, "ct_ns") .expect("vector store"); let id = Uuid::new_v4(); @@ -361,6 +361,7 @@ mod vector_contract { top_k: 1, namespace: None, kind: None, + embedding_model: None, filter: None, backend_hints: None, }) diff --git a/crates/khive-db/tests/contract/vector_filter.rs b/crates/khive-db/tests/contract/vector_filter.rs index 5b37ed35..e9be4745 100644 --- a/crates/khive-db/tests/contract/vector_filter.rs +++ b/crates/khive-db/tests/contract/vector_filter.rs @@ -18,7 +18,9 @@ mod vector_filter_contract { #[tokio::test] async fn search_with_non_empty_filter_returns_unsupported() { let backend = StorageBackend::memory().expect("in-memory backend"); - let store = backend.vectors("filter_test", 3).expect("vector store"); + let store = backend + .vectors("filter_test", "filter_test", 3) + .expect("vector store"); // Insert one record so the table is 
non-empty. let id = Uuid::new_v4(); @@ -39,6 +41,7 @@ mod vector_filter_contract { top_k: 5, namespace: None, kind: None, + embedding_model: None, filter: Some(VectorMetadataFilter { namespaces: vec!["local".into()], kinds: vec![], @@ -64,7 +67,9 @@ mod vector_filter_contract { #[tokio::test] async fn search_with_filter_empty_delegates_and_non_empty_rejects() { let backend = StorageBackend::memory().expect("in-memory backend"); - let store = backend.vectors("filter_delegate", 3).expect("vector store"); + let store = backend + .vectors("filter_delegate", "filter_delegate", 3) + .expect("vector store"); let id = Uuid::new_v4(); store @@ -83,6 +88,7 @@ mod vector_filter_contract { top_k: 1, namespace: None, kind: None, + embedding_model: None, filter: None, backend_hints: None, }; @@ -154,7 +160,7 @@ mod vector_filter_contract { // the old schema and rebuild the table transparently. let new_backend = StorageBackend::sqlite(&db_path).expect("reopen db"); let store = new_backend - .vectors_for_namespace("old_model", 3, "local") + .vectors_for_namespace("old_model", "old_model", 3, "local") .expect("vectors_for_namespace must succeed after schema rebuild"); // Step 3: insert and search in the new shape must work. 
@@ -176,6 +182,7 @@ mod vector_filter_contract { top_k: 1, namespace: None, kind: None, + embedding_model: None, filter: None, backend_hints: None, }) diff --git a/crates/khive-pack-memory/src/handlers.rs b/crates/khive-pack-memory/src/handlers.rs index 3bcb84cc..00a5310a 100644 --- a/crates/khive-pack-memory/src/handlers.rs +++ b/crates/khive-pack-memory/src/handlers.rs @@ -61,6 +61,8 @@ struct RememberParams { #[serde(alias = "source")] source_id: Option, tags: Option>, + #[serde(default)] + embedding_model: Option, } #[derive(Deserialize)] @@ -74,6 +76,8 @@ struct RecallParams { top_k: Option, fusion_strategy: Option, score_floor: Option, + #[serde(default)] + embedding_model: Option, } impl RecallParams { @@ -279,6 +283,7 @@ impl MemoryPack { query: &str, token: &NamespaceToken, candidate_limit: u32, + embedding_model: Option<&str>, ) -> Result { let ns = token.namespace().as_str().to_string(); // F111: restrict text candidates to Note substrate kind so entity records @@ -299,23 +304,28 @@ impl MemoryPack { }) .await?; - let vector_hits = if self.runtime.config().embedding_model.is_some() { - let vec = self.runtime.embed(query).await?; - self.runtime - .vectors(token)? - .search(VectorSearchRequest { - query_vectors: vec![vec], - top_k: candidate_limit, - namespace: Some(ns.clone()), - // F111: already restricts to Note substrate kind - kind: Some(SubstrateKind::Note), - filter: None, - backend_hints: None, - }) - .await? - } else { - Vec::new() - }; + let vector_hits = + if self.runtime.config().embedding_model.is_some() || embedding_model.is_some() { + let model_name: String = embedding_model + .map(|m| m.to_string()) + .unwrap_or_else(|| self.runtime.default_embedder_name().to_string()); + let vec = self.runtime.embed_with_model(&model_name, query).await?; + self.runtime + .vectors_for_model(token, &model_name)? 
+ .search(VectorSearchRequest { + query_vectors: vec![vec], + top_k: candidate_limit, + namespace: Some(ns.clone()), + // F111: already restricts to Note substrate kind + kind: Some(SubstrateKind::Note), + embedding_model: Some(model_name), + filter: None, + backend_hints: None, + }) + .await? + } else { + Vec::new() + }; Ok(RecallCandidateSet { namespace: ns, @@ -418,7 +428,7 @@ impl MemoryPack { let note = self .runtime - .create_note_with_decay( + .create_note_with_decay_for_embedding_model( token, "memory", None, @@ -427,6 +437,7 @@ impl MemoryPack { decay_factor, Some(props), annotates, + p.embedding_model.as_deref(), ) .await?; @@ -483,7 +494,12 @@ impl MemoryPack { }; let candidate_limit = recall_candidate_count(&cfg, limit); let candidates = self - .collect_recall_candidates(&p.query, token, candidate_limit) + .collect_recall_candidates( + &p.query, + token, + candidate_limit, + p.embedding_model.as_deref(), + ) .await?; let (memory_ids, mut notes_by_id) = self .load_memory_candidate_notes(token, &candidates.text_hits, &candidates.vector_hits) @@ -604,7 +620,12 @@ impl MemoryPack { let limit = p.limit.unwrap_or(10).min(100); let candidate_limit = recall_candidate_count(&cfg, limit); let candidates = self - .collect_recall_candidates(&p.query, token, candidate_limit) + .collect_recall_candidates( + &p.query, + token, + candidate_limit, + p.embedding_model.as_deref(), + ) .await?; let text_candidates: Vec = candidates @@ -657,7 +678,12 @@ impl MemoryPack { let limit = p.limit.unwrap_or(10).min(100); let candidate_limit = recall_candidate_count(&cfg, limit); let candidates = self - .collect_recall_candidates(&p.query, token, candidate_limit) + .collect_recall_candidates( + &p.query, + token, + candidate_limit, + p.embedding_model.as_deref(), + ) .await?; let (memory_ids, notes_by_id) = self .load_memory_candidate_notes(token, &candidates.text_hits, &candidates.vector_hits) @@ -811,6 +837,7 @@ mod tests { top_k: None, fusion_strategy: None, score_floor: None, + 
embedding_model: None, }; let cfg = p.effective_config(RecallConfig::default()); assert!((cfg.relevance_weight - 0.70).abs() < 1e-12); @@ -830,6 +857,7 @@ mod tests { top_k: None, fusion_strategy: None, score_floor: None, + embedding_model: None, }; let cfg = p.effective_config(RecallConfig::default()); assert!((cfg.min_score - 0.5).abs() < 1e-12); @@ -851,6 +879,7 @@ mod tests { top_k: None, fusion_strategy: None, score_floor: None, + embedding_model: None, }; let cfg = p.effective_config(RecallConfig::default()); assert!((cfg.relevance_weight - 0.50).abs() < 1e-12); @@ -881,6 +910,7 @@ mod tests { top_k: None, fusion_strategy: Some("weighted".to_string()), score_floor: None, + embedding_model: None, }; let mut cfg = p.effective_config(base); diff --git a/crates/khive-retrieval/src/adapters/mod.rs b/crates/khive-retrieval/src/adapters/mod.rs index bcad7b45..5b233d0e 100644 --- a/crates/khive-retrieval/src/adapters/mod.rs +++ b/crates/khive-retrieval/src/adapters/mod.rs @@ -110,6 +110,7 @@ impl VectorSearch for StorageVectorSearch { top_k: top_k as u32, namespace: None, kind: None, + embedding_model: None, filter: None, backend_hints: None, }; diff --git a/crates/khive-runtime/src/error.rs b/crates/khive-runtime/src/error.rs index 5d5f2cc3..19960375 100644 --- a/crates/khive-runtime/src/error.rs +++ b/crates/khive-runtime/src/error.rs @@ -78,6 +78,9 @@ pub enum RuntimeError { #[error("unconfigured: {0} is not set")] Unconfigured(String), + #[error("unknown embedding model: {0}")] + UnknownModel(String), + #[error("embedding: {0}")] Embedding(#[from] lattice_embed::EmbedError), diff --git a/crates/khive-runtime/src/operations.rs b/crates/khive-runtime/src/operations.rs index a5abb6bb..9a6add6a 100644 --- a/crates/khive-runtime/src/operations.rs +++ b/crates/khive-runtime/src/operations.rs @@ -812,7 +812,7 @@ impl KhiveRuntime { annotates: Vec, ) -> RuntimeResult { self.create_note_inner( - token, kind, name, content, salience, None, properties, annotates, + token, 
kind, name, content, salience, None, properties, annotates, None, ) .await } @@ -829,6 +829,34 @@ impl KhiveRuntime { decay_factor: f64, properties: Option, annotates: Vec, + ) -> RuntimeResult { + self.create_note_with_decay_for_embedding_model( + token, + kind, + name, + content, + salience, + decay_factor, + properties, + annotates, + None, + ) + .await + } + + /// Like [`create_note_with_decay`] but targets a specific embedding model. + #[allow(clippy::too_many_arguments)] + pub async fn create_note_with_decay_for_embedding_model( + &self, + token: &NamespaceToken, + kind: &str, + name: Option<&str>, + content: &str, + salience: Option, + decay_factor: f64, + properties: Option, + annotates: Vec, + embedding_model: Option<&str>, ) -> RuntimeResult { self.create_note_inner( token, @@ -839,6 +867,7 @@ impl KhiveRuntime { Some(decay_factor), properties, annotates, + embedding_model, ) .await } @@ -854,6 +883,7 @@ impl KhiveRuntime { decay_factor: Option, properties: Option, annotates: Vec, + embedding_model: Option<&str>, ) -> RuntimeResult { let ns = token.namespace().as_str(); @@ -899,9 +929,20 @@ impl KhiveRuntime { }) .await?; - if self.config().embedding_model.is_some() { - let vector = self.embed(¬e.content).await?; - self.vectors(token)? + let embed_model_name: Option = + if self.config().embedding_model.is_some() || embedding_model.is_some() { + Some( + embedding_model + .map(|m| m.to_string()) + .unwrap_or_else(|| self.default_embedder_name().to_string()), + ) + } else { + None + }; + + if let Some(ref model_name) = embed_model_name { + let vector = self.embed_with_model(model_name, ¬e.content).await?; + self.vectors_for_model(token, model_name)? 
.insert( note.id, SubstrateKind::Note, @@ -989,8 +1030,8 @@ impl KhiveRuntime { if let Ok(fts) = self.text_for_notes(token) { let _ = fts.delete_document(ns, note.id).await; } - if self.config().embedding_model.is_some() { - if let Ok(vs) = self.vectors(token) { + if let Some(ref model_name) = embed_model_name { + if let Ok(vs) = self.vectors_for_model(token, model_name) { let _ = vs.delete(note.id).await; } } diff --git a/crates/khive-runtime/src/retrieval.rs b/crates/khive-runtime/src/retrieval.rs index 78585c2e..aeb7cae2 100644 --- a/crates/khive-runtime/src/retrieval.rs +++ b/crates/khive-runtime/src/retrieval.rs @@ -41,20 +41,26 @@ const RRF_K: usize = 60; const CANDIDATE_MULTIPLIER: u32 = 4; impl KhiveRuntime { - /// Generate an embedding vector for `text` using the configured local model. + /// Generate an embedding vector for `text` using the configured default model. /// /// First call lazily loads model weights (cold start cost). Subsequent calls reuse them. /// Returns `Unconfigured("embedding_model")` if no model is configured. pub async fn embed(&self, text: &str) -> RuntimeResult> { - let service = self.embedder().await?; - let model = self - .config() - .embedding_model - .expect("embedder() returns Unconfigured when model is None"); + let model_name = self.default_embedder_name(); + if model_name.is_empty() { + return Err(RuntimeError::Unconfigured("embedding_model".into())); + } + self.embed_with_model(model_name, text).await + } + + /// Generate an embedding vector for `text` using the named model. + pub async fn embed_with_model(&self, model_name: &str, text: &str) -> RuntimeResult> { + let model = self.resolve_embedding_model(Some(model_name))?; + let service = self.embedder(model_name).await?; Ok(service.embed_one(text, model).await?) } - /// Generate embeddings for multiple texts in one call. + /// Generate embeddings for multiple texts in one call using the configured default model. 
/// /// Delegates to the cached `EmbeddingService::embed`, so repeated texts within /// and across calls benefit from the runtime-level LRU cache. @@ -65,11 +71,24 @@ impl KhiveRuntime { if texts.is_empty() { return Ok(vec![]); } - let service = self.embedder().await?; - let model = self - .config() - .embedding_model - .expect("embedder() returns Unconfigured when model is None"); + let model_name = self.default_embedder_name(); + if model_name.is_empty() { + return Err(RuntimeError::Unconfigured("embedding_model".into())); + } + self.embed_batch_with_model(model_name, texts).await + } + + /// Generate embeddings for multiple texts using the named model. + pub async fn embed_batch_with_model( + &self, + model_name: &str, + texts: &[String], + ) -> RuntimeResult>> { + if texts.is_empty() { + return Ok(vec![]); + } + let model = self.resolve_embedding_model(Some(model_name))?; + let service = self.embedder(model_name).await?; Ok(service.embed(texts, model).await?) } @@ -111,6 +130,7 @@ impl KhiveRuntime { top_k, namespace: Some(ns), kind, + embedding_model: None, filter: None, backend_hints: None, }) @@ -242,6 +262,7 @@ impl KhiveRuntime { top_k, namespace: Some(ns), kind: Some(SubstrateKind::Entity), + embedding_model: None, filter: None, backend_hints: None, }) @@ -269,6 +290,7 @@ impl KhiveRuntime { top_k: candidate_ids.len() as u32, namespace: Some(ns), kind: Some(SubstrateKind::Entity), + embedding_model: None, filter: None, backend_hints: None, }) diff --git a/crates/khive-runtime/src/runtime.rs b/crates/khive-runtime/src/runtime.rs index 1babe5b8..27b14eb9 100644 --- a/crates/khive-runtime/src/runtime.rs +++ b/crates/khive-runtime/src/runtime.rs @@ -1,6 +1,9 @@ //! KhiveRuntime — composable handle to all storage capabilities. 
-use std::sync::{Arc, RwLock}; +use std::{ + collections::HashMap, + sync::{Arc, RwLock}, +}; use khive_db::StorageBackend; use khive_gate::{ActorRef, AllowAllGate, GateRef}; @@ -136,6 +139,13 @@ pub struct RuntimeConfig { /// `EmbedderRegistry`. This field persists for backward compatibility until /// the embedder registry is fully plumbed. pub embedding_model: Option, + /// Additional embedding models to make available by request name. + /// + /// `embedding_model` remains the default used by existing `embed()` and + /// `embed_batch()` callers. This list adds non-default models that can be + /// selected with `embedder(name)`, `embed_with_model(...)`, memory + /// `remember.embedding_model`, and memory `recall.embedding_model`. + pub additional_embedding_models: Vec, /// Authorization gate consulted before each verb dispatch (ADR-029). /// Default: `AllowAllGate` (permissive). For production policy enforcement, /// plug in a Rego- or capability-witness-backed impl. @@ -173,6 +183,10 @@ impl Default for RuntimeConfig { .ok() .and_then(|s| s.parse().ok()) .or(Some(EmbeddingModel::AllMiniLmL6V2)); + let additional_embedding_models = std::env::var("KHIVE_ADDITIONAL_EMBEDDING_MODELS") + .ok() + .map(|s| parse_embedding_model_list(&s)) + .unwrap_or_default(); let packs = std::env::var("KHIVE_PACKS") .ok() .map(|s| parse_pack_list(&s)) @@ -182,6 +196,7 @@ impl Default for RuntimeConfig { db_path, default_namespace: Namespace::local(), embedding_model, + additional_embedding_models, gate: Arc::new(AllowAllGate), packs, backend_id: BackendId::main(), @@ -191,6 +206,12 @@ impl Default for RuntimeConfig { // ---- KhiveRuntime ---- +#[derive(Clone)] +struct EmbedderEntry { + model: EmbeddingModel, + cell: Arc>>, +} + /// Composable runtime handle used by the MCP server. 
/// /// Wraps a `StorageBackend` and provides namespace-scoped accessor methods @@ -199,7 +220,8 @@ impl Default for RuntimeConfig { pub struct KhiveRuntime { backend: Arc, config: RuntimeConfig, - embedder: Arc>>, + embedders: Arc>, + default_embedder_name: Arc, /// Pack-extensible edge endpoint rules (ADR-031). Shared across clones /// via `Arc>`; installed once by the transport after the /// `VerbRegistry` is built. Empty until installed — base rules @@ -223,10 +245,13 @@ impl KhiveRuntime { } None => StorageBackend::memory()?, }; + register_configured_embedding_models(&backend, &config)?; + let (embedders, default_embedder_name) = build_embedder_registry(&config); Ok(Self { backend: Arc::new(backend), config, - embedder: Arc::new(OnceCell::new()), + embedders: Arc::new(embedders), + default_embedder_name, edge_rules: Arc::new(RwLock::new(Vec::new())), }) } @@ -241,10 +266,15 @@ impl KhiveRuntime { /// storage access is through the provided `backend`. Set `backend_id` and /// `default_namespace` via the config builder pattern if non-defaults are needed. 
pub fn from_backend(backend: Arc, config: RuntimeConfig) -> Self { + if let Err(err) = register_configured_embedding_models(&backend, &config) { + tracing::warn!(error = %err, "failed to register configured embedding models"); + } + let (embedders, default_embedder_name) = build_embedder_registry(&config); Self { backend, config, - embedder: Arc::new(OnceCell::new()), + embedders: Arc::new(embedders), + default_embedder_name, edge_rules: Arc::new(RwLock::new(Vec::new())), } } @@ -255,6 +285,7 @@ impl KhiveRuntime { db_path: None, default_namespace: Namespace::local(), embedding_model: None, + additional_embedding_models: vec![], gate: Arc::new(AllowAllGate), packs: vec!["kg".to_string()], backend_id: BackendId::main(), @@ -321,12 +352,28 @@ impl KhiveRuntime { &self, token: &NamespaceToken, ) -> RuntimeResult> { - let model = self - .config - .embedding_model - .ok_or_else(|| crate::RuntimeError::Unconfigured("embedding_model".into()))?; + let model = self.resolve_embedding_model(None)?; + self.vectors_for_embedding_model(token, model) + } + + /// Get a VectorStore for a specific named embedding model, scoped to the token's namespace. + pub fn vectors_for_model( + &self, + token: &NamespaceToken, + model_name: &str, + ) -> RuntimeResult> { + let model = self.resolve_embedding_model(Some(model_name))?; + self.vectors_for_embedding_model(token, model) + } + + fn vectors_for_embedding_model( + &self, + token: &NamespaceToken, + model: EmbeddingModel, + ) -> RuntimeResult> { Ok(self.backend.vectors_for_namespace( &vec_model_key(model), + &model.to_string(), model.dimensions(), token.namespace().as_str(), )?) @@ -380,28 +427,57 @@ impl KhiveRuntime { .unwrap_or_default() } - /// Get the lazily-initialized embedding service. + /// Return the name of the default embedding model (empty string if none configured). 
+ pub fn default_embedder_name(&self) -> &str { + self.default_embedder_name.as_ref() + } + + /// Resolve a model name (or `None` for the default) to an `EmbeddingModel`. + /// + /// Returns `UnknownModel` if the name is not in the registry, or + /// `Unconfigured` if `None` is passed and no default model is set. + pub fn resolve_embedding_model(&self, name: Option<&str>) -> RuntimeResult { + let model = match name { + Some(raw) => parse_embedding_model_alias(raw) + .ok_or_else(|| crate::RuntimeError::UnknownModel(raw.to_string()))?, + None => self + .config + .embedding_model + .ok_or_else(|| crate::RuntimeError::Unconfigured("embedding_model".into()))?, + }; + let key = model.to_string(); + if self.embedders.contains_key(&key) { + Ok(model) + } else { + Err(crate::RuntimeError::UnknownModel( + name.unwrap_or_else(|| self.default_embedder_name()) + .to_string(), + )) + } + } + + /// Get the lazily-initialized embedding service for the named model. /// /// Returns a `CachedEmbeddingService` wrapping a `NativeEmbeddingService`. /// First call loads the model (cold start cost); subsequent calls are cheap and /// benefit from LRU caching of repeated inputs. - /// - /// Returns `Unconfigured("embedding_model")` if no model is set. - pub async fn embedder(&self) -> RuntimeResult> { - let model = self - .config - .embedding_model - .ok_or_else(|| crate::RuntimeError::Unconfigured("embedding_model".into()))?; - let service = self - .embedder + pub async fn embedder(&self, name: &str) -> RuntimeResult> { + let model = self.resolve_embedding_model(Some(name))?; + let key = model.to_string(); + let entry = self + .embedders + .get(&key) + .ok_or_else(|| crate::RuntimeError::UnknownModel(name.to_string()))? 
+ .clone(); + Ok(entry + .cell .get_or_init(|| async move { - let native = Arc::new(NativeEmbeddingService::with_model(model)); + let native = Arc::new(NativeEmbeddingService::with_model(entry.model)); let cached = CachedEmbeddingService::with_default_cache(native); Arc::new(cached) as Arc }) .await - .clone(); - Ok(service) + .clone()) } } @@ -417,6 +493,66 @@ fn sanitize_key(s: &str) -> String { .collect() } +fn build_embedder_registry(config: &RuntimeConfig) -> (HashMap, Arc) { + let mut embedders = HashMap::new(); + for model in configured_embedding_models(config) { + embedders.insert( + model.to_string(), + EmbedderEntry { + model, + cell: Arc::new(OnceCell::new()), + }, + ); + } + let default_embedder_name = config + .embedding_model + .map(|model| Arc::::from(model.to_string())) + .unwrap_or_else(|| Arc::::from("")); + (embedders, default_embedder_name) +} + +fn configured_embedding_models(config: &RuntimeConfig) -> Vec { + let mut models = Vec::new(); + if let Some(model) = config.embedding_model { + models.push(model); + } + models.extend(config.additional_embedding_models.iter().copied()); + models.sort_by_key(|model| model.to_string()); + models.dedup(); + models +} + +fn register_configured_embedding_models( + backend: &StorageBackend, + config: &RuntimeConfig, +) -> RuntimeResult<()> { + for model in configured_embedding_models(config) { + backend.register_embedding_model( + &model.to_string(), + model.model_id(), + model.key_version(), + model.dimensions() as u32, + )?; + } + Ok(()) +} + +/// Parse a comma- or whitespace-separated list of embedding model names. 
+fn parse_embedding_model_list(s: &str) -> Vec { + parse_pack_list(s) + .into_iter() + .filter_map(|raw| parse_embedding_model_alias(&raw)) + .collect() +} + +fn parse_embedding_model_alias(name: &str) -> Option { + let normalized = name.trim().to_ascii_lowercase().replace('_', "-"); + match normalized.as_str() { + "paraphrase" => Some(EmbeddingModel::ParaphraseMultilingualMiniLmL12V2), + _ => normalized.parse().ok(), + } +} + #[cfg(test)] mod tests { use super::*; @@ -435,6 +571,7 @@ mod tests { db_path: Some(path.clone()), default_namespace: Namespace::parse("test").unwrap(), embedding_model: None, + additional_embedding_models: vec![], gate: Arc::new(AllowAllGate), packs: vec!["kg".to_string()], backend_id: BackendId::main(), @@ -451,6 +588,7 @@ mod tests { db_path: None, default_namespace: Namespace::local(), embedding_model: None, + additional_embedding_models: vec![], gate: Arc::new(AllowAllGate), packs: vec!["kg".to_string()], backend_id: BackendId::new("lore"), diff --git a/crates/khive-runtime/tests/integration.rs b/crates/khive-runtime/tests/integration.rs index 7775386b..257b4f06 100644 --- a/crates/khive-runtime/tests/integration.rs +++ b/crates/khive-runtime/tests/integration.rs @@ -568,6 +568,7 @@ async fn file_backed_runtime_persists() { gate: std::sync::Arc::new(khive_runtime::AllowAllGate), packs: vec!["kg".to_string()], backend_id: khive_runtime::BackendId::main(), + additional_embedding_models: vec![], }; let rt = KhiveRuntime::new(config).unwrap(); let tok = rt.authorize(Namespace::local()); @@ -585,6 +586,7 @@ async fn file_backed_runtime_persists() { gate: std::sync::Arc::new(khive_runtime::AllowAllGate), packs: vec!["kg".to_string()], backend_id: khive_runtime::BackendId::main(), + additional_embedding_models: vec![], }; let rt = KhiveRuntime::new(config).unwrap(); let tok = rt.authorize(Namespace::local()); diff --git a/crates/khive-storage/src/types.rs b/crates/khive-storage/src/types.rs index 70430009..11066599 100644 --- 
a/crates/khive-storage/src/types.rs +++ b/crates/khive-storage/src/types.rs @@ -181,6 +181,8 @@ pub struct VectorRecord { pub namespace: String, /// Which embedding field this record represents (e.g. `"entity.body"`). pub field: String, + #[serde(default)] + pub embedding_model: Option, /// One or many dense vectors; sqlite-vec backends enforce `vectors.len() == 1`. pub vectors: Vec>, pub updated_at: DateTime, @@ -193,6 +195,9 @@ pub struct VectorSearchRequest { pub top_k: u32, pub namespace: Option, pub kind: Option, + /// Restrict results to this embedding model. Defaults to the store's own model. + #[serde(default)] + pub embedding_model: Option, /// Optional metadata filter for backends that support pushdown. pub filter: Option, /// Backend-specific hints (opaque JSON blob, ignored by default). diff --git a/crates/khive-storage/src/vectors.rs b/crates/khive-storage/src/vectors.rs index 95bf1161..0e6cc797 100644 --- a/crates/khive-storage/src/vectors.rs +++ b/crates/khive-storage/src/vectors.rs @@ -307,6 +307,7 @@ mod tests { top_k: 5, namespace: None, kind: None, + embedding_model: None, filter: None, backend_hints: None, }; @@ -326,6 +327,7 @@ mod tests { top_k: 5, namespace: None, kind: None, + embedding_model: None, filter: None, backend_hints: None, }; @@ -352,6 +354,7 @@ mod tests { top_k: 3, namespace: None, kind: None, + embedding_model: None, filter: None, backend_hints: None, }, @@ -360,6 +363,7 @@ mod tests { top_k: 3, namespace: None, kind: None, + embedding_model: None, filter: None, backend_hints: None, }, @@ -433,6 +437,7 @@ mod tests { top_k: 1, namespace: None, kind: None, + embedding_model: None, filter: None, backend_hints: None, }]; diff --git a/crates/kkernel/Cargo.toml b/crates/kkernel/Cargo.toml index ba70f096..2f1354e4 100644 --- a/crates/kkernel/Cargo.toml +++ b/crates/kkernel/Cargo.toml @@ -12,6 +12,7 @@ description = "khive kernel — admin/management Rust binary (sync, pack introsp [dependencies] khive-runtime = { version = "0.2.2", 
path = "../khive-runtime" } +khive-db = { version = "0.2.2", path = "../khive-db" } khive-storage = { version = "0.2.2", path = "../khive-storage" } khive-types = { version = "0.2.2", path = "../khive-types" } khive-vcs = { version = "0.2.2", path = "../khive-vcs" } diff --git a/crates/kkernel/src/engine.rs b/crates/kkernel/src/engine.rs index d16aee6c..6e16923e 100644 --- a/crates/kkernel/src/engine.rs +++ b/crates/kkernel/src/engine.rs @@ -203,36 +203,22 @@ fn cmd_engine_drift_check(_args: EngineDriftCheckArgs) -> Result<()> { // ── Internal helpers ────────────────────────────────────────────────────────── fn query_embedding_models( - _db: Option<&std::path::Path>, + db: Option<&std::path::Path>, engine_filter: Option<&str>, ) -> Result> { - // The _embedding_models table is created by the ADR-043 schema migration. - // Until that migration lands, the table may not exist; return an empty list - // with a log rather than a hard error so `kkernel engine list` is usable - // before full ADR-043 deployment. - // - // A full implementation opens the SQLite DB, queries: - // SELECT engine_name, model_id, key_version, dim, status, - // activated_at, superseded_at - // FROM _embedding_models - // [WHERE engine_name = ?] - // ORDER BY engine_name, activated_at NULLS LAST - // - // and maps rows to EngineModelRecord. - // - // This scaffold returns an empty list so the CLI compiles and tests can - // verify the command routing surface without a live database. 
- - if let Some(engine) = engine_filter { - tracing::debug!( - engine, - "query_embedding_models: _embedding_models not yet populated" - ); - } else { - tracing::debug!("query_embedding_models: _embedding_models not yet populated"); - } - - Ok(Vec::new()) + let rows = khive_db::query_embedding_models(db, engine_filter)?; + Ok(rows + .into_iter() + .map(|r| EngineModelRecord { + engine_name: r.engine_name, + model_id: r.model_id, + key_version: r.key_version, + dimensions: r.dimensions, + status: r.status, + activated_at: r.activated_at, + superseded_at: r.superseded_at, + }) + .collect()) } // ── Tests ───────────────────────────────────────────────────────────────────── From 092170cc48772303a62b1fadcd8bec16326bafab Mon Sep 17 00:00:00 2001 From: OceanLi <122793010+ohdearquant@users.noreply.github.com> Date: Mon, 25 May 2026 06:28:03 -0400 Subject: [PATCH 05/18] tune(recall): grid search infra + PARTIAL default changes --- crates/khive-pack-memory/src/config.rs | 9 +- crates/khive-pack-memory/src/handlers.rs | 18 +- .../fixtures/memories_corpus.json | 806 +++++ tests/khive-contract/tune/REPORT.md | 75 + tests/khive-contract/tune/__init__.py | 0 tests/khive-contract/tune/__main__.py | 3 + tests/khive-contract/tune/grid_search.py | 517 ++++ tests/khive-contract/tune/results.json | 2680 +++++++++++++++++ tests/khive-contract/tune/tuned-config.toml | 17 + 9 files changed, 4117 insertions(+), 8 deletions(-) create mode 100644 tests/khive-contract/fixtures/memories_corpus.json create mode 100644 tests/khive-contract/tune/REPORT.md create mode 100644 tests/khive-contract/tune/__init__.py create mode 100644 tests/khive-contract/tune/__main__.py create mode 100644 tests/khive-contract/tune/grid_search.py create mode 100644 tests/khive-contract/tune/results.json create mode 100644 tests/khive-contract/tune/tuned-config.toml diff --git a/crates/khive-pack-memory/src/config.rs b/crates/khive-pack-memory/src/config.rs index 103faa5a..9c603a42 100644 --- 
a/crates/khive-pack-memory/src/config.rs +++ b/crates/khive-pack-memory/src/config.rs @@ -51,6 +51,7 @@ pub struct RecallConfig { pub fallback_during_migration: bool, } +// Tuned 2026-05-25: grid search over 116 configs (quick). PARTIAL — eval too easy to discriminate params. Changed: half_life 30→14, decay exp→hyp, multiplier 20→10. See tests/khive-contract/tune/REPORT.md. impl Default for RecallConfig { fn default() -> Self { Self { @@ -59,9 +60,9 @@ impl Default for RecallConfig { temporal_weight: 0.10, reranker_weights: HashMap::new(), reranker_params: HashMap::new(), - temporal_half_life_days: 30.0, - decay_model: DecayModel::default(), - candidate_multiplier: 20, + temporal_half_life_days: 14.0, + decay_model: DecayModel::Hyperbolic, + candidate_multiplier: 10, candidate_limit: None, fuse_strategy: FusionStrategy::default(), min_score: 0.0, @@ -442,7 +443,7 @@ mod tests { // unspecified fields keep defaults let diff2 = (cfg.importance_weight - 0.20).abs(); assert!(diff2 < 1e-12); - assert_eq!(cfg.decay_model, DecayModel::Exponential); + assert_eq!(cfg.decay_model, DecayModel::Hyperbolic); } // ── RecallConfig new fields ─────────────────────────────────────────────── diff --git a/crates/khive-pack-memory/src/handlers.rs b/crates/khive-pack-memory/src/handlers.rs index 00a5310a..ad6ff3db 100644 --- a/crates/khive-pack-memory/src/handlers.rs +++ b/crates/khive-pack-memory/src/handlers.rs @@ -805,6 +805,7 @@ impl MemoryPack { #[cfg(test)] mod tests { use super::*; + use crate::config::DecayModel; #[test] fn validate_memory_type_rejects_invalid() { @@ -959,9 +960,14 @@ mod tests { #[test] fn compute_score_exponential_decay_at_decay_factor_half_life() { - let cfg = RecallConfig::default(); // temporal_half_life = 30 days, default decay_factor=0.01 - // ADR-021 §5: importance_decayed = salience * exp(-decay_factor * age_days) - // At age = ln(2)/0.01 ≈ 69.3 days: importance_decayed ≈ 0.5 + // Use explicit exponential decay config — not relying on default 
decay_model. + // ADR-021 §5: importance_decayed = salience * exp(-decay_factor * age_days) + // At age = ln(2)/0.01 ≈ 69.3 days: importance_decayed ≈ 0.5 + let cfg = RecallConfig { + decay_model: DecayModel::Exponential, + temporal_half_life_days: 30.0, + ..RecallConfig::default() + }; let age_days = std::f64::consts::LN_2 / 0.01; let (_, bd) = compute_score(&cfg, 0.5, 1.0, 0.01, age_days); assert!( @@ -976,7 +982,11 @@ mod tests { #[test] fn compute_score_temporal_halves_at_temporal_half_life() { - let cfg = RecallConfig::default(); // temporal_half_life = 30 days + // Use explicit half_life=30 — not relying on default temporal_half_life_days. + let cfg = RecallConfig { + temporal_half_life_days: 30.0, + ..RecallConfig::default() + }; let (_, bd) = compute_score(&cfg, 0.5, 1.0, 0.01, 30.0); // At age = temporal_half_life = 30 days: temporal = exp(-ln2/30 * 30) = 0.5 assert!( diff --git a/tests/khive-contract/fixtures/memories_corpus.json b/tests/khive-contract/fixtures/memories_corpus.json new file mode 100644 index 00000000..b78dba29 --- /dev/null +++ b/tests/khive-contract/fixtures/memories_corpus.json @@ -0,0 +1,806 @@ +{ + "memories": [ + { + "content": "Python list comprehension is a concise syntax for creating lists from iterables, equivalent to map and filter operations in functional programming", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "python", "functional"] + }, + { + "content": "Python lambda functions enable functional programming patterns including map, filter, and reduce operations on collections", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "python", "functional"] + }, + { + "content": "Python decorators wrap functions to add behavior like caching, logging, or access control without modifying the original function code", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "python"] + 
}, + { + "content": "Rust ownership system enforces memory safety at compile time through borrow checker rules that prevent use-after-free and data races", + "importance": 0.90, + "decay_factor": 0.01, + "memory_type": "semantic", + "tags": ["programming", "rust", "memory-safety"] + }, + { + "content": "Rust borrow checker ensures only one mutable reference or multiple immutable references exist at a time, preventing memory safety violations", + "importance": 0.90, + "decay_factor": 0.01, + "memory_type": "semantic", + "tags": ["programming", "rust", "borrow-checker"] + }, + { + "content": "Rust lifetimes are annotations that tell the borrow checker how long references are valid, enabling safe memory management without garbage collection", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "rust", "lifetimes"] + }, + { + "content": "Binary search trees enable O(log n) lookup, insertion, and deletion by maintaining sorted order in left and right subtrees", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "algorithms", "data-structures"] + }, + { + "content": "AVL trees are self-balancing binary search trees that maintain height balance to guarantee O(log n) operations via rotation algorithms", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "algorithms", "data-structures"] + }, + { + "content": "Hash tables provide O(1) average-case lookup by mapping keys to array indices using a hash function and collision resolution strategy", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "algorithms", "data-structures"] + }, + { + "content": "Unit testing verifies individual functions and methods in isolation, forming the foundation of test-driven development TDD methodology", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": 
["programming", "testing", "tdd"] + }, + { + "content": "Test-driven development TDD requires writing failing unit tests before implementing production code, then refactoring once all tests pass green", + "importance": 0.90, + "decay_factor": 0.01, + "memory_type": "semantic", + "tags": ["programming", "testing", "tdd"] + }, + { + "content": "Integration tests verify that multiple software components work correctly together as a system, complementing unit tests in a complete test suite", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "testing"] + }, + { + "content": "Debugging with stack traces reveals the call hierarchy at the point of exception failure, helping identify the root cause of bugs", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "debugging"] + }, + { + "content": "A debugging tool called a debugger allows setting breakpoints, stepping through code execution line by line, and inspecting variable values to trace bugs", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "debugging"] + }, + { + "content": "JavaScript async await syntax simplifies asynchronous programming by allowing sequential-looking code for promise-based operations", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "javascript", "async"] + }, + { + "content": "JavaScript Promises represent the eventual completion or failure of asynchronous operations and allow chaining with then and catch methods", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "javascript", "async"] + }, + { + "content": "Git version control tracks changes to source code over time, enabling branching, merging, and collaborative development workflows", + "importance": 0.70, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "git"] + 
}, + { + "content": "Docker containers package application code with its runtime dependencies into isolated portable environments for consistent deployment", + "importance": 0.70, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "devops"] + }, + { + "content": "RESTful APIs use HTTP methods GET POST PUT DELETE to perform CRUD operations on resources identified by uniform resource identifiers", + "importance": 0.70, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "api", "rest"] + }, + { + "content": "SQL JOIN operations combine rows from two or more database tables based on related columns to query relational data", + "importance": 0.70, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "database", "sql"] + }, + { + "content": "Dynamic programming solves optimization problems by breaking them into overlapping subproblems and caching intermediate results through memoization", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "algorithms"] + }, + { + "content": "Graph traversal algorithms breadth-first search BFS and depth-first search DFS explore all nodes in a connected graph systematically", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "algorithms", "graphs"] + }, + { + "content": "Regular expressions are patterns for matching searching and manipulating strings using special character classes quantifiers and anchors", + "importance": 0.65, + "decay_factor": 0.03, + "memory_type": "episodic", + "tags": ["programming", "regex"] + }, + { + "content": "Sorting algorithms like quicksort mergesort and heapsort have different time complexity and space trade-offs for ordering data", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "algorithms", "sorting"] + }, + { + "content": "Object-oriented programming uses classes 
inheritance polymorphism and encapsulation to organize code around data structures and behavior", + "importance": 0.70, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["programming", "oop"] + }, + { + "content": "Derivatives measure the instantaneous rate of change of a mathematical function, forming the foundation of differential calculus", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["math", "calculus"] + }, + { + "content": "Integration in calculus computes the area under a curve and is used to find antiderivatives, representing accumulated change", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["math", "calculus"] + }, + { + "content": "The fundamental theorem of calculus links differentiation and integration showing they are inverse mathematical operations", + "importance": 0.90, + "decay_factor": 0.01, + "memory_type": "semantic", + "tags": ["math", "calculus"] + }, + { + "content": "Matrix multiplication combines two matrices to produce a new matrix and is fundamental to linear algebra and computer graphics transformations", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["math", "linear-algebra"] + }, + { + "content": "Eigenvalues and eigenvectors of a matrix reveal its principal axes of transformation and are central to linear algebra applications", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["math", "linear-algebra"] + }, + { + "content": "Linear algebra operations including matrix inversion determinants and vector spaces underlie machine learning algorithms and data analysis", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["math", "linear-algebra"] + }, + { + "content": "Neural networks learn complex patterns by adjusting connection weights through backpropagation and gradient descent optimization algorithms", + "importance": 0.90, + 
"decay_factor": 0.01, + "memory_type": "semantic", + "tags": ["science", "machine-learning", "neural-networks"] + }, + { + "content": "Deep learning uses multiple hidden layers in neural networks to learn hierarchical feature representations from raw input data", + "importance": 0.90, + "decay_factor": 0.01, + "memory_type": "semantic", + "tags": ["science", "machine-learning", "deep-learning"] + }, + { + "content": "Gradient descent minimizes a loss function by iteratively adjusting model parameters in the direction of the negative gradient", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["science", "machine-learning"] + }, + { + "content": "Normal distribution is characterized by mean and standard deviation and is central to statistics and the central limit theorem", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["math", "statistics", "probability"] + }, + { + "content": "Bayesian probability interprets probability as a degree of belief updated using Bayes theorem when new evidence arrives", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["math", "statistics", "probability", "bayesian"] + }, + { + "content": "Statistical hypothesis testing uses probability p-values and confidence intervals to determine if statistical evidence supports a research claim", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["math", "statistics"] + }, + { + "content": "Quantum mechanics describes particle behavior through wave functions and probability amplitudes at atomic and subatomic energy scales", + "importance": 0.90, + "decay_factor": 0.01, + "memory_type": "semantic", + "tags": ["science", "physics", "quantum"] + }, + { + "content": "Heisenberg uncertainty principle in quantum mechanics states that position and momentum cannot both be measured precisely simultaneously", + "importance": 0.85, + "decay_factor": 0.02, + 
"memory_type": "semantic", + "tags": ["science", "physics", "quantum"] + }, + { + "content": "Special relativity states that the speed of light is constant in all inertial reference frames and that time and space are relative", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["science", "physics", "relativity"] + }, + { + "content": "Photosynthesis converts carbon dioxide and water into glucose and oxygen using solar energy captured by chlorophyll in plant cells", + "importance": 0.70, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["science", "biology"] + }, + { + "content": "DNA double helix encodes genetic information through base pair sequences of adenine thymine guanine and cytosine nucleotides", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["science", "biology", "genetics"] + }, + { + "content": "CRISPR gene editing technology allows precise modification of DNA sequences by cutting at targeted genomic locations guided by RNA", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["science", "biology", "genetics"] + }, + { + "content": "Chemical bonding forms molecules through sharing of electrons in covalent bonds or transfer of electrons in ionic bonds between atoms", + "importance": 0.70, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["science", "chemistry"] + }, + { + "content": "Thermodynamics laws govern energy transfer stating that energy is conserved and entropy always increases in isolated physical systems", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["science", "physics", "thermodynamics"] + }, + { + "content": "Newton laws of motion describe how forces cause changes in velocity and acceleration of objects in classical mechanics", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["science", "physics"] + }, + { + "content": 
"Electromagnetism describes how electric charges and magnetic fields interact and propagate, unified by Maxwell equations", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["science", "physics"] + }, + { + "content": "Black holes form when massive stars gravitationally collapse creating regions where gravity is so strong that light cannot escape", + "importance": 0.75, + "decay_factor": 0.03, + "memory_type": "semantic", + "tags": ["science", "physics", "astrophysics"] + }, + { + "content": "Climate change results from greenhouse gas emissions trapping solar heat in the atmosphere causing rising global temperatures", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["science", "environment"] + }, + { + "content": "Evolutionary theory explains biodiversity through natural selection mutation and genetic drift acting over millions of years", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["science", "biology", "evolution"] + }, + { + "content": "The Roman Empire at its height controlled the Mediterranean Sea Gaul Britain and North Africa spreading Latin culture and Roman law", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "rome", "ancient"] + }, + { + "content": "Julius Caesar assassination in 44 BC marked the end of the Roman Republic and the beginning of the Roman Empire under Augustus", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "rome", "ancient"] + }, + { + "content": "Ancient Rome and the Roman Empire built the Colosseum aqueducts and extensive road networks leaving lasting architectural legacies", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "rome", "ancient"] + }, + { + "content": "World War II began in 1939 when Germany invaded Poland drawing Britain and France into conflict with the Nazi regime", 
+ "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "world-war-ii"] + }, + { + "content": "The Battle of Stalingrad 1942 to 1943 was a turning point in World War II ending German eastward advance into the Soviet Union", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "world-war-ii", "battles"] + }, + { + "content": "D-Day on June 6 1944 saw Allied forces land on Normandy beaches in the largest seaborne invasion in World War II history", + "importance": 0.90, + "decay_factor": 0.01, + "memory_type": "semantic", + "tags": ["history", "world-war-ii", "battles"] + }, + { + "content": "South America contains twelve sovereign countries with Brazil occupying nearly half the continent and the vast Amazon rainforest", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["geography", "south-america"] + }, + { + "content": "The Andes mountain range runs along the western coast of South America hosting ancient Andean civilizations and diverse ecosystems", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["geography", "south-america"] + }, + { + "content": "Argentina and Chile share the southern tip of South America including Patagonia with the Andes mountains forming their natural border", + "importance": 0.70, + "decay_factor": 0.03, + "memory_type": "semantic", + "tags": ["geography", "south-america"] + }, + { + "content": "Ancient Egypt was ruled by pharaohs who built pyramids as royal tombs most famously the Great Pyramid of Giza as a tomb", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "egypt", "ancient"] + }, + { + "content": "The ancient Egypt pharaoh Tutankhamun tomb discovered in 1922 contained vast treasures providing insight into Egyptian civilization", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", 
"egypt", "pharaoh"] + }, + { + "content": "Ancient Egypt hieroglyphics were a writing system using pictographic symbols deciphered using the Rosetta Stone in 1822", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "egypt", "ancient"] + }, + { + "content": "The Renaissance was a cultural and intellectual revival in 14th to 17th century Europe centered in Italy featuring humanist values and arts", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "renaissance", "art"] + }, + { + "content": "Leonardo da Vinci epitomized Renaissance ideals combining painting sculpture architecture science and engineering in his masterwork", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "renaissance", "art"] + }, + { + "content": "Michelangelo Sistine Chapel ceiling and David sculpture are masterpieces of Renaissance art commissioned by the Catholic Church", + "importance": 0.85, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "renaissance", "art"] + }, + { + "content": "The Silk Road was an ancient network of trade routes connecting China to Rome facilitating exchange of goods ideas and culture", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "trade"] + }, + { + "content": "The Mongol Empire under Genghis Khan was the largest contiguous land empire spanning from Asia to Eastern Europe in history", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "mongols"] + }, + { + "content": "The Ottoman Empire controlled Anatolia the Middle East and North Africa for six centuries until its dissolution after World War I", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "ottoman"] + }, + { + "content": "The Industrial Revolution began in Britain in the 18th century transforming 
manufacturing through steam power and mechanization", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "industrial"] + }, + { + "content": "The French Revolution of 1789 abolished the monarchy and aristocracy introducing ideals of liberty equality and fraternity", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "revolution"] + }, + { + "content": "Christopher Columbus 1492 voyage to the Americas opened sustained contact between Europe and the Western Hemisphere continents", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "exploration"] + }, + { + "content": "The Ming Dynasty of China built Great Wall extensions and launched Zheng He naval expeditions across Asia and Africa", + "importance": 0.70, + "decay_factor": 0.03, + "memory_type": "semantic", + "tags": ["history", "china"] + }, + { + "content": "The Black Death plague killed an estimated one third of Europe population in the 14th century reshaping society and economy", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "plague"] + }, + { + "content": "The American Civil War 1861 to 1865 was fought over slavery and states rights resulting in the abolition of slavery", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "america"] + }, + { + "content": "Ancient Greece developed democracy philosophy through Socrates and Plato and the Olympic Games in city-states like Athens", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["history", "greece", "ancient"] + }, + { + "content": "Cooking pasta requires bringing heavily salted water to a full boil before adding pasta and timing precisely for al dente texture", + "importance": 0.65, + "decay_factor": 0.03, + "memory_type": "episodic", + "tags": ["food", "cooking", "pasta"] + }, + { + "content": 
"Pasta sauce techniques include tomato reduction cream-based and oil-and-garlic preparations using fresh or dried Italian herbs", + "importance": 0.65, + "decay_factor": 0.03, + "memory_type": "episodic", + "tags": ["food", "cooking", "pasta"] + }, + { + "content": "Italian pasta recipes use olive oil garlic parmesan cheese and fresh basil for authentic Mediterranean flavor and aroma", + "importance": 0.65, + "decay_factor": 0.03, + "memory_type": "semantic", + "tags": ["food", "cooking", "italian"] + }, + { + "content": "Sleep quality improves with consistent bedtime routines dark rooms and avoiding screens one hour before sleeping for better rest", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "episodic", + "tags": ["health", "sleep"] + }, + { + "content": "Circadian rhythm is the body internal 24-hour biological clock regulating sleep and wake cycles influenced by light exposure", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["health", "sleep", "biology"] + }, + { + "content": "REM sleep and deep sleep stages are essential for memory consolidation muscle recovery and mental health restoration each night", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["health", "sleep"] + }, + { + "content": "Regular aerobic fitness exercise like running cycling and swimming improves cardiovascular health and reduces stress hormones", + "importance": 0.80, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["health", "fitness", "exercise"] + }, + { + "content": "Fitness strength training with weights builds muscle mass increases metabolism and improves bone density through progressive overload", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["health", "fitness", "strength-training"] + }, + { + "content": "High-intensity interval training HIIT alternates short intense effort bursts with recovery periods for efficient calorie burning 
and fitness", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["health", "fitness", "hiit"] + }, + { + "content": "Coffee contains caffeine that blocks adenosine receptors in the brain promoting alertness and reducing morning fatigue effectively", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["food", "coffee", "caffeine"] + }, + { + "content": "Morning coffee caffeine ritual often involves grinding fresh beans pour-over or espresso brewing and savoring the rich coffee aroma", + "importance": 0.65, + "decay_factor": 0.03, + "memory_type": "episodic", + "tags": ["food", "coffee", "morning"] + }, + { + "content": "Caffeine half-life is approximately six hours so afternoon coffee can disrupt evening sleep patterns for caffeine sensitive individuals", + "importance": 0.70, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["food", "coffee", "caffeine"] + }, + { + "content": "Urban transit commuting by bus and subway reduces individual carbon footprint compared to private car usage in dense metropolitan cities", + "importance": 0.70, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["daily-life", "transit", "commute"] + }, + { + "content": "Public transit systems use timetables real-time tracking and fare cards to manage passenger commuter flow efficiently in cities", + "importance": 0.65, + "decay_factor": 0.03, + "memory_type": "semantic", + "tags": ["daily-life", "transit"] + }, + { + "content": "Cycling as urban transit commute reduces traffic congestion provides daily physical exercise and combines transportation with fitness", + "importance": 0.70, + "decay_factor": 0.02, + "memory_type": "episodic", + "tags": ["daily-life", "commute", "cycling"] + }, + { + "content": "Meal planning and batch cooking on weekends reduces weekday decision fatigue and ensures healthy balanced eating throughout the week", + "importance": 0.65, + "decay_factor": 0.03, + "memory_type": 
"episodic", + "tags": ["food", "meal-prep"] + }, + { + "content": "Mindfulness meditation practiced for 10 to 20 minutes daily reduces anxiety improves focus and builds emotional resilience over time", + "importance": 0.75, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["health", "mindfulness"] + }, + { + "content": "Reading physical books before bed promotes relaxation and better sleep quality compared to screen-based reading devices at night", + "importance": 0.65, + "decay_factor": 0.03, + "memory_type": "episodic", + "tags": ["daily-life", "reading"] + }, + { + "content": "Journaling regularly helps process emotions track personal goals and identify recurring patterns in thoughts and daily behaviors", + "importance": 0.65, + "decay_factor": 0.03, + "memory_type": "episodic", + "tags": ["daily-life", "writing"] + }, + { + "content": "Houseplants improve indoor air quality by absorbing carbon dioxide and certain volatile organic compounds pollutants from indoor air", + "importance": 0.55, + "decay_factor": 0.03, + "memory_type": "semantic", + "tags": ["daily-life", "plants"] + }, + { + "content": "Weather affects mood through sunlight exposure influencing serotonin levels and seasonal affective disorder patterns in humans", + "importance": 0.65, + "decay_factor": 0.03, + "memory_type": "semantic", + "tags": ["science", "daily-life", "weather"] + }, + { + "content": "Grocery shopping with a prepared list reduces impulse purchases and food waste by focusing on planned meals and needed ingredients", + "importance": 0.55, + "decay_factor": 0.04, + "memory_type": "episodic", + "tags": ["daily-life", "food"] + }, + { + "content": "Fermented foods like yogurt kimchi and kefir contain beneficial probiotics that support gut microbiome health and digestion", + "importance": 0.65, + "decay_factor": 0.03, + "memory_type": "semantic", + "tags": ["food", "health"] + }, + { + "content": "Hydration with adequate daily water intake supports kidney function 
cognitive performance and physical exercise endurance", + "importance": 0.70, + "decay_factor": 0.02, + "memory_type": "semantic", + "tags": ["health", "hydration"] + }, + { + "content": "Time management techniques like Pomodoro method and time-blocking increase productivity by structuring focused work intervals", + "importance": 0.70, + "decay_factor": 0.02, + "memory_type": "episodic", + "tags": ["daily-life", "productivity"] + } + ], + "eval_queries": [ + { + "query": "Python functional programming", + "relevant_indices": [0, 1], + "description": "Python functional programming patterns using list comprehensions, lambda, map and filter" + }, + { + "query": "Rust borrow checker memory", + "relevant_indices": [3, 4, 5], + "description": "Rust memory safety enforced by borrow checker through ownership and lifetime rules" + }, + { + "query": "binary search trees", + "relevant_indices": [6, 7], + "description": "Binary search tree and self-balancing AVL tree data structures" + }, + { + "query": "test-driven development", + "relevant_indices": [9, 10], + "description": "TDD workflow of writing failing unit tests before implementing production code" + }, + { + "query": "debugging", + "relevant_indices": [12, 13], + "description": "Debugging techniques using stack traces and interactive debuggers with breakpoints" + }, + { + "query": "calculus", + "relevant_indices": [25, 26, 27], + "description": "Calculus fundamentals: derivatives, integration, and the fundamental theorem" + }, + { + "query": "linear algebra matrix", + "relevant_indices": [28, 29, 30], + "description": "Linear algebra operations including matrix multiplication, eigenvalues and eigenvectors" + }, + { + "query": "neural networks", + "relevant_indices": [31, 32], + "description": "Machine learning with neural networks and deep learning architectures" + }, + { + "query": "probability", + "relevant_indices": [34, 35, 36], + "description": "Probability and statistics including distributions, Bayesian 
methods, and hypothesis testing" + }, + { + "query": "quantum mechanics", + "relevant_indices": [37, 38], + "description": "Quantum mechanics fundamentals including wave functions and the Heisenberg uncertainty principle" + }, + { + "query": "Roman Empire", + "relevant_indices": [50, 51, 52], + "description": "Ancient Roman civilization spanning the Republic, Julius Caesar, and the Empire" + }, + { + "query": "World War", + "relevant_indices": [53, 54, 55], + "description": "World War II major events: German invasion of Poland, Battle of Stalingrad, and D-Day" + }, + { + "query": "South America", + "relevant_indices": [56, 57, 58], + "description": "South American geography including Brazil, the Andes mountains, and neighboring countries" + }, + { + "query": "ancient Egypt", + "relevant_indices": [59, 60, 61], + "description": "Ancient Egypt civilization with pharaohs, pyramid tombs, and hieroglyphic writing" + }, + { + "query": "Renaissance", + "relevant_indices": [62, 63, 64], + "description": "Italian Renaissance cultural revival and master artists Leonardo da Vinci and Michelangelo" + }, + { + "query": "pasta", + "relevant_indices": [75, 76, 77], + "description": "Pasta cooking techniques, Italian sauce recipes, and key Mediterranean ingredients" + }, + { + "query": "sleep", + "relevant_indices": [78, 79, 80], + "description": "Sleep health including circadian rhythms, REM sleep stages, and bedtime routines" + }, + { + "query": "fitness", + "relevant_indices": [81, 82, 83], + "description": "Physical fitness approaches including aerobic exercise, strength training, and HIIT" + }, + { + "query": "coffee caffeine", + "relevant_indices": [84, 85, 86], + "description": "Coffee and caffeine effects on morning alertness and sleep interactions" + }, + { + "query": "transit", + "relevant_indices": [87, 88, 89], + "description": "Urban transit systems including bus, subway, and cycling for commuting" + } + ] +} diff --git a/tests/khive-contract/tune/REPORT.md 
b/tests/khive-contract/tune/REPORT.md new file mode 100644 index 00000000..4eeb122d --- /dev/null +++ b/tests/khive-contract/tune/REPORT.md @@ -0,0 +1,75 @@ +# Param-Tuning Grid Search Report + +- **Date**: 2026-05-25 +- **Grid size**: 116 configs +- **Eval queries**: 20 +- **Total runtime**: 0.7s +- **Mode**: FTS-only (no_embed=True) + +## Winning Config (highest recall@10) + +| Metric | Value | +|--------|-------| +| recall@10 | 0.9333 | +| MRR | 0.9500 | +| mean latency | 0.3ms | +| config_index | 3 | + +Parameters: `rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=weighted(1.0/0.0) decay=hyperbolic hl=14.0` + +## Default vs Tuned Comparison + +| Metric | Default config | Tuned config | Delta | +|--------|---------------|-------------|-------| +| recall@10 | 0.9333 | 0.9333 | +0.0000 | +| MRR | 0.9250 | 0.9500 | +0.0250 | +| mean latency | 0.3ms | 0.3ms | -0.0ms | + +Default config: relevance=0.70 importance=0.20 temporal=0.10 candidate_multiplier=20 fuse=rrf(k=60) decay=exponential half_life=30.0 + +## Flat Optimization Landscape + +All 116 configs achieve **identical** recall@10 = 0.9333. MRR has exactly two values: +0.925 (all RRF + vector-only weighted configs, 58 total) and 0.950 (all other weighted +configs, 58 total). The split is determined entirely by fusion strategy — `relevance_weight`, +`importance_weight`, `temporal_weight`, `candidate_multiplier`, `decay_model`, and +`temporal_half_life_days` have **zero measurable effect** on either metric. + +**Root cause**: The synthetic corpus uses short exact-keyword queries against FTS5 (AND-logic). +Every relevant memory contains the query terms, so FTS5 trivially returns them regardless of +scoring parameters. A harder eval set (synonyms, cross-domain reasoning, partial matches) is +needed to discriminate non-fusion parameters. 
+ +The three committed default changes (`half_life 30→14`, `decay exp→hyp`, `multiplier 20→10`) +are benign — they pass validation and lie within sensible ranges — but they are not empirically +distinguished from the old defaults by this grid search. + +## Top 10 by recall@10 + +| idx | recall@10 | mrr | latency | config | +|-----|-----------|-----|---------|--------| +| 3 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=weighted(1.0/0.0) decay=hyperbolic hl=14.0 | +| 4 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=weighted(0.75/0.25) decay=hyperbolic hl=30.0 | +| 5 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=weighted(0.5/0.5) decay=hyperbolic hl=60.0 | +| 6 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=weighted(0.25/0.75) decay=none hl=14.0 | +| 10 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=20 fuse=weighted(1.0/0.0) decay=exponential hl=30.0 | +| 11 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=20 fuse=weighted(0.75/0.25) decay=exponential hl=60.0 | +| 12 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=20 fuse=weighted(0.5/0.5) decay=hyperbolic hl=14.0 | +| 13 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=20 fuse=weighted(0.25/0.75) decay=hyperbolic hl=30.0 | +| 18 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=40 fuse=weighted(0.75/0.25) decay=exponential hl=14.0 | +| 19 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=40 fuse=weighted(0.5/0.5) decay=exponential hl=30.0 | + +## Top 10 by MRR + +| idx | recall@10 | mrr | latency | config | +|-----|-----------|-----|---------|--------| +| 3 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=weighted(1.0/0.0) decay=hyperbolic hl=14.0 | +| 4 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=weighted(0.75/0.25) decay=hyperbolic hl=30.0 | +| 5 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=weighted(0.5/0.5) decay=hyperbolic hl=60.0 | +| 
6 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=10 fuse=weighted(0.25/0.75) decay=none hl=14.0 | +| 10 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=20 fuse=weighted(1.0/0.0) decay=exponential hl=30.0 | +| 11 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=20 fuse=weighted(0.75/0.25) decay=exponential hl=60.0 | +| 12 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=20 fuse=weighted(0.5/0.5) decay=hyperbolic hl=14.0 | +| 13 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=20 fuse=weighted(0.25/0.75) decay=hyperbolic hl=30.0 | +| 18 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=40 fuse=weighted(0.75/0.25) decay=exponential hl=14.0 | +| 19 | 0.9333 | 0.9500 | 0.3ms | rel=0.7 imp=0.2 tmp=0.1 cand=40 fuse=weighted(0.5/0.5) decay=exponential hl=30.0 | diff --git a/tests/khive-contract/tune/__init__.py b/tests/khive-contract/tune/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/khive-contract/tune/__main__.py b/tests/khive-contract/tune/__main__.py new file mode 100644 index 00000000..2cc20c92 --- /dev/null +++ b/tests/khive-contract/tune/__main__.py @@ -0,0 +1,3 @@ +from tune.grid_search import main + +main() diff --git a/tests/khive-contract/tune/grid_search.py b/tests/khive-contract/tune/grid_search.py new file mode 100644 index 00000000..b7e64a33 --- /dev/null +++ b/tests/khive-contract/tune/grid_search.py @@ -0,0 +1,517 @@ +"""Param-tuning grid search for khive recall configuration. + +Runs a FTS-only grid over scoring weights, candidate pool sizes, fusion +strategies, decay models, and temporal half-life parameters. One MCP session +is created and the corpus is loaded once; config is varied per recall() call. + +TODO: Add --with-embed flag for embedding-enabled grid over both + all-minilm-l6-v2 and paraphrase-multilingual-minilm-l12-v2 models. + Requires no_embed=False and KHIVE_ADDITIONAL_EMBEDDING_MODELS=paraphrase. 
+""" + +from __future__ import annotations + +import argparse +import json +import time +from datetime import date +from pathlib import Path +from typing import Any + +from khive_contract.client import KhiveMcpSession + +RANDOM_SEED = 42 + +_HERE = Path(__file__).parent +DEFAULT_CORPUS = _HERE.parent / "fixtures" / "memories_corpus.json" +DEFAULT_OUTPUT = _HERE + + +# --------------------------------------------------------------------------- +# Data loading +# --------------------------------------------------------------------------- + + +def load_corpus(path: Path) -> tuple[list[dict[str, Any]], list[dict[str, Any]]]: + """Load memories and eval_queries from a corpus JSON file.""" + data = json.loads(path.read_text()) + memories: list[dict[str, Any]] = data["memories"] + eval_queries: list[dict[str, Any]] = data["eval_queries"] + return memories, eval_queries + + +# --------------------------------------------------------------------------- +# Session setup +# --------------------------------------------------------------------------- + + +def setup_session( + memories: list[dict[str, Any]], db: str = ":memory:" +) -> tuple[KhiveMcpSession, dict[int, str]]: + """Open a KhiveMcpSession and load all corpus memories via remember(). + + The returned session is already entered (via __enter__). The caller MUST + call session.close() when done, or use a try/finally block. + + Returns: + (session, note_id_map) where note_id_map[corpus_index] = note_id string. 
+ """ + session = KhiveMcpSession( + packs=("kg", "memory"), + db=db, + no_embed=True, + log="error", + ) + session.__enter__() + + note_id_map: dict[int, str] = {} + total = len(memories) + print(f"Loading {total} memories into session...") + t_load_start = time.perf_counter() + + for i, mem in enumerate(memories): + args: dict[str, Any] = { + "content": mem["content"], + "importance": mem["importance"], + "decay_factor": mem["decay_factor"], + "memory_type": mem["memory_type"], + } + if mem.get("tags"): + args["tags"] = mem["tags"] + + result = session.verb("remember", args) + note_id = result.get("note_id") or result.get("id") if result else None + if not note_id: + raise RuntimeError(f"remember() returned no note_id for memory {i}: {result!r}") + note_id_map[i] = str(note_id) + + if (i + 1) % 25 == 0: + elapsed = time.perf_counter() - t_load_start + print(f" Loaded {i + 1}/{total} memories ({elapsed:.1f}s)") + + elapsed = time.perf_counter() - t_load_start + print(f"Corpus loaded in {elapsed:.1f}s. Beginning grid search...") + return session, note_id_map + + +# --------------------------------------------------------------------------- +# Metric evaluation +# --------------------------------------------------------------------------- + + +def evaluate_config( + session: KhiveMcpSession, + config_dict: dict[str, Any], + eval_queries: list[dict[str, Any]], + note_id_map: dict[int, str], +) -> dict[str, float]: + """Evaluate one RecallConfig against all eval queries. 
+ + Returns: + {"recall_at_10": float, "mrr": float, "mean_latency_ms": float} + """ + recalls: list[float] = [] + mrrs: list[float] = [] + latencies: list[float] = [] + + for eq in eval_queries: + query: str = eq["query"] + relevant_indices: list[int] = eq["relevant_indices"] + relevant_note_ids = {note_id_map[i] for i in relevant_indices if i in note_id_map} + + t0 = time.perf_counter() + try: + hits = session.verb( + "recall", + {"query": query, "limit": 10, "config": config_dict}, + ) + except Exception: + hits = [] + latency_ms = (time.perf_counter() - t0) * 1000.0 + latencies.append(latency_ms) + + retrieved_ids: list[str] = [] + if isinstance(hits, list): + for h in hits: + nid = h.get("note_id") or h.get("id") if isinstance(h, dict) else None + if nid: + retrieved_ids.append(str(nid)) + + # recall@10 + retrieved_set = set(retrieved_ids) + if relevant_note_ids: + r_at_10 = len(relevant_note_ids & retrieved_set) / len(relevant_note_ids) + else: + r_at_10 = 0.0 + recalls.append(r_at_10) + + # MRR — reciprocal rank of first relevant hit + mrr = 0.0 + for rank, nid in enumerate(retrieved_ids, 1): + if nid in relevant_note_ids: + mrr = 1.0 / rank + break + mrrs.append(mrr) + + n = len(eval_queries) + return { + "recall_at_10": sum(recalls) / n if n else 0.0, + "mrr": sum(mrrs) / n if n else 0.0, + "mean_latency_ms": sum(latencies) / n if n else 0.0, + } + + +# --------------------------------------------------------------------------- +# Grid generation +# --------------------------------------------------------------------------- + + +def generate_grid(quick: bool = False) -> list[dict[str, Any]]: + """Generate the FTS-only RecallConfig parameter grid. + + Full grid: 4 × 4 × 8 × 3 × 3 = 1152 configs + Quick grid: every 10th config (deterministic sort) ≈ 116 configs + + Weight triples are normalized so relevance+importance+temporal = 1.0. + Weighted fusion uses [text_weight, vector_weight] where alpha=vector_weight. 
+ In FTS-only mode (no_embed=True) all vector results are empty, so + weighted configs with high vector alpha will score poorly — this is + expected and meaningful for the grid. + """ + weight_triples = [ + # (relevance_weight, importance_weight, temporal_weight) + (0.70, 0.20, 0.10), # default + (0.60, 0.30, 0.10), + (0.60, 0.20, 0.20), + (0.80, 0.10, 0.10), + ] + + candidate_pools = [ + # (candidate_multiplier, candidate_limit) + (10, None), + (20, None), # default + (40, None), + (20, 100), + ] + + # 3 RRF + 5 weighted = 8 fusion configs + fusion_configs: list[dict[str, Any]] = [ + {"rrf": {"k": 20}}, + {"rrf": {"k": 60}}, # default + {"rrf": {"k": 100}}, + {"weighted": {"weights": [1.0, 0.0]}}, # text-only + {"weighted": {"weights": [0.75, 0.25]}}, + {"weighted": {"weights": [0.5, 0.5]}}, + {"weighted": {"weights": [0.25, 0.75]}}, + {"weighted": {"weights": [0.0, 1.0]}}, # vector-only + ] + + decay_models = ["exponential", "hyperbolic", "none"] + half_lives = [14.0, 30.0, 60.0] + + configs: list[dict[str, Any]] = [] + for rw, iw, tw in weight_triples: + for cm, cl in candidate_pools: + for fuse in fusion_configs: + for decay in decay_models: + for hl in half_lives: + cfg: dict[str, Any] = { + "relevance_weight": rw, + "importance_weight": iw, + "temporal_weight": tw, + "candidate_multiplier": cm, + "fuse_strategy": fuse, + "decay_model": decay, + "temporal_half_life_days": hl, + "min_score": 0.0, + "min_salience": 0.0, + } + if cl is not None: + cfg["candidate_limit"] = cl + configs.append(cfg) + + if quick: + configs = configs[::10] + + return configs + + +# --------------------------------------------------------------------------- +# Grid execution +# --------------------------------------------------------------------------- + + +def run_grid( + session: KhiveMcpSession, + grid: list[dict[str, Any]], + eval_queries: list[dict[str, Any]], + note_id_map: dict[int, str], +) -> list[dict[str, Any]]: + """Run evaluate_config for every config in the grid. 
+ + MCP is single-threaded stdio, so iteration is sequential. + Prints progress every 100 configs. + + Returns: + List of result dicts: {"config_index", "config", "recall_at_10", "mrr", "mean_latency_ms"} + """ + results: list[dict[str, Any]] = [] + total = len(grid) + + for i, config in enumerate(grid): + if i % 100 == 0: + print(f" [{i}/{total}] config {i}...") + metrics = evaluate_config(session, config, eval_queries, note_id_map) + results.append( + { + "config_index": i, + "config": config, + **metrics, + } + ) + + return results + + +# --------------------------------------------------------------------------- +# Result writing +# --------------------------------------------------------------------------- + + +def _fuse_to_toml(fuse: dict[str, Any] | str) -> str: + """Render a fuse_strategy value as a TOML inline table or string.""" + if isinstance(fuse, str): + return f'"{fuse}"' + if "rrf" in fuse: + k = fuse["rrf"]["k"] + return f"{{rrf = {{k = {k}}}}}" + if "weighted" in fuse: + weights = fuse["weighted"]["weights"] + return f"{{weighted = {{weights = [{weights[0]}, {weights[1]}]}}}}" + # fallback: JSON-encode as a TOML comment note + return f'"{json.dumps(fuse)}"' + + +def write_results( + results: list[dict[str, Any]], + output_dir: Path, + *, + t_total_seconds: float, + default_config_metrics: dict[str, float] | None = None, +) -> None: + """Write results.json, tuned-config.toml, and REPORT.md to output_dir.""" + output_dir.mkdir(parents=True, exist_ok=True) + t_total = t_total_seconds + today = date.today().isoformat() + + # --- results.json --- + (output_dir / "results.json").write_text(json.dumps(results, indent=2)) + print(f"Wrote {output_dir / 'results.json'} ({len(results)} configs)") + + # --- rank by recall@10 then MRR --- + sorted_by_recall = sorted( + results, key=lambda r: (r["recall_at_10"], r["mrr"]), reverse=True + ) + sorted_by_mrr = sorted( + results, key=lambda r: (r["mrr"], r["recall_at_10"]), reverse=True + ) + winner = 
sorted_by_recall[0] + cfg = winner["config"] + + # --- tuned-config.toml --- + fuse_toml = _fuse_to_toml(cfg["fuse_strategy"]) + decay_model_str = cfg["decay_model"] if isinstance(cfg["decay_model"], str) else json.dumps(cfg["decay_model"]) + cl_line = ( + f"candidate_limit = {cfg['candidate_limit']}" + if cfg.get("candidate_limit") is not None + else "# candidate_limit = null (use multiplier only)" + ) + toml_content = f"""\ +# Winning config from khive recall param-tuning grid search +# run_date = "{today}" +# recall_at_10 = {winner['recall_at_10']:.4f} +# mrr = {winner['mrr']:.4f} +# mean_latency_ms = {winner['mean_latency_ms']:.2f} + +[recall] +relevance_weight = {cfg['relevance_weight']} +importance_weight = {cfg['importance_weight']} +temporal_weight = {cfg['temporal_weight']} +temporal_half_life_days = {cfg['temporal_half_life_days']} +decay_model = "{decay_model_str}" +candidate_multiplier = {cfg['candidate_multiplier']} +{cl_line} +fuse_strategy = {fuse_toml} +min_score = {cfg['min_score']} +min_salience = {cfg['min_salience']} +""" + (output_dir / "tuned-config.toml").write_text(toml_content) + print(f"Wrote {output_dir / 'tuned-config.toml'}") + + # --- REPORT.md --- + top10_recall = sorted_by_recall[:10] + top10_mrr = sorted_by_mrr[:10] + + def _cfg_summary(r: dict[str, Any]) -> str: + c = r["config"] + fuse = c["fuse_strategy"] + if isinstance(fuse, dict) and "rrf" in fuse: + fuse_str = f"rrf(k={fuse['rrf']['k']})" + elif isinstance(fuse, dict) and "weighted" in fuse: + w = fuse["weighted"]["weights"] + fuse_str = f"weighted({w[0]}/{w[1]})" + else: + fuse_str = str(fuse) + decay_str = c["decay_model"] if isinstance(c["decay_model"], str) else json.dumps(c["decay_model"]) + return ( + f"rel={c['relevance_weight']} imp={c['importance_weight']} " + f"tmp={c['temporal_weight']} cand={c['candidate_multiplier']} " + f"fuse={fuse_str} decay={decay_str} hl={c['temporal_half_life_days']}" + ) + + def _row(r: dict[str, Any]) -> str: + return ( + f"| 
{r['config_index']:4d} | {r['recall_at_10']:.4f} | {r['mrr']:.4f} " + f"| {r['mean_latency_ms']:.1f}ms | {_cfg_summary(r)} |" + ) + + top10_recall_rows = "\n".join(_row(r) for r in top10_recall) + top10_mrr_rows = "\n".join(_row(r) for r in top10_mrr) + + default_section = "" + if default_config_metrics: + default_section = f""" +## Default vs Tuned Comparison + +| Metric | Default config | Tuned config | Delta | +|--------|---------------|-------------|-------| +| recall@10 | {default_config_metrics['recall_at_10']:.4f} | {winner['recall_at_10']:.4f} | {winner['recall_at_10'] - default_config_metrics['recall_at_10']:+.4f} | +| MRR | {default_config_metrics['mrr']:.4f} | {winner['mrr']:.4f} | {winner['mrr'] - default_config_metrics['mrr']:+.4f} | +| mean latency | {default_config_metrics['mean_latency_ms']:.1f}ms | {winner['mean_latency_ms']:.1f}ms | {winner['mean_latency_ms'] - default_config_metrics['mean_latency_ms']:+.1f}ms | + +Default config: relevance=0.70 importance=0.20 temporal=0.10 candidate_multiplier=20 fuse=rrf(k=60) decay=exponential half_life=30.0 +""" + + report = f"""\ +# Param-Tuning Grid Search Report + +- **Date**: {today} +- **Grid size**: {len(results)} configs +- **Eval queries**: 20 +- **Total runtime**: {t_total:.1f}s +- **Mode**: FTS-only (no_embed=True) + +## Winning Config (highest recall@10) + +| Metric | Value | +|--------|-------| +| recall@10 | {winner['recall_at_10']:.4f} | +| MRR | {winner['mrr']:.4f} | +| mean latency | {winner['mean_latency_ms']:.1f}ms | +| config_index | {winner['config_index']} | + +Parameters: `{_cfg_summary(winner)}` +{default_section} +## Top 10 by recall@10 + +| idx | recall@10 | mrr | latency | config | +|-----|-----------|-----|---------|--------| +{top10_recall_rows} + +## Top 10 by MRR + +| idx | recall@10 | mrr | latency | config | +|-----|-----------|-----|---------|--------| +{top10_mrr_rows} +""" + (output_dir / "REPORT.md").write_text(report) + print(f"Wrote {output_dir / 'REPORT.md'}") + + +# 
--------------------------------------------------------------------------- +# CLI entry point +# --------------------------------------------------------------------------- + +_DEFAULT_CONFIG = { + "relevance_weight": 0.70, + "importance_weight": 0.20, + "temporal_weight": 0.10, + "candidate_multiplier": 20, + "fuse_strategy": {"rrf": {"k": 60}}, + "decay_model": "exponential", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0, +} + + +def main() -> None: + parser = argparse.ArgumentParser( + description="Grid search for khive recall config parameters (FTS-only mode)." + ) + parser.add_argument( + "--quick", + action="store_true", + help="Sample every 10th config for a fast smoke test (~10x faster).", + ) + parser.add_argument( + "--output-dir", + type=Path, + default=DEFAULT_OUTPUT, + help="Directory to write results.json, tuned-config.toml, REPORT.md.", + ) + parser.add_argument( + "--corpus", + type=Path, + default=DEFAULT_CORPUS, + help="Path to memories_corpus.json fixture.", + ) + args = parser.parse_args() + + corpus_path: Path = args.corpus + output_dir: Path = args.output_dir + + if not corpus_path.exists(): + raise FileNotFoundError(f"Corpus not found: {corpus_path}") + + print(f"Loading corpus from {corpus_path}") + memories, eval_queries = load_corpus(corpus_path) + print(f"Corpus: {len(memories)} memories, {len(eval_queries)} eval queries") + + grid = generate_grid(quick=args.quick) + print(f"Grid: {len(grid)} configs (quick={args.quick})") + + t_start = time.perf_counter() + session, note_id_map = setup_session(memories) + try: + # Evaluate default config for the comparison table + default_metrics = evaluate_config(session, _DEFAULT_CONFIG, eval_queries, note_id_map) + print( + f"Default config: recall@10={default_metrics['recall_at_10']:.4f} " + f"mrr={default_metrics['mrr']:.4f}" + ) + + results = run_grid(session, grid, eval_queries, note_id_map) + finally: + session.close() + + t_elapsed = time.perf_counter() - t_start 
+ print(f"Grid search complete in {t_elapsed:.1f}s") + + write_results( + results, + output_dir, + t_total_seconds=t_elapsed, + default_config_metrics=default_metrics, + ) + + best = max(results, key=lambda r: (r["recall_at_10"], r["mrr"])) + print( + f"\nBest config: recall@10={best['recall_at_10']:.4f} mrr={best['mrr']:.4f} " + f"(index {best['config_index']})" + ) + print(f"Results written to {output_dir}") + + +if __name__ == "__main__": + main() diff --git a/tests/khive-contract/tune/results.json b/tests/khive-contract/tune/results.json new file mode 100644 index 00000000..d6df8206 --- /dev/null +++ b/tests/khive-contract/tune/results.json @@ -0,0 +1,2680 @@ +[ + { + "config_index": 0, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "rrf": { + "k": 20 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.28148540113761555 + }, + { + "config_index": 1, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "rrf": { + "k": 60 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2686937492399011 + }, + { + "config_index": 2, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "rrf": { + "k": 100 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.29486264975275844 + }, + { + "config_index": 3, + "config": { + "relevance_weight": 0.7, + 
"importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 1.0, + 0.0 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2862915989680914 + }, + { + "config_index": 4, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.75, + 0.25 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2918167483585421 + }, + { + "config_index": 5, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.5, + 0.5 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.28529789960884955 + }, + { + "config_index": 6, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.25, + 0.75 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2831625501130475 + }, + { + "config_index": 7, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.0, + 1.0 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + 
"min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2803040999424411 + }, + { + "config_index": 8, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 20 + } + }, + "decay_model": "none", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2848207499482669 + }, + { + "config_index": 9, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 100 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.27973329997621477 + }, + { + "config_index": 10, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 1.0, + 0.0 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2801396494760411 + }, + { + "config_index": 11, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.75, + 0.25 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.28181665038573556 + }, + { + "config_index": 12, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + 
"fuse_strategy": { + "weighted": { + "weights": [ + 0.5, + 0.5 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.29417920086416416 + }, + { + "config_index": 13, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.25, + 0.75 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2827333999448456 + }, + { + "config_index": 14, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.0, + 1.0 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.28087909959140234 + }, + { + "config_index": 15, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "rrf": { + "k": 20 + } + }, + "decay_model": "none", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.3020208008820191 + }, + { + "config_index": 16, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "rrf": { + "k": 60 + } + }, + "decay_model": "none", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.28055835064151324 + }, + { + 
"config_index": 17, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "rrf": { + "k": 100 + } + }, + "decay_model": "none", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.28160629990452435 + }, + { + "config_index": 18, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.75, + 0.25 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.29320840003492776 + }, + { + "config_index": 19, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.5, + 0.5 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2866227991034975 + }, + { + "config_index": 20, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.25, + 0.75 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2897542504797457 + }, + { + "config_index": 21, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.0, + 1.0 + ] + } + }, + "decay_model": "hyperbolic", + 
"temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.29314370003703516 + }, + { + "config_index": 22, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 20 + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2905041001213249 + }, + { + "config_index": 23, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 60 + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2886353995563695 + }, + { + "config_index": 24, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 100 + } + }, + "decay_model": "none", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.28937284951098263 + }, + { + "config_index": 25, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 1.0, + 0.0 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2861790999304503 + }, + { + "config_index": 26, + "config": { + 
"relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.75, + 0.25 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.31045204887050204 + }, + { + "config_index": 27, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.25, + 0.75 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2888021495891735 + }, + { + "config_index": 28, + "config": { + "relevance_weight": 0.7, + "importance_weight": 0.2, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.0, + 1.0 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2846291503374232 + }, + { + "config_index": 29, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "rrf": { + "k": 20 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.27716039949154947 + }, + { + "config_index": 30, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "rrf": { + "k": 60 + } + }, + "decay_model": 
"hyperbolic", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2801395508868154 + }, + { + "config_index": 31, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "rrf": { + "k": 100 + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.27901260036742315 + }, + { + "config_index": 32, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 1.0, + 0.0 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.28072700115444604 + }, + { + "config_index": 33, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.75, + 0.25 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.28003539991914295 + }, + { + "config_index": 34, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.5, + 0.5 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2829062992532272 + }, + { + "config_index": 35, + "config": { + "relevance_weight": 0.6, + 
"importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.25, + 0.75 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2766604502539849 + }, + { + "config_index": 36, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 20 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2769689010165166 + }, + { + "config_index": 37, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 60 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.27942089982389007 + }, + { + "config_index": 38, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 100 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.27995829987048637 + }, + { + "config_index": 39, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 1.0, + 0.0 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + 
"mrr": 0.95, + "mean_latency_ms": 0.280987650694442 + }, + { + "config_index": 40, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.75, + 0.25 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2805270501994528 + }, + { + "config_index": 41, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.5, + 0.5 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2790415495837806 + }, + { + "config_index": 42, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.25, + 0.75 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2858790994650917 + }, + { + "config_index": 43, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.0, + 1.0 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.27572910039452836 + }, + { + "config_index": 44, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "rrf": { + 
"k": 20 + } + }, + "decay_model": "none", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.28704375072265975 + }, + { + "config_index": 45, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "rrf": { + "k": 100 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.281006250588689 + }, + { + "config_index": 46, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 1.0, + 0.0 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2943519994005328 + }, + { + "config_index": 47, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.75, + 0.25 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.28374790090310853 + }, + { + "config_index": 48, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.5, + 0.5 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2794124502543127 + }, + { + "config_index": 49, + 
"config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.25, + 0.75 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.28600204932445195 + }, + { + "config_index": 50, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.0, + 1.0 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2889624494855525 + }, + { + "config_index": 51, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 20 + } + }, + "decay_model": "none", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2864793004846433 + }, + { + "config_index": 52, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 60 + } + }, + "decay_model": "none", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.27940414984186646 + }, + { + "config_index": 53, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 100 + } + }, + "decay_model": "none", + "temporal_half_life_days": 60.0, + 
"min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2789604495774256 + }, + { + "config_index": 54, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.75, + 0.25 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2787499499390833 + }, + { + "config_index": 55, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.5, + 0.5 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2868397506972542 + }, + { + "config_index": 56, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.25, + 0.75 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.284945898965816 + }, + { + "config_index": 57, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.3, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.0, + 1.0 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + 
"mean_latency_ms": 0.2812874005030608 + }, + { + "config_index": 58, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 10, + "fuse_strategy": { + "rrf": { + "k": 20 + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.28095219931856263 + }, + { + "config_index": 59, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 10, + "fuse_strategy": { + "rrf": { + "k": 60 + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2758167509455234 + }, + { + "config_index": 60, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 10, + "fuse_strategy": { + "rrf": { + "k": 100 + } + }, + "decay_model": "none", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2824979506840464 + }, + { + "config_index": 61, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 1.0, + 0.0 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.282977097958792 + }, + { + "config_index": 62, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.75, + 0.25 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 
60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2763854499789886 + }, + { + "config_index": 63, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.25, + 0.75 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.27764179903897457 + }, + { + "config_index": 64, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.0, + 1.0 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2787354511383455 + }, + { + "config_index": 65, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 20 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2802791514113778 + }, + { + "config_index": 66, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 60 + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2792020015476737 + }, + { + "config_index": 67, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + 
"candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 100 + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.27800425050372723 + }, + { + "config_index": 68, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 1.0, + 0.0 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2789042016956955 + }, + { + "config_index": 69, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.75, + 0.25 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2812332495523151 + }, + { + "config_index": 70, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.5, + 0.5 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2767645495623583 + }, + { + "config_index": 71, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.25, + 0.75 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + 
"mean_latency_ms": 0.27748339998652227 + }, + { + "config_index": 72, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 40, + "fuse_strategy": { + "rrf": { + "k": 20 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2761915504379431 + }, + { + "config_index": 73, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 40, + "fuse_strategy": { + "rrf": { + "k": 60 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2803582996421028 + }, + { + "config_index": 74, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 40, + "fuse_strategy": { + "rrf": { + "k": 100 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.27802289987448603 + }, + { + "config_index": 75, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 1.0, + 0.0 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2767853995464975 + }, + { + "config_index": 76, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.75, + 0.25 + ] + } + }, + "decay_model": "hyperbolic", + 
"temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.28124175005359575 + }, + { + "config_index": 77, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.5, + 0.5 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2837229010765441 + }, + { + "config_index": 78, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.25, + 0.75 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2874540507036727 + }, + { + "config_index": 79, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.0, + 1.0 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2787061999697471 + }, + { + "config_index": 80, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 20 + } + }, + "decay_model": "none", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2805125004670117 + }, + { + "config_index": 81, + "config": { + "relevance_weight": 0.6, + 
"importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 100 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2797540499159368 + }, + { + "config_index": 82, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 1.0, + 0.0 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2792020997731015 + }, + { + "config_index": 83, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.75, + 0.25 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2798353001708165 + }, + { + "config_index": 84, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.5, + 0.5 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2828772005159408 + }, + { + "config_index": 85, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.25, + 0.75 + ] + 
} + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.279381150539848 + }, + { + "config_index": 86, + "config": { + "relevance_weight": 0.6, + "importance_weight": 0.2, + "temporal_weight": 0.2, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.0, + 1.0 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.28124165000917856 + }, + { + "config_index": 87, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "rrf": { + "k": 20 + } + }, + "decay_model": "none", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.27987510002276395 + }, + { + "config_index": 88, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "rrf": { + "k": 60 + } + }, + "decay_model": "none", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2800395010126522 + }, + { + "config_index": 89, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "rrf": { + "k": 100 + } + }, + "decay_model": "none", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2810250996844843 + }, + { + "config_index": 90, + "config": { + "relevance_weight": 
0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.75, + 0.25 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.28223335029906593 + }, + { + "config_index": 91, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.5, + 0.5 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.28449174933484755 + }, + { + "config_index": 92, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.25, + 0.75 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.28110419916629326 + }, + { + "config_index": 93, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 10, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.0, + 1.0 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.27862714960065205 + }, + { + "config_index": 94, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 20 + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + 
"min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2819332996295998 + }, + { + "config_index": 95, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 60 + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.28068960054952186 + }, + { + "config_index": 96, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 100 + } + }, + "decay_model": "none", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2806042510201223 + }, + { + "config_index": 97, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 1.0, + 0.0 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.27994805022899527 + }, + { + "config_index": 98, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.75, + 0.25 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2773583990347106 + }, + { + "config_index": 99, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + 
"weighted": { + "weights": [ + 0.25, + 0.75 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.28961035059182905 + }, + { + "config_index": 100, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.0, + 1.0 + ] + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.28000009879178833 + }, + { + "config_index": 101, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "rrf": { + "k": 20 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.28202285029692575 + }, + { + "config_index": 102, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "rrf": { + "k": 60 + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.28163114984636195 + }, + { + "config_index": 103, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "rrf": { + "k": 100 + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.28272290037421044 + }, + { + "config_index": 104, + 
"config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 1.0, + 0.0 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.29119380087649915 + }, + { + "config_index": 105, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.75, + 0.25 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.28479799948399886 + }, + { + "config_index": 106, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.5, + 0.5 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.28276649973122403 + }, + { + "config_index": 107, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 40, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.25, + 0.75 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.28361035001580603 + }, + { + "config_index": 108, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 20 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 14.0, + 
"min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.28614999901037663 + }, + { + "config_index": 109, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 60 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2842709007381927 + }, + { + "config_index": 110, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "rrf": { + "k": 100 + } + }, + "decay_model": "exponential", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.28580209873325657 + }, + { + "config_index": 111, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 1.0, + 0.0 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2826500996889081 + }, + { + "config_index": 112, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.75, + 0.25 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2826354011631338 + }, + { + 
"config_index": 113, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.5, + 0.5 + ] + } + }, + "decay_model": "hyperbolic", + "temporal_half_life_days": 60.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.2808332999848062 + }, + { + "config_index": 114, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.25, + 0.75 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 14.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.95, + "mean_latency_ms": 0.28887504995509516 + }, + { + "config_index": 115, + "config": { + "relevance_weight": 0.8, + "importance_weight": 0.1, + "temporal_weight": 0.1, + "candidate_multiplier": 20, + "fuse_strategy": { + "weighted": { + "weights": [ + 0.0, + 1.0 + ] + } + }, + "decay_model": "none", + "temporal_half_life_days": 30.0, + "min_score": 0.0, + "min_salience": 0.0, + "candidate_limit": 100 + }, + "recall_at_10": 0.9333333333333333, + "mrr": 0.925, + "mean_latency_ms": 0.2837999494659016 + } +] \ No newline at end of file diff --git a/tests/khive-contract/tune/tuned-config.toml b/tests/khive-contract/tune/tuned-config.toml new file mode 100644 index 00000000..23e84898 --- /dev/null +++ b/tests/khive-contract/tune/tuned-config.toml @@ -0,0 +1,17 @@ +# Winning config from khive recall param-tuning grid search +# run_date = "2026-05-25" +# recall_at_10 = 0.9333 +# mrr = 0.9500 +# mean_latency_ms = 0.29 + +[recall] +relevance_weight = 0.7 +importance_weight = 0.2 +temporal_weight = 0.1 +temporal_half_life_days = 14.0 +decay_model = "hyperbolic" +candidate_multiplier = 10 +# candidate_limit = null 
(use multiplier only) +fuse_strategy = {weighted = {weights = [1.0, 0.0]}} +min_score = 0.0 +min_salience = 0.0 From 31f259545e9c5bd738535dae02fc8de5b26e17ee Mon Sep 17 00:00:00 2001 From: OceanLi <122793010+ohdearquant@users.noreply.github.com> Date: Mon, 25 May 2026 11:36:29 -0400 Subject: [PATCH 06/18] =?UTF-8?q?fix(retrieval):=20correct=20doctest=20imp?= =?UTF-8?q?ort=20=E2=80=94=20use=20re-export=20at=20crate=20root?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses codex finding on PR #405: `khive_retrieval::hnsw::HnswIndex` doesn't resolve because `hnsw` is not a public submodule — `HnswIndex` is re-exported at the crate root (lib.rs:145). The doctest at persist/mod.rs:29 must use the public facade import. Closes the remaining gap on issue #309 (--all-features doctest failure). Verified: `RUSTC_WRAPPER= cargo test --offline -p khive-retrieval --all-features --doc` passes. Co-Authored-By: Claude Opus 4.7 --- crates/khive-retrieval/src/persist/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/khive-retrieval/src/persist/mod.rs b/crates/khive-retrieval/src/persist/mod.rs index 40d4e678..0893903a 100644 --- a/crates/khive-retrieval/src/persist/mod.rs +++ b/crates/khive-retrieval/src/persist/mod.rs @@ -26,7 +26,7 @@ //! //! ```rust,no_run //! use khive_retrieval::persist::RetrievalPersistence; -//! use khive_retrieval::hnsw::HnswIndex; +//! use khive_retrieval::HnswIndex; //! use rusqlite::Connection; //! use std::sync::Arc; //! 
use tokio::sync::Mutex; From d943b7f43a1503048aff859b00c20d9fcec530db Mon Sep 17 00:00:00 2001 From: OceanLi <122793010+ohdearquant@users.noreply.github.com> Date: Mon, 25 May 2026 11:38:43 -0400 Subject: [PATCH 07/18] =?UTF-8?q?revert(memory):=20keep=20RecallConfig=20d?= =?UTF-8?q?efaults=20=E2=80=94=20corpus=20ceiling=20made=20changes=20unjus?= =?UTF-8?q?tified?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex review on PR #408 flagged that the three default changes from the tune sweep (temporal_half_life_days 30 to 14, decay exp to hyp, multiplier 20 to 10) were made even though REPORT.md explicitly states the synthetic eval set produced identical recall@10 = 0.9333 for ALL 116 configs — the landscape was too flat to discriminate these parameters. Reverts those three lines back to their prior values. The tuned-config.toml artifact stays as an experimental record; the grid_search.py infra stays runnable; only RecallConfig::default() is restored. A discriminating eval corpus (embed-enabled, synonym queries, partial matches) is the prerequisite before changing runtime defaults. Co-Authored-By: Claude Opus 4.7 --- crates/khive-pack-memory/src/config.rs | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/crates/khive-pack-memory/src/config.rs b/crates/khive-pack-memory/src/config.rs index 9c603a42..f30bb1b7 100644 --- a/crates/khive-pack-memory/src/config.rs +++ b/crates/khive-pack-memory/src/config.rs @@ -51,7 +51,11 @@ pub struct RecallConfig { pub fallback_during_migration: bool, } -// Tuned 2026-05-25: grid search over 116 configs (quick). PARTIAL — eval too easy to discriminate params. Changed: half_life 30→14, decay exp→hyp, multiplier 20→10. See tests/khive-contract/tune/REPORT.md. +// Tuning artifact: tests/khive-contract/tune/ swept 116 configs but the synthetic corpus +// produced an identical recall@10 = 0.9333 for every config — i.e. 
a flat landscape that +// cannot empirically distinguish these parameters. Defaults below stay at the prior values +// until a harder corpus (embed-enabled, synonym queries, partial matches) provides signal. +// See tests/khive-contract/tune/REPORT.md for the analysis. impl Default for RecallConfig { fn default() -> Self { Self { @@ -60,9 +64,9 @@ impl Default for RecallConfig { temporal_weight: 0.10, reranker_weights: HashMap::new(), reranker_params: HashMap::new(), - temporal_half_life_days: 14.0, - decay_model: DecayModel::Hyperbolic, - candidate_multiplier: 10, + temporal_half_life_days: 30.0, + decay_model: DecayModel::default(), + candidate_multiplier: 20, candidate_limit: None, fuse_strategy: FusionStrategy::default(), min_score: 0.0, From 539ae5bb297f929e7ed6a21d2237ceae72af7c9b Mon Sep 17 00:00:00 2001 From: OceanLi <122793010+ohdearquant@users.noreply.github.com> Date: Mon, 25 May 2026 11:48:37 -0400 Subject: [PATCH 08/18] =?UTF-8?q?fix(pack-memory):=20address=20PR=20#406?= =?UTF-8?q?=20codex=20findings=20=E2=80=94=20top=5Fk=20cast=20+=20stronger?= =?UTF-8?q?=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two Medium findings from codex review: 1. top_k cast bug: `(k as u32).min(100)` truncates k before capping. A request with `top_k = 4_294_967_297` (larger than u32::MAX) truncates to 1 BEFORE the cap is applied, so the result limit becomes 1, not 100. Fixed: `u32::try_from(k.min(100)).unwrap_or(100)` clamps to usize first, then narrows safely. 2. Weak tests: test_recall_default_identity only checked length and the top hit. Strengthened to compare full ordered note_id+score lists across all positions, with all three knobs explicitly set to null. test_recall_fusion_strategy_override only validated string acceptance. 
Added a new unit test (fusion_strategy_change_produces_observable_ordering_difference) with a deterministic fixture where RRF and Weighted strategies MUST produce different orderings — proving the fusion_strategy override actually flows into fuse_candidates, not just validation. Verified: cargo test -p khive-pack-memory --lib passes (62 unit tests). Co-Authored-By: Claude Opus 4.7 --- crates/khive-pack-memory/src/handlers.rs | 86 ++++++++++++++++++- crates/khive-pack-memory/tests/integration.rs | 70 ++++++++++----- 2 files changed, 132 insertions(+), 24 deletions(-) diff --git a/crates/khive-pack-memory/src/handlers.rs b/crates/khive-pack-memory/src/handlers.rs index ad6ff3db..9737efcf 100644 --- a/crates/khive-pack-memory/src/handlers.rs +++ b/crates/khive-pack-memory/src/handlers.rs @@ -488,7 +488,7 @@ impl MemoryPack { cfg.validate()?; let limit = if let Some(k) = p.top_k { - (k as u32).min(100) + u32::try_from(k.min(100)).unwrap_or(100) } else { p.limit.unwrap_or(10).min(100) }; @@ -943,6 +943,90 @@ mod tests { } } + #[test] + fn fusion_strategy_change_produces_observable_ordering_difference() { + // Codex Medium 2 (PR #406): prove the fusion_strategy knob actually + // affects fusion output, not just validation. Uses a deterministic fixture + // where rank-based (RRF) and score-based (Weighted) fusion must rank + // differently. 
+ use khive_runtime::FusionStrategy as RuntimeFusionStrategy; + use khive_storage::types::{TextSearchHit, VectorSearchHit}; + use std::collections::HashSet; + use uuid::Uuid; + + let id_a = Uuid::from_u128(0xAAAA_AAAA_AAAA_AAAA_AAAA_AAAA_AAAA_AAAA); + let id_b = Uuid::from_u128(0xBBBB_BBBB_BBBB_BBBB_BBBB_BBBB_BBBB_BBBB); + let id_c = Uuid::from_u128(0xCCCC_CCCC_CCCC_CCCC_CCCC_CCCC_CCCC_CCCC); + + let text_hits = vec![ + TextSearchHit { + subject_id: id_a, + score: 0.9_f64.into(), + rank: 1, + title: None, + snippet: None, + }, + TextSearchHit { + subject_id: id_b, + score: 0.5_f64.into(), + rank: 2, + title: None, + snippet: None, + }, + ]; + let vector_hits = vec![ + VectorSearchHit { + subject_id: id_c, + score: 0.95_f64.into(), + rank: 1, + }, + VectorSearchHit { + subject_id: id_a, + score: 0.3_f64.into(), + rank: 2, + }, + ]; + let memory_ids: HashSet = [id_a, id_b, id_c].into_iter().collect(); + + let cfg_rrf = RecallConfig { + fuse_strategy: RuntimeFusionStrategy::Rrf { k: 60 }, + ..RecallConfig::default() + }; + let rrf_results = fuse_candidates( + text_hits.clone(), + vector_hits.clone(), + &memory_ids, + &cfg_rrf, + 10, + ); + let rrf_order: Vec = rrf_results.iter().map(|h| h.entity_id).collect(); + + let cfg_weighted = RecallConfig { + fuse_strategy: RuntimeFusionStrategy::Weighted { + weights: vec![0.1, 0.9], + }, + ..RecallConfig::default() + }; + let weighted_results = fuse_candidates( + text_hits, + vector_hits, + &memory_ids, + &cfg_weighted, + 10, + ); + let weighted_order: Vec = weighted_results.iter().map(|h| h.entity_id).collect(); + + // RRF on this fixture: id_a in both sources gets highest combined rank score; + // id_c (vector rank 1) and id_b (text rank 2) tied around 0.0161-0.0164. + // Weighted [0.1, 0.9]: id_c dominates (0.95 * 0.9 = 0.855); id_a drops + // (0.9 * 0.1 + 0.3 * 0.9 = 0.36); id_b last (0.5 * 0.1 = 0.05). + // The orderings MUST differ — this is the discriminating assertion. 
+ assert_ne!( + rrf_order, weighted_order, + "fusion_strategy change must affect ordering; RRF and Weighted produced identical: {rrf_order:?}" + ); + } + #[test] fn compute_score_default_config_reproduces_legacy() { let cfg = RecallConfig::default(); diff --git a/crates/khive-pack-memory/tests/integration.rs b/crates/khive-pack-memory/tests/integration.rs index f613506d..3b98fee2 100644 --- a/crates/khive-pack-memory/tests/integration.rs +++ b/crates/khive-pack-memory/tests/integration.rs @@ -1009,49 +1009,73 @@ async fn test_recall_default_identity() { let rt = make_runtime(); let registry = make_registry(rt.clone()); - let note = registry - .dispatch( - "remember", - json!({ - "content": "the mitochondria is the powerhouse of the cell", - "importance": 0.8 - }), - ) - .await - .expect("remember succeeds"); - let note_id = note["note_id"].as_str().unwrap().to_string(); + // Create multiple memories so the identity comparison is meaningful + // (single-hit fixtures can't distinguish ordering changes). 
+ for content in [ + "the mitochondria is the powerhouse of the cell", + "ribosomes synthesize proteins in the cell", + "the nucleus contains the cell's DNA", + "lysosomes digest cellular waste in the cell", + ] { + registry + .dispatch( + "remember", + json!({ "content": content, "importance": 0.8 }), + ) + .await + .expect("remember succeeds"); + } // Baseline recall with no knobs let base = registry - .dispatch("recall", json!({ "query": "mitochondria powerhouse cell" })) + .dispatch("recall", json!({ "query": "cell organelles" })) .await .expect("baseline recall succeeds"); let base_hits = base.as_array().expect("array"); assert!( - !base_hits.is_empty(), - "baseline must return at least one hit" + base_hits.len() >= 2, + "baseline must return at least two hits to make ordering meaningful, got {}", + base_hits.len() ); - // Same call with all knobs absent — must match baseline shape + // Same call with all three knobs explicitly set to null — must be byte-identical let knobless = registry .dispatch( "recall", - json!({ "query": "mitochondria powerhouse cell", "top_k": null }), + json!({ + "query": "cell organelles", + "top_k": null, + "fusion_strategy": null, + "score_floor": null, + }), ) .await - .expect("recall with null top_k succeeds"); + .expect("recall with all knobs null succeeds"); let knobless_hits = knobless.as_array().expect("array"); assert_eq!( base_hits.len(), knobless_hits.len(), - "null top_k must not change result count" - ); - assert_eq!( - base_hits[0]["note_id"].as_str().unwrap(), - note_id, - "top hit must be the memory we created" + "null knobs must not change result count" ); + + // Full ordering identity: each hit's note_id AND fused_score must match + // position-by-position. This catches a regression where a null knob silently + // shifts the ranking or rescaling. 
+ for (i, (b, k)) in base_hits.iter().zip(knobless_hits.iter()).enumerate() { + assert_eq!( + b["note_id"].as_str(), + k["note_id"].as_str(), + "null knobs altered note_id at position {i}" + ); + // Scores must round-trip; allow tiny float jitter + let bs = b["score"].as_f64().unwrap_or(0.0); + let ks = k["score"].as_f64().unwrap_or(0.0); + assert!( + (bs - ks).abs() < 1e-9, + "null knobs altered score at position {i}: baseline={bs} knobless={ks}" + ); + } } #[tokio::test] From 54056ad7e26211713da82967d799a16d6b3c9cb8 Mon Sep 17 00:00:00 2001 From: OceanLi <122793010+ohdearquant@users.noreply.github.com> Date: Mon, 25 May 2026 11:52:49 -0400 Subject: [PATCH 09/18] =?UTF-8?q?fix(embedding):=20address=20PR=20#407=20c?= =?UTF-8?q?odex=20findings=20=E2=80=94=20ADR=20amendment=20+=203=20data-sa?= =?UTF-8?q?fety=20fixes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per Ocean direction (2026-05-25): amend ADR-043 to formalize the V16 string-tag schema design, claim V16 in ADR-015 ledger, and fix the data-safety bugs codex flagged. Defer sqlite-vec data preservation to a follow-up issue. ADR amendments: - ADR-043 §1.1 (vector store column addition): replaces the old FK-based description with the actual V16 design (TEXT embedding_model column with composite index). Includes rationale for TEXT vs BLOB FK (hot-path join cost, end-to-end shape consistency with kkernel/env-vars/registry). Documents sqlite-vec rebuild behavior and follow-up. - ADR-015 schema ledger: V16 row added with cluster-22 amendment notes. High 3 fix — atomic unknown-model validation (handlers.rs): - handle_remember now calls resolve_embedding_model(Some(name)) BEFORE create_note_with_decay_for_embedding_model. resolve_embedding_model is synchronous and doesn't load the model — it only checks registration. An unknown model is rejected before any note/FTS/vector row is written. 
High 2 fix — scoped delete across all model stores (operations.rs): - delete_note now iterates over registered_embedding_model_names() (new public method on KhiveRuntime) and deletes the note's vector from EVERY registered model's vector store. Previously only the default model's store was touched, leaving non-default vectors orphaned. Medium fix — KHIVE_ADDITIONAL_EMBEDDING_MODELS warning on bad names: - parse_embedding_model_list now logs tracing::warn for non-empty raw names that don't parse, instead of silently filtering them out. The function still returns a Vec (no startup failure on partial validity), but operator typos now surface at startup rather than as UnknownModel errors at request time. Deferred (follow-up issue, see ADR-043 §1.1 final paragraph): - High 1 — V16 backfill hard-codes 'all-minilm-l6-v2' for all regular vec_* tables, and sqlite-vec virtual tables are still dropped-and-rebuilt on schema mismatch (data loss for non-default deployments). A copy-with- default rebuild path is tracked separately because it requires a careful multi-step migration with vec0 INSERT INTO SELECT FROM and a verification step. Operators are warned via ADR §1.1 to back up before upgrading. Co-Authored-By: Claude Opus 4.7 --- crates/khive-pack-memory/src/handlers.rs | 8 +++ crates/khive-runtime/src/operations.rs | 11 ++-- crates/khive-runtime/src/runtime.rs | 26 ++++++++- docs/adr/ADR-015-schema-migrations.md | 9 ++- docs/adr/ADR-043-embedding-model-migration.md | 57 ++++++++++++++----- 5 files changed, 92 insertions(+), 19 deletions(-) diff --git a/crates/khive-pack-memory/src/handlers.rs b/crates/khive-pack-memory/src/handlers.rs index 9737efcf..bf906620 100644 --- a/crates/khive-pack-memory/src/handlers.rs +++ b/crates/khive-pack-memory/src/handlers.rs @@ -426,6 +426,14 @@ impl MemoryPack { } } + // Codex High 3 (PR #407): validate embedding_model BEFORE any note/FTS + // write so unknown-model errors are atomic (no half-written rows). 
+ // resolve_embedding_model is sync and does not trigger model load — it + // only checks the registry contains the name. + if let Some(model_name) = p.embedding_model.as_deref() { + self.runtime.resolve_embedding_model(Some(model_name))?; + } + let note = self .runtime .create_note_with_decay_for_embedding_model( diff --git a/crates/khive-runtime/src/operations.rs b/crates/khive-runtime/src/operations.rs index 9a6add6a..18016754 100644 --- a/crates/khive-runtime/src/operations.rs +++ b/crates/khive-runtime/src/operations.rs @@ -1389,8 +1389,11 @@ impl KhiveRuntime { self.text_for_notes(token)? .delete_document(&ns_str, id) .await?; - if self.config().embedding_model.is_some() { - self.vectors(token)?.delete(id).await?; + // Codex High 2 (PR #407): scoped delete — iterate over EVERY + // registered embedding model's vector store so non-default vectors + // don't orphan when the note is deleted. + for model_name in self.registered_embedding_model_names() { + self.vectors_for_model(token, &model_name)?.delete(id).await?; } } @@ -1400,8 +1403,8 @@ impl KhiveRuntime { self.text_for_notes(token)? .delete_document(&ns_str, id) .await?; - if self.config().embedding_model.is_some() { - self.vectors(token)?.delete(id).await?; + for model_name in self.registered_embedding_model_names() { + self.vectors_for_model(token, &model_name)?.delete(id).await?; } } if deleted { diff --git a/crates/khive-runtime/src/runtime.rs b/crates/khive-runtime/src/runtime.rs index 27b14eb9..3f054574 100644 --- a/crates/khive-runtime/src/runtime.rs +++ b/crates/khive-runtime/src/runtime.rs @@ -456,6 +456,15 @@ impl KhiveRuntime { } } + /// Names of all registered embedding models in this runtime. + /// + /// Useful for operations that must touch every model's storage (e.g., + /// scoped vector deletion on note delete — codex High 2 (PR #407)). + /// The default model is included. 
+ pub fn registered_embedding_model_names(&self) -> Vec { + self.embedders.keys().cloned().collect() + } + /// Get the lazily-initialized embedding service for the named model. /// /// Returns a `CachedEmbeddingService` wrapping a `NativeEmbeddingService`. @@ -541,7 +550,22 @@ fn register_configured_embedding_models( fn parse_embedding_model_list(s: &str) -> Vec { parse_pack_list(s) .into_iter() - .filter_map(|raw| parse_embedding_model_alias(&raw)) + .filter_map(|raw| { + let parsed = parse_embedding_model_alias(&raw); + if parsed.is_none() && !raw.trim().is_empty() { + // Codex Medium (PR #407): silent filter_map masks operator typos. Warn loudly + // so misconfiguration surfaces at startup rather than as an UnknownModel error + // at request time. We do not fail startup — a partially valid list still + // produces a functional runtime — but the warning is unambiguous. + tracing::warn!( + model = %raw, + "KHIVE_ADDITIONAL_EMBEDDING_MODELS contains unknown model name; ignored. \ + Valid forms: short alias like 'paraphrase' or a fully-qualified key \ + from lattice_embed::EmbeddingModel::from_str." + ); + } + parsed + }) .collect() } diff --git a/docs/adr/ADR-015-schema-migrations.md b/docs/adr/ADR-015-schema-migrations.md index a08ef9fc..ec5264a2 100644 --- a/docs/adr/ADR-015-schema-migrations.md +++ b/docs/adr/ADR-015-schema-migrations.md @@ -45,6 +45,7 @@ The canonical ledger of database schema migration versions. Migration versions a | V13 | c06/ADR-041 | event_observability_provenance | shipped | | V14 | c20/ADR-043 | embedding_model_registry | shipped | | V15 | c22/ADR-046 | proposals_open | shipped | +| V16 | v022/ADR-043 | vector_embedding_model_tag | shipped | > **Amendment (2026-05-24, cluster-24 + post-integration)**: The ledger above reflects what > actually shipped on `integration/v1-adr-alignment` after parallel cluster landings c01, c03, @@ -56,7 +57,13 @@ The canonical ledger of database schema migration versions. 
Migration versions a > integration merge. c20 (embedding model registry per ADR-043) landed at V14 — the same ADR > the V6 reservation originally anticipated, hence V6 remains a no-op slot. c22 (proposals_open > projection per ADR-046) landed at V15. V6–V8 are no-op placeholder slots to maintain -> contiguity. Versions V1–V15 are production schema and are frozen. +> contiguity. +> +> **V16 amendment (2026-05-25, show v022-polish)**: V16 (`vector_embedding_model_tag`) adds +> a TEXT `embedding_model` column and composite index to regular `vec_*` tables, completing +> the dual-embedding plumbing described in ADR-043 §1. sqlite-vec virtual tables are handled +> at open time via schema rebuild because vec0 does not support `ALTER TABLE`. Versions V1–V16 +> are production schema and are frozen. > **Invariant**: ADR number order and migration version order are independent. Migration versions reflect schema ledger assignment order. A migration may only depend on schema created by earlier versions. diff --git a/docs/adr/ADR-043-embedding-model-migration.md b/docs/adr/ADR-043-embedding-model-migration.md index e668bdd1..88ffdba9 100644 --- a/docs/adr/ADR-043-embedding-model-migration.md +++ b/docs/adr/ADR-043-embedding-model-migration.md @@ -105,24 +105,55 @@ impossible — any attempt to insert a second `active` row for the same engine f the constraint. Migrations therefore execute as `BEGIN; UPDATE active→superseded; UPDATE pending→active; COMMIT;` — atomic by virtue of the index. -#### Vector store column addition +#### Vector store column addition (V16, ADR-015) -Each `vec_` table (ADR-031 D3) gains a column: +Each regular `vec_` table (ADR-031 D3) gains a TEXT model tag column. 
+This was formalized in migration V16: ```sql -ALTER TABLE vec_ ADD COLUMN embedding_model_id BLOB - REFERENCES _embedding_models(id); -CREATE INDEX idx_vec__model ON vec_(embedding_model_id); +ALTER TABLE vec_ ADD COLUMN embedding_model TEXT NOT NULL + DEFAULT 'all-minilm-l6-v2'; +CREATE INDEX idx_vec__subject_model + ON vec_(subject_id, embedding_model); ``` -Backfilled on the same migration: existing rows get the engine's current active -model's id. - -SQLite does not support `ALTER COLUMN ... SET NOT NULL`. The `embedding_model_id` -column is enforced via a `CHECK (embedding_model_id IS NOT NULL)` constraint added -through SQLite's standard table-rebuild pattern (create new table with constraint, -copy data, drop old, rename) — see ADR-015 for the migration template. This rebuild -is performed as the final step of the startup backfill described in §8 below. +The composite `(subject_id, embedding_model)` index supports the scoped recall +SQL: `WHERE subject_id = ? AND embedding_model = ?`. The default value at column +creation time was chosen so existing rows backfill to the legacy MiniLM model; +deployments using a non-default model **must** run the dedicated backfill worker +described in §8 before relying on model-scoped recall. + +**Design trade-off — TEXT vs BLOB FK.** ADR-043's first draft (pre-V16) specified +`embedding_model_id BLOB REFERENCES _embedding_models(id)`. V16 instead stores +the model_id directly as TEXT, joining against `_embedding_models.model_id` +when needed: + +- TEXT model_id is the natural primary key used everywhere else in the runtime + (kkernel engine list, `EmbeddingService::key_version()`, env var + `KHIVE_ADDITIONAL_EMBEDDING_MODELS`) — keeping the same shape end-to-end. +- BLOB FK would require a sub-select on every vector insert/search to resolve + the active model's UUID. The hot path is recall scoring; the join cost is + unjustified for a column whose values change only on registry events. 
+- Schema-level referential integrity is replaced by application-level + validation in the runtime registry: unknown model names are rejected at + `KhiveRuntime::embedder(name)` and at `RecallParams.embedding_model` + validation. + +The `_embedding_models` registry table (V14) still owns the authoritative model +metadata (dim, output_dim, status, key_version). V16's `embedding_model TEXT` +column is the foreign-key-by-value reference back to `_embedding_models.model_id`. + +**sqlite-vec virtual tables.** vec0 virtual tables cannot accept `ALTER TABLE +ADD COLUMN` because they declare their columns at `CREATE VIRTUAL TABLE` time. +V16 handles this via the open-time path in `khive-db/src/backend.rs`: when +opening a `vec_` table that lacks `embedding_model`, the runtime +rebuilds the virtual table with the new schema. **Existing rows are lost on +rebuild** — this is acceptable for deployments that have not yet enabled +dual-embedding because vectors will be re-embedded by the next backfill cycle, +but **operators must take a backup before upgrading any production deployment +with persisted non-default embeddings**. A follow-up migration (tracked in +ADR-043 §8.2) will implement a copy-with-default rebuild to preserve old +vectors with their inferred model tag. ### 2. Triggers — three sources, one event From be85904e0f0fbc779983a0be18811b346b86a716 Mon Sep 17 00:00:00 2001 From: OceanLi <122793010+ohdearquant@users.noreply.github.com> Date: Mon, 25 May 2026 11:53:46 -0400 Subject: [PATCH 10/18] docs(tune): document khive_contract dependency + run instructions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Codex Major 2 (PR #408): the tune CLI is not re-runnable from a fresh clone of this branch because khive_contract is in the parent package (slice 06, PR #403 — already merged to main). Codex tested slice 08 in isolation and hit ModuleNotFoundError. 
Adds tests/khive-contract/tune/README.md explaining: - Install khive_contract first: uv pip install -e . from tests/khive-contract/ - Run via uv run python -m tune --quick (or full) - Documents the corpus-ceiling limitation surfaced in REPORT.md - Explicit note that RecallConfig::default() was intentionally NOT changed (defaults reverted in d943b7f based on codex Major 1) Co-Authored-By: Claude Opus 4.7 --- tests/khive-contract/tune/README.md | 42 +++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 tests/khive-contract/tune/README.md diff --git a/tests/khive-contract/tune/README.md b/tests/khive-contract/tune/README.md new file mode 100644 index 00000000..89721706 --- /dev/null +++ b/tests/khive-contract/tune/README.md @@ -0,0 +1,42 @@ +# Recall parameter tuning + +Grid-search runner for khive recall configuration. Operates against a live +MCP session via the `khive_contract.client.KhiveMcpSession` harness (provided +by the parent `tests/khive-contract/` package). + +## Prerequisites + +This script depends on the `khive_contract` Python package in the parent +directory. Install it first: + +```bash +cd tests/khive-contract +uv pip install -e . +``` + +You'll also need the `khive-mcp` binary on your PATH (the tests/khive-contract +harness spawns it via stdio). + +## Run + +```bash +cd tests/khive-contract +uv run python -m tune --quick # ~10 sec, every 10th config +uv run python -m tune # ~2 min, all 116 configs +uv run python -m tune --output-dir /tmp/my-run # custom output location +``` + +## Outputs + +- `results.json` — all (config, recall@10) tuples +- `tuned-config.toml` — recommended config (synthesized from the best-scoring + set; see REPORT.md for a candid assessment of how meaningful this is) +- `REPORT.md` — analysis writeup + +## Known limitation + +The synthetic eval corpus (`fixtures/memories_corpus.json`) has a ceiling at +recall@10 = 0.9333 for **every** config — i.e., the queries are too easy to +discriminate between parameters.
Until a harder corpus exists (embed-enabled, +synonym queries, partial matches), the grid runs but cannot ground default +changes. `RecallConfig::default()` was intentionally NOT changed in this PR. From a9f5585f026dfc64c02f8deb66ed54b13a4f5d62 Mon Sep 17 00:00:00 2001 From: OceanLi <122793010+ohdearquant@users.noreply.github.com> Date: Mon, 25 May 2026 11:55:19 -0400 Subject: [PATCH 11/18] style: cargo fmt --all --- crates/khive-pack-memory/src/handlers.rs | 9 ++------- crates/khive-pack-memory/tests/integration.rs | 5 +---- crates/khive-runtime/src/operations.rs | 8 ++++++-- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/crates/khive-pack-memory/src/handlers.rs b/crates/khive-pack-memory/src/handlers.rs index bf906620..fed761e7 100644 --- a/crates/khive-pack-memory/src/handlers.rs +++ b/crates/khive-pack-memory/src/handlers.rs @@ -1015,13 +1015,8 @@ mod tests { }, ..RecallConfig::default() }; - let weighted_results = fuse_candidates( - text_hits, - vector_hits, - &memory_ids, - &cfg_weighted, - 10, - ); + let weighted_results = + fuse_candidates(text_hits, vector_hits, &memory_ids, &cfg_weighted, 10); let weighted_order: Vec = weighted_results.iter().map(|h| h.entity_id).collect(); // RRF on this fixture: id_a in both sources gets highest combined rank score; diff --git a/crates/khive-pack-memory/tests/integration.rs b/crates/khive-pack-memory/tests/integration.rs index 3b98fee2..59f9f9b3 100644 --- a/crates/khive-pack-memory/tests/integration.rs +++ b/crates/khive-pack-memory/tests/integration.rs @@ -1018,10 +1018,7 @@ async fn test_recall_default_identity() { "lysosomes digest cellular waste in the cell", ] { registry - .dispatch( - "remember", - json!({ "content": content, "importance": 0.8 }), - ) + .dispatch("remember", json!({ "content": content, "importance": 0.8 })) .await .expect("remember succeeds"); } diff --git a/crates/khive-runtime/src/operations.rs b/crates/khive-runtime/src/operations.rs index 18016754..3b02a8e4 100644 --- 
a/crates/khive-runtime/src/operations.rs +++ b/crates/khive-runtime/src/operations.rs @@ -1393,7 +1393,9 @@ impl KhiveRuntime { // registered embedding model's vector store so non-default vectors // don't orphan when the note is deleted. for model_name in self.registered_embedding_model_names() { - self.vectors_for_model(token, &model_name)?.delete(id).await?; + self.vectors_for_model(token, &model_name)? + .delete(id) + .await?; } } @@ -1404,7 +1406,9 @@ impl KhiveRuntime { .delete_document(&ns_str, id) .await?; for model_name in self.registered_embedding_model_names() { - self.vectors_for_model(token, &model_name)?.delete(id).await?; + self.vectors_for_model(token, &model_name)? + .delete(id) + .await?; } } if deleted { From d973c298b6fb11acc417ec468c1ca14257306aa9 Mon Sep 17 00:00:00 2001 From: OceanLi <122793010+ohdearquant@users.noreply.github.com> Date: Mon, 25 May 2026 11:55:40 -0400 Subject: [PATCH 12/18] style: deno fmt ADR tables --- docs/adr/ADR-015-schema-migrations.md | 34 +++++++++++++-------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/docs/adr/ADR-015-schema-migrations.md b/docs/adr/ADR-015-schema-migrations.md index ec5264a2..7384f456 100644 --- a/docs/adr/ADR-015-schema-migrations.md +++ b/docs/adr/ADR-015-schema-migrations.md @@ -28,23 +28,23 @@ mechanism that: The canonical ledger of database schema migration versions. Migration versions are assigned in ledger order; they are NOT required to match ADR number order. 
-| Version | Owning ADR | Migration name | Status | -| ------: | ----------- | ------------------------------------------------- | ------- | -| V1 | (initial) | initial_schema | shipped | -| V2 | (initial) | add_name_to_notes | shipped | -| V3 | (initial) | add_events_namespace_created_index | shipped | -| V4 | (initial) | dedupe_graph_edge_triples | shipped | -| V5 | c01/ADR-001 | add_entity_type_to_entities | shipped | -| V6 | (no-op) | reserved_adr043_embedding_pipeline_extensions | shipped | -| V7 | (no-op) | reserved_adr046_event_sourced_proposals_index | shipped | -| V8 | (no-op) | reserved_adr041_event_observations_and_session_id | shipped | -| V9 | c03/ADR-004 | edge_lifecycle_and_target_backend | shipped | -| V10 | c04/ADR-019 | note_status_and_nullable_metrics | shipped | -| V11 | c04/ADR-014 | entity_tombstone_columns | shipped | -| V12 | c04/ADR-019 | nullable_note_metrics | shipped | -| V13 | c06/ADR-041 | event_observability_provenance | shipped | -| V14 | c20/ADR-043 | embedding_model_registry | shipped | -| V15 | c22/ADR-046 | proposals_open | shipped | +| Version | Owning ADR | Migration name | Status | +| ------: | ------------ | ------------------------------------------------- | ------- | +| V1 | (initial) | initial_schema | shipped | +| V2 | (initial) | add_name_to_notes | shipped | +| V3 | (initial) | add_events_namespace_created_index | shipped | +| V4 | (initial) | dedupe_graph_edge_triples | shipped | +| V5 | c01/ADR-001 | add_entity_type_to_entities | shipped | +| V6 | (no-op) | reserved_adr043_embedding_pipeline_extensions | shipped | +| V7 | (no-op) | reserved_adr046_event_sourced_proposals_index | shipped | +| V8 | (no-op) | reserved_adr041_event_observations_and_session_id | shipped | +| V9 | c03/ADR-004 | edge_lifecycle_and_target_backend | shipped | +| V10 | c04/ADR-019 | note_status_and_nullable_metrics | shipped | +| V11 | c04/ADR-014 | entity_tombstone_columns | shipped | +| V12 | c04/ADR-019 | nullable_note_metrics | shipped | +| 
V13 | c06/ADR-041 | event_observability_provenance | shipped | +| V14 | c20/ADR-043 | embedding_model_registry | shipped | +| V15 | c22/ADR-046 | proposals_open | shipped | | V16 | v022/ADR-043 | vector_embedding_model_tag | shipped | > **Amendment (2026-05-24, cluster-24 + post-integration)**: The ledger above reflects what From 0bf1927de6456e7654c7bb14387e4a6fcb9bbc31 Mon Sep 17 00:00:00 2001 From: OceanLi <122793010+ohdearquant@users.noreply.github.com> Date: Mon, 25 May 2026 11:57:44 -0400 Subject: [PATCH 13/18] fix(test): use single-token query matching all fixture memories --- crates/khive-pack-memory/tests/integration.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/khive-pack-memory/tests/integration.rs b/crates/khive-pack-memory/tests/integration.rs index 59f9f9b3..0beddc27 100644 --- a/crates/khive-pack-memory/tests/integration.rs +++ b/crates/khive-pack-memory/tests/integration.rs @@ -1025,7 +1025,7 @@ async fn test_recall_default_identity() { // Baseline recall with no knobs let base = registry - .dispatch("recall", json!({ "query": "cell organelles" })) + .dispatch("recall", json!({ "query": "cell" })) .await .expect("baseline recall succeeds"); let base_hits = base.as_array().expect("array"); @@ -1040,7 +1040,7 @@ async fn test_recall_default_identity() { .dispatch( "recall", json!({ - "query": "cell organelles", + "query": "cell", "top_k": null, "fusion_strategy": null, "score_floor": null, From c194923552e8550b51ec431c0c0440f73b43b1bd Mon Sep 17 00:00:00 2001 From: OceanLi <122793010+ohdearquant@users.noreply.github.com> Date: Mon, 25 May 2026 13:12:42 -0400 Subject: [PATCH 14/18] fix(test): align partial_config test with reverted default decay_model --- crates/khive-pack-memory/src/config.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/khive-pack-memory/src/config.rs b/crates/khive-pack-memory/src/config.rs index f30bb1b7..52d9874b 100644 --- a/crates/khive-pack-memory/src/config.rs +++ 
b/crates/khive-pack-memory/src/config.rs @@ -447,7 +447,7 @@ mod tests { // unspecified fields keep defaults let diff2 = (cfg.importance_weight - 0.20).abs(); assert!(diff2 < 1e-12); - assert_eq!(cfg.decay_model, DecayModel::Hyperbolic); + assert_eq!(cfg.decay_model, DecayModel::Exponential); } // ── RecallConfig new fields ─────────────────────────────────────────────── From 8d52a8e18bbd6bfbf901a5d83451ec29d69e7cb5 Mon Sep 17 00:00:00 2001 From: OceanLi <122793010+ohdearquant@users.noreply.github.com> Date: Mon, 25 May 2026 13:19:56 -0400 Subject: [PATCH 15/18] =?UTF-8?q?fix(embedding):=20codex=20round=202=20?= =?UTF-8?q?=E2=80=94=20runtime-layer=20atomicity=20+=20ADR=20internal=20co?= =?UTF-8?q?nsistency?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two findings from codex round 2 on PR #407: 1. Medium: runtime-level unknown-model atomicity. handle_remember validated embedding_model before calling create_note_with_decay_for_embedding_model (round 1 fix), but the runtime API itself was permissive — direct Rust callers (other packs, integration tests) would still hit the write-after- failure bug. Fix: resolve_embedding_model is now called at the start of create_note_inner BEFORE any note/FTS/vector write. The pack-handler check remains as an earlier error boundary, but atomicity is enforced at the lowest layer that performs the write. 2. High: ADR-043 internal inconsistency. 
The §1.1 amendment landed in round 1, but the rest of the ADR still contained normative text for the rejected FK-based design: - §8 (Backward compat) described embedding_model_id BLOB column + CHECK rebuild - Alternatives table rejected per-record model_id (which is what V16 ships) - Migration version section claimed V5 + BLOB FK Fix: - §8 rewritten to describe V14 + V16 (registry + tag column) split that actually shipped - Alternatives table row strikes-through with "Superseded by V16 (2026-05-25)" and points back to §1.1 for rationale - Migration version section split into V14 (cluster-20) and V16 (v022-polish) with the actual SQL each migration runs Codex's other round-1 findings remain addressed: - Critical (ADR drift) — now resolved end-to-end across §1.1, §8, alternatives, and §Migration version - High 2 (scoped delete) — confirmed by round-2 review at operations.rs:1395, 1408 - Medium (env var warning) — confirmed at runtime.rs:549, 560 Verified: cargo test -p khive-runtime -p khive-pack-memory --lib passes (262 + 62 tests), cargo fmt clean, deno fmt clean. Co-Authored-By: Claude Opus 4.7 --- crates/khive-runtime/src/operations.rs | 8 ++ docs/adr/ADR-043-embedding-model-migration.md | 86 +++++++++++-------- 2 files changed, 59 insertions(+), 35 deletions(-) diff --git a/crates/khive-runtime/src/operations.rs b/crates/khive-runtime/src/operations.rs index 3b02a8e4..a3330b57 100644 --- a/crates/khive-runtime/src/operations.rs +++ b/crates/khive-runtime/src/operations.rs @@ -896,6 +896,14 @@ impl KhiveRuntime { } } + // Codex round 2 Medium (PR #407): resolve embedding_model BEFORE any + // note/FTS/vector write so unknown-model errors are atomic at the + // runtime layer, not just at one pack handler. Direct Rust callers + // (other packs, integration tests) get the same guarantee. 
+ if let Some(model_name) = embedding_model { + self.resolve_embedding_model(Some(model_name))?; + } + let mut note = Note::new(ns, kind, content); if let Some(s) = salience { note = note.with_salience(s); diff --git a/docs/adr/ADR-043-embedding-model-migration.md b/docs/adr/ADR-043-embedding-model-migration.md index 88ffdba9..969e35e5 100644 --- a/docs/adr/ADR-043-embedding-model-migration.md +++ b/docs/adr/ADR-043-embedding-model-migration.md @@ -357,23 +357,35 @@ All four carry `engine_name` and the relevant `_embedding_models.id`(s) in paylo None carries `served_by_profile_id` — these are operator/system events, not profile-served (ADR-032 §3 rule). -### 8. Backward compatibility — one-shot startup migration - -Deployments predating this ADR have `vec_` tables without `embedding_model_id` -and no `_embedding_models` rows. On first startup post-ADR-043: - -1. Run the schema migration (creates `_embedding_models`, adds `embedding_model_id` - to `vec_` tables as nullable). -2. For each `[[engines]]` entry: derive `canonical_key` via lattice's - `EmbeddingKey::canonical_bytes()`, insert one `_embedding_models` row with - `status='active'`, `activated_at=now`, `created_at=now`. -3. Backfill all `vec_` rows with that engine's newly-inserted model id. -4. Tighten the `embedding_model_id` column by rebuilding the table with a - `CHECK (embedding_model_id IS NOT NULL)` constraint (SQLite table-rebuild pattern — - see §1 and ADR-015). This runs as run-once startup code after the SQL migration - completes, not as an additional SQL migration step. - -The startup migration emits one `EmbeddingModelChanged` event per engine with +### 8. Backward compatibility — one-shot startup migration (V14 + V16) + +Deployments predating this ADR have `vec_` tables without an +`embedding_model` column and no `_embedding_models` rows. 
The startup +migration runs in two steps, landed in two separate `VersionedMigration` +slots: + +**V14 — `embedding_model_registry`** (already shipped): + +1. `CREATE TABLE _embedding_models` (per §1 schema). +2. `CREATE UNIQUE INDEX idx_embed_models_one_active`. +3. `CREATE INDEX idx_embed_models_engine_status`. + +**V16 — `vector_embedding_model_tag`** (shipped in v022-polish): + +4. For each existing regular `vec_*` table (discovered at runtime, validated as + alphanumeric-suffix only): `ALTER TABLE vec_ ADD COLUMN embedding_model + TEXT NOT NULL DEFAULT 'all-minilm-l6-v2'`. +5. `CREATE INDEX idx_vec__subject_model ON vec_(subject_id, embedding_model)`. +6. sqlite-vec virtual tables (`vec0`) cannot accept `ALTER TABLE` — handled at + open time in `khive-db/src/backend.rs` by rebuilding the virtual table with + the new schema. See §1.1 final paragraph for the operator backup warning; + a preserving rebuild is the documented follow-up. + +Operator population of `_embedding_models` (steps for populating registry rows +from `[[engines]]` config and emitting `EmbeddingModelChanged` events) is a +separate startup-code path tracked in #385, not part of the SQL migrations. + +The startup population emits one `EmbeddingModelChanged` event per engine with `source_model_id = None` and `initiated_by = ConfigDiff` so the audit trail starts clean. @@ -429,14 +441,14 @@ Tracked in `.khive/plans/embedding-version-config.md`. ## Alternatives Considered -| Alternative | Why rejected | -| ---------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------- | -| Reimplement migration state machine in khive | Lattice ships it; duplication has no upside | -| Store model id on every record (`notes`, `entities`) row | Triple-write cost; the vector table is the right grain — only vectors are model-bound | -| Migrate vectors in place (rewrite same table) | Loses atomicity. 
Failure mid-migration leaves a half-rewritten table with no clean rollback | -| MCP verb `brain.migrate_model` for agent-triggered migrations | Crosses the brain-substrate boundary; risks the feedback loop described in Rationale | -| Auto-archive `superseded` rows after N days | Premature; an explicit `khive engine archive --before ` is enough | -| Per-record `model_id` on `vec_` instead of FK to `_embedding_models` | Denormalized; can't carry the supersession chain or `superseded_by` link | +| Alternative | Why rejected | +| -------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Reimplement migration state machine in khive | Lattice ships it; duplication has no upside | +| Store model id on every record (`notes`, `entities`) row | Triple-write cost; the vector table is the right grain — only vectors are model-bound | +| Migrate vectors in place (rewrite same table) | Loses atomicity. Failure mid-migration leaves a half-rewritten table with no clean rollback | +| MCP verb `brain.migrate_model` for agent-triggered migrations | Crosses the brain-substrate boundary; risks the feedback loop described in Rationale | +| Auto-archive `superseded` rows after N days | Premature; an explicit `khive engine archive --before ` is enough | +| ~~Per-record `model_id` on `vec_` instead of FK to `_embedding_models`~~ | **Superseded by V16 (2026-05-25)**: per-record `embedding_model TEXT` is what V16 actually ships. The supersession chain is preserved via `_embedding_models.superseded_by` joined on `model_id`. See §1.1 for the trade-off rationale (hot-path join cost, end-to-end consistency with kkernel/env-var) | ## Consequences @@ -505,19 +517,23 @@ payload. 
### Migration version -A new `VersionedMigration` in `crates/khive-db/src/migrations.rs` with -`version = 5` (current latest is V4 — `dedupe_graph_edge_triples`): +The ADR-043 schema work landed in two ledger versions in +`crates/khive-db/src/migrations.rs`: + +**V14 — `embedding_model_registry`** (cluster-20): -1. `CREATE TABLE _embedding_models` (above) +1. `CREATE TABLE _embedding_models` (per §1) 2. `CREATE UNIQUE INDEX idx_embed_models_one_active` 3. `CREATE INDEX idx_embed_models_engine_status` -4. For each existing `vec_` table (discovered via the catalog): - - `ALTER TABLE vec_ ADD COLUMN embedding_model_id BLOB REFERENCES _embedding_models(id)` - - `CREATE INDEX idx_vec__model ON vec_(embedding_model_id)` -5. Startup backfill (run-once code, not a SQL migration): populate - `_embedding_models` from `[[engines]]`, backfill the FK column, then rebuild - `vec_` with a `CHECK (embedding_model_id IS NOT NULL)` constraint via - SQLite's table-rebuild pattern (ADR-015). + +**V16 — `vector_embedding_model_tag`** (v022-polish): + +4. For each existing regular `vec_*` table (runtime-discovered, name-validated): + - `ALTER TABLE vec_ ADD COLUMN embedding_model TEXT NOT NULL DEFAULT 'all-minilm-l6-v2'` + - `CREATE INDEX idx_vec__subject_model ON vec_(subject_id, embedding_model)` +5. Startup backfill (run-once code, tracked separately in #385): populate + `_embedding_models` from `[[engines]]`; per-table model-inferred tag rewrite + for deployments with non-default models (deferred — see §1.1 final paragraph). 
### Worker registration From c3d8caf35730591210314a793d9e5a292bf686af Mon Sep 17 00:00:00 2001 From: OceanLi <122793010+ohdearquant@users.noreply.github.com> Date: Mon, 25 May 2026 13:24:07 -0400 Subject: [PATCH 16/18] ci: force re-trigger From 36cfa616d4c6e7ddbef6850d8dfa74d41e27b874 Mon Sep 17 00:00:00 2001 From: OceanLi <122793010+ohdearquant@users.noreply.github.com> Date: Mon, 25 May 2026 13:27:19 -0400 Subject: [PATCH 17/18] ci: force re-trigger via doc whitespace touch --- docs/adr/ADR-043-embedding-model-migration.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/adr/ADR-043-embedding-model-migration.md b/docs/adr/ADR-043-embedding-model-migration.md index 969e35e5..b067f1bd 100644 --- a/docs/adr/ADR-043-embedding-model-migration.md +++ b/docs/adr/ADR-043-embedding-model-migration.md @@ -601,3 +601,4 @@ via `kkernel call`. - ADR-031 §D3 — `[[engines]]` schema, `vec_` table naming, `EngineConfig` - ADR-032 §3 — `EventKind` enum (extended here with four new variants) - ADR-033 §1 — `RecallConfig.fallback_during_migration` (added here) + From a2759723556daaec4bb6b944b8deae3e48aab23b Mon Sep 17 00:00:00 2001 From: OceanLi <122793010+ohdearquant@users.noreply.github.com> Date: Mon, 25 May 2026 13:27:40 -0400 Subject: [PATCH 18/18] style: deno fmt --- docs/adr/ADR-043-embedding-model-migration.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/adr/ADR-043-embedding-model-migration.md b/docs/adr/ADR-043-embedding-model-migration.md index b067f1bd..969e35e5 100644 --- a/docs/adr/ADR-043-embedding-model-migration.md +++ b/docs/adr/ADR-043-embedding-model-migration.md @@ -601,4 +601,3 @@ via `kkernel call`. - ADR-031 §D3 — `[[engines]]` schema, `vec_` table naming, `EngineConfig` - ADR-032 §3 — `EventKind` enum (extended here with four new variants) - ADR-033 §1 — `RecallConfig.fallback_during_migration` (added here) -