From b27e2ac351b7d6b2d5dd93a279f9dda193797e08 Mon Sep 17 00:00:00 2001 From: OceanLi <122793010+ohdearquant@users.noreply.github.com> Date: Mon, 25 May 2026 03:43:25 -0400 Subject: [PATCH 01/14] feat(pack-memory): wire khive-retrieval as recall composer (ADR-011/021) Route pack-memory's fuse_candidates through khive_retrieval::fuse_search_results, making khive-retrieval a real consumed facade instead of an orphan crate. - Add khive-retrieval dep to khive-pack-memory/Cargo.toml - Replace direct fuse_with_strategy call with retrieval adapter (CandidateMeta side-map, HybridConfig builder, FusionStrategy conversion) - Fix issue #309: resolve --all-features compile failures in khive-retrieval (stale SqliteStore imports, missing NodeId/LinkStore imports) - Add 5 integration tests (3 fusion_surface, 2 pack-memory recall adapter) - RRF k=1 discriminator test proves strategy propagation (30x score gap) Co-Authored-By: Claude Opus 4.6 --- crates/khive-pack-memory/Cargo.toml | 1 + crates/khive-pack-memory/src/handlers.rs | 111 ++++++++++++++++- crates/khive-pack-memory/tests/integration.rs | 117 ++++++++++++++++++ crates/khive-retrieval/src/graph/tests.rs | 2 +- crates/khive-retrieval/src/persist/tests.rs | 1 + .../src/replay/engine_replay.rs | 21 +++- .../src/weights/engine_weights.rs | 31 ++++- .../khive-retrieval/tests/fusion_surface.rs | 61 +++++++++ 8 files changed, 331 insertions(+), 14 deletions(-) create mode 100644 crates/khive-retrieval/tests/fusion_surface.rs diff --git a/crates/khive-pack-memory/Cargo.toml b/crates/khive-pack-memory/Cargo.toml index e1a60e7a..d01a040b 100644 --- a/crates/khive-pack-memory/Cargo.toml +++ b/crates/khive-pack-memory/Cargo.toml @@ -13,6 +13,7 @@ description = "Memory verb pack — remember/recall semantics with decay-aware r [dependencies] khive-types = { version = "0.2.2", path = "../khive-types", features = ["serde"] } khive-runtime = { version = "0.2.2", path = "../khive-runtime" } +khive-retrieval = { version = "0.2.2", path = "../khive-retrieval" } khive-pack-brain = { version = "0.2.2", path = "../khive-pack-brain" } inventory = { workspace = true } khive-storage = { version = "0.2.2", path = "../khive-storage" } diff --git a/crates/khive-pack-memory/src/handlers.rs b/crates/khive-pack-memory/src/handlers.rs index 6667a7f8..3ab1c759 100644 --- a/crates/khive-pack-memory/src/handlers.rs +++ b/crates/khive-pack-memory/src/handlers.rs @@ -4,8 +4,13 @@ use serde::Deserialize; use serde_json::{json, Value}; use uuid::Uuid; -use khive_runtime::fusion::fuse_with_strategy; -use khive_runtime::{NamespaceToken, RuntimeError, SearchHit, SearchSource, VerbRegistry}; +use khive_retrieval::{ + fuse_search_results, FusionStrategy as RetrievalFusionStrategy, HybridConfig, +}; +use khive_runtime::{ + FusionStrategy as RuntimeFusionStrategy, NamespaceToken, RuntimeError, SearchHit, SearchSource, + VerbRegistry, +}; use khive_storage::types::{ TextFilter, TextQueryMode, TextSearchHit, TextSearchRequest, VectorSearchHit, VectorSearchRequest, @@ -138,6 +143,49 @@ fn search_source_label(source: SearchSource) -> &'static str { } } +#[derive(Default)] +struct CandidateMeta { + in_text: bool, + in_vector: bool, + title: Option, + snippet: Option, +} + +fn to_retrieval_fusion_strategy(strategy: &RuntimeFusionStrategy) -> RetrievalFusionStrategy { + match strategy { + RuntimeFusionStrategy::Rrf { k } => RetrievalFusionStrategy::Rrf { k: *k }, + RuntimeFusionStrategy::Weighted { .. } => RetrievalFusionStrategy::Weighted { + weights: Vec::new(), + }, + RuntimeFusionStrategy::Union => RetrievalFusionStrategy::Union, + RuntimeFusionStrategy::VectorOnly => RetrievalFusionStrategy::VectorOnly, + } +} + +fn retrieval_hybrid_config(strategy: &RuntimeFusionStrategy, limit: usize) -> HybridConfig { + let mut config = HybridConfig::new(limit) + .with_pool_size(limit) + .with_fusion_strategy(to_retrieval_fusion_strategy(strategy)); + + if let RuntimeFusionStrategy::Weighted { weights } = strategy { + // Runtime weighted fusion uses [text, vector]. HybridConfig uses keyword/vector. + // Preserve arbitrary positive scales — do not clamp via with_weights(). + config.keyword_weight = weights.first().copied().unwrap_or(0.0).max(0.0); + config.vector_weight = weights.get(1).copied().unwrap_or(0.0).max(0.0); + } + + config +} + +fn source_from_meta(meta: &CandidateMeta) -> SearchSource { + match (meta.in_vector, meta.in_text) { + (true, true) => SearchSource::Both, + (true, false) => SearchSource::Vector, + (false, true) => SearchSource::Text, + (false, false) => SearchSource::Text, + } +} + fn fuse_candidates( text_hits: Vec, vector_hits: Vec, @@ -145,15 +193,68 @@ fn fuse_candidates( cfg: &RecallConfig, limit: usize, ) -> Vec { - let text: Vec = text_hits + let mut meta = HashMap::::new(); + + let text_source: Vec<_> = text_hits .into_iter() .filter(|h| memory_ids.contains(&h.subject_id)) + .map(|h| { + let TextSearchHit { + subject_id, + score, + title, + snippet, + .. + } = h; + let entry = meta.entry(subject_id).or_default(); + entry.in_text = true; + if entry.title.is_none() { + entry.title = title; + } + if entry.snippet.is_none() { + entry.snippet = snippet; + } + (subject_id, score) + }) .collect(); - let vec: Vec = vector_hits + + let vector_source: Vec<_> = vector_hits .into_iter() .filter(|h| memory_ids.contains(&h.subject_id)) + .map(|h| { + let entry = meta.entry(h.subject_id).or_default(); + entry.in_vector = true; + (h.subject_id, h.score) + }) .collect(); - fuse_with_strategy(text, vec, &cfg.fuse_strategy, limit) + + let vector_only = matches!(&cfg.fuse_strategy, RuntimeFusionStrategy::VectorOnly); + let sources = if vector_only { + vec![vector_source] + } else { + // HybridConfig weighted convention: vector first, keyword second. + vec![vector_source, text_source] + }; + + let retrieval_cfg = retrieval_hybrid_config(&cfg.fuse_strategy, limit); + fuse_search_results(sources, &retrieval_cfg) + .into_iter() + .map(|(id, score)| { + let m = meta.remove(&id).unwrap_or_default(); + let (source, title, snippet) = if vector_only { + (SearchSource::Vector, None, None) + } else { + (source_from_meta(&m), m.title, m.snippet) + }; + SearchHit { + entity_id: id, + score, + source, + title, + snippet, + } + }) + .collect() } impl MemoryPack { diff --git a/crates/khive-pack-memory/tests/integration.rs b/crates/khive-pack-memory/tests/integration.rs index 946856c7..1a29307a 100644 --- a/crates/khive-pack-memory/tests/integration.rs +++ b/crates/khive-pack-memory/tests/integration.rs @@ -657,6 +657,123 @@ async fn test_recall_fuse_source_field_is_plain_string() { ); } +/// Verifies that recall.fuse routes through khive_retrieval::fuse_search_results +/// by injecting a non-default fusion config (Rrf k=1) and asserting the fused +/// score matches the RRF k=1 formula: 1/(k + rank) = 1/(1 + 1) = 0.5. +/// +/// Under default k=60 the score would be 1/61 ≈ 0.0164. The large gap (0.5 vs +/// 0.0164) is the discriminator: if the adapter did not pass k=1 through to +/// khive_retrieval::HybridConfig, the score would not be 0.5. +#[tokio::test] +async fn test_recall_fuse_rrf_k1_uses_retrieval_adapter() { + let rt = make_runtime(); + let registry = make_registry(rt); + + registry + .dispatch( + "remember", + json!({ "content": "retrieval adapter rrf k1 probe memory" }), + ) + .await + .expect("remember"); + + let result = registry + .dispatch( + "recall.fuse", + json!({ + "query": "retrieval adapter rrf k1 probe", + "config": { + "fuse_strategy": { "rrf": { "k": 1 } } + } + }), + ) + .await + .expect("recall.fuse with Rrf k=1"); + + let fused = result["fused_candidates"].as_array().expect("fused array"); + assert!( + !fused.is_empty(), + "recall.fuse must return at least one candidate" + ); + + let score = fused[0]["fused_score"] + .as_f64() + .expect("fused_score is f64"); + // Rank 1 in a single text source with k=1: RRF = 1/(1+1) = 0.5. + // If k=60 were used instead, score ≈ 0.0164 — the gap proves the adapter works. + let expected = 0.5_f64; + assert!( + (score - expected).abs() < 1e-6, + "RRF k=1, rank 1 → fused_score must be 0.5; got {score:.6} \ + (≈0.0164 means the adapter passed k=60 instead of k=1)" + ); +} + +/// Regression: after wiring khive-retrieval into fuse_candidates, the recall.fuse +/// response shape must be unchanged — top-level strategy + candidate_limit, and +/// per-candidate note_id + fused_score + source must all be present. Full recall +/// fields (content, salience) must remain absent. +#[tokio::test] +async fn test_recall_fuse_shape_preserved_after_retrieval_wiring() { + let rt = make_runtime(); + let registry = make_registry(rt); + + registry + .dispatch( + "remember", + json!({ "content": "shape regression check after retrieval wiring" }), + ) + .await + .expect("remember"); + + let result = registry + .dispatch( + "recall.fuse", + json!({ "query": "shape regression retrieval wiring" }), + ) + .await + .expect("recall.fuse"); + + // Top-level shape + assert!( + result.get("strategy").is_some(), + "strategy field must be present in recall.fuse response" + ); + assert!( + result["candidate_limit"].as_u64().is_some(), + "candidate_limit must be a non-negative integer" + ); + + let fused = result["fused_candidates"] + .as_array() + .expect("fused_candidates array"); + assert!(!fused.is_empty(), "fused_candidates must be non-empty"); + + let c = &fused[0]; + assert!( + c["note_id"].as_str().is_some(), + "note_id must be a string UUID" + ); + assert!( + c["fused_score"].as_f64().is_some(), + "fused_score must be a float" + ); + let source = c["source"].as_str().expect("source must be a plain string"); + assert!( + matches!(source, "text" | "vector" | "both"), + "source must be a plain label, got {source:?}" + ); + // Full recall fields must not leak into fuse output + assert!( + c.get("content").is_none(), + "content must be absent from recall.fuse output" + ); + assert!( + c.get("salience").is_none(), + "salience must be absent from recall.fuse output" + ); +} + /// When include_breakdown is true, breakdown.total() must equal the hit's composite score. #[tokio::test] async fn test_recall_breakdown_total_matches_composite_score() { diff --git a/crates/khive-retrieval/src/graph/tests.rs b/crates/khive-retrieval/src/graph/tests.rs index 639b3efd..92e3e936 100644 --- a/crates/khive-retrieval/src/graph/tests.rs +++ b/crates/khive-retrieval/src/graph/tests.rs @@ -1,6 +1,6 @@ //! Unit tests for graph traversal module. -use super::compat::{test_context, EntityRef, MockLinkStore}; +use super::compat::{test_context, EntityRef, LinkStore, MockLinkStore}; use crate::graph::types::{ Direction, PathNode, TraversalOptions, MAX_TRAVERSAL_DEPTH, MAX_TRAVERSAL_RESULTS, diff --git a/crates/khive-retrieval/src/persist/tests.rs b/crates/khive-retrieval/src/persist/tests.rs index 2efdf72d..88d6e84e 100644 --- a/crates/khive-retrieval/src/persist/tests.rs +++ b/crates/khive-retrieval/src/persist/tests.rs @@ -1,4 +1,5 @@ use super::*; +use crate::NodeId; use khive_bm25::Bm25Index; use khive_hnsw::HnswIndex; use rusqlite::Connection; diff --git a/crates/khive-retrieval/src/replay/engine_replay.rs b/crates/khive-retrieval/src/replay/engine_replay.rs index d25a85bb..45b8bbc2 100644 --- a/crates/khive-retrieval/src/replay/engine_replay.rs +++ b/crates/khive-retrieval/src/replay/engine_replay.rs @@ -844,11 +844,26 @@ pub mod metrics { #[cfg(test)] mod tests { use super::*; - use khive_db::SqliteStore; fn make_conn() -> Arc> { - let store = SqliteStore::memory().expect("in-memory store"); - store.conn() + let conn = Connection::open_in_memory().expect("open in-memory db"); + conn.execute_batch( + r#" + CREATE TABLE weight_events ( + namespace TEXT NOT NULL, + atom_id TEXT NOT NULL, + delta REAL NOT NULL, + weight_after REAL NOT NULL, + channel TEXT NOT NULL, + eta REAL NOT NULL, + event_id TEXT, + context_id TEXT, + ts INTEGER NOT NULL + ); + "#, + ) + .expect("init replay test schema"); + Arc::new(Mutex::new(conn)) } fn insert_weight_event( diff --git a/crates/khive-retrieval/src/weights/engine_weights.rs b/crates/khive-retrieval/src/weights/engine_weights.rs index 7530767c..0b47a7cc 100644 --- a/crates/khive-retrieval/src/weights/engine_weights.rs +++ b/crates/khive-retrieval/src/weights/engine_weights.rs @@ -298,14 +298,35 @@ pub async fn batch_load_weights( #[cfg(test)] mod tests { use super::*; - use khive_db::SqliteStore; use std::sync::Arc; fn make_conn() -> Arc> { - // Open an in-memory SQLite DB and run migrations so atom_weights and - // weight_events tables exist. - let store = SqliteStore::memory().expect("in-memory store"); - store.conn() + let conn = Connection::open_in_memory().expect("open in-memory db"); + conn.execute_batch( + r#" + CREATE TABLE atom_weights ( + namespace TEXT NOT NULL, + atom_id TEXT NOT NULL, + weight REAL NOT NULL, + updated_at INTEGER NOT NULL, + version INTEGER NOT NULL DEFAULT 1, + PRIMARY KEY(namespace, atom_id) + ); + CREATE TABLE weight_events ( + namespace TEXT NOT NULL, + atom_id TEXT NOT NULL, + delta REAL NOT NULL, + weight_after REAL NOT NULL, + channel TEXT NOT NULL, + eta REAL NOT NULL, + event_id TEXT, + context_id TEXT, + ts INTEGER NOT NULL + ); + "#, + ) + .expect("init weight test schema"); + Arc::new(Mutex::new(conn)) } // ------------------------------------------------------------------------- diff --git a/crates/khive-retrieval/tests/fusion_surface.rs b/crates/khive-retrieval/tests/fusion_surface.rs new file mode 100644 index 00000000..29ae15cf --- /dev/null +++ b/crates/khive-retrieval/tests/fusion_surface.rs @@ -0,0 +1,61 @@ +use khive_retrieval::{fuse_search_results, FusionStrategy, HybridConfig}; +use khive_score::DeterministicScore; + +#[test] +fn fuse_search_results_rrf_surface_matches_expected_order() { + // doc_b appears at rank 1 in both vector and keyword — must win under RRF k=60. + let vector = vec![ + ("doc_b", DeterministicScore::from_f64(0.9)), + ("doc_a", DeterministicScore::from_f64(0.8)), + ]; + let keyword = vec![ + ("doc_b", DeterministicScore::from_f64(4.0)), + ("doc_c", DeterministicScore::from_f64(3.0)), + ]; + let config = HybridConfig::new(10) + .with_pool_size(10) + .with_fusion_strategy(FusionStrategy::Rrf { k: 60 }); + + let results = fuse_search_results(vec![vector, keyword], &config); + + assert!(!results.is_empty(), "fusion must return results"); + assert_eq!( + results[0].0, "doc_b", + "doc_b must rank first (appears in both sources)" + ); + + // RRF score for doc_b: 1/(1+60) + 1/(1+60) = 2/61 ≈ 0.03279 + let expected = 2.0 / 61.0; + let actual = results[0].1.to_f64(); + assert!( + (actual - expected).abs() < 1e-6, + "fused score = {actual}, expected ~{expected}" + ); +} + +#[test] +fn fuse_search_results_empty_sources_returns_empty() { + let config = HybridConfig::default(); + let results = fuse_search_results::<&str>(vec![], &config); + assert!(results.is_empty()); +} + +#[test] +fn fuse_search_results_single_source_truncates_to_top_k() { + let source: Vec<_> = (0..20) + .map(|i| { + ( + format!("doc_{i}"), + DeterministicScore::from_f64(1.0 - i as f64 * 0.01), + ) + }) + .collect(); + let config = HybridConfig::new(5); + let results = fuse_search_results(vec![source], &config); + assert_eq!( + results.len(), + 5, + "single-source result must be truncated to top_k=5" + ); + assert_eq!(results[0].0, "doc_0", "highest score must be first"); +} From be2fd2bebaad6985e359ae14549ef3d68bf4935a Mon Sep 17 00:00:00 2001 From: OceanLi <122793010+ohdearquant@users.noreply.github.com> Date: Mon, 25 May 2026 04:01:34 -0400 Subject: [PATCH 02/14] =?UTF-8?q?feat(pack-memory):=20expose=20top=5Fk/fus?= =?UTF-8?q?ion=5Fstrategy/score=5Ffloor=20knobs=20on=20recall=20(ADR-033?= =?UTF-8?q?=20=C2=A76)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add three optional per-request fields to RecallParams: top_k (usize), fusion_strategy (string), and score_floor (f32) - fusion_strategy validated against {"rrf","weighted","union"}; clear error with valid values on invalid input - top_k overrides the result limit for a single call (capped at 100) - score_floor applied as a post-filter on the composite score after compute_score - Add parse_fusion_strategy_str helper; wire override into cfg.fuse_strategy before passing to fuse_candidates - Add 4 integration tests: default_identity, top_k_override, fusion_strategy_override (including rejection), score_floor - Document knobs in ADR-033 §6.1 with table, semantics, and example DSL Co-Authored-By: Claude Sonnet 4.6 --- crates/khive-pack-memory/src/handlers.rs | 113 ++++++++- crates/khive-pack-memory/tests/integration.rs | 225 ++++++++++++++++++ docs/adr/ADR-033-recall-pipeline.md | 35 +++ 3 files changed, 371 insertions(+), 2 deletions(-) diff --git a/crates/khive-pack-memory/src/handlers.rs b/crates/khive-pack-memory/src/handlers.rs index 3ab1c759..3bcb84cc 100644 --- a/crates/khive-pack-memory/src/handlers.rs +++ b/crates/khive-pack-memory/src/handlers.rs @@ -37,6 +37,19 @@ fn validate_memory_type(mt: &str) -> Result<(), RuntimeError> { } } +fn parse_fusion_strategy_str(s: &str) -> Result { + match s { + "rrf" => Ok(RuntimeFusionStrategy::Rrf { k: 60 }), + "weighted" => Ok(RuntimeFusionStrategy::Weighted { + weights: vec![0.3, 0.7], + }), + "union" => Ok(RuntimeFusionStrategy::Union), + other => Err(RuntimeError::InvalidInput(format!( + "invalid fusion_strategy {other:?}: must be one of \"rrf\", \"weighted\", \"union\"" + ))), + } +} + #[derive(Deserialize)] struct RememberParams { content: String, @@ -58,6 +71,9 @@ struct RecallParams { min_score: Option, min_salience: Option, config: Option, + top_k: Option, + fusion_strategy: Option, + score_floor: Option, } impl RecallParams { @@ -436,10 +452,35 @@ impl MemoryPack { validate_memory_type(mt)?; } - let cfg = p.effective_config(self.active_config()); + if let Some(ref fs) = p.fusion_strategy { + parse_fusion_strategy_str(fs)?; + } + + let mut cfg = p.effective_config(self.active_config()); + if let Some(ref fs) = p.fusion_strategy { + let mut new_strategy = parse_fusion_strategy_str(fs)?; + // "weighted" in the request means "use weighted fusion" — the actual + // weight values come from pack config, not the request (ADR-033 §6.1). + if let ( + RuntimeFusionStrategy::Weighted { + weights: ref mut new_w, + }, + RuntimeFusionStrategy::Weighted { + weights: ref existing_w, + }, + ) = (&mut new_strategy, &cfg.fuse_strategy) + { + *new_w = existing_w.clone(); + } + cfg.fuse_strategy = new_strategy; + } cfg.validate()?; - let limit = p.limit.unwrap_or(10).min(100); + let limit = if let Some(k) = p.top_k { + (k as u32).min(100) + } else { + p.limit.unwrap_or(10).min(100) + }; let candidate_limit = recall_candidate_count(&cfg, limit); let candidates = self .collect_recall_candidates(&p.query, token, candidate_limit) @@ -493,6 +534,11 @@ impl MemoryPack { if final_score < cfg.min_score { continue; } + if let Some(floor) = p.score_floor { + if final_score < floor as f64 { + continue; + } + } ranked.push((id, final_score, breakdown, note)); } @@ -762,6 +808,9 @@ mod tests { min_score: None, min_salience: None, config: None, + top_k: None, + fusion_strategy: None, + score_floor: None, }; let cfg = p.effective_config(RecallConfig::default()); assert!((cfg.relevance_weight - 0.70).abs() < 1e-12); @@ -778,6 +827,9 @@ mod tests { min_score: Some(0.5), min_salience: Some(0.3), config: None, + top_k: None, + fusion_strategy: None, + score_floor: None, }; let cfg = p.effective_config(RecallConfig::default()); assert!((cfg.min_score - 0.5).abs() < 1e-12); @@ -796,6 +848,9 @@ mod tests { relevance_weight: 0.50, ..RecallConfig::default() }), + top_k: None, + fusion_strategy: None, + score_floor: None, }; let cfg = p.effective_config(RecallConfig::default()); assert!((cfg.relevance_weight - 0.50).abs() < 1e-12); @@ -803,6 +858,60 @@ mod tests { assert!((cfg.min_score - 0.1).abs() < 1e-12); } + #[test] + fn test_weighted_strategy_preserves_pack_weights() { + use khive_runtime::FusionStrategy as RuntimeFusionStrategy; + + // Pack config has custom weighted weights [0.8, 0.2] + let base = RecallConfig { + fuse_strategy: RuntimeFusionStrategy::Weighted { + weights: vec![0.8, 0.2], + }, + ..RecallConfig::default() + }; + + // Request overrides to "weighted" — must preserve [0.8, 0.2], not replace with [0.3, 0.7] + let p = RecallParams { + query: "test".to_string(), + limit: None, + memory_type: None, + min_score: None, + min_salience: None, + config: None, + top_k: None, + fusion_strategy: Some("weighted".to_string()), + score_floor: None, + }; + + let mut cfg = p.effective_config(base); + if let Some(ref fs) = p.fusion_strategy { + let mut new_strategy = parse_fusion_strategy_str(fs).unwrap(); + if let ( + RuntimeFusionStrategy::Weighted { + weights: ref mut new_w, + }, + RuntimeFusionStrategy::Weighted { + weights: ref existing_w, + }, + ) = (&mut new_strategy, &cfg.fuse_strategy) + { + *new_w = existing_w.clone(); + } + cfg.fuse_strategy = new_strategy; + } + + match cfg.fuse_strategy { + RuntimeFusionStrategy::Weighted { weights } => { + assert_eq!( + weights, + vec![0.8, 0.2], + "fusion_strategy=weighted must preserve pack weights [0.8, 0.2], not override with [0.3, 0.7]" + ); + } + other => panic!("expected Weighted strategy, got {other:?}"), + } + } + #[test] fn compute_score_default_config_reproduces_legacy() { let cfg = RecallConfig::default(); diff --git a/crates/khive-pack-memory/tests/integration.rs b/crates/khive-pack-memory/tests/integration.rs index 1a29307a..f613506d 100644 --- a/crates/khive-pack-memory/tests/integration.rs +++ b/crates/khive-pack-memory/tests/integration.rs @@ -1001,3 +1001,228 @@ async fn test_pack_tunable_apply_config_affects_recall_score() { "under relevance_weight=1.0 with rrf=1.0 → score=1.0; got {total2}" ); } + +// ── ADR-033 §6 knob tests ────────────────────────────────────────────────── + +#[tokio::test] +async fn test_recall_default_identity() { + let rt = make_runtime(); + let registry = make_registry(rt.clone()); + + let note = registry + .dispatch( + "remember", + json!({ + "content": "the mitochondria is the powerhouse of the cell", + "importance": 0.8 + }), + ) + .await + .expect("remember succeeds"); + let note_id = note["note_id"].as_str().unwrap().to_string(); + + // Baseline recall with no knobs + let base = registry + .dispatch("recall", json!({ "query": "mitochondria powerhouse cell" })) + .await + .expect("baseline recall succeeds"); + let base_hits = base.as_array().expect("array"); + assert!( + !base_hits.is_empty(), + "baseline must return at least one hit" + ); + + // Same call with all knobs absent — must match baseline shape + let knobless = registry + .dispatch( + "recall", + json!({ "query": "mitochondria powerhouse cell", "top_k": null }), + ) + .await + .expect("recall with null top_k succeeds"); + let knobless_hits = knobless.as_array().expect("array"); + + assert_eq!( + base_hits.len(), + knobless_hits.len(), + "null top_k must not change result count" + ); + assert_eq!( + base_hits[0]["note_id"].as_str().unwrap(), + note_id, + "top hit must be the memory we created" + ); +} + +#[tokio::test] +async fn test_recall_top_k_override() { + let rt = make_runtime(); + let registry = make_registry(rt.clone()); + + // Create several distinct memories to ensure the pool is large enough + for i in 0..5 { + registry + .dispatch( + "remember", + json!({ + "content": format!("rust ownership memory safety concept {i}"), + "importance": 0.7 + }), + ) + .await + .expect("remember succeeds"); + } + + // Recall with top_k=2 — must not return more than 2 results + let result = registry + .dispatch( + "recall", + json!({ "query": "rust ownership memory safety", "top_k": 2 }), + ) + .await + .expect("recall with top_k=2 succeeds"); + let hits = result.as_array().expect("array"); + assert!( + hits.len() <= 2, + "top_k=2 must return at most 2 results, got {}", + hits.len() + ); + + // top_k=1 must return at most 1 + let result1 = registry + .dispatch( + "recall", + json!({ "query": "rust ownership memory safety", "top_k": 1 }), + ) + .await + .expect("recall with top_k=1 succeeds"); + let hits1 = result1.as_array().expect("array"); + assert!( + hits1.len() <= 1, + "top_k=1 must return at most 1 result, got {}", + hits1.len() + ); +} + +#[tokio::test] +async fn test_recall_fusion_strategy_override() { + let rt = make_runtime(); + let registry = make_registry(rt.clone()); + + registry + .dispatch( + "remember", + json!({ + "content": "gradient descent optimization machine learning", + "importance": 0.8 + }), + ) + .await + .expect("remember succeeds"); + + // Each valid strategy must succeed and return an array + for strategy in &["rrf", "weighted", "union"] { + let result = registry + .dispatch( + "recall", + json!({ + "query": "gradient descent optimization", + "fusion_strategy": strategy + }), + ) + .await + .unwrap_or_else(|e| panic!("recall with fusion_strategy={strategy:?} failed: {e}")); + assert!( + result.is_array(), + "fusion_strategy={strategy:?} must return an array, got {result}" + ); + } + + // Invalid strategy must return an error + let err = registry + .dispatch( + "recall", + json!({ + "query": "gradient descent optimization", + "fusion_strategy": "bogus" + }), + ) + .await; + assert!(err.is_err(), "invalid fusion_strategy must return an error"); + let msg = err.unwrap_err().to_string(); + assert!( + msg.contains("rrf") && msg.contains("weighted") && msg.contains("union"), + "error message must list valid strategies, got: {msg}" + ); +} + +#[tokio::test] +async fn test_recall_score_floor() { + let rt = make_runtime(); + let registry = make_registry(rt.clone()); + + registry + .dispatch( + "remember", + json!({ + "content": "backpropagation neural network training algorithm", + "importance": 0.6 + }), + ) + .await + .expect("remember succeeds"); + + // Baseline: no floor — get result count + let base = registry + .dispatch( + "recall", + json!({ "query": "backpropagation neural network" }), + ) + .await + .expect("baseline recall succeeds"); + let base_count = base.as_array().expect("array").len(); + + // score_floor=0.99 must not return MORE results than baseline + let floored = registry + .dispatch( + "recall", + json!({ + "query": "backpropagation neural network", + "score_floor": 0.99 + }), + ) + .await + .expect("recall with score_floor=0.99 succeeds"); + let floored_hits = floored.as_array().expect("array"); + assert!( + floored_hits.len() <= base_count, + "score_floor=0.99 must return ≤ baseline count ({base_count}), got {}", + floored_hits.len() + ); + + // All returned hits must have score >= 0.99 + for hit in floored_hits { + let score = hit["score"].as_f64().expect("score is a number"); + assert!( + score >= 0.99, + "score_floor=0.99: all returned scores must be ≥ 0.99, got {score}" + ); + } + + // score_floor=0.0 must behave same as no floor + let zero_floor = registry + .dispatch( + "recall", + json!({ + "query": "backpropagation neural network", + "score_floor": 0.0 + }), + ) + .await + .expect("recall with score_floor=0.0 succeeds"); + let zero_count = zero_floor.as_array().expect("array").len(); + assert_eq!( + zero_count, base_count, + "score_floor=0.0 must return same count as no floor" + ); +} diff --git a/docs/adr/ADR-033-recall-pipeline.md b/docs/adr/ADR-033-recall-pipeline.md index 375856c0..e6f30079 100644 --- a/docs/adr/ADR-033-recall-pipeline.md +++ b/docs/adr/ADR-033-recall-pipeline.md @@ -277,6 +277,41 @@ document its Hoare triple: | **Program** | Stage 1 (`memory.recall_embed`): query → embedding via multi-engine fan-out. Stage 2 (`memory.recall_candidates`): broad recall from FTS5 + vector, `candidate_multiplier × limit` candidates per path. Stage 3 (`memory.recall_fuse`): apply `fusion_strategy` (default RRF) to produce fused hits. Stage 4 (`memory.recall_rerank`, ADR-042 §7): run all rerankers whose weight in `reranker_weights` is > 0; each writes its score to `candidate.rerank_scores[name]`. Stage 5 (`memory.recall_score`): apply `ComposePipeline` with `WeightedObjective` over the three base Objectives plus one `RerankerObjective` per active reranker. Stage 6 (select): truncate to `limit`; apply `budget` via `GreedySelector` if set. | | **Postcondition** | Output is a deterministic list of memory notes ordered by composite score, within `limit`. All returned notes are alive (not soft-deleted) and `kind = memory`. Score breakdown is available on request via `memory.recall_score`. | +### 6.1 Per-request knobs (ADR-033 §6 addendum) + +The `recall` verb accepts three optional per-request knobs that override the pack-level +`RecallConfig` for a single call. All knobs are optional; absent or `null` preserves the +current default behavior. + +| Parameter | Type | Default | Semantics | +| ------------------ | ---------------- | ------------------ | ---------------------------------------------------------------------------------------------------------------------------------------- | +| `top_k` | `usize` \| null | `limit` or `10` | Maximum number of results to return. Overrides `limit` when set. Capped at `100`. | +| `fusion_strategy` | `string` \| null | `"rrf"` (k=60) | Fusion algorithm for candidate merging. Must be one of `"rrf"`, `"weighted"`, `"union"`. Returns an error for any other value. | +| `score_floor` | `f32` \| null | `0.0` (no floor) | Minimum composite score threshold applied after `compute_score`. Results below this floor are excluded. `0.0` or `null` = no filtering. | + +**`fusion_strategy` details:** +- `"rrf"` — Reciprocal Rank Fusion with k=60 (default). Robust across query types. +- `"weighted"` — Weighted linear combination. Text/vector weights come from the pack-level + config (`RecallConfig.fuse_strategy`), not the request. The request cannot override weights. +- `"union"` — Max-score per candidate ID. Inclusive but may surface low-quality text-only hits. + +**Example request DSL:** + +```json +{ + "query": "attention mechanism in transformers", + "top_k": 5, + "fusion_strategy": "union", + "score_floor": 0.3 +} +``` + +This returns at most 5 results, fused via union strategy, with composite score ≥ 0.3. + +**Interaction with `RecallConfig`:** Per-request knobs have higher precedence than `config` +and pack-level tuning. Resolution order: `top_k`/`fusion_strategy`/`score_floor` (request) +> `config` object (per-call) > pack active config (tunable) > `RecallConfig::default()`. + ### 7. Calibration protocol To calibrate recall parameters for a deployment: From 1169b415520799b05d88e4db7fd1812fa1e80f53 Mon Sep 17 00:00:00 2001 From: OceanLi <122793010+ohdearquant@users.noreply.github.com> Date: Mon, 25 May 2026 06:39:30 -0400 Subject: [PATCH 03/14] style(adr-033): deno fmt re-pad recall knob table (post-merge cleanup) --- docs/adr/ADR-033-recall-pipeline.md | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/docs/adr/ADR-033-recall-pipeline.md b/docs/adr/ADR-033-recall-pipeline.md index e6f30079..e6075de8 100644 --- a/docs/adr/ADR-033-recall-pipeline.md +++ b/docs/adr/ADR-033-recall-pipeline.md @@ -283,13 +283,14 @@ The `recall` verb accepts three optional per-request knobs that override the pac `RecallConfig` for a single call. All knobs are optional; absent or `null` preserves the current default behavior. -| Parameter | Type | Default | Semantics | -| ------------------ | ---------------- | ------------------ | ---------------------------------------------------------------------------------------------------------------------------------------- | -| `top_k` | `usize` \| null | `limit` or `10` | Maximum number of results to return. Overrides `limit` when set. Capped at `100`. | -| `fusion_strategy` | `string` \| null | `"rrf"` (k=60) | Fusion algorithm for candidate merging. Must be one of `"rrf"`, `"weighted"`, `"union"`. Returns an error for any other value. | -| `score_floor` | `f32` \| null | `0.0` (no floor) | Minimum composite score threshold applied after `compute_score`. Results below this floor are excluded. `0.0` or `null` = no filtering. | +| Parameter | Type | Default | Semantics | +| ----------------- | ---------------- | ---------------- | --------------------------------------------------------------------------------------------------------------------------------------- | +| `top_k` | `usize` \| null | `limit` or `10` | Maximum number of results to return. Overrides `limit` when set. Capped at `100`. | +| `fusion_strategy` | `string` \| null | `"rrf"` (k=60) | Fusion algorithm for candidate merging. Must be one of `"rrf"`, `"weighted"`, `"union"`. Returns an error for any other value. | +| `score_floor` | `f32` \| null | `0.0` (no floor) | Minimum composite score threshold applied after `compute_score`. Results below this floor are excluded. `0.0` or `null` = no filtering. | **`fusion_strategy` details:** + - `"rrf"` — Reciprocal Rank Fusion with k=60 (default). Robust across query types. - `"weighted"` — Weighted linear combination. Text/vector weights come from the pack-level config (`RecallConfig.fuse_strategy`), not the request. The request cannot override weights. @@ -310,6 +311,7 @@ This returns at most 5 results, fused via union strategy, with composite score **Interaction with `RecallConfig`:** Per-request knobs have higher precedence than `config` and pack-level tuning. Resolution order: `top_k`/`fusion_strategy`/`score_floor` (request) + > `config` object (per-call) > pack active config (tunable) > `RecallConfig::default()`. ### 7. Calibration protocol From 7402dda0132e771873996d2bc901e97f8b96609f Mon Sep 17 00:00:00 2001 From: OceanLi <122793010+ohdearquant@users.noreply.github.com> Date: Mon, 25 May 2026 05:27:53 -0400 Subject: [PATCH 04/14] feat(embedding): dual-model registry (MiniLM + paraphrase) per ADR-043 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Multi-model embedding support landed across the runtime + storage + memory stack. Workspace dual-embedding now reachable end-to-end: khive-runtime: - RuntimeConfig.additional_embedding_models: Vec - Replaces single OnceCell with HashMap - default_embedder_name() + embedder(name) public methods - KHIVE_ADDITIONAL_EMBEDDING_MODELS env-var parsing - configured_embedding_models() helper enumerates active set khive-db: - V16 migration: add `embedding_model TEXT NOT NULL DEFAULT ''` column to vectors table with backfill + composite index - VectorStore.insert / search scoped by embedding_model khive-storage: - VectorRecord carries model tag - vector search params include model scope khive-pack-memory: - recall + remember accept optional embedding_model arg - validation: must be a registered model name kkernel: - engine list now returns real loaded models (no longer empty Vec) - engine migrate / drift-check still return not-implemented (#380/#385) Notes: - 16 files changed, +582/-138 lines - Tests rebaselined for V16 (failed_migration_rolls_back tests V17 now; store_ddl_then_event_migration_is_idempotent expects V16 head) - Workspace: cargo build + cargo test + clippy clean + fmt clean Lattice gap status: N/A — lattice-embed 0.2.4 already exposes both MiniLM + paraphrase as 384-d local models with EmbeddingRoutingConfig primitives. khive-runtime now uses these directly. Co-Authored-By: Claude Sonnet 4.6 --- crates/khive-db/src/backend.rs | 86 +++++-- crates/khive-db/src/lib.rs | 3 +- crates/khive-db/src/migrations.rs | 213 +++++++++++++++--- crates/khive-db/src/stores/vectors.rs | 65 ++++-- crates/khive-db/tests/contract/backend.rs | 3 +- .../khive-db/tests/contract/vector_filter.rs | 13 +- crates/khive-pack-memory/src/handlers.rs | 72 ++++-- crates/khive-retrieval/src/adapters/mod.rs | 1 + crates/khive-runtime/src/error.rs | 3 + crates/khive-runtime/src/operations.rs | 53 ++++- crates/khive-runtime/src/retrieval.rs | 46 +++- crates/khive-runtime/src/runtime.rs | 180 +++++++++++++-- crates/khive-runtime/tests/integration.rs | 2 + crates/khive-storage/src/types.rs | 5 + crates/khive-storage/src/vectors.rs | 5 + crates/kkernel/Cargo.toml | 1 + crates/kkernel/src/engine.rs | 42 ++-- 17 files changed, 634 insertions(+), 159 deletions(-) diff --git a/crates/khive-db/src/backend.rs b/crates/khive-db/src/backend.rs index de2e5124..5653baae 100644 --- a/crates/khive-db/src/backend.rs +++ b/crates/khive-db/src/backend.rs @@ -235,13 +235,15 @@ impl StorageBackend { /// Get a VectorStore for a specific embedding model, scoped to the default namespace. /// /// Creates the vec0 virtual table if it does not already exist. The `model_key` - /// must contain only ASCII alphanumeric/underscore characters. + /// must contain only ASCII alphanumeric/underscore characters. The `embedding_model` + /// is the canonical display name stored in each vector row. pub fn vectors( &self, model_key: &str, + embedding_model: &str, dimensions: usize, ) -> Result, SqliteError> { - self.vectors_for_namespace(model_key, dimensions, "local") + self.vectors_for_namespace(model_key, embedding_model, dimensions, "local") } /// Get a VectorStore for a specific embedding model with a default namespace. @@ -251,9 +253,12 @@ impl StorageBackend { /// (count, delete, info). Access control is enforced at the runtime layer. /// /// The `model_key` must contain only ASCII alphanumeric/underscore characters. + /// The `embedding_model` is the canonical display name stored in the `embedding_model` + /// column of each vector row (e.g. `"all-minilm-l6-v2"`). pub fn vectors_for_namespace( &self, model_key: &str, + embedding_model: &str, dimensions: usize, namespace: &str, ) -> Result, SqliteError> { @@ -298,21 +303,24 @@ impl StorageBackend { .is_some(); if table_exists { - let has_field: bool = { + let (has_field, has_embedding_model) = { let pragma = format!("PRAGMA table_xinfo({})", table); let mut stmt = writer.conn().prepare(&pragma)?; let mut rows = stmt.query([])?; - let mut found = false; + let mut has_field = false; + let mut has_embedding_model = false; while let Some(row) = rows.next()? { let name: String = row.get(1)?; if name == "field" { - found = true; - break; + has_field = true; + } + if name == "embedding_model" { + has_embedding_model = true; } } - found + (has_field, has_embedding_model) }; - if !has_field { + if !has_field || !has_embedding_model { let drop_ddl = format!("DROP TABLE IF EXISTS {}", table); writer.conn().execute_batch(&drop_ddl)?; } @@ -332,19 +340,13 @@ impl StorageBackend { // Create the vec0 virtual table. Idempotent on fresh databases and after the // old-schema rebuild above. - // - // NOTE: `embedding_model_id` is NOT included in this DDL because sqlite-vec - // enforces NOT NULL on TEXT metadata columns at insert time, so the column - // cannot be added at virtual-table creation as a nullable FK. The column will - // be present after the ADR-043 §8 startup backfill rebuild (steps 2-4), which - // is deferred to a follow-up PR — see the tracking issue filed against MAJ-2 - // of codex round-1 review of PR #374. let ddl = format!( "CREATE VIRTUAL TABLE IF NOT EXISTS vec_{} USING vec0(\ subject_id TEXT PRIMARY KEY, \ namespace TEXT NOT NULL, \ kind TEXT NOT NULL, \ field TEXT NOT NULL, \ + embedding_model TEXT NOT NULL, \ embedding float[{}] distance_metric=cosine\ )", model_key, dimensions @@ -355,11 +357,54 @@ impl StorageBackend { Arc::clone(&self.pool), self.is_file_backed, model_key.to_string(), + embedding_model.to_string(), dimensions, namespace.trim().to_string(), )?)) } + /// Register an embedding model in the `_embedding_models` registry table (ADR-043). + /// + /// Idempotent: if a row with the same `canonical_key` already exists, updates its + /// status back to `'active'` without changing other fields. + pub fn register_embedding_model( + &self, + engine_name: &str, + model_id: &str, + key_version: &str, + dimensions: u32, + ) -> Result<(), SqliteError> { + let writer = self.pool.try_writer()?; + writer + .conn() + .execute_batch(crate::migrations::EMBEDDING_MODELS_DDL)?; + + let now = chrono::Utc::now().timestamp_micros(); + let canonical_key = + format!("{engine_name}:{model_id}:{key_version}:{dimensions}").into_bytes(); + let id = uuid::Uuid::new_v4(); + writer.conn().execute( + "INSERT INTO _embedding_models \ + (id, engine_name, model_id, key_version, dim, output_dim, status, \ + activated_at, superseded_at, superseded_by, canonical_key, created_at) \ + VALUES (?1, ?2, ?3, ?4, ?5, NULL, 'active', ?6, NULL, NULL, ?7, ?8) \ + ON CONFLICT(canonical_key) DO UPDATE SET \ + status = 'active', \ + activated_at = COALESCE(_embedding_models.activated_at, excluded.activated_at)", + rusqlite::params![ + id.as_bytes().as_slice(), + engine_name, + model_id, + key_version, + dimensions as i64, + now, + canonical_key, + now, + ], + )?; + Ok(()) + } + /// Get a SparseStore for a specific model key, scoped to the default namespace. /// /// Creates the sparse table if it does not already exist. @@ -599,7 +644,7 @@ mod tests { #[cfg(feature = "vectors")] async fn vectors_roundtrip_via_public_api() { let backend = StorageBackend::memory().unwrap(); - let store = backend.vectors("test_api", 3).unwrap(); + let store = backend.vectors("test_api", "test_api", 3).unwrap(); let id = uuid::Uuid::new_v4(); store @@ -619,6 +664,7 @@ mod tests { top_k: 1, namespace: None, kind: None, + embedding_model: None, filter: None, backend_hints: None, }) @@ -635,8 +681,8 @@ mod tests { async fn vectors_creates_table_idempotently() { let backend = StorageBackend::memory().unwrap(); - let store1 = backend.vectors("idempotent", 3).unwrap(); - let store2 = backend.vectors("idempotent", 3).unwrap(); + let store1 = backend.vectors("idempotent", "idempotent", 3).unwrap(); + let store2 = backend.vectors("idempotent", "idempotent", 3).unwrap(); let id = uuid::Uuid::new_v4(); store1 @@ -724,8 +770,8 @@ mod tests { #[test] fn invalid_model_key_rejected() { let backend = StorageBackend::memory().unwrap(); - assert!(backend.vectors("bad key!", 3).is_err()); - assert!(backend.vectors("", 3).is_err()); + assert!(backend.vectors("bad key!", "bad key!", 3).is_err()); + assert!(backend.vectors("", "", 3).is_err()); } #[test] diff --git a/crates/khive-db/src/lib.rs b/crates/khive-db/src/lib.rs index e4a8b0bc..2a832372 100644 --- a/crates/khive-db/src/lib.rs +++ b/crates/khive-db/src/lib.rs @@ -9,7 +9,8 @@ pub mod stores; pub use backend::StorageBackend; pub use error::SqliteError; pub use migrations::{ - run_migrations, Migration, ServiceSchemaPlan, VersionedMigration, MIGRATIONS, + query_embedding_models, run_migrations, EmbeddingModelRegistryRecord, Migration, + ServiceSchemaPlan, VersionedMigration, MIGRATIONS, }; pub use pool::{ConnectionPool, PoolConfig, ReaderGuard, WriterGuard}; pub use sql_bridge::SqlBridge; diff --git a/crates/khive-db/src/migrations.rs b/crates/khive-db/src/migrations.rs index 7d727289..83b7b282 100644 --- a/crates/khive-db/src/migrations.rs +++ b/crates/khive-db/src/migrations.rs @@ -371,6 +371,15 @@ pub const EMBEDDING_MODELS_DDL: &str = "\ /// step for any table that already has the column. const V14_EMBEDDING_MODEL_REGISTRY: &str = "__v14_computed_at_runtime__"; +/// V16: Add `embedding_model` column and composite index to regular `vec_` tables. +/// +/// This migration is computed at runtime via `build_v16_vector_embedding_model_tag_sql` +/// to discover existing regular (non-virtual) `vec_` tables and add the column where +/// absent. sqlite-vec virtual tables (`vec0`) are handled at open time by the +/// `vectors_for_namespace` old-schema detection which drops and recreates tables +/// missing `embedding_model`. +const V16_VECTOR_EMBEDDING_MODEL_TAG: &str = "__v16_computed_at_runtime__"; + /// V15: proposals_open projection table (ADR-046). /// /// Maintains a fold-derived view of the four proposal EventKinds so that @@ -485,6 +494,12 @@ pub const MIGRATIONS: &[VersionedMigration] = &[ name: "proposals_open", up: V15_PROPOSALS_OPEN, }, + // V16: tag vector rows with embedding_model column (ADR-043 §8, dual-embedding). + VersionedMigration { + version: 16, + name: "vector_embedding_model_tag", + up: V16_VECTOR_EMBEDDING_MODEL_TAG, + }, ]; const MIGRATION_TRACKING_TABLE: &str = "\ @@ -701,6 +716,11 @@ pub fn run_migrations(conn: &mut Connection) -> Result { version: migration.version, error: e.to_string(), })? + } else if migration.version == 16 { + build_v16_vector_embedding_model_tag_sql(&tx).map_err(|e| SqliteError::Migration { + version: migration.version, + error: e.to_string(), + })? } else { migration.up.to_string() }; @@ -876,6 +896,129 @@ fn build_v14_embedding_model_registry_sql(conn: &Connection) -> Result Result { + let mut stmt = conn.prepare( + "SELECT name FROM sqlite_master \ + WHERE type = 'table' \ + AND name LIKE 'vec_%' \ + AND sql NOT LIKE '%VIRTUAL%' \ + AND sql NOT LIKE '%vec0%' \ + AND name NOT LIKE '%\\_chunks' ESCAPE '\\' \ + AND name NOT LIKE '%\\_rowids' ESCAPE '\\' \ + AND name NOT LIKE '%\\_info' ESCAPE '\\' \ + AND name NOT LIKE '%\\_vector\\_chunks%' ESCAPE '\\'", + )?; + let vec_tables: Vec = stmt + .query_map([], |row| row.get(0))? + .filter_map(|r| r.ok()) + .collect(); + + let mut sql = String::new(); + for table in vec_tables { + let valid = table.starts_with("vec_") + && table[4..] + .chars() + .all(|c| c.is_ascii_alphanumeric() || c == '_'); + if !valid { + continue; + } + let col_exists: bool = conn + .query_row( + "SELECT COUNT(*) > 0 FROM pragma_table_info(?1) WHERE name = 'embedding_model'", + rusqlite::params![&table], + |row| row.get(0), + ) + .unwrap_or(false); + if col_exists { + continue; + } + sql.push_str(&format!( + "ALTER TABLE {t} ADD COLUMN embedding_model TEXT NOT NULL DEFAULT 'all-minilm-l6-v2';\ + CREATE INDEX IF NOT EXISTS idx_{t}_subject_model ON {t}(subject_id, embedding_model);", + t = table, + )); + } + if sql.is_empty() { + sql.push_str("SELECT 1;"); + } + Ok(sql) +} + +/// A record from the `_embedding_models` registry table. +#[derive(Clone, Debug)] +pub struct EmbeddingModelRegistryRecord { + pub engine_name: String, + pub model_id: String, + pub key_version: String, + pub dimensions: u32, + pub status: String, + pub activated_at: Option, + pub superseded_at: Option, +} + +/// Query the `_embedding_models` registry. +/// +/// Opens the database at `db` (defaults to `~/.khive/khive-graph.db`) and +/// returns all registry rows, optionally filtered by `engine_name`. +/// Returns an empty vec if the database or table does not exist. +pub fn query_embedding_models( + db: Option<&std::path::Path>, + engine_filter: Option<&str>, +) -> Result, SqliteError> { + let path = db.map(std::path::Path::to_path_buf).unwrap_or_else(|| { + std::env::var("HOME") + .map(std::path::PathBuf::from) + .unwrap_or_else(|_| std::path::PathBuf::from(".")) + .join(".khive/khive-graph.db") + }); + if !path.exists() { + return Ok(Vec::new()); + } + + let conn = Connection::open(path)?; + let exists: bool = conn.query_row( + "SELECT COUNT(*) > 0 FROM sqlite_master \ + WHERE type='table' AND name='_embedding_models'", + [], + |row| row.get(0), + )?; + if !exists { + return Ok(Vec::new()); + } + + let sql = if engine_filter.is_some() { + "SELECT engine_name, model_id, key_version, dim, status, activated_at, superseded_at \ + FROM _embedding_models WHERE engine_name = ?1 \ + ORDER BY engine_name, activated_at IS NULL, activated_at" + } else { + "SELECT engine_name, model_id, key_version, dim, status, activated_at, superseded_at \ + FROM _embedding_models \ + ORDER BY engine_name, activated_at IS NULL, activated_at" + }; + let mut stmt = conn.prepare(sql)?; + let map_row = |row: &rusqlite::Row<'_>| { + Ok(EmbeddingModelRegistryRecord { + engine_name: row.get(0)?, + model_id: row.get(1)?, + key_version: row.get(2)?, + dimensions: row.get::<_, i64>(3)? as u32, + status: row.get(4)?, + activated_at: row.get(5)?, + superseded_at: row.get(6)?, + }) + }; + + if let Some(engine) = engine_filter { + stmt.query_map([engine], map_row)? + .collect::, _>>() + .map_err(Into::into) + } else { + stmt.query_map([], map_row)? + .collect::, _>>() + .map_err(Into::into) + } +} + // ============================================================================= // Tests // ============================================================================= @@ -892,17 +1035,17 @@ mod tests { fn fresh_db_migrates_to_latest() { let mut conn = open_memory(); let version = run_migrations(&mut conn).expect("migrations should succeed"); - assert_eq!(version, 15); + assert_eq!(version, 16); - // Verify the tracking table has rows for V1 through V15. + // Verify the tracking table has rows for V1 through V16. let count: i64 = conn .query_row( - "SELECT COUNT(*) FROM _schema_migrations WHERE version IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)", + "SELECT COUNT(*) FROM _schema_migrations WHERE version IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)", [], |row| row.get(0), ) .unwrap(); - assert_eq!(count, 15); + assert_eq!(count, 16); // Verify the entities table was created. let tbl_count: i64 = conn @@ -1083,16 +1226,16 @@ mod tests { let mut conn = open_memory(); let v1 = run_migrations(&mut conn).expect("first run"); let v2 = run_migrations(&mut conn).expect("second run"); - assert_eq!(v1, 15); - assert_eq!(v2, 15); + assert_eq!(v1, 16); + assert_eq!(v2, 16); - // Should still have exactly fifteen rows in the tracking table (V1..V15). + // Should still have exactly sixteen rows in the tracking table (V1..V16). let count: i64 = conn .query_row("SELECT COUNT(*) FROM _schema_migrations", [], |row| { row.get(0) }) .unwrap(); - assert_eq!(count, 15); + assert_eq!(count, 16); } // F052 (CRIT): V9 migration must add target_backend column + partial index on graph_edges. @@ -1102,8 +1245,8 @@ mod tests { let mut conn = open_memory(); let version = run_migrations(&mut conn).expect("migrations should succeed"); assert_eq!( - version, 15, - "F052: latest migration must be V15 (proposals_open)" + version, 16, + "F052: latest migration must be V16 (vector_embedding_model_tag)" ); let col: i64 = conn .query_row( @@ -1131,40 +1274,43 @@ mod tests { #[test] fn failed_migration_rolls_back() { - let bad_v16 = VersionedMigration { - version: 16, + let bad_v17 = VersionedMigration { + version: 17, name: "bad_migration", up: "THIS IS NOT VALID SQL;", }; let mut conn = open_memory(); - // Apply all real migrations (V1..V15) so the DB is at V15. - run_migrations(&mut conn).expect("V1..V15 should apply cleanly"); + // Apply all real migrations (V1..V16) so the DB is at V16. + run_migrations(&mut conn).expect("V1..V16 should apply cleanly"); - // Now manually drive the bad V16 migration to check rollback behaviour. - let result = apply_single_migration(&mut conn, &bad_v16); + // Now manually drive the bad V17 migration to check rollback behaviour. + let result = apply_single_migration(&mut conn, &bad_v17); assert!(result.is_err(), "bad migration should return error"); - // DB should still be at V15 — no V16 row in tracking. - let v16_count: i64 = conn + // DB should still be at V16 — no V17 row in tracking. + let v17_count: i64 = conn .query_row( - "SELECT COUNT(*) FROM _schema_migrations WHERE version = 16", + "SELECT COUNT(*) FROM _schema_migrations WHERE version = 17", [], |row| row.get(0), ) .unwrap(); - assert_eq!(v16_count, 0, "V16 must not be recorded after rollback"); + assert_eq!(v17_count, 0, "V17 must not be recorded after rollback"); - // V1..V15 should still be there. + // V1..V16 should still be there. let applied_count: i64 = conn .query_row( - "SELECT COUNT(*) FROM _schema_migrations WHERE version IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)", + "SELECT COUNT(*) FROM _schema_migrations WHERE version IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)", [], |row| row.get(0), ) .unwrap(); - assert_eq!(applied_count, 15, "V1..V15 must still be recorded"); + assert_eq!( + applied_count, 16, + "V1..V16 must still be recorded after V17 rollback" + ); } #[test] @@ -1198,9 +1344,10 @@ mod tests { // V12 should detect that salience is already nullable and skip; // V13 adds event observability columns and event_observations table; // V14 creates the _embedding_models registry table; - // V15 creates the proposals_open table. + // V15 creates the proposals_open table; + // V16 adds embedding_model column to regular vec_ tables. let version = run_migrations(&mut conn).expect("migrations after store DDL"); - assert_eq!(version, 15); + assert_eq!(version, 16); // V2 should be recorded as applied (skipped but tracked). let v2_count: i64 = conn @@ -1390,9 +1537,9 @@ mod tests { ) .unwrap(); - // Run V2-V15 migrations. + // Run V2-V16 migrations. let version = run_migrations(&mut conn).expect("migrations should succeed"); - assert_eq!(version, 15); + assert_eq!(version, 16); // After V12, salience must be nullable (notnull=0). let notnull: i64 = conn @@ -1436,7 +1583,7 @@ mod tests { ensure_events_schema(&conn).expect("store DDL should create events"); let version = run_migrations(&mut conn).expect("migrations after events store DDL"); - assert_eq!(version, 15, "must reach V15 even when events DDL ran first"); + assert_eq!(version, 16, "must reach V16 even when events DDL ran first"); let v13_count: i64 = conn .query_row( @@ -1477,8 +1624,8 @@ mod tests { let mut conn = open_memory(); let version = run_migrations(&mut conn).expect("migrations should succeed"); assert_eq!( - version, 15, - "F227: latest migration must be V15 (proposals_open)" + version, 16, + "F227: latest migration must be V16 (vector_embedding_model_tag)" ); // Verify _embedding_models table exists. @@ -1575,7 +1722,7 @@ mod tests { // Run the full migration suite — V14 should add embedding_model_id to the // regular vec_legacy_model table. let version = run_migrations(&mut conn).expect("migrations should succeed"); - assert_eq!(version, 15); + assert_eq!(version, 16); // The embedding_model_id column must now exist. let col_exists: bool = conn @@ -1592,7 +1739,7 @@ mod tests { // Running migrations again must be idempotent (column already present). let version2 = run_migrations(&mut conn).expect("second run must succeed"); - assert_eq!(version2, 15); + assert_eq!(version2, 16); } /// CRIT-2 regression: V14 discovery filter must NOT match sqlite-vec internal @@ -1624,7 +1771,7 @@ mod tests { // Run the full migration suite — V14 must not add `embedding_model_id` to // any of the four shadow tables above. let version = run_migrations(&mut conn).expect("migrations should succeed"); - assert_eq!(version, 15); + assert_eq!(version, 16); for shadow in [ "vec_test_chunks", diff --git a/crates/khive-db/src/stores/vectors.rs b/crates/khive-db/src/stores/vectors.rs index 3fa06de5..3b4754eb 100644 --- a/crates/khive-db/src/stores/vectors.rs +++ b/crates/khive-db/src/stores/vectors.rs @@ -86,6 +86,7 @@ pub struct SqliteVecStore { pool: Arc, is_file_backed: bool, model_key: String, + embedding_model: String, dimensions: usize, table_name: String, namespace: String, @@ -99,6 +100,7 @@ impl SqliteVecStore { pool: Arc, is_file_backed: bool, model_key: String, + embedding_model: String, dimensions: usize, namespace: String, ) -> Result { @@ -108,6 +110,7 @@ impl SqliteVecStore { pool, is_file_backed, model_key, + embedding_model, dimensions, table_name, namespace, @@ -200,6 +203,7 @@ impl VectorStore for SqliteVecStore { let namespace = namespace.to_string(); let field = field.to_string(); let kind_str = kind.to_string(); + let embedding_model = self.embedding_model.clone(); if embedding.len() == dims { if let Some(idx) = non_finite_index(&embedding) { @@ -226,13 +230,21 @@ impl VectorStore for SqliteVecStore { )?; let ins_sql = format!( - "INSERT INTO {} (subject_id, namespace, kind, field, embedding) VALUES (?1, ?2, ?3, ?4, ?5)", + "INSERT INTO {} (subject_id, namespace, kind, field, embedding_model, embedding) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6)", table ); let blob = f32_slice_as_bytes(&embedding); conn.execute( &ins_sql, - rusqlite::params![subject_id.to_string(), &namespace, &kind_str, &field, blob], + rusqlite::params![ + subject_id.to_string(), + &namespace, + &kind_str, + &field, + &embedding_model, + blob + ], )?; Ok(()) }) @@ -246,6 +258,7 @@ impl VectorStore for SqliteVecStore { let table = self.table_name.clone(); let dims = self.dimensions; let attempted = records.len() as u64; + let store_embedding_model = self.embedding_model.clone(); self.with_writer("vec_insert_batch", move |conn| { let del_sql = format!( @@ -253,7 +266,8 @@ impl VectorStore for SqliteVecStore { table ); let ins_sql = format!( - "INSERT INTO {} (subject_id, namespace, kind, field, embedding) VALUES (?1, ?2, ?3, ?4, ?5)", + "INSERT INTO {} (subject_id, namespace, kind, field, embedding_model, embedding) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6)", table ); @@ -282,7 +296,14 @@ impl VectorStore for SqliteVecStore { let _ = conn.execute(&del_sql, rusqlite::params![&id_str, &record.namespace]); match conn.execute( &ins_sql, - rusqlite::params![&id_str, &record.namespace, &kind_str, &record.field, blob], + rusqlite::params![ + &id_str, + &record.namespace, + &kind_str, + &record.field, + &store_embedding_model, + blob + ], ) { Ok(_) => affected += 1, Err(_) => failed += 1, @@ -358,6 +379,11 @@ impl VectorStore for SqliteVecStore { .clone() .unwrap_or_else(|| self.namespace.clone()); let kind_filter = request.kind.map(|k| k.to_string()); + // Use the request's embedding_model filter, or fall back to this store's model. + let effective_model = request + .embedding_model + .clone() + .unwrap_or_else(|| self.embedding_model.clone()); if query_embedding.len() == dims { if let Some(idx) = non_finite_index(&query_embedding) { @@ -377,10 +403,10 @@ impl VectorStore for SqliteVecStore { )); } - // Restrict candidate set to namespace (and optionally kind) via subquery, - // then MATCH-rank by embedding distance. + // Restrict candidate set to namespace+embedding_model (and optionally kind) + // via subquery, then MATCH-rank by embedding distance. let subquery_kind_clause = if kind_filter.is_some() { - "AND kind = ?4" + "AND kind = ?5" } else { "" }; @@ -389,7 +415,8 @@ impl VectorStore for SqliteVecStore { FROM {t} \ WHERE embedding MATCH ?1 \ AND subject_id IN (\ - SELECT subject_id FROM {t} WHERE namespace = ?3 {kind_clause}\ + SELECT subject_id FROM {t} \ + WHERE namespace = ?3 AND embedding_model = ?4 {kind_clause}\ ) \ ORDER BY distance \ LIMIT ?2", @@ -405,7 +432,13 @@ impl VectorStore for SqliteVecStore { let raw_rows: Vec> = if let Some(ref kind_str) = kind_filter { stmt.query_map( - rusqlite::params![query_blob, request.top_k, &namespace, kind_str], + rusqlite::params![ + query_blob, + request.top_k, + &namespace, + &effective_model, + kind_str + ], |row| { let id_str: String = row.get(0)?; let distance: f64 = row.get(1)?; @@ -415,7 +448,7 @@ impl VectorStore for SqliteVecStore { .collect() } else { stmt.query_map( - rusqlite::params![query_blob, request.top_k, &namespace], + rusqlite::params![query_blob, request.top_k, &namespace, &effective_model], |row| { let id_str: String = row.get(0)?; let distance: f64 = row.get(1)?; @@ -531,6 +564,7 @@ impl SqliteVecStore { let table = self.table_name.clone(); let namespace = self.namespace.clone(); + let embedding_model = self.embedding_model.clone(); let query_vec = query_embedding.to_vec(); let ids: Vec = candidate_ids.iter().map(|id| id.to_string()).collect(); @@ -542,22 +576,24 @@ impl SqliteVecStore { let placeholders: String = chunk .iter() .enumerate() - .map(|(i, _)| format!("?{}", i + 3)) + .map(|(i, _)| format!("?{}", i + 4)) .collect::>() .join(", "); let sql = format!( "SELECT e.subject_id, vec_distance_cosine(e.embedding, ?1) as distance \ FROM {} e \ - WHERE e.namespace = ?2 AND e.subject_id IN ({})", + WHERE e.namespace = ?2 AND e.embedding_model = ?3 \ + AND e.subject_id IN ({})", table, placeholders ); let mut stmt = conn.prepare(&sql)?; stmt.raw_bind_parameter(1, query_blob)?; stmt.raw_bind_parameter(2, namespace.as_str())?; + stmt.raw_bind_parameter(3, embedding_model.as_str())?; for (i, id_str) in chunk.iter().enumerate() { - stmt.raw_bind_parameter(i + 3, id_str.as_str())?; + stmt.raw_bind_parameter(i + 4, id_str.as_str())?; } let mut rows = stmt.raw_query(); @@ -612,6 +648,7 @@ mod capabilities_tests { make_pool(), /*is_file_backed=*/ false, "test_model".into(), + "test_model".into(), /*dimensions=*/ 4, "ns:test".into(), ) @@ -657,6 +694,7 @@ mod capabilities_tests { make_pool(), false, "test_dim_limit".into(), + "test_dim_limit".into(), /*dimensions=*/ 4, "ns:test".into(), ) @@ -684,6 +722,7 @@ mod capabilities_tests { make_pool(), false, "test_idempotent".into(), + "test_idempotent".into(), 4, "ns:test".into(), ) diff --git a/crates/khive-db/tests/contract/backend.rs b/crates/khive-db/tests/contract/backend.rs index bbe296f5..34129257 100644 --- a/crates/khive-db/tests/contract/backend.rs +++ b/crates/khive-db/tests/contract/backend.rs @@ -337,7 +337,7 @@ mod vector_contract { async fn test_vector_store(backend: &StorageBackend) { let store = backend - .vectors_for_namespace("ct_model", 4, "ct_ns") + .vectors_for_namespace("ct_model", "ct_model", 4, "ct_ns") .expect("vector store"); let id = Uuid::new_v4(); @@ -361,6 +361,7 @@ mod vector_contract { top_k: 1, namespace: None, kind: None, + embedding_model: None, filter: None, backend_hints: None, }) diff --git a/crates/khive-db/tests/contract/vector_filter.rs b/crates/khive-db/tests/contract/vector_filter.rs index 5b37ed35..e9be4745 100644 --- a/crates/khive-db/tests/contract/vector_filter.rs +++ b/crates/khive-db/tests/contract/vector_filter.rs @@ -18,7 +18,9 @@ mod vector_filter_contract { #[tokio::test] async fn search_with_non_empty_filter_returns_unsupported() { let backend = StorageBackend::memory().expect("in-memory backend"); - let store = backend.vectors("filter_test", 3).expect("vector store"); + let store = backend + .vectors("filter_test", "filter_test", 3) + .expect("vector store"); // Insert one record so the table is non-empty. let id = Uuid::new_v4(); @@ -39,6 +41,7 @@ mod vector_filter_contract { top_k: 5, namespace: None, kind: None, + embedding_model: None, filter: Some(VectorMetadataFilter { namespaces: vec!["local".into()], kinds: vec![], @@ -64,7 +67,9 @@ mod vector_filter_contract { #[tokio::test] async fn search_with_filter_empty_delegates_and_non_empty_rejects() { let backend = StorageBackend::memory().expect("in-memory backend"); - let store = backend.vectors("filter_delegate", 3).expect("vector store"); + let store = backend + .vectors("filter_delegate", "filter_delegate", 3) + .expect("vector store"); let id = Uuid::new_v4(); store @@ -83,6 +88,7 @@ mod vector_filter_contract { top_k: 1, namespace: None, kind: None, + embedding_model: None, filter: None, backend_hints: None, }; @@ -154,7 +160,7 @@ mod vector_filter_contract { // the old schema and rebuild the table transparently. let new_backend = StorageBackend::sqlite(&db_path).expect("reopen db"); let store = new_backend - .vectors_for_namespace("old_model", 3, "local") + .vectors_for_namespace("old_model", "old_model", 3, "local") .expect("vectors_for_namespace must succeed after schema rebuild"); // Step 3: insert and search in the new shape must work. @@ -176,6 +182,7 @@ mod vector_filter_contract { top_k: 1, namespace: None, kind: None, + embedding_model: None, filter: None, backend_hints: None, }) diff --git a/crates/khive-pack-memory/src/handlers.rs b/crates/khive-pack-memory/src/handlers.rs index 3bcb84cc..00a5310a 100644 --- a/crates/khive-pack-memory/src/handlers.rs +++ b/crates/khive-pack-memory/src/handlers.rs @@ -61,6 +61,8 @@ struct RememberParams { #[serde(alias = "source")] source_id: Option, tags: Option>, + #[serde(default)] + embedding_model: Option, } #[derive(Deserialize)] @@ -74,6 +76,8 @@ struct RecallParams { top_k: Option, fusion_strategy: Option, score_floor: Option, + #[serde(default)] + embedding_model: Option, } impl RecallParams { @@ -279,6 +283,7 @@ impl MemoryPack { query: &str, token: &NamespaceToken, candidate_limit: u32, + embedding_model: Option<&str>, ) -> Result { let ns = token.namespace().as_str().to_string(); // F111: restrict text candidates to Note substrate kind so entity records @@ -299,23 +304,28 @@ impl MemoryPack { }) .await?; - let vector_hits = if self.runtime.config().embedding_model.is_some() { - let vec = self.runtime.embed(query).await?; - self.runtime - .vectors(token)? - .search(VectorSearchRequest { - query_vectors: vec![vec], - top_k: candidate_limit, - namespace: Some(ns.clone()), - // F111: already restricts to Note substrate kind - kind: Some(SubstrateKind::Note), - filter: None, - backend_hints: None, - }) - .await? - } else { - Vec::new() - }; + let vector_hits = + if self.runtime.config().embedding_model.is_some() || embedding_model.is_some() { + let model_name: String = embedding_model + .map(|m| m.to_string()) + .unwrap_or_else(|| self.runtime.default_embedder_name().to_string()); + let vec = self.runtime.embed_with_model(&model_name, query).await?; + self.runtime + .vectors_for_model(token, &model_name)? + .search(VectorSearchRequest { + query_vectors: vec![vec], + top_k: candidate_limit, + namespace: Some(ns.clone()), + // F111: already restricts to Note substrate kind + kind: Some(SubstrateKind::Note), + embedding_model: Some(model_name), + filter: None, + backend_hints: None, + }) + .await? + } else { + Vec::new() + }; Ok(RecallCandidateSet { namespace: ns, @@ -418,7 +428,7 @@ impl MemoryPack { let note = self .runtime - .create_note_with_decay( + .create_note_with_decay_for_embedding_model( token, "memory", None, @@ -427,6 +437,7 @@ impl MemoryPack { decay_factor, Some(props), annotates, + p.embedding_model.as_deref(), ) .await?; @@ -483,7 +494,12 @@ impl MemoryPack { }; let candidate_limit = recall_candidate_count(&cfg, limit); let candidates = self - .collect_recall_candidates(&p.query, token, candidate_limit) + .collect_recall_candidates( + &p.query, + token, + candidate_limit, + p.embedding_model.as_deref(), + ) .await?; let (memory_ids, mut notes_by_id) = self .load_memory_candidate_notes(token, &candidates.text_hits, &candidates.vector_hits) @@ -604,7 +620,12 @@ impl MemoryPack { let limit = p.limit.unwrap_or(10).min(100); let candidate_limit = recall_candidate_count(&cfg, limit); let candidates = self - .collect_recall_candidates(&p.query, token, candidate_limit) + .collect_recall_candidates( + &p.query, + token, + candidate_limit, + p.embedding_model.as_deref(), + ) .await?; let text_candidates: Vec = candidates @@ -657,7 +678,12 @@ impl MemoryPack { let limit = p.limit.unwrap_or(10).min(100); let candidate_limit = recall_candidate_count(&cfg, limit); let candidates = self - .collect_recall_candidates(&p.query, token, candidate_limit) + .collect_recall_candidates( + &p.query, + token, + candidate_limit, + p.embedding_model.as_deref(), + ) .await?; let (memory_ids, notes_by_id) = self .load_memory_candidate_notes(token, &candidates.text_hits, &candidates.vector_hits) @@ -811,6 +837,7 @@ mod tests { top_k: None, fusion_strategy: None, score_floor: None, + embedding_model: None, }; let cfg = p.effective_config(RecallConfig::default()); assert!((cfg.relevance_weight - 0.70).abs() < 1e-12); @@ -830,6 +857,7 @@ mod tests { top_k: None, fusion_strategy: None, score_floor: None, + embedding_model: None, }; let cfg = p.effective_config(RecallConfig::default()); assert!((cfg.min_score - 0.5).abs() < 1e-12); @@ -851,6 +879,7 @@ mod tests { top_k: None, fusion_strategy: None, score_floor: None, + embedding_model: None, }; let cfg = p.effective_config(RecallConfig::default()); assert!((cfg.relevance_weight - 0.50).abs() < 1e-12); @@ -881,6 +910,7 @@ mod tests { top_k: None, fusion_strategy: Some("weighted".to_string()), score_floor: None, + embedding_model: None, }; let mut cfg = p.effective_config(base); diff --git a/crates/khive-retrieval/src/adapters/mod.rs b/crates/khive-retrieval/src/adapters/mod.rs index bcad7b45..5b233d0e 100644 --- a/crates/khive-retrieval/src/adapters/mod.rs +++ b/crates/khive-retrieval/src/adapters/mod.rs @@ -110,6 +110,7 @@ impl VectorSearch for StorageVectorSearch { top_k: top_k as u32, namespace: None, kind: None, + embedding_model: None, filter: None, backend_hints: None, }; diff --git a/crates/khive-runtime/src/error.rs b/crates/khive-runtime/src/error.rs index 5d5f2cc3..19960375 100644 --- a/crates/khive-runtime/src/error.rs +++ b/crates/khive-runtime/src/error.rs @@ -78,6 +78,9 @@ pub enum RuntimeError { #[error("unconfigured: {0} is not set")] Unconfigured(String), + #[error("unknown embedding model: {0}")] + UnknownModel(String), + #[error("embedding: {0}")] Embedding(#[from] lattice_embed::EmbedError), diff --git a/crates/khive-runtime/src/operations.rs b/crates/khive-runtime/src/operations.rs index a5abb6bb..9a6add6a 100644 --- a/crates/khive-runtime/src/operations.rs +++ b/crates/khive-runtime/src/operations.rs @@ -812,7 +812,7 @@ impl KhiveRuntime { annotates: Vec, ) -> RuntimeResult { self.create_note_inner( - token, kind, name, content, salience, None, properties, annotates, + token, kind, name, content, salience, None, properties, annotates, None, ) .await } @@ -829,6 +829,34 @@ impl KhiveRuntime { decay_factor: f64, properties: Option, annotates: Vec, + ) -> RuntimeResult { + self.create_note_with_decay_for_embedding_model( + token, + kind, + name, + content, + salience, + decay_factor, + properties, + annotates, + None, + ) + .await + } + + /// Like [`create_note_with_decay`] but targets a specific embedding model. + #[allow(clippy::too_many_arguments)] + pub async fn create_note_with_decay_for_embedding_model( + &self, + token: &NamespaceToken, + kind: &str, + name: Option<&str>, + content: &str, + salience: Option, + decay_factor: f64, + properties: Option, + annotates: Vec, + embedding_model: Option<&str>, ) -> RuntimeResult { self.create_note_inner( token, @@ -839,6 +867,7 @@ impl KhiveRuntime { Some(decay_factor), properties, annotates, + embedding_model, ) .await } @@ -854,6 +883,7 @@ impl KhiveRuntime { decay_factor: Option, properties: Option, annotates: Vec, + embedding_model: Option<&str>, ) -> RuntimeResult { let ns = token.namespace().as_str(); @@ -899,9 +929,20 @@ impl KhiveRuntime { }) .await?; - if self.config().embedding_model.is_some() { - let vector = self.embed(¬e.content).await?; - self.vectors(token)? + let embed_model_name: Option = + if self.config().embedding_model.is_some() || embedding_model.is_some() { + Some( + embedding_model + .map(|m| m.to_string()) + .unwrap_or_else(|| self.default_embedder_name().to_string()), + ) + } else { + None + }; + + if let Some(ref model_name) = embed_model_name { + let vector = self.embed_with_model(model_name, ¬e.content).await?; + self.vectors_for_model(token, model_name)? .insert( note.id, SubstrateKind::Note, @@ -989,8 +1030,8 @@ impl KhiveRuntime { if let Ok(fts) = self.text_for_notes(token) { let _ = fts.delete_document(ns, note.id).await; } - if self.config().embedding_model.is_some() { - if let Ok(vs) = self.vectors(token) { + if let Some(ref model_name) = embed_model_name { + if let Ok(vs) = self.vectors_for_model(token, model_name) { let _ = vs.delete(note.id).await; } } diff --git a/crates/khive-runtime/src/retrieval.rs b/crates/khive-runtime/src/retrieval.rs index 78585c2e..aeb7cae2 100644 --- a/crates/khive-runtime/src/retrieval.rs +++ b/crates/khive-runtime/src/retrieval.rs @@ -41,20 +41,26 @@ const RRF_K: usize = 60; const CANDIDATE_MULTIPLIER: u32 = 4; impl KhiveRuntime { - /// Generate an embedding vector for `text` using the configured local model. + /// Generate an embedding vector for `text` using the configured default model. /// /// First call lazily loads model weights (cold start cost). Subsequent calls reuse them. /// Returns `Unconfigured("embedding_model")` if no model is configured. pub async fn embed(&self, text: &str) -> RuntimeResult> { - let service = self.embedder().await?; - let model = self - .config() - .embedding_model - .expect("embedder() returns Unconfigured when model is None"); + let model_name = self.default_embedder_name(); + if model_name.is_empty() { + return Err(RuntimeError::Unconfigured("embedding_model".into())); + } + self.embed_with_model(model_name, text).await + } + + /// Generate an embedding vector for `text` using the named model. + pub async fn embed_with_model(&self, model_name: &str, text: &str) -> RuntimeResult> { + let model = self.resolve_embedding_model(Some(model_name))?; + let service = self.embedder(model_name).await?; Ok(service.embed_one(text, model).await?) } - /// Generate embeddings for multiple texts in one call. + /// Generate embeddings for multiple texts in one call using the configured default model. /// /// Delegates to the cached `EmbeddingService::embed`, so repeated texts within /// and across calls benefit from the runtime-level LRU cache. @@ -65,11 +71,24 @@ impl KhiveRuntime { if texts.is_empty() { return Ok(vec![]); } - let service = self.embedder().await?; - let model = self - .config() - .embedding_model - .expect("embedder() returns Unconfigured when model is None"); + let model_name = self.default_embedder_name(); + if model_name.is_empty() { + return Err(RuntimeError::Unconfigured("embedding_model".into())); + } + self.embed_batch_with_model(model_name, texts).await + } + + /// Generate embeddings for multiple texts using the named model. + pub async fn embed_batch_with_model( + &self, + model_name: &str, + texts: &[String], + ) -> RuntimeResult>> { + if texts.is_empty() { + return Ok(vec![]); + } + let model = self.resolve_embedding_model(Some(model_name))?; + let service = self.embedder(model_name).await?; Ok(service.embed(texts, model).await?) } @@ -111,6 +130,7 @@ impl KhiveRuntime { top_k, namespace: Some(ns), kind, + embedding_model: None, filter: None, backend_hints: None, }) @@ -242,6 +262,7 @@ impl KhiveRuntime { top_k, namespace: Some(ns), kind: Some(SubstrateKind::Entity), + embedding_model: None, filter: None, backend_hints: None, }) @@ -269,6 +290,7 @@ impl KhiveRuntime { top_k: candidate_ids.len() as u32, namespace: Some(ns), kind: Some(SubstrateKind::Entity), + embedding_model: None, filter: None, backend_hints: None, }) diff --git a/crates/khive-runtime/src/runtime.rs b/crates/khive-runtime/src/runtime.rs index 1babe5b8..27b14eb9 100644 --- a/crates/khive-runtime/src/runtime.rs +++ b/crates/khive-runtime/src/runtime.rs @@ -1,6 +1,9 @@ //! KhiveRuntime — composable handle to all storage capabilities. -use std::sync::{Arc, RwLock}; +use std::{ + collections::HashMap, + sync::{Arc, RwLock}, +}; use khive_db::StorageBackend; use khive_gate::{ActorRef, AllowAllGate, GateRef}; @@ -136,6 +139,13 @@ pub struct RuntimeConfig { /// `EmbedderRegistry`. This field persists for backward compatibility until /// the embedder registry is fully plumbed. pub embedding_model: Option, + /// Additional embedding models to make available by request name. + /// + /// `embedding_model` remains the default used by existing `embed()` and + /// `embed_batch()` callers. This list adds non-default models that can be + /// selected with `embedder(name)`, `embed_with_model(...)`, memory + /// `remember.embedding_model`, and memory `recall.embedding_model`. + pub additional_embedding_models: Vec, /// Authorization gate consulted before each verb dispatch (ADR-029). /// Default: `AllowAllGate` (permissive). For production policy enforcement, /// plug in a Rego- or capability-witness-backed impl. @@ -173,6 +183,10 @@ impl Default for RuntimeConfig { .ok() .and_then(|s| s.parse().ok()) .or(Some(EmbeddingModel::AllMiniLmL6V2)); + let additional_embedding_models = std::env::var("KHIVE_ADDITIONAL_EMBEDDING_MODELS") + .ok() + .map(|s| parse_embedding_model_list(&s)) + .unwrap_or_default(); let packs = std::env::var("KHIVE_PACKS") .ok() .map(|s| parse_pack_list(&s)) @@ -182,6 +196,7 @@ impl Default for RuntimeConfig { db_path, default_namespace: Namespace::local(), embedding_model, + additional_embedding_models, gate: Arc::new(AllowAllGate), packs, backend_id: BackendId::main(), @@ -191,6 +206,12 @@ impl Default for RuntimeConfig { // ---- KhiveRuntime ---- +#[derive(Clone)] +struct EmbedderEntry { + model: EmbeddingModel, + cell: Arc>>, +} + /// Composable runtime handle used by the MCP server. /// /// Wraps a `StorageBackend` and provides namespace-scoped accessor methods @@ -199,7 +220,8 @@ impl Default for RuntimeConfig { pub struct KhiveRuntime { backend: Arc, config: RuntimeConfig, - embedder: Arc>>, + embedders: Arc>, + default_embedder_name: Arc, /// Pack-extensible edge endpoint rules (ADR-031). Shared across clones /// via `Arc>`; installed once by the transport after the /// `VerbRegistry` is built. Empty until installed — base rules @@ -223,10 +245,13 @@ impl KhiveRuntime { } None => StorageBackend::memory()?, }; + register_configured_embedding_models(&backend, &config)?; + let (embedders, default_embedder_name) = build_embedder_registry(&config); Ok(Self { backend: Arc::new(backend), config, - embedder: Arc::new(OnceCell::new()), + embedders: Arc::new(embedders), + default_embedder_name, edge_rules: Arc::new(RwLock::new(Vec::new())), }) } @@ -241,10 +266,15 @@ impl KhiveRuntime { /// storage access is through the provided `backend`. Set `backend_id` and /// `default_namespace` via the config builder pattern if non-defaults are needed. pub fn from_backend(backend: Arc, config: RuntimeConfig) -> Self { + if let Err(err) = register_configured_embedding_models(&backend, &config) { + tracing::warn!(error = %err, "failed to register configured embedding models"); + } + let (embedders, default_embedder_name) = build_embedder_registry(&config); Self { backend, config, - embedder: Arc::new(OnceCell::new()), + embedders: Arc::new(embedders), + default_embedder_name, edge_rules: Arc::new(RwLock::new(Vec::new())), } } @@ -255,6 +285,7 @@ impl KhiveRuntime { db_path: None, default_namespace: Namespace::local(), embedding_model: None, + additional_embedding_models: vec![], gate: Arc::new(AllowAllGate), packs: vec!["kg".to_string()], backend_id: BackendId::main(), @@ -321,12 +352,28 @@ impl KhiveRuntime { &self, token: &NamespaceToken, ) -> RuntimeResult> { - let model = self - .config - .embedding_model - .ok_or_else(|| crate::RuntimeError::Unconfigured("embedding_model".into()))?; + let model = self.resolve_embedding_model(None)?; + self.vectors_for_embedding_model(token, model) + } + + /// Get a VectorStore for a specific named embedding model, scoped to the token's namespace. + pub fn vectors_for_model( + &self, + token: &NamespaceToken, + model_name: &str, + ) -> RuntimeResult> { + let model = self.resolve_embedding_model(Some(model_name))?; + self.vectors_for_embedding_model(token, model) + } + + fn vectors_for_embedding_model( + &self, + token: &NamespaceToken, + model: EmbeddingModel, + ) -> RuntimeResult> { Ok(self.backend.vectors_for_namespace( &vec_model_key(model), + &model.to_string(), model.dimensions(), token.namespace().as_str(), )?) @@ -380,28 +427,57 @@ impl KhiveRuntime { .unwrap_or_default() } - /// Get the lazily-initialized embedding service. + /// Return the name of the default embedding model (empty string if none configured). + pub fn default_embedder_name(&self) -> &str { + self.default_embedder_name.as_ref() + } + + /// Resolve a model name (or `None` for the default) to an `EmbeddingModel`. + /// + /// Returns `UnknownModel` if the name is not in the registry, or + /// `Unconfigured` if `None` is passed and no default model is set. + pub fn resolve_embedding_model(&self, name: Option<&str>) -> RuntimeResult { + let model = match name { + Some(raw) => parse_embedding_model_alias(raw) + .ok_or_else(|| crate::RuntimeError::UnknownModel(raw.to_string()))?, + None => self + .config + .embedding_model + .ok_or_else(|| crate::RuntimeError::Unconfigured("embedding_model".into()))?, + }; + let key = model.to_string(); + if self.embedders.contains_key(&key) { + Ok(model) + } else { + Err(crate::RuntimeError::UnknownModel( + name.unwrap_or_else(|| self.default_embedder_name()) + .to_string(), + )) + } + } + + /// Get the lazily-initialized embedding service for the named model. /// /// Returns a `CachedEmbeddingService` wrapping a `NativeEmbeddingService`. /// First call loads the model (cold start cost); subsequent calls are cheap and /// benefit from LRU caching of repeated inputs. - /// - /// Returns `Unconfigured("embedding_model")` if no model is set. - pub async fn embedder(&self) -> RuntimeResult> { - let model = self - .config - .embedding_model - .ok_or_else(|| crate::RuntimeError::Unconfigured("embedding_model".into()))?; - let service = self - .embedder + pub async fn embedder(&self, name: &str) -> RuntimeResult> { + let model = self.resolve_embedding_model(Some(name))?; + let key = model.to_string(); + let entry = self + .embedders + .get(&key) + .ok_or_else(|| crate::RuntimeError::UnknownModel(name.to_string()))? + .clone(); + Ok(entry + .cell .get_or_init(|| async move { - let native = Arc::new(NativeEmbeddingService::with_model(model)); + let native = Arc::new(NativeEmbeddingService::with_model(entry.model)); let cached = CachedEmbeddingService::with_default_cache(native); Arc::new(cached) as Arc }) .await - .clone(); - Ok(service) + .clone()) } } @@ -417,6 +493,66 @@ fn sanitize_key(s: &str) -> String { .collect() } +fn build_embedder_registry(config: &RuntimeConfig) -> (HashMap, Arc) { + let mut embedders = HashMap::new(); + for model in configured_embedding_models(config) { + embedders.insert( + model.to_string(), + EmbedderEntry { + model, + cell: Arc::new(OnceCell::new()), + }, + ); + } + let default_embedder_name = config + .embedding_model + .map(|model| Arc::::from(model.to_string())) + .unwrap_or_else(|| Arc::::from("")); + (embedders, default_embedder_name) +} + +fn configured_embedding_models(config: &RuntimeConfig) -> Vec { + let mut models = Vec::new(); + if let Some(model) = config.embedding_model { + models.push(model); + } + models.extend(config.additional_embedding_models.iter().copied()); + models.sort_by_key(|model| model.to_string()); + models.dedup(); + models +} + +fn register_configured_embedding_models( + backend: &StorageBackend, + config: &RuntimeConfig, +) -> RuntimeResult<()> { + for model in configured_embedding_models(config) { + backend.register_embedding_model( + &model.to_string(), + model.model_id(), + model.key_version(), + model.dimensions() as u32, + )?; + } + Ok(()) +} + +/// Parse a comma- or whitespace-separated list of embedding model names. +fn parse_embedding_model_list(s: &str) -> Vec { + parse_pack_list(s) + .into_iter() + .filter_map(|raw| parse_embedding_model_alias(&raw)) + .collect() +} + +fn parse_embedding_model_alias(name: &str) -> Option { + let normalized = name.trim().to_ascii_lowercase().replace('_', "-"); + match normalized.as_str() { + "paraphrase" => Some(EmbeddingModel::ParaphraseMultilingualMiniLmL12V2), + _ => normalized.parse().ok(), + } +} + #[cfg(test)] mod tests { use super::*; @@ -435,6 +571,7 @@ mod tests { db_path: Some(path.clone()), default_namespace: Namespace::parse("test").unwrap(), embedding_model: None, + additional_embedding_models: vec![], gate: Arc::new(AllowAllGate), packs: vec!["kg".to_string()], backend_id: BackendId::main(), @@ -451,6 +588,7 @@ mod tests { db_path: None, default_namespace: Namespace::local(), embedding_model: None, + additional_embedding_models: vec![], gate: Arc::new(AllowAllGate), packs: vec!["kg".to_string()], backend_id: BackendId::new("lore"), diff --git a/crates/khive-runtime/tests/integration.rs b/crates/khive-runtime/tests/integration.rs index 7775386b..257b4f06 100644 --- a/crates/khive-runtime/tests/integration.rs +++ b/crates/khive-runtime/tests/integration.rs @@ -568,6 +568,7 @@ async fn file_backed_runtime_persists() { gate: std::sync::Arc::new(khive_runtime::AllowAllGate), packs: vec!["kg".to_string()], backend_id: khive_runtime::BackendId::main(), + additional_embedding_models: vec![], }; let rt = KhiveRuntime::new(config).unwrap(); let tok = rt.authorize(Namespace::local()); @@ -585,6 +586,7 @@ async fn file_backed_runtime_persists() { gate: std::sync::Arc::new(khive_runtime::AllowAllGate), packs: vec!["kg".to_string()], backend_id: khive_runtime::BackendId::main(), + additional_embedding_models: vec![], }; let rt = KhiveRuntime::new(config).unwrap(); let tok = rt.authorize(Namespace::local()); diff --git a/crates/khive-storage/src/types.rs b/crates/khive-storage/src/types.rs index 70430009..11066599 100644 --- a/crates/khive-storage/src/types.rs +++ b/crates/khive-storage/src/types.rs @@ -181,6 +181,8 @@ pub struct VectorRecord { pub namespace: String, /// Which embedding field this record represents (e.g. `"entity.body"`). pub field: String, + #[serde(default)] + pub embedding_model: Option, /// One or many dense vectors; sqlite-vec backends enforce `vectors.len() == 1`. pub vectors: Vec>, pub updated_at: DateTime, @@ -193,6 +195,9 @@ pub struct VectorSearchRequest { pub top_k: u32, pub namespace: Option, pub kind: Option, + /// Restrict results to this embedding model. Defaults to the store's own model. + #[serde(default)] + pub embedding_model: Option, /// Optional metadata filter for backends that support pushdown. pub filter: Option, /// Backend-specific hints (opaque JSON blob, ignored by default). diff --git a/crates/khive-storage/src/vectors.rs b/crates/khive-storage/src/vectors.rs index 95bf1161..0e6cc797 100644 --- a/crates/khive-storage/src/vectors.rs +++ b/crates/khive-storage/src/vectors.rs @@ -307,6 +307,7 @@ mod tests { top_k: 5, namespace: None, kind: None, + embedding_model: None, filter: None, backend_hints: None, }; @@ -326,6 +327,7 @@ mod tests { top_k: 5, namespace: None, kind: None, + embedding_model: None, filter: None, backend_hints: None, }; @@ -352,6 +354,7 @@ mod tests { top_k: 3, namespace: None, kind: None, + embedding_model: None, filter: None, backend_hints: None, }, @@ -360,6 +363,7 @@ mod tests { top_k: 3, namespace: None, kind: None, + embedding_model: None, filter: None, backend_hints: None, }, @@ -433,6 +437,7 @@ mod tests { top_k: 1, namespace: None, kind: None, + embedding_model: None, filter: None, backend_hints: None, }]; diff --git a/crates/kkernel/Cargo.toml b/crates/kkernel/Cargo.toml index ba70f096..2f1354e4 100644 --- a/crates/kkernel/Cargo.toml +++ b/crates/kkernel/Cargo.toml @@ -12,6 +12,7 @@ description = "khive kernel — admin/management Rust binary (sync, pack introsp [dependencies] khive-runtime = { version = "0.2.2", path = "../khive-runtime" } +khive-db = { version = "0.2.2", path = "../khive-db" } khive-storage = { version = "0.2.2", path = "../khive-storage" } khive-types = { version = "0.2.2", path = "../khive-types" } khive-vcs = { version = "0.2.2", path = "../khive-vcs" } diff --git a/crates/kkernel/src/engine.rs b/crates/kkernel/src/engine.rs index d16aee6c..6e16923e 100644 --- a/crates/kkernel/src/engine.rs +++ b/crates/kkernel/src/engine.rs @@ -203,36 +203,22 @@ fn cmd_engine_drift_check(_args: EngineDriftCheckArgs) -> Result<()> { // ── Internal helpers ────────────────────────────────────────────────────────── fn query_embedding_models( - _db: Option<&std::path::Path>, + db: Option<&std::path::Path>, engine_filter: Option<&str>, ) -> Result> { - // The _embedding_models table is created by the ADR-043 schema migration. - // Until that migration lands, the table may not exist; return an empty list - // with a log rather than a hard error so `kkernel engine list` is usable - // before full ADR-043 deployment. - // - // A full implementation opens the SQLite DB, queries: - // SELECT engine_name, model_id, key_version, dim, status, - // activated_at, superseded_at - // FROM _embedding_models - // [WHERE engine_name = ?] - // ORDER BY engine_name, activated_at NULLS LAST - // - // and maps rows to EngineModelRecord. - // - // This scaffold returns an empty list so the CLI compiles and tests can - // verify the command routing surface without a live database. - - if let Some(engine) = engine_filter { - tracing::debug!( - engine, - "query_embedding_models: _embedding_models not yet populated" - ); - } else { - tracing::debug!("query_embedding_models: _embedding_models not yet populated"); - } - - Ok(Vec::new()) + let rows = khive_db::query_embedding_models(db, engine_filter)?; + Ok(rows + .into_iter() + .map(|r| EngineModelRecord { + engine_name: r.engine_name, + model_id: r.model_id, + key_version: r.key_version, + dimensions: r.dimensions, + status: r.status, + activated_at: r.activated_at, + superseded_at: r.superseded_at, + }) + .collect()) } // ── Tests ───────────────────────────────────────────────────────────────────── From d5dae5b679edb0427b0ffea4ed5456f8c503f164 Mon Sep 17 00:00:00 2001 From: OceanLi <122793010+ohdearquant@users.noreply.github.com> Date: Mon, 25 May 2026 11:36:29 -0400 Subject: [PATCH 05/14] =?UTF-8?q?fix(retrieval):=20correct=20doctest=20imp?= =?UTF-8?q?ort=20=E2=80=94=20use=20re-export=20at=20crate=20root?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses codex finding on PR #405: `khive_retrieval::hnsw::HnswIndex` doesn't resolve because `hnsw` is not a public submodule — `HnswIndex` is re-exported at the crate root (lib.rs:145). The doctest at persist/mod.rs:29 must use the public facade import. Closes the remaining gap on issue #309 (--all-features doctest failure). Verified: `RUSTC_WRAPPER= cargo test --offline -p khive-retrieval --all-features --doc` passes. Co-Authored-By: Claude Opus 4.7 --- crates/khive-retrieval/src/persist/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crates/khive-retrieval/src/persist/mod.rs b/crates/khive-retrieval/src/persist/mod.rs index 40d4e678..0893903a 100644 --- a/crates/khive-retrieval/src/persist/mod.rs +++ b/crates/khive-retrieval/src/persist/mod.rs @@ -26,7 +26,7 @@ //! //! ```rust,no_run //! use khive_retrieval::persist::RetrievalPersistence; -//! use khive_retrieval::hnsw::HnswIndex; +//! use khive_retrieval::HnswIndex; //! use rusqlite::Connection; //! use std::sync::Arc; //! use tokio::sync::Mutex; From 008a1f17605f57b888c7af5e916e70fee320d815 Mon Sep 17 00:00:00 2001 From: OceanLi <122793010+ohdearquant@users.noreply.github.com> Date: Mon, 25 May 2026 11:48:37 -0400 Subject: [PATCH 06/14] =?UTF-8?q?fix(pack-memory):=20address=20PR=20#406?= =?UTF-8?q?=20codex=20findings=20=E2=80=94=20top=5Fk=20cast=20+=20stronger?= =?UTF-8?q?=20tests?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two Medium findings from codex review: 1. top_k cast bug: `(k as u32).min(100)` truncates k before capping. A request with `top_k = 4_294_967_297` (larger than u32::MAX) truncates to 1 BEFORE the cap is applied, so the result limit becomes 1, not 100. Fixed: `u32::try_from(k.min(100)).unwrap_or(100)` clamps to usize first, then narrows safely. 2. Weak tests: test_recall_default_identity only checked length and the top hit. Strengthened to compare full ordered note_id+score lists across all positions, with all three knobs explicitly set to null. test_recall_fusion_strategy_override only validated string acceptance. Added a new unit test (fusion_strategy_change_produces_observable_ordering_difference) with a deterministic fixture where RRF and Weighted strategies MUST produce different orderings — proving the fusion_strategy override actually flows into fuse_candidates, not just validation. Verified: cargo test -p khive-pack-memory --lib passes (62 unit tests). Co-Authored-By: Claude Opus 4.7 --- crates/khive-pack-memory/src/handlers.rs | 86 ++++++++++++++++++- crates/khive-pack-memory/tests/integration.rs | 70 ++++++++++----- 2 files changed, 132 insertions(+), 24 deletions(-) diff --git a/crates/khive-pack-memory/src/handlers.rs b/crates/khive-pack-memory/src/handlers.rs index 00a5310a..8d0dbceb 100644 --- a/crates/khive-pack-memory/src/handlers.rs +++ b/crates/khive-pack-memory/src/handlers.rs @@ -488,7 +488,7 @@ impl MemoryPack { cfg.validate()?; let limit = if let Some(k) = p.top_k { - (k as u32).min(100) + u32::try_from(k.min(100)).unwrap_or(100) } else { p.limit.unwrap_or(10).min(100) }; @@ -942,6 +942,90 @@ mod tests { } } + #[test] + fn fusion_strategy_change_produces_observable_ordering_difference() { + // Codex Medium 2 (PR #406): prove the fusion_strategy knob actually + // affects fusion output, not just validation. Uses a deterministic fixture + // where rank-based (RRF) and score-based (Weighted) fusion must rank + // differently. + use khive_runtime::FusionStrategy as RuntimeFusionStrategy; + use khive_storage::types::{TextSearchHit, VectorSearchHit}; + use std::collections::HashSet; + use uuid::Uuid; + + let id_a = Uuid::from_u128(0xAAAA_AAAA_AAAA_AAAA_AAAA_AAAA_AAAA_AAAA); + let id_b = Uuid::from_u128(0xBBBB_BBBB_BBBB_BBBB_BBBB_BBBB_BBBB_BBBB); + let id_c = Uuid::from_u128(0xCCCC_CCCC_CCCC_CCCC_CCCC_CCCC_CCCC_CCCC); + + let text_hits = vec![ + TextSearchHit { + subject_id: id_a, + score: 0.9_f64.into(), + rank: 1, + title: None, + snippet: None, + }, + TextSearchHit { + subject_id: id_b, + score: 0.5_f64.into(), + rank: 2, + title: None, + snippet: None, + }, + ]; + let vector_hits = vec![ + VectorSearchHit { + subject_id: id_c, + score: 0.95_f64.into(), + rank: 1, + }, + VectorSearchHit { + subject_id: id_a, + score: 0.3_f64.into(), + rank: 2, + }, + ]; + let memory_ids: HashSet = [id_a, id_b, id_c].into_iter().collect(); + + let cfg_rrf = RecallConfig { + fuse_strategy: RuntimeFusionStrategy::Rrf { k: 60 }, + ..RecallConfig::default() + }; + let rrf_results = fuse_candidates( + text_hits.clone(), + vector_hits.clone(), + &memory_ids, + &cfg_rrf, + 10, + ); + let rrf_order: Vec = rrf_results.iter().map(|h| h.entity_id).collect(); + + let cfg_weighted = RecallConfig { + fuse_strategy: RuntimeFusionStrategy::Weighted { + weights: vec![0.1, 0.9], + }, + ..RecallConfig::default() + }; + let weighted_results = fuse_candidates( + text_hits, + vector_hits, + &memory_ids, + &cfg_weighted, + 10, + ); + let weighted_order: Vec = weighted_results.iter().map(|h| h.entity_id).collect(); + + // RRF on this fixture: id_a in both sources gets highest combined rank score; + // id_c (vector rank 1) and id_b (text rank 2) tied around 0.0161-0.0164. + // Weighted [0.1, 0.9]: id_c dominates (0.95 * 0.9 = 0.855); id_a drops + // (0.9 * 0.1 + 0.3 * 0.9 = 0.36); id_b last (0.5 * 0.1 = 0.05). + // The orderings MUST differ — this is the discriminating assertion. + assert_ne!( + rrf_order, weighted_order, + "fusion_strategy change must affect ordering; RRF and Weighted produced identical: {rrf_order:?}" + ); + } + #[test] fn compute_score_default_config_reproduces_legacy() { let cfg = RecallConfig::default(); diff --git a/crates/khive-pack-memory/tests/integration.rs b/crates/khive-pack-memory/tests/integration.rs index f613506d..3b98fee2 100644 --- a/crates/khive-pack-memory/tests/integration.rs +++ b/crates/khive-pack-memory/tests/integration.rs @@ -1009,49 +1009,73 @@ async fn test_recall_default_identity() { let rt = make_runtime(); let registry = make_registry(rt.clone()); - let note = registry - .dispatch( - "remember", - json!({ - "content": "the mitochondria is the powerhouse of the cell", - "importance": 0.8 - }), - ) - .await - .expect("remember succeeds"); - let note_id = note["note_id"].as_str().unwrap().to_string(); + // Create multiple memories so the identity comparison is meaningful + // (single-hit fixtures can't distinguish ordering changes). + for content in [ + "the mitochondria is the powerhouse of the cell", + "ribosomes synthesize proteins in the cell", + "the nucleus contains the cell's DNA", + "lysosomes digest cellular waste in the cell", + ] { + registry + .dispatch( + "remember", + json!({ "content": content, "importance": 0.8 }), + ) + .await + .expect("remember succeeds"); + } // Baseline recall with no knobs let base = registry - .dispatch("recall", json!({ "query": "mitochondria powerhouse cell" })) + .dispatch("recall", json!({ "query": "cell organelles" })) .await .expect("baseline recall succeeds"); let base_hits = base.as_array().expect("array"); assert!( - !base_hits.is_empty(), - "baseline must return at least one hit" + base_hits.len() >= 2, + "baseline must return at least two hits to make ordering meaningful, got {}", + base_hits.len() ); - // Same call with all knobs absent — must match baseline shape + // Same call with all three knobs explicitly set to null — must be byte-identical let knobless = registry .dispatch( "recall", - json!({ "query": "mitochondria powerhouse cell", "top_k": null }), + json!({ + "query": "cell organelles", + "top_k": null, + "fusion_strategy": null, + "score_floor": null, + }), ) .await - .expect("recall with null top_k succeeds"); + .expect("recall with all knobs null succeeds"); let knobless_hits = knobless.as_array().expect("array"); assert_eq!( base_hits.len(), knobless_hits.len(), - "null top_k must not change result count" - ); - assert_eq!( - base_hits[0]["note_id"].as_str().unwrap(), - note_id, - "top hit must be the memory we created" + "null knobs must not change result count" ); + + // Full ordering identity: each hit's note_id AND fused_score must match + // position-by-position. This catches a regression where a null knob silently + // shifts the ranking or rescaling. + for (i, (b, k)) in base_hits.iter().zip(knobless_hits.iter()).enumerate() { + assert_eq!( + b["note_id"].as_str(), + k["note_id"].as_str(), + "null knobs altered note_id at position {i}" + ); + // Scores must round-trip; allow tiny float jitter + let bs = b["score"].as_f64().unwrap_or(0.0); + let ks = k["score"].as_f64().unwrap_or(0.0); + assert!( + (bs - ks).abs() < 1e-9, + "null knobs altered score at position {i}: baseline={bs} knobless={ks}" + ); + } } #[tokio::test] From 78f0a6ebfaecc9d123ce3a4b5b241b3f546219ad Mon Sep 17 00:00:00 2001 From: OceanLi <122793010+ohdearquant@users.noreply.github.com> Date: Mon, 25 May 2026 11:52:49 -0400 Subject: [PATCH 07/14] =?UTF-8?q?fix(embedding):=20address=20PR=20#407=20c?= =?UTF-8?q?odex=20findings=20=E2=80=94=20ADR=20amendment=20+=203=20data-sa?= =?UTF-8?q?fety=20fixes?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per Ocean direction (2026-05-25): amend ADR-043 to formalize the V16 string-tag schema design, claim V16 in ADR-015 ledger, and fix the data-safety bugs codex flagged. Defer sqlite-vec data preservation to a follow-up issue. ADR amendments: - ADR-043 §1.1 (vector store column addition): replaces the old FK-based description with the actual V16 design (TEXT embedding_model column with composite index). Includes rationale for TEXT vs BLOB FK (hot-path join cost, end-to-end shape consistency with kkernel/env-vars/registry). Documents sqlite-vec rebuild behavior and follow-up. - ADR-015 schema ledger: V16 row added with cluster-22 amendment notes. High 3 fix — atomic unknown-model validation (handlers.rs): - handle_remember now calls resolve_embedding_model(Some(name)) BEFORE create_note_with_decay_for_embedding_model. resolve_embedding_model is synchronous and doesn't load the model — it only checks registration. An unknown model is rejected before any note/FTS/vector row is written. High 2 fix — scoped delete across all model stores (operations.rs): - delete_note now iterates over registered_embedding_model_names() (new public method on KhiveRuntime) and deletes the note's vector from EVERY registered model's vector store. Previously only the default model's store was touched, leaving non-default vectors orphaned. Medium fix — KHIVE_ADDITIONAL_EMBEDDING_MODELS warning on bad names: - parse_embedding_model_list now logs tracing::warn for non-empty raw names that don't parse, instead of silently filtering them out. The function still returns a Vec (no startup failure on partial validity), but operator typos now surface at startup rather than as UnknownModel errors at request time. Deferred (follow-up issue, see ADR-043 §1.1 final paragraph): - High 1 — V16 backfill hard-codes 'all-minilm-l6-v2' for all regular vec_* tables, and sqlite-vec virtual tables are still dropped-and-rebuilt on schema mismatch (data loss for non-default deployments). A copy-with- default rebuild path is tracked separately because it requires a careful multi-step migration with vec0 INSERT INTO SELECT FROM and a verification step. Operators are warned via ADR §1.1 to back up before upgrading. Co-Authored-By: Claude Opus 4.7 --- crates/khive-pack-memory/src/handlers.rs | 8 +++ crates/khive-runtime/src/operations.rs | 11 ++-- crates/khive-runtime/src/runtime.rs | 26 ++++++++- docs/adr/ADR-015-schema-migrations.md | 9 ++- docs/adr/ADR-043-embedding-model-migration.md | 57 ++++++++++++++----- 5 files changed, 92 insertions(+), 19 deletions(-) diff --git a/crates/khive-pack-memory/src/handlers.rs b/crates/khive-pack-memory/src/handlers.rs index 8d0dbceb..28d7fb39 100644 --- a/crates/khive-pack-memory/src/handlers.rs +++ b/crates/khive-pack-memory/src/handlers.rs @@ -426,6 +426,14 @@ impl MemoryPack { } } + // Codex High 3 (PR #407): validate embedding_model BEFORE any note/FTS + // write so unknown-model errors are atomic (no half-written rows). + // resolve_embedding_model is sync and does not trigger model load — it + // only checks the registry contains the name. + if let Some(model_name) = p.embedding_model.as_deref() { + self.runtime.resolve_embedding_model(Some(model_name))?; + } + let note = self .runtime .create_note_with_decay_for_embedding_model( diff --git a/crates/khive-runtime/src/operations.rs b/crates/khive-runtime/src/operations.rs index 9a6add6a..18016754 100644 --- a/crates/khive-runtime/src/operations.rs +++ b/crates/khive-runtime/src/operations.rs @@ -1389,8 +1389,11 @@ impl KhiveRuntime { self.text_for_notes(token)? .delete_document(&ns_str, id) .await?; - if self.config().embedding_model.is_some() { - self.vectors(token)?.delete(id).await?; + // Codex High 2 (PR #407): scoped delete — iterate over EVERY + // registered embedding model's vector store so non-default vectors + // don't orphan when the note is deleted. + for model_name in self.registered_embedding_model_names() { + self.vectors_for_model(token, &model_name)?.delete(id).await?; } } @@ -1400,8 +1403,8 @@ impl KhiveRuntime { self.text_for_notes(token)? .delete_document(&ns_str, id) .await?; - if self.config().embedding_model.is_some() { - self.vectors(token)?.delete(id).await?; + for model_name in self.registered_embedding_model_names() { + self.vectors_for_model(token, &model_name)?.delete(id).await?; } } if deleted { diff --git a/crates/khive-runtime/src/runtime.rs b/crates/khive-runtime/src/runtime.rs index 27b14eb9..3f054574 100644 --- a/crates/khive-runtime/src/runtime.rs +++ b/crates/khive-runtime/src/runtime.rs @@ -456,6 +456,15 @@ impl KhiveRuntime { } } + /// Names of all registered embedding models in this runtime. + /// + /// Useful for operations that must touch every model's storage (e.g., + /// scoped vector deletion on note delete — codex High 2 (PR #407)). + /// The default model is included. + pub fn registered_embedding_model_names(&self) -> Vec { + self.embedders.keys().cloned().collect() + } + /// Get the lazily-initialized embedding service for the named model. /// /// Returns a `CachedEmbeddingService` wrapping a `NativeEmbeddingService`. @@ -541,7 +550,22 @@ fn register_configured_embedding_models( fn parse_embedding_model_list(s: &str) -> Vec { parse_pack_list(s) .into_iter() - .filter_map(|raw| parse_embedding_model_alias(&raw)) + .filter_map(|raw| { + let parsed = parse_embedding_model_alias(&raw); + if parsed.is_none() && !raw.trim().is_empty() { + // Codex Medium (PR #407): silent filter_map masks operator typos. Warn loudly + // so misconfiguration surfaces at startup rather than as an UnknownModel error + // at request time. We do not fail startup — a partially valid list still + // produces a functional runtime — but the warning is unambiguous. + tracing::warn!( + model = %raw, + "KHIVE_ADDITIONAL_EMBEDDING_MODELS contains unknown model name; ignored. \ + Valid forms: short alias like 'paraphrase' or a fully-qualified key \ + from lattice_embed::EmbeddingModel::from_str." + ); + } + parsed + }) .collect() } diff --git a/docs/adr/ADR-015-schema-migrations.md b/docs/adr/ADR-015-schema-migrations.md index a08ef9fc..ec5264a2 100644 --- a/docs/adr/ADR-015-schema-migrations.md +++ b/docs/adr/ADR-015-schema-migrations.md @@ -45,6 +45,7 @@ The canonical ledger of database schema migration versions. Migration versions a | V13 | c06/ADR-041 | event_observability_provenance | shipped | | V14 | c20/ADR-043 | embedding_model_registry | shipped | | V15 | c22/ADR-046 | proposals_open | shipped | +| V16 | v022/ADR-043 | vector_embedding_model_tag | shipped | > **Amendment (2026-05-24, cluster-24 + post-integration)**: The ledger above reflects what > actually shipped on `integration/v1-adr-alignment` after parallel cluster landings c01, c03, @@ -56,7 +57,13 @@ The canonical ledger of database schema migration versions. Migration versions a > integration merge. c20 (embedding model registry per ADR-043) landed at V14 — the same ADR > the V6 reservation originally anticipated, hence V6 remains a no-op slot. c22 (proposals_open > projection per ADR-046) landed at V15. V6–V8 are no-op placeholder slots to maintain -> contiguity. Versions V1–V15 are production schema and are frozen. +> contiguity. +> +> **V16 amendment (2026-05-25, show v022-polish)**: V16 (`vector_embedding_model_tag`) adds +> a TEXT `embedding_model` column and composite index to regular `vec_*` tables, completing +> the dual-embedding plumbing described in ADR-043 §1. sqlite-vec virtual tables are handled +> at open time via schema rebuild because vec0 does not support `ALTER TABLE`. Versions V1–V16 +> are production schema and are frozen. > **Invariant**: ADR number order and migration version order are independent. Migration versions reflect schema ledger assignment order. A migration may only depend on schema created by earlier versions. diff --git a/docs/adr/ADR-043-embedding-model-migration.md b/docs/adr/ADR-043-embedding-model-migration.md index e668bdd1..88ffdba9 100644 --- a/docs/adr/ADR-043-embedding-model-migration.md +++ b/docs/adr/ADR-043-embedding-model-migration.md @@ -105,24 +105,55 @@ impossible — any attempt to insert a second `active` row for the same engine f the constraint. Migrations therefore execute as `BEGIN; UPDATE active→superseded; UPDATE pending→active; COMMIT;` — atomic by virtue of the index. -#### Vector store column addition +#### Vector store column addition (V16, ADR-015) -Each `vec_` table (ADR-031 D3) gains a column: +Each regular `vec_` table (ADR-031 D3) gains a TEXT model tag column. +This was formalized in migration V16: ```sql -ALTER TABLE vec_ ADD COLUMN embedding_model_id BLOB - REFERENCES _embedding_models(id); -CREATE INDEX idx_vec__model ON vec_(embedding_model_id); +ALTER TABLE vec_ ADD COLUMN embedding_model TEXT NOT NULL + DEFAULT 'all-minilm-l6-v2'; +CREATE INDEX idx_vec__subject_model + ON vec_(subject_id, embedding_model); ``` -Backfilled on the same migration: existing rows get the engine's current active -model's id. - -SQLite does not support `ALTER COLUMN ... SET NOT NULL`. The `embedding_model_id` -column is enforced via a `CHECK (embedding_model_id IS NOT NULL)` constraint added -through SQLite's standard table-rebuild pattern (create new table with constraint, -copy data, drop old, rename) — see ADR-015 for the migration template. This rebuild -is performed as the final step of the startup backfill described in §8 below. +The composite `(subject_id, embedding_model)` index supports the scoped recall +SQL: `WHERE subject_id = ? AND embedding_model = ?`. The default value at column +creation time was chosen so existing rows backfill to the legacy MiniLM model; +deployments using a non-default model **must** run the dedicated backfill worker +described in §8 before relying on model-scoped recall. + +**Design trade-off — TEXT vs BLOB FK.** ADR-043's first draft (pre-V16) specified +`embedding_model_id BLOB REFERENCES _embedding_models(id)`. V16 instead stores +the model_id directly as TEXT, joining against `_embedding_models.model_id` +when needed: + +- TEXT model_id is the natural primary key used everywhere else in the runtime + (kkernel engine list, `EmbeddingService::key_version()`, env var + `KHIVE_ADDITIONAL_EMBEDDING_MODELS`) — keeping the same shape end-to-end. +- BLOB FK would require a sub-select on every vector insert/search to resolve + the active model's UUID. The hot path is recall scoring; the join cost is + unjustified for a column whose values change only on registry events. +- Schema-level referential integrity is replaced by application-level + validation in the runtime registry: unknown model names are rejected at + `KhiveRuntime::embedder(name)` and at `RecallParams.embedding_model` + validation. + +The `_embedding_models` registry table (V14) still owns the authoritative model +metadata (dim, output_dim, status, key_version). V16's `embedding_model TEXT` +column is the foreign-key-by-value reference back to `_embedding_models.model_id`. + +**sqlite-vec virtual tables.** vec0 virtual tables cannot accept `ALTER TABLE +ADD COLUMN` because they declare their columns at `CREATE VIRTUAL TABLE` time. +V16 handles this via the open-time path in `khive-db/src/backend.rs`: when +opening a `vec_` table that lacks `embedding_model`, the runtime +rebuilds the virtual table with the new schema. **Existing rows are lost on +rebuild** — this is acceptable for deployments that have not yet enabled +dual-embedding because vectors will be re-embedded by the next backfill cycle, +but **operators must take a backup before upgrading any production deployment +with persisted non-default embeddings**. A follow-up migration (tracked in +ADR-043 §8.2) will implement a copy-with-default rebuild to preserve old +vectors with their inferred model tag. ### 2. Triggers — three sources, one event From daa3c6bffe66f286271620b4f395678a995132de Mon Sep 17 00:00:00 2001 From: OceanLi <122793010+ohdearquant@users.noreply.github.com> Date: Mon, 25 May 2026 11:55:11 -0400 Subject: [PATCH 08/14] style: cargo fmt --all --- crates/khive-pack-memory/src/handlers.rs | 9 ++------- crates/khive-pack-memory/tests/integration.rs | 5 +---- crates/khive-runtime/src/operations.rs | 8 ++++++-- 3 files changed, 9 insertions(+), 13 deletions(-) diff --git a/crates/khive-pack-memory/src/handlers.rs b/crates/khive-pack-memory/src/handlers.rs index 28d7fb39..3f83b818 100644 --- a/crates/khive-pack-memory/src/handlers.rs +++ b/crates/khive-pack-memory/src/handlers.rs @@ -1014,13 +1014,8 @@ mod tests { }, ..RecallConfig::default() }; - let weighted_results = fuse_candidates( - text_hits, - vector_hits, - &memory_ids, - &cfg_weighted, - 10, - ); + let weighted_results = + fuse_candidates(text_hits, vector_hits, &memory_ids, &cfg_weighted, 10); let weighted_order: Vec = weighted_results.iter().map(|h| h.entity_id).collect(); // RRF on this fixture: id_a in both sources gets highest combined rank score; diff --git a/crates/khive-pack-memory/tests/integration.rs b/crates/khive-pack-memory/tests/integration.rs index 3b98fee2..59f9f9b3 100644 --- a/crates/khive-pack-memory/tests/integration.rs +++ b/crates/khive-pack-memory/tests/integration.rs @@ -1018,10 +1018,7 @@ async fn test_recall_default_identity() { "lysosomes digest cellular waste in the cell", ] { registry - .dispatch( - "remember", - json!({ "content": content, "importance": 0.8 }), - ) + .dispatch("remember", json!({ "content": content, "importance": 0.8 })) .await .expect("remember succeeds"); } diff --git a/crates/khive-runtime/src/operations.rs b/crates/khive-runtime/src/operations.rs index 18016754..3b02a8e4 100644 --- a/crates/khive-runtime/src/operations.rs +++ b/crates/khive-runtime/src/operations.rs @@ -1393,7 +1393,9 @@ impl KhiveRuntime { // registered embedding model's vector store so non-default vectors // don't orphan when the note is deleted. for model_name in self.registered_embedding_model_names() { - self.vectors_for_model(token, &model_name)?.delete(id).await?; + self.vectors_for_model(token, &model_name)? + .delete(id) + .await?; } } @@ -1404,7 +1406,9 @@ impl KhiveRuntime { .delete_document(&ns_str, id) .await?; for model_name in self.registered_embedding_model_names() { - self.vectors_for_model(token, &model_name)?.delete(id).await?; + self.vectors_for_model(token, &model_name)? + .delete(id) + .await?; } } if deleted { From f1889559c9b1ad6ec6d4236e201f9d2a4365e59d Mon Sep 17 00:00:00 2001 From: OceanLi <122793010+ohdearquant@users.noreply.github.com> Date: Mon, 25 May 2026 11:55:40 -0400 Subject: [PATCH 09/14] style: deno fmt ADR tables --- docs/adr/ADR-015-schema-migrations.md | 34 +++++++++++++-------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/docs/adr/ADR-015-schema-migrations.md b/docs/adr/ADR-015-schema-migrations.md index ec5264a2..7384f456 100644 --- a/docs/adr/ADR-015-schema-migrations.md +++ b/docs/adr/ADR-015-schema-migrations.md @@ -28,23 +28,23 @@ mechanism that: The canonical ledger of database schema migration versions. Migration versions are assigned in ledger order; they are NOT required to match ADR number order. -| Version | Owning ADR | Migration name | Status | -| ------: | ----------- | ------------------------------------------------- | ------- | -| V1 | (initial) | initial_schema | shipped | -| V2 | (initial) | add_name_to_notes | shipped | -| V3 | (initial) | add_events_namespace_created_index | shipped | -| V4 | (initial) | dedupe_graph_edge_triples | shipped | -| V5 | c01/ADR-001 | add_entity_type_to_entities | shipped | -| V6 | (no-op) | reserved_adr043_embedding_pipeline_extensions | shipped | -| V7 | (no-op) | reserved_adr046_event_sourced_proposals_index | shipped | -| V8 | (no-op) | reserved_adr041_event_observations_and_session_id | shipped | -| V9 | c03/ADR-004 | edge_lifecycle_and_target_backend | shipped | -| V10 | c04/ADR-019 | note_status_and_nullable_metrics | shipped | -| V11 | c04/ADR-014 | entity_tombstone_columns | shipped | -| V12 | c04/ADR-019 | nullable_note_metrics | shipped | -| V13 | c06/ADR-041 | event_observability_provenance | shipped | -| V14 | c20/ADR-043 | embedding_model_registry | shipped | -| V15 | c22/ADR-046 | proposals_open | shipped | +| Version | Owning ADR | Migration name | Status | +| ------: | ------------ | ------------------------------------------------- | ------- | +| V1 | (initial) | initial_schema | shipped | +| V2 | (initial) | add_name_to_notes | shipped | +| V3 | (initial) | add_events_namespace_created_index | shipped | +| V4 | (initial) | dedupe_graph_edge_triples | shipped | +| V5 | c01/ADR-001 | add_entity_type_to_entities | shipped | +| V6 | (no-op) | reserved_adr043_embedding_pipeline_extensions | shipped | +| V7 | (no-op) | reserved_adr046_event_sourced_proposals_index | shipped | +| V8 | (no-op) | reserved_adr041_event_observations_and_session_id | shipped | +| V9 | c03/ADR-004 | edge_lifecycle_and_target_backend | shipped | +| V10 | c04/ADR-019 | note_status_and_nullable_metrics | shipped | +| V11 | c04/ADR-014 | entity_tombstone_columns | shipped | +| V12 | c04/ADR-019 | nullable_note_metrics | shipped | +| V13 | c06/ADR-041 | event_observability_provenance | shipped | +| V14 | c20/ADR-043 | embedding_model_registry | shipped | +| V15 | c22/ADR-046 | proposals_open | shipped | | V16 | v022/ADR-043 | vector_embedding_model_tag | shipped | > **Amendment (2026-05-24, cluster-24 + post-integration)**: The ledger above reflects what From 7f959d09244e35e82adea9e139f2c218e178fad0 Mon Sep 17 00:00:00 2001 From: OceanLi <122793010+ohdearquant@users.noreply.github.com> Date: Mon, 25 May 2026 11:57:43 -0400 Subject: [PATCH 10/14] fix(test): use single-token query matching all fixture memories --- crates/khive-pack-memory/tests/integration.rs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/crates/khive-pack-memory/tests/integration.rs b/crates/khive-pack-memory/tests/integration.rs index 59f9f9b3..0beddc27 100644 --- a/crates/khive-pack-memory/tests/integration.rs +++ b/crates/khive-pack-memory/tests/integration.rs @@ -1025,7 +1025,7 @@ async fn test_recall_default_identity() { // Baseline recall with no knobs let base = registry - .dispatch("recall", json!({ "query": "cell organelles" })) + .dispatch("recall", json!({ "query": "cell" })) .await .expect("baseline recall succeeds"); let base_hits = base.as_array().expect("array"); @@ -1040,7 +1040,7 @@ async fn test_recall_default_identity() { .dispatch( "recall", json!({ - "query": "cell organelles", + "query": "cell", "top_k": null, "fusion_strategy": null, "score_floor": null, From 55c1f4a20dda4003b355b7f4c3030a363703046f Mon Sep 17 00:00:00 2001 From: OceanLi <122793010+ohdearquant@users.noreply.github.com> Date: Mon, 25 May 2026 13:19:56 -0400 Subject: [PATCH 11/14] =?UTF-8?q?fix(embedding):=20codex=20round=202=20?= =?UTF-8?q?=E2=80=94=20runtime-layer=20atomicity=20+=20ADR=20internal=20co?= =?UTF-8?q?nsistency?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two findings from codex round 2 on PR #407: 1. Medium: runtime-level unknown-model atomicity. handle_remember validated embedding_model before calling create_note_with_decay_for_embedding_model (round 1 fix), but the runtime API itself was permissive — direct Rust callers (other packs, integration tests) would still hit the write-after- failure bug. Fix: resolve_embedding_model is now called at the start of create_note_inner BEFORE any note/FTS/vector write. The pack-handler check remains as an earlier error boundary, but atomicity is enforced at the lowest layer that performs the write. 2. High: ADR-043 internal inconsistency. The §1.1 amendment landed in round 1, but the rest of the ADR still contained normative text for the rejected FK-based design: - §8 (Backward compat) described embedding_model_id BLOB column + CHECK rebuild - Alternatives table rejected per-record model_id (which is what V16 ships) - Migration version section claimed V5 + BLOB FK Fix: - §8 rewritten to describe V14 + V16 (registry + tag column) split that actually shipped - Alternatives table row strikes-through with "Superseded by V16 (2026-05-25)" and points back to §1.1 for rationale - Migration version section split into V14 (cluster-20) and V16 (v022-polish) with the actual SQL each migration runs Codex's other round-1 findings remain addressed: - Critical (ADR drift) — now resolved end-to-end across §1.1, §8, alternatives, and §Migration version - High 2 (scoped delete) — confirmed by round-2 review at operations.rs:1395, 1408 - Medium (env var warning) — confirmed at runtime.rs:549, 560 Verified: cargo test -p khive-runtime -p khive-pack-memory --lib passes (262 + 62 tests), cargo fmt clean, deno fmt clean. Co-Authored-By: Claude Opus 4.7 --- crates/khive-runtime/src/operations.rs | 8 ++ docs/adr/ADR-043-embedding-model-migration.md | 86 +++++++++++-------- 2 files changed, 59 insertions(+), 35 deletions(-) diff --git a/crates/khive-runtime/src/operations.rs b/crates/khive-runtime/src/operations.rs index 3b02a8e4..a3330b57 100644 --- a/crates/khive-runtime/src/operations.rs +++ b/crates/khive-runtime/src/operations.rs @@ -896,6 +896,14 @@ impl KhiveRuntime { } } + // Codex round 2 Medium (PR #407): resolve embedding_model BEFORE any + // note/FTS/vector write so unknown-model errors are atomic at the + // runtime layer, not just at one pack handler. Direct Rust callers + // (other packs, integration tests) get the same guarantee. + if let Some(model_name) = embedding_model { + self.resolve_embedding_model(Some(model_name))?; + } + let mut note = Note::new(ns, kind, content); if let Some(s) = salience { note = note.with_salience(s); diff --git a/docs/adr/ADR-043-embedding-model-migration.md b/docs/adr/ADR-043-embedding-model-migration.md index 88ffdba9..969e35e5 100644 --- a/docs/adr/ADR-043-embedding-model-migration.md +++ b/docs/adr/ADR-043-embedding-model-migration.md @@ -357,23 +357,35 @@ All four carry `engine_name` and the relevant `_embedding_models.id`(s) in paylo None carries `served_by_profile_id` — these are operator/system events, not profile-served (ADR-032 §3 rule). -### 8. Backward compatibility — one-shot startup migration - -Deployments predating this ADR have `vec_` tables without `embedding_model_id` -and no `_embedding_models` rows. On first startup post-ADR-043: - -1. Run the schema migration (creates `_embedding_models`, adds `embedding_model_id` - to `vec_` tables as nullable). -2. For each `[[engines]]` entry: derive `canonical_key` via lattice's - `EmbeddingKey::canonical_bytes()`, insert one `_embedding_models` row with - `status='active'`, `activated_at=now`, `created_at=now`. -3. Backfill all `vec_` rows with that engine's newly-inserted model id. -4. Tighten the `embedding_model_id` column by rebuilding the table with a - `CHECK (embedding_model_id IS NOT NULL)` constraint (SQLite table-rebuild pattern — - see §1 and ADR-015). This runs as run-once startup code after the SQL migration - completes, not as an additional SQL migration step. - -The startup migration emits one `EmbeddingModelChanged` event per engine with +### 8. Backward compatibility — one-shot startup migration (V14 + V16) + +Deployments predating this ADR have `vec_` tables without an +`embedding_model` column and no `_embedding_models` rows. The startup +migration runs in two steps, landed in two separate `VersionedMigration` +slots: + +**V14 — `embedding_model_registry`** (already shipped): + +1. `CREATE TABLE _embedding_models` (per §1 schema). +2. `CREATE UNIQUE INDEX idx_embed_models_one_active`. +3. `CREATE INDEX idx_embed_models_engine_status`. + +**V16 — `vector_embedding_model_tag`** (shipped in v022-polish): + +4. For each existing regular `vec_*` table (discovered at runtime, validated as + alphanumeric-suffix only): `ALTER TABLE vec_ ADD COLUMN embedding_model + TEXT NOT NULL DEFAULT 'all-minilm-l6-v2'`. +5. `CREATE INDEX idx_vec__subject_model ON vec_(subject_id, embedding_model)`. +6. sqlite-vec virtual tables (`vec0`) cannot accept `ALTER TABLE` — handled at + open time in `khive-db/src/backend.rs` by rebuilding the virtual table with + the new schema. See §1.1 final paragraph for the operator backup warning; + a preserving rebuild is the documented follow-up. + +Operator population of `_embedding_models` (steps for populating registry rows +from `[[engines]]` config and emitting `EmbeddingModelChanged` events) is a +separate startup-code path tracked in #385, not part of the SQL migrations. + +The startup population emits one `EmbeddingModelChanged` event per engine with `source_model_id = None` and `initiated_by = ConfigDiff` so the audit trail starts clean. @@ -429,14 +441,14 @@ Tracked in `.khive/plans/embedding-version-config.md`. ## Alternatives Considered -| Alternative | Why rejected | -| ---------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------- | -| Reimplement migration state machine in khive | Lattice ships it; duplication has no upside | -| Store model id on every record (`notes`, `entities`) row | Triple-write cost; the vector table is the right grain — only vectors are model-bound | -| Migrate vectors in place (rewrite same table) | Loses atomicity. Failure mid-migration leaves a half-rewritten table with no clean rollback | -| MCP verb `brain.migrate_model` for agent-triggered migrations | Crosses the brain-substrate boundary; risks the feedback loop described in Rationale | -| Auto-archive `superseded` rows after N days | Premature; an explicit `khive engine archive --before ` is enough | -| Per-record `model_id` on `vec_` instead of FK to `_embedding_models` | Denormalized; can't carry the supersession chain or `superseded_by` link | +| Alternative | Why rejected | +| -------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Reimplement migration state machine in khive | Lattice ships it; duplication has no upside | +| Store model id on every record (`notes`, `entities`) row | Triple-write cost; the vector table is the right grain — only vectors are model-bound | +| Migrate vectors in place (rewrite same table) | Loses atomicity. Failure mid-migration leaves a half-rewritten table with no clean rollback | +| MCP verb `brain.migrate_model` for agent-triggered migrations | Crosses the brain-substrate boundary; risks the feedback loop described in Rationale | +| Auto-archive `superseded` rows after N days | Premature; an explicit `khive engine archive --before ` is enough | +| ~~Per-record `model_id` on `vec_` instead of FK to `_embedding_models`~~ | **Superseded by V16 (2026-05-25)**: per-record `embedding_model TEXT` is what V16 actually ships. The supersession chain is preserved via `_embedding_models.superseded_by` joined on `model_id`. See §1.1 for the trade-off rationale (hot-path join cost, end-to-end consistency with kkernel/env-var) | ## Consequences @@ -505,19 +517,23 @@ payload. ### Migration version -A new `VersionedMigration` in `crates/khive-db/src/migrations.rs` with -`version = 5` (current latest is V4 — `dedupe_graph_edge_triples`): +The ADR-043 schema work landed in two ledger versions in +`crates/khive-db/src/migrations.rs`: + +**V14 — `embedding_model_registry`** (cluster-20): -1. `CREATE TABLE _embedding_models` (above) +1. `CREATE TABLE _embedding_models` (per §1) 2. `CREATE UNIQUE INDEX idx_embed_models_one_active` 3. `CREATE INDEX idx_embed_models_engine_status` -4. For each existing `vec_` table (discovered via the catalog): - - `ALTER TABLE vec_ ADD COLUMN embedding_model_id BLOB REFERENCES _embedding_models(id)` - - `CREATE INDEX idx_vec__model ON vec_(embedding_model_id)` -5. Startup backfill (run-once code, not a SQL migration): populate - `_embedding_models` from `[[engines]]`, backfill the FK column, then rebuild - `vec_` with a `CHECK (embedding_model_id IS NOT NULL)` constraint via - SQLite's table-rebuild pattern (ADR-015). + +**V16 — `vector_embedding_model_tag`** (v022-polish): + +4. For each existing regular `vec_*` table (runtime-discovered, name-validated): + - `ALTER TABLE vec_ ADD COLUMN embedding_model TEXT NOT NULL DEFAULT 'all-minilm-l6-v2'` + - `CREATE INDEX idx_vec__subject_model ON vec_(subject_id, embedding_model)` +5. Startup backfill (run-once code, tracked separately in #385): populate + `_embedding_models` from `[[engines]]`; per-table model-inferred tag rewrite + for deployments with non-default models (deferred — see §1.1 final paragraph). ### Worker registration From 84441493df48aa3a4fa56ab50d13a34dfad0b2b7 Mon Sep 17 00:00:00 2001 From: OceanLi <122793010+ohdearquant@users.noreply.github.com> Date: Mon, 25 May 2026 13:23:43 -0400 Subject: [PATCH 12/14] ci: force re-trigger From fd0b92d4191ce995738011107531b29a1b9bf8ce Mon Sep 17 00:00:00 2001 From: OceanLi <122793010+ohdearquant@users.noreply.github.com> Date: Mon, 25 May 2026 13:27:14 -0400 Subject: [PATCH 13/14] ci: force re-trigger via doc whitespace touch --- docs/adr/ADR-043-embedding-model-migration.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/adr/ADR-043-embedding-model-migration.md b/docs/adr/ADR-043-embedding-model-migration.md index 969e35e5..b067f1bd 100644 --- a/docs/adr/ADR-043-embedding-model-migration.md +++ b/docs/adr/ADR-043-embedding-model-migration.md @@ -601,3 +601,4 @@ via `kkernel call`. - ADR-031 §D3 — `[[engines]]` schema, `vec_` table naming, `EngineConfig` - ADR-032 §3 — `EventKind` enum (extended here with four new variants) - ADR-033 §1 — `RecallConfig.fallback_during_migration` (added here) + From 4c404b598fa5994b43c75327c92cbf860c5d05ea Mon Sep 17 00:00:00 2001 From: OceanLi <122793010+ohdearquant@users.noreply.github.com> Date: Mon, 25 May 2026 13:27:39 -0400 Subject: [PATCH 14/14] style: deno fmt --- docs/adr/ADR-043-embedding-model-migration.md | 1 - 1 file changed, 1 deletion(-) diff --git a/docs/adr/ADR-043-embedding-model-migration.md b/docs/adr/ADR-043-embedding-model-migration.md index b067f1bd..969e35e5 100644 --- a/docs/adr/ADR-043-embedding-model-migration.md +++ b/docs/adr/ADR-043-embedding-model-migration.md @@ -601,4 +601,3 @@ via `kkernel call`. - ADR-031 §D3 — `[[engines]]` schema, `vec_` table naming, `EngineConfig` - ADR-032 §3 — `EventKind` enum (extended here with four new variants) - ADR-033 §1 — `RecallConfig.fallback_during_migration` (added here) -