diff --git a/crates/khive-db/src/backend.rs b/crates/khive-db/src/backend.rs index de2e5124..5653baae 100644 --- a/crates/khive-db/src/backend.rs +++ b/crates/khive-db/src/backend.rs @@ -235,13 +235,15 @@ impl StorageBackend { /// Get a VectorStore for a specific embedding model, scoped to the default namespace. /// /// Creates the vec0 virtual table if it does not already exist. The `model_key` - /// must contain only ASCII alphanumeric/underscore characters. + /// must contain only ASCII alphanumeric/underscore characters. The `embedding_model` + /// is the canonical display name stored in each vector row. pub fn vectors( &self, model_key: &str, + embedding_model: &str, dimensions: usize, ) -> Result, SqliteError> { - self.vectors_for_namespace(model_key, dimensions, "local") + self.vectors_for_namespace(model_key, embedding_model, dimensions, "local") } /// Get a VectorStore for a specific embedding model with a default namespace. @@ -251,9 +253,12 @@ impl StorageBackend { /// (count, delete, info). Access control is enforced at the runtime layer. /// /// The `model_key` must contain only ASCII alphanumeric/underscore characters. + /// The `embedding_model` is the canonical display name stored in the `embedding_model` + /// column of each vector row (e.g. `"all-minilm-l6-v2"`). pub fn vectors_for_namespace( &self, model_key: &str, + embedding_model: &str, dimensions: usize, namespace: &str, ) -> Result, SqliteError> { @@ -298,21 +303,24 @@ impl StorageBackend { .is_some(); if table_exists { - let has_field: bool = { + let (has_field, has_embedding_model) = { let pragma = format!("PRAGMA table_xinfo({})", table); let mut stmt = writer.conn().prepare(&pragma)?; let mut rows = stmt.query([])?; - let mut found = false; + let mut has_field = false; + let mut has_embedding_model = false; while let Some(row) = rows.next()? { let name: String = row.get(1)?; if name == "field" { - found = true; - break; + has_field = true; + } + if name == "embedding_model" { + has_embedding_model = true; } } - found + (has_field, has_embedding_model) }; - if !has_field { + if !has_field || !has_embedding_model { let drop_ddl = format!("DROP TABLE IF EXISTS {}", table); writer.conn().execute_batch(&drop_ddl)?; } @@ -332,19 +340,13 @@ impl StorageBackend { // Create the vec0 virtual table. Idempotent on fresh databases and after the // old-schema rebuild above. - // - // NOTE: `embedding_model_id` is NOT included in this DDL because sqlite-vec - // enforces NOT NULL on TEXT metadata columns at insert time, so the column - // cannot be added at virtual-table creation as a nullable FK. The column will - // be present after the ADR-043 §8 startup backfill rebuild (steps 2-4), which - // is deferred to a follow-up PR — see the tracking issue filed against MAJ-2 - // of codex round-1 review of PR #374. let ddl = format!( "CREATE VIRTUAL TABLE IF NOT EXISTS vec_{} USING vec0(\ subject_id TEXT PRIMARY KEY, \ namespace TEXT NOT NULL, \ kind TEXT NOT NULL, \ field TEXT NOT NULL, \ + embedding_model TEXT NOT NULL, \ embedding float[{}] distance_metric=cosine\ )", model_key, dimensions @@ -355,11 +357,54 @@ impl StorageBackend { Arc::clone(&self.pool), self.is_file_backed, model_key.to_string(), + embedding_model.to_string(), dimensions, namespace.trim().to_string(), )?)) } + /// Register an embedding model in the `_embedding_models` registry table (ADR-043). + /// + /// Idempotent: if a row with the same `canonical_key` already exists, updates its + /// status back to `'active'` without changing other fields. + pub fn register_embedding_model( + &self, + engine_name: &str, + model_id: &str, + key_version: &str, + dimensions: u32, + ) -> Result<(), SqliteError> { + let writer = self.pool.try_writer()?; + writer + .conn() + .execute_batch(crate::migrations::EMBEDDING_MODELS_DDL)?; + + let now = chrono::Utc::now().timestamp_micros(); + let canonical_key = + format!("{engine_name}:{model_id}:{key_version}:{dimensions}").into_bytes(); + let id = uuid::Uuid::new_v4(); + writer.conn().execute( + "INSERT INTO _embedding_models \ + (id, engine_name, model_id, key_version, dim, output_dim, status, \ + activated_at, superseded_at, superseded_by, canonical_key, created_at) \ + VALUES (?1, ?2, ?3, ?4, ?5, NULL, 'active', ?6, NULL, NULL, ?7, ?8) \ + ON CONFLICT(canonical_key) DO UPDATE SET \ + status = 'active', \ + activated_at = COALESCE(_embedding_models.activated_at, excluded.activated_at)", + rusqlite::params![ + id.as_bytes().as_slice(), + engine_name, + model_id, + key_version, + dimensions as i64, + now, + canonical_key, + now, + ], + )?; + Ok(()) + } + /// Get a SparseStore for a specific model key, scoped to the default namespace. /// /// Creates the sparse table if it does not already exist. @@ -599,7 +644,7 @@ mod tests { #[cfg(feature = "vectors")] async fn vectors_roundtrip_via_public_api() { let backend = StorageBackend::memory().unwrap(); - let store = backend.vectors("test_api", 3).unwrap(); + let store = backend.vectors("test_api", "test_api", 3).unwrap(); let id = uuid::Uuid::new_v4(); store @@ -619,6 +664,7 @@ mod tests { top_k: 1, namespace: None, kind: None, + embedding_model: None, filter: None, backend_hints: None, }) @@ -635,8 +681,8 @@ mod tests { async fn vectors_creates_table_idempotently() { let backend = StorageBackend::memory().unwrap(); - let store1 = backend.vectors("idempotent", 3).unwrap(); - let store2 = backend.vectors("idempotent", 3).unwrap(); + let store1 = backend.vectors("idempotent", "idempotent", 3).unwrap(); + let store2 = backend.vectors("idempotent", "idempotent", 3).unwrap(); let id = uuid::Uuid::new_v4(); store1 @@ -724,8 +770,8 @@ mod tests { #[test] fn invalid_model_key_rejected() { let backend = StorageBackend::memory().unwrap(); - assert!(backend.vectors("bad key!", 3).is_err()); - assert!(backend.vectors("", 3).is_err()); + assert!(backend.vectors("bad key!", "bad key!", 3).is_err()); + assert!(backend.vectors("", "", 3).is_err()); } #[test] diff --git a/crates/khive-db/src/lib.rs b/crates/khive-db/src/lib.rs index e4a8b0bc..2a832372 100644 --- a/crates/khive-db/src/lib.rs +++ b/crates/khive-db/src/lib.rs @@ -9,7 +9,8 @@ pub mod stores; pub use backend::StorageBackend; pub use error::SqliteError; pub use migrations::{ - run_migrations, Migration, ServiceSchemaPlan, VersionedMigration, MIGRATIONS, + query_embedding_models, run_migrations, EmbeddingModelRegistryRecord, Migration, + ServiceSchemaPlan, VersionedMigration, MIGRATIONS, }; pub use pool::{ConnectionPool, PoolConfig, ReaderGuard, WriterGuard}; pub use sql_bridge::SqlBridge; diff --git a/crates/khive-db/src/migrations.rs b/crates/khive-db/src/migrations.rs index 7d727289..83b7b282 100644 --- a/crates/khive-db/src/migrations.rs +++ b/crates/khive-db/src/migrations.rs @@ -371,6 +371,15 @@ pub const EMBEDDING_MODELS_DDL: &str = "\ /// step for any table that already has the column. const V14_EMBEDDING_MODEL_REGISTRY: &str = "__v14_computed_at_runtime__"; +/// V16: Add `embedding_model` column and composite index to regular `vec_` tables. +/// +/// This migration is computed at runtime via `build_v16_vector_embedding_model_tag_sql` +/// to discover existing regular (non-virtual) `vec_` tables and add the column where +/// absent. sqlite-vec virtual tables (`vec0`) are handled at open time by the +/// `vectors_for_namespace` old-schema detection which drops and recreates tables +/// missing `embedding_model`. +const V16_VECTOR_EMBEDDING_MODEL_TAG: &str = "__v16_computed_at_runtime__"; + /// V15: proposals_open projection table (ADR-046). /// /// Maintains a fold-derived view of the four proposal EventKinds so that @@ -485,6 +494,12 @@ pub const MIGRATIONS: &[VersionedMigration] = &[ name: "proposals_open", up: V15_PROPOSALS_OPEN, }, + // V16: tag vector rows with embedding_model column (ADR-043 §8, dual-embedding). + VersionedMigration { + version: 16, + name: "vector_embedding_model_tag", + up: V16_VECTOR_EMBEDDING_MODEL_TAG, + }, ]; const MIGRATION_TRACKING_TABLE: &str = "\ @@ -701,6 +716,11 @@ pub fn run_migrations(conn: &mut Connection) -> Result { version: migration.version, error: e.to_string(), })? + } else if migration.version == 16 { + build_v16_vector_embedding_model_tag_sql(&tx).map_err(|e| SqliteError::Migration { + version: migration.version, + error: e.to_string(), + })? } else { migration.up.to_string() }; @@ -876,6 +896,129 @@ fn build_v14_embedding_model_registry_sql(conn: &Connection) -> Result Result { + let mut stmt = conn.prepare( + "SELECT name FROM sqlite_master \ + WHERE type = 'table' \ + AND name LIKE 'vec_%' \ + AND sql NOT LIKE '%VIRTUAL%' \ + AND sql NOT LIKE '%vec0%' \ + AND name NOT LIKE '%\\_chunks' ESCAPE '\\' \ + AND name NOT LIKE '%\\_rowids' ESCAPE '\\' \ + AND name NOT LIKE '%\\_info' ESCAPE '\\' \ + AND name NOT LIKE '%\\_vector\\_chunks%' ESCAPE '\\'", + )?; + let vec_tables: Vec = stmt + .query_map([], |row| row.get(0))? + .filter_map(|r| r.ok()) + .collect(); + + let mut sql = String::new(); + for table in vec_tables { + let valid = table.starts_with("vec_") + && table[4..] + .chars() + .all(|c| c.is_ascii_alphanumeric() || c == '_'); + if !valid { + continue; + } + let col_exists: bool = conn + .query_row( + "SELECT COUNT(*) > 0 FROM pragma_table_info(?1) WHERE name = 'embedding_model'", + rusqlite::params![&table], + |row| row.get(0), + ) + .unwrap_or(false); + if col_exists { + continue; + } + sql.push_str(&format!( + "ALTER TABLE {t} ADD COLUMN embedding_model TEXT NOT NULL DEFAULT 'all-minilm-l6-v2';\ + CREATE INDEX IF NOT EXISTS idx_{t}_subject_model ON {t}(subject_id, embedding_model);", + t = table, + )); + } + if sql.is_empty() { + sql.push_str("SELECT 1;"); + } + Ok(sql) +} + +/// A record from the `_embedding_models` registry table. +#[derive(Clone, Debug)] +pub struct EmbeddingModelRegistryRecord { + pub engine_name: String, + pub model_id: String, + pub key_version: String, + pub dimensions: u32, + pub status: String, + pub activated_at: Option, + pub superseded_at: Option, +} + +/// Query the `_embedding_models` registry. +/// +/// Opens the database at `db` (defaults to `~/.khive/khive-graph.db`) and +/// returns all registry rows, optionally filtered by `engine_name`. +/// Returns an empty vec if the database or table does not exist. +pub fn query_embedding_models( + db: Option<&std::path::Path>, + engine_filter: Option<&str>, +) -> Result, SqliteError> { + let path = db.map(std::path::Path::to_path_buf).unwrap_or_else(|| { + std::env::var("HOME") + .map(std::path::PathBuf::from) + .unwrap_or_else(|_| std::path::PathBuf::from(".")) + .join(".khive/khive-graph.db") + }); + if !path.exists() { + return Ok(Vec::new()); + } + + let conn = Connection::open(path)?; + let exists: bool = conn.query_row( + "SELECT COUNT(*) > 0 FROM sqlite_master \ + WHERE type='table' AND name='_embedding_models'", + [], + |row| row.get(0), + )?; + if !exists { + return Ok(Vec::new()); + } + + let sql = if engine_filter.is_some() { + "SELECT engine_name, model_id, key_version, dim, status, activated_at, superseded_at \ + FROM _embedding_models WHERE engine_name = ?1 \ + ORDER BY engine_name, activated_at IS NULL, activated_at" + } else { + "SELECT engine_name, model_id, key_version, dim, status, activated_at, superseded_at \ + FROM _embedding_models \ + ORDER BY engine_name, activated_at IS NULL, activated_at" + }; + let mut stmt = conn.prepare(sql)?; + let map_row = |row: &rusqlite::Row<'_>| { + Ok(EmbeddingModelRegistryRecord { + engine_name: row.get(0)?, + model_id: row.get(1)?, + key_version: row.get(2)?, + dimensions: row.get::<_, i64>(3)? as u32, + status: row.get(4)?, + activated_at: row.get(5)?, + superseded_at: row.get(6)?, + }) + }; + + if let Some(engine) = engine_filter { + stmt.query_map([engine], map_row)? + .collect::, _>>() + .map_err(Into::into) + } else { + stmt.query_map([], map_row)? + .collect::, _>>() + .map_err(Into::into) + } +} + // ============================================================================= // Tests // ============================================================================= @@ -892,17 +1035,17 @@ mod tests { fn fresh_db_migrates_to_latest() { let mut conn = open_memory(); let version = run_migrations(&mut conn).expect("migrations should succeed"); - assert_eq!(version, 15); + assert_eq!(version, 16); - // Verify the tracking table has rows for V1 through V15. + // Verify the tracking table has rows for V1 through V16. let count: i64 = conn .query_row( - "SELECT COUNT(*) FROM _schema_migrations WHERE version IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)", + "SELECT COUNT(*) FROM _schema_migrations WHERE version IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)", [], |row| row.get(0), ) .unwrap(); - assert_eq!(count, 15); + assert_eq!(count, 16); // Verify the entities table was created. let tbl_count: i64 = conn @@ -1083,16 +1226,16 @@ mod tests { let mut conn = open_memory(); let v1 = run_migrations(&mut conn).expect("first run"); let v2 = run_migrations(&mut conn).expect("second run"); - assert_eq!(v1, 15); - assert_eq!(v2, 15); + assert_eq!(v1, 16); + assert_eq!(v2, 16); - // Should still have exactly fifteen rows in the tracking table (V1..V15). + // Should still have exactly sixteen rows in the tracking table (V1..V16). let count: i64 = conn .query_row("SELECT COUNT(*) FROM _schema_migrations", [], |row| { row.get(0) }) .unwrap(); - assert_eq!(count, 15); + assert_eq!(count, 16); } // F052 (CRIT): V9 migration must add target_backend column + partial index on graph_edges. @@ -1102,8 +1245,8 @@ mod tests { let mut conn = open_memory(); let version = run_migrations(&mut conn).expect("migrations should succeed"); assert_eq!( - version, 15, - "F052: latest migration must be V15 (proposals_open)" + version, 16, + "F052: latest migration must be V16 (vector_embedding_model_tag)" ); let col: i64 = conn .query_row( @@ -1131,40 +1274,43 @@ mod tests { #[test] fn failed_migration_rolls_back() { - let bad_v16 = VersionedMigration { - version: 16, + let bad_v17 = VersionedMigration { + version: 17, name: "bad_migration", up: "THIS IS NOT VALID SQL;", }; let mut conn = open_memory(); - // Apply all real migrations (V1..V15) so the DB is at V15. - run_migrations(&mut conn).expect("V1..V15 should apply cleanly"); + // Apply all real migrations (V1..V16) so the DB is at V16. + run_migrations(&mut conn).expect("V1..V16 should apply cleanly"); - // Now manually drive the bad V16 migration to check rollback behaviour. - let result = apply_single_migration(&mut conn, &bad_v16); + // Now manually drive the bad V17 migration to check rollback behaviour. + let result = apply_single_migration(&mut conn, &bad_v17); assert!(result.is_err(), "bad migration should return error"); - // DB should still be at V15 — no V16 row in tracking. - let v16_count: i64 = conn + // DB should still be at V16 — no V17 row in tracking. + let v17_count: i64 = conn .query_row( - "SELECT COUNT(*) FROM _schema_migrations WHERE version = 16", + "SELECT COUNT(*) FROM _schema_migrations WHERE version = 17", [], |row| row.get(0), ) .unwrap(); - assert_eq!(v16_count, 0, "V16 must not be recorded after rollback"); + assert_eq!(v17_count, 0, "V17 must not be recorded after rollback"); - // V1..V15 should still be there. + // V1..V16 should still be there. let applied_count: i64 = conn .query_row( - "SELECT COUNT(*) FROM _schema_migrations WHERE version IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15)", + "SELECT COUNT(*) FROM _schema_migrations WHERE version IN (1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16)", [], |row| row.get(0), ) .unwrap(); - assert_eq!(applied_count, 15, "V1..V15 must still be recorded"); + assert_eq!( + applied_count, 16, + "V1..V16 must still be recorded after V17 rollback" + ); } #[test] @@ -1198,9 +1344,10 @@ mod tests { // V12 should detect that salience is already nullable and skip; // V13 adds event observability columns and event_observations table; // V14 creates the _embedding_models registry table; - // V15 creates the proposals_open table. + // V15 creates the proposals_open table; + // V16 adds embedding_model column to regular vec_ tables. let version = run_migrations(&mut conn).expect("migrations after store DDL"); - assert_eq!(version, 15); + assert_eq!(version, 16); // V2 should be recorded as applied (skipped but tracked). let v2_count: i64 = conn @@ -1390,9 +1537,9 @@ mod tests { ) .unwrap(); - // Run V2-V15 migrations. + // Run V2-V16 migrations. let version = run_migrations(&mut conn).expect("migrations should succeed"); - assert_eq!(version, 15); + assert_eq!(version, 16); // After V12, salience must be nullable (notnull=0). let notnull: i64 = conn @@ -1436,7 +1583,7 @@ mod tests { ensure_events_schema(&conn).expect("store DDL should create events"); let version = run_migrations(&mut conn).expect("migrations after events store DDL"); - assert_eq!(version, 15, "must reach V15 even when events DDL ran first"); + assert_eq!(version, 16, "must reach V16 even when events DDL ran first"); let v13_count: i64 = conn .query_row( @@ -1477,8 +1624,8 @@ mod tests { let mut conn = open_memory(); let version = run_migrations(&mut conn).expect("migrations should succeed"); assert_eq!( - version, 15, - "F227: latest migration must be V15 (proposals_open)" + version, 16, + "F227: latest migration must be V16 (vector_embedding_model_tag)" ); // Verify _embedding_models table exists. @@ -1575,7 +1722,7 @@ mod tests { // Run the full migration suite — V14 should add embedding_model_id to the // regular vec_legacy_model table. let version = run_migrations(&mut conn).expect("migrations should succeed"); - assert_eq!(version, 15); + assert_eq!(version, 16); // The embedding_model_id column must now exist. let col_exists: bool = conn @@ -1592,7 +1739,7 @@ mod tests { // Running migrations again must be idempotent (column already present). let version2 = run_migrations(&mut conn).expect("second run must succeed"); - assert_eq!(version2, 15); + assert_eq!(version2, 16); } /// CRIT-2 regression: V14 discovery filter must NOT match sqlite-vec internal @@ -1624,7 +1771,7 @@ mod tests { // Run the full migration suite — V14 must not add `embedding_model_id` to // any of the four shadow tables above. let version = run_migrations(&mut conn).expect("migrations should succeed"); - assert_eq!(version, 15); + assert_eq!(version, 16); for shadow in [ "vec_test_chunks", diff --git a/crates/khive-db/src/stores/vectors.rs b/crates/khive-db/src/stores/vectors.rs index 3fa06de5..3b4754eb 100644 --- a/crates/khive-db/src/stores/vectors.rs +++ b/crates/khive-db/src/stores/vectors.rs @@ -86,6 +86,7 @@ pub struct SqliteVecStore { pool: Arc, is_file_backed: bool, model_key: String, + embedding_model: String, dimensions: usize, table_name: String, namespace: String, @@ -99,6 +100,7 @@ impl SqliteVecStore { pool: Arc, is_file_backed: bool, model_key: String, + embedding_model: String, dimensions: usize, namespace: String, ) -> Result { @@ -108,6 +110,7 @@ impl SqliteVecStore { pool, is_file_backed, model_key, + embedding_model, dimensions, table_name, namespace, @@ -200,6 +203,7 @@ impl VectorStore for SqliteVecStore { let namespace = namespace.to_string(); let field = field.to_string(); let kind_str = kind.to_string(); + let embedding_model = self.embedding_model.clone(); if embedding.len() == dims { if let Some(idx) = non_finite_index(&embedding) { @@ -226,13 +230,21 @@ impl VectorStore for SqliteVecStore { )?; let ins_sql = format!( - "INSERT INTO {} (subject_id, namespace, kind, field, embedding) VALUES (?1, ?2, ?3, ?4, ?5)", + "INSERT INTO {} (subject_id, namespace, kind, field, embedding_model, embedding) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6)", table ); let blob = f32_slice_as_bytes(&embedding); conn.execute( &ins_sql, - rusqlite::params![subject_id.to_string(), &namespace, &kind_str, &field, blob], + rusqlite::params![ + subject_id.to_string(), + &namespace, + &kind_str, + &field, + &embedding_model, + blob + ], )?; Ok(()) }) @@ -246,6 +258,7 @@ impl VectorStore for SqliteVecStore { let table = self.table_name.clone(); let dims = self.dimensions; let attempted = records.len() as u64; + let store_embedding_model = self.embedding_model.clone(); self.with_writer("vec_insert_batch", move |conn| { let del_sql = format!( @@ -253,7 +266,8 @@ impl VectorStore for SqliteVecStore { table ); let ins_sql = format!( - "INSERT INTO {} (subject_id, namespace, kind, field, embedding) VALUES (?1, ?2, ?3, ?4, ?5)", + "INSERT INTO {} (subject_id, namespace, kind, field, embedding_model, embedding) \ + VALUES (?1, ?2, ?3, ?4, ?5, ?6)", table ); @@ -282,7 +296,14 @@ impl VectorStore for SqliteVecStore { let _ = conn.execute(&del_sql, rusqlite::params![&id_str, &record.namespace]); match conn.execute( &ins_sql, - rusqlite::params![&id_str, &record.namespace, &kind_str, &record.field, blob], + rusqlite::params![ + &id_str, + &record.namespace, + &kind_str, + &record.field, + &store_embedding_model, + blob + ], ) { Ok(_) => affected += 1, Err(_) => failed += 1, @@ -358,6 +379,11 @@ impl VectorStore for SqliteVecStore { .clone() .unwrap_or_else(|| self.namespace.clone()); let kind_filter = request.kind.map(|k| k.to_string()); + // Use the request's embedding_model filter, or fall back to this store's model. + let effective_model = request + .embedding_model + .clone() + .unwrap_or_else(|| self.embedding_model.clone()); if query_embedding.len() == dims { if let Some(idx) = non_finite_index(&query_embedding) { @@ -377,10 +403,10 @@ impl VectorStore for SqliteVecStore { )); } - // Restrict candidate set to namespace (and optionally kind) via subquery, - // then MATCH-rank by embedding distance. + // Restrict candidate set to namespace+embedding_model (and optionally kind) + // via subquery, then MATCH-rank by embedding distance. let subquery_kind_clause = if kind_filter.is_some() { - "AND kind = ?4" + "AND kind = ?5" } else { "" }; @@ -389,7 +415,8 @@ impl VectorStore for SqliteVecStore { FROM {t} \ WHERE embedding MATCH ?1 \ AND subject_id IN (\ - SELECT subject_id FROM {t} WHERE namespace = ?3 {kind_clause}\ + SELECT subject_id FROM {t} \ + WHERE namespace = ?3 AND embedding_model = ?4 {kind_clause}\ ) \ ORDER BY distance \ LIMIT ?2", @@ -405,7 +432,13 @@ impl VectorStore for SqliteVecStore { let raw_rows: Vec> = if let Some(ref kind_str) = kind_filter { stmt.query_map( - rusqlite::params![query_blob, request.top_k, &namespace, kind_str], + rusqlite::params![ + query_blob, + request.top_k, + &namespace, + &effective_model, + kind_str + ], |row| { let id_str: String = row.get(0)?; let distance: f64 = row.get(1)?; @@ -415,7 +448,7 @@ impl VectorStore for SqliteVecStore { .collect() } else { stmt.query_map( - rusqlite::params![query_blob, request.top_k, &namespace], + rusqlite::params![query_blob, request.top_k, &namespace, &effective_model], |row| { let id_str: String = row.get(0)?; let distance: f64 = row.get(1)?; @@ -531,6 +564,7 @@ impl SqliteVecStore { let table = self.table_name.clone(); let namespace = self.namespace.clone(); + let embedding_model = self.embedding_model.clone(); let query_vec = query_embedding.to_vec(); let ids: Vec = candidate_ids.iter().map(|id| id.to_string()).collect(); @@ -542,22 +576,24 @@ impl SqliteVecStore { let placeholders: String = chunk .iter() .enumerate() - .map(|(i, _)| format!("?{}", i + 3)) + .map(|(i, _)| format!("?{}", i + 4)) .collect::>() .join(", "); let sql = format!( "SELECT e.subject_id, vec_distance_cosine(e.embedding, ?1) as distance \ FROM {} e \ - WHERE e.namespace = ?2 AND e.subject_id IN ({})", + WHERE e.namespace = ?2 AND e.embedding_model = ?3 \ + AND e.subject_id IN ({})", table, placeholders ); let mut stmt = conn.prepare(&sql)?; stmt.raw_bind_parameter(1, query_blob)?; stmt.raw_bind_parameter(2, namespace.as_str())?; + stmt.raw_bind_parameter(3, embedding_model.as_str())?; for (i, id_str) in chunk.iter().enumerate() { - stmt.raw_bind_parameter(i + 3, id_str.as_str())?; + stmt.raw_bind_parameter(i + 4, id_str.as_str())?; } let mut rows = stmt.raw_query(); @@ -612,6 +648,7 @@ mod capabilities_tests { make_pool(), /*is_file_backed=*/ false, "test_model".into(), + "test_model".into(), /*dimensions=*/ 4, "ns:test".into(), ) @@ -657,6 +694,7 @@ mod capabilities_tests { make_pool(), false, "test_dim_limit".into(), + "test_dim_limit".into(), /*dimensions=*/ 4, "ns:test".into(), ) @@ -684,6 +722,7 @@ mod capabilities_tests { make_pool(), false, "test_idempotent".into(), + "test_idempotent".into(), 4, "ns:test".into(), ) diff --git a/crates/khive-db/tests/contract/backend.rs b/crates/khive-db/tests/contract/backend.rs index bbe296f5..34129257 100644 --- a/crates/khive-db/tests/contract/backend.rs +++ b/crates/khive-db/tests/contract/backend.rs @@ -337,7 +337,7 @@ mod vector_contract { async fn test_vector_store(backend: &StorageBackend) { let store = backend - .vectors_for_namespace("ct_model", 4, "ct_ns") + .vectors_for_namespace("ct_model", "ct_model", 4, "ct_ns") .expect("vector store"); let id = Uuid::new_v4(); @@ -361,6 +361,7 @@ mod vector_contract { top_k: 1, namespace: None, kind: None, + embedding_model: None, filter: None, backend_hints: None, }) diff --git a/crates/khive-db/tests/contract/vector_filter.rs b/crates/khive-db/tests/contract/vector_filter.rs index 5b37ed35..e9be4745 100644 --- a/crates/khive-db/tests/contract/vector_filter.rs +++ b/crates/khive-db/tests/contract/vector_filter.rs @@ -18,7 +18,9 @@ mod vector_filter_contract { #[tokio::test] async fn search_with_non_empty_filter_returns_unsupported() { let backend = StorageBackend::memory().expect("in-memory backend"); - let store = backend.vectors("filter_test", 3).expect("vector store"); + let store = backend + .vectors("filter_test", "filter_test", 3) + .expect("vector store"); // Insert one record so the table is non-empty. let id = Uuid::new_v4(); @@ -39,6 +41,7 @@ mod vector_filter_contract { top_k: 5, namespace: None, kind: None, + embedding_model: None, filter: Some(VectorMetadataFilter { namespaces: vec!["local".into()], kinds: vec![], @@ -64,7 +67,9 @@ mod vector_filter_contract { #[tokio::test] async fn search_with_filter_empty_delegates_and_non_empty_rejects() { let backend = StorageBackend::memory().expect("in-memory backend"); - let store = backend.vectors("filter_delegate", 3).expect("vector store"); + let store = backend + .vectors("filter_delegate", "filter_delegate", 3) + .expect("vector store"); let id = Uuid::new_v4(); store @@ -83,6 +88,7 @@ mod vector_filter_contract { top_k: 1, namespace: None, kind: None, + embedding_model: None, filter: None, backend_hints: None, }; @@ -154,7 +160,7 @@ mod vector_filter_contract { // the old schema and rebuild the table transparently. let new_backend = StorageBackend::sqlite(&db_path).expect("reopen db"); let store = new_backend - .vectors_for_namespace("old_model", 3, "local") + .vectors_for_namespace("old_model", "old_model", 3, "local") .expect("vectors_for_namespace must succeed after schema rebuild"); // Step 3: insert and search in the new shape must work. @@ -176,6 +182,7 @@ mod vector_filter_contract { top_k: 1, namespace: None, kind: None, + embedding_model: None, filter: None, backend_hints: None, }) diff --git a/crates/khive-pack-memory/src/handlers.rs b/crates/khive-pack-memory/src/handlers.rs index 8346977f..3f83b818 100644 --- a/crates/khive-pack-memory/src/handlers.rs +++ b/crates/khive-pack-memory/src/handlers.rs @@ -61,6 +61,8 @@ struct RememberParams { #[serde(alias = "source")] source_id: Option, tags: Option>, + #[serde(default)] + embedding_model: Option, } #[derive(Deserialize)] @@ -74,6 +76,8 @@ struct RecallParams { top_k: Option, fusion_strategy: Option, score_floor: Option, + #[serde(default)] + embedding_model: Option, } impl RecallParams { @@ -279,6 +283,7 @@ impl MemoryPack { query: &str, token: &NamespaceToken, candidate_limit: u32, + embedding_model: Option<&str>, ) -> Result { let ns = token.namespace().as_str().to_string(); // F111: restrict text candidates to Note substrate kind so entity records @@ -299,23 +304,28 @@ impl MemoryPack { }) .await?; - let vector_hits = if self.runtime.config().embedding_model.is_some() { - let vec = self.runtime.embed(query).await?; - self.runtime - .vectors(token)? - .search(VectorSearchRequest { - query_vectors: vec![vec], - top_k: candidate_limit, - namespace: Some(ns.clone()), - // F111: already restricts to Note substrate kind - kind: Some(SubstrateKind::Note), - filter: None, - backend_hints: None, - }) - .await? - } else { - Vec::new() - }; + let vector_hits = + if self.runtime.config().embedding_model.is_some() || embedding_model.is_some() { + let model_name: String = embedding_model + .map(|m| m.to_string()) + .unwrap_or_else(|| self.runtime.default_embedder_name().to_string()); + let vec = self.runtime.embed_with_model(&model_name, query).await?; + self.runtime + .vectors_for_model(token, &model_name)? + .search(VectorSearchRequest { + query_vectors: vec![vec], + top_k: candidate_limit, + namespace: Some(ns.clone()), + // F111: already restricts to Note substrate kind + kind: Some(SubstrateKind::Note), + embedding_model: Some(model_name), + filter: None, + backend_hints: None, + }) + .await? + } else { + Vec::new() + }; Ok(RecallCandidateSet { namespace: ns, @@ -416,9 +426,17 @@ impl MemoryPack { } } + // Codex High 3 (PR #407): validate embedding_model BEFORE any note/FTS + // write so unknown-model errors are atomic (no half-written rows). + // resolve_embedding_model is sync and does not trigger model load — it + // only checks the registry contains the name. + if let Some(model_name) = p.embedding_model.as_deref() { + self.runtime.resolve_embedding_model(Some(model_name))?; + } + let note = self .runtime - .create_note_with_decay( + .create_note_with_decay_for_embedding_model( token, "memory", None, @@ -427,6 +445,7 @@ impl MemoryPack { decay_factor, Some(props), annotates, + p.embedding_model.as_deref(), ) .await?; @@ -483,7 +502,12 @@ impl MemoryPack { }; let candidate_limit = recall_candidate_count(&cfg, limit); let candidates = self - .collect_recall_candidates(&p.query, token, candidate_limit) + .collect_recall_candidates( + &p.query, + token, + candidate_limit, + p.embedding_model.as_deref(), + ) .await?; let (memory_ids, mut notes_by_id) = self .load_memory_candidate_notes(token, &candidates.text_hits, &candidates.vector_hits) @@ -604,7 +628,12 @@ impl MemoryPack { let limit = p.limit.unwrap_or(10).min(100); let candidate_limit = recall_candidate_count(&cfg, limit); let candidates = self - .collect_recall_candidates(&p.query, token, candidate_limit) + .collect_recall_candidates( + &p.query, + token, + candidate_limit, + p.embedding_model.as_deref(), + ) .await?; let text_candidates: Vec = candidates @@ -657,7 +686,12 @@ impl MemoryPack { let limit = p.limit.unwrap_or(10).min(100); let candidate_limit = recall_candidate_count(&cfg, limit); let candidates = self - .collect_recall_candidates(&p.query, token, candidate_limit) + .collect_recall_candidates( + &p.query, + token, + candidate_limit, + p.embedding_model.as_deref(), + ) .await?; let (memory_ids, notes_by_id) = self .load_memory_candidate_notes(token, &candidates.text_hits, &candidates.vector_hits) @@ -811,6 +845,7 @@ mod tests { top_k: None, fusion_strategy: None, score_floor: None, + embedding_model: None, }; let cfg = p.effective_config(RecallConfig::default()); assert!((cfg.relevance_weight - 0.70).abs() < 1e-12); @@ -830,6 +865,7 @@ mod tests { top_k: None, fusion_strategy: None, score_floor: None, + embedding_model: None, }; let cfg = p.effective_config(RecallConfig::default()); assert!((cfg.min_score - 0.5).abs() < 1e-12); @@ -851,6 +887,7 @@ mod tests { top_k: None, fusion_strategy: None, score_floor: None, + embedding_model: None, }; let cfg = p.effective_config(RecallConfig::default()); assert!((cfg.relevance_weight - 0.50).abs() < 1e-12); @@ -881,6 +918,7 @@ mod tests { top_k: None, fusion_strategy: Some("weighted".to_string()), score_floor: None, + embedding_model: None, }; let mut cfg = p.effective_config(base); diff --git a/crates/khive-pack-memory/tests/integration.rs b/crates/khive-pack-memory/tests/integration.rs index 60b467b0..0beddc27 100644 --- a/crates/khive-pack-memory/tests/integration.rs +++ b/crates/khive-pack-memory/tests/integration.rs @@ -1023,7 +1023,7 @@ async fn test_recall_default_identity() { .expect("remember succeeds"); } - // Baseline recall with no knobs — query a term present in all 4 memories + // Baseline recall with no knobs let base = registry .dispatch("recall", json!({ "query": "cell" })) .await diff --git a/crates/khive-retrieval/src/adapters/mod.rs b/crates/khive-retrieval/src/adapters/mod.rs index bcad7b45..5b233d0e 100644 --- a/crates/khive-retrieval/src/adapters/mod.rs +++ b/crates/khive-retrieval/src/adapters/mod.rs @@ -110,6 +110,7 @@ impl VectorSearch for StorageVectorSearch { top_k: top_k as u32, namespace: None, kind: None, + embedding_model: None, filter: None, backend_hints: None, }; diff --git a/crates/khive-runtime/src/error.rs b/crates/khive-runtime/src/error.rs index 5d5f2cc3..19960375 100644 --- a/crates/khive-runtime/src/error.rs +++ b/crates/khive-runtime/src/error.rs @@ -78,6 +78,9 @@ pub enum RuntimeError { #[error("unconfigured: {0} is not set")] Unconfigured(String), + #[error("unknown embedding model: {0}")] + UnknownModel(String), + #[error("embedding: {0}")] Embedding(#[from] lattice_embed::EmbedError), diff --git a/crates/khive-runtime/src/operations.rs b/crates/khive-runtime/src/operations.rs index a5abb6bb..a3330b57 100644 --- a/crates/khive-runtime/src/operations.rs +++ b/crates/khive-runtime/src/operations.rs @@ -812,7 +812,7 @@ impl KhiveRuntime { annotates: Vec, ) -> RuntimeResult { self.create_note_inner( - token, kind, name, content, salience, None, properties, annotates, + token, kind, name, content, salience, None, properties, annotates, None, ) .await } @@ -829,6 +829,34 @@ impl KhiveRuntime { decay_factor: f64, properties: Option, annotates: Vec, + ) -> RuntimeResult { + self.create_note_with_decay_for_embedding_model( + token, + kind, + name, + content, + salience, + decay_factor, + properties, + annotates, + None, + ) + .await + } + + /// Like [`create_note_with_decay`] but targets a specific embedding model. + #[allow(clippy::too_many_arguments)] + pub async fn create_note_with_decay_for_embedding_model( + &self, + token: &NamespaceToken, + kind: &str, + name: Option<&str>, + content: &str, + salience: Option, + decay_factor: f64, + properties: Option, + annotates: Vec, + embedding_model: Option<&str>, ) -> RuntimeResult { self.create_note_inner( token, @@ -839,6 +867,7 @@ impl KhiveRuntime { Some(decay_factor), properties, annotates, + embedding_model, ) .await } @@ -854,6 +883,7 @@ impl KhiveRuntime { decay_factor: Option, properties: Option, annotates: Vec, + embedding_model: Option<&str>, ) -> RuntimeResult { let ns = token.namespace().as_str(); @@ -866,6 +896,14 @@ impl KhiveRuntime { } } + // Codex round 2 Medium (PR #407): resolve embedding_model BEFORE any + // note/FTS/vector write so unknown-model errors are atomic at the + // runtime layer, not just at one pack handler. Direct Rust callers + // (other packs, integration tests) get the same guarantee. + if let Some(model_name) = embedding_model { + self.resolve_embedding_model(Some(model_name))?; + } + let mut note = Note::new(ns, kind, content); if let Some(s) = salience { note = note.with_salience(s); @@ -899,9 +937,20 @@ impl KhiveRuntime { }) .await?; - if self.config().embedding_model.is_some() { - let vector = self.embed(¬e.content).await?; - self.vectors(token)? + let embed_model_name: Option = + if self.config().embedding_model.is_some() || embedding_model.is_some() { + Some( + embedding_model + .map(|m| m.to_string()) + .unwrap_or_else(|| self.default_embedder_name().to_string()), + ) + } else { + None + }; + + if let Some(ref model_name) = embed_model_name { + let vector = self.embed_with_model(model_name, ¬e.content).await?; + self.vectors_for_model(token, model_name)? .insert( note.id, SubstrateKind::Note, @@ -989,8 +1038,8 @@ impl KhiveRuntime { if let Ok(fts) = self.text_for_notes(token) { let _ = fts.delete_document(ns, note.id).await; } - if self.config().embedding_model.is_some() { - if let Ok(vs) = self.vectors(token) { + if let Some(ref model_name) = embed_model_name { + if let Ok(vs) = self.vectors_for_model(token, model_name) { let _ = vs.delete(note.id).await; } } @@ -1348,8 +1397,13 @@ impl KhiveRuntime { self.text_for_notes(token)? .delete_document(&ns_str, id) .await?; - if self.config().embedding_model.is_some() { - self.vectors(token)?.delete(id).await?; + // Codex High 2 (PR #407): scoped delete — iterate over EVERY + // registered embedding model's vector store so non-default vectors + // don't orphan when the note is deleted. + for model_name in self.registered_embedding_model_names() { + self.vectors_for_model(token, &model_name)? + .delete(id) + .await?; } } @@ -1359,8 +1413,10 @@ impl KhiveRuntime { self.text_for_notes(token)? .delete_document(&ns_str, id) .await?; - if self.config().embedding_model.is_some() { - self.vectors(token)?.delete(id).await?; + for model_name in self.registered_embedding_model_names() { + self.vectors_for_model(token, &model_name)? + .delete(id) + .await?; } } if deleted { diff --git a/crates/khive-runtime/src/retrieval.rs b/crates/khive-runtime/src/retrieval.rs index 78585c2e..aeb7cae2 100644 --- a/crates/khive-runtime/src/retrieval.rs +++ b/crates/khive-runtime/src/retrieval.rs @@ -41,20 +41,26 @@ const RRF_K: usize = 60; const CANDIDATE_MULTIPLIER: u32 = 4; impl KhiveRuntime { - /// Generate an embedding vector for `text` using the configured local model. + /// Generate an embedding vector for `text` using the configured default model. /// /// First call lazily loads model weights (cold start cost). Subsequent calls reuse them. /// Returns `Unconfigured("embedding_model")` if no model is configured. pub async fn embed(&self, text: &str) -> RuntimeResult> { - let service = self.embedder().await?; - let model = self - .config() - .embedding_model - .expect("embedder() returns Unconfigured when model is None"); + let model_name = self.default_embedder_name(); + if model_name.is_empty() { + return Err(RuntimeError::Unconfigured("embedding_model".into())); + } + self.embed_with_model(model_name, text).await + } + + /// Generate an embedding vector for `text` using the named model. + pub async fn embed_with_model(&self, model_name: &str, text: &str) -> RuntimeResult> { + let model = self.resolve_embedding_model(Some(model_name))?; + let service = self.embedder(model_name).await?; Ok(service.embed_one(text, model).await?) } - /// Generate embeddings for multiple texts in one call. + /// Generate embeddings for multiple texts in one call using the configured default model. /// /// Delegates to the cached `EmbeddingService::embed`, so repeated texts within /// and across calls benefit from the runtime-level LRU cache. @@ -65,11 +71,24 @@ impl KhiveRuntime { if texts.is_empty() { return Ok(vec![]); } - let service = self.embedder().await?; - let model = self - .config() - .embedding_model - .expect("embedder() returns Unconfigured when model is None"); + let model_name = self.default_embedder_name(); + if model_name.is_empty() { + return Err(RuntimeError::Unconfigured("embedding_model".into())); + } + self.embed_batch_with_model(model_name, texts).await + } + + /// Generate embeddings for multiple texts using the named model. + pub async fn embed_batch_with_model( + &self, + model_name: &str, + texts: &[String], + ) -> RuntimeResult>> { + if texts.is_empty() { + return Ok(vec![]); + } + let model = self.resolve_embedding_model(Some(model_name))?; + let service = self.embedder(model_name).await?; Ok(service.embed(texts, model).await?) } @@ -111,6 +130,7 @@ impl KhiveRuntime { top_k, namespace: Some(ns), kind, + embedding_model: None, filter: None, backend_hints: None, }) @@ -242,6 +262,7 @@ impl KhiveRuntime { top_k, namespace: Some(ns), kind: Some(SubstrateKind::Entity), + embedding_model: None, filter: None, backend_hints: None, }) @@ -269,6 +290,7 @@ impl KhiveRuntime { top_k: candidate_ids.len() as u32, namespace: Some(ns), kind: Some(SubstrateKind::Entity), + embedding_model: None, filter: None, backend_hints: None, }) diff --git a/crates/khive-runtime/src/runtime.rs b/crates/khive-runtime/src/runtime.rs index 1babe5b8..3f054574 100644 --- a/crates/khive-runtime/src/runtime.rs +++ b/crates/khive-runtime/src/runtime.rs @@ -1,6 +1,9 @@ //! KhiveRuntime — composable handle to all storage capabilities. -use std::sync::{Arc, RwLock}; +use std::{ + collections::HashMap, + sync::{Arc, RwLock}, +}; use khive_db::StorageBackend; use khive_gate::{ActorRef, AllowAllGate, GateRef}; @@ -136,6 +139,13 @@ pub struct RuntimeConfig { /// `EmbedderRegistry`. This field persists for backward compatibility until /// the embedder registry is fully plumbed. pub embedding_model: Option, + /// Additional embedding models to make available by request name. + /// + /// `embedding_model` remains the default used by existing `embed()` and + /// `embed_batch()` callers. This list adds non-default models that can be + /// selected with `embedder(name)`, `embed_with_model(...)`, memory + /// `remember.embedding_model`, and memory `recall.embedding_model`. + pub additional_embedding_models: Vec, /// Authorization gate consulted before each verb dispatch (ADR-029). /// Default: `AllowAllGate` (permissive). For production policy enforcement, /// plug in a Rego- or capability-witness-backed impl. @@ -173,6 +183,10 @@ impl Default for RuntimeConfig { .ok() .and_then(|s| s.parse().ok()) .or(Some(EmbeddingModel::AllMiniLmL6V2)); + let additional_embedding_models = std::env::var("KHIVE_ADDITIONAL_EMBEDDING_MODELS") + .ok() + .map(|s| parse_embedding_model_list(&s)) + .unwrap_or_default(); let packs = std::env::var("KHIVE_PACKS") .ok() .map(|s| parse_pack_list(&s)) @@ -182,6 +196,7 @@ impl Default for RuntimeConfig { db_path, default_namespace: Namespace::local(), embedding_model, + additional_embedding_models, gate: Arc::new(AllowAllGate), packs, backend_id: BackendId::main(), @@ -191,6 +206,12 @@ impl Default for RuntimeConfig { // ---- KhiveRuntime ---- +#[derive(Clone)] +struct EmbedderEntry { + model: EmbeddingModel, + cell: Arc>>, +} + /// Composable runtime handle used by the MCP server. /// /// Wraps a `StorageBackend` and provides namespace-scoped accessor methods @@ -199,7 +220,8 @@ impl Default for RuntimeConfig { pub struct KhiveRuntime { backend: Arc, config: RuntimeConfig, - embedder: Arc>>, + embedders: Arc>, + default_embedder_name: Arc, /// Pack-extensible edge endpoint rules (ADR-031). Shared across clones /// via `Arc>`; installed once by the transport after the /// `VerbRegistry` is built. Empty until installed — base rules @@ -223,10 +245,13 @@ impl KhiveRuntime { } None => StorageBackend::memory()?, }; + register_configured_embedding_models(&backend, &config)?; + let (embedders, default_embedder_name) = build_embedder_registry(&config); Ok(Self { backend: Arc::new(backend), config, - embedder: Arc::new(OnceCell::new()), + embedders: Arc::new(embedders), + default_embedder_name, edge_rules: Arc::new(RwLock::new(Vec::new())), }) } @@ -241,10 +266,15 @@ impl KhiveRuntime { /// storage access is through the provided `backend`. Set `backend_id` and /// `default_namespace` via the config builder pattern if non-defaults are needed. pub fn from_backend(backend: Arc, config: RuntimeConfig) -> Self { + if let Err(err) = register_configured_embedding_models(&backend, &config) { + tracing::warn!(error = %err, "failed to register configured embedding models"); + } + let (embedders, default_embedder_name) = build_embedder_registry(&config); Self { backend, config, - embedder: Arc::new(OnceCell::new()), + embedders: Arc::new(embedders), + default_embedder_name, edge_rules: Arc::new(RwLock::new(Vec::new())), } } @@ -255,6 +285,7 @@ impl KhiveRuntime { db_path: None, default_namespace: Namespace::local(), embedding_model: None, + additional_embedding_models: vec![], gate: Arc::new(AllowAllGate), packs: vec!["kg".to_string()], backend_id: BackendId::main(), @@ -321,12 +352,28 @@ impl KhiveRuntime { &self, token: &NamespaceToken, ) -> RuntimeResult> { - let model = self - .config - .embedding_model - .ok_or_else(|| crate::RuntimeError::Unconfigured("embedding_model".into()))?; + let model = self.resolve_embedding_model(None)?; + self.vectors_for_embedding_model(token, model) + } + + /// Get a VectorStore for a specific named embedding model, scoped to the token's namespace. + pub fn vectors_for_model( + &self, + token: &NamespaceToken, + model_name: &str, + ) -> RuntimeResult> { + let model = self.resolve_embedding_model(Some(model_name))?; + self.vectors_for_embedding_model(token, model) + } + + fn vectors_for_embedding_model( + &self, + token: &NamespaceToken, + model: EmbeddingModel, + ) -> RuntimeResult> { Ok(self.backend.vectors_for_namespace( &vec_model_key(model), + &model.to_string(), model.dimensions(), token.namespace().as_str(), )?) @@ -380,28 +427,66 @@ impl KhiveRuntime { .unwrap_or_default() } - /// Get the lazily-initialized embedding service. + /// Return the name of the default embedding model (empty string if none configured). + pub fn default_embedder_name(&self) -> &str { + self.default_embedder_name.as_ref() + } + + /// Resolve a model name (or `None` for the default) to an `EmbeddingModel`. + /// + /// Returns `UnknownModel` if the name is not in the registry, or + /// `Unconfigured` if `None` is passed and no default model is set. + pub fn resolve_embedding_model(&self, name: Option<&str>) -> RuntimeResult { + let model = match name { + Some(raw) => parse_embedding_model_alias(raw) + .ok_or_else(|| crate::RuntimeError::UnknownModel(raw.to_string()))?, + None => self + .config + .embedding_model + .ok_or_else(|| crate::RuntimeError::Unconfigured("embedding_model".into()))?, + }; + let key = model.to_string(); + if self.embedders.contains_key(&key) { + Ok(model) + } else { + Err(crate::RuntimeError::UnknownModel( + name.unwrap_or_else(|| self.default_embedder_name()) + .to_string(), + )) + } + } + + /// Names of all registered embedding models in this runtime. + /// + /// Useful for operations that must touch every model's storage (e.g., + /// scoped vector deletion on note delete — codex High 2 (PR #407)). + /// The default model is included. + pub fn registered_embedding_model_names(&self) -> Vec { + self.embedders.keys().cloned().collect() + } + + /// Get the lazily-initialized embedding service for the named model. /// /// Returns a `CachedEmbeddingService` wrapping a `NativeEmbeddingService`. /// First call loads the model (cold start cost); subsequent calls are cheap and /// benefit from LRU caching of repeated inputs. - /// - /// Returns `Unconfigured("embedding_model")` if no model is set. - pub async fn embedder(&self) -> RuntimeResult> { - let model = self - .config - .embedding_model - .ok_or_else(|| crate::RuntimeError::Unconfigured("embedding_model".into()))?; - let service = self - .embedder + pub async fn embedder(&self, name: &str) -> RuntimeResult> { + let model = self.resolve_embedding_model(Some(name))?; + let key = model.to_string(); + let entry = self + .embedders + .get(&key) + .ok_or_else(|| crate::RuntimeError::UnknownModel(name.to_string()))? + .clone(); + Ok(entry + .cell .get_or_init(|| async move { - let native = Arc::new(NativeEmbeddingService::with_model(model)); + let native = Arc::new(NativeEmbeddingService::with_model(entry.model)); let cached = CachedEmbeddingService::with_default_cache(native); Arc::new(cached) as Arc }) .await - .clone(); - Ok(service) + .clone()) } } @@ -417,6 +502,81 @@ fn sanitize_key(s: &str) -> String { .collect() } +fn build_embedder_registry(config: &RuntimeConfig) -> (HashMap, Arc) { + let mut embedders = HashMap::new(); + for model in configured_embedding_models(config) { + embedders.insert( + model.to_string(), + EmbedderEntry { + model, + cell: Arc::new(OnceCell::new()), + }, + ); + } + let default_embedder_name = config + .embedding_model + .map(|model| Arc::::from(model.to_string())) + .unwrap_or_else(|| Arc::::from("")); + (embedders, default_embedder_name) +} + +fn configured_embedding_models(config: &RuntimeConfig) -> Vec { + let mut models = Vec::new(); + if let Some(model) = config.embedding_model { + models.push(model); + } + models.extend(config.additional_embedding_models.iter().copied()); + models.sort_by_key(|model| model.to_string()); + models.dedup(); + models +} + +fn register_configured_embedding_models( + backend: &StorageBackend, + config: &RuntimeConfig, +) -> RuntimeResult<()> { + for model in configured_embedding_models(config) { + backend.register_embedding_model( + &model.to_string(), + model.model_id(), + model.key_version(), + model.dimensions() as u32, + )?; + } + Ok(()) +} + +/// Parse a comma- or whitespace-separated list of embedding model names. +fn parse_embedding_model_list(s: &str) -> Vec { + parse_pack_list(s) + .into_iter() + .filter_map(|raw| { + let parsed = parse_embedding_model_alias(&raw); + if parsed.is_none() && !raw.trim().is_empty() { + // Codex Medium (PR #407): silent filter_map masks operator typos. Warn loudly + // so misconfiguration surfaces at startup rather than as an UnknownModel error + // at request time. We do not fail startup — a partially valid list still + // produces a functional runtime — but the warning is unambiguous. + tracing::warn!( + model = %raw, + "KHIVE_ADDITIONAL_EMBEDDING_MODELS contains unknown model name; ignored. \ + Valid forms: short alias like 'paraphrase' or a fully-qualified key \ + from lattice_embed::EmbeddingModel::from_str." + ); + } + parsed + }) + .collect() +} + +fn parse_embedding_model_alias(name: &str) -> Option { + let normalized = name.trim().to_ascii_lowercase().replace('_', "-"); + match normalized.as_str() { + "paraphrase" => Some(EmbeddingModel::ParaphraseMultilingualMiniLmL12V2), + _ => normalized.parse().ok(), + } +} + #[cfg(test)] mod tests { use super::*; @@ -435,6 +595,7 @@ mod tests { db_path: Some(path.clone()), default_namespace: Namespace::parse("test").unwrap(), embedding_model: None, + additional_embedding_models: vec![], gate: Arc::new(AllowAllGate), packs: vec!["kg".to_string()], backend_id: BackendId::main(), @@ -451,6 +612,7 @@ mod tests { db_path: None, default_namespace: Namespace::local(), embedding_model: None, + additional_embedding_models: vec![], gate: Arc::new(AllowAllGate), packs: vec!["kg".to_string()], backend_id: BackendId::new("lore"), diff --git a/crates/khive-runtime/tests/integration.rs b/crates/khive-runtime/tests/integration.rs index 7775386b..257b4f06 100644 --- a/crates/khive-runtime/tests/integration.rs +++ b/crates/khive-runtime/tests/integration.rs @@ -568,6 +568,7 @@ async fn file_backed_runtime_persists() { gate: std::sync::Arc::new(khive_runtime::AllowAllGate), packs: vec!["kg".to_string()], backend_id: khive_runtime::BackendId::main(), + additional_embedding_models: vec![], }; let rt = KhiveRuntime::new(config).unwrap(); let tok = rt.authorize(Namespace::local()); @@ -585,6 +586,7 @@ async fn file_backed_runtime_persists() { gate: std::sync::Arc::new(khive_runtime::AllowAllGate), packs: vec!["kg".to_string()], backend_id: khive_runtime::BackendId::main(), + additional_embedding_models: vec![], }; let rt = KhiveRuntime::new(config).unwrap(); let tok = rt.authorize(Namespace::local()); diff --git a/crates/khive-storage/src/types.rs b/crates/khive-storage/src/types.rs index 70430009..11066599 100644 --- a/crates/khive-storage/src/types.rs +++ b/crates/khive-storage/src/types.rs @@ -181,6 +181,8 @@ pub struct VectorRecord { pub namespace: String, /// Which embedding field this record represents (e.g. `"entity.body"`). pub field: String, + #[serde(default)] + pub embedding_model: Option, /// One or many dense vectors; sqlite-vec backends enforce `vectors.len() == 1`. pub vectors: Vec>, pub updated_at: DateTime, @@ -193,6 +195,9 @@ pub struct VectorSearchRequest { pub top_k: u32, pub namespace: Option, pub kind: Option, + /// Restrict results to this embedding model. Defaults to the store's own model. + #[serde(default)] + pub embedding_model: Option, /// Optional metadata filter for backends that support pushdown. pub filter: Option, /// Backend-specific hints (opaque JSON blob, ignored by default). diff --git a/crates/khive-storage/src/vectors.rs b/crates/khive-storage/src/vectors.rs index 95bf1161..0e6cc797 100644 --- a/crates/khive-storage/src/vectors.rs +++ b/crates/khive-storage/src/vectors.rs @@ -307,6 +307,7 @@ mod tests { top_k: 5, namespace: None, kind: None, + embedding_model: None, filter: None, backend_hints: None, }; @@ -326,6 +327,7 @@ mod tests { top_k: 5, namespace: None, kind: None, + embedding_model: None, filter: None, backend_hints: None, }; @@ -352,6 +354,7 @@ mod tests { top_k: 3, namespace: None, kind: None, + embedding_model: None, filter: None, backend_hints: None, }, @@ -360,6 +363,7 @@ mod tests { top_k: 3, namespace: None, kind: None, + embedding_model: None, filter: None, backend_hints: None, }, @@ -433,6 +437,7 @@ mod tests { top_k: 1, namespace: None, kind: None, + embedding_model: None, filter: None, backend_hints: None, }]; diff --git a/crates/kkernel/Cargo.toml b/crates/kkernel/Cargo.toml index ba70f096..2f1354e4 100644 --- a/crates/kkernel/Cargo.toml +++ b/crates/kkernel/Cargo.toml @@ -12,6 +12,7 @@ description = "khive kernel — admin/management Rust binary (sync, pack introsp [dependencies] khive-runtime = { version = "0.2.2", path = "../khive-runtime" } +khive-db = { version = "0.2.2", path = "../khive-db" } khive-storage = { version = "0.2.2", path = "../khive-storage" } khive-types = { version = "0.2.2", path = "../khive-types" } khive-vcs = { version = "0.2.2", path = "../khive-vcs" } diff --git a/crates/kkernel/src/engine.rs b/crates/kkernel/src/engine.rs index d16aee6c..6e16923e 100644 --- a/crates/kkernel/src/engine.rs +++ b/crates/kkernel/src/engine.rs @@ -203,36 +203,22 @@ fn cmd_engine_drift_check(_args: EngineDriftCheckArgs) -> Result<()> { // ── Internal helpers ────────────────────────────────────────────────────────── fn query_embedding_models( - _db: Option<&std::path::Path>, + db: Option<&std::path::Path>, engine_filter: Option<&str>, ) -> Result> { - // The _embedding_models table is created by the ADR-043 schema migration. - // Until that migration lands, the table may not exist; return an empty list - // with a log rather than a hard error so `kkernel engine list` is usable - // before full ADR-043 deployment. - // - // A full implementation opens the SQLite DB, queries: - // SELECT engine_name, model_id, key_version, dim, status, - // activated_at, superseded_at - // FROM _embedding_models - // [WHERE engine_name = ?] - // ORDER BY engine_name, activated_at NULLS LAST - // - // and maps rows to EngineModelRecord. - // - // This scaffold returns an empty list so the CLI compiles and tests can - // verify the command routing surface without a live database. - - if let Some(engine) = engine_filter { - tracing::debug!( - engine, - "query_embedding_models: _embedding_models not yet populated" - ); - } else { - tracing::debug!("query_embedding_models: _embedding_models not yet populated"); - } - - Ok(Vec::new()) + let rows = khive_db::query_embedding_models(db, engine_filter)?; + Ok(rows + .into_iter() + .map(|r| EngineModelRecord { + engine_name: r.engine_name, + model_id: r.model_id, + key_version: r.key_version, + dimensions: r.dimensions, + status: r.status, + activated_at: r.activated_at, + superseded_at: r.superseded_at, + }) + .collect()) } // ── Tests ───────────────────────────────────────────────────────────────────── diff --git a/docs/adr/ADR-015-schema-migrations.md b/docs/adr/ADR-015-schema-migrations.md index a08ef9fc..7384f456 100644 --- a/docs/adr/ADR-015-schema-migrations.md +++ b/docs/adr/ADR-015-schema-migrations.md @@ -28,23 +28,24 @@ mechanism that: The canonical ledger of database schema migration versions. Migration versions are assigned in ledger order; they are NOT required to match ADR number order. -| Version | Owning ADR | Migration name | Status | -| ------: | ----------- | ------------------------------------------------- | ------- | -| V1 | (initial) | initial_schema | shipped | -| V2 | (initial) | add_name_to_notes | shipped | -| V3 | (initial) | add_events_namespace_created_index | shipped | -| V4 | (initial) | dedupe_graph_edge_triples | shipped | -| V5 | c01/ADR-001 | add_entity_type_to_entities | shipped | -| V6 | (no-op) | reserved_adr043_embedding_pipeline_extensions | shipped | -| V7 | (no-op) | reserved_adr046_event_sourced_proposals_index | shipped | -| V8 | (no-op) | reserved_adr041_event_observations_and_session_id | shipped | -| V9 | c03/ADR-004 | edge_lifecycle_and_target_backend | shipped | -| V10 | c04/ADR-019 | note_status_and_nullable_metrics | shipped | -| V11 | c04/ADR-014 | entity_tombstone_columns | shipped | -| V12 | c04/ADR-019 | nullable_note_metrics | shipped | -| V13 | c06/ADR-041 | event_observability_provenance | shipped | -| V14 | c20/ADR-043 | embedding_model_registry | shipped | -| V15 | c22/ADR-046 | proposals_open | shipped | +| Version | Owning ADR | Migration name | Status | +| ------: | ------------ | ------------------------------------------------- | ------- | +| V1 | (initial) | initial_schema | shipped | +| V2 | (initial) | add_name_to_notes | shipped | +| V3 | (initial) | add_events_namespace_created_index | shipped | +| V4 | (initial) | dedupe_graph_edge_triples | shipped | +| V5 | c01/ADR-001 | add_entity_type_to_entities | shipped | +| V6 | (no-op) | reserved_adr043_embedding_pipeline_extensions | shipped | +| V7 | (no-op) | reserved_adr046_event_sourced_proposals_index | shipped | +| V8 | (no-op) | reserved_adr041_event_observations_and_session_id | shipped | +| V9 | c03/ADR-004 | edge_lifecycle_and_target_backend | shipped | +| V10 | c04/ADR-019 | note_status_and_nullable_metrics | shipped | +| V11 | c04/ADR-014 | entity_tombstone_columns | shipped | +| V12 | c04/ADR-019 | nullable_note_metrics | shipped | +| V13 | c06/ADR-041 | event_observability_provenance | shipped | +| V14 | c20/ADR-043 | embedding_model_registry | shipped | +| V15 | c22/ADR-046 | proposals_open | shipped | +| V16 | v022/ADR-043 | vector_embedding_model_tag | shipped | > **Amendment (2026-05-24, cluster-24 + post-integration)**: The ledger above reflects what > actually shipped on `integration/v1-adr-alignment` after parallel cluster landings c01, c03, @@ -56,7 +57,13 @@ The canonical ledger of database schema migration versions. Migration versions a > integration merge. c20 (embedding model registry per ADR-043) landed at V14 — the same ADR > the V6 reservation originally anticipated, hence V6 remains a no-op slot. c22 (proposals_open > projection per ADR-046) landed at V15. V6–V8 are no-op placeholder slots to maintain -> contiguity. Versions V1–V15 are production schema and are frozen. +> contiguity. +> +> **V16 amendment (2026-05-25, show v022-polish)**: V16 (`vector_embedding_model_tag`) adds +> a TEXT `embedding_model` column and composite index to regular `vec_*` tables, completing +> the dual-embedding plumbing described in ADR-043 §1. sqlite-vec virtual tables are handled +> at open time via schema rebuild because vec0 does not support `ALTER TABLE`. Versions V1–V16 +> are production schema and are frozen. > **Invariant**: ADR number order and migration version order are independent. Migration versions reflect schema ledger assignment order. A migration may only depend on schema created by earlier versions. diff --git a/docs/adr/ADR-043-embedding-model-migration.md b/docs/adr/ADR-043-embedding-model-migration.md index e668bdd1..969e35e5 100644 --- a/docs/adr/ADR-043-embedding-model-migration.md +++ b/docs/adr/ADR-043-embedding-model-migration.md @@ -105,24 +105,55 @@ impossible — any attempt to insert a second `active` row for the same engine f the constraint. Migrations therefore execute as `BEGIN; UPDATE active→superseded; UPDATE pending→active; COMMIT;` — atomic by virtue of the index. -#### Vector store column addition +#### Vector store column addition (V16, ADR-015) -Each `vec_` table (ADR-031 D3) gains a column: +Each regular `vec_` table (ADR-031 D3) gains a TEXT model tag column. +This was formalized in migration V16: ```sql -ALTER TABLE vec_ ADD COLUMN embedding_model_id BLOB - REFERENCES _embedding_models(id); -CREATE INDEX idx_vec__model ON vec_(embedding_model_id); +ALTER TABLE vec_ ADD COLUMN embedding_model TEXT NOT NULL + DEFAULT 'all-minilm-l6-v2'; +CREATE INDEX idx_vec__subject_model + ON vec_(subject_id, embedding_model); ``` -Backfilled on the same migration: existing rows get the engine's current active -model's id. - -SQLite does not support `ALTER COLUMN ... SET NOT NULL`. The `embedding_model_id` -column is enforced via a `CHECK (embedding_model_id IS NOT NULL)` constraint added -through SQLite's standard table-rebuild pattern (create new table with constraint, -copy data, drop old, rename) — see ADR-015 for the migration template. This rebuild -is performed as the final step of the startup backfill described in §8 below. +The composite `(subject_id, embedding_model)` index supports the scoped recall +SQL: `WHERE subject_id = ? AND embedding_model = ?`. The default value at column +creation time was chosen so existing rows backfill to the legacy MiniLM model; +deployments using a non-default model **must** run the dedicated backfill worker +described in §8 before relying on model-scoped recall. + +**Design trade-off — TEXT vs BLOB FK.** ADR-043's first draft (pre-V16) specified +`embedding_model_id BLOB REFERENCES _embedding_models(id)`. V16 instead stores +the model_id directly as TEXT, joining against `_embedding_models.model_id` +when needed: + +- TEXT model_id is the natural primary key used everywhere else in the runtime + (kkernel engine list, `EmbeddingService::key_version()`, env var + `KHIVE_ADDITIONAL_EMBEDDING_MODELS`) — keeping the same shape end-to-end. +- BLOB FK would require a sub-select on every vector insert/search to resolve + the active model's UUID. The hot path is recall scoring; the join cost is + unjustified for a column whose values change only on registry events. +- Schema-level referential integrity is replaced by application-level + validation in the runtime registry: unknown model names are rejected at + `KhiveRuntime::embedder(name)` and at `RecallParams.embedding_model` + validation. + +The `_embedding_models` registry table (V14) still owns the authoritative model +metadata (dim, output_dim, status, key_version). V16's `embedding_model TEXT` +column is the foreign-key-by-value reference back to `_embedding_models.model_id`. + +**sqlite-vec virtual tables.** vec0 virtual tables cannot accept `ALTER TABLE +ADD COLUMN` because they declare their columns at `CREATE VIRTUAL TABLE` time. +V16 handles this via the open-time path in `khive-db/src/backend.rs`: when +opening a `vec_` table that lacks `embedding_model`, the runtime +rebuilds the virtual table with the new schema. **Existing rows are lost on +rebuild** — this is acceptable for deployments that have not yet enabled +dual-embedding because vectors will be re-embedded by the next backfill cycle, +but **operators must take a backup before upgrading any production deployment +with persisted non-default embeddings**. A follow-up migration (tracked in +ADR-043 §8.2) will implement a copy-with-default rebuild to preserve old +vectors with their inferred model tag. ### 2. Triggers — three sources, one event @@ -326,23 +357,35 @@ All four carry `engine_name` and the relevant `_embedding_models.id`(s) in paylo None carries `served_by_profile_id` — these are operator/system events, not profile-served (ADR-032 §3 rule). -### 8. Backward compatibility — one-shot startup migration +### 8. Backward compatibility — one-shot startup migration (V14 + V16) + +Deployments predating this ADR have `vec_` tables without an +`embedding_model` column and no `_embedding_models` rows. The startup +migration runs in two steps, landed in two separate `VersionedMigration` +slots: + +**V14 — `embedding_model_registry`** (already shipped): + +1. `CREATE TABLE _embedding_models` (per §1 schema). +2. `CREATE UNIQUE INDEX idx_embed_models_one_active`. +3. `CREATE INDEX idx_embed_models_engine_status`. -Deployments predating this ADR have `vec_` tables without `embedding_model_id` -and no `_embedding_models` rows. On first startup post-ADR-043: +**V16 — `vector_embedding_model_tag`** (shipped in v022-polish): -1. Run the schema migration (creates `_embedding_models`, adds `embedding_model_id` - to `vec_` tables as nullable). -2. For each `[[engines]]` entry: derive `canonical_key` via lattice's - `EmbeddingKey::canonical_bytes()`, insert one `_embedding_models` row with - `status='active'`, `activated_at=now`, `created_at=now`. -3. Backfill all `vec_` rows with that engine's newly-inserted model id. -4. Tighten the `embedding_model_id` column by rebuilding the table with a - `CHECK (embedding_model_id IS NOT NULL)` constraint (SQLite table-rebuild pattern — - see §1 and ADR-015). This runs as run-once startup code after the SQL migration - completes, not as an additional SQL migration step. +4. For each existing regular `vec_*` table (discovered at runtime, validated as + alphanumeric-suffix only): `ALTER TABLE vec_ ADD COLUMN embedding_model + TEXT NOT NULL DEFAULT 'all-minilm-l6-v2'`. +5. `CREATE INDEX idx_vec__subject_model ON vec_(subject_id, embedding_model)`. +6. sqlite-vec virtual tables (`vec0`) cannot accept `ALTER TABLE` — handled at + open time in `khive-db/src/backend.rs` by rebuilding the virtual table with + the new schema. See §1.1 final paragraph for the operator backup warning; + a preserving rebuild is the documented follow-up. -The startup migration emits one `EmbeddingModelChanged` event per engine with +Operator population of `_embedding_models` (steps for populating registry rows +from `[[engines]]` config and emitting `EmbeddingModelChanged` events) is a +separate startup-code path tracked in #385, not part of the SQL migrations. + +The startup population emits one `EmbeddingModelChanged` event per engine with `source_model_id = None` and `initiated_by = ConfigDiff` so the audit trail starts clean. @@ -398,14 +441,14 @@ Tracked in `.khive/plans/embedding-version-config.md`. ## Alternatives Considered -| Alternative | Why rejected | -| ---------------------------------------------------------------------------- | ------------------------------------------------------------------------------------------- | -| Reimplement migration state machine in khive | Lattice ships it; duplication has no upside | -| Store model id on every record (`notes`, `entities`) row | Triple-write cost; the vector table is the right grain — only vectors are model-bound | -| Migrate vectors in place (rewrite same table) | Loses atomicity. Failure mid-migration leaves a half-rewritten table with no clean rollback | -| MCP verb `brain.migrate_model` for agent-triggered migrations | Crosses the brain-substrate boundary; risks the feedback loop described in Rationale | -| Auto-archive `superseded` rows after N days | Premature; an explicit `khive engine archive --before ` is enough | -| Per-record `model_id` on `vec_` instead of FK to `_embedding_models` | Denormalized; can't carry the supersession chain or `superseded_by` link | +| Alternative | Why rejected | +| -------------------------------------------------------------------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | +| Reimplement migration state machine in khive | Lattice ships it; duplication has no upside | +| Store model id on every record (`notes`, `entities`) row | Triple-write cost; the vector table is the right grain — only vectors are model-bound | +| Migrate vectors in place (rewrite same table) | Loses atomicity. Failure mid-migration leaves a half-rewritten table with no clean rollback | +| MCP verb `brain.migrate_model` for agent-triggered migrations | Crosses the brain-substrate boundary; risks the feedback loop described in Rationale | +| Auto-archive `superseded` rows after N days | Premature; an explicit `khive engine archive --before ` is enough | +| ~~Per-record `model_id` on `vec_` instead of FK to `_embedding_models`~~ | **Superseded by V16 (2026-05-25)**: per-record `embedding_model TEXT` is what V16 actually ships. The supersession chain is preserved via `_embedding_models.superseded_by` joined on `model_id`. See §1.1 for the trade-off rationale (hot-path join cost, end-to-end consistency with kkernel/env-var) | ## Consequences @@ -474,19 +517,23 @@ payload. ### Migration version -A new `VersionedMigration` in `crates/khive-db/src/migrations.rs` with -`version = 5` (current latest is V4 — `dedupe_graph_edge_triples`): +The ADR-043 schema work landed in two ledger versions in +`crates/khive-db/src/migrations.rs`: + +**V14 — `embedding_model_registry`** (cluster-20): -1. `CREATE TABLE _embedding_models` (above) +1. `CREATE TABLE _embedding_models` (per §1) 2. `CREATE UNIQUE INDEX idx_embed_models_one_active` 3. `CREATE INDEX idx_embed_models_engine_status` -4. For each existing `vec_` table (discovered via the catalog): - - `ALTER TABLE vec_ ADD COLUMN embedding_model_id BLOB REFERENCES _embedding_models(id)` - - `CREATE INDEX idx_vec__model ON vec_(embedding_model_id)` -5. Startup backfill (run-once code, not a SQL migration): populate - `_embedding_models` from `[[engines]]`, backfill the FK column, then rebuild - `vec_` with a `CHECK (embedding_model_id IS NOT NULL)` constraint via - SQLite's table-rebuild pattern (ADR-015). + +**V16 — `vector_embedding_model_tag`** (v022-polish): + +4. For each existing regular `vec_*` table (runtime-discovered, name-validated): + - `ALTER TABLE vec_ ADD COLUMN embedding_model TEXT NOT NULL DEFAULT 'all-minilm-l6-v2'` + - `CREATE INDEX idx_vec__subject_model ON vec_(subject_id, embedding_model)` +5. Startup backfill (run-once code, tracked separately in #385): populate + `_embedding_models` from `[[engines]]`; per-table model-inferred tag rewrite + for deployments with non-default models (deferred — see §1.1 final paragraph). ### Worker registration