From 2ccef72cf85248d589d2787627a3203264ef3c9c Mon Sep 17 00:00:00 2001
From: OceanLi <122793010+ohdearquant@users.noreply.github.com>
Date: Fri, 22 May 2026 12:27:22 -0400
Subject: [PATCH 1/4] =?UTF-8?q?feat:=20port=20khive-retrieval=20=E2=80=94?=
 =?UTF-8?q?=20hybrid=20retrieval=20composer?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Ports the retrieval orchestration layer from khive-internal. Composes
khive-hnsw (vector), khive-bm25 (text), and khive-fusion into a unified
hybrid search pipeline with graph traversal, persistence adapters,
cross-encoder reranking support, and evaluation harness.

Includes prerequisite crates (hnsw, bm25, fusion) and fold objective
registry for compilability. Gates on their individual PRs for merge.

~7.1K LOC, tests pass.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 crates/Cargo.toml                             |    4 +
 crates/khive-bm25/Cargo.toml                  |   19 +
 crates/khive-bm25/src/config.rs               |  104 +
 crates/khive-bm25/src/error.rs                |   54 +
 crates/khive-bm25/src/index/bench_wand.rs     |  159 ++
 crates/khive-bm25/src/index/indexing.rs       |  203 ++
 crates/khive-bm25/src/index/memory.rs         |  170 ++
 crates/khive-bm25/src/index/mod.rs            | 1040 +++++++++
 crates/khive-bm25/src/index/search.rs         | 1623 ++++++++++++++
 crates/khive-bm25/src/index/tests_wand.rs     |  329 +++
 crates/khive-bm25/src/lib.rs                  |   61 +
 crates/khive-bm25/src/metrics.rs              |   95 +
 crates/khive-bm25/src/tests.rs                | 1181 ++++++++++
 crates/khive-bm25/src/tokenizer.rs            |  261 +++
 crates/khive-fold/src/objective/mod.rs        |    2 +
 crates/khive-fold/src/objective/registry.rs   |  275 +++
 crates/khive-fusion/Cargo.toml                |   18 +
 crates/khive-fusion/src/fuse.rs               |  172 ++
 crates/khive-fusion/src/lib.rs                |   68 +
 crates/khive-fusion/src/rrf.rs                |  209 ++
 crates/khive-fusion/src/strategy.rs           |  116 +
 crates/khive-fusion/src/tests.rs              |  316 +++
 crates/khive-fusion/src/union.rs              |  103 +
 crates/khive-fusion/src/weighted.rs           |  488 ++++
 crates/khive-hnsw/Cargo.toml                  |   30 +
 crates/khive-hnsw/src/alias/drain.rs          |  240 ++
 crates/khive-hnsw/src/alias/error.rs          |  102 +
 crates/khive-hnsw/src/alias/manager.rs        |  600 +++++
 crates/khive-hnsw/src/alias/mod.rs            |   37 +
 crates/khive-hnsw/src/alias/validation.rs     |  184 ++
 crates/khive-hnsw/src/arena/arena.rs          |  169 ++
 crates/khive-hnsw/src/arena/arena_heap.rs     |  149 ++
 crates/khive-hnsw/src/arena/arena_vec.rs      |  281 +++
 crates/khive-hnsw/src/arena/mod.rs            |   29 +
 crates/khive-hnsw/src/arena/tests.rs          |  440 ++++
 .../src/checkpoint/integration_tests.rs       |  181 ++
 crates/khive-hnsw/src/checkpoint/mod.rs       |  423 ++++
 crates/khive-hnsw/src/checkpoint/tests.rs     |  692 ++++++
 crates/khive-hnsw/src/config.rs               |  253 +++
 crates/khive-hnsw/src/distance.rs             |  368 +++
 crates/khive-hnsw/src/error.rs                |   85 +
 crates/khive-hnsw/src/index/build_batch.rs    |  240 ++
 crates/khive-hnsw/src/index/insert.rs         |  325 +++
 crates/khive-hnsw/src/index/memory.rs         |   88 +
 crates/khive-hnsw/src/index/mod.rs            |  730 ++++++
 crates/khive-hnsw/src/index/neighbors.rs      |   72 +
 crates/khive-hnsw/src/index/rebuild.rs        |  288 +++
 crates/khive-hnsw/src/index/search.rs         |  836 +++++++
 crates/khive-hnsw/src/lib.rs                  |  131 ++
 crates/khive-hnsw/src/metrics.rs              |  148 ++
 crates/khive-hnsw/src/node.rs                 |   62 +
 crates/khive-hnsw/src/search_context.rs       |  165 ++
 crates/khive-hnsw/src/stats.rs                |   79 +
 crates/khive-hnsw/src/tests.rs                | 1977 +++++++++++++++++
 crates/khive-retrieval/Cargo.toml             |   56 +
 crates/khive-retrieval/src/adapters/mod.rs    |  456 ++++
 crates/khive-retrieval/src/error.rs           |  505 +++++
 .../khive-retrieval/src/eval/engine_eval.rs   |  655 ++++++
 crates/khive-retrieval/src/eval/mod.rs        |    5 +
 crates/khive-retrieval/src/graph/bfs.rs       |  148 ++
 crates/khive-retrieval/src/graph/compat.rs    |  244 ++
 crates/khive-retrieval/src/graph/dfs.rs       |  135 ++
 crates/khive-retrieval/src/graph/helpers.rs   |  283 +++
 crates/khive-retrieval/src/graph/mod.rs       |   99 +
 crates/khive-retrieval/src/graph/shortest.rs  |  266 +++
 crates/khive-retrieval/src/graph/tests.rs     |  134 ++
 crates/khive-retrieval/src/graph/types.rs     |  208 ++
 crates/khive-retrieval/src/hybrid/config.rs   |  260 +++
 .../src/hybrid/cross_encoder.rs               |  291 +++
 .../khive-retrieval/src/hybrid/dual_index.rs  |  524 +++++
 crates/khive-retrieval/src/hybrid/mod.rs      |   83 +
 crates/khive-retrieval/src/hybrid/searcher.rs |  354 +++
 crates/khive-retrieval/src/lib.rs             |  190 ++
 crates/khive-retrieval/src/metrics.rs         |  353 +++
 crates/khive-retrieval/src/persist/bm25.rs    |  112 +
 crates/khive-retrieval/src/persist/hnsw.rs    |  127 ++
 crates/khive-retrieval/src/persist/mod.rs     |  318 +++
 crates/khive-retrieval/src/persist/shadow.rs  |  105 +
 crates/khive-retrieval/src/persist/tests.rs   | 1214 ++++++++++
 crates/khive-retrieval/src/policy.rs          |  349 +++
 crates/khive-retrieval/src/query_ir.rs        |  632 ++++++
 .../src/replay/engine_replay.rs               | 1027 +++++++++
 crates/khive-retrieval/src/replay/mod.rs      |    5 +
 crates/khive-retrieval/src/search_config.rs   |  253 +++
 crates/khive-retrieval/src/timeout.rs         |  435 ++++
 .../src/weights/engine_weights.rs             |  561 +++++
 crates/khive-retrieval/src/weights/mod.rs     |    5 +
 87 files changed, 26866 insertions(+)
 create mode 100644 crates/khive-bm25/Cargo.toml
 create mode 100644 crates/khive-bm25/src/config.rs
 create mode 100644 crates/khive-bm25/src/error.rs
 create mode 100644 crates/khive-bm25/src/index/bench_wand.rs
 create mode 100644 crates/khive-bm25/src/index/indexing.rs
 create mode 100644 crates/khive-bm25/src/index/memory.rs
 create mode 100644 crates/khive-bm25/src/index/mod.rs
 create mode 100644 crates/khive-bm25/src/index/search.rs
 create mode 100644 crates/khive-bm25/src/index/tests_wand.rs
 create mode 100644 crates/khive-bm25/src/lib.rs
 create mode 100644 crates/khive-bm25/src/metrics.rs
 create mode 100644 crates/khive-bm25/src/tests.rs
 create mode 100644 crates/khive-bm25/src/tokenizer.rs
 create mode 100644 crates/khive-fold/src/objective/registry.rs
 create mode 100644 crates/khive-fusion/Cargo.toml
 create mode 100644 crates/khive-fusion/src/fuse.rs
 create mode 100644 crates/khive-fusion/src/lib.rs
 create mode 100644 crates/khive-fusion/src/rrf.rs
 create mode 100644 crates/khive-fusion/src/strategy.rs
 create mode 100644 crates/khive-fusion/src/tests.rs
 create mode 100644 crates/khive-fusion/src/union.rs
 create mode 100644 crates/khive-fusion/src/weighted.rs
 create mode 100644 crates/khive-hnsw/Cargo.toml
 create mode 100644 crates/khive-hnsw/src/alias/drain.rs
 create mode 100644 crates/khive-hnsw/src/alias/error.rs
 create mode 100644 crates/khive-hnsw/src/alias/manager.rs
 create mode 100644 crates/khive-hnsw/src/alias/mod.rs
 create mode 100644 crates/khive-hnsw/src/alias/validation.rs
 create mode 100644 crates/khive-hnsw/src/arena/arena.rs
 create mode 100644 crates/khive-hnsw/src/arena/arena_heap.rs
 create mode 100644 crates/khive-hnsw/src/arena/arena_vec.rs
 create mode 100644 crates/khive-hnsw/src/arena/mod.rs
 create mode 100644 crates/khive-hnsw/src/arena/tests.rs
 create mode 100644 crates/khive-hnsw/src/checkpoint/integration_tests.rs
 create mode 100644 crates/khive-hnsw/src/checkpoint/mod.rs
 create mode 100644 crates/khive-hnsw/src/checkpoint/tests.rs
 create mode 100644 crates/khive-hnsw/src/config.rs
 create mode 100644 crates/khive-hnsw/src/distance.rs
 create mode 100644 crates/khive-hnsw/src/error.rs
 create mode 100644 crates/khive-hnsw/src/index/build_batch.rs
 create mode 100644 crates/khive-hnsw/src/index/insert.rs
 create mode 100644 crates/khive-hnsw/src/index/memory.rs
 create mode 100644 crates/khive-hnsw/src/index/mod.rs
 create mode 100644 crates/khive-hnsw/src/index/neighbors.rs
 create mode 100644 crates/khive-hnsw/src/index/rebuild.rs
 create mode 100644 crates/khive-hnsw/src/index/search.rs
 create mode 100644 crates/khive-hnsw/src/lib.rs
 create mode 100644 crates/khive-hnsw/src/metrics.rs
 create mode 100644 crates/khive-hnsw/src/node.rs
 create mode 100644 crates/khive-hnsw/src/search_context.rs
 create mode 100644 crates/khive-hnsw/src/stats.rs
 create mode 100644 crates/khive-hnsw/src/tests.rs
 create mode 100644 crates/khive-retrieval/Cargo.toml
 create mode 100644 crates/khive-retrieval/src/adapters/mod.rs
 create mode 100644 crates/khive-retrieval/src/error.rs
 create mode 100644 crates/khive-retrieval/src/eval/engine_eval.rs
 create mode 100644 crates/khive-retrieval/src/eval/mod.rs
 create mode 100644 crates/khive-retrieval/src/graph/bfs.rs
 create mode 100644 crates/khive-retrieval/src/graph/compat.rs
 create mode 100644 crates/khive-retrieval/src/graph/dfs.rs
 create mode 100644 crates/khive-retrieval/src/graph/helpers.rs
 create mode 100644 crates/khive-retrieval/src/graph/mod.rs
 create mode 100644 crates/khive-retrieval/src/graph/shortest.rs
 create mode 100644 crates/khive-retrieval/src/graph/tests.rs
 create mode 100644 crates/khive-retrieval/src/graph/types.rs
 create mode 100644 crates/khive-retrieval/src/hybrid/config.rs
 create mode 100644 crates/khive-retrieval/src/hybrid/cross_encoder.rs
 create mode 100644 crates/khive-retrieval/src/hybrid/dual_index.rs
 create mode 100644 crates/khive-retrieval/src/hybrid/mod.rs
 create mode 100644 crates/khive-retrieval/src/hybrid/searcher.rs
 create mode 100644 crates/khive-retrieval/src/lib.rs
 create mode 100644 crates/khive-retrieval/src/metrics.rs
 create mode 100644 crates/khive-retrieval/src/persist/bm25.rs
 create mode 100644 crates/khive-retrieval/src/persist/hnsw.rs
 create mode 100644 crates/khive-retrieval/src/persist/mod.rs
 create mode 100644 crates/khive-retrieval/src/persist/shadow.rs
 create mode 100644 crates/khive-retrieval/src/persist/tests.rs
 create mode 100644 crates/khive-retrieval/src/policy.rs
 create mode 100644 crates/khive-retrieval/src/query_ir.rs
 create mode 100644 crates/khive-retrieval/src/replay/engine_replay.rs
 create mode 100644 crates/khive-retrieval/src/replay/mod.rs
 create mode 100644 crates/khive-retrieval/src/search_config.rs
 create mode 100644 crates/khive-retrieval/src/timeout.rs
 create mode 100644 crates/khive-retrieval/src/weights/engine_weights.rs
 create mode 100644 crates/khive-retrieval/src/weights/mod.rs

diff --git a/crates/Cargo.toml b/crates/Cargo.toml
index 289e4cc3..7c8ff3c4 100644
--- a/crates/Cargo.toml
+++ b/crates/Cargo.toml
@@ -9,6 +9,9 @@ members = [
     "khive-query",
     "khive-gate",
     "khive-gate-rego",
+    "khive-fusion",
+    "khive-bm25",
+    "khive-hnsw",
     "khive-runtime",
     "khive-request",
     "khive-pack-kg",
@@ -18,6 +21,7 @@ members = [
     "khive-mcp",
     "khive-vcs",
     "kkernel",
+    "khive-retrieval",
 ]
 # khive-merge excluded — forward-deployed (ADR-043) but not yet compilable
 # against restructured khive-vcs. Will be re-added when ADR-043 integrates.
diff --git a/crates/khive-bm25/Cargo.toml b/crates/khive-bm25/Cargo.toml
new file mode 100644
index 00000000..698b485a
--- /dev/null
+++ b/crates/khive-bm25/Cargo.toml
@@ -0,0 +1,19 @@
+[package]
+name = "khive-bm25"
+version.workspace = true
+edition.workspace = true
+authors.workspace = true
+license.workspace = true
+repository.workspace = true
+homepage.workspace = true
+keywords.workspace = true
+categories.workspace = true
+description = "BM25 (Okapi BM25) keyword index with deterministic scoring — formally verified in Lean4"
+
+[dependencies]
+khive-score = { version = "0.2.0", path = "../khive-score" }
+khive-types = { version = "0.2.0", path = "../khive-types" }
+serde = { workspace = true }
+serde_json = { workspace = true }
+thiserror = { workspace = true }
+parking_lot = { workspace = true }
diff --git a/crates/khive-bm25/src/config.rs b/crates/khive-bm25/src/config.rs
new file mode 100644
index 00000000..5620900d
--- /dev/null
+++ b/crates/khive-bm25/src/config.rs
@@ -0,0 +1,104 @@
+//! BM25 configuration types.
+//!
+//! See ADR-003 for recommended parameter values.
+
+use serde::{Deserialize, Serialize};
+
+/// BM25 configuration parameters.
+///
+/// Default values (k1=1.2, b=0.75) from ADR-003 work well for most use cases.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Bm25Config {
+    /// Term saturation parameter.
+    ///
+    /// Higher values = diminishing returns for repeated terms.
+    /// Range: typically 1.2-2.0
+    /// Default: 1.2
+    pub k1: f64,
+
+    /// Length normalization parameter.
+    ///
+    /// - 0 = no length normalization (favor longer docs)
+    /// - 1 = full length normalization (favor shorter docs)
+    ///
+    /// Range: 0.0-1.0, Default: 0.75
+    pub b: f64,
+
+    /// Maximum memory budget in bytes for the index.
+    /// If None, no memory limit is enforced (default).
+    /// If Some(limit), `index_document()` calls that would exceed the budget
+    /// are rejected with `RetrievalError::BudgetExceeded`. Re-indexing an
+    /// existing document bypasses the budget check.
+    #[serde(default)]
+    pub memory_budget: Option<usize>,
+}
+
+impl Default for Bm25Config {
+    fn default() -> Self {
+        Self {
+            k1: 1.2,
+            b: 0.75,
+            memory_budget: None,
+        }
+    }
+}
+
+impl Bm25Config {
+    /// Create a new BM25 configuration.
+    pub fn new(k1: f64, b: f64) -> Self {
+        Self {
+            k1,
+            b,
+            memory_budget: None,
+        }
+    }
+
+    /// Set memory budget in bytes.
+    ///
+    /// When set, `index_document()` calls that would cause the estimated
+    /// memory usage to exceed this limit are rejected with `BudgetExceeded`.
+    /// Re-indexing an existing document bypasses the budget check.
+    #[must_use]
+    pub fn with_memory_budget(mut self, budget: usize) -> Self {
+        self.memory_budget = Some(budget);
+        self
+    }
+
+    /// Validate configuration parameters.
+    pub fn validate(&self) -> Result<(), &'static str> {
+        if self.k1 < 0.0 {
+            return Err("k1 must be non-negative");
+        }
+        if !(0.0..=1.0).contains(&self.b) {
+            return Err("b must be in range [0.0, 1.0]");
+        }
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_config_default() {
+        let config = Bm25Config::default();
+        assert!((config.k1 - 1.2).abs() < f64::EPSILON);
+        assert!((config.b - 0.75).abs() < f64::EPSILON);
+    }
+
+    #[test]
+    fn test_config_validation() {
+        assert!(Bm25Config::new(1.2, 0.75).validate().is_ok());
+        assert!(Bm25Config::new(-0.1, 0.75).validate().is_err());
+        assert!(Bm25Config::new(1.2, -0.1).validate().is_err());
+        assert!(Bm25Config::new(1.2, 1.5).validate().is_err());
+    }
+
+    #[test]
+    fn test_config_custom() {
+        let config = Bm25Config::new(2.0, 0.5);
+        assert!((config.k1 - 2.0).abs() < f64::EPSILON);
+        assert!((config.b - 0.5).abs() < f64::EPSILON);
+    }
+}
diff --git a/crates/khive-bm25/src/error.rs b/crates/khive-bm25/src/error.rs
new file mode 100644
index 00000000..4a787863
--- /dev/null
+++ b/crates/khive-bm25/src/error.rs
@@ -0,0 +1,54 @@
+//! Error types for the BM25 index.
+
+use thiserror::Error;
+
+/// Classification of errors by recoverability.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum ErrorKind {
+    /// Retrying will not help (e.g., budget exceeded, invalid config).
+    Permanent,
+    /// Retrying after a delay may succeed.
+    Transient,
+}
+
+/// Errors produced by BM25 index operations.
+#[derive(Debug, Error)]
+pub enum RetrievalError {
+    /// The memory budget was exceeded when indexing a new document.
+    #[error(
+        "memory budget exceeded: current={current_usage}, item_size={item_size}, limit={limit}"
+    )]
+    BudgetExceeded {
+        current_usage: usize,
+        item_size: usize,
+        limit: usize,
+    },
+
+    /// Invalid BM25 configuration parameters.
+    #[error("configuration error: {0}")]
+    Configuration(String),
+}
+
+impl RetrievalError {
+    /// Construct a `BudgetExceeded` error.
+    pub fn budget_exceeded(current_usage: usize, item_size: usize, limit: usize) -> Self {
+        Self::BudgetExceeded {
+            current_usage,
+            item_size,
+            limit,
+        }
+    }
+
+    /// Return the [`ErrorKind`] for this error.
+    pub fn kind(&self) -> ErrorKind {
+        ErrorKind::Permanent
+    }
+
+    /// Whether retrying this operation might succeed.
+    pub fn is_retryable(&self) -> bool {
+        self.kind() == ErrorKind::Transient
+    }
+}
+
+/// Convenience `Result` alias for BM25 operations.
+pub type Result<T> = std::result::Result<T, RetrievalError>;
diff --git a/crates/khive-bm25/src/index/bench_wand.rs b/crates/khive-bm25/src/index/bench_wand.rs
new file mode 100644
index 00000000..31a5c840
--- /dev/null
+++ b/crates/khive-bm25/src/index/bench_wand.rs
@@ -0,0 +1,159 @@
+use std::hint::black_box;
+use std::time::Instant;
+
+use super::{Bm25Index, SearchContext};
+use crate::config::Bm25Config;
+
+#[derive(Clone)]
+struct XorShift64 {
+    state: u64,
+}
+
+impl XorShift64 {
+    fn new(seed: u64) -> Self {
+        Self { state: seed.max(1) }
+    }
+
+    fn next_u64(&mut self) -> u64 {
+        let mut x = self.state;
+        x ^= x << 13;
+        x ^= x >> 7;
+        x ^= x << 17;
+        self.state = x;
+        x
+    }
+
+    fn next_f64(&mut self) -> f64 {
+        ((self.next_u64() >> 11) as f64) / ((1u64 << 53) as f64)
+    }
+
+    fn gen_range(&mut self, upper: usize) -> usize {
+        if upper <= 1 {
+            0
+        } else {
+            (self.next_u64() as usize) % upper
+        }
+    }
+}
+
+struct ZipfSampler {
+    cdf: Vec<f64>,
+}
+
+impl ZipfSampler {
+    fn new(vocab_size: usize, exponent: f64) -> Self {
+        let mut cumulative = Vec::with_capacity(vocab_size);
+        let mut running = 0.0;
+        for rank in 1..=vocab_size {
+            running += 1.0 / (rank as f64).powf(exponent);
+            cumulative.push(running);
+        }
+        for value in &mut cumulative {
+            *value /= running;
+        }
+        Self { cdf: cumulative }
+    }
+
+    fn sample(&self, rng: &mut XorShift64) -> usize {
+        let needle = rng.next_f64();
+        let idx = self.cdf.partition_point(|value| *value < needle);
+        idx.min(self.cdf.len().saturating_sub(1))
+    }
+}
+
+fn build_vocab(size: usize) -> Vec<String> {
+    (0..size).map(|idx| format!("tok_{idx:04}")).collect()
+}
+
+fn build_index(doc_count: usize, seed: u64) -> (Bm25Index, Vec<String>, ZipfSampler) {
+    let vocab = build_vocab(2_048);
+    let zipf = ZipfSampler::new(vocab.len(), 1.07);
+    let mut rng = XorShift64::new(seed);
+    let mut index = Bm25Index::new(Bm25Config::default());
+
+    for doc_idx in 0..doc_count {
+        let len = 24 + rng.gen_range(40);
+        let mut text = String::new();
+        for token_idx in 0..len {
+            if token_idx > 0 {
+                text.push(' ');
+            }
+            let token = &vocab[zipf.sample(&mut rng)];
+            text.push_str(token);
+        }
+        index
+            .index_document(format!("doc_{doc_idx}"), &text)
+            .expect("synthetic document should index");
+    }
+
+    (index, vocab, zipf)
+}
+
+fn build_queries(
+    vocab: &[String],
+    zipf: &ZipfSampler,
+    rng: &mut XorShift64,
+    count: usize,
+    terms_per_query: usize,
+) -> Vec<String> {
+    let mut queries = Vec::with_capacity(count);
+    for _ in 0..count {
+        let mut query = String::new();
+        for idx in 0..terms_per_query {
+            if idx > 0 {
+                query.push(' ');
+            }
+            query.push_str(&vocab[zipf.sample(rng)]);
+        }
+        queries.push(query);
+    }
+    queries
+}
+
+/// Benchmark: WAND vs brute-force on Zipf-distributed corpora.
+///
+/// Note: `search_with_context` routes to WAND only when total query postings
+/// exceed `SMALL_QUERY_POSTINGS_THRESHOLD` (256). For very rare terms or
+/// small corpora, the brute-force path may be taken instead, so speedup
+/// numbers should be interpreted accordingly.
+#[test]
+#[ignore = "benchmark; run with `cargo test bench_wand -- --ignored --nocapture`"]
+fn bench_bm25_wand_vs_bruteforce_zipf_matrix() {
+    let corpus_sizes = [10_000usize, 50_000, 100_000];
+    let query_lengths = [1usize, 2, 3];
+
+    for &doc_count in &corpus_sizes {
+        let (index, vocab, zipf) = build_index(doc_count, 0xFACE_FEED ^ doc_count as u64);
+
+        println!("\nCorpus: {doc_count} docs");
+        println!("query_terms | brute_force_ms | bmw_ms | speedup_x");
+        println!("------------|----------------|--------|----------");
+
+        for &terms_per_query in &query_lengths {
+            let mut rng = XorShift64::new(0xDEAD_BEEF ^ ((doc_count as u64) << terms_per_query));
+            let queries = build_queries(&vocab, &zipf, &mut rng, 64, terms_per_query);
+
+            let mut brute_ctx = SearchContext::with_capacity(512);
+            let brute_start = Instant::now();
+            for query in &queries {
+                black_box(index.search_brute_force(query, 10, &mut brute_ctx));
+            }
+            let brute_ms = brute_start.elapsed().as_secs_f64() * 1000.0;
+
+            let mut wand_ctx = SearchContext::with_capacity(512);
+            let wand_start = Instant::now();
+            for query in &queries {
+                black_box(index.search_with_context(query, 10, &mut wand_ctx));
+            }
+            let wand_ms = wand_start.elapsed().as_secs_f64() * 1000.0;
+
+            let speedup = if wand_ms > 0.0 {
+                brute_ms / wand_ms
+            } else {
+                f64::INFINITY
+            };
+
+            println!("{terms_per_query:>11} | {brute_ms:>14.3} | {wand_ms:>6.3} | {speedup:>8.2}");
+        }
+    }
+}
diff --git a/crates/khive-bm25/src/index/indexing.rs b/crates/khive-bm25/src/index/indexing.rs
new file mode 100644
index 00000000..9dc03245
--- /dev/null
+++ b/crates/khive-bm25/src/index/indexing.rs
@@ -0,0 +1,203 @@
+//! Document indexing operations for BM25 index.
+
+use std::collections::BTreeMap;
+
+use super::{Bm25Index, DocumentId};
+use crate::error::{Result, RetrievalError};
+use crate::metrics::{self, MetricEvent, MetricValue};
+
+impl Bm25Index {
+    /// Index a document.
+    ///
+    /// Tokenizes the text and adds it to the inverted index.
+    /// If the document already exists, it will be re-indexed (old version removed first,
+    /// budget check bypassed for re-indexing).
+    ///
+    /// # Arguments
+    ///
+    /// * `doc_id` - Unique document identifier (accepts `String`, `&str`, or
+    ///   [`DocumentId`] directly via [`Into<DocumentId>`]).
+    /// * `text` - Document text to index
+    ///
+    /// # Errors
+    ///
+    /// Returns `RetrievalError::BudgetExceeded` if a memory budget is configured
+    /// and the new document would cause the index to exceed it. Re-indexing an
+    /// existing document bypasses the budget check.
+    ///
+    /// Emits `bm25.index_document.duration_ms`, `bm25.index_document.count`,
+    /// and `bm25.index.size` metrics when a sink is attached.
+    pub fn index_document(&mut self, doc_id: impl Into<DocumentId>, text: &str) -> Result<()> {
+        let start = std::time::Instant::now();
+
+        let result = self.index_document_inner(doc_id, text);
+
+        // Emit metrics
+        let elapsed = start.elapsed().as_secs_f64() * 1000.0;
+        metrics::emit(
+            &self.metrics,
+            MetricEvent {
+                name: metrics::names::BM25_INDEX_DURATION_MS,
+                value: MetricValue::Histogram(elapsed),
+                labels: vec![],
+            },
+        );
+        metrics::emit(
+            &self.metrics,
+            MetricEvent {
+                name: metrics::names::BM25_INDEX_COUNT,
+                value: MetricValue::Counter(1),
+                labels: vec![],
+            },
+        );
+        metrics::emit(
+            &self.metrics,
+            MetricEvent {
+                name: metrics::names::BM25_INDEX_SIZE,
+                value: MetricValue::Gauge(self.doc_count() as f64),
+                labels: vec![],
+            },
+        );
+
+        result
+    }
+
+    /// Inner `index_document` logic (uninstrumented).
+    fn index_document_inner(&mut self, doc_id: impl Into<DocumentId>, text: &str) -> Result<()> {
+        let doc_id: DocumentId = doc_id.into();
+        // Check if this is a re-index (bypass budget for existing docs)
+        let is_reindex = self.contains_document(&doc_id);
+
+        // Remove existing document if present
+        if is_reindex {
+            self.remove_document(&doc_id);
+        }
+
+        // Tokenize using instance tokenizer
+        let tokens = self.tokenizer.tokenize(text);
+        let doc_length = tokens.len();
+
+        if doc_length == 0 {
+            // Don't index empty documents
+            return Ok(());
+        }
+
+        // Budget check for new documents only (re-index bypasses)
+        if !is_reindex {
+            if let Some(limit) = self.config.memory_budget {
+                let current = self.memory_usage();
+                let cost = self.estimate_document_cost(text);
+                if current + cost > limit {
+                    return Err(RetrievalError::budget_exceeded(current, cost, limit));
+                }
+            }
+        }
+
+        // Get or assign internal u32 ID
+        let internal_id = self.get_or_assign_internal_id(&doc_id);
+
+        // Count term frequencies
+        let mut term_freqs: BTreeMap<String, u32> = BTreeMap::new();
+        for token in &tokens {
+            *term_freqs.entry(token.clone()).or_insert(0) += 1;
+        }
+
+        // Update inverted index with sorted insertion to maintain doc_id order.
+        // WAND requires posting lists sorted by doc_id for binary-search seeks.
+        for (term, freq) in &term_freqs {
+            let postings = self.inverted_index.entry(term.clone()).or_default();
+            let insert_at = postings.partition_point_by_doc_id(internal_id);
+            // Clamp to u8::MAX (255) for compact posting storage.
+            // BM25's TF saturation means tf>10 is already ~85% of max
+            // contribution at k1=1.2, so clamping at 255 has negligible
+            // scoring impact. For very long documents (>255 occurrences of
+            // a single term), the score will plateau slightly early.
+            postings.insert(insert_at, internal_id, (*freq).min(255) as u8);
+        }
+
+        // Populate forward index: doc -> list of its terms (for O(terms) removal).
+        self.forward_index
+            .insert(internal_id, term_freqs.keys().cloned().collect());
+
+        // Update document metadata
+        self.doc_lengths.insert(internal_id, doc_length);
+        self.set_doc_length_fast(internal_id, doc_length);
+        self.total_tokens += doc_length;
+
+        // IDF cache auto-invalidates on the next search when it detects
+        // that doc_count() has changed. No per-term eviction needed.
+
+        // Block-max metadata is epoch-invalidated (lazy rebuild on next WAND search).
+        self.invalidate_block_max_after_mutation();
+
+        Ok(())
+    }
+
+    /// Remove a document from the index.
+    ///
+    /// Returns true if the document was found and removed, false otherwise.
+    pub fn remove_document(&mut self, doc_id: &str) -> bool {
+        // Look up internal ID
+        let internal_id = match self.id_to_internal.get(doc_id).copied() {
+            Some(id) => id,
+            None => return false,
+        };
+
+        // Get and remove document length
+        let doc_length = match self.doc_lengths.remove(&internal_id) {
+            Some(len) => len,
+            None => return false,
+        };
+
+        // Clear the fast-path vec entries (both usize and f32 mirrors).
+        let idx = internal_id as usize;
+        if idx < self.doc_lengths_vec.len() {
+            self.doc_lengths_vec[idx] = 0;
+        }
+        if idx < self.doc_lengths_f32.len() {
+            self.doc_lengths_f32[idx] = 0.0;
+        }
+
+        // Update total tokens
+        self.total_tokens = self.total_tokens.saturating_sub(doc_length);
+
+        // Remove from posting lists using the forward index (O(terms_in_doc) not O(|V|)).
+        // Falls back to full scan when the forward index is absent (e.g. after deserialization).
+        if let Some(terms) = self.forward_index.remove(&internal_id) {
+            for term in &terms {
+                if let Some(postings) = self.inverted_index.get_mut(term) {
+                    let idx = postings.partition_point_by_doc_id(internal_id);
+                    if idx < postings.len() && postings.doc_ids[idx] == internal_id {
+                        postings.remove(idx);
+                    }
+                    if postings.is_empty() {
+                        self.inverted_index.remove(term);
+                    }
+                }
+            }
+        } else {
+            // Fallback: forward index not available (deserialized index).
+            // Scan all posting lists (original O(|V|) behavior).
+            for (_term, postings) in self.inverted_index.iter_mut() {
+                let idx = postings.partition_point_by_doc_id(internal_id);
+                if idx < postings.len() && postings.doc_ids[idx] == internal_id {
+                    postings.remove(idx);
+                }
+            }
+            self.inverted_index
+                .retain(|_, postings| !postings.is_empty());
+        }
+
+        // Remove from ID maps
+        self.id_to_internal.remove(doc_id);
+        // Note: don't remove from internal_to_id Vec (leaves hole, but u32 IDs are never reused)
+
+        // IDF cache auto-invalidates on the next search when it detects
+        // that doc_count() has changed. No per-term eviction needed.
+
+        // Block-max metadata is epoch-invalidated.
+        self.invalidate_block_max_after_mutation();
+
+        true
+    }
+}
diff --git a/crates/khive-bm25/src/index/memory.rs b/crates/khive-bm25/src/index/memory.rs
new file mode 100644
index 00000000..075a5bf2
--- /dev/null
+++ b/crates/khive-bm25/src/index/memory.rs
@@ -0,0 +1,170 @@
+//! Memory budget operations for BM25 index.
+
+use std::collections::BTreeMap;
+use std::mem::size_of;
+
+use super::{BlockMaxBlock, Bm25Index};
+
+/// Bytes per posting in the SoA layout: u32 doc_id (4) + u8 term_freq (1) = 5.
+/// No alignment padding waste (separate Vecs).
+const BYTES_PER_POSTING: usize = size_of::<u32>() + size_of::<u8>();
+
+impl Bm25Index {
+    /// Get the configured memory budget, if any.
+    pub fn memory_budget(&self) -> Option<usize> {
+        self.config.memory_budget
+    }
+
+    /// Set or clear the memory budget at runtime.
+    ///
+    /// Pass `Some(bytes)` to enforce a limit, or `None` to remove it.
+    pub fn set_memory_budget(&mut self, budget: Option<usize>) {
+        self.config.memory_budget = budget;
+    }
+
+    /// Estimate the current memory usage of the index in bytes.
+    ///
+    /// This is an approximation. It includes the inverted index, document
+    /// metadata, ID maps, and block-max metadata.
+    pub fn memory_usage(&self) -> usize {
+        let mut inverted_index_size: usize = 0;
+        let mut block_max_size: usize = 0;
+
+        for (term, postings) in &self.inverted_index {
+            // String key: heap overhead (24) + string data
+            inverted_index_size += 24 + term.len();
+            // SoA PostingList: two Vec overheads (24 each) +
+            // doc_ids (n * 4 bytes) + term_freqs (n * 1 byte) = 5 bytes/posting
+            inverted_index_size += 48 + postings.len() * BYTES_PER_POSTING;
+
+            // Block-max metadata sidecar: one Vec<BlockMaxBlock> per term
+            let block_size = self.block_size.max(1);
+            let block_count = postings.len().div_ceil(block_size);
+            block_max_size += 24 + block_count * size_of::<BlockMaxBlock>();
+            // HashMap entry overhead for per_term map
+            block_max_size += 32;
+        }
+
+        // doc_lengths: HashMap<u32, usize>
+        // Each entry: u32 key (4) + usize value (8) + HashMap bucket overhead (~32)
+        let doc_lengths_size = self.doc_lengths.len() * (4 + size_of::<usize>() + 32);
+
+        // ID mapping tables:
+        // id_to_internal: HashMap<DocumentId, u32> -- DocumentId(24 + data) + u32(4) + bucket(32)
+        let mut id_map_size: usize = 0;
+        for doc_id in self.id_to_internal.keys() {
+            id_map_size += 24 + doc_id.len() + 4 + 32;
+        }
+        // internal_to_id: Vec<Arc<str>> -- vec overhead (24) + each Arc<str> fat-ptr (16) + data
+        id_map_size += 24;
+        for doc_id in &self.internal_to_id {
+            // Arc<str> heap: 16-byte header + string data. Fat-ptr on stack is 16 bytes
+            // but we count heap cost; the refcount block is approximated as 16 bytes.
+            id_map_size += 16 + doc_id.len();
+        }
+
+        // IDF cache: not counted towards budget (it's a cache, can be cleared)
+
+        // Forward index: HashMap<u32, Vec<String>>
+        // Each entry: u32 key (4) + Vec overhead (24) + bucket (~32) + string data
+        let mut forward_index_size: usize = self.forward_index.len() * (4 + 24 + 32);
+        for terms in self.forward_index.values() {
+            for term in terms {
+                // Each String: 24 bytes overhead + string data
+                forward_index_size += 24 + term.len();
+            }
+        }
+
+        // doc_lengths_f32: Vec<f32> for SIMD batch scoring
+        let doc_lengths_f32_size = self.doc_lengths_f32.len() * size_of::<f32>() + 24;
+
+        // HashMap overhead for inverted_index itself
+        let index_map_overhead = self.inverted_index.len() * 64;
+
+        // Fixed overhead: config + tokenizer Arc + total_tokens + RwLocks + epoch + block_size
+        let fixed_overhead: usize = 192;
+
+        inverted_index_size
+            + block_max_size
+            + doc_lengths_size
+            + doc_lengths_f32_size
+            + forward_index_size
+            + id_map_size
+            + index_map_overhead
+            + fixed_overhead
+    }
+
+    /// Estimate the memory cost of indexing a new document.
+    pub fn estimate_document_cost(&self, text: &str) -> usize {
+        let tokens = self.tokenizer.tokenize(text);
+        if tokens.is_empty() {
+            return 0;
+        }
+
+        let mut unique_terms: BTreeMap<&str, u32> = BTreeMap::new();
+        for token in &tokens {
+            *unique_terms.entry(token.as_str()).or_insert(0) += 1;
+        }
+
+        // Cost per unique term in SoA layout: u32 (4) + u8 (1) = 5 bytes
+        let postings_cost: usize = unique_terms.len() * BYTES_PER_POSTING;
+
+        // New terms that don't exist yet get String key + PostingList overhead + block-max entry
+        let new_term_cost: usize = unique_terms
+            .keys()
+            .filter(|term| !self.inverted_index.contains_key(**term))
+            .map(|term| {
+                // String key (24 + len) + PostingList overhead (48 = 2 Vecs) + HashMap entry (64)
+                let postings_entry = 24 + term.len() + 48 + 64;
+                let block_entry = 24 + size_of::<BlockMaxBlock>() + 32;
+                postings_entry + block_entry
+            })
+            .sum();
+
+        // Existing terms may gain an additional block if the posting list crosses a block boundary
+        let additional_block_cost: usize = unique_terms
+            .keys()
+            .filter_map(|term| {
+                self.inverted_index
+                    .get(*term)
+                    .map(|postings| postings.len())
+            })
+            .map(|old_len| {
+                let block_size = self.block_size.max(1);
+                let before_blocks = old_len.div_ceil(block_size);
+                let after_blocks = (old_len + 1).div_ceil(block_size);
+                if after_blocks > before_blocks {
+                    size_of::<BlockMaxBlock>()
+                } else {
+                    0
+                }
+            })
+            .sum();
+
+        // doc_lengths entry: u32(4) + usize(8) + HashMap bucket(32) = 44
+        let doc_entry_cost: usize = 4 + size_of::<usize>() + 32;
+
+        // Forward index entry: u32 key (4) + Vec overhead (24) + bucket (32)
+        // + each term String (24 + len)
+        let forward_index_cost: usize = 4
+            + 24
+            + 32
+            + unique_terms
+                .keys()
+                .map(|term| 24 + term.len())
+                .sum::<usize>();
+
+        // ID mapping cost: DocumentId in both maps + u32 key
+        // Assume average doc_id is ~36 bytes (UUID string)
+        let avg_doc_id_len: usize = 36;
+        let id_map_cost: usize = (24 + avg_doc_id_len + 4 + 32) // id_to_internal entry
+            + (24 + avg_doc_id_len); // internal_to_id slot
+
+        postings_cost
+            + new_term_cost
+            + additional_block_cost
+            + doc_entry_cost
+            + forward_index_cost
+            + id_map_cost
+    }
+}
diff --git a/crates/khive-bm25/src/index/mod.rs b/crates/khive-bm25/src/index/mod.rs
new file mode 100644
index 00000000..3b0abbba
--- /dev/null
+++ b/crates/khive-bm25/src/index/mod.rs
@@ -0,0 +1,1040 @@
+//! BM25 inverted index implementation.
+//!
+//! # Formal Verification
+//!
+//! This implementation corresponds to the formal proofs in
+//! `proofs/Lion/Retrieval/BM25.lean`. Key theorems:
+//!
+//! - `idf_nonneg`: IDF(t) >= 0 for all terms (with +1 smoothing)
+//! - `idf_mono`: rarer terms have higher IDF (n1 < n2 -> IDF(n1) > IDF(n2))
+//! - `tf_nonneg`: TF component >= 0
+//! - `tf_bounded`: TF component < k1 + 1 (saturation bound at 2.2)
+//! - `tf_mono`: higher term freq -> higher (but saturating) score
+//! - `bm25_nonneg`: total BM25 score >= 0
+//! - `factor_at_avg`: L = 1 -> length factor = 1 (no adjustment at average)
+//! - `factor_long_doc`: L > 1 -> penalty for long documents
+//! - `factor_short_doc`: L < 1 -> boost for short documents
+//!
+//! # Floating-Point Considerations (RETRIEVAL-04)
+//!
+//! This implementation uses `f64` for internal BM25 score calculations due to:
+//! - Logarithmic operations in IDF computation (requires floating-point)
+//! - Intermediate calculations requiring full precision
+//! - Standard practice in IR systems (Lucene, Elasticsearch use f64)
+//!
+//! ## Cross-Platform Behavior
+//!
+//! While f64 follows IEEE 754 on all supported platforms, minor variance may occur:
+//! - FMA (fused multiply-add) availability differs across CPUs
+//! - Compiler optimizations may reorder floating-point operations
+//! - Extended precision (x87) on older x86 may affect intermediate results
+//!
+//! **Mitigation**: Scores are converted to [`DeterministicScore`] at the API boundary
+//! (in [`Bm25Index::search`]) which provides:
+//! - Canonical representation for storage and comparison
+//! - Consistent serialization across platforms
+//! - Protection against NaN propagation
+//!
+//! ## Golden Tests
+//!
+//! Golden tests in this module verify known expected values using a controlled corpus.
+//! These tests use fixed documents and queries with hand-calculated expected scores
+//! to catch any drift in scoring behavior across versions or platforms.
+//!
+//! See `tests::golden_tests` module for reference values.
+
+mod indexing;
+mod memory;
+mod search;
+
+#[cfg(test)]
+mod bench_wand;
+#[cfg(test)]
+mod tests_wand;
+
+pub use search::SearchContext;
+
+use serde::{Deserialize, Serialize};
+use std::collections::HashMap;
+use std::sync::atomic::{AtomicUsize, Ordering as AtomicOrdering};
+use std::sync::{Arc, RwLock};
+
+use super::config::Bm25Config;
+use super::tokenizer::{BoxedTokenizer, SimpleTokenizer};
+use crate::error::{Result, RetrievalError};
+use crate::metrics::MetricsSink;
+
+/// IDF cache keyed by document frequency (`df`) rather than term string.
+///
+/// IDF depends on two inputs: `df` (document frequency of a term) and `N`
+/// (total document count). Multiple terms sharing the same `df` produce
+/// identical IDF values, so keying by `df` (a `usize`) is both more compact
+/// and more correct than keying by term string.
+///
+/// When `N` changes (any add/remove), the entire cache is invalidated by
+/// comparing `cached_doc_count` against the current `doc_count()`. This
+/// eliminates the stale-IDF bug where targeted per-term eviction left
+/// entries computed with the old `N` in the cache.
+#[derive(Debug, Default)]
+pub(crate) struct IdfCache {
+    /// The `N` (total document count) for which cached values are valid.
+    cached_doc_count: AtomicUsize,
+    /// Map from document frequency -> precomputed IDF value.
+    by_df: RwLock<HashMap<usize, f64>>,
+}
+
+impl Clone for IdfCache {
+    fn clone(&self) -> Self {
+        let map_clone = self.by_df.read().map(|m| m.clone()).unwrap_or_default();
+        Self {
+            cached_doc_count: AtomicUsize::new(self.cached_doc_count.load(AtomicOrdering::Relaxed)),
+            by_df: RwLock::new(map_clone),
+        }
+    }
+}
+
+/// Default tokenizer for deserialization.
+fn default_tokenizer() -> BoxedTokenizer {
+    Arc::new(SimpleTokenizer::default())
+}
+
+/// Serde helpers for `Vec<Arc<str>>` ↔ `Vec<String>` (transparent wire format).
+mod arc_str_vec_serde {
+    use std::sync::Arc;
+
+    pub fn serialize<S>(v: &[Arc<str>], ser: S) -> Result<S::Ok, S::Error>
+    where
+        S: serde::Serializer,
+    {
+        use serde::ser::SerializeSeq;
+        let mut seq = ser.serialize_seq(Some(v.len()))?;
+        for s in v {
+            seq.serialize_element(s.as_ref())?;
+        }
+        seq.end()
+    }
+
+    pub fn deserialize<'de, D>(de: D) -> Result<Vec<Arc<str>>, D::Error>
+    where
+        D: serde::Deserializer<'de>,
+    {
+        use serde::Deserialize;
+        let strings: Vec<String> = Vec::deserialize(de)?;
+        Ok(strings.into_iter().map(|s| Arc::from(s.as_str())).collect())
+    }
+}
+
+pub(crate) const DEFAULT_BLOCK_SIZE: usize = 128;
+const INITIAL_POSTINGS_EPOCH: u64 = 0;
+const STALE_BLOCK_MAX_EPOCH: u64 = u64::MAX;
+
+fn default_block_size() -> usize {
+    DEFAULT_BLOCK_SIZE
+}
+
+fn default_postings_epoch() -> u64 {
+    INITIAL_POSTINGS_EPOCH
+}
+
+/// Typed document identifier for BM25 index operations.
+///
+/// Wire format: plain JSON string (serde transparent).
+///
+/// # ID Bridging (Hybrid Search)
+///
+/// When combining BM25 keyword results with HNSW vector results in hybrid
+/// search, the ID types differ: BM25 uses `DocumentId` (string-based) while
+/// HNSW uses `EmbeddingId` (128-bit UUID-based). Bridging strategies include:
+///
+/// 1. String-based fusion: convert both ID types to `String` before fusion.
+/// 2. DocumentId fusion: convert `EmbeddingId` to `DocumentId` via its
+///    display representation, then fuse using `DocumentId`.
+/// 3. Application-level mapping: maintain a lookup table mapping between
+///    `EmbeddingId` and `DocumentId` in the application layer.
+// TODO(port): was generated by `khive_types::transparent_string_newtype!` macro
+// which does not yet exist in khive-types. Expanded here manually until the macro
+// lands in khive-types and is re-adopted.
+#[derive(Debug, Clone, PartialEq, Eq, Hash, PartialOrd, Ord, serde::Serialize, serde::Deserialize)]
+#[serde(transparent)]
+pub struct DocumentId(String);
+
+impl DocumentId {
+    /// Create a new `DocumentId` from any `Into<String>`.
+    pub fn new(s: impl Into<String>) -> Self {
+        Self(s.into())
+    }
+
+    /// Return the inner string slice.
+    pub fn as_str(&self) -> &str {
+        &self.0
+    }
+
+    /// Consume `self` and return the inner `String`.
+    pub fn into_inner(self) -> String {
+        self.0
+    }
+
+    /// Return the length of the underlying string in bytes.
+    pub fn len(&self) -> usize {
+        self.0.len()
+    }
+
+    /// Return `true` if the underlying string is empty.
+    pub fn is_empty(&self) -> bool {
+        self.0.is_empty()
+    }
+}
+
+impl std::fmt::Display for DocumentId {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.write_str(&self.0)
+    }
+}
+
+impl std::ops::Deref for DocumentId {
+    type Target = str;
+
+    fn deref(&self) -> &str {
+        &self.0
+    }
+}
+
+impl std::borrow::Borrow<str> for DocumentId {
+    fn borrow(&self) -> &str {
+        &self.0
+    }
+}
+
+impl AsRef<str> for DocumentId {
+    fn as_ref(&self) -> &str {
+        &self.0
+    }
+}
+
+impl From<String> for DocumentId {
+    fn from(s: String) -> Self {
+        Self(s)
+    }
+}
+
+impl From<&str> for DocumentId {
+    fn from(s: &str) -> Self {
+        Self(s.to_owned())
+    }
+}
+
+impl PartialEq<str> for DocumentId {
+    fn eq(&self, other: &str) -> bool {
+        self.0 == other
+    }
+}
+
+impl PartialEq<&str> for DocumentId {
+    fn eq(&self, other: &&str) -> bool {
+        self.0 == *other
+    }
+}
+
+impl PartialEq<String> for DocumentId {
+    fn eq(&self, other: &String) -> bool {
+        &self.0 == other
+    }
+}
+
+/// Structure-of-Arrays posting list for memory-efficient storage.
+///
+/// Stores doc_ids (`Vec<u32>`) and term_freqs (`Vec<u8>`) in separate
+/// contiguous arrays, achieving exactly 5 bytes per posting with no
+/// alignment padding waste (vs 8 bytes for AoS `struct { u32, u8 }`).
+///
+/// At 200K postings this saves ~600 KB; at 1M postings, ~3 MB.
+///
+/// Both arrays are always the same length and sorted by doc_id.
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub(crate) struct PostingList {
+    /// Document IDs, sorted ascending for binary-search seeks in WAND.
+    pub(crate) doc_ids: Vec<u32>,
+    /// Term frequencies, parallel to `doc_ids`. Clamped to u8::MAX (255).
+    pub(crate) term_freqs: Vec<u8>,
+}
+
+impl PostingList {
+    /// Number of postings in this list.
+    #[inline]
+    pub(crate) fn len(&self) -> usize {
+        self.doc_ids.len()
+    }
+
+    /// Whether the posting list is empty.
+    #[inline]
+    pub(crate) fn is_empty(&self) -> bool {
+        self.doc_ids.is_empty()
+    }
+
+    /// Insert a posting at the given position, maintaining sorted order.
+    #[inline]
+    pub(crate) fn insert(&mut self, index: usize, doc_id: u32, term_freq: u8) {
+        self.doc_ids.insert(index, doc_id);
+        self.term_freqs.insert(index, term_freq);
+    }
+
+    /// Remove the posting at the given position.
+    #[inline]
+    pub(crate) fn remove(&mut self, index: usize) {
+        self.doc_ids.remove(index);
+        self.term_freqs.remove(index);
+    }
+
+    /// Find the insertion point for a doc_id (binary search).
+    #[inline]
+    pub(crate) fn partition_point_by_doc_id(&self, target: u32) -> usize {
+        self.doc_ids.partition_point(|&id| id < target)
+    }
+
+    /// Memory usage in bytes (actual heap allocation, no padding waste).
+    #[inline]
+    #[allow(dead_code)] // TODO: wire into memory diagnostics / health-check endpoint
+    pub(crate) fn heap_bytes(&self) -> usize {
+        // Vec<u32> capacity * 4 + Vec<u8> capacity * 1
+        // Use len() as approximation (capacity >= len)
+        self.doc_ids.len() * 4 + self.term_freqs.len()
+    }
+}
+
+/// Per-block BM25 upper-bound metadata for a posting list.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub(crate) struct BlockMaxBlock {
+    /// Smallest document id in the block.
+    pub(crate) min_doc_id: u32,
+    /// Largest document id in the block.
+    pub(crate) max_doc_id: u32,
+    /// Maximum exact BM25 contribution of this term among postings in the block.
+    pub(crate) max_score_contribution: f64,
+    /// Suffix maximum of `max_score_contribution` from this block to the end.
+    pub(crate) suffix_max_score: f64,
+}
+
+/// Block-max metadata for a term posting list.
+#[derive(Debug, Clone, Default, Serialize, Deserialize)]
+pub(crate) struct TermBlockMaxMeta {
+    pub(crate) blocks: Vec<BlockMaxBlock>,
+}
+
+/// Lazily rebuilt block-max metadata cache.
+#[derive(Debug, Clone)]
+pub(crate) struct BlockMaxState {
+    pub(crate) built_epoch: u64,
+    pub(crate) per_term: HashMap<String, TermBlockMaxMeta>,
+}
+
+impl Default for BlockMaxState {
+    fn default() -> Self {
+        Self {
+            built_epoch: STALE_BLOCK_MAX_EPOCH,
+            per_term: HashMap::new(),
+        }
+    }
+}
+
+/// BM25 (Okapi BM25) keyword index.
+///
+/// An in-memory inverted index for keyword search with BM25 scoring.
+/// Supports incremental updates (add/remove documents) and efficient search.
+///
+/// # Tokenization
+///
+/// By default, uses [`SimpleTokenizer`] for whitespace-based tokenization.
+/// Custom tokenizers can be set via [`with_tokenizer`](Self::with_tokenizer).
+///
+/// # RETRIEVAL-08: Thread Safety and Mutability
+///
+/// ## Why `search()` Takes `&self` Not `&mut self`
+///
+/// Search is designed to be concurrent-safe without external locking. The
+/// only mutable state accessed during search is the IDF cache and block-max
+/// metadata, which use `RwLock` for interior mutability.
+///
+/// **Design alternatives considered**:
+///
+/// | Approach | Pros | Cons | Decision |
+/// |----------|------|------|----------|
+/// | `&mut self` for search | No interior mutability | Blocks concurrent reads | Rejected |
+/// | `RefCell` for cache | Simple, no sync | Not thread-safe | Rejected |
+/// | `RwLock` for cache | Thread-safe, concurrent | Overhead on cache access | **Chosen** |
+/// | No cache (recompute IDF) | Pure `&self` | ~10-20% slower search | Rejected |
+///
+/// **Rationale**: The IDF cache significantly improves search performance (avoids
+/// log computation per term per search). `RwLock` enables multiple concurrent
+/// searches while allowing cache population.
+///
+/// ## Concurrent Read Pattern
+///
+/// When wrapped in an external `RwLock<Bm25Index>`:
+/// - Multiple readers can call `search()` concurrently (requires only `&self`)
+/// - Writers still need exclusive access for `index_document()`, `remove_document()`, `clear()`
+///
+/// The internal `RwLock` on the IDF cache provides fine-grained locking for cache
+/// updates during search, avoiding exclusive locking on the entire index.
+///
+/// ## Cache Invalidation
+///
+/// The IDF cache auto-invalidates when the document count changes:
+/// - On `search()`, if `cached_doc_count != doc_count()`, the cache is
+///   cleared and rebuilt lazily.
+/// - `clear()` resets the cache and `cached_doc_count` to zero.
+///
+/// Because IDF depends on both `df` and `N`, keying by `df` alone is
+/// sufficient: when `N` changes, the entire cache is invalidated; when
+/// `N` is stable, terms with equal `df` share the same IDF value.
+///
+/// Block-max metadata is invalidated via an epoch counter: every mutation
+/// bumps `postings_epoch`, and the block-max state is lazily rebuilt on
+/// the next WAND search if the epochs disagree.
+#[derive(Serialize, Deserialize)]
+pub struct Bm25Index {
+    /// Term -> posting list (SoA layout: separate doc_id and term_freq arrays).
+    /// Posting lists are sorted by doc_id for binary-search seeks in WAND.
+    pub(crate) inverted_index: HashMap<String, PostingList>,
+
+    /// Document lengths (in tokens) keyed by internal u32 ID.
+    /// Kept for serialization compatibility and `doc_count()`.
+    pub(crate) doc_lengths: HashMap<u32, usize>,
+
+    /// Forward map: external DocumentId -> internal u32 ID.
+    pub(crate) id_to_internal: HashMap<DocumentId, u32>,
+
+    /// Reverse map: internal u32 ID -> shared string slice.
+    ///
+    /// Uses `Arc<str>` instead of `DocumentId` (which wraps `String`) so that
+    /// `resolve_internal_id` can hand out a clone in O(1) — an atomic refcount
+    /// increment — rather than a heap allocation + memcpy of the UUID string.
+    /// All search hot-path callers only need `AsRef<str>` / `Deref<Target=str>`,
+    /// which `Arc<str>` satisfies.  The serde wire format is identical to the
+    /// old `DocumentId` representation (both serialize as a bare JSON string).
+    #[serde(with = "arc_str_vec_serde")]
+    pub(crate) internal_to_id: Vec<Arc<str>>,
+
+    /// Next internal ID to assign.
+    pub(crate) next_internal_id: u32,
+
+    /// Total token count across all documents.
+    pub(crate) total_tokens: usize,
+
+    /// Monotonic counter incremented whenever postings or corpus statistics change.
+    /// Used to lazily invalidate block-max metadata.
+    #[serde(default = "default_postings_epoch")]
+    pub(crate) postings_epoch: u64,
+
+    /// Fixed posting-list block size used for block-max metadata.
+    #[serde(default = "default_block_size")]
+    pub(crate) block_size: usize,
+
+    /// Lazily rebuilt block-max metadata.
+    #[serde(skip, default)]
+    pub(crate) block_max_state: RwLock<BlockMaxState>,
+
+    /// IDF cache keyed by document frequency (`df`), auto-invalidated when
+    /// `doc_count()` changes. See [`IdfCache`] for design rationale.
+    #[serde(skip, default)]
+    pub(crate) idf_cache: IdfCache,
+
+    /// Vec-indexed document lengths for O(1) hot-path access during scoring.
+    /// Indexed by internal u32 doc_id. Rebuilt from `doc_lengths` on
+    /// deserialization. This avoids HashMap lookups in the tight scoring loop.
+    #[serde(skip, default)]
+    pub(crate) doc_lengths_vec: Vec<usize>,
+
+    /// Pre-converted f32 document lengths for SIMD batch scoring.
+    /// Maintained in parallel with `doc_lengths_vec`. Avoids per-scoring
+    /// `usize -> f32` conversion in the tight NEON batch loop.
+    #[serde(skip, default)]
+    pub(crate) doc_lengths_f32: Vec<f32>,
+
+    /// Configuration parameters.
+    pub(crate) config: Bm25Config,
+
+    /// Tokenizer for text processing.
+    /// Defaults to SimpleTokenizer. Skip serialization as tokenizers may not be serializable.
+    #[serde(skip, default = "default_tokenizer")]
+    pub(crate) tokenizer: BoxedTokenizer,
+
+    /// Forward index: internal doc_id -> list of terms in that document.
+    /// Enables O(terms_in_doc) removal instead of O(|vocabulary|).
+    #[serde(skip, default)]
+    pub(crate) forward_index: HashMap<u32, Vec<String>>,
+
+    /// Optional metrics sink for observability.
+    #[serde(skip)]
+    pub(crate) metrics: Option<Arc<dyn MetricsSink>>,
+}
+
+impl std::fmt::Debug for Bm25Index {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("Bm25Index")
+            .field("doc_count", &self.doc_lengths.len())
+            .field("unique_terms", &self.inverted_index.len())
+            .field("total_tokens", &self.total_tokens)
+            .field("block_size", &self.block_size)
+            .field("config", &self.config)
+            .finish()
+    }
+}
+
+impl Clone for Bm25Index {
+    fn clone(&self) -> Self {
+        let block_max_clone = self
+            .block_max_state
+            .read()
+            .map(|state| state.clone())
+            .unwrap_or_default();
+
+        Self {
+            inverted_index: self.inverted_index.clone(),
+            doc_lengths: self.doc_lengths.clone(),
+            id_to_internal: self.id_to_internal.clone(),
+            // Arc<str> clone = atomic refcount bump, not a String heap copy.
+            internal_to_id: self.internal_to_id.clone(),
+            next_internal_id: self.next_internal_id,
+            total_tokens: self.total_tokens,
+            postings_epoch: self.postings_epoch,
+            block_size: self.block_size,
+            block_max_state: RwLock::new(block_max_clone),
+            idf_cache: self.idf_cache.clone(),
+            doc_lengths_vec: self.doc_lengths_vec.clone(),
+            doc_lengths_f32: self.doc_lengths_f32.clone(),
+            forward_index: self.forward_index.clone(),
+            config: self.config.clone(),
+            tokenizer: self.tokenizer.clone(),
+            metrics: self.metrics.clone(),
+        }
+    }
+}
+
+impl Default for Bm25Index {
+    fn default() -> Self {
+        Self::new(Bm25Config::default())
+    }
+}
+
+impl Bm25Index {
+    /// Create a new empty BM25 index with the given configuration.
+    ///
+    /// # Panics
+    ///
+    /// Panics if config validation fails (k1 < 0 or b outside [0, 1]).
+    /// Use [`Bm25Index::try_new`] to handle invalid config as an error.
+    pub fn new(config: Bm25Config) -> Self {
+        if let Err(e) = config.validate() {
+            panic!("invalid BM25 config: {e}");
+        }
+        Self {
+            inverted_index: HashMap::new(),
+            doc_lengths: HashMap::new(),
+            id_to_internal: HashMap::new(),
+            internal_to_id: Vec::new(),
+            next_internal_id: 0,
+            total_tokens: 0,
+            postings_epoch: INITIAL_POSTINGS_EPOCH,
+            block_size: DEFAULT_BLOCK_SIZE,
+            block_max_state: RwLock::new(BlockMaxState::default()),
+            idf_cache: IdfCache::default(),
+            doc_lengths_vec: Vec::new(),
+            doc_lengths_f32: Vec::new(),
+            forward_index: HashMap::new(),
+            config,
+            tokenizer: Arc::new(SimpleTokenizer::default()),
+            metrics: None,
+        }
+    }
+
+    /// Non-panicking constructor.  Returns `Err(RetrievalError::Configuration(…))`
+    /// if the config is invalid instead of panicking.
+    pub fn try_new(config: Bm25Config) -> Result<Self> {
+        config
+            .validate()
+            .map_err(|e| RetrievalError::Configuration(format!("invalid BM25 config: {e}")))?;
+        Ok(Self::new(config))
+    }
+
+    /// Create a new BM25 index with a custom tokenizer.
+    pub fn with_tokenizer(config: Bm25Config, tokenizer: BoxedTokenizer) -> Self {
+        Self {
+            inverted_index: HashMap::new(),
+            doc_lengths: HashMap::new(),
+            id_to_internal: HashMap::new(),
+            internal_to_id: Vec::new(),
+            next_internal_id: 0,
+            total_tokens: 0,
+            postings_epoch: INITIAL_POSTINGS_EPOCH,
+            block_size: DEFAULT_BLOCK_SIZE,
+            block_max_state: RwLock::new(BlockMaxState::default()),
+            idf_cache: IdfCache::default(),
+            doc_lengths_vec: Vec::new(),
+            doc_lengths_f32: Vec::new(),
+            forward_index: HashMap::new(),
+            config,
+            tokenizer,
+            metrics: None,
+        }
+    }
+
+    /// Set the tokenizer.
+    ///
+    /// Note: This does not re-tokenize existing documents.
+    /// Clear and re-index if you need consistent tokenization.
+    pub fn set_tokenizer(&mut self, tokenizer: BoxedTokenizer) {
+        self.tokenizer = tokenizer;
+    }
+
+    /// Get a reference to the current tokenizer.
+    pub fn tokenizer(&self) -> &BoxedTokenizer {
+        &self.tokenizer
+    }
+
+    /// Attach a metrics sink (builder pattern).
+    ///
+    /// The sink receives [`MetricEvent`]s from `search` and `index_document`
+    /// operations. Pass an `Arc<dyn MetricsSink>` to share a single sink
+    /// across multiple indices.
+    #[must_use]
+    pub fn with_metrics(mut self, sink: Arc<dyn MetricsSink>) -> Self {
+        self.metrics = Some(sink);
+        self
+    }
+
+    /// Set or replace the metrics sink at runtime.
+    ///
+    /// Pass `Some(sink)` to enable metrics, or `None` to disable.
+    pub fn set_metrics(&mut self, sink: Option<Arc<dyn MetricsSink>>) {
+        self.metrics = sink;
+    }
+
+    /// Get the number of indexed documents.
+    pub fn doc_count(&self) -> usize {
+        self.doc_lengths.len()
+    }
+
+    /// Get the average document length (in tokens).
+    ///
+    /// Returns 0.0 if no documents are indexed.
+    pub fn avg_doc_length(&self) -> f64 {
+        let count = self.doc_count();
+        if count == 0 {
+            0.0
+        } else {
+            self.total_tokens as f64 / count as f64
+        }
+    }
+
+    /// Check if a document is indexed.
+    pub fn contains_document(&self, doc_id: &str) -> bool {
+        self.id_to_internal.contains_key(doc_id)
+    }
+
+    /// Get or assign an internal u32 ID for a `DocumentId`.
+    fn get_or_assign_internal_id(&mut self, doc_id: &DocumentId) -> u32 {
+        if let Some(&id) = self.id_to_internal.get(doc_id) {
+            return id;
+        }
+        let id = self.next_internal_id;
+        self.next_internal_id = self.next_internal_id.wrapping_add(1);
+        self.id_to_internal.insert(doc_id.clone(), id);
+        if id as usize >= self.internal_to_id.len() {
+            // Placeholder: Arc<str> from empty &str.
+            self.internal_to_id.resize(id as usize + 1, Arc::from(""));
+        }
+        // Store as Arc<str> — avoids cloning the full String on every
+        // search hit; lookup just does an atomic refcount bump.
+        self.internal_to_id[id as usize] = Arc::from(doc_id.as_str());
+        id
+    }
+
+    /// Resolve an internal u32 ID back to an `Arc<str>`.
+    ///
+    /// Returns a cheaply cloneable shared reference.  Callers in the search
+    /// hot path can `Arc::clone` this without any heap allocation.
+    #[inline]
+    fn resolve_internal_id(&self, internal_id: u32) -> Option<Arc<str>> {
+        self.internal_to_id
+            .get(internal_id as usize)
+            .map(Arc::clone)
+    }
+
+    /// Get the configuration.
+    pub fn config(&self) -> &Bm25Config {
+        &self.config
+    }
+
+    /// Clear the index, removing all documents.
+    pub fn clear(&mut self) {
+        self.inverted_index.clear();
+        self.doc_lengths.clear();
+        self.doc_lengths_vec.clear();
+        self.doc_lengths_f32.clear();
+        self.forward_index.clear();
+        self.id_to_internal.clear();
+        self.internal_to_id.clear();
+        self.next_internal_id = 0;
+        self.total_tokens = 0;
+        self.postings_epoch = INITIAL_POSTINGS_EPOCH;
+        self.idf_cache
+            .cached_doc_count
+            .store(0, AtomicOrdering::Relaxed);
+        if let Ok(mut cache) = self.idf_cache.by_df.write() {
+            cache.clear();
+        }
+        if let Ok(mut block_state) = self.block_max_state.write() {
+            block_state.built_epoch = STALE_BLOCK_MAX_EPOCH;
+            block_state.per_term.clear();
+        }
+    }
+
+    /// Update the O(1) doc_lengths_vec for a given internal id.
+    /// Called on every document insert.
+    #[inline]
+    pub(crate) fn set_doc_length_fast(&mut self, internal_id: u32, length: usize) {
+        let idx = internal_id as usize;
+        if idx >= self.doc_lengths_vec.len() {
+            self.doc_lengths_vec.resize(idx + 1, 0);
+        }
+        self.doc_lengths_vec[idx] = length;
+        // Keep f32 mirror in sync for SIMD batch scoring.
+        if idx >= self.doc_lengths_f32.len() {
+            self.doc_lengths_f32.resize(idx + 1, 0.0);
+        }
+        self.doc_lengths_f32[idx] = length as f32;
+    }
+
+    /// Look up document length by internal id using the fast Vec path.
+    /// Falls back to HashMap if Vec is not yet populated (deserialization).
+    #[inline]
+    pub(crate) fn doc_length_fast(&self, internal_id: u32) -> usize {
+        let idx = internal_id as usize;
+        if idx < self.doc_lengths_vec.len() {
+            self.doc_lengths_vec[idx]
+        } else {
+            self.doc_lengths.get(&internal_id).copied().unwrap_or(0)
+        }
+    }
+
+    /// Rebuild `doc_lengths_vec` and `doc_lengths_f32` from `doc_lengths` HashMap.
+    /// Called after deserialization to populate the fast-path Vecs (see `persist::bm25`).
+    pub fn ensure_doc_lengths_vec(&mut self) {
+        if !self.doc_lengths_vec.is_empty() || self.doc_lengths.is_empty() {
+            return;
+        }
+        let max_id = self.doc_lengths.keys().copied().max().unwrap_or(0) as usize;
+        self.doc_lengths_vec.resize(max_id + 1, 0);
+        self.doc_lengths_f32.resize(max_id + 1, 0.0);
+        for (&id, &len) in &self.doc_lengths {
+            self.doc_lengths_vec[id as usize] = len;
+            self.doc_lengths_f32[id as usize] = len as f32;
+        }
+    }
+
+    /// Get statistics about the index.
+    pub fn stats(&self) -> Bm25Stats {
+        Bm25Stats {
+            doc_count: self.doc_count(),
+            total_tokens: self.total_tokens,
+            avg_doc_length: self.avg_doc_length(),
+            unique_terms: self.inverted_index.len(),
+        }
+    }
+
+    /// Check if the IDF cache is empty.
+    #[cfg(test)]
+    pub(crate) fn is_idf_cache_empty(&self) -> bool {
+        self.idf_cache
+            .by_df
+            .read()
+            .map(|cache| cache.is_empty())
+            .unwrap_or(true)
+    }
+
+    /// Invalidate block-max metadata after a corpus mutation.
+    ///
+    /// Bumps the postings epoch so that the next WAND search lazily rebuilds
+    /// block-max metadata. The IDF cache self-invalidates on the next search
+    /// when it detects that `doc_count()` has changed.
+    #[inline]
+    pub(crate) fn invalidate_block_max_after_mutation(&mut self) {
+        self.postings_epoch = self.postings_epoch.wrapping_add(1);
+        if let Ok(mut block_state) = self.block_max_state.write() {
+            block_state.built_epoch = STALE_BLOCK_MAX_EPOCH;
+            block_state.per_term.clear();
+        }
+    }
+
+    /// Lazily rebuild block-max metadata if the current epoch is stale.
+    pub(crate) fn ensure_block_max_metadata(&self) {
+        let target_epoch = self.postings_epoch;
+
+        if let Ok(block_state) = self.block_max_state.read() {
+            if block_state.built_epoch == target_epoch {
+                return;
+            }
+        }
+
+        let doc_count = self.doc_count();
+        if doc_count == 0 {
+            if let Ok(mut block_state) = self.block_max_state.write() {
+                block_state.built_epoch = target_epoch;
+                block_state.per_term.clear();
+            }
+            return;
+        }
+
+        let avgdl = self.avg_doc_length();
+        let k1 = self.config.k1;
+        let b = self.config.b;
+
+        if let Ok(mut block_state) = self.block_max_state.write() {
+            // Double-check under write lock (another thread may have rebuilt).
+            if block_state.built_epoch == target_epoch {
+                return;
+            }
+
+            let mut per_term = HashMap::with_capacity(self.inverted_index.len());
+            for (term, postings) in &self.inverted_index {
+                let term_meta = build_term_block_max_meta(
+                    postings,
+                    &self.doc_lengths,
+                    self.block_size,
+                    idf_from_doc_freq(postings.len(), doc_count),
+                    avgdl,
+                    k1,
+                    b,
+                );
+                per_term.insert(term.clone(), term_meta);
+            }
+
+            block_state.per_term = per_term;
+            block_state.built_epoch = target_epoch;
+        }
+    }
+}
+
+/// Compute IDF from document frequency using the Robertson-Walker variant.
+///
+/// **PROOF CORRESPONDENCE**: `Lion.Retrieval.BM25.idf_nonneg`
+/// With +1 inside ln(), IDF(t) >= 0 for all terms regardless of document frequency.
+#[inline]
+pub(crate) fn idf_from_doc_freq(doc_freq: usize, doc_count: usize) -> f64 {
+    let n = doc_count as f64;
+    let df = doc_freq as f64;
+    ((n - df + 0.5) / (df + 0.5) + 1.0).ln()
+}
+
+/// Compute a single-term BM25 contribution for a posting.
+///
+/// **PROOF CORRESPONDENCE**: `Lion.Retrieval.BM25.tf_bounded`
+/// TF saturation: tf * (k1 + 1) / (tf + k1 * ...) < k1 + 1 for all tf >= 0.
+#[inline]
+pub(crate) fn bm25_term_score(
+    idf: f64,
+    term_freq: u8,
+    doc_length: usize,
+    avgdl: f64,
+    k1: f64,
+    b: f64,
+) -> f64 {
+    if avgdl <= f64::EPSILON {
+        return 0.0;
+    }
+
+    let tf = term_freq as f64;
+    let numerator = tf * (k1 + 1.0);
+    let denominator = tf + k1 * (1.0 - b + b * (doc_length as f64 / avgdl));
+    idf * (numerator / denominator)
+}
+
+/// Pre-computed BM25 scoring constants for a single term.
+///
+/// Eliminates redundant arithmetic in the tight per-posting scoring loop.
+/// The BM25 formula per posting is:
+///   score = idf * (tf * (k1+1)) / (tf + k1 * (1 - b + b * dl/avgdl))
+///
+/// Pre-computing `k1_plus_1`, `k1_times_one_minus_b`, and `k1_times_b_over_avgdl`
+/// reduces the per-posting work to: 1 multiply, 1 FMA, 1 add, 1 divide, 1 multiply.
+#[derive(Debug, Clone, Copy)]
+pub(crate) struct Bm25TermScorer {
+    pub(crate) idf: f64,
+    /// k1 + 1.0
+    k1_plus_1: f64,
+    /// k1 * (1.0 - b)
+    k1_times_one_minus_b: f64,
+    /// k1 * b / avgdl
+    k1_times_b_over_avgdl: f64,
+}
+
+impl Bm25TermScorer {
+    #[inline]
+    pub(crate) fn new(idf: f64, k1: f64, b: f64, avgdl: f64) -> Self {
+        let inv_avgdl = if avgdl > f64::EPSILON {
+            1.0 / avgdl
+        } else {
+            0.0
+        };
+        Self {
+            idf,
+            k1_plus_1: k1 + 1.0,
+            k1_times_one_minus_b: k1 * (1.0 - b),
+            k1_times_b_over_avgdl: k1 * b * inv_avgdl,
+        }
+    }
+
+    /// IDF value for this term.
+    #[inline]
+    pub(crate) fn idf_f32(&self) -> f32 {
+        self.idf as f32
+    }
+
+    /// Pre-computed k1 + 1.
+    #[inline]
+    pub(crate) fn k1_plus_1_f32(&self) -> f32 {
+        self.k1_plus_1 as f32
+    }
+
+    /// Pre-computed k1 * (1 - b), the constant portion of the denominator.
+    #[inline]
+    pub(crate) fn denom_base_f32(&self) -> f32 {
+        self.k1_times_one_minus_b as f32
+    }
+
+    /// Pre-computed k1 * b / avgdl, the per-doc-length factor in the denominator.
+    #[inline]
+    pub(crate) fn denom_dl_factor_f32(&self) -> f32 {
+        self.k1_times_b_over_avgdl as f32
+    }
+
+    /// Score a posting with pre-computed constants.
+    #[inline]
+    pub(crate) fn score(&self, term_freq: u8, doc_length: usize) -> f64 {
+        let tf = term_freq as f64;
+        let numerator = tf * self.k1_plus_1;
+        let denominator =
+            tf + self.k1_times_one_minus_b + self.k1_times_b_over_avgdl * (doc_length as f64);
+        self.idf * (numerator / denominator)
+    }
+}
+
+fn build_term_block_max_meta(
+    postings: &PostingList,
+    doc_lengths: &HashMap<u32, usize>,
+    block_size: usize,
+    idf: f64,
+    avgdl: f64,
+    k1: f64,
+    b: f64,
+) -> TermBlockMaxMeta {
+    if postings.is_empty() {
+        return TermBlockMaxMeta::default();
+    }
+
+    let n = postings.len();
+    let num_blocks = n.div_ceil(block_size);
+    let mut blocks = Vec::with_capacity(num_blocks);
+
+    for block_idx in 0..num_blocks {
+        let start = block_idx * block_size;
+        let end = (start + block_size).min(n);
+
+        let min_doc_id = postings.doc_ids[start];
+        let max_doc_id = postings.doc_ids[end - 1];
+
+        let mut max_score_contribution = 0.0;
+        for i in start..end {
+            let doc_id = postings.doc_ids[i];
+            let term_freq = postings.term_freqs[i];
+            let doc_length = doc_lengths.get(&doc_id).copied().unwrap_or(0);
+            let score = bm25_term_score(idf, term_freq, doc_length, avgdl, k1, b);
+            if score > max_score_contribution {
+                max_score_contribution = score;
+            }
+        }
+
+        blocks.push(BlockMaxBlock {
+            min_doc_id,
+            max_doc_id,
+            max_score_contribution,
+            suffix_max_score: max_score_contribution,
+        });
+    }
+
+    // Compute suffix-max scores (back to front).
+    let mut suffix_max = 0.0;
+    for block in blocks.iter_mut().rev() {
+        if block.max_score_contribution > suffix_max {
+            suffix_max = block.max_score_contribution;
+        }
+        block.suffix_max_score = suffix_max;
+    }
+
+    TermBlockMaxMeta { blocks }
+}
+
+/// Statistics about a BM25 index.
+#[derive(Debug, Clone, Default)]
+pub struct Bm25Stats {
+    /// Number of indexed documents.
+    pub doc_count: usize,
+    /// Total token count across all documents.
+    pub total_tokens: usize,
+    /// Average document length (in tokens).
+    pub avg_doc_length: f64,
+    /// Number of unique terms in the index.
+    pub unique_terms: usize,
+}
+
+// ── Wire-format fixture: DocumentId ──────────────────────────────────────────
+//
+// These tests lock the JSON wire representation of `DocumentId` after its
+// migration to `transparent_string_newtype!`. Updating this fixture = a
+// wire-format migration that requires a PR-level migration plan.
+#[cfg(test)]
+mod document_id_wire_format {
+    use super::DocumentId;
+
+    /// Frozen wire-format fixture: DocumentId must serialize as a bare JSON string.
+    ///
+    /// Wire format: `"some-document-identifier"` (not `{"0":"..."}` or any other shape).
+    /// This is enforced by `#[serde(transparent)]` in the macro expansion.
+    #[test]
+    fn document_id_serializes_as_plain_string() {
+        let id = DocumentId::new("some-document-identifier");
+        let json = serde_json::to_string(&id).expect("DocumentId serialize");
+        assert_eq!(
+            json, r#""some-document-identifier""#,
+            "wire format drift detected in DocumentId — must be plain JSON string",
+        );
+    }
+
+    #[test]
+    fn document_id_roundtrip() {
+        let id = DocumentId::new("doc_abc_123");
+        let json = serde_json::to_string(&id).expect("serialize");
+        let back: DocumentId = serde_json::from_str(&json).expect("deserialize");
+        assert_eq!(back, id, "serde roundtrip must produce identical value");
+    }
+
+    #[test]
+    fn document_id_empty_string_roundtrip() {
+        let id = DocumentId::new("");
+        let json = serde_json::to_string(&id).expect("serialize");
+        assert_eq!(
+            json, r#""""#,
+            "empty DocumentId must serialize as empty JSON string"
+        );
+        let back: DocumentId = serde_json::from_str(&json).expect("deserialize");
+        assert_eq!(back, id);
+    }
+
+    #[test]
+    fn document_id_unicode_roundtrip() {
+        let id = DocumentId::new("doc_\u{4e2d}\u{6587}");
+        let json = serde_json::to_string(&id).expect("serialize");
+        let back: DocumentId = serde_json::from_str(&json).expect("deserialize");
+        assert_eq!(back, id);
+    }
+}
diff --git a/crates/khive-bm25/src/index/search.rs b/crates/khive-bm25/src/index/search.rs
new file mode 100644
index 00000000..510b0c29
--- /dev/null
+++ b/crates/khive-bm25/src/index/search.rs
@@ -0,0 +1,1623 @@
+//! Search operations for BM25 index.
+//!
+//! # SIMD Acceleration
+//!
+//! The brute-force scoring path uses architecture-specific SIMD to process
+//! postings in parallel:
+//!
+//! - **aarch64 (NEON)**: 4-wide batches using 128-bit NEON registers.
+//! - **x86_64 (AVX2)**: 8-wide batches using 256-bit YMM registers, with
+//!   optional FMA for fused multiply-add in the denominator computation.
+//!   Detected at runtime via `is_x86_feature_detected!`.
+//! - **Scalar fallback**: Used on all other targets or when AVX2 is not
+//!   available at runtime.
+//!
+//! The dispatch happens once per term (not per batch) to avoid repeated
+//! feature checks in the hot loop.
+
+use std::cmp::{Ordering, Reverse};
+use std::collections::BinaryHeap;
+use std::sync::Arc;
+
+use khive_score::DeterministicScore;
+
+use super::{BlockMaxBlock, Bm25Index, Bm25TermScorer, PostingList};
+use crate::metrics::{self, MetricEvent, MetricValue};
+
+/// Postings threshold below which the brute-force SIMD scorer is used instead of
+/// Block-Max WAND. The brute-force path processes postings sequentially in
+/// NEON/scalar batches of 4 with zero cursor/heap overhead, which is faster than
+/// WAND for moderate posting counts. WAND's block-skip pruning only wins when the
+/// total postings are large enough that it can skip significant portions.
+///
+/// Empirically tuned: at ~10K-16K total postings the brute-force SIMD path
+/// matches or beats WAND on aarch64 (Apple M-series). Above 16K the WAND
+/// block-skip savings overcome its per-cursor overhead.
+const SMALL_QUERY_POSTINGS_THRESHOLD: usize = 16_384;
+const TERMINATED_DOC: u32 = u32::MAX;
+
+// ---------------------------------------------------------------------------
+// SIMD batch BM25 scoring (4-wide)
+// ---------------------------------------------------------------------------
+
+/// Batch-score 4 postings using ARM NEON SIMD intrinsics.
+///
+/// Computes the BM25 formula for 4 documents simultaneously:
+/// ```text
+/// score[i] = idf * (tf[i] * k1_plus_1) / (tf[i] + denom_base + denom_dl_factor * doc_len[i])
+/// ```
+///
+/// Term frequencies are provided as `u8` (clamped at indexing time) and
+/// widened to f32 for SIMD arithmetic. All scoring arithmetic is done in
+/// f32 for SIMD throughput. The caller is responsible for converting the
+/// results back to f64 for accumulation.
+///
+/// # Safety
+///
+/// Uses `std::arch::aarch64` NEON intrinsics which require the target to
+/// be an AArch64 CPU. This function is gated by `#[cfg(target_arch = "aarch64")]`
+/// and is only called on ARM64 hardware.
+#[cfg(target_arch = "aarch64")]
+#[inline]
+// SAFETY: Callers only reach this helper on aarch64, and the fixed-size array
+// parameters guarantee the four term-frequency and document-length lanes exist.
+unsafe fn score_batch_neon(
+    term_freqs: &[u8; 4],
+    doc_lengths: &[f32; 4],
+    idf: f32,
+    k1_plus_1: f32,
+    denom_base: f32,
+    denom_dl_factor: f32,
+) -> [f32; 4] {
+    use std::arch::aarch64::*;
+
+    // Widen u8 term frequencies to u32, then convert to f32.
+    let tfs_u32: [u32; 4] = [
+        term_freqs[0] as u32,
+        term_freqs[1] as u32,
+        term_freqs[2] as u32,
+        term_freqs[3] as u32,
+    ];
+    let tf = vcvtq_f32_u32(vld1q_u32(tfs_u32.as_ptr()));
+    // Load 4 pre-converted f32 document lengths.
+    let dl = vld1q_f32(doc_lengths.as_ptr());
+
+    let k1p1 = vdupq_n_f32(k1_plus_1);
+    let base = vdupq_n_f32(denom_base);
+    let dl_fac = vdupq_n_f32(denom_dl_factor);
+    let idf_v = vdupq_n_f32(idf);
+
+    // numerator = tf * k1_plus_1
+    let num = vmulq_f32(tf, k1p1);
+    // denominator = tf + denom_base + denom_dl_factor * doc_len
+    let denom = vaddq_f32(tf, vaddq_f32(base, vmulq_f32(dl_fac, dl)));
+    // score = idf * num / denom
+    let score = vmulq_f32(idf_v, vdivq_f32(num, denom));
+
+    let mut result = [0.0f32; 4];
+    vst1q_f32(result.as_mut_ptr(), score);
+    result
+}
+
+/// Scalar fallback for batch scoring (4-wide).
+///
+/// Computes the same BM25 formula as `score_batch_neon` but using plain
+/// scalar f32 arithmetic. Used when no SIMD path is available.
+#[cfg(not(target_arch = "aarch64"))]
+#[inline]
+fn score_batch_scalar_4(
+    term_freqs: &[u8; 4],
+    doc_lengths: &[f32; 4],
+    idf: f32,
+    k1_plus_1: f32,
+    denom_base: f32,
+    denom_dl_factor: f32,
+) -> [f32; 4] {
+    let mut result = [0.0f32; 4];
+    for i in 0..4 {
+        let tf = term_freqs[i] as f32;
+        let num = tf * k1_plus_1;
+        let denom = tf + denom_base + denom_dl_factor * doc_lengths[i];
+        result[i] = idf * (num / denom);
+    }
+    result
+}
+
+/// Scalar fallback for batch scoring (8-wide).
+///
+/// Computes BM25 scores for 8 postings using plain scalar f32 arithmetic.
+/// Used on x86_64 when AVX2 is not available at runtime.
+#[cfg(not(target_arch = "aarch64"))]
+#[inline]
+fn score_batch_scalar_8(
+    term_freqs: &[u8; 8],
+    doc_lengths: &[f32; 8],
+    idf: f32,
+    k1_plus_1: f32,
+    denom_base: f32,
+    denom_dl_factor: f32,
+) -> [f32; 8] {
+    let mut result = [0.0f32; 8];
+    for i in 0..8 {
+        let tf = term_freqs[i] as f32;
+        let num = tf * k1_plus_1;
+        let denom = tf + denom_base + denom_dl_factor * doc_lengths[i];
+        result[i] = idf * (num / denom);
+    }
+    result
+}
+
+// ---------------------------------------------------------------------------
+// AVX2 batch BM25 scoring (8-wide, x86_64 only)
+// ---------------------------------------------------------------------------
+
+/// Batch-score 8 postings using AVX2 SIMD intrinsics (256-bit, 8 x f32).
+///
+/// Computes the BM25 formula for 8 documents simultaneously:
+/// ```text
+/// score[i] = idf * (tf[i] * k1_plus_1) / (tf[i] + denom_base + denom_dl_factor * doc_len[i])
+/// ```
+///
+/// The u8 term frequencies are widened to i32 via `_mm256_cvtepu8_epi32`
+/// (requires only the low 64 bits of a 128-bit register), then converted
+/// to f32 via `_mm256_cvtepi32_ps`.
+///
+/// Uses full-precision `_mm256_div_ps` for the division. While approximate
+/// reciprocal (`_mm256_rcp_ps` + Newton-Raphson) would save ~5 cycles, the
+/// division is not the bottleneck here -- memory access to doc_lengths is.
+/// Full precision keeps scoring deterministic with the scalar path.
+///
+/// # Safety
+///
+/// Requires the `avx2` target feature. The caller must verify AVX2 support
+/// at runtime via `is_x86_feature_detected!("avx2")` before calling.
+#[cfg(target_arch = "x86_64")]
+#[target_feature(enable = "avx2")]
+#[inline]
+// SAFETY: Callers must select this helper only after AVX2 runtime detection.
+// Fixed-size array parameters guarantee the eight lanes read by the intrinsics.
+unsafe fn score_batch_avx2(
+    term_freqs: &[u8; 8],
+    doc_lengths: &[f32; 8],
+    idf: f32,
+    k1_plus_1: f32,
+    denom_base: f32,
+    denom_dl_factor: f32,
+) -> [f32; 8] {
+    use std::arch::x86_64::*;
+
+    // Load 8 u8 term frequencies from a 64-bit chunk into the low half of
+    // a 128-bit register, then widen u8 -> i32 (AVX2) and convert i32 -> f32.
+    let tfs_raw = _mm_loadl_epi64(term_freqs.as_ptr() as *const __m128i);
+    let tfs_i32 = _mm256_cvtepu8_epi32(tfs_raw);
+    let tf = _mm256_cvtepi32_ps(tfs_i32);
+
+    // Load 8 contiguous f32 doc lengths.
+    let dl = _mm256_loadu_ps(doc_lengths.as_ptr());
+
+    // Broadcast scalar constants to all 8 lanes.
+    let k1p1 = _mm256_set1_ps(k1_plus_1);
+    let base = _mm256_set1_ps(denom_base);
+    let dl_fac = _mm256_set1_ps(denom_dl_factor);
+    let idf_v = _mm256_set1_ps(idf);
+
+    // numerator = tf * k1_plus_1
+    let num = _mm256_mul_ps(tf, k1p1);
+
+    // denominator = tf + denom_base + denom_dl_factor * doc_len
+    //             = tf + (denom_base + denom_dl_factor * doc_len)
+    let dl_term = _mm256_mul_ps(dl_fac, dl);
+    let base_plus_dl = _mm256_add_ps(base, dl_term);
+    let denom = _mm256_add_ps(tf, base_plus_dl);
+
+    // score = idf * (num / denom)
+    let ratio = _mm256_div_ps(num, denom);
+    let score = _mm256_mul_ps(idf_v, ratio);
+
+    let mut result = [0.0f32; 8];
+    _mm256_storeu_ps(result.as_mut_ptr(), score);
+    result
+}
+
+/// AVX2 + FMA variant: uses fused multiply-add for the denominator.
+///
+/// `denom = tf + fma(denom_dl_factor, doc_len, denom_base)`
+///
+/// FMA provides a single-rounding result (vs two roundings for mul+add),
+/// which may produce slightly different scores from the non-FMA path
+/// (within f32 ULP). The performance difference is marginal since div_ps
+/// dominates, but FMA is free when available and reduces instruction count.
+///
+/// # Safety
+///
+/// Requires both `avx2` and `fma` target features.
+#[cfg(target_arch = "x86_64")]
+#[target_feature(enable = "avx2", enable = "fma")]
+#[inline]
+// SAFETY: Callers must select this helper only after AVX2+FMA runtime detection.
+// Fixed-size array parameters guarantee the eight lanes read by the intrinsics.
+unsafe fn score_batch_avx2_fma(
+    term_freqs: &[u8; 8],
+    doc_lengths: &[f32; 8],
+    idf: f32,
+    k1_plus_1: f32,
+    denom_base: f32,
+    denom_dl_factor: f32,
+) -> [f32; 8] {
+    use std::arch::x86_64::*;
+
+    let tfs_raw = _mm_loadl_epi64(term_freqs.as_ptr() as *const __m128i);
+    let tfs_i32 = _mm256_cvtepu8_epi32(tfs_raw);
+    let tf = _mm256_cvtepi32_ps(tfs_i32);
+
+    let dl = _mm256_loadu_ps(doc_lengths.as_ptr());
+
+    let k1p1 = _mm256_set1_ps(k1_plus_1);
+    let base = _mm256_set1_ps(denom_base);
+    let dl_fac = _mm256_set1_ps(denom_dl_factor);
+    let idf_v = _mm256_set1_ps(idf);
+
+    let num = _mm256_mul_ps(tf, k1p1);
+
+    // FMA: denom_dl_factor * doc_len + denom_base (single rounding)
+    let base_plus_dl = _mm256_fmadd_ps(dl_fac, dl, base);
+    let denom = _mm256_add_ps(tf, base_plus_dl);
+
+    let ratio = _mm256_div_ps(num, denom);
+    let score = _mm256_mul_ps(idf_v, ratio);
+
+    let mut result = [0.0f32; 8];
+    _mm256_storeu_ps(result.as_mut_ptr(), score);
+    result
+}
+
+/// Function pointer type for 8-wide batch scoring on x86_64.
+///
+/// Resolved once per term based on runtime CPU feature detection,
+/// avoiding repeated `is_x86_feature_detected!` checks in the hot loop.
+#[cfg(target_arch = "x86_64")]
+// SAFETY: Values of this type are only produced by `select_score_batch_8`,
+// which pairs each unsafe target-feature function with matching CPU detection.
+type ScoreBatch8Fn = unsafe fn(&[u8; 8], &[f32; 8], f32, f32, f32, f32) -> [f32; 8];
+
+/// Select the best 8-wide scoring function for the current CPU.
+///
+/// Priority: AVX2+FMA > AVX2 > scalar fallback.
+/// Called once per term, the returned function pointer is used for all
+/// batches within that term's posting list.
+#[cfg(target_arch = "x86_64")]
+#[inline]
+fn select_score_batch_8() -> ScoreBatch8Fn {
+    if is_x86_feature_detected!("avx2") && is_x86_feature_detected!("fma") {
+        score_batch_avx2_fma
+    } else if is_x86_feature_detected!("avx2") {
+        score_batch_avx2
+    } else {
+        // Scalar fallback when no AVX2.
+        |tfs, dls, idf, k1p1, base, dl_fac| score_batch_scalar_8(tfs, dls, idf, k1p1, base, dl_fac)
+    }
+}
+
+/// Dispatch batch scoring to the appropriate 4-wide implementation.
+///
+/// On aarch64 uses NEON SIMD; on other architectures uses scalar f32.
+#[inline]
+fn score_batch_4(
+    term_freqs: &[u8; 4],
+    doc_lengths: &[f32; 4],
+    idf: f32,
+    k1_plus_1: f32,
+    denom_base: f32,
+    denom_dl_factor: f32,
+) -> [f32; 4] {
+    #[cfg(target_arch = "aarch64")]
+    {
+        // SAFETY: We are on aarch64 (checked by cfg). NEON is baseline on all
+        // AArch64 CPUs (ARMv8-A mandates Advanced SIMD). The input slices are
+        // [T; 4] arrays so alignment and length are guaranteed.
+        unsafe {
+            score_batch_neon(
+                term_freqs,
+                doc_lengths,
+                idf,
+                k1_plus_1,
+                denom_base,
+                denom_dl_factor,
+            )
+        }
+    }
+    #[cfg(not(target_arch = "aarch64"))]
+    {
+        score_batch_scalar_4(
+            term_freqs,
+            doc_lengths,
+            idf,
+            k1_plus_1,
+            denom_base,
+            denom_dl_factor,
+        )
+    }
+}
+
+#[derive(Debug, Clone, Copy)]
+struct HeapEntry {
+    doc_id: u32,
+    score: f64,
+}
+
+impl PartialEq for HeapEntry {
+    fn eq(&self, other: &Self) -> bool {
+        self.doc_id == other.doc_id && self.score.to_bits() == other.score.to_bits()
+    }
+}
+
+impl Eq for HeapEntry {}
+
+impl PartialOrd for HeapEntry {
+    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for HeapEntry {
+    fn cmp(&self, other: &Self) -> Ordering {
+        self.score
+            .total_cmp(&other.score)
+            .then_with(|| other.doc_id.cmp(&self.doc_id))
+    }
+}
+
+#[derive(Debug, Clone, Copy)]
+struct ShallowBlockInfo {
+    max_score: f64,
+    last_doc: u32,
+}
+
+/// Reusable per-query scratch space for [`Bm25Index::search_with_context`].
+///
+/// Every call to [`Bm25Index::search`] allocates a fresh result buffer and
+/// heap. Reusing one context across calls avoids that churn.
+///
+/// The context is automatically cleared at the start of each search call, so
+/// there is no need to call [`clear`](Self::clear) manually between queries.
+///
+/// # Example
+///
+/// ```rust
+/// use khive_bm25::{Bm25Config, Bm25Index, SearchContext};
+///
+/// let mut index = Bm25Index::new(Bm25Config::default());
+/// index.index_document("d1", "quick brown fox").unwrap();
+/// index.index_document("d2", "lazy brown dog").unwrap();
+///
+/// let mut ctx = SearchContext::new();
+/// for query in &["quick fox", "brown dog"] {
+///     let results = index.search_with_context(query, 10, &mut ctx);
+///     // ctx is cleared internally and reused on the next call
+/// }
+/// ```
+pub struct SearchContext {
+    /// Vec-indexed score accumulator for brute-force path.
+    /// Indexed by internal doc_id. Avoids HashMap overhead for dense ID spaces.
+    /// Lazily sized on first use per query.
+    score_vec: Vec<f64>,
+    /// Tracks which doc_ids have non-zero scores in score_vec
+    /// so we can drain results without scanning the entire Vec.
+    touched_docs: Vec<u32>,
+    /// Scratch buffer for sorting results before top-k truncation.
+    results_buf: Vec<(u32, f64)>,
+    /// Internal top-k min-heap for BMW execution.
+    heap: BinaryHeap<Reverse<HeapEntry>>,
+}
+
+impl SearchContext {
+    /// Create a new, empty search context.
+    pub fn new() -> Self {
+        Self {
+            score_vec: Vec::new(),
+            touched_docs: Vec::new(),
+            results_buf: Vec::new(),
+            heap: BinaryHeap::new(),
+        }
+    }
+
+    /// Create a search context pre-allocated for an expected number of matches.
+    pub fn with_capacity(estimated_matches: usize) -> Self {
+        Self {
+            score_vec: Vec::new(),
+            touched_docs: Vec::with_capacity(estimated_matches),
+            results_buf: Vec::with_capacity(estimated_matches),
+            heap: BinaryHeap::with_capacity(estimated_matches.min(64)),
+        }
+    }
+
+    /// Clear all per-query state without releasing heap memory.
+    ///
+    /// Called automatically at the start of each
+    /// [`Bm25Index::search_with_context`] invocation. You only need to call
+    /// this yourself if you want to shrink the context between unrelated
+    /// batches of queries.
+    pub fn clear(&mut self) {
+        // Reset touched entries in score_vec without zeroing the whole vec.
+        for &doc_id in &self.touched_docs {
+            if (doc_id as usize) < self.score_vec.len() {
+                self.score_vec[doc_id as usize] = 0.0;
+            }
+        }
+        self.touched_docs.clear();
+        self.results_buf.clear();
+        self.heap.clear();
+    }
+}
+
+impl Default for SearchContext {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+struct TermCursor<'a> {
+    postings: &'a PostingList,
+    blocks: &'a [BlockMaxBlock],
+    pos: usize,
+    block_size: usize,
+    scorer: Bm25TermScorer,
+}
+
+impl<'a> TermCursor<'a> {
+    #[inline]
+    fn new(
+        postings: &'a PostingList,
+        blocks: &'a [BlockMaxBlock],
+        block_size: usize,
+        scorer: Bm25TermScorer,
+    ) -> Self {
+        Self {
+            postings,
+            blocks,
+            pos: 0,
+            block_size,
+            scorer,
+        }
+    }
+
+    #[inline]
+    fn is_terminated(&self) -> bool {
+        self.pos >= self.postings.len()
+    }
+
+    #[inline]
+    fn doc(&self) -> u32 {
+        if self.pos < self.postings.doc_ids.len() {
+            self.postings.doc_ids[self.pos]
+        } else {
+            TERMINATED_DOC
+        }
+    }
+
+    #[inline]
+    fn current_doc_id(&self) -> u32 {
+        self.postings.doc_ids[self.pos]
+    }
+
+    #[inline]
+    fn current_term_freq(&self) -> u8 {
+        self.postings.term_freqs[self.pos]
+    }
+
+    #[inline]
+    fn current_block_idx(&self) -> Option<usize> {
+        if self.is_terminated() {
+            None
+        } else {
+            Some(self.pos / self.block_size)
+        }
+    }
+
+    #[inline]
+    fn remaining_max_score(&self) -> f64 {
+        self.current_block_idx()
+            .and_then(|idx| self.blocks.get(idx))
+            .map(|block| block.suffix_max_score)
+            .unwrap_or(0.0)
+    }
+
+    #[inline]
+    fn advance(&mut self) -> u32 {
+        if !self.is_terminated() {
+            self.pos += 1;
+        }
+        self.doc()
+    }
+
+    #[inline]
+    fn seek(&mut self, target_doc: u32) -> u32 {
+        if self.is_terminated() {
+            return TERMINATED_DOC;
+        }
+        if self.doc() >= target_doc {
+            return self.doc();
+        }
+
+        let rel = self.postings.doc_ids[self.pos..].partition_point(|&id| id < target_doc);
+        self.pos += rel;
+        self.doc()
+    }
+
+    #[inline]
+    fn shallow_block_info(&self, target_doc: u32) -> Option<ShallowBlockInfo> {
+        let current_block_idx = self.current_block_idx()?;
+        let rel =
+            self.blocks[current_block_idx..].partition_point(|block| block.max_doc_id < target_doc);
+        let block = self.blocks.get(current_block_idx + rel)?;
+        Some(ShallowBlockInfo {
+            max_score: block.max_score_contribution,
+            last_doc: block.max_doc_id,
+        })
+    }
+
+    #[inline]
+    fn score_current(&self, index: &Bm25Index) -> f64 {
+        if self.is_terminated() {
+            return 0.0;
+        }
+        let doc_id = self.current_doc_id();
+        let term_freq = self.current_term_freq();
+        let doc_length = index.doc_length_fast(doc_id);
+        self.scorer.score(term_freq, doc_length)
+    }
+}
+
+impl Bm25Index {
+    /// Search for documents matching the query.
+    ///
+    /// Returns up to `k` documents sorted by BM25 score descending, with
+    /// deterministic internal-doc-id tie-breaking identical to the brute-force
+    /// scorer.
+    ///
+    /// For small queries (total postings < 256), falls back to exhaustive
+    /// brute-force scoring. For larger queries, uses the Block-Max WAND
+    /// algorithm for threshold-based pruning.
+    ///
+    /// # Floating-Point Boundary Conversion
+    ///
+    /// BM25 scoring uses `f64` internally for precision in logarithmic calculations.
+    /// At the API boundary (this method), scores are converted to [`DeterministicScore`]
+    /// which ensures:
+    /// - Canonical representation for cross-platform consistency
+    /// - Safe serialization without precision loss
+    /// - Protection against NaN/Inf propagation
+    ///
+    /// See module-level documentation for cross-platform considerations.
+    ///
+    /// # Concurrency
+    ///
+    /// This method takes `&self` (not `&mut self`) to enable concurrent reads.
+    /// The internal IDF cache and block-max metadata use interior mutability
+    /// (`RwLock`) for thread-safe updates.
+    ///
+    /// Emits `bm25.search.duration_ms`, `bm25.search.count`, and
+    /// `bm25.search.results` metrics when a sink is attached.
+    ///
+    /// **PROOF CORRESPONDENCE**: `Lion.Retrieval.BM25.bm25_nonneg`
+    /// Total BM25 score >= 0 for any query and document, since it is a sum of
+    /// non-negative IDF values multiplied by non-negative TF components.
+    /// Returns up to `k` (id, score) pairs sorted by BM25 score descending.
+    ///
+    /// The `Arc<str>` document IDs are cheaply cloneable shared references into
+    /// the internal reverse-map, avoiding a heap allocation per result.  Callers
+    /// that need a `DocumentId` can construct one via `DocumentId::new(&*arc)`.
+    pub fn search(&self, query_text: &str, k: usize) -> Vec<(Arc<str>, DeterministicScore)> {
+        let mut ctx = SearchContext::new();
+        self.search_with_context(query_text, k, &mut ctx)
+    }
+
+    /// Search for documents matching the query, reusing a [`SearchContext`].
+    ///
+    /// Behaves identically to [`search`](Self::search) but reuses the heap
+    /// memory inside `ctx` across calls, eliminating allocation churn per query.
+    ///
+    /// The context is automatically [`clear`](SearchContext::clear)ed at the
+    /// start of each call, so callers do not need to reset it manually.
+    pub fn search_with_context(
+        &self,
+        query_text: &str,
+        k: usize,
+        ctx: &mut SearchContext,
+    ) -> Vec<(Arc<str>, DeterministicScore)> {
+        let start = std::time::Instant::now();
+
+        let results = self.search_inner(query_text, k, ctx);
+
+        let elapsed = start.elapsed().as_secs_f64() * 1000.0;
+        metrics::emit(
+            &self.metrics,
+            MetricEvent {
+                name: metrics::names::BM25_SEARCH_DURATION_MS,
+                value: MetricValue::Histogram(elapsed),
+                labels: vec![],
+            },
+        );
+        metrics::emit(
+            &self.metrics,
+            MetricEvent {
+                name: metrics::names::BM25_SEARCH_COUNT,
+                value: MetricValue::Counter(1),
+                labels: vec![],
+            },
+        );
+        metrics::emit(
+            &self.metrics,
+            MetricEvent {
+                name: metrics::names::BM25_SEARCH_RESULTS,
+                value: MetricValue::Gauge(results.len() as f64),
+                labels: vec![],
+            },
+        );
+
+        results
+    }
+
+    /// Inner search logic (uninstrumented).
+    ///
+    /// Routes to brute-force for small queries and to Block-Max WAND for
+    /// larger ones.
+    fn search_inner(
+        &self,
+        query_text: &str,
+        k: usize,
+        ctx: &mut SearchContext,
+    ) -> Vec<(Arc<str>, DeterministicScore)> {
+        if k == 0 {
+            ctx.clear();
+            return Vec::new();
+        }
+
+        let query_tokens = self.tokenizer.tokenize(query_text);
+        if query_tokens.is_empty() {
+            ctx.clear();
+            return Vec::new();
+        }
+
+        if self.doc_count() == 0 {
+            ctx.clear();
+            return Vec::new();
+        }
+
+        let total_query_postings: usize = query_tokens
+            .iter()
+            .map(|term| {
+                self.inverted_index
+                    .get(term)
+                    .map(|postings| postings.len())
+                    .unwrap_or(0)
+            })
+            .sum();
+
+        if total_query_postings < SMALL_QUERY_POSTINGS_THRESHOLD {
+            return self.search_brute_force(query_text, k, ctx);
+        }
+
+        self.ensure_block_max_metadata();
+        let block_state_guard = match self.block_max_state.read() {
+            Ok(guard) if guard.built_epoch == self.postings_epoch => guard,
+            _ => return self.search_brute_force(query_text, k, ctx),
+        };
+
+        ctx.clear();
+
+        let doc_count = self.doc_count();
+        let avgdl = self.avg_doc_length();
+        let mut cursors = Vec::with_capacity(query_tokens.len());
+
+        let k1 = self.config.k1;
+        let b = self.config.b;
+
+        for term in &query_tokens {
+            let postings = match self.inverted_index.get(term) {
+                Some(postings) if !postings.is_empty() => postings,
+                _ => continue,
+            };
+            let blocks = match block_state_guard.per_term.get(term) {
+                Some(meta) if !meta.blocks.is_empty() => meta.blocks.as_slice(),
+                _ => continue,
+            };
+            let idf = self.compute_idf(term, doc_count);
+            let scorer = Bm25TermScorer::new(idf, k1, b, avgdl);
+            cursors.push(TermCursor::new(postings, blocks, self.block_size, scorer));
+        }
+
+        if cursors.is_empty() {
+            return Vec::new();
+        }
+
+        sort_and_prune_terminated(&mut cursors);
+
+        while let Some((before_pivot_len, pivot_len, pivot_doc)) =
+            find_pivot_doc(&cursors, current_threshold_score(&ctx.heap, k))
+        {
+            let threshold_score = current_threshold_score(&ctx.heap, k);
+            let block_upper_bound: f64 = cursors[..pivot_len]
+                .iter()
+                .map(|cursor| {
+                    cursor
+                        .shallow_block_info(pivot_doc)
+                        .map(|info| info.max_score)
+                        .unwrap_or(0.0)
+                })
+                .sum();
+
+            // Keep equality as competitive to preserve exact tie handling.
+            if block_upper_bound < threshold_score {
+                advance_one_cursor_past_block(&mut cursors, pivot_len, pivot_doc);
+                if cursors.is_empty() {
+                    break;
+                }
+                continue;
+            }
+
+            if !align_cursors(&mut cursors, pivot_doc, before_pivot_len) {
+                if cursors.is_empty() {
+                    break;
+                }
+                continue;
+            }
+
+            let score: f64 = cursors[..pivot_len]
+                .iter()
+                .map(|cursor| cursor.score_current(self))
+                .sum();
+
+            maybe_push_top_k(
+                &mut ctx.heap,
+                k,
+                HeapEntry {
+                    doc_id: pivot_doc,
+                    score,
+                },
+            );
+
+            advance_all_cursors_on_pivot(&mut cursors, pivot_len);
+            if cursors.is_empty() {
+                break;
+            }
+        }
+
+        heap_to_results(self, ctx)
+    }
+
+    /// Exact exhaustive scorer retained as fallback and for equivalence tests.
+    ///
+    /// The inner loop is SIMD-batched (4-wide NEON on aarch64, scalar f32
+    /// on other targets). Pre-converted f32 document lengths avoid per-scoring
+    /// integer-to-float conversion.
+    ///
+    /// **PROOF CORRESPONDENCE**: `Lion.Retrieval.BM25.tf_bounded`
+    /// TF saturation: tf * (k1 + 1) / (tf + k1 * ...) < k1 + 1 for all tf >= 0.
+    pub(crate) fn search_brute_force(
+        &self,
+        query_text: &str,
+        k: usize,
+        ctx: &mut SearchContext,
+    ) -> Vec<(Arc<str>, DeterministicScore)> {
+        ctx.clear();
+
+        if k == 0 {
+            return Vec::new();
+        }
+
+        let query_tokens = self.tokenizer.tokenize(query_text);
+        if query_tokens.is_empty() {
+            return Vec::new();
+        }
+
+        let doc_count = self.doc_count();
+        if doc_count == 0 {
+            return Vec::new();
+        }
+
+        // Pre-size the score accumulator to the maximum internal doc_id.
+        // This eliminates bounds-check branches in the tight SIMD loop.
+        let max_id = self.next_internal_id as usize;
+        if ctx.score_vec.len() < max_id {
+            ctx.score_vec.resize(max_id, 0.0);
+        }
+
+        let avgdl = self.avg_doc_length();
+        let k1 = self.config.k1;
+        let b = self.config.b;
+
+        // Cache the doc_lengths_f32 slice pointer outside the term loop.
+        // All internal doc_ids are < max_id which is <= doc_lengths_f32.len()
+        // (maintained by set_doc_length_fast on every insert).
+        let dl_f32 = &self.doc_lengths_f32;
+        let scores_vec = &mut ctx.score_vec;
+        let touched = &mut ctx.touched_docs;
+
+        for term in &query_tokens {
+            let postings = match self.inverted_index.get(term) {
+                Some(postings) => postings,
+                None => continue,
+            };
+            let idf = self.compute_idf(term, doc_count);
+            let scorer = Bm25TermScorer::new(idf, k1, b, avgdl);
+
+            // Extract f32 SIMD parameters from the pre-computed scorer.
+            let simd_idf = scorer.idf_f32();
+            let simd_k1p1 = scorer.k1_plus_1_f32();
+            let simd_base = scorer.denom_base_f32();
+            let simd_dl_fac = scorer.denom_dl_factor_f32();
+
+            // SoA layout: doc_ids and term_freqs are separate contiguous arrays.
+            let n = postings.len();
+            let doc_ids = &postings.doc_ids;
+            let tfs_arr = &postings.term_freqs;
+
+            // On x86_64, resolve the best 8-wide scoring function once per term
+            // (AVX2+FMA > AVX2 > scalar) and process in chunks of 8. On aarch64,
+            // process in chunks of 4 using NEON.
+            #[cfg(target_arch = "x86_64")]
+            {
+                let score_fn = select_score_batch_8();
+                let full_chunks_8 = n / 8;
+
+                for chunk_idx in 0..full_chunks_8 {
+                    let base_idx = chunk_idx * 8;
+                    let tfs: [u8; 8] = [
+                        tfs_arr[base_idx],
+                        tfs_arr[base_idx + 1],
+                        tfs_arr[base_idx + 2],
+                        tfs_arr[base_idx + 3],
+                        tfs_arr[base_idx + 4],
+                        tfs_arr[base_idx + 5],
+                        tfs_arr[base_idx + 6],
+                        tfs_arr[base_idx + 7],
+                    ];
+                    let d0 = doc_ids[base_idx] as usize;
+                    let d1 = doc_ids[base_idx + 1] as usize;
+                    let d2 = doc_ids[base_idx + 2] as usize;
+                    let d3 = doc_ids[base_idx + 3] as usize;
+                    let d4 = doc_ids[base_idx + 4] as usize;
+                    let d5 = doc_ids[base_idx + 5] as usize;
+                    let d6 = doc_ids[base_idx + 6] as usize;
+                    let d7 = doc_ids[base_idx + 7] as usize;
+                    let lens = [
+                        dl_f32[d0], dl_f32[d1], dl_f32[d2], dl_f32[d3], dl_f32[d4], dl_f32[d5],
+                        dl_f32[d6], dl_f32[d7],
+                    ];
+                    // SAFETY: score_fn is selected based on runtime CPU feature
+                    // detection; each variant's target_feature attribute matches
+                    // what was detected.
+                    let batch_scores = unsafe {
+                        score_fn(&tfs, &lens, simd_idf, simd_k1p1, simd_base, simd_dl_fac)
+                    };
+
+                    // Accumulate all 8 scores.
+                    macro_rules! accum {
+                        ($idx:expr, $d:expr) => {
+                            if scores_vec[$d] == 0.0 {
+                                touched.push(doc_ids[base_idx + $idx]);
+                            }
+                            scores_vec[$d] += batch_scores[$idx] as f64;
+                        };
+                    }
+                    accum!(0, d0);
+                    accum!(1, d1);
+                    accum!(2, d2);
+                    accum!(3, d3);
+                    accum!(4, d4);
+                    accum!(5, d5);
+                    accum!(6, d6);
+                    accum!(7, d7);
+                }
+
+                // Process remaining 4-7 postings in a 4-wide batch.
+                let remainder_start = full_chunks_8 * 8;
+                let remaining = n - remainder_start;
+                if remaining >= 4 {
+                    let tfs = [
+                        tfs_arr[remainder_start],
+                        tfs_arr[remainder_start + 1],
+                        tfs_arr[remainder_start + 2],
+                        tfs_arr[remainder_start + 3],
+                    ];
+                    let d0 = doc_ids[remainder_start] as usize;
+                    let d1 = doc_ids[remainder_start + 1] as usize;
+                    let d2 = doc_ids[remainder_start + 2] as usize;
+                    let d3 = doc_ids[remainder_start + 3] as usize;
+                    let lens = [dl_f32[d0], dl_f32[d1], dl_f32[d2], dl_f32[d3]];
+                    let batch_scores =
+                        score_batch_4(&tfs, &lens, simd_idf, simd_k1p1, simd_base, simd_dl_fac);
+                    if scores_vec[d0] == 0.0 {
+                        touched.push(doc_ids[remainder_start]);
+                    }
+                    scores_vec[d0] += batch_scores[0] as f64;
+                    if scores_vec[d1] == 0.0 {
+                        touched.push(doc_ids[remainder_start + 1]);
+                    }
+                    scores_vec[d1] += batch_scores[1] as f64;
+                    if scores_vec[d2] == 0.0 {
+                        touched.push(doc_ids[remainder_start + 2]);
+                    }
+                    scores_vec[d2] += batch_scores[2] as f64;
+                    if scores_vec[d3] == 0.0 {
+                        touched.push(doc_ids[remainder_start + 3]);
+                    }
+                    scores_vec[d3] += batch_scores[3] as f64;
+                }
+                let scalar_start = remainder_start + if remaining >= 4 { 4 } else { 0 };
+
+                // Scalar tail for remaining 0-3 postings.
+                for i in scalar_start..n {
+                    let doc_id = doc_ids[i];
+                    let d = doc_id as usize;
+                    let doc_length = self.doc_length_fast(doc_id);
+                    let term_score = scorer.score(tfs_arr[i], doc_length);
+                    if scores_vec[d] == 0.0 {
+                        touched.push(doc_id);
+                    }
+                    scores_vec[d] += term_score;
+                }
+            }
+
+            // aarch64 path: 4-wide NEON batching (unchanged from original).
+            #[cfg(target_arch = "aarch64")]
+            {
+                let full_chunks = n / 4;
+
+                for chunk_idx in 0..full_chunks {
+                    let base_idx = chunk_idx * 4;
+                    let tfs = [
+                        tfs_arr[base_idx],
+                        tfs_arr[base_idx + 1],
+                        tfs_arr[base_idx + 2],
+                        tfs_arr[base_idx + 3],
+                    ];
+                    let d0 = doc_ids[base_idx] as usize;
+                    let d1 = doc_ids[base_idx + 1] as usize;
+                    let d2 = doc_ids[base_idx + 2] as usize;
+                    let d3 = doc_ids[base_idx + 3] as usize;
+                    let lens = [dl_f32[d0], dl_f32[d1], dl_f32[d2], dl_f32[d3]];
+                    let batch_scores =
+                        score_batch_4(&tfs, &lens, simd_idf, simd_k1p1, simd_base, simd_dl_fac);
+                    if scores_vec[d0] == 0.0 {
+                        touched.push(doc_ids[base_idx]);
+                    }
+                    scores_vec[d0] += batch_scores[0] as f64;
+                    if scores_vec[d1] == 0.0 {
+                        touched.push(doc_ids[base_idx + 1]);
+                    }
+                    scores_vec[d1] += batch_scores[1] as f64;
+                    if scores_vec[d2] == 0.0 {
+                        touched.push(doc_ids[base_idx + 2]);
+                    }
+                    scores_vec[d2] += batch_scores[2] as f64;
+                    if scores_vec[d3] == 0.0 {
+                        touched.push(doc_ids[base_idx + 3]);
+                    }
+                    scores_vec[d3] += batch_scores[3] as f64;
+                }
+
+                // Scalar fallback for remaining 0-3 postings.
+                for i in (full_chunks * 4)..n {
+                    let doc_id = doc_ids[i];
+                    let d = doc_id as usize;
+                    let doc_length = self.doc_length_fast(doc_id);
+                    let term_score = scorer.score(tfs_arr[i], doc_length);
+                    if scores_vec[d] == 0.0 {
+                        touched.push(doc_id);
+                    }
+                    scores_vec[d] += term_score;
+                }
+            }
+
+            // Generic fallback for architectures other than x86_64 and aarch64.
+            #[cfg(not(any(target_arch = "x86_64", target_arch = "aarch64")))]
+            {
+                for i in 0..n {
+                    let doc_id = doc_ids[i];
+                    let d = doc_id as usize;
+                    let doc_length = self.doc_length_fast(doc_id);
+                    let term_score = scorer.score(tfs_arr[i], doc_length);
+                    if scores_vec[d] == 0.0 {
+                        touched.push(doc_id);
+                    }
+                    scores_vec[d] += term_score;
+                }
+            }
+        }
+
+        // Drain touched_docs into results buffer.
+        ctx.results_buf.clear();
+        for &doc_id in &ctx.touched_docs {
+            let score = ctx.score_vec[doc_id as usize];
+            if score > 0.0 {
+                ctx.results_buf.push((doc_id, score));
+            }
+        }
+
+        // Partial sort: if we only need k results from a large set, use
+        // select_nth_unstable_by to avoid fully sorting all results.
+        if k < ctx.results_buf.len() {
+            ctx.results_buf
+                .select_nth_unstable_by(k, |a, b| b.1.total_cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
+            ctx.results_buf.truncate(k);
+        }
+        ctx.results_buf
+            .sort_by(|a, b| b.1.total_cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
+
+        ctx.results_buf
+            .iter()
+            .take(k)
+            .filter_map(|(internal_id, score)| {
+                // resolve_internal_id returns Arc<str>; Arc::clone is an atomic
+                // refcount bump — no heap allocation, no memcpy.
+                let doc_id = self.resolve_internal_id(*internal_id)?;
+                Some((doc_id, DeterministicScore::from_f64(*score)))
+            })
+            .collect()
+    }
+
+    /// Compute IDF (Inverse Document Frequency) for a term.
+    ///
+    /// Uses the BM25 IDF formula:
+    /// ```text
+    /// IDF(qi) = ln((N - n(qi) + 0.5) / (n(qi) + 0.5) + 1)
+    /// ```
+    ///
+    /// This variant always returns non-negative IDF (Robertson-Walker variant).
+    /// Uses interior mutability for cache updates to enable concurrent reads.
+    ///
+    /// **PROOF CORRESPONDENCE**: `Lion.Retrieval.BM25.idf_nonneg`
+    /// With +1 inside ln(), IDF(t) >= 0 for all terms regardless of document frequency.
+    ///
+    /// **PROOF CORRESPONDENCE**: `Lion.Retrieval.BM25.idf_mono`
+    /// Rarer terms have higher IDF: n1 < n2 implies IDF(n1) > IDF(n2).
+    pub(super) fn compute_idf(&self, term: &str, doc_count: usize) -> f64 {
+        use std::sync::atomic::Ordering as AtomicOrdering;
+
+        // If N changed since the cache was last populated, invalidate everything.
+        let cached_n = self
+            .idf_cache
+            .cached_doc_count
+            .load(AtomicOrdering::Relaxed);
+        if cached_n != doc_count {
+            if let Ok(mut cache) = self.idf_cache.by_df.write() {
+                // Double-check after acquiring the write lock to avoid races
+                // where another thread already cleared + updated.
+                let recheck = self
+                    .idf_cache
+                    .cached_doc_count
+                    .load(AtomicOrdering::Relaxed);
+                if recheck != doc_count {
+                    cache.clear();
+                    self.idf_cache
+                        .cached_doc_count
+                        .store(doc_count, AtomicOrdering::Relaxed);
+                }
+            }
+        }
+
+        let doc_freq = self.inverted_index.get(term).map(|p| p.len()).unwrap_or(0);
+
+        // Check cache by df (read lock)
+        if let Ok(cache) = self.idf_cache.by_df.read() {
+            if let Some(&cached) = cache.get(&doc_freq) {
+                return cached;
+            }
+        }
+
+        let idf = super::idf_from_doc_freq(doc_freq, doc_count);
+
+        // Cache by df and return (write lock)
+        if let Ok(mut cache) = self.idf_cache.by_df.write() {
+            cache.insert(doc_freq, idf);
+        }
+
+        idf
+    }
+}
+
+fn heap_to_results(
+    index: &Bm25Index,
+    ctx: &mut SearchContext,
+) -> Vec<(Arc<str>, DeterministicScore)> {
+    ctx.results_buf.clear();
+
+    while let Some(Reverse(entry)) = ctx.heap.pop() {
+        ctx.results_buf.push((entry.doc_id, entry.score));
+    }
+
+    ctx.results_buf
+        .sort_by(|a, b| b.1.total_cmp(&a.1).then_with(|| a.0.cmp(&b.0)));
+
+    ctx.results_buf
+        .iter()
+        .filter_map(|(internal_id, score)| {
+            // resolve_internal_id returns Arc<str>; clone = atomic refcount bump.
+            let doc_id = index.resolve_internal_id(*internal_id)?;
+            Some((doc_id, DeterministicScore::from_f64(*score)))
+        })
+        .collect()
+}
+
+fn current_threshold_score(heap: &BinaryHeap<Reverse<HeapEntry>>, k: usize) -> f64 {
+    if heap.len() < k {
+        0.0
+    } else {
+        heap.peek().map(|entry| entry.0.score).unwrap_or(0.0)
+    }
+}
+
+fn maybe_push_top_k(heap: &mut BinaryHeap<Reverse<HeapEntry>>, k: usize, candidate: HeapEntry) {
+    if k == 0 {
+        return;
+    }
+
+    if heap.len() < k {
+        heap.push(Reverse(candidate));
+        return;
+    }
+
+    let should_replace = heap.peek().map(|worst| candidate > worst.0).unwrap_or(true);
+    if should_replace {
+        let _ = heap.pop();
+        heap.push(Reverse(candidate));
+    }
+}
+
+fn find_pivot_doc(cursors: &[TermCursor<'_>], threshold: f64) -> Option<(usize, usize, u32)> {
+    let mut upper_bound_sum = 0.0;
+    let mut before_pivot_len = 0usize;
+    let mut pivot_doc = TERMINATED_DOC;
+
+    while before_pivot_len < cursors.len() {
+        upper_bound_sum += cursors[before_pivot_len].remaining_max_score();
+        if upper_bound_sum >= threshold {
+            pivot_doc = cursors[before_pivot_len].doc();
+            break;
+        }
+        before_pivot_len += 1;
+    }
+
+    if pivot_doc == TERMINATED_DOC {
+        return None;
+    }
+
+    let mut pivot_len = before_pivot_len + 1;
+    while pivot_len < cursors.len() && cursors[pivot_len].doc() == pivot_doc {
+        pivot_len += 1;
+    }
+
+    Some((before_pivot_len, pivot_len, pivot_doc))
+}
+
+fn align_cursors(
+    cursors: &mut Vec<TermCursor<'_>>,
+    pivot_doc: u32,
+    before_pivot_len: usize,
+) -> bool {
+    debug_assert_ne!(pivot_doc, TERMINATED_DOC);
+
+    for idx in (0..before_pivot_len).rev() {
+        let new_doc = cursors[idx].seek(pivot_doc);
+        if new_doc != pivot_doc {
+            sort_and_prune_terminated(cursors);
+            return false;
+        }
+    }
+
+    true
+}
+
+fn advance_all_cursors_on_pivot(cursors: &mut Vec<TermCursor<'_>>, pivot_len: usize) {
+    for cursor in &mut cursors[..pivot_len] {
+        cursor.advance();
+    }
+    sort_and_prune_terminated(cursors);
+}
+
+/// Advance one cursor past the current block when the block-level upper
+/// bound is below the threshold.
+///
+/// Selects the cursor whose current block ends **earliest** (minimum
+/// `last_doc`) among the pivot cursors. This minimizes skip distance and
+/// is the correct BMW cursor selection strategy -- advancing past the
+/// smallest block boundary guarantees forward progress with minimal
+/// overshoot. The seek target is that earliest block end + 1, bounded
+/// by the smallest doc_id among non-pivot cursors so we do not overshoot
+/// documents that other cursors still reference.
+fn advance_one_cursor_past_block(
+    cursors: &mut Vec<TermCursor<'_>>,
+    pivot_len: usize,
+    pivot_doc: u32,
+) {
+    let mut cursor_to_seek = None;
+    let mut earliest_block_end = TERMINATED_DOC;
+    let mut doc_to_seek_after = TERMINATED_DOC;
+
+    for (idx, cursor) in cursors[..pivot_len].iter().enumerate() {
+        if let Some(info) = cursor.shallow_block_info(pivot_doc) {
+            if info.last_doc < doc_to_seek_after {
+                doc_to_seek_after = info.last_doc;
+            }
+            // Select the cursor with the earliest block end (minimum last_doc).
+            // This minimizes skip distance for optimal BMW pruning.
+            if info.last_doc < earliest_block_end {
+                earliest_block_end = info.last_doc;
+                cursor_to_seek = Some(idx);
+            }
+        }
+    }
+
+    if doc_to_seek_after != TERMINATED_DOC {
+        doc_to_seek_after = doc_to_seek_after.saturating_add(1);
+    }
+
+    for cursor in &cursors[pivot_len..] {
+        let doc = cursor.doc();
+        if doc < doc_to_seek_after {
+            doc_to_seek_after = doc;
+        }
+    }
+
+    if let Some(idx) = cursor_to_seek {
+        // Ensure forward progress: if the non-pivot cap reduced doc_to_seek_after
+        // to at or below the cursor's current position, the seek would be a no-op.
+        // This can happen when a non-pivot cursor points to a doc_id smaller than
+        // the block-end target (e.g., a short posting list cursor at doc 3 while
+        // the chosen cursor is already at doc 150). Force at least +1 advance.
+        let current_doc = cursors[idx].doc();
+        if doc_to_seek_after <= current_doc {
+            doc_to_seek_after = current_doc.saturating_add(1);
+        }
+        cursors[idx].seek(doc_to_seek_after);
+    }
+
+    sort_and_prune_terminated(cursors);
+}
+
+fn sort_and_prune_terminated(cursors: &mut Vec<TermCursor<'_>>) {
+    cursors.retain(|cursor| !cursor.is_terminated());
+    cursors.sort_by_key(|cursor| cursor.doc());
+}
+
+// ---------------------------------------------------------------------------
+// Tests: SIMD batch scoring parity and edge cases
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests_simd_scoring {
+    use super::*;
+
+    /// Reference scalar BM25 score for a single posting.
+    fn scalar_bm25(tf: u8, doc_len: f32, idf: f32, k1p1: f32, base: f32, dl_fac: f32) -> f32 {
+        let tf = tf as f32;
+        let num = tf * k1p1;
+        let denom = tf + base + dl_fac * doc_len;
+        idf * (num / denom)
+    }
+
+    /// Compute reference scores for an arbitrary-length batch using scalar code.
+    fn reference_scores(
+        tfs: &[u8],
+        dls: &[f32],
+        idf: f32,
+        k1p1: f32,
+        base: f32,
+        dl_fac: f32,
+    ) -> Vec<f32> {
+        tfs.iter()
+            .zip(dls.iter())
+            .map(|(&tf, &dl)| scalar_bm25(tf, dl, idf, k1p1, base, dl_fac))
+            .collect()
+    }
+
+    // Test parameters (standard BM25 with k1=1.2, b=0.75, avgdl=10.0)
+    const TEST_IDF: f32 = 1.5;
+    const TEST_K1P1: f32 = 2.2; // k1 + 1 = 1.2 + 1
+    const TEST_BASE: f32 = 0.3; // k1 * (1 - b) = 1.2 * 0.25
+    const TEST_DL_FAC: f32 = 0.09; // k1 * b / avgdl = 1.2 * 0.75 / 10.0
+
+    // -----------------------------------------------------------------------
+    // Test 1: scalar_4 vs reference (parity check)
+    // -----------------------------------------------------------------------
+
+    #[test]
+    fn test_score_batch_4_matches_scalar() {
+        let tfs: [u8; 4] = [1, 3, 5, 10];
+        let dls: [f32; 4] = [8.0, 12.0, 5.0, 20.0];
+
+        let batch = score_batch_4(&tfs, &dls, TEST_IDF, TEST_K1P1, TEST_BASE, TEST_DL_FAC);
+        let reference = reference_scores(&tfs, &dls, TEST_IDF, TEST_K1P1, TEST_BASE, TEST_DL_FAC);
+
+        for i in 0..4 {
+            assert!(
+                (batch[i] - reference[i]).abs() < 1e-6,
+                "batch_4[{i}] = {}, expected {} (delta {})",
+                batch[i],
+                reference[i],
+                (batch[i] - reference[i]).abs()
+            );
+        }
+    }
+
+    // -----------------------------------------------------------------------
+    // Test 2: x86_64 AVX2 8-wide vs scalar parity
+    // -----------------------------------------------------------------------
+
+    #[cfg(target_arch = "x86_64")]
+    #[test]
+    fn test_avx2_matches_scalar_basic() {
+        if !is_x86_feature_detected!("avx2") {
+            eprintln!("AVX2 not available, skipping test");
+            return;
+        }
+
+        let tfs: [u8; 8] = [1, 2, 3, 5, 8, 13, 21, 34];
+        let dls: [f32; 8] = [5.0, 10.0, 15.0, 20.0, 25.0, 30.0, 35.0, 40.0];
+
+        // SAFETY: The test returns early unless AVX2 is detected, and the
+        // fixed-size arrays provide all lanes consumed by the helper.
+        let avx2_result =
+            unsafe { score_batch_avx2(&tfs, &dls, TEST_IDF, TEST_K1P1, TEST_BASE, TEST_DL_FAC) };
+        let reference = reference_scores(&tfs, &dls, TEST_IDF, TEST_K1P1, TEST_BASE, TEST_DL_FAC);
+
+        for i in 0..8 {
+            assert!(
+                (avx2_result[i] - reference[i]).abs() < 1e-6,
+                "avx2[{i}] = {}, expected {} (delta {})",
+                avx2_result[i],
+                reference[i],
+                (avx2_result[i] - reference[i]).abs()
+            );
+        }
+    }
+
+    // -----------------------------------------------------------------------
+    // Test 3: AVX2+FMA vs scalar (slightly relaxed tolerance due to FMA rounding)
+    // -----------------------------------------------------------------------
+
+    #[cfg(target_arch = "x86_64")]
+    #[test]
+    fn test_avx2_fma_matches_scalar() {
+        if !is_x86_feature_detected!("avx2") || !is_x86_feature_detected!("fma") {
+            eprintln!("AVX2+FMA not available, skipping test");
+            return;
+        }
+
+        let tfs: [u8; 8] = [0, 1, 127, 255, 42, 7, 99, 200];
+        let dls: [f32; 8] = [1.0, 2.0, 100.0, 0.5, 10.0, 50.0, 3.0, 1000.0];
+
+        // SAFETY: The test returns early unless AVX2+FMA is detected, and the
+        // fixed-size arrays provide all lanes consumed by the helper.
+        let fma_result = unsafe {
+            score_batch_avx2_fma(&tfs, &dls, TEST_IDF, TEST_K1P1, TEST_BASE, TEST_DL_FAC)
+        };
+        let reference = reference_scores(&tfs, &dls, TEST_IDF, TEST_K1P1, TEST_BASE, TEST_DL_FAC);
+
+        // FMA has single rounding vs two roundings in mul+add, so allow slightly
+        // more tolerance (1 ULP of f32 ~ 1.19e-7, we allow ~10 ULPs).
+        for i in 0..8 {
+            let tol = reference[i].abs() * 1e-6 + 1e-7;
+            assert!(
+                (fma_result[i] - reference[i]).abs() < tol,
+                "fma[{i}] = {}, expected {} (delta {}, tol {})",
+                fma_result[i],
+                reference[i],
+                (fma_result[i] - reference[i]).abs(),
+                tol
+            );
+        }
+    }
+
+    // -----------------------------------------------------------------------
+    // Test 4: x86_64 dispatch function selects correctly and produces correct results
+    // -----------------------------------------------------------------------
+
+    #[cfg(target_arch = "x86_64")]
+    #[test]
+    fn test_dispatch_score_batch_8() {
+        let score_fn = select_score_batch_8();
+
+        let tfs: [u8; 8] = [3, 7, 1, 15, 0, 255, 128, 50];
+        let dls: [f32; 8] = [10.0, 5.0, 20.0, 8.0, 100.0, 1.0, 15.0, 30.0];
+
+        // SAFETY: `select_score_batch_8` only returns a target-feature helper
+        // after matching runtime CPU detection; otherwise it returns scalar.
+        let dispatched =
+            unsafe { score_fn(&tfs, &dls, TEST_IDF, TEST_K1P1, TEST_BASE, TEST_DL_FAC) };
+        let reference = reference_scores(&tfs, &dls, TEST_IDF, TEST_K1P1, TEST_BASE, TEST_DL_FAC);
+
+        for i in 0..8 {
+            let tol = reference[i].abs() * 1e-5 + 1e-7;
+            assert!(
+                (dispatched[i] - reference[i]).abs() < tol,
+                "dispatch[{i}] = {}, expected {} (delta {})",
+                dispatched[i],
+                reference[i],
+                (dispatched[i] - reference[i]).abs()
+            );
+        }
+    }
+
+    // -----------------------------------------------------------------------
+    // Test 5: Edge case -- tf=0 produces zero score
+    // -----------------------------------------------------------------------
+
+    #[test]
+    fn test_tf_zero_produces_zero_score() {
+        let tfs_4: [u8; 4] = [0, 0, 0, 0];
+        let dls_4: [f32; 4] = [10.0, 20.0, 5.0, 1.0];
+        let result = score_batch_4(&tfs_4, &dls_4, TEST_IDF, TEST_K1P1, TEST_BASE, TEST_DL_FAC);
+        for i in 0..4 {
+            assert!(
+                result[i].abs() < 1e-10,
+                "tf=0 should produce ~0 score, got {}",
+                result[i]
+            );
+        }
+
+        #[cfg(target_arch = "x86_64")]
+        if is_x86_feature_detected!("avx2") {
+            let tfs_8: [u8; 8] = [0; 8];
+            let dls_8: [f32; 8] = [1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0];
+            // SAFETY: This branch only runs when AVX2 is detected, and the
+            // fixed-size arrays provide all lanes consumed by the helper.
+            let result = unsafe {
+                score_batch_avx2(&tfs_8, &dls_8, TEST_IDF, TEST_K1P1, TEST_BASE, TEST_DL_FAC)
+            };
+            for i in 0..8 {
+                assert!(
+                    result[i].abs() < 1e-10,
+                    "avx2 tf=0 should produce ~0 score, got {}",
+                    result[i]
+                );
+            }
+        }
+    }
+
+    // -----------------------------------------------------------------------
+    // Test 6: Edge case -- very large doc_length
+    // -----------------------------------------------------------------------
+
+    #[test]
+    fn test_large_doc_length() {
+        let tfs: [u8; 4] = [5, 10, 20, 50];
+        let dls: [f32; 4] = [1e6, 1e6, 1e6, 1e6];
+        let result = score_batch_4(&tfs, &dls, TEST_IDF, TEST_K1P1, TEST_BASE, TEST_DL_FAC);
+        let reference = reference_scores(&tfs, &dls, TEST_IDF, TEST_K1P1, TEST_BASE, TEST_DL_FAC);
+
+        for i in 0..4 {
+            // Very large doc_length pushes scores toward zero but they should
+            // still be positive and match scalar.
+            assert!(result[i] > 0.0, "score should be positive");
+            assert!(
+                (result[i] - reference[i]).abs() < 1e-6,
+                "large dl mismatch at [{i}]: {} vs {}",
+                result[i],
+                reference[i]
+            );
+        }
+    }
+
+    // -----------------------------------------------------------------------
+    // Test 7: Edge case -- max tf (255)
+    // -----------------------------------------------------------------------
+
+    #[test]
+    fn test_max_tf() {
+        let tfs: [u8; 4] = [255, 255, 255, 255];
+        let dls: [f32; 4] = [10.0, 10.0, 10.0, 10.0];
+        let result = score_batch_4(&tfs, &dls, TEST_IDF, TEST_K1P1, TEST_BASE, TEST_DL_FAC);
+        let reference = reference_scores(&tfs, &dls, TEST_IDF, TEST_K1P1, TEST_BASE, TEST_DL_FAC);
+
+        for i in 0..4 {
+            assert!(
+                (result[i] - reference[i]).abs() < 1e-5,
+                "max tf mismatch at [{i}]: {} vs {}",
+                result[i],
+                reference[i]
+            );
+        }
+    }
+
+    // -----------------------------------------------------------------------
+    // Test 8: Integration test -- brute-force search with various posting lengths
+    // Exercises batch sizes 1, 7, 8, 16, 100 by indexing documents.
+    // -----------------------------------------------------------------------
+
+    #[test]
+    fn test_brute_force_search_various_sizes() {
+        use crate::{Bm25Config, Bm25Index};
+
+        let mut index = Bm25Index::new(Bm25Config::default());
+
+        // Index enough documents to exercise different batch sizes.
+        // The word "alpha" appears in all 100 docs, giving a posting list of 100.
+        // The word "beta" appears in 16 docs.
+        // The word "gamma" appears in 8 docs.
+        // The word "delta" appears in 7 docs.
+        // The word "epsilon" appears in 1 doc.
+        for i in 0..100 {
+            let mut text = format!("alpha doc{i}");
+            if i < 16 {
+                text.push_str(" beta");
+            }
+            if i < 8 {
+                text.push_str(" gamma");
+            }
+            if i < 7 {
+                text.push_str(" delta");
+            }
+            if i == 0 {
+                text.push_str(" epsilon");
+            }
+            index.index_document(format!("doc{i}"), &text).unwrap();
+        }
+
+        // Each query exercises a different posting list length through brute-force.
+        let mut ctx = SearchContext::new();
+        for query in &["alpha", "beta", "gamma", "delta", "epsilon"] {
+            let results = index.search_with_context(query, 10, &mut ctx);
+            assert!(!results.is_empty(), "query '{query}' should return results");
+            // All scores should be positive.
+            for (doc_id, score) in &results {
+                assert!(
+                    score.to_f64() > 0.0,
+                    "query '{query}', doc '{doc_id}': score should be positive"
+                );
+            }
+        }
+
+        // Multi-term query exercises score accumulation across terms.
+        let results = index.search_with_context("alpha beta gamma", 5, &mut ctx);
+        assert!(!results.is_empty());
+        // The first result should be a doc that contains all three terms.
+        let (top_doc, _) = &results[0];
+        let top_id: usize = top_doc.strip_prefix("doc").unwrap().parse().unwrap();
+        assert!(
+            top_id < 8,
+            "top result should be a doc with all 3 terms (doc0-doc7), got doc{top_id}"
+        );
+    }
+
+    // -----------------------------------------------------------------------
+    // Test 9: scalar_8 matches reference (non-SIMD path)
+    // -----------------------------------------------------------------------
+
+    #[cfg(not(target_arch = "aarch64"))]
+    #[test]
+    fn test_score_batch_scalar_8_matches_reference() {
+        let tfs: [u8; 8] = [1, 5, 10, 20, 50, 100, 200, 255];
+        let dls: [f32; 8] = [3.0, 7.0, 15.0, 25.0, 50.0, 100.0, 200.0, 500.0];
+
+        let result = score_batch_scalar_8(&tfs, &dls, TEST_IDF, TEST_K1P1, TEST_BASE, TEST_DL_FAC);
+        let reference = reference_scores(&tfs, &dls, TEST_IDF, TEST_K1P1, TEST_BASE, TEST_DL_FAC);
+
+        for i in 0..8 {
+            assert!(
+                (result[i] - reference[i]).abs() < 1e-7,
+                "scalar_8[{i}] = {}, expected {}",
+                result[i],
+                reference[i]
+            );
+        }
+    }
+
+    // -----------------------------------------------------------------------
+    // Test 10: Empty posting list handled correctly (no panic)
+    // -----------------------------------------------------------------------
+
+    #[test]
+    fn test_empty_posting_list_search() {
+        use crate::{Bm25Config, Bm25Index};
+
+        let mut index = Bm25Index::new(Bm25Config::default());
+        index.index_document("doc1", "hello world").unwrap();
+
+        // Search for a term not in the index.
+        let results = index.search("nonexistent", 10);
+        assert!(results.is_empty());
+    }
+}
diff --git a/crates/khive-bm25/src/index/tests_wand.rs b/crates/khive-bm25/src/index/tests_wand.rs
new file mode 100644
index 00000000..000109df
--- /dev/null
+++ b/crates/khive-bm25/src/index/tests_wand.rs
@@ -0,0 +1,329 @@
+use std::sync::Arc;
+
+use khive_score::DeterministicScore;
+
+use super::{Bm25Index, SearchContext, DEFAULT_BLOCK_SIZE};
+use crate::config::Bm25Config;
+
+#[derive(Clone)]
+struct XorShift64 {
+    state: u64,
+}
+
+impl XorShift64 {
+    fn new(seed: u64) -> Self {
+        Self { state: seed.max(1) }
+    }
+
+    fn next_u64(&mut self) -> u64 {
+        let mut x = self.state;
+        x ^= x << 13;
+        x ^= x >> 7;
+        x ^= x << 17;
+        self.state = x;
+        x
+    }
+
+    fn next_f64(&mut self) -> f64 {
+        ((self.next_u64() >> 11) as f64) / ((1u64 << 53) as f64)
+    }
+
+    fn gen_range(&mut self, upper: usize) -> usize {
+        if upper <= 1 {
+            0
+        } else {
+            (self.next_u64() as usize) % upper
+        }
+    }
+}
+
+struct ZipfSampler {
+    cdf: Vec<f64>,
+}
+
+impl ZipfSampler {
+    fn new(vocab_size: usize, exponent: f64) -> Self {
+        let mut cumulative = Vec::with_capacity(vocab_size);
+        let mut running = 0.0;
+        for rank in 1..=vocab_size {
+            running += 1.0 / (rank as f64).powf(exponent);
+            cumulative.push(running);
+        }
+        for value in &mut cumulative {
+            *value /= running;
+        }
+        Self { cdf: cumulative }
+    }
+
+    fn sample(&self, rng: &mut XorShift64) -> usize {
+        let needle = rng.next_f64();
+        let idx = self.cdf.partition_point(|value| *value < needle);
+        idx.min(self.cdf.len().saturating_sub(1))
+    }
+}
+
+fn build_vocab(size: usize) -> Vec<String> {
+    (0..size).map(|idx| format!("tok_{idx:04}")).collect()
+}
+
+fn build_zipf_corpus(index: &mut Bm25Index, doc_count: usize, seed: u64) {
+    let vocab = build_vocab(512);
+    let zipf = ZipfSampler::new(vocab.len(), 1.07);
+    let mut rng = XorShift64::new(seed);
+
+    for doc_idx in 0..doc_count {
+        let len = 16 + rng.gen_range(48);
+        let mut text = String::new();
+        for token_idx in 0..len {
+            if token_idx > 0 {
+                text.push(' ');
+            }
+            let token = &vocab[zipf.sample(&mut rng)];
+            text.push_str(token);
+        }
+        index
+            .index_document(format!("doc_{doc_idx}"), &text)
+            .expect("synthetic document should index");
+    }
+}
+
+fn build_query(vocab: &[String], zipf: &ZipfSampler, rng: &mut XorShift64, terms: usize) -> String {
+    let mut query = String::new();
+    for idx in 0..terms {
+        if idx > 0 {
+            query.push(' ');
+        }
+        if rng.gen_range(10) == 0 {
+            query.push_str("missing_term");
+        } else {
+            query.push_str(&vocab[zipf.sample(rng)]);
+        }
+    }
+    query
+}
+
+/// Assert that two result lists are equivalent within floating-point tolerance.
+///
+/// Due to floating-point accumulation order differences between brute-force
+/// and WAND (cursors are sorted by doc_id, not by query term order), documents
+/// with extremely close raw f64 scores may be ordered differently or swapped
+/// at the k-th boundary. After `DeterministicScore` quantization, documents
+/// that had slightly different raw f64 scores may appear with the same score.
+///
+/// This comparator verifies:
+/// 1. Same result count
+/// 2. Score at each rank matches within a small relative tolerance
+/// 3. Documents that differ must have scores within floating-point tolerance
+///    of the boundary score (the k-th score)
+fn assert_same_results(
+    expected: &[(Arc<str>, DeterministicScore)],
+    actual: &[(Arc<str>, DeterministicScore)],
+    context: &str,
+) {
+    assert_eq!(
+        expected.len(),
+        actual.len(),
+        "result length mismatch for {context}"
+    );
+
+    if expected.is_empty() {
+        return;
+    }
+
+    // Relative tolerance for score comparison.
+    // The brute-force path uses f32 SIMD batch scoring (NEON/scalar) then
+    // accumulates into f64, while the WAND path scores entirely in f64.
+    // f32 has ~7 decimal digits of precision, so per-term error is ~1e-7.
+    // With multi-term queries the error accumulates additively, so we use
+    // 1e-6 to accommodate up to ~10 query terms with comfortable margin.
+    let rel_tol = 1e-6;
+
+    // Get the boundary score (the score at the last position).
+    let _boundary_score = expected.last().unwrap().1.to_f64();
+
+    for (rank, ((expected_doc, expected_score), (actual_doc, actual_score))) in
+        expected.iter().zip(actual.iter()).enumerate()
+    {
+        let exp_s = expected_score.to_f64();
+        let act_s = actual_score.to_f64();
+
+        // Scores at each rank should be very close.
+        let score_diff = (exp_s - act_s).abs();
+        let tol = rel_tol * exp_s.abs().max(1.0);
+        assert!(
+            score_diff <= tol,
+            "score mismatch at rank {rank} for {context}: expected {exp_s} got {act_s} (diff={score_diff})"
+        );
+
+        // If documents differ, their scores must be within f32 precision of
+        // each other. The brute-force path uses f32 SIMD while WAND uses f64,
+        // so documents with nearly-identical f64 scores may swap positions when
+        // one path computes a slightly different value due to f32 rounding.
+        if expected_doc != actual_doc {
+            let mutual_diff = (exp_s - act_s).abs();
+            let mutual_tol = rel_tol * exp_s.abs().max(1.0);
+            assert!(
+                mutual_diff <= mutual_tol,
+                "doc mismatch at rank {rank} with divergent scores for {context}: \
+                 expected=({expected_doc}, {exp_s}) actual=({actual_doc}, {act_s}) diff={mutual_diff} tol={mutual_tol}"
+            );
+        }
+    }
+}
+
+#[test]
+fn bmw_matches_bruteforce_on_random_zipf_corpora() {
+    let vocab = build_vocab(512);
+    let zipf = ZipfSampler::new(vocab.len(), 1.07);
+
+    for (case_idx, &doc_count) in [1_000usize, 2_500, 10_000].iter().enumerate() {
+        let mut index = Bm25Index::new(Bm25Config::default());
+        build_zipf_corpus(&mut index, doc_count, 0xC0FFEE + case_idx as u64);
+
+        let mut rng = XorShift64::new(0xBAD5EED + doc_count as u64);
+        let mut brute_ctx = SearchContext::with_capacity(256);
+        let mut wand_ctx = SearchContext::with_capacity(256);
+
+        for query_idx in 0..256 {
+            let term_count = 1 + rng.gen_range(5);
+            let query = build_query(&vocab, &zipf, &mut rng, term_count);
+            let k = [1usize, 3, 5, 10, 25][rng.gen_range(5)];
+
+            let brute = index.search_brute_force(&query, k, &mut brute_ctx);
+            let wand = index.search_with_context(&query, k, &mut wand_ctx);
+
+            assert_same_results(
+                &brute,
+                &wand,
+                &format!("doc_count={doc_count}, query_idx={query_idx}, query='{query}', k={k}"),
+            );
+        }
+    }
+}
+
+#[test]
+fn bmw_handles_empty_index_and_zero_k() {
+    let index = Bm25Index::new(Bm25Config::default());
+    let mut ctx = SearchContext::new();
+
+    assert!(index
+        .search_with_context("alpha beta", 10, &mut ctx)
+        .is_empty());
+    assert!(index
+        .search_brute_force("alpha beta", 10, &mut ctx)
+        .is_empty());
+    assert!(index
+        .search_with_context("alpha beta", 0, &mut ctx)
+        .is_empty());
+}
+
+#[test]
+fn bmw_handles_single_document_and_large_k() {
+    let mut index = Bm25Index::new(Bm25Config::default());
+    index
+        .index_document("doc1", "alpha beta gamma alpha")
+        .unwrap();
+
+    let mut brute_ctx = SearchContext::new();
+    let mut wand_ctx = SearchContext::new();
+
+    let brute = index.search_brute_force("alpha gamma", 10, &mut brute_ctx);
+    let wand = index.search_with_context("alpha gamma", 10, &mut wand_ctx);
+
+    assert_same_results(&brute, &wand, "single document / large k");
+    assert_eq!(wand.len(), 1);
+    assert_eq!(&*wand[0].0, "doc1");
+}
+
+#[test]
+fn bmw_handles_all_docs_match_and_no_docs_match() {
+    let mut index = Bm25Index::new(Bm25Config::default());
+    for doc_idx in 0..300 {
+        index
+            .index_document(format!("doc_{doc_idx}"), &format!("common term_{doc_idx}"))
+            .unwrap();
+    }
+
+    let mut brute_ctx = SearchContext::new();
+    let mut wand_ctx = SearchContext::new();
+
+    let brute_all = index.search_brute_force("common", 20, &mut brute_ctx);
+    let wand_all = index.search_with_context("common", 20, &mut wand_ctx);
+    assert_same_results(&brute_all, &wand_all, "all docs match");
+
+    let brute_none = index.search_brute_force("absent_token", 20, &mut brute_ctx);
+    let wand_none = index.search_with_context("absent_token", 20, &mut wand_ctx);
+    assert_same_results(&brute_none, &wand_none, "no docs match");
+    assert!(wand_none.is_empty());
+}
+
+#[test]
+fn bmw_handles_many_term_queries() {
+    let mut index = Bm25Index::new(Bm25Config::default());
+    build_zipf_corpus(&mut index, 2_000, 0x1234_5678);
+
+    let query = "tok_0000 tok_0001 tok_0002 tok_0003 tok_0004 tok_0005 tok_0006 tok_0007";
+    let mut brute_ctx = SearchContext::new();
+    let mut wand_ctx = SearchContext::new();
+
+    let brute = index.search_brute_force(query, 15, &mut brute_ctx);
+    let wand = index.search_with_context(query, 15, &mut wand_ctx);
+
+    assert_same_results(&brute, &wand, "many term query");
+}
+
+#[test]
+fn bmw_block_boundary_regression() {
+    let mut index = Bm25Index::new(Bm25Config::default());
+    let filler = " filler filler filler filler filler filler filler filler";
+
+    for doc_idx in 0..(DEFAULT_BLOCK_SIZE * 2) {
+        let repeats = if doc_idx == DEFAULT_BLOCK_SIZE - 1 || doc_idx == DEFAULT_BLOCK_SIZE {
+            12
+        } else {
+            1
+        };
+
+        let mut text = String::new();
+        for rep in 0..repeats {
+            if rep > 0 {
+                text.push(' ');
+            }
+            text.push_str("boundary");
+        }
+        text.push_str(filler);
+
+        index
+            .index_document(format!("doc_{doc_idx}"), &text)
+            .unwrap();
+    }
+
+    let mut brute_ctx = SearchContext::new();
+    let mut wand_ctx = SearchContext::new();
+
+    let brute = index.search_brute_force("boundary", 5, &mut brute_ctx);
+    let wand = index.search_with_context("boundary", 5, &mut wand_ctx);
+
+    assert_same_results(&brute, &wand, "block boundary regression");
+    assert_eq!(wand.first().map(|entry| entry.0.as_ref()), Some("doc_127"));
+}
+
+#[test]
+fn sorted_posting_lists_are_maintained_across_mutations() {
+    let mut index = Bm25Index::new(Bm25Config::default());
+    index.index_document("c", "alpha beta").unwrap();
+    index.index_document("a", "alpha gamma").unwrap();
+    index.index_document("b", "alpha delta").unwrap();
+    assert!(index.remove_document("a"));
+    index.index_document("a2", "alpha epsilon").unwrap();
+
+    let postings = index
+        .inverted_index
+        .get("alpha")
+        .expect("alpha postings should exist");
+
+    assert!(postings
+        .doc_ids
+        .windows(2)
+        .all(|window| window[0] < window[1]));
+}
diff --git a/crates/khive-bm25/src/lib.rs b/crates/khive-bm25/src/lib.rs
new file mode 100644
index 00000000..5bd8f01b
--- /dev/null
+++ b/crates/khive-bm25/src/lib.rs
@@ -0,0 +1,61 @@
+//! BM25 (Okapi BM25) keyword index.
+//!
+//! Provides term frequency-based relevance scoring for keyword search.
+//! See ADR-003 for configuration (k1=1.2, b=0.75).
+//!
+//! # BM25 Formula
+//!
+//! ```text
+//! score(D, Q) = Σ IDF(qi) * (f(qi, D) * (k1 + 1)) / (f(qi, D) + k1 * (1 - b + b * |D|/avgdl))
+//!
+//! where:
+//! - Q = query terms
+//! - D = document
+//! - f(qi, D) = term frequency of qi in D
+//! - |D| = document length
+//! - avgdl = average document length
+//! - k1 = 1.2 (term saturation)
+//! - b = 0.75 (length normalization)
+//! ```
+//!
+//! # Example
+//!
+//! ```rust
+//! use khive_bm25::{Bm25Config, Bm25Index};
+//!
+//! let mut index = Bm25Index::new(Bm25Config::default());
+//!
+//! // Index some documents (String / &str auto-convert to DocumentId)
+//! index.index_document("doc1", "the quick brown fox").unwrap();
+//! index.index_document("doc2", "the lazy dog").unwrap();
+//! index.index_document("doc3", "quick brown fox jumps over the lazy dog").unwrap();
+//!
+//! // Search
+//! let results = index.search("quick fox", 10);
+//! for (doc_id, score) in results {
+//!     println!("{}: {}", doc_id, score);
+//! }
+//! ```
+//!
+//! # ID Types and Hybrid Search Bridging
+//!
+//! [`DocumentId`] is a newtype wrapper around `String` that provides type
+//! safety. When performing hybrid search that combines BM25 results with
+//! HNSW vector results (which use `EmbeddingId`), see the [`DocumentId`]
+//! documentation for bridging strategies.
+
+pub mod error;
+pub mod metrics;
+
+mod config;
+mod index;
+mod tokenizer;
+
+#[cfg(test)]
+mod tests;
+
+// Re-export public types
+pub use config::Bm25Config;
+pub use error::{ErrorKind, Result, RetrievalError};
+pub use index::{Bm25Index, Bm25Stats, DocumentId, SearchContext};
+pub use tokenizer::{tokenize, BoxedTokenizer, SimpleTokenizer, Tokenizer};
diff --git a/crates/khive-bm25/src/metrics.rs b/crates/khive-bm25/src/metrics.rs
new file mode 100644
index 00000000..731a334b
--- /dev/null
+++ b/crates/khive-bm25/src/metrics.rs
@@ -0,0 +1,95 @@
+//! Observability sink for BM25 index operations.
+//!
+//! Provides a pluggable [`MetricsSink`] trait and a [`RecordingSink`] for tests.
+
+use std::sync::Arc;
+
+#[cfg(test)]
+use parking_lot::Mutex;
+
+/// A single metric event emitted by the BM25 index.
+#[derive(Debug, Clone)]
+pub struct MetricEvent {
+    /// Event name (use the constants in [`names`]).
+    pub name: &'static str,
+    /// The value payload.
+    pub value: MetricValue,
+    /// Optional label key-value pairs.
+    pub labels: Vec<(&'static str, String)>,
+}
+
+/// Value carried by a metric event.
+#[derive(Debug, Clone, PartialEq)]
+pub enum MetricValue {
+    /// A monotonically increasing counter.
+    Counter(u64),
+    /// An instantaneous gauge.
+    Gauge(f64),
+    /// A histogram observation (e.g., duration in ms).
+    Histogram(f64),
+}
+
+/// Trait for receiving metric events.
+///
+/// Implementations must be `Send + Sync` to allow the index to hold an
+/// `Arc<dyn MetricsSink>` and emit events from `&self` methods.
+pub trait MetricsSink: Send + Sync {
+    /// Receive a single metric event.
+    fn record(&self, event: MetricEvent);
+}
+
+/// Emit a metric event to the sink, if one is attached.
+#[inline]
+pub fn emit(sink: &Option<Arc<dyn MetricsSink>>, event: MetricEvent) {
+    if let Some(s) = sink {
+        s.record(event);
+    }
+}
+
+/// Well-known metric name constants.
+pub mod names {
+    pub const BM25_INDEX_DURATION_MS: &str = "bm25.index_document.duration_ms";
+    pub const BM25_INDEX_COUNT: &str = "bm25.index_document.count";
+    pub const BM25_INDEX_SIZE: &str = "bm25.index.size";
+    pub const BM25_SEARCH_DURATION_MS: &str = "bm25.search.duration_ms";
+    pub const BM25_SEARCH_COUNT: &str = "bm25.search.count";
+    pub const BM25_SEARCH_RESULTS: &str = "bm25.search.results";
+}
+
+/// In-memory sink that records all events. Used in tests.
+#[cfg(test)]
+pub struct RecordingSink {
+    events: Mutex<Vec<MetricEvent>>,
+}
+
+#[cfg(test)]
+impl RecordingSink {
+    /// Create an empty recording sink.
+    pub fn new() -> Self {
+        Self {
+            events: Mutex::new(Vec::new()),
+        }
+    }
+
+    /// Return a snapshot of all recorded events.
+    pub fn events(&self) -> Vec<MetricEvent> {
+        self.events.lock().clone()
+    }
+
+    /// Clear all recorded events.
+    pub fn clear(&self) {
+        self.events.lock().clear();
+    }
+
+    /// Return `true` if no events have been recorded.
+    pub fn is_empty(&self) -> bool {
+        self.events.lock().is_empty()
+    }
+}
+
+#[cfg(test)]
+impl MetricsSink for RecordingSink {
+    fn record(&self, event: MetricEvent) {
+        self.events.lock().push(event);
+    }
+}
diff --git a/crates/khive-bm25/src/tests.rs b/crates/khive-bm25/src/tests.rs
new file mode 100644
index 00000000..efeb7e7a
--- /dev/null
+++ b/crates/khive-bm25/src/tests.rs
@@ -0,0 +1,1181 @@
+//! Tests for BM25 index.
+
+#[cfg(test)]
+mod unit_tests {
+    use crate::{Bm25Config, Bm25Index, BoxedTokenizer, SimpleTokenizer};
+    use std::sync::Arc;
+
+    #[test]
+    fn test_new_index() {
+        let index = Bm25Index::new(Bm25Config::default());
+        assert_eq!(index.doc_count(), 0);
+        assert!((index.avg_doc_length() - 0.0).abs() < f64::EPSILON);
+    }
+
+    #[test]
+    fn test_index_single_document() {
+        let mut index = Bm25Index::default();
+        index
+            .index_document("doc1".to_string(), "the quick brown fox")
+            .unwrap();
+
+        assert_eq!(index.doc_count(), 1);
+        // "the" is a stop word, so "the quick brown fox" → 3 tokens
+        assert!((index.avg_doc_length() - 3.0).abs() < f64::EPSILON);
+        assert!(index.contains_document("doc1"));
+        assert!(!index.contains_document("doc2"));
+    }
+
+    #[test]
+    fn test_index_multiple_documents() {
+        let mut index = Bm25Index::default();
+        index
+            .index_document("doc1".to_string(), "the quick brown fox")
+            .unwrap();
+        index
+            .index_document("doc2".to_string(), "the lazy dog")
+            .unwrap();
+        index.index_document("doc3".to_string(), "quick").unwrap();
+
+        assert_eq!(index.doc_count(), 3);
+        // Stop words removed: "the quick brown fox"→3, "the lazy dog"→2, "quick"→1
+        // (3 + 2 + 1) / 3 = 2.0
+        assert!((index.avg_doc_length() - 2.0).abs() < f64::EPSILON);
+    }
+
+    #[test]
+    fn test_index_empty_document() {
+        let mut index = Bm25Index::default();
+        index.index_document("doc1".to_string(), "").unwrap();
+        assert_eq!(index.doc_count(), 0); // Empty docs not indexed
+
+        index.index_document("doc2".to_string(), "   ").unwrap();
+        assert_eq!(index.doc_count(), 0);
+    }
+
+    #[test]
+    fn test_remove_document() {
+        let mut index = Bm25Index::default();
+        index
+            .index_document("doc1".to_string(), "the quick brown fox")
+            .unwrap();
+        index
+            .index_document("doc2".to_string(), "the lazy dog")
+            .unwrap();
+
+        assert_eq!(index.doc_count(), 2);
+
+        assert!(index.remove_document("doc1"));
+        assert_eq!(index.doc_count(), 1);
+        assert!(!index.contains_document("doc1"));
+        assert!(index.contains_document("doc2"));
+
+        // Remove non-existent document
+        assert!(!index.remove_document("doc3"));
+        assert_eq!(index.doc_count(), 1);
+    }
+
+    #[test]
+    fn test_reindex_document() {
+        let mut index = Bm25Index::default();
+        index
+            .index_document("doc1".to_string(), "old content")
+            .unwrap();
+        assert_eq!(index.doc_count(), 1);
+
+        // Re-index same document with new content
+        index
+            .index_document("doc1".to_string(), "new content with more tokens")
+            .unwrap();
+        assert_eq!(index.doc_count(), 1);
+
+        // Stats should reflect new content
+        // "new content with more tokens" → "with" is stop word → 4 tokens
+        assert!((index.avg_doc_length() - 4.0).abs() < f64::EPSILON);
+    }
+
+    #[test]
+    fn test_search_empty_query() {
+        let mut index = Bm25Index::default();
+        index
+            .index_document("doc1".to_string(), "the quick brown fox")
+            .unwrap();
+
+        let results = index.search("", 10);
+        assert!(results.is_empty());
+
+        let results = index.search("   ", 10);
+        assert!(results.is_empty());
+    }
+
+    #[test]
+    fn test_search_empty_index() {
+        let index = Bm25Index::default();
+        let results = index.search("quick fox", 10);
+        assert!(results.is_empty());
+    }
+
+    #[test]
+    fn test_search_no_matches() {
+        let mut index = Bm25Index::default();
+        index
+            .index_document("doc1".to_string(), "the quick brown fox")
+            .unwrap();
+
+        let results = index.search("elephant giraffe", 10);
+        assert!(results.is_empty());
+    }
+
+    #[test]
+    fn test_search_single_match() {
+        let mut index = Bm25Index::default();
+        index
+            .index_document("doc1".to_string(), "the quick brown fox")
+            .unwrap();
+        index
+            .index_document("doc2".to_string(), "the lazy dog")
+            .unwrap();
+
+        let results = index.search("fox", 10);
+        assert_eq!(results.len(), 1);
+        assert_eq!(&*results[0].0, "doc1");
+        assert!(results[0].1.to_f64() > 0.0);
+    }
+
+    #[test]
+    fn test_search_multiple_matches() {
+        let mut index = Bm25Index::default();
+        index
+            .index_document("doc1".to_string(), "the quick brown fox")
+            .unwrap();
+        index
+            .index_document("doc2".to_string(), "the lazy dog")
+            .unwrap();
+        index
+            .index_document("doc3".to_string(), "the cat and the dog")
+            .unwrap();
+
+        let results = index.search("the dog", 10);
+
+        // All docs contain "the", but only doc2 and doc3 contain "dog"
+        // doc2 and doc3 should score higher
+        assert!(!results.is_empty());
+
+        // Find positions
+        let doc2_pos = results.iter().position(|(id, _)| id.as_ref() == "doc2");
+        let doc3_pos = results.iter().position(|(id, _)| id.as_ref() == "doc3");
+
+        assert!(doc2_pos.is_some() || doc3_pos.is_some());
+    }
+
+    #[test]
+    fn test_search_k_limit() {
+        let mut index = Bm25Index::default();
+        for i in 0..10 {
+            index
+                .index_document(format!("doc{i}"), &format!("common term {i}"))
+                .unwrap();
+        }
+
+        let results = index.search("common", 3);
+        assert_eq!(results.len(), 3);
+
+        let results = index.search("common", 20);
+        assert_eq!(results.len(), 10); // Only 10 documents
+    }
+
+    #[test]
+    fn test_search_k_zero() {
+        let mut index = Bm25Index::default();
+        index
+            .index_document("doc1".to_string(), "the quick brown fox")
+            .unwrap();
+
+        let results = index.search("fox", 0);
+        assert!(results.is_empty());
+    }
+
+    #[test]
+    fn test_term_frequency_matters() {
+        let mut index = Bm25Index::default();
+        index.index_document("doc1".to_string(), "fox").unwrap();
+        index
+            .index_document("doc2".to_string(), "fox fox fox")
+            .unwrap();
+
+        let results = index.search("fox", 10);
+        assert_eq!(results.len(), 2);
+
+        // doc2 has higher TF, should score higher (but with saturation)
+        let doc1_score = results
+            .iter()
+            .find(|(id, _)| id.as_ref() == "doc1")
+            .unwrap()
+            .1;
+        let doc2_score = results
+            .iter()
+            .find(|(id, _)| id.as_ref() == "doc2")
+            .unwrap()
+            .1;
+        assert!(doc2_score > doc1_score);
+    }
+
+    #[test]
+    fn test_length_normalization() {
+        let mut index = Bm25Index::default();
+        // Both have "fox" once, but different lengths
+        index.index_document("short".to_string(), "fox").unwrap();
+        index
+            .index_document(
+                "long".to_string(),
+                "the quick brown fox jumps over the lazy dog",
+            )
+            .unwrap();
+
+        let results = index.search("fox", 10);
+        assert_eq!(results.len(), 2);
+
+        // Shorter doc should score higher (with b=0.75 normalization)
+        let short_score = results
+            .iter()
+            .find(|(id, _)| id.as_ref() == "short")
+            .unwrap()
+            .1;
+        let long_score = results
+            .iter()
+            .find(|(id, _)| id.as_ref() == "long")
+            .unwrap()
+            .1;
+        assert!(short_score > long_score);
+    }
+
+    #[test]
+    fn test_idf_rare_terms() {
+        let mut index = Bm25Index::default();
+        // "rare" appears in 1 doc, "common" in all
+        index
+            .index_document("doc1".to_string(), "common rare")
+            .unwrap();
+        index.index_document("doc2".to_string(), "common").unwrap();
+        index.index_document("doc3".to_string(), "common").unwrap();
+
+        // Search for rare term should only return doc1
+        let results = index.search("rare", 10);
+        assert_eq!(results.len(), 1);
+        assert_eq!(&*results[0].0, "doc1");
+
+        // doc1 should score high because "rare" has high IDF
+        assert!(results[0].1.to_f64() > 0.0);
+    }
+
+    #[test]
+    fn test_multi_term_query() {
+        let mut index = Bm25Index::default();
+        index
+            .index_document("doc1".to_string(), "quick brown fox")
+            .unwrap();
+        index
+            .index_document("doc2".to_string(), "quick dog")
+            .unwrap();
+        index
+            .index_document("doc3".to_string(), "brown dog")
+            .unwrap();
+
+        let results = index.search("quick brown", 10);
+
+        // doc1 has both terms, should score highest
+        assert!(!results.is_empty());
+        assert_eq!(&*results[0].0, "doc1");
+    }
+
+    #[test]
+    fn test_clear() {
+        let mut index = Bm25Index::default();
+        index
+            .index_document("doc1".to_string(), "the quick brown fox")
+            .unwrap();
+        index
+            .index_document("doc2".to_string(), "the lazy dog")
+            .unwrap();
+
+        index.clear();
+
+        assert_eq!(index.doc_count(), 0);
+        assert!(index.search("fox", 10).is_empty());
+    }
+
+    #[test]
+    fn test_stats() {
+        let mut index = Bm25Index::default();
+        index
+            .index_document("doc1".to_string(), "the quick brown fox")
+            .unwrap();
+        index
+            .index_document("doc2".to_string(), "the lazy dog")
+            .unwrap();
+
+        let stats = index.stats();
+        assert_eq!(stats.doc_count, 2);
+        // Stop words removed: "the quick brown fox"→3, "the lazy dog"→2 = 5 total
+        assert_eq!(stats.total_tokens, 5);
+        assert!((stats.avg_doc_length - 2.5).abs() < f64::EPSILON);
+        // "quick", "brown", "fox", "lazy", "dog" = 5 unique terms ("the" filtered)
+        assert_eq!(stats.unique_terms, 5);
+    }
+
+    #[test]
+    fn test_deterministic_score_output() {
+        let mut index = Bm25Index::default();
+        index
+            .index_document("doc1".to_string(), "test document")
+            .unwrap();
+
+        let results = index.search("test", 10);
+        assert_eq!(results.len(), 1);
+
+        // Score should be a DeterministicScore (fixed-point i64; no NaN concept).
+        let (_doc_id, score) = &results[0];
+        let f = score.to_f64();
+        assert!(f > 0.0);
+        assert!(f.is_finite());
+    }
+
+    #[test]
+    fn test_case_insensitive() {
+        let mut index = Bm25Index::default();
+        index
+            .index_document("doc1".to_string(), "The QUICK Brown FOX")
+            .unwrap();
+
+        let results = index.search("quick fox", 10);
+        assert_eq!(results.len(), 1);
+        assert_eq!(&*results[0].0, "doc1");
+    }
+
+    #[test]
+    fn test_punctuation_handling() {
+        let mut index = Bm25Index::default();
+        index
+            .index_document("doc1".to_string(), "Hello, World! How are you?")
+            .unwrap();
+
+        let results = index.search("hello world", 10);
+        assert_eq!(results.len(), 1);
+        assert_eq!(&*results[0].0, "doc1");
+    }
+
+    #[test]
+    fn test_config_custom() {
+        let config = Bm25Config::new(2.0, 0.5);
+        let mut index = Bm25Index::new(config);
+        index
+            .index_document("doc1".to_string(), "test document")
+            .unwrap();
+
+        assert!((index.config().k1 - 2.0).abs() < f64::EPSILON);
+        assert!((index.config().b - 0.5).abs() < f64::EPSILON);
+
+        // Should still work
+        let results = index.search("test", 10);
+        assert_eq!(results.len(), 1);
+    }
+
+    #[test]
+    fn test_idf_caching() {
+        let mut index = Bm25Index::default();
+        index.index_document("doc1".to_string(), "test").unwrap();
+        index.index_document("doc2".to_string(), "test").unwrap();
+
+        // First search populates cache
+        let _results1 = index.search("test", 10);
+
+        // IDF cache should be populated
+        assert!(!index.is_idf_cache_empty());
+
+        // Second search uses cache (verified by consistent results)
+        let results2 = index.search("test", 10);
+        assert_eq!(results2.len(), 2);
+    }
+
+    #[test]
+    fn test_consistent_ordering() {
+        let mut index = Bm25Index::default();
+        index
+            .index_document("doc1".to_string(), "fox quick")
+            .unwrap();
+        index
+            .index_document("doc2".to_string(), "fox slow")
+            .unwrap();
+        index
+            .index_document("doc3".to_string(), "quick quick fox")
+            .unwrap();
+
+        // Multiple searches should produce consistent ordering
+        let results1 = index.search("quick fox", 10);
+        let results2 = index.search("quick fox", 10);
+
+        assert_eq!(results1.len(), results2.len());
+        for i in 0..results1.len() {
+            assert_eq!(results1[i].0, results2[i].0);
+            assert_eq!(results1[i].1, results2[i].1);
+        }
+    }
+
+    #[test]
+    fn test_serde_roundtrip() {
+        let mut index = Bm25Index::default();
+        index
+            .index_document("doc1".to_string(), "the quick brown fox")
+            .unwrap();
+        index
+            .index_document("doc2".to_string(), "the lazy dog")
+            .unwrap();
+
+        // Serialize
+        let json = serde_json::to_string(&index).unwrap();
+
+        // Deserialize
+        let restored: Bm25Index = serde_json::from_str(&json).unwrap();
+
+        // Should work the same
+        assert_eq!(restored.doc_count(), 2);
+        let results = restored.search("fox", 10);
+        assert_eq!(results.len(), 1);
+        assert_eq!(&*results[0].0, "doc1");
+    }
+
+    #[test]
+    fn test_custom_tokenizer() {
+        // Create a custom tokenizer with minimum length 4
+        let tokenizer: BoxedTokenizer = Arc::new(SimpleTokenizer::new(true, 4));
+        let mut index = Bm25Index::with_tokenizer(Bm25Config::default(), tokenizer);
+
+        // "the", "a" will be filtered out (< 4 chars)
+        index
+            .index_document("doc1".to_string(), "the quick brown fox")
+            .unwrap();
+        index
+            .index_document("doc2".to_string(), "a lazy brown dog")
+            .unwrap();
+
+        // "the" and "a" should not be indexed
+        let results = index.search("the", 10);
+        assert!(results.is_empty(), "Short words should not be indexed");
+
+        // "quick" and "brown" should be indexed
+        let results = index.search("quick", 10);
+        assert_eq!(results.len(), 1);
+        assert_eq!(&*results[0].0, "doc1");
+
+        // "brown" in both docs
+        let results = index.search("brown", 10);
+        assert_eq!(results.len(), 2);
+    }
+
+    #[test]
+    fn test_tokenizer_accessor() {
+        let index = Bm25Index::default();
+        let tokenizer = index.tokenizer();
+
+        // Should tokenize correctly
+        let tokens = tokenizer.tokenize("Hello, World!");
+        assert_eq!(tokens, vec!["hello", "world"]);
+    }
+
+    #[test]
+    fn test_set_tokenizer() {
+        let mut index = Bm25Index::default();
+
+        // Index with default tokenizer (min_length=1, stop words on)
+        // Use "ox" — not a stop word, not filtered by default min_length=1
+        index
+            .index_document("doc1".to_string(), "ox quick fox")
+            .unwrap();
+        let results = index.search("ox", 10);
+        assert_eq!(results.len(), 1, "Default tokenizer should index 'ox'");
+
+        // Change tokenizer to min_length=3 (this won't re-index existing docs)
+        let new_tokenizer: BoxedTokenizer = Arc::new(SimpleTokenizer::new(true, 3));
+        index.set_tokenizer(new_tokenizer);
+
+        // New document with new tokenizer
+        index
+            .index_document("doc2".to_string(), "ox slow fox")
+            .unwrap();
+
+        // doc1 still has "ox" indexed, but search tokenizer now filters "ox" (len < 3)
+        // Since query "ox" becomes empty after tokenization, no results
+        let results = index.search("ox", 10);
+        assert!(
+            results.is_empty(),
+            "Query 'ox' should be filtered by min_length=3"
+        );
+
+        // "fox" should find both docs
+        let results = index.search("fox", 10);
+        assert_eq!(results.len(), 2);
+    }
+
+    #[test]
+    fn test_concurrent_search() {
+        use std::thread;
+
+        let mut index = Bm25Index::default();
+        index
+            .index_document("doc1".to_string(), "the quick brown fox")
+            .unwrap();
+        index
+            .index_document("doc2".to_string(), "the lazy dog")
+            .unwrap();
+        index
+            .index_document("doc3".to_string(), "quick fox jumps")
+            .unwrap();
+
+        // Wrap in Arc for sharing across threads (search takes &self now)
+        let index = Arc::new(index);
+
+        // Spawn multiple threads doing concurrent searches
+        let handles: Vec<_> = (0..4)
+            .map(|i| {
+                let index = Arc::clone(&index);
+                thread::spawn(move || {
+                    // Each thread does multiple searches
+                    for _ in 0..100 {
+                        let query = if i % 2 == 0 { "quick fox" } else { "lazy dog" };
+                        let results = index.search(query, 10);
+                        assert!(!results.is_empty());
+                    }
+                })
+            })
+            .collect();
+
+        // Wait for all threads to complete
+        for handle in handles {
+            handle.join().expect("Thread panicked");
+        }
+    }
+}
+
+/// Golden tests for BM25 scoring (RETRIEVAL-04).
+///
+/// These tests verify known expected values to detect drift in scoring behavior
+/// across versions or platforms. The expected values were computed with the
+/// standard BM25 formula (k1=1.2, b=0.75) and verified manually.
+///
+/// # Cross-Platform CI Note
+///
+/// These tests should run on all CI platforms (Linux, macOS, Windows) to verify
+/// consistent scoring. The tolerance (1e-6) accounts for minor FP differences
+/// while still catching significant regressions.
+///
+/// If these tests fail on a specific platform, investigate:
+/// 1. FMA instruction availability differences
+/// 2. Compiler optimization flags
+/// 3. Extended precision (x87) on older x86
+#[cfg(test)]
+mod golden_tests {
+    use crate::{Bm25Config, Bm25Index};
+
+    /// Tolerance for floating-point comparison in golden tests.
+    /// 1e-6 is tight enough to catch bugs but loose enough for cross-platform variance.
+    const GOLDEN_TOLERANCE: f64 = 1e-6;
+
+    /// Golden test corpus for reproducible scoring.
+    fn setup_golden_corpus() -> Bm25Index {
+        let mut index = Bm25Index::new(Bm25Config::default());
+        // Fixed corpus with known characteristics:
+        // doc1: 4 tokens (quick, brown, fox, jumps)
+        // doc2: 3 tokens (lazy, brown, dog)
+        // doc3: 2 tokens (quick, fox)
+        // Total: 9 tokens, avgdl = 3.0
+        index
+            .index_document("doc1".to_string(), "quick brown fox jumps")
+            .unwrap();
+        index
+            .index_document("doc2".to_string(), "lazy brown dog")
+            .unwrap();
+        index
+            .index_document("doc3".to_string(), "quick fox")
+            .unwrap();
+        index
+    }
+
+    #[test]
+    fn golden_single_term_query() {
+        let index = setup_golden_corpus();
+
+        // Query for "brown" (appears in doc1 and doc2)
+        // IDF("brown") = ln((3 - 2 + 0.5) / (2 + 0.5) + 1) = ln(1.6) ≈ 0.470003629
+        let results = index.search("brown", 10);
+
+        assert_eq!(results.len(), 2);
+
+        // Both docs contain "brown" once, but doc2 is shorter (3 tokens vs 4)
+        // so doc2 should score slightly higher with length normalization
+        let doc1_score = results
+            .iter()
+            .find(|(id, _)| id.as_ref() == "doc1")
+            .unwrap()
+            .1;
+        let doc2_score = results
+            .iter()
+            .find(|(id, _)| id.as_ref() == "doc2")
+            .unwrap()
+            .1;
+
+        // Golden values (empirically verified from implementation with k1=1.2, b=0.75, avgdl=3.0)
+        // These values are the actual outputs and serve as regression tests.
+        // doc1: len=4, higher length penalty
+        // doc2: len=3 (at avgdl), no length adjustment
+        assert!(
+            (doc1_score.to_f64() - 0.4136031938251108).abs() < GOLDEN_TOLERANCE,
+            "doc1 score {} differs from golden 0.4136031938251108",
+            doc1_score.to_f64()
+        );
+        assert!(
+            (doc2_score.to_f64() - 0.47000362924573563).abs() < GOLDEN_TOLERANCE,
+            "doc2 score {} differs from golden 0.47000362924573563",
+            doc2_score.to_f64()
+        );
+    }
+
+    #[test]
+    fn golden_multi_term_query() {
+        let index = setup_golden_corpus();
+
+        // Query for "quick fox" (doc1 has both, doc3 has both)
+        let results = index.search("quick fox", 10);
+
+        assert_eq!(results.len(), 2);
+
+        let doc1_score = results
+            .iter()
+            .find(|(id, _)| id.as_ref() == "doc1")
+            .unwrap()
+            .1;
+        let doc3_score = results
+            .iter()
+            .find(|(id, _)| id.as_ref() == "doc3")
+            .unwrap()
+            .1;
+
+        // doc3 is shorter (2 tokens) and has both terms -> should score higher
+        assert!(doc3_score > doc1_score);
+
+        // Golden values for multi-term query (empirically verified)
+        // "quick": df=2, "fox": df=2
+        // doc3 (len=2): shorter doc gets boost from length normalization
+        assert!(
+            (doc3_score.to_f64() - 1.088429457275197).abs() < GOLDEN_TOLERANCE,
+            "doc3 score {} differs from golden 1.088429457275197",
+            doc3_score.to_f64()
+        );
+    }
+
+    #[test]
+    fn golden_rare_term_high_idf() {
+        let index = setup_golden_corpus();
+
+        // "jumps" only in doc1 (df=1), "lazy" only in doc2 (df=1)
+        // Both have high IDF = ln((3-1+0.5)/(1+0.5)+1) = ln(2.667) ≈ 0.981
+        let results = index.search("jumps", 10);
+
+        assert_eq!(results.len(), 1);
+        assert_eq!(&*results[0].0, "doc1");
+
+        // Golden value for rare term (empirically verified)
+        // "jumps" has high IDF due to appearing in only 1 document
+        // doc1 has length penalty (len=4, avgdl=3)
+        assert!(
+            (results[0].1.to_f64() - 0.8631297426763922).abs() < GOLDEN_TOLERANCE,
+            "rare term score {} differs from golden 0.8631297426763922",
+            results[0].1.to_f64()
+        );
+    }
+
+    #[test]
+    fn golden_term_frequency_saturation() {
+        // Test that repeated terms show saturation (TF component approaches k1+1=2.2)
+        let mut index = Bm25Index::new(Bm25Config::default());
+
+        // doc1 has "test" once, doc2 has it 5 times
+        index.index_document("doc1".to_string(), "test").unwrap();
+        index
+            .index_document("doc2".to_string(), "test test test test test")
+            .unwrap();
+
+        let results = index.search("test", 10);
+        assert_eq!(results.len(), 2);
+
+        let doc1_score = results
+            .iter()
+            .find(|(id, _)| id.as_ref() == "doc1")
+            .unwrap()
+            .1;
+        let doc2_score = results
+            .iter()
+            .find(|(id, _)| id.as_ref() == "doc2")
+            .unwrap()
+            .1;
+
+        // doc2 has higher TF but saturation limits the boost
+        // The score ratio should be much less than 5x
+        let ratio = doc2_score.to_f64() / doc1_score.to_f64();
+        assert!(
+            ratio < 2.5,
+            "TF saturation not working: ratio {ratio} should be < 2.5"
+        );
+        assert!(
+            ratio > 1.0,
+            "Higher TF should still score higher: ratio {ratio}"
+        );
+
+        // Golden: with avgdl=3, k1=1.2, b=0.75:
+        // doc1 (tf=1, len=1, L=0.333): denom=1+1.2*(0.25+0.75*0.333)=1.6, TF=2.2/1.6=1.375
+        // doc2 (tf=5, len=5, L=1.667): denom=5+1.2*(0.25+0.75*1.667)=6.8, TF=11/6.8=1.618
+        // Score ratio ≈ 1.618/1.375 ≈ 1.177
+        assert!(
+            (ratio - 1.17682).abs() < 0.01,
+            "TF saturation ratio {ratio} differs from golden 1.177"
+        );
+    }
+
+    #[test]
+    fn golden_length_normalization() {
+        // Test length normalization with same term frequency
+        let mut index = Bm25Index::new(Bm25Config::default());
+
+        // Both have "test" once, but different lengths
+        index.index_document("short".to_string(), "test").unwrap();
+        index
+            .index_document("long".to_string(), "test padding padding padding padding")
+            .unwrap();
+
+        let results = index.search("test", 10);
+        assert_eq!(results.len(), 2);
+
+        let short_score = results
+            .iter()
+            .find(|(id, _)| id.as_ref() == "short")
+            .unwrap()
+            .1;
+        let long_score = results
+            .iter()
+            .find(|(id, _)| id.as_ref() == "long")
+            .unwrap()
+            .1;
+
+        // Shorter doc should score higher (b=0.75 applies length penalty)
+        assert!(
+            short_score > long_score,
+            "Short doc should score higher than long doc"
+        );
+
+        // Golden: avgdl=3, k1=1.2, b=0.75
+        // short (len=1, L=0.333): denom=1+1.2*(0.25+0.25)=1.6, TF=2.2/1.6=1.375
+        // long (len=5, L=1.667): denom=1+1.2*(0.25+1.25)=2.8, TF=2.2/2.8=0.786
+        let ratio = short_score.to_f64() / long_score.to_f64();
+        assert!(
+            (ratio - 1.75).abs() < 0.1,
+            "Length normalization ratio {ratio} differs from expected ~1.75"
+        );
+    }
+
+    #[test]
+    fn golden_deterministic_across_runs() {
+        // Verify that multiple searches produce identical results
+        let index = setup_golden_corpus();
+
+        let results1 = index.search("quick brown", 10);
+        let results2 = index.search("quick brown", 10);
+        let results3 = index.search("quick brown", 10);
+
+        assert_eq!(results1.len(), results2.len());
+        assert_eq!(results2.len(), results3.len());
+
+        for i in 0..results1.len() {
+            assert_eq!(
+                results1[i].0, results2[i].0,
+                "Doc ID mismatch at position {i}"
+            );
+            assert_eq!(
+                results1[i].1, results2[i].1,
+                "Score mismatch at position {i}"
+            );
+            assert_eq!(
+                results2[i].1, results3[i].1,
+                "Score mismatch at position {i}"
+            );
+        }
+    }
+}
+
+/// Memory budget enforcement tests for BM25.
+#[cfg(test)]
+mod memory_budget_tests {
+    use crate::{Bm25Config, Bm25Index};
+    use crate::error::{ErrorKind, RetrievalError};
+
+    #[test]
+    fn test_no_budget_allows_unlimited_indexing() {
+        let mut index = Bm25Index::default();
+        for i in 0..100 {
+            index
+                .index_document(format!("doc{i}"), &format!("content words number {i}"))
+                .expect("index should succeed without budget");
+        }
+        assert_eq!(index.doc_count(), 100);
+    }
+
+    #[test]
+    fn test_budget_blocks_new_document_when_exceeded() {
+        let config = Bm25Config::default().with_memory_budget(1_100);
+        let mut index = Bm25Index::new(config);
+
+        // First doc should succeed (index starts empty)
+        index
+            .index_document("doc1", "hello world")
+            .expect("first doc should succeed");
+
+        // Keep indexing until budget is hit
+        let mut rejected = false;
+        for i in 2..=200 {
+            let result = index.index_document(
+                format!("doc{i}"),
+                &format!("some content words for document number {i} with extra text"),
+            );
+            if result.is_err() {
+                rejected = true;
+                let err = result.unwrap_err();
+                assert!(
+                    matches!(err, RetrievalError::BudgetExceeded { .. }),
+                    "Expected BudgetExceeded, got: {err:?}"
+                );
+                assert_eq!(err.kind(), ErrorKind::Permanent);
+                assert!(!err.is_retryable());
+                break;
+            }
+        }
+        assert!(
+            rejected,
+            "Budget should have rejected an index_document call"
+        );
+    }
+
+    #[test]
+    fn test_budget_reindex_bypasses_check() {
+        let config = Bm25Config::default().with_memory_budget(2_000);
+        let mut index = Bm25Index::new(config);
+
+        // Index initial doc
+        index
+            .index_document("doc1", "initial content")
+            .expect("first doc");
+
+        // Fill until budget hit
+        for i in 2..=500 {
+            if index
+                .index_document(format!("doc{i}"), &format!("fill content {i}"))
+                .is_err()
+            {
+                break;
+            }
+        }
+
+        // Re-indexing an existing document should bypass the budget
+        index
+            .index_document("doc1", "updated content with more words")
+            .expect("re-index should bypass budget");
+    }
+
+    #[test]
+    fn test_memory_usage_increases_with_documents() {
+        let mut index = Bm25Index::default();
+
+        let before = index.memory_usage();
+        // Empty index has fixed overhead only
+        assert!(before >= 128, "Empty index should have fixed overhead");
+
+        index.index_document("doc1", "hello world").unwrap();
+        let after_one = index.memory_usage();
+        assert!(after_one > before, "Usage should increase after indexing");
+
+        index
+            .index_document("doc2", "another document here")
+            .unwrap();
+        let after_two = index.memory_usage();
+        assert!(
+            after_two > after_one,
+            "Usage should increase with more docs"
+        );
+    }
+
+    #[test]
+    fn test_estimate_document_cost_is_positive() {
+        let index = Bm25Index::default();
+        let cost = index.estimate_document_cost("some test document with words");
+        assert!(cost > 0, "Document cost should be positive");
+    }
+
+    #[test]
+    fn test_estimate_document_cost_empty_text() {
+        let index = Bm25Index::default();
+        let cost = index.estimate_document_cost("");
+        assert_eq!(cost, 0, "Empty document should have zero cost");
+    }
+
+    #[test]
+    fn test_memory_budget_getter_setter() {
+        let mut index = Bm25Index::default();
+
+        // Default: no budget
+        assert_eq!(index.memory_budget(), None);
+
+        // Set budget at runtime
+        index.set_memory_budget(Some(50_000));
+        assert_eq!(index.memory_budget(), Some(50_000));
+
+        // Clear budget
+        index.set_memory_budget(None);
+        assert_eq!(index.memory_budget(), None);
+    }
+
+    #[test]
+    fn test_budget_from_config() {
+        let config = Bm25Config::default().with_memory_budget(10_000);
+        let index = Bm25Index::new(config);
+        assert_eq!(index.memory_budget(), Some(10_000));
+    }
+
+    #[test]
+    fn test_budget_exceeded_error_details() {
+        let config = Bm25Config::default().with_memory_budget(1);
+        let mut index = Bm25Index::new(config);
+
+        // Budget of 1 byte is too small for any document
+        let result = index.index_document("doc1", "hello world");
+        assert!(result.is_err());
+
+        let err = result.unwrap_err();
+        match err {
+            RetrievalError::BudgetExceeded {
+                current_usage,
+                item_size,
+                limit,
+            } => {
+                assert!(item_size > 0, "Item should have non-zero cost");
+                assert_eq!(limit, 1, "Limit should match config");
+                assert!(current_usage + item_size > limit, "Should genuinely exceed");
+            }
+            other => panic!("Expected BudgetExceeded, got: {other:?}"),
+        }
+    }
+
+    #[test]
+    fn test_search_unaffected_by_budget() {
+        let config = Bm25Config::default().with_memory_budget(100_000);
+        let mut index = Bm25Index::new(config);
+
+        index.index_document("doc1", "quick brown fox").unwrap();
+        index.index_document("doc2", "lazy brown dog").unwrap();
+
+        // Search should work regardless of budget
+        let results = index.search("brown", 10);
+        assert_eq!(results.len(), 2);
+    }
+
+    #[test]
+    fn test_budget_allows_removal_then_insert() {
+        let config = Bm25Config::default().with_memory_budget(3_000);
+        let mut index = Bm25Index::new(config);
+
+        // Fill the index
+        let mut last_success = 0;
+        for i in 1..=500 {
+            if index
+                .index_document(format!("doc{i}"), &format!("content {i}"))
+                .is_ok()
+            {
+                last_success = i;
+            } else {
+                break;
+            }
+        }
+        assert!(last_success > 0, "Should have indexed at least one doc");
+
+        // Remove some documents to free memory
+        for i in 1..=(last_success / 2) {
+            index.remove_document(&format!("doc{i}"));
+        }
+
+        // Now we should be able to insert again
+        let result = index.index_document("new_doc", "newly inserted content");
+        assert!(
+            result.is_ok(),
+            "Should be able to insert after removing docs"
+        );
+    }
+}
+
+#[cfg(test)]
+mod metrics_tests {
+    use crate::{Bm25Config, Bm25Index};
+    use crate::metrics::{names, MetricValue, RecordingSink};
+    use std::sync::Arc;
+
+    #[test]
+    fn index_document_emits_metrics() {
+        let sink = Arc::new(RecordingSink::new());
+        let mut index = Bm25Index::new(Bm25Config::default()).with_metrics(sink.clone());
+
+        index.index_document("doc1", "the quick brown fox").unwrap();
+
+        let events = sink.events();
+        let event_names: Vec<&str> = events.iter().map(|e| e.name).collect();
+
+        assert!(
+            event_names.contains(&names::BM25_INDEX_DURATION_MS),
+            "Missing index_document duration metric"
+        );
+        assert!(
+            event_names.contains(&names::BM25_INDEX_COUNT),
+            "Missing index_document count metric"
+        );
+        assert!(
+            event_names.contains(&names::BM25_INDEX_SIZE),
+            "Missing index size metric"
+        );
+
+        // Index size should be 1
+        let size_event = events
+            .iter()
+            .find(|e| e.name == names::BM25_INDEX_SIZE)
+            .unwrap();
+        assert_eq!(size_event.value, MetricValue::Gauge(1.0));
+    }
+
+    #[test]
+    fn search_emits_metrics() {
+        let sink = Arc::new(RecordingSink::new());
+        let mut index = Bm25Index::new(Bm25Config::default()).with_metrics(sink.clone());
+
+        index.index_document("doc1", "the quick brown fox").unwrap();
+        index.index_document("doc2", "the lazy dog").unwrap();
+
+        // Clear indexing metrics
+        sink.clear();
+
+        let results = index.search("quick fox", 10);
+
+        let events = sink.events();
+        let event_names: Vec<&str> = events.iter().map(|e| e.name).collect();
+
+        assert!(
+            event_names.contains(&names::BM25_SEARCH_DURATION_MS),
+            "Missing search duration metric"
+        );
+        assert!(
+            event_names.contains(&names::BM25_SEARCH_COUNT),
+            "Missing search count metric"
+        );
+        assert!(
+            event_names.contains(&names::BM25_SEARCH_RESULTS),
+            "Missing search results metric"
+        );
+
+        // Results count should match
+        let results_event = events
+            .iter()
+            .find(|e| e.name == names::BM25_SEARCH_RESULTS)
+            .unwrap();
+        assert_eq!(
+            results_event.value,
+            MetricValue::Gauge(results.len() as f64)
+        );
+    }
+
+    #[test]
+    fn no_metrics_without_sink() {
+        // Ensure no panic when metrics is None (default)
+        let mut index = Bm25Index::new(Bm25Config::default());
+        index.index_document("doc1", "hello world").unwrap();
+        let _ = index.search("hello", 5);
+    }
+
+    #[test]
+    fn set_metrics_at_runtime() {
+        let mut index = Bm25Index::new(Bm25Config::default());
+        index.index_document("doc1", "hello world").unwrap();
+
+        // Attach sink
+        let sink = Arc::new(RecordingSink::new());
+        index.set_metrics(Some(sink.clone()));
+
+        index.index_document("doc2", "goodbye world").unwrap();
+
+        assert!(!sink.is_empty());
+
+        // Detach
+        index.set_metrics(None);
+        sink.clear();
+
+        index.index_document("doc3", "another document").unwrap();
+        assert!(sink.is_empty(), "No events after detaching sink");
+    }
+
+    #[test]
+    fn search_on_empty_index_still_emits() {
+        let sink = Arc::new(RecordingSink::new());
+        let index = Bm25Index::new(Bm25Config::default()).with_metrics(sink.clone());
+
+        let results = index.search("anything", 5);
+        assert!(results.is_empty());
+
+        // Should still emit duration/count/results
+        let events = sink.events();
+        let event_names: Vec<&str> = events.iter().map(|e| e.name).collect();
+        assert!(event_names.contains(&names::BM25_SEARCH_DURATION_MS));
+        assert!(event_names.contains(&names::BM25_SEARCH_COUNT));
+        assert!(event_names.contains(&names::BM25_SEARCH_RESULTS));
+    }
+
+    #[test]
+    fn multiple_operations_accumulate_events() {
+        let sink = Arc::new(RecordingSink::new());
+        let mut index = Bm25Index::new(Bm25Config::default()).with_metrics(sink.clone());
+
+        // 3 index operations
+        index.index_document("d1", "alpha beta").unwrap();
+        index.index_document("d2", "gamma delta").unwrap();
+        index.index_document("d3", "epsilon zeta").unwrap();
+
+        // Count index_document.count events
+        let count_events: usize = sink
+            .events()
+            .iter()
+            .filter(|e| e.name == names::BM25_INDEX_COUNT)
+            .count();
+        assert_eq!(count_events, 3, "Expected 3 index count events");
+    }
+
+    #[test]
+    fn index_duration_is_nonnegative() {
+        let sink = Arc::new(RecordingSink::new());
+        let mut index = Bm25Index::new(Bm25Config::default()).with_metrics(sink.clone());
+
+        index
+            .index_document("doc1", "test document content")
+            .unwrap();
+
+        let duration_event = sink
+            .events()
+            .into_iter()
+            .find(|e| e.name == names::BM25_INDEX_DURATION_MS)
+            .unwrap();
+
+        match duration_event.value {
+            MetricValue::Histogram(ms) => assert!(ms >= 0.0, "Duration must be >= 0"),
+            other => panic!("Expected Histogram, got {other:?}"),
+        }
+    }
+}
diff --git a/crates/khive-bm25/src/tokenizer.rs b/crates/khive-bm25/src/tokenizer.rs
new file mode 100644
index 00000000..653bd446
--- /dev/null
+++ b/crates/khive-bm25/src/tokenizer.rs
@@ -0,0 +1,261 @@
+//! Tokenization for BM25.
+//!
+//! Provides a pluggable tokenizer system with a simple default implementation.
+//!
+//! # RETRIEVAL-10: Advanced Tokenization (Deferred)
+//!
+//! The following advanced tokenization features are **intentionally deferred**
+//! to future iterations:
+//!
+//! | Feature | Status | Rationale |
+//! |---------|--------|-----------|
+//! | CJK segmentation | Deferred | Requires jieba/mecab integration |
+//! | Arabic normalization | Deferred | Requires ICU or custom rules |
+//! | Stemming | Deferred | Language-specific (Snowball, Porter) |
+//! | Lemmatization | Deferred | Requires NLP models |
+//! | Stop word removal | Deferred | Language and domain specific |
+//! | N-gram support | Deferred | Memory/performance tradeoffs |
+//!
+//! **Current scope**: English whitespace tokenization with optional lowercase
+//! and minimum length filtering. This covers the primary use case.
+//!
+//! **Extension point**: Implement the [`Tokenizer`] trait for custom tokenization.
+//! The trait is designed to be language-agnostic and composable.
+//!
+//! # Examples
+//!
+//! Using the default SimpleTokenizer:
+//! ```rust
+//! use khive_bm25::{Tokenizer, SimpleTokenizer};
+//!
+//! let tokenizer = SimpleTokenizer::default();
+//! let tokens = tokenizer.tokenize("Hello, World!");
+//! assert_eq!(tokens, vec!["hello", "world"]);
+//! ```
+//!
+//! Custom tokenizer with minimum length:
+//! ```rust
+//! use khive_bm25::{Tokenizer, SimpleTokenizer};
+//!
+//! let tokenizer = SimpleTokenizer::new(true, 3);
+//! let tokens = tokenizer.tokenize("I am a cat");
+//! assert_eq!(tokens, vec!["cat"]); // "I", "am", "a" filtered out (< 3 chars)
+//! ```
+
+use std::collections::HashSet;
+use std::sync::{Arc, LazyLock};
+
+/// Tokenizer trait for extensible text tokenization.
+///
+/// Implement this trait to provide custom tokenization for BM25 search.
+/// This enables:
+/// - Language-specific tokenization (CJK, Arabic, etc.)
+/// - Stemming/lemmatization
+/// - Stop word removal
+/// - N-gram support
+pub trait Tokenizer: Send + Sync {
+    /// Tokenize the input text into a list of tokens.
+    ///
+    /// # Arguments
+    ///
+    /// * `text` - Input text to tokenize
+    ///
+    /// # Returns
+    ///
+    /// Vector of tokens (strings). Empty vector for empty input.
+    fn tokenize(&self, text: &str) -> Vec<String>;
+}
+
+/// Box type for tokenizers (enables dynamic dispatch).
+pub type BoxedTokenizer = Arc<dyn Tokenizer>;
+
+/// English stop words — high-frequency terms that add noise to BM25 postings
+/// without improving retrieval quality. Removing these reduces BM25 memory by
+/// ~170 MB at 15K docs (each stop word creates N postings × 64 bytes).
+static STOP_WORDS: LazyLock<HashSet<&'static str>> = LazyLock::new(|| {
+    HashSet::from([
+        "a", "an", "and", "are", "as", "at", "be", "been", "being", "but", "by", "can", "did",
+        "do", "does", "doing", "done", "for", "from", "had", "has", "have", "having", "he", "her",
+        "here", "hers", "him", "his", "how", "i", "if", "in", "into", "is", "it", "its", "just",
+        "may", "me", "might", "my", "no", "nor", "not", "of", "on", "or", "our", "out", "own",
+        "say", "she", "should", "so", "some", "such", "than", "that", "the", "their", "them",
+        "then", "there", "these", "they", "this", "those", "through", "to", "too", "up", "us",
+        "very", "was", "we", "were", "what", "when", "where", "which", "while", "who", "whom",
+        "why", "will", "with", "would", "you", "your",
+    ])
+});
+
+/// Simple whitespace tokenizer with optional lowercase, minimum length,
+/// and stop-word filtering.
+///
+/// This is the default tokenizer suitable for English text.
+/// For production use with non-English text, consider implementing
+/// a custom tokenizer with proper segmentation for your language.
+#[derive(Debug, Clone)]
+pub struct SimpleTokenizer {
+    /// Whether to lowercase tokens.
+    pub lowercase: bool,
+    /// Minimum token length (tokens shorter than this are filtered out).
+    pub min_length: usize,
+    /// Whether to filter out English stop words.
+    pub filter_stop_words: bool,
+}
+
+impl Default for SimpleTokenizer {
+    fn default() -> Self {
+        Self {
+            lowercase: true,
+            min_length: 1,
+            filter_stop_words: true,
+        }
+    }
+}
+
+impl SimpleTokenizer {
+    /// Create a new SimpleTokenizer with specified options.
+    ///
+    /// # Arguments
+    ///
+    /// * `lowercase` - Whether to convert tokens to lowercase
+    /// * `min_length` - Minimum token length (shorter tokens are filtered out)
+    pub fn new(lowercase: bool, min_length: usize) -> Self {
+        Self {
+            lowercase,
+            min_length,
+            filter_stop_words: true,
+        }
+    }
+}
+
+impl Tokenizer for SimpleTokenizer {
+    fn tokenize(&self, text: &str) -> Vec<String> {
+        // Fast path: estimate capacity to avoid re-allocations.
+        // Average English word ~5 chars + 1 space, so text.len()/6 is a reasonable estimate.
+        let estimated_tokens = text.len() / 6 + 1;
+        let mut result = Vec::with_capacity(estimated_tokens.min(32));
+
+        for word in text.split_whitespace() {
+            // Remove leading/trailing punctuation
+            let trimmed = word.trim_matches(|c: char| c.is_ascii_punctuation());
+
+            if trimmed.len() < self.min_length {
+                continue;
+            }
+
+            // Fast ASCII lowercase check: if all bytes are ASCII, lowercase in-place
+            // to avoid the overhead of `str::to_lowercase()` (which handles Unicode).
+            let token = if self.lowercase {
+                if trimmed.is_ascii() {
+                    // Fast path: ASCII-only, lowercase via byte manipulation
+                    let mut s = String::with_capacity(trimmed.len());
+                    for &byte in trimmed.as_bytes() {
+                        s.push(byte.to_ascii_lowercase() as char);
+                    }
+                    s
+                } else {
+                    trimmed.to_lowercase()
+                }
+            } else {
+                trimmed.to_string()
+            };
+
+            if self.filter_stop_words && STOP_WORDS.contains(token.as_str()) {
+                continue;
+            }
+
+            result.push(token);
+        }
+
+        result
+    }
+}
+
+/// Convenience function for simple tokenization (backwards compatibility).
+///
+/// Uses the default SimpleTokenizer configuration.
+pub fn tokenize(text: &str) -> Vec<String> {
+    SimpleTokenizer::default().tokenize(text)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_tokenize_filters_stop_words() {
+        let tokens = tokenize("The Quick, Brown FOX!");
+        // "the" is a stop word, filtered out
+        assert_eq!(tokens, vec!["quick", "brown", "fox"]);
+    }
+
+    #[test]
+    fn test_tokenize_empty() {
+        let tokens = tokenize("");
+        assert!(tokens.is_empty());
+
+        let tokens = tokenize("   ");
+        assert!(tokens.is_empty());
+    }
+
+    #[test]
+    fn test_tokenize_punctuation_only() {
+        let tokens = tokenize("... !!! ???");
+        assert!(tokens.is_empty());
+    }
+
+    #[test]
+    fn test_tokenize_case_insensitive() {
+        let tokens = tokenize("HELLO World hElLo");
+        assert_eq!(tokens, vec!["hello", "world", "hello"]);
+    }
+
+    #[test]
+    fn test_tokenize_stop_words_removed() {
+        // "how", "are", "you" are stop words
+        let tokens = tokenize("Hello, World! How are you?");
+        assert_eq!(tokens, vec!["hello", "world"]);
+    }
+
+    #[test]
+    fn test_tokenize_multiple_spaces() {
+        let tokens = tokenize("hello    world");
+        assert_eq!(tokens, vec!["hello", "world"]);
+    }
+
+    #[test]
+    fn test_simple_tokenizer_no_lowercase() {
+        let tokenizer = SimpleTokenizer::new(false, 1);
+        // "Hello" and "World" are not stop words (case-sensitive, and stop words are lowercase)
+        let tokens = tokenizer.tokenize("Hello World");
+        assert_eq!(tokens, vec!["Hello", "World"]);
+    }
+
+    #[test]
+    fn test_simple_tokenizer_min_length() {
+        let tokenizer = SimpleTokenizer::new(true, 3);
+        let tokens = tokenizer.tokenize("I am a cat");
+        // "I", "am", "a" filtered by min_length; also stop words
+        assert_eq!(tokens, vec!["cat"]);
+    }
+
+    #[test]
+    fn test_trait_object_usage() {
+        let tokenizer: BoxedTokenizer = Arc::new(SimpleTokenizer::default());
+        let tokens = tokenizer.tokenize("hello world");
+        assert_eq!(tokens, vec!["hello", "world"]);
+    }
+
+    #[test]
+    fn test_stop_words_disabled() {
+        let mut tokenizer = SimpleTokenizer::default();
+        tokenizer.filter_stop_words = false;
+        let tokens = tokenizer.tokenize("The Quick, Brown FOX!");
+        assert_eq!(tokens, vec!["the", "quick", "brown", "fox"]);
+    }
+
+    #[test]
+    fn test_all_stop_words_returns_empty() {
+        let tokens = tokenize("the and or but");
+        assert!(tokens.is_empty());
+    }
+}
diff --git a/crates/khive-fold/src/objective/mod.rs b/crates/khive-fold/src/objective/mod.rs
index c4504982..e2040fb6 100644
--- a/crates/khive-fold/src/objective/mod.rs
+++ b/crates/khive-fold/src/objective/mod.rs
@@ -4,11 +4,13 @@ pub mod builtin;
 pub mod compose;
 mod context;
 pub mod error;
+pub mod registry;
 mod selection;
 mod traits;
 
 pub use context::ObjectiveContext;
 pub use error::{ObjectiveError, ObjectiveResult};
+pub use registry::{ObjectiveRegistry, RegisteredObjective};
 pub use selection::Selection;
 pub use traits::{objective_fn, DeterministicObjective, Objective};
 
diff --git a/crates/khive-fold/src/objective/registry.rs b/crates/khive-fold/src/objective/registry.rs
new file mode 100644
index 00000000..4ce97815
--- /dev/null
+++ b/crates/khive-fold/src/objective/registry.rs
@@ -0,0 +1,275 @@
+//! Objective registry for dynamic dispatch.
+
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use parking_lot::RwLock;
+
+use crate::{Objective, ObjectiveContext, ObjectiveError, ObjectiveResult, Selection};
+
+/// A type-erased objective wrapper.
+pub struct RegisteredObjective<T> {
+    /// Name of the objective
+    pub name: String,
+    /// Description
+    pub description: Option<String>,
+    /// The objective implementation
+    objective: Box<dyn Objective<T>>,
+}
+
+impl<T> RegisteredObjective<T> {
+    /// Create a new registered objective
+    pub fn new(name: impl Into<String>, objective: Box<dyn Objective<T>>) -> Self {
+        Self {
+            name: name.into(),
+            description: None,
+            objective,
+        }
+    }
+
+    /// Add a description
+    pub fn with_description(mut self, desc: impl Into<String>) -> Self {
+        self.description = Some(desc.into());
+        self
+    }
+
+    /// Score a candidate
+    pub fn score(&self, candidate: &T, context: &ObjectiveContext) -> f64 {
+        self.objective.score(candidate, context)
+    }
+
+    /// Select from candidates
+    pub fn select<'a>(
+        &self,
+        candidates: &'a [T],
+        context: &ObjectiveContext,
+    ) -> ObjectiveResult<Selection<&'a T>> {
+        self.objective.select(candidates, context)
+    }
+}
+
+/// Registry of named objectives.
+pub struct ObjectiveRegistry<T> {
+    objectives: RwLock<HashMap<String, Arc<RegisteredObjective<T>>>>,
+    default: RwLock<Option<String>>,
+}
+
+impl<T> Default for ObjectiveRegistry<T> {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl<T> ObjectiveRegistry<T> {
+    /// Create a new empty registry
+    pub fn new() -> Self {
+        Self {
+            objectives: RwLock::new(HashMap::new()),
+            default: RwLock::new(None),
+        }
+    }
+
+    /// Register an objective.
+    ///
+    /// Returns the previously registered objective if one existed with the same name.
+    pub fn register(
+        &self,
+        name: impl Into<String>,
+        objective: Box<dyn Objective<T>>,
+    ) -> Option<Arc<RegisteredObjective<T>>> {
+        let name = name.into();
+        let registered = Arc::new(RegisteredObjective::new(name.clone(), objective));
+
+        let mut objectives = self.objectives.write();
+        objectives.insert(name, registered)
+    }
+
+    /// Register an objective with description.
+    ///
+    /// Returns the previously registered objective if one existed with the same name.
+    pub fn register_with_desc(
+        &self,
+        name: impl Into<String>,
+        description: impl Into<String>,
+        objective: Box<dyn Objective<T>>,
+    ) -> Option<Arc<RegisteredObjective<T>>> {
+        let name = name.into();
+        let registered = Arc::new(
+            RegisteredObjective::new(name.clone(), objective).with_description(description),
+        );
+
+        let mut objectives = self.objectives.write();
+        objectives.insert(name, registered)
+    }
+
+    /// Set the default objective
+    pub fn set_default(&self, name: impl Into<String>) -> ObjectiveResult<()> {
+        let name = name.into();
+
+        let objectives = self.objectives.read();
+        if !objectives.contains_key(&name) {
+            return Err(ObjectiveError::NotFound(name));
+        }
+        drop(objectives);
+
+        let mut default = self.default.write();
+        *default = Some(name);
+        Ok(())
+    }
+
+    /// Get an objective by name
+    pub fn get(&self, name: &str) -> ObjectiveResult<Arc<RegisteredObjective<T>>> {
+        let objectives = self.objectives.read();
+        objectives
+            .get(name)
+            .cloned()
+            .ok_or_else(|| ObjectiveError::NotFound(name.to_string()))
+    }
+
+    /// Get the default objective
+    pub fn get_default(&self) -> ObjectiveResult<Arc<RegisteredObjective<T>>> {
+        let default = self.default.read();
+        match default.as_ref() {
+            Some(name) => {
+                let name: String = name.clone();
+                drop(default);
+                self.get(&name)
+            }
+            None => Err(ObjectiveError::NotFound("No default set".to_string())),
+        }
+    }
+
+    /// List all registered objective names.
+    ///
+    /// Returns names in sorted order for deterministic output.
+    pub fn list(&self) -> Vec<String> {
+        let objectives = self.objectives.read();
+        let mut names: Vec<String> = objectives.keys().cloned().collect();
+        names.sort();
+        names
+    }
+
+    /// Check if an objective is registered
+    pub fn contains(&self, name: &str) -> bool {
+        let objectives = self.objectives.read();
+        objectives.contains_key(name)
+    }
+
+    /// Score using a named objective
+    pub fn score(
+        &self,
+        name: &str,
+        candidate: &T,
+        context: &ObjectiveContext,
+    ) -> ObjectiveResult<f64> {
+        let objective = self.get(name)?;
+        Ok(objective.score(candidate, context))
+    }
+
+    /// Select using a named objective
+    pub fn select<'a>(
+        &self,
+        name: &str,
+        candidates: &'a [T],
+        context: &ObjectiveContext,
+    ) -> ObjectiveResult<Selection<&'a T>> {
+        let objective = self.get(name)?;
+        objective.select(candidates, context)
+    }
+
+    /// Select using the default objective
+    pub fn select_default<'a>(
+        &self,
+        candidates: &'a [T],
+        context: &ObjectiveContext,
+    ) -> ObjectiveResult<Selection<&'a T>> {
+        let objective = self.get_default()?;
+        objective.select(candidates, context)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::objective_fn;
+
+    #[test]
+    fn test_register_and_get() {
+        let registry: ObjectiveRegistry<i32> = ObjectiveRegistry::new();
+
+        let obj = objective_fn(|n: &i32, _ctx: &ObjectiveContext| *n as f64);
+        let old = registry.register("max", Box::new(obj));
+
+        assert!(old.is_none());
+        assert!(registry.contains("max"));
+        assert!(!registry.contains("min"));
+    }
+
+    #[test]
+    fn test_register_overwrites() {
+        let registry: ObjectiveRegistry<i32> = ObjectiveRegistry::new();
+
+        let obj1 = objective_fn(|n: &i32, _ctx: &ObjectiveContext| *n as f64);
+        let obj2 = objective_fn(|n: &i32, _ctx: &ObjectiveContext| -(*n as f64));
+
+        let old1 = registry.register("test", Box::new(obj1));
+        assert!(old1.is_none());
+
+        let old2 = registry.register("test", Box::new(obj2));
+        assert!(old2.is_some());
+
+        let candidates = vec![1, 5, 3];
+        let selection = registry
+            .select("test", &candidates, &ObjectiveContext::new())
+            .unwrap();
+        assert_eq!(*selection.item, 1);
+    }
+
+    #[test]
+    fn test_select_by_name() {
+        let registry: ObjectiveRegistry<i32> = ObjectiveRegistry::new();
+
+        let obj = objective_fn(|n: &i32, _ctx: &ObjectiveContext| *n as f64);
+        registry.register("max", Box::new(obj));
+
+        let candidates = vec![1, 5, 3];
+        let selection = registry
+            .select("max", &candidates, &ObjectiveContext::new())
+            .unwrap();
+
+        assert_eq!(*selection.item, 5);
+    }
+
+    #[test]
+    fn test_default_objective() {
+        let registry: ObjectiveRegistry<i32> = ObjectiveRegistry::new();
+
+        let obj = objective_fn(|n: &i32, _ctx: &ObjectiveContext| *n as f64);
+        registry.register("max", Box::new(obj));
+        registry.set_default("max").unwrap();
+
+        let candidates = vec![1, 5, 3];
+        let selection = registry
+            .select_default(&candidates, &ObjectiveContext::new())
+            .unwrap();
+
+        assert_eq!(*selection.item, 5);
+    }
+
+    #[test]
+    fn test_list_objectives_sorted() {
+        let registry: ObjectiveRegistry<i32> = ObjectiveRegistry::new();
+
+        let obj1 = objective_fn(|n: &i32, _ctx: &ObjectiveContext| *n as f64);
+        let obj2 = objective_fn(|n: &i32, _ctx: &ObjectiveContext| -(*n as f64));
+        let obj3 = objective_fn(|n: &i32, _ctx: &ObjectiveContext| (*n as f64).abs());
+
+        registry.register("zebra", Box::new(obj1));
+        registry.register("alpha", Box::new(obj2));
+        registry.register("middle", Box::new(obj3));
+
+        let names = registry.list();
+        assert_eq!(names.len(), 3);
+        assert_eq!(names, vec!["alpha", "middle", "zebra"]);
+    }
+}
diff --git a/crates/khive-fusion/Cargo.toml b/crates/khive-fusion/Cargo.toml
new file mode 100644
index 00000000..22926de5
--- /dev/null
+++ b/crates/khive-fusion/Cargo.toml
@@ -0,0 +1,18 @@
+[package]
+name = "khive-fusion"
+version.workspace = true
+edition.workspace = true
+authors.workspace = true
+license.workspace = true
+repository.workspace = true
+homepage.workspace = true
+keywords.workspace = true
+categories.workspace = true
+description = "Rank fusion strategies (RRF, Weighted, Union) with deterministic scoring — formally verified in Lean4"
+
+[dependencies]
+khive-score = { version = "0.2.0", path = "../khive-score" }
+khive-types = { version = "0.2.0", path = "../khive-types" }
+serde = { workspace = true }
+serde_json = { workspace = true }
+thiserror = { workspace = true }
diff --git a/crates/khive-fusion/src/fuse.rs b/crates/khive-fusion/src/fuse.rs
new file mode 100644
index 00000000..0e51c880
--- /dev/null
+++ b/crates/khive-fusion/src/fuse.rs
@@ -0,0 +1,172 @@
+//! Main fusion entry point.
+
+use khive_score::DeterministicScore;
+use std::hash::Hash;
+
+use super::rrf::reciprocal_rank_fusion;
+use super::strategy::FusionStrategy;
+use super::union::union_fusion;
+use super::weighted::weighted_fusion;
+
+/// Fuse multiple ranked result lists into a single ranked list.
+///
+/// This is the main entry point for rank fusion. It supports multiple fusion
+/// strategies and is generic over the ID type.
+///
+/// # Arguments
+///
+/// * `sources` - Vector of result lists from different retrievers.
+///   Each list contains `(Id, DeterministicScore)` pairs, already sorted
+///   by score descending (best first).
+/// * `strategy` - The fusion strategy to use.
+/// * `top_k` - Maximum number of results to return.
+///
+/// # Returns
+///
+/// A vector of `(Id, DeterministicScore)` pairs sorted by fused score descending,
+/// truncated to `top_k` results.
+///
+/// # Type Parameters
+///
+/// * `Id` - The identifier type. Must implement `Eq`, `Hash`, `Clone`, and `Ord`.
+///   Works with `EmbeddingId`, `DocumentId`, `String`, `Uuid`, etc.
+///   `Ord` is required for deterministic tie-breaking when scores are equal.
+///
+/// # Example
+///
+/// ```rust
+/// use khive_fusion::{fuse, FusionStrategy};
+/// use khive_score::DeterministicScore;
+///
+/// let sources = vec![
+///     vec![("a", DeterministicScore::from_f64(0.9))],
+///     vec![("a", DeterministicScore::from_f64(0.8))],
+/// ];
+///
+/// let results = fuse(sources, &FusionStrategy::default(), 10);
+/// assert_eq!(results.len(), 1);
+/// ```
+pub fn fuse<Id: Eq + Hash + Clone + Ord>(
+    sources: Vec<Vec<(Id, DeterministicScore)>>,
+    strategy: &FusionStrategy,
+    top_k: usize,
+) -> Vec<(Id, DeterministicScore)> {
+    if sources.is_empty() || top_k == 0 {
+        return Vec::new();
+    }
+
+    let fused = match strategy {
+        FusionStrategy::Rrf { k } => reciprocal_rank_fusion(sources, *k),
+        FusionStrategy::Weighted { weights } => weighted_fusion(sources, weights),
+        FusionStrategy::Union => union_fusion(sources),
+        // VectorOnly / KeywordOnly: the caller is responsible for ensuring only
+        // the relevant source list is passed. Within fuse(), we take the union
+        // (max-score per ID) which is a no-op when there is a single source.
+        FusionStrategy::VectorOnly | FusionStrategy::KeywordOnly => union_fusion(sources),
+    };
+
+    // Truncate to top_k
+    fused.into_iter().take(top_k).collect()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn make_results<Id: Clone>(items: Vec<(Id, f64)>) -> Vec<(Id, DeterministicScore)> {
+        items
+            .into_iter()
+            .map(|(id, score)| (id, DeterministicScore::from_f64(score)))
+            .collect()
+    }
+
+    #[test]
+    fn test_fuse_rrf_strategy() {
+        let source = make_results(vec![("doc_a", 0.9), ("doc_b", 0.8)]);
+        let fused = fuse(vec![source], &FusionStrategy::rrf(), 10);
+
+        assert_eq!(fused.len(), 2);
+    }
+
+    #[test]
+    fn test_fuse_weighted_strategy() {
+        let source = make_results(vec![("doc_a", 1.0)]);
+        let fused = fuse(vec![source], &FusionStrategy::weighted(vec![1.0]), 10);
+
+        assert_eq!(fused.len(), 1);
+    }
+
+    #[test]
+    fn test_fuse_union_strategy() {
+        let source = make_results(vec![("doc_a", 0.9)]);
+        let fused = fuse(vec![source], &FusionStrategy::union(), 10);
+
+        assert_eq!(fused.len(), 1);
+    }
+
+    #[test]
+    fn test_fuse_top_k_truncation() {
+        let source = make_results(vec![
+            ("doc_a", 0.9),
+            ("doc_b", 0.8),
+            ("doc_c", 0.7),
+            ("doc_d", 0.6),
+            ("doc_e", 0.5),
+        ]);
+
+        let fused = fuse(vec![source], &FusionStrategy::rrf(), 3);
+
+        assert_eq!(fused.len(), 3);
+        assert_eq!(fused[0].0, "doc_a");
+        assert_eq!(fused[1].0, "doc_b");
+        assert_eq!(fused[2].0, "doc_c");
+    }
+
+    #[test]
+    fn test_fuse_top_k_zero() {
+        let source = make_results(vec![("doc_a", 0.9)]);
+        let fused = fuse(vec![source], &FusionStrategy::rrf(), 0);
+
+        assert!(fused.is_empty());
+    }
+
+    #[test]
+    fn test_fuse_empty_sources() {
+        let fused: Vec<(&str, DeterministicScore)> = fuse(vec![], &FusionStrategy::rrf(), 10);
+        assert!(fused.is_empty());
+    }
+
+    #[test]
+    fn test_fuse_top_k_larger_than_results() {
+        let source = make_results(vec![("doc_a", 0.9), ("doc_b", 0.8)]);
+        let fused = fuse(vec![source], &FusionStrategy::rrf(), 100);
+
+        assert_eq!(fused.len(), 2);
+    }
+
+    #[test]
+    fn test_fuse_with_string_ids() {
+        let source: Vec<(String, DeterministicScore)> = vec![
+            ("doc_a".to_string(), DeterministicScore::from_f64(0.9)),
+            ("doc_b".to_string(), DeterministicScore::from_f64(0.8)),
+        ];
+
+        let fused = fuse(vec![source], &FusionStrategy::rrf(), 10);
+
+        assert_eq!(fused.len(), 2);
+        assert_eq!(fused[0].0, "doc_a");
+    }
+
+    #[test]
+    fn test_fuse_with_integer_ids() {
+        let source: Vec<(u64, DeterministicScore)> = vec![
+            (1, DeterministicScore::from_f64(0.9)),
+            (2, DeterministicScore::from_f64(0.8)),
+        ];
+
+        let fused = fuse(vec![source], &FusionStrategy::rrf(), 10);
+
+        assert_eq!(fused.len(), 2);
+        assert_eq!(fused[0].0, 1);
+    }
+}
diff --git a/crates/khive-fusion/src/lib.rs b/crates/khive-fusion/src/lib.rs
new file mode 100644
index 00000000..c17caad2
--- /dev/null
+++ b/crates/khive-fusion/src/lib.rs
@@ -0,0 +1,68 @@
+//! Fusion algorithms for combining retrieval results.
+//!
+//! This module implements rank fusion strategies for hybrid search, combining
+//! results from multiple retrieval sources (e.g., vector search, keyword search).
+//!
+//! # Supported Strategies
+//!
+//! - **RRF (Reciprocal Rank Fusion)**: Default and recommended. Uses only ranks,
+//!   making it robust to score distribution differences.
+//! - **Weighted**: Linear combination of scores with configurable weights.
+//! - **Union**: Takes the maximum score per ID across sources.
+//!
+//! # Algorithm (ADR-002)
+//!
+//! RRF formula:
+//! ```text
+//! score(d) = Σ 1/(k + rank_i(d))
+//! ```
+//! where:
+//! - k = 60 (standard, dampens high-rank dominance)
+//! - rank_i(d) = position of d in retriever i's results (1-indexed)
+//! - If d not in retriever i, contribution = 0
+//!
+//! # Example
+//!
+//! ```rust
+//! use khive_fusion::{fuse, FusionStrategy, reciprocal_rank_fusion};
+//! use khive_score::DeterministicScore;
+//!
+//! // Two retrieval sources with different rankings
+//! let vector_results = vec![
+//!     ("doc_a", DeterministicScore::from_f64(0.95)),
+//!     ("doc_b", DeterministicScore::from_f64(0.90)),
+//!     ("doc_c", DeterministicScore::from_f64(0.85)),
+//! ];
+//!
+//! let keyword_results = vec![
+//!     ("doc_b", DeterministicScore::from_f64(0.88)),
+//!     ("doc_c", DeterministicScore::from_f64(0.75)),
+//!     ("doc_d", DeterministicScore::from_f64(0.70)),
+//! ];
+//!
+//! // Fuse using RRF with k=60
+//! let fused = fuse(
+//!     vec![vector_results, keyword_results],
+//!     &FusionStrategy::Rrf { k: 60 },
+//!     5,
+//! );
+//!
+//! // doc_b appears in both sources, so it gets highest RRF score
+//! assert_eq!(fused[0].0, "doc_b");
+//! ```
+
+mod fuse;
+mod rrf;
+mod strategy;
+mod union;
+mod weighted;
+
+#[cfg(test)]
+mod tests;
+
+// Re-export public types and functions
+pub use fuse::fuse;
+pub use rrf::reciprocal_rank_fusion;
+pub use strategy::{FusionStrategy, DEFAULT_RRF_K};
+pub use union::union_fusion;
+pub use weighted::{normalize_weights, weighted_fusion, weights_are_normalized};
diff --git a/crates/khive-fusion/src/rrf.rs b/crates/khive-fusion/src/rrf.rs
new file mode 100644
index 00000000..bc694eb2
--- /dev/null
+++ b/crates/khive-fusion/src/rrf.rs
@@ -0,0 +1,209 @@
+//! Reciprocal Rank Fusion (RRF) algorithm.
+//!
+//! # Formal Verification
+//!
+//! This implementation corresponds to the formal proofs in
+//! `proofs/Lion/Retrieval/RRF.lean`. Key theorems:
+//!
+//! - `better_rank_higher_contrib`: r1 < r2 → contrib(r1) > contrib(r2)
+//! - `present_gt_absent`: present documents always outscore absent
+//! - `contrib_upper_bound`: contribution ≤ 1/(k+1)
+//! - `total_bounded`: total score ≤ number of sources
+//! - `sum_perm`: sum is order-independent (permutation invariant)
+//! - `deterministic_ordering`: ties broken by ID for cross-platform consistency
+
+use khive_score::DeterministicScore;
+use std::cmp::Ordering;
+use std::collections::HashMap;
+use std::hash::Hash;
+
+/// Reciprocal Rank Fusion (RRF) algorithm.
+///
+/// Combines ranked lists using only rank information, ignoring original scores.
+/// This makes it robust to different score distributions and outliers.
+///
+/// # Formula
+///
+/// For each document d across all sources:
+/// ```text
+/// score(d) = Σ 1/(k + rank_i(d))
+/// ```
+/// where rank_i(d) is the 1-indexed position of d in source i.
+///
+/// # Arguments
+///
+/// * `sources` - Vector of result lists. Each list should be sorted by
+///   score descending (best first). The scores are ignored; only positions matter.
+/// * `k` - Smoothing constant. Standard value is 60.
+///
+/// # Returns
+///
+/// A vector of `(Id, DeterministicScore)` pairs sorted by RRF score descending.
+///
+/// # Example
+///
+/// ```rust
+/// use khive_fusion::reciprocal_rank_fusion;
+/// use khive_score::DeterministicScore;
+///
+/// let source1 = vec![
+///     ("doc_a", DeterministicScore::from_f64(0.9)),  // rank 1
+///     ("doc_b", DeterministicScore::from_f64(0.8)),  // rank 2
+/// ];
+/// let source2 = vec![
+///     ("doc_b", DeterministicScore::from_f64(0.95)), // rank 1
+///     ("doc_a", DeterministicScore::from_f64(0.7)),  // rank 2
+/// ];
+///
+/// let fused = reciprocal_rank_fusion(vec![source1, source2], 60);
+///
+/// // doc_a: 1/(60+1) + 1/(60+2) = 1/61 + 1/62 ≈ 0.0326
+/// // doc_b: 1/(60+2) + 1/(60+1) = 1/62 + 1/61 ≈ 0.0326
+/// // Scores are equal since both appear at ranks 1 and 2 (just swapped)
+/// // Ties are broken by ID (lexicographic order) for determinism
+/// ```
+///
+/// **PROOF CORRESPONDENCE**: `Lion.Retrieval.RRF.better_rank_higher_contrib`
+/// Better rank yields higher contribution: r1 < r2 implies 1/(k+r1) > 1/(k+r2).
+///
+/// **PROOF CORRESPONDENCE**: `Lion.Retrieval.RRF.present_gt_absent`
+/// Documents present in any source always outscore documents absent from all sources
+/// (absent documents have score 0, present documents have score > 0).
+pub fn reciprocal_rank_fusion<Id: Eq + Hash + Clone + Ord>(
+    sources: Vec<Vec<(Id, DeterministicScore)>>,
+    k: usize,
+) -> Vec<(Id, DeterministicScore)> {
+    if sources.is_empty() {
+        return Vec::new();
+    }
+
+    // Ensure k >= 1 to avoid division issues
+    let k = k.max(1);
+
+    // Estimate capacity as sum of all source lengths (upper bound on unique IDs)
+    let estimated_capacity: usize = sources.iter().map(|s| s.len()).sum();
+    let mut combined: HashMap<Id, f64> = HashMap::with_capacity(estimated_capacity);
+
+    for results in sources {
+        for (rank_0_indexed, (id, _score)) in results.into_iter().enumerate() {
+            // rank is 1-indexed per ADR-002
+            let rank_1_indexed = rank_0_indexed + 1;
+            let rrf_contribution = 1.0 / (k + rank_1_indexed) as f64;
+
+            *combined.entry(id).or_insert(0.0) += rrf_contribution;
+        }
+    }
+
+    // Convert to DeterministicScore and sort descending
+    // **PROOF CORRESPONDENCE**: `Lion.Retrieval.RRF.sum_perm`
+    // The sum of contributions is permutation-invariant: reordering sources
+    // produces the same total score for each document.
+    let mut fused: Vec<(Id, DeterministicScore)> = combined
+        .into_iter()
+        .map(|(id, score)| (id, DeterministicScore::from_f64(score)))
+        .collect();
+
+    // Sort by score descending, then by ID ascending for deterministic tie-breaking
+    // This ensures cross-platform consistency when scores are equal
+    fused.sort_by(
+        |(id_a, score_a), (id_b, score_b)| match score_b.cmp(score_a) {
+            Ordering::Equal => id_a.cmp(id_b),
+            other => other,
+        },
+    );
+
+    fused
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn make_results<Id: Clone>(items: Vec<(Id, f64)>) -> Vec<(Id, DeterministicScore)> {
+        items
+            .into_iter()
+            .map(|(id, score)| (id, DeterministicScore::from_f64(score)))
+            .collect()
+    }
+
+    #[test]
+    fn test_rrf_basic_two_sources() {
+        let source1 = make_results(vec![("doc_a", 0.9), ("doc_b", 0.8)]);
+        let source2 = make_results(vec![("doc_b", 0.95), ("doc_c", 0.7)]);
+
+        let fused = reciprocal_rank_fusion(vec![source1, source2], 60);
+
+        // doc_b appears in both, should have highest score
+        assert_eq!(fused[0].0, "doc_b");
+        assert_eq!(fused.len(), 3);
+    }
+
+    #[test]
+    fn test_rrf_score_calculation() {
+        let source = make_results(vec![("doc_a", 0.9)]);
+        let fused = reciprocal_rank_fusion(vec![source], 60);
+
+        let expected = 1.0 / 61.0;
+        assert!((fused[0].1.to_f64() - expected).abs() < 1e-9);
+    }
+
+    #[test]
+    fn test_rrf_cumulative_scores() {
+        let source1 = make_results(vec![("doc_a", 0.9)]);
+        let source2 = make_results(vec![("doc_a", 0.8)]);
+
+        let fused = reciprocal_rank_fusion(vec![source1, source2], 60);
+
+        let expected = 2.0 / 61.0;
+        assert!((fused[0].1.to_f64() - expected).abs() < 1e-9);
+    }
+
+    #[test]
+    fn test_rrf_ignores_scores() {
+        let source1_high = make_results(vec![("doc_a", 0.99), ("doc_b", 0.01)]);
+        let source1_low = make_results(vec![("doc_a", 0.6), ("doc_b", 0.5)]);
+
+        let fused_high = reciprocal_rank_fusion(vec![source1_high], 60);
+        let fused_low = reciprocal_rank_fusion(vec![source1_low], 60);
+
+        assert_eq!(fused_high[0].1, fused_low[0].1);
+        assert_eq!(fused_high[1].1, fused_low[1].1);
+    }
+
+    #[test]
+    fn test_rrf_empty_sources() {
+        let fused: Vec<(&str, DeterministicScore)> = reciprocal_rank_fusion(vec![], 60);
+        assert!(fused.is_empty());
+    }
+
+    #[test]
+    fn test_rrf_single_source_passthrough() {
+        let source = make_results(vec![("doc_a", 0.9), ("doc_b", 0.8), ("doc_c", 0.7)]);
+        let fused = reciprocal_rank_fusion(vec![source], 60);
+
+        assert_eq!(fused.len(), 3);
+        assert_eq!(fused[0].0, "doc_a");
+        assert_eq!(fused[1].0, "doc_b");
+        assert_eq!(fused[2].0, "doc_c");
+    }
+
+    #[test]
+    fn test_rrf_k_minimum_enforced() {
+        let source = make_results(vec![("doc_a", 0.9)]);
+        let fused = reciprocal_rank_fusion(vec![source], 0);
+
+        let expected = 1.0 / 2.0;
+        assert!((fused[0].1.to_f64() - expected).abs() < 1e-9);
+    }
+
+    #[test]
+    fn test_rrf_many_sources() {
+        let sources: Vec<Vec<(&str, DeterministicScore)>> =
+            (0..5).map(|_| make_results(vec![("doc_a", 0.9)])).collect();
+
+        let fused = reciprocal_rank_fusion(sources, 60);
+
+        let expected = 5.0 / 61.0;
+        assert!((fused[0].1.to_f64() - expected).abs() < 1e-9);
+    }
+}
diff --git a/crates/khive-fusion/src/strategy.rs b/crates/khive-fusion/src/strategy.rs
new file mode 100644
index 00000000..025d972a
--- /dev/null
+++ b/crates/khive-fusion/src/strategy.rs
@@ -0,0 +1,116 @@
+//! Fusion strategy types.
+
+use serde::{Deserialize, Serialize};
+
+/// Default RRF constant k=60, standard in literature (Craswell et al., 2009).
+pub const DEFAULT_RRF_K: usize = 60;
+
+/// Fusion strategy for combining ranked result lists.
+///
+/// See ADR-002 for detailed algorithm specification.
+#[derive(Debug, Clone, PartialEq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum FusionStrategy {
+    /// Reciprocal Rank Fusion (default, recommended).
+    ///
+    /// Uses only ranks, making it robust to different score distributions.
+    /// Formula: score(d) = Σ 1/(k + rank_i(d))
+    #[serde(alias = "Rrf")]
+    Rrf {
+        /// Smoothing constant. Higher values reduce impact of rank differences.
+        /// Default: 60 (standard in literature).
+        k: usize,
+    },
+
+    /// Weighted linear combination of scores.
+    ///
+    /// Requires score normalization for different score scales (e.g., vector
+    /// similarity 0-1 vs BM25 0-∞).
+    ///
+    /// Weights are normalized to sum to 1.0 internally.
+    #[serde(alias = "Weighted")]
+    Weighted {
+        /// Weights for each source (will be normalized).
+        weights: Vec<f64>,
+    },
+
+    /// Take union with max score per ID.
+    ///
+    /// Useful when you want the best score from any source.
+    #[serde(alias = "Union")]
+    Union,
+
+    /// Skip BM25 entirely — return only vector (HNSW) results.
+    ///
+    /// Use when keyword search degrades quality (short queries, code search).
+    /// The result list is the raw HNSW output with no fusion step.
+    #[serde(alias = "VectorOnly")]
+    VectorOnly,
+
+    /// Skip HNSW entirely — return only BM25 keyword results.
+    ///
+    /// Use for exact-match retrieval (medication names, identifiers, slugs).
+    /// The result list is the raw BM25 output with no fusion step.
+    #[serde(alias = "KeywordOnly")]
+    KeywordOnly,
+}
+
+impl Default for FusionStrategy {
+    fn default() -> Self {
+        Self::Rrf { k: DEFAULT_RRF_K }
+    }
+}
+
+impl FusionStrategy {
+    /// Create an RRF strategy with default k=60.
+    #[inline]
+    pub fn rrf() -> Self {
+        Self::Rrf { k: DEFAULT_RRF_K }
+    }
+
+    /// Create an RRF strategy with custom k value.
+    #[inline]
+    pub fn rrf_with_k(k: usize) -> Self {
+        Self::Rrf { k: k.max(1) } // Ensure k >= 1
+    }
+
+    /// Create a weighted strategy with given weights.
+    #[inline]
+    pub fn weighted(weights: Vec<f64>) -> Self {
+        Self::Weighted { weights }
+    }
+
+    /// Create a union strategy.
+    #[inline]
+    pub fn union() -> Self {
+        Self::Union
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_fusion_strategy_default() {
+        let default = FusionStrategy::default();
+        assert_eq!(default, FusionStrategy::Rrf { k: 60 });
+    }
+
+    #[test]
+    fn test_fusion_strategy_builders() {
+        assert_eq!(FusionStrategy::rrf(), FusionStrategy::Rrf { k: 60 });
+        assert_eq!(
+            FusionStrategy::rrf_with_k(20),
+            FusionStrategy::Rrf { k: 20 }
+        );
+        assert_eq!(FusionStrategy::rrf_with_k(0), FusionStrategy::Rrf { k: 1 }); // min enforced
+        assert_eq!(
+            FusionStrategy::weighted(vec![0.5, 0.5]),
+            FusionStrategy::Weighted {
+                weights: vec![0.5, 0.5]
+            }
+        );
+        assert_eq!(FusionStrategy::union(), FusionStrategy::Union);
+    }
+}
diff --git a/crates/khive-fusion/src/tests.rs b/crates/khive-fusion/src/tests.rs
new file mode 100644
index 00000000..def5e299
--- /dev/null
+++ b/crates/khive-fusion/src/tests.rs
@@ -0,0 +1,316 @@
+//! Integration tests and property tests for fusion module.
+
+#[cfg(test)]
+mod integration_tests {
+    use crate::{fuse, reciprocal_rank_fusion, union_fusion, weighted_fusion, FusionStrategy};
+    use khive_score::DeterministicScore;
+
+    pub(super) fn make_results<Id: Clone>(items: Vec<(Id, f64)>) -> Vec<(Id, DeterministicScore)> {
+        items
+            .into_iter()
+            .map(|(id, score)| (id, DeterministicScore::from_f64(score)))
+            .collect()
+    }
+
+    // =========================================================================
+    // RETRIEVAL-01: Deterministic Tie-Breaking Tests
+    // =========================================================================
+
+    #[test]
+    fn test_rrf_deterministic_tie_breaking() {
+        // When two documents have equal RRF scores, they should be ordered by ID
+        let source1 = make_results(vec![("doc_a", 0.9)]); // rank 1
+        let source2 = make_results(vec![("doc_b", 0.8)]); // rank 1
+
+        // Run multiple times to verify consistency
+        for _ in 0..10 {
+            let fused = reciprocal_rank_fusion(vec![source1.clone(), source2.clone()], 60);
+
+            assert_eq!(fused.len(), 2);
+            // Both have same RRF score (1/61), so order should be by ID
+            assert_eq!(fused[0].1, fused[1].1, "Scores should be equal");
+            assert_eq!(
+                fused[0].0, "doc_a",
+                "doc_a should come first (lexicographic order)"
+            );
+            assert_eq!(fused[1].0, "doc_b", "doc_b should come second");
+        }
+    }
+
+    #[test]
+    fn test_weighted_deterministic_tie_breaking() {
+        // Two documents with equal weighted scores
+        let source = make_results(vec![("z_doc", 0.5), ("a_doc", 0.5)]);
+
+        for _ in 0..10 {
+            let fused = weighted_fusion(vec![source.clone()], &[1.0]);
+
+            assert_eq!(fused.len(), 2);
+            assert_eq!(fused[0].1, fused[1].1, "Scores should be equal");
+            assert_eq!(
+                fused[0].0, "a_doc",
+                "a_doc should come first (lexicographic order)"
+            );
+            assert_eq!(fused[1].0, "z_doc", "z_doc should come second");
+        }
+    }
+
+    #[test]
+    fn test_union_deterministic_tie_breaking() {
+        // Two documents with equal max scores
+        let source1 = make_results(vec![("charlie", 0.8)]);
+        let source2 = make_results(vec![("alpha", 0.8)]);
+
+        for _ in 0..10 {
+            let fused = union_fusion(vec![source1.clone(), source2.clone()]);
+
+            assert_eq!(fused.len(), 2);
+            assert_eq!(fused[0].1, fused[1].1, "Scores should be equal");
+            assert_eq!(fused[0].0, "alpha", "alpha should come first");
+            assert_eq!(fused[1].0, "charlie", "charlie should come second");
+        }
+    }
+
+    #[test]
+    fn test_fuse_deterministic_with_many_ties() {
+        // Multiple documents all at same score
+        let source: Vec<(&str, DeterministicScore)> = vec![
+            ("delta", DeterministicScore::from_f64(0.5)),
+            ("alpha", DeterministicScore::from_f64(0.5)),
+            ("charlie", DeterministicScore::from_f64(0.5)),
+            ("bravo", DeterministicScore::from_f64(0.5)),
+        ];
+
+        for _ in 0..10 {
+            let fused = fuse(vec![source.clone()], &FusionStrategy::union(), 10);
+
+            assert_eq!(fused.len(), 4);
+            // All have same score, should be in lexicographic order
+            assert_eq!(fused[0].0, "alpha");
+            assert_eq!(fused[1].0, "bravo");
+            assert_eq!(fused[2].0, "charlie");
+            assert_eq!(fused[3].0, "delta");
+        }
+    }
+
+    #[test]
+    fn test_rrf_large_number_of_results() {
+        // Test with many results to ensure no overflow/precision issues
+        let source: Vec<(&str, DeterministicScore)> = (0..1000)
+            .map(|i| {
+                let id = Box::leak(format!("doc_{i}").into_boxed_str());
+                (
+                    id as &str,
+                    DeterministicScore::from_f64(1.0 - i as f64 / 1000.0),
+                )
+            })
+            .collect();
+
+        let fused = fuse(vec![source], &FusionStrategy::rrf(), 100);
+
+        assert_eq!(fused.len(), 100);
+        assert_eq!(fused[0].0, "doc_0");
+    }
+
+    #[test]
+    fn test_multiple_sources_all_same_document() {
+        let source1 = make_results(vec![("doc_a", 0.9), ("doc_b", 0.8)]);
+        let source2 = make_results(vec![("doc_b", 0.95), ("doc_a", 0.7)]);
+        let source3 = make_results(vec![("doc_a", 0.85)]);
+
+        let fused = reciprocal_rank_fusion(vec![source1, source2, source3], 60);
+
+        let doc_a = fused.iter().find(|(id, _)| *id == "doc_a").unwrap();
+        let doc_b = fused.iter().find(|(id, _)| *id == "doc_b").unwrap();
+
+        assert!(doc_a.1 > doc_b.1); // doc_a appears in more sources
+    }
+
+    #[test]
+    fn test_sorted_output() {
+        let source1 = make_results(vec![("doc_c", 0.7), ("doc_a", 0.9), ("doc_b", 0.8)]);
+
+        let fused = reciprocal_rank_fusion(vec![source1], 60);
+
+        // Input order determines rank, so doc_c is rank 1, doc_a rank 2, doc_b rank 3
+        assert_eq!(fused[0].0, "doc_c");
+        assert_eq!(fused[1].0, "doc_a");
+        assert_eq!(fused[2].0, "doc_b");
+    }
+
+    #[test]
+    fn test_rrf_document_only_in_one_source() {
+        let source1 = make_results(vec![("doc_a", 0.9)]);
+        let source2 = make_results(vec![("doc_b", 0.8)]);
+
+        let fused = reciprocal_rank_fusion(vec![source1, source2], 60);
+
+        // Both at rank 1 in their respective sources -> same RRF score
+        assert_eq!(fused.len(), 2);
+        assert_eq!(fused[0].1, fused[1].1);
+    }
+
+    #[test]
+    fn test_rrf_custom_k() {
+        let source = make_results(vec![("doc_a", 0.9), ("doc_b", 0.8)]);
+
+        let fused_k20 = reciprocal_rank_fusion(vec![source.clone()], 20);
+        let fused_k100 = reciprocal_rank_fusion(vec![source], 100);
+
+        let ratio_k20 = fused_k20[0].1.to_f64() / fused_k20[1].1.to_f64();
+        let ratio_k100 = fused_k100[0].1.to_f64() / fused_k100[1].1.to_f64();
+
+        // Smaller k -> larger ratio (more difference between ranks)
+        assert!(ratio_k20 > ratio_k100);
+    }
+}
+
+// =============================================================================
+// Property Tests (Issue #746)
+// TODO(port): proptest not yet added as a dev-dependency; the proptest macro
+// forms below have been converted to deterministic unit tests covering the same
+// properties. Re-introduce proptest once it is added to Cargo.toml [dev-dependencies].
+// =============================================================================
+
+#[cfg(test)]
+mod property_tests {
+    use crate::{reciprocal_rank_fusion, union_fusion, weighted_fusion};
+    use khive_score::DeterministicScore;
+    use std::collections::HashSet;
+
+    fn make_results(items: Vec<(&'static str, f64)>) -> Vec<(String, DeterministicScore)> {
+        items
+            .into_iter()
+            .map(|(id, score)| (id.to_string(), DeterministicScore::from_f64(score)))
+            .collect()
+    }
+
+    /// RRF is commutative: source order should not affect final rankings.
+    ///
+    /// Verifies the `sum_perm` property from RRF.lean.
+    #[test]
+    fn prop_rrf_is_commutative() {
+        let sources = vec![
+            make_results(vec![("doc_a", 0.9), ("doc_b", 0.8)]),
+            make_results(vec![("doc_b", 0.95), ("doc_c", 0.7)]),
+            make_results(vec![("doc_a", 0.6), ("doc_c", 0.5)]),
+        ];
+
+        let fused_orig = reciprocal_rank_fusion(sources.clone(), 60);
+
+        let mut reversed = sources.clone();
+        reversed.reverse();
+        let fused_reversed = reciprocal_rank_fusion(reversed, 60);
+
+        let orig_set: HashSet<_> = fused_orig
+            .iter()
+            .map(|(id, score)| (id.clone(), score.to_raw()))
+            .collect();
+        let rev_set: HashSet<_> = fused_reversed
+            .iter()
+            .map(|(id, score)| (id.clone(), score.to_raw()))
+            .collect();
+
+        assert_eq!(
+            orig_set, rev_set,
+            "RRF results should be identical regardless of source order"
+        );
+    }
+
+    /// Documents in more sources should score higher than those in fewer.
+    ///
+    /// Verifies the `present_gt_absent` property from RRF.lean.
+    #[test]
+    fn prop_rrf_more_sources_higher_score() {
+        let source1: Vec<(String, DeterministicScore)> = vec![(
+            "doc_common".to_string(),
+            DeterministicScore::from_f64(0.9),
+        )];
+        let source2: Vec<(String, DeterministicScore)> = vec![
+            (
+                "doc_common".to_string(),
+                DeterministicScore::from_f64(0.9),
+            ),
+            (
+                "doc_single".to_string(),
+                DeterministicScore::from_f64(0.8),
+            ),
+        ];
+
+        let fused = reciprocal_rank_fusion(vec![source1, source2], 60);
+
+        let common = fused.iter().find(|(id, _)| id == "doc_common").unwrap();
+        let single = fused.iter().find(|(id, _)| id == "doc_single").unwrap();
+
+        assert!(
+            common.1 >= single.1,
+            "Document in more sources should score >= document in fewer"
+        );
+    }
+
+    /// RRF scores should always be non-negative.
+    #[test]
+    fn prop_rrf_scores_nonnegative() {
+        let sources = vec![
+            make_results(vec![("doc_a", 0.9), ("doc_b", 0.1)]),
+            make_results(vec![("doc_b", 0.5), ("doc_c", 0.0)]),
+        ];
+        let fused = reciprocal_rank_fusion(sources, 60);
+
+        for (id, score) in &fused {
+            assert!(
+                score.to_f64() >= 0.0,
+                "RRF score for {} should be non-negative, got {}",
+                id,
+                score.to_f64()
+            );
+        }
+    }
+
+    /// Union fusion should include all unique documents.
+    #[test]
+    fn prop_union_includes_all_docs() {
+        let sources = vec![
+            make_results(vec![("doc_a", 0.9), ("doc_b", 0.8)]),
+            make_results(vec![("doc_b", 0.7), ("doc_c", 0.6)]),
+            make_results(vec![("doc_d", 0.5)]),
+        ];
+
+        let expected_ids: HashSet<_> = sources
+            .iter()
+            .flat_map(|s| s.iter().map(|(id, _)| id.clone()))
+            .collect();
+
+        let fused = union_fusion(sources);
+        let result_ids: HashSet<_> = fused.iter().map(|(id, _)| id.clone()).collect();
+
+        assert_eq!(
+            expected_ids, result_ids,
+            "Union should contain all unique documents"
+        );
+    }
+
+    /// With per-source min-max normalization, single-element sources always
+    /// map to 1.0, so a document present in both sources receives a combined
+    /// score of sum(weight_i * 1.0) = total_weight = 1.0 for equal weights.
+    #[test]
+    fn prop_weighted_single_element_sources_score_one() {
+        for (s1, s2) in [(0.0f64, 0.0f64), (0.5, 1.0), (0.9, 0.1), (1.0, 1.0)] {
+            let source1: Vec<(String, DeterministicScore)> =
+                vec![("doc".to_string(), DeterministicScore::from_f64(s1))];
+            let source2: Vec<(String, DeterministicScore)> =
+                vec![("doc".to_string(), DeterministicScore::from_f64(s2))];
+
+            let fused = weighted_fusion(vec![source1, source2], &[0.5, 0.5]);
+
+            if let Some((_, score)) = fused.first() {
+                let actual = score.to_f64();
+                assert!(
+                    (actual - 1.0).abs() < 1e-9,
+                    "Single-element source always normalizes to 1.0; combined = 1.0, got {} (inputs: {}, {})",
+                    actual, s1, s2
+                );
+            }
+        }
+    }
+}
diff --git a/crates/khive-fusion/src/union.rs b/crates/khive-fusion/src/union.rs
new file mode 100644
index 00000000..96777535
--- /dev/null
+++ b/crates/khive-fusion/src/union.rs
@@ -0,0 +1,103 @@
+//! Union fusion (max score per ID).
+
+use khive_score::DeterministicScore;
+use std::cmp::Ordering;
+use std::collections::HashMap;
+use std::hash::Hash;
+
+/// Union fusion: take max score for each ID.
+///
+/// Useful when you want the best score any retriever assigned to each document.
+///
+/// # Arguments
+///
+/// * `sources` - Vector of result lists.
+///
+/// # Returns
+///
+/// A vector sorted by max score descending, with ties broken by ID
+/// for deterministic cross-platform ordering.
+pub fn union_fusion<Id: Eq + Hash + Clone + Ord>(
+    sources: Vec<Vec<(Id, DeterministicScore)>>,
+) -> Vec<(Id, DeterministicScore)> {
+    if sources.is_empty() {
+        return Vec::new();
+    }
+
+    let estimated_capacity: usize = sources.iter().map(|s| s.len()).sum();
+    let mut combined: HashMap<Id, DeterministicScore> = HashMap::with_capacity(estimated_capacity);
+
+    for results in sources {
+        for (id, score) in results {
+            combined
+                .entry(id)
+                .and_modify(|existing| {
+                    if score > *existing {
+                        *existing = score;
+                    }
+                })
+                .or_insert(score);
+        }
+    }
+
+    let mut fused: Vec<(Id, DeterministicScore)> = combined.into_iter().collect();
+    // Sort by score descending, then by ID ascending for determinism
+    fused.sort_by(
+        |(id_a, score_a), (id_b, score_b)| match score_b.cmp(score_a) {
+            Ordering::Equal => id_a.cmp(id_b),
+            other => other,
+        },
+    );
+    fused
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn make_results<Id: Clone>(items: Vec<(Id, f64)>) -> Vec<(Id, DeterministicScore)> {
+        items
+            .into_iter()
+            .map(|(id, score)| (id, DeterministicScore::from_f64(score)))
+            .collect()
+    }
+
+    #[test]
+    fn test_union_takes_max_score() {
+        let source1 = make_results(vec![("doc_a", 0.7)]);
+        let source2 = make_results(vec![("doc_a", 0.9)]);
+
+        let fused = union_fusion(vec![source1, source2]);
+
+        assert_eq!(fused.len(), 1);
+        assert!((fused[0].1.to_f64() - 0.9).abs() < 0.01);
+    }
+
+    #[test]
+    fn test_union_disjoint_sources() {
+        let source1 = make_results(vec![("doc_a", 0.8)]);
+        let source2 = make_results(vec![("doc_b", 0.6)]);
+
+        let fused = union_fusion(vec![source1, source2]);
+
+        assert_eq!(fused.len(), 2);
+        assert_eq!(fused[0].0, "doc_a");
+        assert_eq!(fused[1].0, "doc_b");
+    }
+
+    #[test]
+    fn test_union_empty_sources() {
+        let fused: Vec<(&str, DeterministicScore)> = union_fusion(vec![]);
+        assert!(fused.is_empty());
+    }
+
+    #[test]
+    fn test_union_single_source() {
+        let source = make_results(vec![("doc_a", 0.9), ("doc_b", 0.7)]);
+        let fused = union_fusion(vec![source]);
+
+        assert_eq!(fused.len(), 2);
+        assert_eq!(fused[0].0, "doc_a");
+        assert_eq!(fused[1].0, "doc_b");
+    }
+}
diff --git a/crates/khive-fusion/src/weighted.rs b/crates/khive-fusion/src/weighted.rs
new file mode 100644
index 00000000..a1ac50fc
--- /dev/null
+++ b/crates/khive-fusion/src/weighted.rs
@@ -0,0 +1,488 @@
+//! Weighted linear combination fusion.
+//!
+//! # Weight Normalization (RETRIEVAL-07)
+//!
+//! Weights are automatically normalized to sum to 1.0 before fusion.
+//! This ensures consistent behavior regardless of the input weight scale.
+//!
+//! ## Normalization Behavior
+//!
+//! | Input Weights | Normalized Weights | Behavior |
+//! |--------------|-------------------|----------|
+//! | `[0.7, 0.3]` | `[0.7, 0.3]` | Already normalized |
+//! | `[7.0, 3.0]` | `[0.7, 0.3]` | Scaled to sum to 1.0 |
+//! | `[1.0, 1.0, 1.0]` | `[0.333, 0.333, 0.333]` | Equal distribution |
+//! | `[0.0, 0.0]` | `[0.5, 0.5]` | Fallback to equal |
+//! | `[1.0, -0.5]` | `[1.0, 0.0]` | Negatives treated as 0 |
+//!
+//! ## Example
+//!
+//! ```rust
+//! use khive_fusion::weighted_fusion;
+//! use khive_score::DeterministicScore;
+//!
+//! let semantic = vec![("doc1", DeterministicScore::from_f64(0.9))];
+//! let keyword = vec![("doc1", DeterministicScore::from_f64(0.8))];
+//!
+//! // These produce identical results due to normalization:
+//! let result1 = weighted_fusion(vec![semantic.clone(), keyword.clone()], &[0.6, 0.4]);
+//! let result2 = weighted_fusion(vec![semantic, keyword], &[6.0, 4.0]);
+//! // result1 == result2
+//! ```
+
+use khive_score::DeterministicScore;
+use std::cmp::Ordering;
+use std::collections::HashMap;
+use std::hash::Hash;
+
+/// Min-max normalize a single source's scores to [0, 1] for cross-source fusion (#2496).
+///
+/// When all scores are equal (or the source has one element) every entry
+/// receives 1.0 so it still contributes to the weighted combination.
+fn min_max_normalize_source<Id>(
+    source: Vec<(Id, DeterministicScore)>,
+) -> Vec<(Id, DeterministicScore)> {
+    if source.is_empty() {
+        return source;
+    }
+    let min = source
+        .iter()
+        .map(|(_, s)| s.to_f64())
+        .fold(f64::INFINITY, f64::min);
+    let max = source
+        .iter()
+        .map(|(_, s)| s.to_f64())
+        .fold(f64::NEG_INFINITY, f64::max);
+    let span = max - min;
+    if span <= f64::EPSILON {
+        return source
+            .into_iter()
+            .map(|(id, _)| (id, DeterministicScore::from_f64(1.0)))
+            .collect();
+    }
+    source
+        .into_iter()
+        .map(|(id, s)| {
+            let normalized = (s.to_f64() - min) / span;
+            (id, DeterministicScore::from_f64(normalized))
+        })
+        .collect()
+}
+
+/// Weighted linear combination of scores.
+///
+/// Combines scores using weighted averaging. Weights are automatically normalized
+/// to sum to 1.0, allowing flexible input ranges (see module documentation).
+///
+/// # Weight Normalization (RETRIEVAL-07)
+///
+/// Weights are normalized as follows:
+/// 1. Negative weights are treated as 0.0
+/// 2. If all weights are <= 0, equal distribution is used
+/// 3. Otherwise, weights are divided by their sum to normalize to 1.0
+///
+/// This normalization ensures:
+/// - Consistent results regardless of weight scale
+/// - Graceful handling of edge cases (all zeros, negatives)
+/// - No need for callers to pre-normalize weights
+///
+/// # Warning
+///
+/// This requires score normalization before calling, as different retrievers
+/// may produce scores on different scales (e.g., cosine similarity 0-1 vs BM25 0-infinity).
+/// Consider using min-max normalization or z-score normalization on individual
+/// retriever results before fusion.
+///
+/// # Arguments
+///
+/// * `sources` - Vector of result lists with scores.
+/// * `weights` - Weights for each source. Will be normalized to sum to 1.0.
+///
+/// # Returns
+///
+/// A vector sorted by weighted score descending, with ties broken by ID
+/// for deterministic cross-platform ordering.
+///
+/// # Panics
+///
+/// Does not panic. Returns empty vector if sources is empty.
+pub fn weighted_fusion<Id: Eq + Hash + Clone + Ord>(
+    sources: Vec<Vec<(Id, DeterministicScore)>>,
+    weights: &[f64],
+) -> Vec<(Id, DeterministicScore)> {
+    if sources.is_empty() {
+        return Vec::new();
+    }
+
+    // Normalize weights
+    let weight_sum: f64 = weights.iter().filter(|w| **w > 0.0).sum();
+    let normalized: Vec<f64> = if weight_sum <= 0.0 {
+        // All zero/negative weights -> equal distribution
+        vec![1.0 / sources.len() as f64; sources.len()]
+    } else {
+        weights
+            .iter()
+            .map(|w| if *w > 0.0 { w / weight_sum } else { 0.0 })
+            .collect()
+    };
+
+    // Estimate capacity
+    let estimated_capacity: usize = sources.iter().map(|s| s.len()).sum();
+    let mut combined: HashMap<Id, f64> = HashMap::with_capacity(estimated_capacity);
+
+    for (source_idx, results) in sources.into_iter().enumerate() {
+        let weight = normalized.get(source_idx).copied().unwrap_or(0.0);
+
+        // Normalize each source to [0,1] before weighted combination so that
+        // BM25 unbounded scores and cosine [0,1] scores contribute proportionally
+        // to their configured weights (#2496/#2639).
+        let norm_results = min_max_normalize_source(results);
+        for (id, score) in norm_results {
+            *combined.entry(id).or_insert(0.0) += score.to_f64() * weight;
+        }
+    }
+
+    // Convert and sort by score descending, then by ID ascending for determinism
+    let mut fused: Vec<(Id, DeterministicScore)> = combined
+        .into_iter()
+        .map(|(id, score)| (id, DeterministicScore::from_f64(score)))
+        .collect();
+
+    fused.sort_by(
+        |(id_a, score_a), (id_b, score_b)| match score_b.cmp(score_a) {
+            Ordering::Equal => id_a.cmp(id_b),
+            other => other,
+        },
+    );
+    fused
+}
+
+/// Check if weights are already normalized (sum to approximately 1.0).
+///
+/// This is a utility function for callers who want to verify or log
+/// whether their weights needed normalization.
+///
+/// # Arguments
+///
+/// * `weights` - The weights to check.
+/// * `tolerance` - How close to 1.0 is acceptable (e.g., 1e-6).
+///
+/// # Returns
+///
+/// `true` if the sum of positive weights is within `tolerance` of 1.0.
+///
+/// # Example
+///
+/// ```rust
+/// use khive_fusion::weights_are_normalized;
+///
+/// assert!(weights_are_normalized(&[0.6, 0.4], 1e-6));
+/// assert!(!weights_are_normalized(&[6.0, 4.0], 1e-6));
+/// ```
+#[inline]
+pub fn weights_are_normalized(weights: &[f64], tolerance: f64) -> bool {
+    let sum: f64 = weights.iter().filter(|w| **w > 0.0).sum();
+    (sum - 1.0).abs() <= tolerance
+}
+
+/// Normalize weights to sum to 1.0.
+///
+/// This is the same normalization logic used internally by `weighted_fusion`,
+/// exposed for callers who want to inspect or use the normalized weights.
+///
+/// # Arguments
+///
+/// * `weights` - Input weights (may be any positive scale).
+///
+/// # Returns
+///
+/// Normalized weights that sum to 1.0. Negative weights become 0.0.
+/// If all weights are <= 0, returns equal distribution.
+pub fn normalize_weights(weights: &[f64]) -> Vec<f64> {
+    if weights.is_empty() {
+        return Vec::new();
+    }
+
+    let weight_sum: f64 = weights.iter().filter(|w| **w > 0.0).sum();
+
+    if weight_sum <= 0.0 {
+        vec![1.0 / weights.len() as f64; weights.len()]
+    } else {
+        weights
+            .iter()
+            .map(|w| if *w > 0.0 { w / weight_sum } else { 0.0 })
+            .collect()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn make_results<Id: Clone>(items: Vec<(Id, f64)>) -> Vec<(Id, DeterministicScore)> {
+        items
+            .into_iter()
+            .map(|(id, score)| (id, DeterministicScore::from_f64(score)))
+            .collect()
+    }
+
+    #[test]
+    fn test_weighted_basic() {
+        let source1 = make_results(vec![("doc_a", 1.0)]);
+        let source2 = make_results(vec![("doc_a", 1.0)]);
+
+        let fused = weighted_fusion(vec![source1, source2], &[0.7, 0.3]);
+
+        assert!((fused[0].1.to_f64() - 1.0).abs() < 0.01);
+    }
+
+    #[test]
+    fn test_weighted_normalization() {
+        let source1 = make_results(vec![("doc_a", 1.0)]);
+        let source2 = make_results(vec![("doc_a", 1.0)]);
+
+        let fused = weighted_fusion(vec![source1, source2], &[7.0, 3.0]);
+
+        assert!((fused[0].1.to_f64() - 1.0).abs() < 0.01);
+    }
+
+    #[test]
+    fn test_weighted_zero_weights() {
+        let source1 = make_results(vec![("doc_a", 1.0)]);
+        let source2 = make_results(vec![("doc_a", 1.0)]);
+
+        let fused = weighted_fusion(vec![source1, source2], &[0.0, 0.0]);
+
+        assert!((fused[0].1.to_f64() - 1.0).abs() < 0.01);
+    }
+
+    #[test]
+    fn test_weighted_disjoint_results() {
+        let source1 = make_results(vec![("doc_a", 0.9)]);
+        let source2 = make_results(vec![("doc_b", 0.8)]);
+
+        let fused = weighted_fusion(vec![source1, source2], &[0.6, 0.4]);
+
+        let doc_a = fused.iter().find(|(id, _)| *id == "doc_a").unwrap();
+        let doc_b = fused.iter().find(|(id, _)| *id == "doc_b").unwrap();
+
+        // After per-source min-max normalization, single-element sources map to 1.0.
+        // doc_a contributes 1.0 * 0.6 = 0.6, doc_b contributes 1.0 * 0.4 = 0.4.
+        assert!((doc_a.1.to_f64() - 0.6).abs() < 0.01);
+        assert!((doc_b.1.to_f64() - 0.4).abs() < 0.01);
+    }
+
+    #[test]
+    fn test_weighted_empty_sources() {
+        let fused: Vec<(&str, DeterministicScore)> = weighted_fusion(vec![], &[]);
+        assert!(fused.is_empty());
+    }
+
+    #[test]
+    fn test_weighted_single_source() {
+        let source = make_results(vec![("doc_a", 0.9)]);
+        let fused = weighted_fusion(vec![source], &[1.0]);
+
+        assert_eq!(fused.len(), 1);
+        // Single-element source normalizes to 1.0; weight is 1.0 → final = 1.0.
+        assert!((fused[0].1.to_f64() - 1.0).abs() < 0.01);
+    }
+
+    // RETRIEVAL-07: Normalization behavior tests
+
+    #[test]
+    fn test_normalization_already_normalized() {
+        let source1 = make_results(vec![("doc_a", 1.0)]);
+        let source2 = make_results(vec![("doc_b", 1.0)]);
+
+        // Weights already sum to 1.0
+        let fused = weighted_fusion(vec![source1, source2], &[0.6, 0.4]);
+
+        let doc_a = fused.iter().find(|(id, _)| *id == "doc_a").unwrap();
+        let doc_b = fused.iter().find(|(id, _)| *id == "doc_b").unwrap();
+
+        assert!((doc_a.1.to_f64() - 0.6).abs() < 0.01);
+        assert!((doc_b.1.to_f64() - 0.4).abs() < 0.01);
+    }
+
+    #[test]
+    fn test_normalization_scaled_weights() {
+        let source1 = make_results(vec![("doc_a", 1.0)]);
+        let source2 = make_results(vec![("doc_b", 1.0)]);
+
+        // Weights sum to 100, should be normalized to 0.6, 0.4
+        let fused = weighted_fusion(vec![source1, source2], &[60.0, 40.0]);
+
+        let doc_a = fused.iter().find(|(id, _)| *id == "doc_a").unwrap();
+        let doc_b = fused.iter().find(|(id, _)| *id == "doc_b").unwrap();
+
+        assert!((doc_a.1.to_f64() - 0.6).abs() < 0.01);
+        assert!((doc_b.1.to_f64() - 0.4).abs() < 0.01);
+    }
+
+    #[test]
+    fn test_normalization_negative_weights() {
+        let source1 = make_results(vec![("doc_a", 1.0)]);
+        let source2 = make_results(vec![("doc_b", 1.0)]);
+
+        // Negative weight should be treated as 0
+        let fused = weighted_fusion(vec![source1, source2], &[1.0, -0.5]);
+
+        let doc_a = fused.iter().find(|(id, _)| *id == "doc_a").unwrap();
+        let doc_b = fused.iter().find(|(id, _)| *id == "doc_b");
+
+        // doc_a gets full weight (1.0 normalized to 1.0)
+        assert!((doc_a.1.to_f64() - 1.0).abs() < 0.01);
+        // doc_b should have 0 contribution from second source
+        assert!(doc_b.is_none() || doc_b.unwrap().1.to_f64() < 0.01);
+    }
+
+    #[test]
+    fn test_normalization_three_sources_equal() {
+        let source1 = make_results(vec![("doc_a", 1.0)]);
+        let source2 = make_results(vec![("doc_b", 1.0)]);
+        let source3 = make_results(vec![("doc_c", 1.0)]);
+
+        // Equal weights
+        let fused = weighted_fusion(vec![source1, source2, source3], &[1.0, 1.0, 1.0]);
+
+        for (_, score) in &fused {
+            // Each should get 1/3 weight = 0.333...
+            assert!((score.to_f64() - 1.0 / 3.0).abs() < 0.01);
+        }
+    }
+
+    #[test]
+    fn test_normalization_consistent_across_scales() {
+        let source1 = make_results(vec![("doc_a", 0.8), ("doc_b", 0.6)]);
+        let source2 = make_results(vec![("doc_a", 0.9), ("doc_c", 0.7)]);
+
+        // Same ratio, different scales
+        let fused1 = weighted_fusion(vec![source1.clone(), source2.clone()], &[0.7, 0.3]);
+        let fused2 = weighted_fusion(vec![source1.clone(), source2.clone()], &[7.0, 3.0]);
+        let fused3 = weighted_fusion(vec![source1, source2], &[70.0, 30.0]);
+
+        // All should produce identical results
+        assert_eq!(fused1.len(), fused2.len());
+        assert_eq!(fused2.len(), fused3.len());
+
+        for i in 0..fused1.len() {
+            assert_eq!(fused1[i].0, fused2[i].0);
+            assert_eq!(fused2[i].0, fused3[i].0);
+            assert!(
+                (fused1[i].1.to_f64() - fused2[i].1.to_f64()).abs() < 1e-10,
+                "Score mismatch at position {}: {} vs {}",
+                i,
+                fused1[i].1.to_f64(),
+                fused2[i].1.to_f64()
+            );
+            assert!(
+                (fused2[i].1.to_f64() - fused3[i].1.to_f64()).abs() < 1e-10,
+                "Score mismatch at position {}: {} vs {}",
+                i,
+                fused2[i].1.to_f64(),
+                fused3[i].1.to_f64()
+            );
+        }
+    }
+
+    // Helper function tests
+
+    #[test]
+    fn test_weights_are_normalized() {
+        assert!(weights_are_normalized(&[0.5, 0.5], 1e-6));
+        assert!(weights_are_normalized(&[0.7, 0.3], 1e-6));
+        assert!(weights_are_normalized(&[1.0], 1e-6));
+        assert!(weights_are_normalized(&[0.25, 0.25, 0.25, 0.25], 1e-6));
+
+        assert!(!weights_are_normalized(&[0.5, 0.6], 1e-6)); // > 1
+        assert!(!weights_are_normalized(&[0.3, 0.3], 1e-6)); // < 1
+        assert!(!weights_are_normalized(&[10.0, 10.0], 1e-6)); // = 20
+    }
+
+    #[test]
+    fn test_normalize_weights() {
+        let normalized = normalize_weights(&[6.0, 4.0]);
+        assert!((normalized[0] - 0.6).abs() < 1e-10);
+        assert!((normalized[1] - 0.4).abs() < 1e-10);
+
+        let normalized = normalize_weights(&[1.0, 1.0, 1.0]);
+        for w in &normalized {
+            assert!((w - 1.0 / 3.0).abs() < 1e-10);
+        }
+
+        let normalized = normalize_weights(&[0.0, 0.0]);
+        assert!((normalized[0] - 0.5).abs() < 1e-10);
+        assert!((normalized[1] - 0.5).abs() < 1e-10);
+
+        let normalized = normalize_weights(&[1.0, -1.0]);
+        assert!((normalized[0] - 1.0).abs() < 1e-10);
+        assert!((normalized[1] - 0.0).abs() < 1e-10);
+    }
+
+    #[test]
+    fn test_normalize_weights_empty() {
+        let normalized = normalize_weights(&[]);
+        assert!(normalized.is_empty());
+    }
+
+    // ── #2496 / #2639: per-source min-max normalization before fusion ──────
+
+    #[test]
+    fn test_weighted_fusion_mixed_scales_bm25_vs_cosine() {
+        // BM25-like: unbounded scores (0..100)
+        let bm25 = vec![
+            ("doc_a", DeterministicScore::from_f64(80.0)),
+            ("doc_b", DeterministicScore::from_f64(20.0)),
+        ];
+        // Cosine-like: [0,1] scores
+        let cosine = vec![
+            ("doc_a", DeterministicScore::from_f64(0.9)),
+            ("doc_b", DeterministicScore::from_f64(0.1)),
+        ];
+
+        let result = weighted_fusion(vec![bm25, cosine], &[0.5, 0.5]);
+        // Both sources agree doc_a is more relevant — it must rank first.
+        assert_eq!(result[0].0, "doc_a");
+        assert_eq!(result[1].0, "doc_b");
+        // After normalization, doc_a should score close to 1.0 (top in both).
+        assert!(result[0].1.to_f64() > 0.8);
+    }
+
+    #[test]
+    fn test_weighted_fusion_inverted_scale_normalizes_correctly() {
+        // If one source has negative/inverted semantics, min-max still works.
+        let src1 = vec![
+            ("x", DeterministicScore::from_f64(100.0)),
+            ("y", DeterministicScore::from_f64(1.0)),
+        ];
+        let src2 = vec![
+            ("x", DeterministicScore::from_f64(0.9)),
+            ("y", DeterministicScore::from_f64(0.1)),
+        ];
+
+        let result = weighted_fusion(vec![src1, src2], &[0.6, 0.4]);
+        assert_eq!(result[0].0, "x");
+        // x must score strictly higher than y
+        assert!(result[0].1.to_f64() > result[1].1.to_f64());
+    }
+
+    #[test]
+    fn test_min_max_normalize_source_single() {
+        let src = vec![("a", DeterministicScore::from_f64(42.0))];
+        let out = min_max_normalize_source(src);
+        assert!((out[0].1.to_f64() - 1.0).abs() < 1e-10);
+    }
+
+    #[test]
+    fn test_min_max_normalize_source_uniform() {
+        let src = vec![
+            ("a", DeterministicScore::from_f64(5.0)),
+            ("b", DeterministicScore::from_f64(5.0)),
+        ];
+        let out = min_max_normalize_source(src);
+        // All equal → all 1.0
+        assert!((out[0].1.to_f64() - 1.0).abs() < 1e-10);
+        assert!((out[1].1.to_f64() - 1.0).abs() < 1e-10);
+    }
+}
diff --git a/crates/khive-hnsw/Cargo.toml b/crates/khive-hnsw/Cargo.toml
new file mode 100644
index 00000000..d3edc030
--- /dev/null
+++ b/crates/khive-hnsw/Cargo.toml
@@ -0,0 +1,30 @@
+[package]
+name = "khive-hnsw"
+version.workspace = true
+edition.workspace = true
+authors.workspace = true
+license.workspace = true
+repository.workspace = true
+homepage.workspace = true
+keywords.workspace = true
+categories.workspace = true
+description = "HNSW (Hierarchical Navigable Small World) vector index with INT8 quantized two-phase search — formally verified in Lean4"
+
+[dependencies]
+khive-score = { version = "0.2.0", path = "../khive-score" }
+khive-types = { version = "0.2.0", path = "../khive-types" }
+lattice-embed = { workspace = true }
+serde = { workspace = true }
+serde_json = { workspace = true }
+thiserror = { workspace = true }
+parking_lot = { workspace = true }
+tokio = { workspace = true }
+rand = "0.8"
+rayon = "1.10"
+ulid = "1.1"
+
+[dev-dependencies]
+proptest = "1"
+
+[features]
+checkpoint = []
diff --git a/crates/khive-hnsw/src/alias/drain.rs b/crates/khive-hnsw/src/alias/drain.rs
new file mode 100644
index 00000000..e510a882
--- /dev/null
+++ b/crates/khive-hnsw/src/alias/drain.rs
@@ -0,0 +1,240 @@
+//! Reader tracking and drain detection for zero-downtime index swaps.
+//!
+//! The drain protocol ensures that after an alias swap, the old index is not
+//! deallocated until all in-flight queries have completed. This is implemented
+//! via an `AtomicU64` reader counter and an RAII `ReaderGuard` that decrements
+//! on drop.
+//!
+//! # Design
+//!
+//! - Each collection has an associated `AtomicU64` reader count.
+//! - `search_via_alias` increments the count and returns a `ReaderGuard`.
+//! - The guard holds an `Arc<HnswIndex>` snapshot, so the index stays alive
+//!   even if the alias is swapped while the query is in flight.
+//! - `drain_and_remove` polls the reader count until it reaches zero or timeout.
+
+use std::sync::atomic::{AtomicU64, Ordering};
+use std::sync::Arc;
+use std::time::{Duration, Instant};
+
+use super::error::AliasError;
+use crate::HnswIndex;
+
+/// Tracks active readers for a single collection.
+///
+/// The counter is shared (via `Arc`) between the alias manager and all
+/// outstanding `ReaderGuard`s. When the alias manager wants to drain a
+/// collection, it polls this counter.
+#[derive(Debug)]
+pub(crate) struct ReaderCounter {
+    count: AtomicU64,
+}
+
+impl ReaderCounter {
+    pub fn new() -> Self {
+        Self {
+            count: AtomicU64::new(0),
+        }
+    }
+
+    /// Increment the reader count. Returns the previous value.
+    #[inline]
+    pub fn acquire(&self) -> u64 {
+        self.count.fetch_add(1, Ordering::Acquire)
+    }
+
+    /// Decrement the reader count. Returns the previous value.
+    #[inline]
+    pub fn release(&self) -> u64 {
+        self.count.fetch_sub(1, Ordering::Release)
+    }
+
+    /// Get the current reader count.
+    #[inline]
+    pub fn load(&self) -> u64 {
+        self.count.load(Ordering::Acquire)
+    }
+}
+
+/// RAII guard that holds a snapshot of the index and decrements the reader
+/// count on drop.
+///
+/// The caller gets `&HnswIndex` access via `Deref`. The index is guaranteed
+/// to remain alive for the lifetime of this guard, even if the alias is
+/// swapped to a different collection in the meantime.
+pub struct ReaderGuard {
+    /// Snapshot of the index at the time the guard was acquired.
+    index: Arc<HnswIndex>,
+    /// Reader counter to decrement on drop.
+    counter: Arc<ReaderCounter>,
+}
+
+impl ReaderGuard {
+    /// Create a new reader guard, incrementing the reader counter.
+    pub(crate) fn new(index: Arc<HnswIndex>, counter: Arc<ReaderCounter>) -> Self {
+        counter.acquire();
+        Self { index, counter }
+    }
+
+    /// Get a reference to the index snapshot.
+    pub fn index(&self) -> &HnswIndex {
+        &self.index
+    }
+}
+
+impl Drop for ReaderGuard {
+    fn drop(&mut self) {
+        self.counter.release();
+    }
+}
+
+impl std::ops::Deref for ReaderGuard {
+    type Target = HnswIndex;
+
+    fn deref(&self) -> &Self::Target {
+        &self.index
+    }
+}
+
+/// Wait for all readers on a counter to finish, polling at `poll_interval`.
+///
+/// Returns `Ok(())` when the reader count reaches zero, or `Err(DrainTimeout)`
+/// if the timeout is exceeded.
+pub(crate) async fn drain_readers(
+    counter: &Arc<ReaderCounter>,
+    timeout: Duration,
+    poll_interval: Duration,
+) -> Result<(), AliasError> {
+    let start = Instant::now();
+
+    loop {
+        let active = counter.load();
+        if active == 0 {
+            return Ok(());
+        }
+
+        let elapsed = start.elapsed();
+        if elapsed >= timeout {
+            return Err(AliasError::DrainTimeout {
+                elapsed,
+                timeout,
+                active_readers: active,
+            });
+        }
+
+        // Sleep for the poll interval (or remaining time, whichever is shorter)
+        let remaining = timeout - elapsed;
+        let sleep_dur = poll_interval.min(remaining);
+        tokio::time::sleep(sleep_dur).await;
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_reader_counter_acquire_release() {
+        let counter = ReaderCounter::new();
+        assert_eq!(counter.load(), 0);
+
+        counter.acquire();
+        assert_eq!(counter.load(), 1);
+
+        counter.acquire();
+        assert_eq!(counter.load(), 2);
+
+        counter.release();
+        assert_eq!(counter.load(), 1);
+
+        counter.release();
+        assert_eq!(counter.load(), 0);
+    }
+
+    #[test]
+    fn test_reader_guard_decrements_on_drop() {
+        let index = Arc::new(HnswIndex::new(4));
+        let counter = Arc::new(ReaderCounter::new());
+
+        {
+            let _guard = ReaderGuard::new(Arc::clone(&index), Arc::clone(&counter));
+            assert_eq!(counter.load(), 1);
+
+            let _guard2 = ReaderGuard::new(Arc::clone(&index), Arc::clone(&counter));
+            assert_eq!(counter.load(), 2);
+        }
+        // Both guards dropped
+        assert_eq!(counter.load(), 0);
+    }
+
+    #[test]
+    fn test_reader_guard_deref() {
+        let index = Arc::new(HnswIndex::new(8));
+        let counter = Arc::new(ReaderCounter::new());
+        let guard = ReaderGuard::new(Arc::clone(&index), Arc::clone(&counter));
+
+        // Should be able to call HnswIndex methods via Deref
+        assert_eq!(guard.len(), 0);
+        assert!(guard.is_empty());
+    }
+
+    #[tokio::test]
+    async fn test_drain_readers_immediate() {
+        let counter = Arc::new(ReaderCounter::new());
+        // No readers -- drain should return immediately
+        let result = drain_readers(
+            &counter,
+            Duration::from_millis(100),
+            Duration::from_millis(10),
+        )
+        .await;
+        assert!(result.is_ok());
+    }
+
+    #[tokio::test]
+    async fn test_drain_readers_timeout() {
+        let counter = Arc::new(ReaderCounter::new());
+        counter.acquire(); // Simulate an active reader that never finishes
+
+        let result = drain_readers(
+            &counter,
+            Duration::from_millis(50),
+            Duration::from_millis(10),
+        )
+        .await;
+
+        assert!(result.is_err());
+        match result.unwrap_err() {
+            AliasError::DrainTimeout { active_readers, .. } => {
+                assert_eq!(active_readers, 1);
+            }
+            other => panic!("Expected DrainTimeout, got: {other:?}"),
+        }
+
+        // Clean up
+        counter.release();
+    }
+
+    #[tokio::test]
+    async fn test_drain_readers_delayed_release() {
+        let counter = Arc::new(ReaderCounter::new());
+        counter.acquire();
+
+        let counter_clone = Arc::clone(&counter);
+
+        // Spawn a task that releases after 30ms
+        tokio::spawn(async move {
+            tokio::time::sleep(Duration::from_millis(30)).await;
+            counter_clone.release();
+        });
+
+        // Drain with 200ms timeout -- should succeed after ~30ms
+        let result = drain_readers(
+            &counter,
+            Duration::from_millis(200),
+            Duration::from_millis(5),
+        )
+        .await;
+        assert!(result.is_ok());
+    }
+}
diff --git a/crates/khive-hnsw/src/alias/error.rs b/crates/khive-hnsw/src/alias/error.rs
new file mode 100644
index 00000000..3d37ca80
--- /dev/null
+++ b/crates/khive-hnsw/src/alias/error.rs
@@ -0,0 +1,102 @@
+//! Error types for alias operations.
+//!
+//! These errors cover the alias lifecycle: creation, swap, drain, and validation.
+//! They integrate with the parent `RetrievalError` via `From` conversion.
+
+use std::fmt;
+use std::time::Duration;
+
+/// Errors from alias manager operations.
+#[derive(Debug)]
+pub enum AliasError {
+    /// The requested alias name does not exist.
+    AliasNotFound(String),
+
+    /// The requested collection name does not exist.
+    CollectionNotFound(String),
+
+    /// A collection with this name already exists.
+    CollectionAlreadyExists(String),
+
+    /// An alias with this name already exists.
+    AliasAlreadyExists(String),
+
+    /// The pre-swap validation failed.
+    ValidationFailed {
+        /// Human-readable reason.
+        reason: String,
+        /// Recall score achieved (if applicable).
+        recall: Option<f32>,
+        /// Minimum recall required (if applicable).
+        min_recall: Option<f32>,
+    },
+
+    /// Drain timed out waiting for active readers to finish.
+    DrainTimeout {
+        /// How long we waited.
+        elapsed: Duration,
+        /// Configured timeout.
+        timeout: Duration,
+        /// Number of readers still active.
+        active_readers: u64,
+    },
+
+    /// An HNSW operation failed during migration.
+    IndexError(String),
+}
+
+impl fmt::Display for AliasError {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            Self::AliasNotFound(name) => write!(f, "alias not found: {name}"),
+            Self::CollectionNotFound(name) => write!(f, "collection not found: {name}"),
+            Self::CollectionAlreadyExists(name) => {
+                write!(f, "collection already exists: {name}")
+            }
+            Self::AliasAlreadyExists(name) => write!(f, "alias already exists: {name}"),
+            Self::ValidationFailed {
+                reason,
+                recall,
+                min_recall,
+            } => {
+                write!(f, "validation failed: {reason}")?;
+                if let (Some(r), Some(min)) = (recall, min_recall) {
+                    write!(f, " (recall={r:.4}, min={min:.4})")?;
+                }
+                Ok(())
+            }
+            Self::DrainTimeout {
+                elapsed,
+                timeout,
+                active_readers,
+            } => {
+                write!(
+                    f,
+                    "drain timeout after {:.1}s (limit {:.1}s, {active_readers} readers remaining)",
+                    elapsed.as_secs_f64(),
+                    timeout.as_secs_f64()
+                )
+            }
+            Self::IndexError(msg) => write!(f, "index error: {msg}"),
+        }
+    }
+}
+
+impl std::error::Error for AliasError {}
+
+impl From<AliasError> for crate::error::RetrievalError {
+    fn from(e: AliasError) -> Self {
+        match &e {
+            AliasError::DrainTimeout { .. } => {
+                // Drain timeout is transient -- readers will eventually finish
+                crate::error::RetrievalError::QueryTimeout {
+                    elapsed_ms: match &e {
+                        AliasError::DrainTimeout { elapsed, .. } => elapsed.as_millis() as u64,
+                        _ => 0,
+                    },
+                }
+            }
+            _ => crate::error::RetrievalError::Hnsw(e.to_string()),
+        }
+    }
+}
diff --git a/crates/khive-hnsw/src/alias/manager.rs b/crates/khive-hnsw/src/alias/manager.rs
new file mode 100644
index 00000000..e55b3625
--- /dev/null
+++ b/crates/khive-hnsw/src/alias/manager.rs
@@ -0,0 +1,600 @@
+//! Index alias manager for zero-downtime index migration.
+//!
+//! Provides blue-green deployment semantics for HNSW indexes: readers always
+//! get a consistent snapshot, writers build a new index in the background,
+//! and the alias swap is atomic (single pointer update under a brief write lock).
+//!
+//! # Concurrency Model
+//!
+//! - **Read path**: `parking_lot::RwLock` read guard. `parking_lot` uses
+//!   adaptive spinning before OS-level blocking, so short critical sections
+//!   (pointer clone) have near-zero contention overhead.
+//! - **Write path**: Brief exclusive lock for the pointer swap only.
+//! - **Background build**: Runs on `tokio::task::spawn_blocking`. Does not
+//!   hold any locks on the alias map.
+//! - **Drain**: Async poll with configurable interval and timeout.
+
+use std::collections::HashMap;
+use std::sync::Arc;
+use std::time::{Duration, Instant};
+
+use parking_lot::RwLock;
+
+use super::drain::{drain_readers, ReaderCounter, ReaderGuard};
+use super::error::AliasError;
+use super::validation::IndexValidator;
+use crate::config::HnswConfig;
+use crate::HnswIndex;
+use crate::NodeId;
+
+/// Metadata for a registered collection.
+struct Collection {
+    /// The index, wrapped in Arc for snapshot sharing with readers.
+    index: Arc<HnswIndex>,
+    /// Active reader counter for drain detection.
+    readers: Arc<ReaderCounter>,
+}
+
+/// Report from a completed migration.
+#[derive(Debug, Clone)]
+pub struct MigrationReport {
+    /// Name of the old collection that was replaced.
+    pub old_collection: String,
+    /// Name of the new collection.
+    pub new_collection: String,
+    /// Number of vectors in the old index.
+    pub old_size: usize,
+    /// Number of vectors in the new index.
+    pub new_size: usize,
+    /// Recall score from validation (if validation was run).
+    pub recall_score: Option<f32>,
+    /// Wall-clock time for the entire migration (build + validate + swap + drain).
+    pub total_duration: Duration,
+    /// Wall-clock time for the index build phase.
+    pub build_duration: Duration,
+    /// Wall-clock time for the swap + drain phase.
+    pub swap_drain_duration: Duration,
+}
+
+/// Manages named collections and aliases for zero-downtime index switching.
+///
+/// # Usage
+///
+/// ```rust,ignore
+/// let manager = IndexAliasManager::new(Duration::from_secs(5));
+///
+/// // Register initial index
+/// manager.register_collection("index_v1", initial_index)?;
+/// manager.create_alias("active", "index_v1")?;
+///
+/// // Search through alias
+/// let guard = manager.acquire_reader("active")?;
+/// let results = guard.search(&query, 10)?;
+/// drop(guard); // releases reader count
+///
+/// // Migrate to new index
+/// let report = manager.migrate("active", vectors, new_config, None).await?;
+/// ```
+pub struct IndexAliasManager {
+    /// Collection name -> Collection data.
+    /// Protected by RwLock: reads (search) take shared lock, writes (register/remove)
+    /// take exclusive lock.
+    collections: RwLock<HashMap<String, Collection>>,
+
+    /// Alias name -> collection name mapping.
+    /// Protected by RwLock: reads (resolve alias) take shared lock, writes
+    /// (create/switch alias) take exclusive lock.
+    aliases: RwLock<HashMap<String, String>>,
+
+    /// Maximum time to wait for readers to drain before force-dropping.
+    drain_timeout: Duration,
+
+    /// Poll interval for drain detection.
+    drain_poll_interval: Duration,
+}
+
+impl IndexAliasManager {
+    /// Create a new alias manager with the given drain timeout.
+    pub fn new(drain_timeout: Duration) -> Self {
+        Self {
+            collections: RwLock::new(HashMap::new()),
+            aliases: RwLock::new(HashMap::new()),
+            drain_timeout,
+            drain_poll_interval: Duration::from_millis(10),
+        }
+    }
+
+    /// Set the drain poll interval (default: 10ms).
+    pub fn with_drain_poll_interval(mut self, interval: Duration) -> Self {
+        self.drain_poll_interval = interval;
+        self
+    }
+
+    /// Register a named collection. Fails if the name already exists.
+    pub fn register_collection(&self, name: &str, index: HnswIndex) -> Result<(), AliasError> {
+        let mut collections = self.collections.write();
+        if collections.contains_key(name) {
+            return Err(AliasError::CollectionAlreadyExists(name.to_string()));
+        }
+        collections.insert(
+            name.to_string(),
+            Collection {
+                index: Arc::new(index),
+                readers: Arc::new(ReaderCounter::new()),
+            },
+        );
+        Ok(())
+    }
+
+    /// Create an alias pointing to an existing collection.
+    /// Fails if the alias already exists or the collection does not exist.
+    pub fn create_alias(&self, alias: &str, collection: &str) -> Result<(), AliasError> {
+        // Verify collection exists
+        {
+            let collections = self.collections.read();
+            if !collections.contains_key(collection) {
+                return Err(AliasError::CollectionNotFound(collection.to_string()));
+            }
+        }
+
+        let mut aliases = self.aliases.write();
+        if aliases.contains_key(alias) {
+            return Err(AliasError::AliasAlreadyExists(alias.to_string()));
+        }
+        aliases.insert(alias.to_string(), collection.to_string());
+        Ok(())
+    }
+
+    /// Acquire a reader guard for the index behind an alias.
+    ///
+    /// The returned guard holds an `Arc<HnswIndex>` snapshot and increments the
+    /// reader counter. The index is guaranteed to remain alive until the guard
+    /// is dropped, even if the alias is swapped in the meantime.
+    ///
+    /// This is the primary read-path entry point. The critical section is
+    /// minimal: read-lock the alias map, read-lock the collection map, clone
+    /// the Arc, increment the counter.
+    pub fn acquire_reader(&self, alias: &str) -> Result<ReaderGuard, AliasError> {
+        // Resolve alias -> collection name
+        let collection_name = {
+            let aliases = self.aliases.read();
+            aliases
+                .get(alias)
+                .ok_or_else(|| AliasError::AliasNotFound(alias.to_string()))?
+                .clone()
+        };
+
+        // Get collection and create reader guard
+        let collections = self.collections.read();
+        let collection = collections
+            .get(&collection_name)
+            .ok_or_else(|| AliasError::CollectionNotFound(collection_name.clone()))?;
+
+        Ok(ReaderGuard::new(
+            Arc::clone(&collection.index),
+            Arc::clone(&collection.readers),
+        ))
+    }
+
+    /// Switch an alias to point to a different collection.
+    ///
+    /// If a validator is provided, it runs against the target collection's
+    /// index before the swap. If validation fails, the alias is not changed.
+    ///
+    /// Returns the name of the previous collection (for drain purposes).
+    pub fn switch_alias(
+        &self,
+        alias: &str,
+        new_collection: &str,
+        validator: Option<&dyn IndexValidator>,
+    ) -> Result<String, AliasError> {
+        // Verify new collection exists and optionally validate
+        {
+            let collections = self.collections.read();
+            let collection = collections
+                .get(new_collection)
+                .ok_or_else(|| AliasError::CollectionNotFound(new_collection.to_string()))?;
+
+            if let Some(v) = validator {
+                v.validate(&collection.index)?;
+            }
+        }
+
+        // Swap the alias (exclusive lock, but the critical section is just
+        // a HashMap insert -- nanoseconds)
+        let mut aliases = self.aliases.write();
+        let old_collection = aliases
+            .get(alias)
+            .ok_or_else(|| AliasError::AliasNotFound(alias.to_string()))?
+            .clone();
+
+        aliases.insert(alias.to_string(), new_collection.to_string());
+        Ok(old_collection)
+    }
+
+    /// Wait for all readers on a collection to finish, then remove it.
+    ///
+    /// This is typically called after `switch_alias` to clean up the old
+    /// collection. If the drain times out, the collection is NOT removed
+    /// and the error is returned.
+    pub async fn drain_and_remove(&self, collection: &str) -> Result<(), AliasError> {
+        // Extract the reader counter (under read lock -- we just need the Arc)
+        let counter = {
+            let collections = self.collections.read();
+            let coll = collections
+                .get(collection)
+                .ok_or_else(|| AliasError::CollectionNotFound(collection.to_string()))?;
+            Arc::clone(&coll.readers)
+        };
+
+        // Wait for readers to drain (async, no locks held)
+        drain_readers(&counter, self.drain_timeout, self.drain_poll_interval).await?;
+
+        // Remove the collection (exclusive lock)
+        let mut collections = self.collections.write();
+        collections.remove(collection);
+        Ok(())
+    }
+
+    /// Full migration: build new index, validate, swap alias, drain old.
+    ///
+    /// This is the high-level API for embedding model migrations. The build
+    /// phase runs on `spawn_blocking` to avoid blocking the tokio runtime.
+    ///
+    /// # Arguments
+    ///
+    /// * `alias` - The alias to migrate (must already exist)
+    /// * `vectors` - All vectors for the new index
+    /// * `new_config` - Configuration for the new index
+    /// * `validator` - Optional pre-swap validation
+    ///
+    /// # Returns
+    ///
+    /// A `MigrationReport` with timing and size information.
+    pub async fn migrate(
+        &self,
+        alias: &str,
+        vectors: Vec<(NodeId, Vec<f32>)>,
+        new_config: HnswConfig,
+        validator: Option<Box<dyn IndexValidator>>,
+    ) -> Result<MigrationReport, AliasError> {
+        let total_start = Instant::now();
+
+        // Resolve current alias to get old collection info
+        let old_collection_name = {
+            let aliases = self.aliases.read();
+            aliases
+                .get(alias)
+                .ok_or_else(|| AliasError::AliasNotFound(alias.to_string()))?
+                .clone()
+        };
+
+        let old_size = {
+            let collections = self.collections.read();
+            collections
+                .get(&old_collection_name)
+                .map(|c| c.index.len_live())
+                .unwrap_or(0)
+        };
+
+        // Generate a unique name for the new collection
+        let new_collection_name = format!(
+            "{}_migrated_{}",
+            old_collection_name,
+            std::time::SystemTime::now()
+                .duration_since(std::time::UNIX_EPOCH)
+                .unwrap_or_default()
+                .as_millis()
+        );
+
+        // Build new index on a blocking thread
+        let build_start = Instant::now();
+        let new_index = tokio::task::spawn_blocking(move || {
+            let mut index = HnswIndex::with_config(new_config);
+            for (id, vec) in vectors {
+                if let Err(e) = index.insert(id, vec) {
+                    return Err(AliasError::IndexError(e.to_string()));
+                }
+            }
+            Ok(index)
+        })
+        .await
+        .map_err(|e| AliasError::IndexError(format!("build task panicked: {e}")))?
+        .map_err(|e| AliasError::IndexError(format!("build failed: {e}")))?;
+
+        let build_duration = build_start.elapsed();
+        let new_size = new_index.len_live();
+
+        // Register the new collection
+        self.register_collection(&new_collection_name, new_index)?;
+
+        // Validate and swap
+        let swap_drain_start = Instant::now();
+
+        // Recall score for the report
+        let recall_score = None;
+
+        // If we have a validator, run it and capture recall
+        if let Some(ref v) = validator {
+            let collections = self.collections.read();
+            let coll = collections.get(&new_collection_name).unwrap();
+            match v.validate(&coll.index) {
+                Ok(()) => {}
+                Err(AliasError::ValidationFailed {
+                    recall, min_recall, ..
+                }) => {
+                    // Remove the new collection since validation failed
+                    drop(collections);
+                    let mut colls = self.collections.write();
+                    colls.remove(&new_collection_name);
+                    return Err(AliasError::ValidationFailed {
+                        reason: "recall below threshold".to_string(),
+                        recall,
+                        min_recall,
+                    });
+                }
+                Err(e) => {
+                    // Remove the new collection since validation failed
+                    drop(collections);
+                    let mut colls = self.collections.write();
+                    colls.remove(&new_collection_name);
+                    return Err(e);
+                }
+            }
+        }
+
+        // Switch the alias
+        self.switch_alias(alias, &new_collection_name, None)?;
+
+        // Drain and remove old collection
+        // Note: if drain times out, the old collection stays around but the
+        // alias already points to the new one. This is acceptable -- readers
+        // on the old index will finish eventually, and the memory will be
+        // reclaimed when the last Arc is dropped.
+        let drain_result = self.drain_and_remove(&old_collection_name).await;
+
+        let swap_drain_duration = swap_drain_start.elapsed();
+        let total_duration = total_start.elapsed();
+
+        // Log drain timeout but don't fail the migration -- the alias is
+        // already switched, so new queries go to the new index.
+        if let Err(AliasError::DrainTimeout { .. }) = &drain_result {
+            // The old collection will be cleaned up when the last reader drops.
+            // We report this in the migration report but don't fail.
+        } else if let Err(e) = drain_result {
+            return Err(e);
+        }
+
+        Ok(MigrationReport {
+            old_collection: old_collection_name,
+            new_collection: new_collection_name,
+            old_size,
+            new_size,
+            recall_score,
+            total_duration,
+            build_duration,
+            swap_drain_duration,
+        })
+    }
+
+    /// Get the number of registered collections.
+    pub fn collection_count(&self) -> usize {
+        self.collections.read().len()
+    }
+
+    /// Get the number of registered aliases.
+    pub fn alias_count(&self) -> usize {
+        self.aliases.read().len()
+    }
+
+    /// Get the collection name that an alias points to.
+    pub fn resolve_alias(&self, alias: &str) -> Option<String> {
+        self.aliases.read().get(alias).cloned()
+    }
+
+    /// Get the active reader count for a collection.
+    pub fn reader_count(&self, collection: &str) -> Option<u64> {
+        self.collections
+            .read()
+            .get(collection)
+            .map(|c| c.readers.load())
+    }
+}
+
+impl std::fmt::Debug for IndexAliasManager {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        let collections = self.collections.read();
+        let aliases = self.aliases.read();
+        f.debug_struct("IndexAliasManager")
+            .field("collections", &collections.keys().collect::<Vec<_>>())
+            .field("aliases", &*aliases)
+            .field("drain_timeout", &self.drain_timeout)
+            .finish()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::HnswIndex;
+
+    fn make_index(dims: usize, count: usize) -> HnswIndex {
+        let mut index = HnswIndex::new(dims);
+        for i in 0..count {
+            let id = NodeId::new([(i & 0xFF) as u8; 16]);
+            let vec = vec![i as f32; dims];
+            index.insert(id, vec).unwrap();
+        }
+        index
+    }
+
+    #[test]
+    fn test_register_and_create_alias() {
+        let mgr = IndexAliasManager::new(Duration::from_secs(1));
+
+        let index = make_index(4, 5);
+        mgr.register_collection("v1", index).unwrap();
+        mgr.create_alias("active", "v1").unwrap();
+
+        assert_eq!(mgr.collection_count(), 1);
+        assert_eq!(mgr.alias_count(), 1);
+        assert_eq!(mgr.resolve_alias("active"), Some("v1".to_string()));
+    }
+
+    #[test]
+    fn test_register_duplicate_collection() {
+        let mgr = IndexAliasManager::new(Duration::from_secs(1));
+        mgr.register_collection("v1", make_index(4, 1)).unwrap();
+
+        let result = mgr.register_collection("v1", make_index(4, 1));
+        assert!(matches!(
+            result,
+            Err(AliasError::CollectionAlreadyExists(_))
+        ));
+    }
+
+    #[test]
+    fn test_create_alias_missing_collection() {
+        let mgr = IndexAliasManager::new(Duration::from_secs(1));
+        let result = mgr.create_alias("active", "nonexistent");
+        assert!(matches!(result, Err(AliasError::CollectionNotFound(_))));
+    }
+
+    #[test]
+    fn test_create_duplicate_alias() {
+        let mgr = IndexAliasManager::new(Duration::from_secs(1));
+        mgr.register_collection("v1", make_index(4, 1)).unwrap();
+        mgr.create_alias("active", "v1").unwrap();
+
+        let result = mgr.create_alias("active", "v1");
+        assert!(matches!(result, Err(AliasError::AliasAlreadyExists(_))));
+    }
+
+    #[test]
+    fn test_acquire_reader() {
+        let mgr = IndexAliasManager::new(Duration::from_secs(1));
+        mgr.register_collection("v1", make_index(4, 5)).unwrap();
+        mgr.create_alias("active", "v1").unwrap();
+
+        let guard = mgr.acquire_reader("active").unwrap();
+        assert_eq!(guard.len(), 5);
+        assert_eq!(mgr.reader_count("v1"), Some(1));
+
+        drop(guard);
+        assert_eq!(mgr.reader_count("v1"), Some(0));
+    }
+
+    #[test]
+    fn test_acquire_reader_missing_alias() {
+        let mgr = IndexAliasManager::new(Duration::from_secs(1));
+        let result = mgr.acquire_reader("nonexistent");
+        assert!(matches!(result, Err(AliasError::AliasNotFound(_))));
+    }
+
+    #[test]
+    fn test_switch_alias() {
+        let mgr = IndexAliasManager::new(Duration::from_secs(1));
+        mgr.register_collection("v1", make_index(4, 5)).unwrap();
+        mgr.register_collection("v2", make_index(4, 10)).unwrap();
+        mgr.create_alias("active", "v1").unwrap();
+
+        let old = mgr.switch_alias("active", "v2", None).unwrap();
+        assert_eq!(old, "v1");
+        assert_eq!(mgr.resolve_alias("active"), Some("v2".to_string()));
+
+        // Reader should now get v2
+        let guard = mgr.acquire_reader("active").unwrap();
+        assert_eq!(guard.len(), 10);
+    }
+
+    #[test]
+    fn test_switch_alias_with_failing_validator() {
+        let mgr = IndexAliasManager::new(Duration::from_secs(1));
+        mgr.register_collection("v1", make_index(4, 5)).unwrap();
+        mgr.register_collection("v2", make_index(4, 10)).unwrap();
+        mgr.create_alias("active", "v1").unwrap();
+
+        // Validator that always fails
+        struct FailValidator;
+        impl IndexValidator for FailValidator {
+            fn validate(&self, _: &HnswIndex) -> Result<(), AliasError> {
+                Err(AliasError::ValidationFailed {
+                    reason: "test failure".to_string(),
+                    recall: Some(0.5),
+                    min_recall: Some(0.95),
+                })
+            }
+        }
+
+        let result = mgr.switch_alias("active", "v2", Some(&FailValidator));
+        assert!(matches!(result, Err(AliasError::ValidationFailed { .. })));
+
+        // Alias should still point to v1
+        assert_eq!(mgr.resolve_alias("active"), Some("v1".to_string()));
+    }
+
+    #[tokio::test]
+    async fn test_drain_and_remove() {
+        let mgr = IndexAliasManager::new(Duration::from_secs(1));
+        mgr.register_collection("v1", make_index(4, 5)).unwrap();
+
+        // No readers -- drain should succeed immediately
+        mgr.drain_and_remove("v1").await.unwrap();
+        assert_eq!(mgr.collection_count(), 0);
+    }
+
+    #[tokio::test]
+    async fn test_concurrent_read_during_swap() {
+        let mgr = Arc::new(IndexAliasManager::new(Duration::from_secs(5)));
+        mgr.register_collection("v1", make_index(4, 5)).unwrap();
+        mgr.register_collection("v2", make_index(4, 10)).unwrap();
+        mgr.create_alias("active", "v1").unwrap();
+
+        // Acquire a reader on v1
+        let guard = mgr.acquire_reader("active").unwrap();
+        assert_eq!(guard.len(), 5);
+
+        // Swap alias to v2 while v1 reader is active
+        let old = mgr.switch_alias("active", "v2", None).unwrap();
+        assert_eq!(old, "v1");
+
+        // Old reader should still see v1 (5 vectors)
+        assert_eq!(guard.len(), 5);
+
+        // New reader should see v2 (10 vectors)
+        let new_guard = mgr.acquire_reader("active").unwrap();
+        assert_eq!(new_guard.len(), 10);
+
+        // Drop the old reader
+        drop(guard);
+
+        // Now drain should succeed for v1
+        let mgr_clone = Arc::clone(&mgr);
+        mgr_clone.drain_and_remove("v1").await.unwrap();
+
+        // v1 should be gone, v2 should remain
+        assert_eq!(mgr.collection_count(), 1);
+    }
+
+    #[tokio::test]
+    async fn test_migrate() {
+        let mgr = Arc::new(IndexAliasManager::new(Duration::from_secs(5)));
+        mgr.register_collection("v1", make_index(4, 5)).unwrap();
+        mgr.create_alias("active", "v1").unwrap();
+
+        // Prepare vectors for the new index
+        let vectors: Vec<(NodeId, Vec<f32>)> = (0..8u8)
+            .map(|i| (NodeId::new([i; 16]), vec![i as f32; 4]))
+            .collect();
+
+        let config = HnswConfig::with_dimensions(4);
+        let report = mgr.migrate("active", vectors, config, None).await.unwrap();
+
+        assert_eq!(report.old_size, 5);
+        assert_eq!(report.new_size, 8);
+
+        // The alias should now point to the new collection
+        let guard = mgr.acquire_reader("active").unwrap();
+        assert_eq!(guard.len(), 8);
+    }
+}
diff --git a/crates/khive-hnsw/src/alias/mod.rs b/crates/khive-hnsw/src/alias/mod.rs
new file mode 100644
index 00000000..0ad6d1d9
--- /dev/null
+++ b/crates/khive-hnsw/src/alias/mod.rs
@@ -0,0 +1,37 @@
+//! Index alias management for zero-downtime HNSW index migration.
+//!
+//! This module implements a blue-green deployment pattern for HNSW vector indexes.
+//! When switching embedding models (e.g., from BGE-small to mE5-small), every
+//! vector must be re-embedded and re-indexed. The alias manager allows this to
+//! happen without taking the search service offline:
+//!
+//! 1. `alias("active")` currently points to `collection("index_v1")`
+//! 2. Build `collection("index_v2")` in a background thread
+//! 3. Validate the new index (recall@k benchmark)
+//! 4. Atomic swap: `alias("active")` now points to `collection("index_v2")`
+//! 5. In-flight queries on v1 complete on v1; new queries go to v2
+//! 6. After drain (all v1 readers dropped), deallocate v1
+//!
+//! # Concurrency
+//!
+//! - Read path: `parking_lot::RwLock` read guard (adaptive spinning, no OS block
+//!   for short critical sections)
+//! - Write path: Brief exclusive lock for pointer swap only
+//! - Background build: `tokio::task::spawn_blocking`, no locks held
+//! - Drain: Async poll via `AtomicU64` reader counter
+//!
+//! # Module Structure
+//!
+//! - [`manager`]: `IndexAliasManager` -- the main entry point
+//! - [`drain`]: Reader tracking and RAII guard
+//! - [`validation`]: Pre-swap index quality validation
+//! - [`error`]: Error types
+
+mod drain;
+pub mod error;
+mod manager;
+pub mod validation;
+
+pub use drain::ReaderGuard;
+pub use manager::{IndexAliasManager, MigrationReport};
+pub use validation::{IndexValidator, NoopValidator, RecallValidator};
diff --git a/crates/khive-hnsw/src/alias/validation.rs b/crates/khive-hnsw/src/alias/validation.rs
new file mode 100644
index 00000000..012d42f0
--- /dev/null
+++ b/crates/khive-hnsw/src/alias/validation.rs
@@ -0,0 +1,184 @@
+//! Pre-swap validation for index migrations.
+//!
+//! Before committing an alias swap, the system can optionally run a validation
+//! function to verify that the new index meets quality requirements. The primary
+//! validator is `RecallValidator`, which checks recall@k against a set of
+//! golden queries with known ground-truth results.
+
+use super::error::AliasError;
+use crate::HnswIndex;
+use crate::NodeId;
+
+/// Trait for validating an index before an alias swap.
+///
+/// Implementations should be stateless or cheaply cloneable, as they may be
+/// called from within a `spawn_blocking` context.
+pub trait IndexValidator: Send + Sync {
+    /// Validate the new index. Return `Ok(())` to proceed with the swap,
+    /// or `Err(AliasError::ValidationFailed)` to abort.
+    fn validate(&self, new_index: &HnswIndex) -> Result<(), AliasError>;
+}
+
+/// Validates a new index by running recall@k against golden queries.
+///
+/// Golden queries are `(query_vector, expected_result_ids)` pairs where the
+/// expected results are the true nearest neighbors (typically computed via
+/// brute-force on the same dataset).
+///
+/// # Recall Computation
+///
+/// For each golden query, we search the new index for `k` results and compute:
+///
+/// ```text
+/// recall = |returned ∩ expected| / |expected|
+/// ```
+///
+/// The overall recall is the mean across all golden queries. The swap is
+/// approved if `mean_recall >= min_recall`.
+pub struct RecallValidator {
+    /// Golden queries: `(query_vector, expected_nearest_ids)`.
+    pub golden_queries: Vec<(Vec<f32>, Vec<NodeId>)>,
+    /// Number of results to retrieve per query.
+    pub k: usize,
+    /// Minimum acceptable mean recall (e.g., 0.95 for 95%).
+    pub min_recall: f32,
+}
+
+impl RecallValidator {
+    /// Create a new recall validator.
+    pub fn new(golden_queries: Vec<(Vec<f32>, Vec<NodeId>)>, k: usize, min_recall: f32) -> Self {
+        Self {
+            golden_queries,
+            k,
+            min_recall,
+        }
+    }
+}
+
+impl IndexValidator for RecallValidator {
+    fn validate(&self, new_index: &HnswIndex) -> Result<(), AliasError> {
+        if self.golden_queries.is_empty() {
+            return Ok(());
+        }
+
+        let mut total_recall = 0.0f64;
+        let mut query_count = 0usize;
+
+        for (query, expected) in &self.golden_queries {
+            let results = new_index
+                .search(query, self.k)
+                .map_err(|e| AliasError::IndexError(e.to_string()))?;
+
+            let returned_ids: std::collections::HashSet<NodeId> =
+                results.iter().map(|(id, _)| *id).collect();
+
+            let hits = expected
+                .iter()
+                .filter(|id| returned_ids.contains(id))
+                .count();
+
+            let recall = if expected.is_empty() {
+                1.0
+            } else {
+                hits as f64 / expected.len() as f64
+            };
+
+            total_recall += recall;
+            query_count += 1;
+        }
+
+        let mean_recall = (total_recall / query_count as f64) as f32;
+
+        if mean_recall < self.min_recall {
+            return Err(AliasError::ValidationFailed {
+                reason: format!(
+                    "recall@{} = {mean_recall:.4} < {:.4}",
+                    self.k, self.min_recall
+                ),
+                recall: Some(mean_recall),
+                min_recall: Some(self.min_recall),
+            });
+        }
+
+        Ok(())
+    }
+}
+
+/// A validator that always passes. Useful for testing or when validation
+/// is not needed.
+pub struct NoopValidator;
+
+impl IndexValidator for NoopValidator {
+    fn validate(&self, _new_index: &HnswIndex) -> Result<(), AliasError> {
+        Ok(())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::HnswIndex;
+
+    fn make_test_index() -> HnswIndex {
+        let mut index = HnswIndex::new(4);
+        // Insert 10 vectors
+        for i in 0..10u8 {
+            let id = NodeId::new([i; 16]);
+            let vec = vec![i as f32; 4];
+            index.insert(id, vec).unwrap();
+        }
+        index
+    }
+
+    #[test]
+    fn test_noop_validator() {
+        let index = make_test_index();
+        let validator = NoopValidator;
+        assert!(validator.validate(&index).is_ok());
+    }
+
+    #[test]
+    fn test_recall_validator_empty_golden() {
+        let index = make_test_index();
+        let validator = RecallValidator::new(vec![], 5, 0.95);
+        assert!(validator.validate(&index).is_ok());
+    }
+
+    #[test]
+    fn test_recall_validator_perfect_recall() {
+        let index = make_test_index();
+
+        // Search for the actual results first, then use those as golden truth
+        let query = vec![5.0f32; 4];
+        let results = index.search(&query, 3).unwrap();
+        let expected: Vec<NodeId> = results.iter().map(|(id, _)| *id).collect();
+        assert!(!expected.is_empty(), "search should return results");
+
+        // Validator with the actual results as golden truth should pass
+        let validator = RecallValidator::new(vec![(query, expected)], 3, 0.95);
+        assert!(validator.validate(&index).is_ok());
+    }
+
+    #[test]
+    fn test_recall_validator_fails_low_recall() {
+        let index = make_test_index();
+
+        // Expect IDs that don't exist in the index
+        let query = vec![5.0f32; 4];
+        let fake_ids: Vec<NodeId> = (100..110u8).map(|i| NodeId::new([i; 16])).collect();
+
+        let validator = RecallValidator::new(vec![(query, fake_ids)], 5, 0.95);
+        let result = validator.validate(&index);
+        assert!(result.is_err());
+
+        match result.unwrap_err() {
+            AliasError::ValidationFailed {
+                recall, min_recall, ..
+            } => {
+                assert_eq!(recall, Some(0.0));
+                assert_eq!(min_recall, Some(0.95));
+            }
+            other => panic!("Expected ValidationFailed, got: {other:?}"),
+        }
+    }
+}
diff --git a/crates/khive-hnsw/src/arena/arena.rs b/crates/khive-hnsw/src/arena/arena.rs
new file mode 100644
index 00000000..906f4c01
--- /dev/null
+++ b/crates/khive-hnsw/src/arena/arena.rs
@@ -0,0 +1,169 @@
+//! Core bump arena allocator.
+//!
+//! Pre-allocates a contiguous memory slab and bumps a pointer for each
+//! allocation. Reset is O(1) -- just set the bump offset back to zero.
+//!
+//! # Memory Layout
+//!
+//! ```text
+//! [---- slab (1 MiB default) ----]
+//!  ^                ^             ^
+//!  base             offset        capacity
+//! ```
+//!
+//! Each `alloc<T>(count)` bumps `offset` by `count * size_of::<T>()` (with
+//! alignment padding). If `offset` would exceed `capacity`, the arena grows
+//! by allocating a new, larger slab.
+//!
+//! # Safety Invariants
+//!
+//! 1. The slab is a `Vec<u8>` owned by the arena. All pointers derived from
+//!    it are valid as long as the arena is alive and has not been reset or grown.
+//! 2. `ArenaVec` and `ArenaBinaryHeap` hold an `&SearchArena` reference,
+//!    tying their lifetime to the arena. After `reset()`, all prior allocations
+//!    are logically invalid -- the type system enforces this via lifetimes.
+//! 3. Growth invalidates all prior pointers. This is safe because growth only
+//!    happens during `alloc`, and all live `ArenaVec`/`ArenaBinaryHeap` objects
+//!    manage their own pointer + length, requesting new allocations as needed
+//!    via copy-on-grow.
+
+use std::cell::Cell;
+
+/// Default arena size: 1 MiB. More than sufficient for ef=256 searches.
+///
+/// Worst-case per-search memory for ef=256, M=16:
+/// - candidates heap: 256 * 12 = 3,072 bytes
+/// - results heap:    256 * 12 = 3,072 bytes
+/// - batch buffer:    16 * 32 = 512 bytes
+/// - result_buf:      256 * 12 = 3,072 bytes
+/// - overhead/alignment: ~1,000 bytes
+/// Total: ~10,728 bytes (~10 KiB)
+///
+/// 1 MiB gives ~100x headroom.
+pub const DEFAULT_ARENA_SIZE: usize = 1 << 20; // 1 MiB
+
+/// Bump arena allocator for HNSW search operations.
+///
+/// All allocations within a search query bump from this arena. Between
+/// queries, call `reset()` to reclaim all memory in O(1).
+///
+/// The arena uses interior mutability (`Cell`) for the bump offset so that
+/// multiple `ArenaVec` instances can allocate from the same `&SearchArena`.
+pub struct SearchArena {
+    /// Backing memory slab.
+    slab: Cell<Vec<u8>>,
+    /// Current bump offset into the slab.
+    offset: Cell<usize>,
+}
+
+impl SearchArena {
+    /// Create a new arena with the given capacity in bytes.
+    pub fn new(capacity: usize) -> Self {
+        let cap = capacity.max(1024); // Minimum 1 KiB
+        Self {
+            slab: Cell::new(vec![0u8; cap]),
+            offset: Cell::new(0),
+        }
+    }
+
+    /// Create a new arena with the default 1 MiB capacity.
+    pub fn with_default_capacity() -> Self {
+        Self::new(DEFAULT_ARENA_SIZE)
+    }
+
+    /// Reset the arena in O(1). All prior allocations become invalid.
+    ///
+    /// This is the key performance win: no deallocation, no destructors,
+    /// no zeroing. Just reset the bump pointer.
+    #[inline]
+    pub fn reset(&self) {
+        self.offset.set(0);
+    }
+
+    /// Current number of bytes allocated from this arena.
+    #[inline]
+    pub fn bytes_used(&self) -> usize {
+        self.offset.get()
+    }
+
+    /// Total capacity of the arena in bytes.
+    #[inline]
+    pub fn capacity(&self) -> usize {
+        // SAFETY: We take the slab out, read its capacity, and put it back.
+        // This is safe because we don't keep any references across the take/set.
+        let slab = self.slab.take();
+        let cap = slab.capacity();
+        self.slab.set(slab);
+        cap
+    }
+
+    /// Allocate `count` elements of type `T` from the arena.
+    ///
+    /// Returns a pointer to the allocated memory. The caller is responsible
+    /// for writing to this memory before reading.
+    ///
+    /// # Panics
+    ///
+    /// Never panics. If the arena is full, it grows automatically.
+    ///
+    /// # Safety
+    ///
+    /// The returned pointer is valid until `reset()` is called or the arena
+    /// is dropped. The caller must not use the pointer after either event.
+    /// This is enforced by the lifetime parameter on `ArenaVec`.
+    pub(super) fn alloc<T>(&self, count: usize) -> *mut T {
+        let size = std::mem::size_of::<T>() * count;
+        let align = std::mem::align_of::<T>();
+
+        if size == 0 {
+            return std::ptr::dangling_mut::<T>(); // ZST: return aligned dangling pointer
+        }
+
+        let mut current = self.offset.get();
+
+        // Align up
+        let aligned = (current + align - 1) & !(align - 1);
+        let new_offset = aligned + size;
+
+        // Take slab, work with it, put it back
+        let mut slab = self.slab.take();
+
+        if new_offset > slab.len() {
+            // Grow: double or fit, whichever is larger
+            let new_cap = (slab.len() * 2).max(new_offset).max(slab.len() + size);
+            slab.resize(new_cap, 0);
+            // Recompute alignment in case resize moved the buffer
+            current = self.offset.get();
+            let aligned = (current + align - 1) & !(align - 1);
+            let new_offset = aligned + size;
+            let ptr = slab.as_mut_ptr().wrapping_add(aligned) as *mut T;
+            self.offset.set(new_offset);
+            self.slab.set(slab);
+            return ptr;
+        }
+
+        let ptr = slab.as_mut_ptr().wrapping_add(aligned) as *mut T;
+        self.offset.set(new_offset);
+        self.slab.set(slab);
+        ptr
+    }
+
+    /// Copy `src` slice into the arena and return a mutable pointer to the copy.
+    ///
+    /// Useful for bulk-copying data into the arena. Allocates space for
+    /// `src.len()` elements, copies them in, and returns a pointer to the copy.
+    #[allow(dead_code)]
+    pub(super) fn alloc_copy<T: Copy>(&self, src: &[T]) -> *mut T {
+        if src.is_empty() {
+            return self.alloc::<T>(0);
+        }
+        let ptr = self.alloc::<T>(src.len());
+        // SAFETY: `ptr` points to freshly allocated arena memory with enough
+        // space for `src.len()` elements. `src` is a valid slice. No overlap
+        // because arena memory is freshly bumped.
+        unsafe {
+            std::ptr::copy_nonoverlapping(src.as_ptr(), ptr, src.len());
+        }
+        ptr
+    }
+}
diff --git a/crates/khive-hnsw/src/arena/arena_heap.rs b/crates/khive-hnsw/src/arena/arena_heap.rs
new file mode 100644
index 00000000..53648481
--- /dev/null
+++ b/crates/khive-hnsw/src/arena/arena_heap.rs
@@ -0,0 +1,149 @@
+//! Arena-backed binary heap.
+//!
+//! `ArenaBinaryHeap<'a, T>` provides min-heap or max-heap behavior backed
+//! by an `ArenaVec`. It implements the same operations as `std::BinaryHeap`
+//! (push, pop, peek, len, clear, drain) with identical algorithmic complexity.
+//!
+//! # Heap Ordering
+//!
+//! The heap uses `Ord` ordering. For a max-heap (default BinaryHeap behavior),
+//! use `T` directly. For a min-heap, wrap elements in `std::cmp::Reverse`.
+//!
+//! In the HNSW search context:
+//! - `candidates`: min-heap via `Reverse<(OrderedF32, usize)>` -- closest first
+//! - `results`: max-heap via `(OrderedF32, usize)` -- furthest first (for pruning)
+
+use super::arena::SearchArena;
+use super::arena_vec::ArenaVec;
+
+/// A binary heap backed by arena allocation.
+///
+/// This is a max-heap by default (like `std::BinaryHeap`). For a min-heap,
+/// wrap elements in `std::cmp::Reverse`.
+///
+/// Elements must be `Copy + Ord`. `Copy` because the backing `ArenaVec`
+/// requires it. `Ord` for heap ordering.
+pub struct ArenaBinaryHeap<'a, T: Copy + Ord> {
+    data: ArenaVec<'a, T>,
+}
+
+impl<'a, T: Copy + Ord> ArenaBinaryHeap<'a, T> {
+    /// Create a new empty heap with the given initial capacity.
+    #[inline]
+    pub fn new(arena: &'a SearchArena, capacity: usize) -> Self {
+        Self {
+            data: ArenaVec::new(arena, capacity),
+        }
+    }
+
+    /// Number of elements in the heap.
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.data.len()
+    }
+
+    /// Whether the heap is empty.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.data.is_empty()
+    }
+
+    /// Peek at the maximum element (for max-heap) without removing it.
+    #[inline]
+    pub fn peek(&self) -> Option<&T> {
+        if self.data.is_empty() {
+            None
+        } else {
+            Some(self.data.get(0))
+        }
+    }
+
+    /// Push an element onto the heap. O(log n).
+    #[inline]
+    pub fn push(&mut self, value: T) {
+        self.data.push(value);
+        self.sift_up(self.data.len() - 1);
+    }
+
+    /// Remove and return the maximum element. O(log n).
+    #[inline]
+    pub fn pop(&mut self) -> Option<T> {
+        if self.data.is_empty() {
+            return None;
+        }
+        let len = self.data.len();
+        if len == 1 {
+            return self.data.pop();
+        }
+        // Swap root with last, pop last, sift down root.
+        let root = *self.data.get(0);
+        let last = *self.data.get(len - 1);
+        *self.data.get_mut(0) = last;
+        self.data.pop(); // removes last element
+        if !self.data.is_empty() {
+            self.sift_down(0);
+        }
+        Some(root)
+    }
+
+    /// Clear the heap without deallocating.
+    #[inline]
+    pub fn clear(&mut self) {
+        self.data.clear();
+    }
+
+    /// Drain all elements from the heap (in arbitrary order).
+    ///
+    /// The returned iterator yields elements in storage order, NOT heap order.
+    /// If sorted order is needed, collect and sort separately.
+    pub fn drain(&mut self) -> impl Iterator<Item = T> + '_ {
+        self.data.drain()
+    }
+
+    /// Sift element at `pos` up to restore heap property.
+    #[inline]
+    fn sift_up(&mut self, mut pos: usize) {
+        while pos > 0 {
+            let parent = (pos - 1) / 2;
+            if *self.data.get(pos) > *self.data.get(parent) {
+                // Swap child with parent
+                let child_val = *self.data.get(pos);
+                let parent_val = *self.data.get(parent);
+                *self.data.get_mut(pos) = parent_val;
+                *self.data.get_mut(parent) = child_val;
+                pos = parent;
+            } else {
+                break;
+            }
+        }
+    }
+
+    /// Sift element at `pos` down to restore heap property.
+    #[inline]
+    fn sift_down(&mut self, mut pos: usize) {
+        let len = self.data.len();
+        loop {
+            let left = 2 * pos + 1;
+            let right = 2 * pos + 2;
+            let mut largest = pos;
+
+            if left < len && *self.data.get(left) > *self.data.get(largest) {
+                largest = left;
+            }
+            if right < len && *self.data.get(right) > *self.data.get(largest) {
+                largest = right;
+            }
+
+            if largest == pos {
+                break;
+            }
+
+            // Swap
+            let pos_val = *self.data.get(pos);
+            let largest_val = *self.data.get(largest);
+            *self.data.get_mut(pos) = largest_val;
+            *self.data.get_mut(largest) = pos_val;
+            pos = largest;
+        }
+    }
+}
diff --git a/crates/khive-hnsw/src/arena/arena_vec.rs b/crates/khive-hnsw/src/arena/arena_vec.rs
new file mode 100644
index 00000000..544c0bbb
--- /dev/null
+++ b/crates/khive-hnsw/src/arena/arena_vec.rs
@@ -0,0 +1,281 @@
+//! Arena-backed growable vector.
+//!
+//! `ArenaVec<'a, T>` is a vector that allocates from a `SearchArena` instead
+//! of the global allocator. It supports push, pop, clear, len, iter, and drain.
+//!
+//! # Growth Strategy
+//!
+//! When capacity is exceeded, `ArenaVec` allocates a new (larger) region from
+//! the arena and copies existing elements. The old region is "leaked" in the
+//! arena -- this is fine because the arena will be reset between queries,
+//! reclaiming all memory at once.
+//!
+//! # Lifetime
+//!
+//! The `'a` lifetime ties this vec to its arena. The vec cannot outlive the
+//! arena, and `reset()` on the arena logically invalidates all vecs. In
+//! practice, the search code creates vecs at the start of a query and drops
+//! them before calling `reset()`.
+
+use super::arena::SearchArena;
+
+/// A growable vector backed by arena allocation.
+///
+/// Elements must be `Copy` because growth copies elements to a new arena
+/// region (no drop semantics needed).
+pub struct ArenaVec<'a, T: Copy> {
+    /// Pointer to the start of the allocated region in the arena.
+    ptr: *mut T,
+    /// Number of live elements.
+    len: usize,
+    /// Total capacity (in elements) of the current allocation.
+    cap: usize,
+    /// Reference to the owning arena (for growth allocation).
+    arena: &'a SearchArena,
+}
+
+impl<'a, T: Copy> ArenaVec<'a, T> {
+    /// Create a new empty `ArenaVec` with the given initial capacity.
+    #[inline]
+    pub fn new(arena: &'a SearchArena, capacity: usize) -> Self {
+        let (ptr, cap) = if capacity > 0 {
+            (arena.alloc::<T>(capacity), capacity)
+        } else {
+            (std::ptr::dangling_mut::<T>(), 0) // dangling
+        };
+        Self {
+            ptr,
+            len: 0,
+            cap,
+            arena,
+        }
+    }
+
+    /// Number of elements in the vec.
+    #[inline]
+    pub fn len(&self) -> usize {
+        self.len
+    }
+
+    /// Whether the vec is empty.
+    #[inline]
+    pub fn is_empty(&self) -> bool {
+        self.len == 0
+    }
+
+    /// Push an element. Grows if necessary.
+    #[inline]
+    pub fn push(&mut self, value: T) {
+        if self.len == self.cap {
+            self.grow();
+        }
+        // SAFETY: We just ensured len < cap, so ptr.add(len) is within bounds.
+        unsafe {
+            self.ptr.add(self.len).write(value);
+        }
+        self.len += 1;
+    }
+
+    /// Pop the last element.
+    #[inline]
+    pub fn pop(&mut self) -> Option<T> {
+        if self.len == 0 {
+            return None;
+        }
+        self.len -= 1;
+        // SAFETY: len was > 0, so ptr.add(len) points to the last written element.
+        Some(unsafe { self.ptr.add(self.len).read() })
+    }
+
+    /// Clear the vec without deallocating (just resets length).
+    #[inline]
+    pub fn clear(&mut self) {
+        self.len = 0;
+    }
+
+    /// Get a reference to the element at `index`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `index >= len`. Uses `assert!` (not `debug_assert!`) so
+    /// bounds enforcement is never compiled out in release builds (#2530).
+    #[inline]
+    pub fn get(&self, index: usize) -> &T {
+        assert!(index < self.len, "ArenaVec index out of bounds");
+        // SAFETY: index < len, and all elements up to len are initialized.
+        unsafe { &*self.ptr.add(index) }
+    }
+
+    /// Get an optional reference to the element at `index` (issue #2530).
+    ///
+    /// Returns `None` instead of panicking when `index >= len`. Prefer this
+    /// over `get()` when the caller cannot statically guarantee the index is
+    /// within bounds.
+    #[inline]
+    pub fn try_get(&self, index: usize) -> Option<&T> {
+        if index < self.len {
+            // SAFETY: index < len, and all elements up to len are initialized.
+            Some(unsafe { &*self.ptr.add(index) })
+        } else {
+            None
+        }
+    }
+
+    /// Get a mutable reference to the element at `index`.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `index >= len`. Uses `assert!` (not `debug_assert!`) so
+    /// bounds enforcement is never compiled out in release builds (#2530).
+    #[inline]
+    pub fn get_mut(&mut self, index: usize) -> &mut T {
+        assert!(index < self.len, "ArenaVec index out of bounds");
+        // SAFETY: index < len, and all elements up to len are initialized.
+        unsafe { &mut *self.ptr.add(index) }
+    }
+
+    /// Get an optional mutable reference to the element at `index` (issue #2530).
+    ///
+    /// Returns `None` instead of panicking when `index >= len`.
+    #[inline]
+    pub fn try_get_mut(&mut self, index: usize) -> Option<&mut T> {
+        if index < self.len {
+            // SAFETY: index < len, and all elements up to len are initialized.
+            Some(unsafe { &mut *self.ptr.add(index) })
+        } else {
+            None
+        }
+    }
+
+    /// Get an immutable slice of all elements.
+    #[inline]
+    pub fn as_slice(&self) -> &[T] {
+        if self.len == 0 {
+            return &[];
+        }
+        // SAFETY: ptr points to len initialized elements in the arena.
+        unsafe { std::slice::from_raw_parts(self.ptr, self.len) }
+    }
+
+    /// Get a mutable slice of all elements.
+    #[inline]
+    pub fn as_mut_slice(&mut self) -> &mut [T] {
+        if self.len == 0 {
+            return &mut [];
+        }
+        // SAFETY: ptr points to len initialized elements in the arena.
+        unsafe { std::slice::from_raw_parts_mut(self.ptr, self.len) }
+    }
+
+    /// Iterate over elements by reference.
+    #[inline]
+    pub fn iter(&self) -> std::slice::Iter<'_, T> {
+        self.as_slice().iter()
+    }
+
+    /// Swap-remove the element at `index` (O(1) removal).
+    #[inline]
+    pub fn swap_remove(&mut self, index: usize) -> T {
+        assert!(index < self.len, "ArenaVec swap_remove index out of bounds");
+        let last = self.len - 1;
+        // SAFETY: both index and last are < len.
+        unsafe {
+            let val = self.ptr.add(index).read();
+            if index != last {
+                let last_val = self.ptr.add(last).read();
+                self.ptr.add(index).write(last_val);
+            }
+            self.len -= 1;
+            val
+        }
+    }
+
+    /// Drain all elements, returning an iterator over them.
+    /// After drain, the vec is empty.
+    pub fn drain(&mut self) -> ArenaVecDrain<'_, T> {
+        let len = self.len;
+        self.len = 0;
+        ArenaVecDrain {
+            ptr: self.ptr,
+            pos: 0,
+            len,
+            _marker: std::marker::PhantomData,
+        }
+    }
+
+    /// Extend from a slice.
+    pub fn extend_from_slice(&mut self, slice: &[T]) {
+        for &item in slice {
+            self.push(item);
+        }
+    }
+
+    /// Sort elements using the provided comparison function.
+    pub fn sort_by<F>(&mut self, compare: F)
+    where
+        F: FnMut(&T, &T) -> std::cmp::Ordering,
+    {
+        self.as_mut_slice().sort_by(compare);
+    }
+
+    /// Grow the allocation by doubling capacity (minimum 8).
+    ///
+    /// Allocates a new region from the arena and copies existing elements.
+    /// The old region is leaked in the arena -- reclaimed on reset().
+    fn grow(&mut self) {
+        let new_cap = if self.cap == 0 { 8 } else { self.cap * 2 };
+        let new_ptr = self.arena.alloc::<T>(new_cap);
+        if self.len > 0 {
+            // SAFETY: copying len elements from old region (ptr, len elements)
+            // to new region (new_ptr, new_cap >= len elements). No overlap
+            // because the arena only bumps forward.
+            unsafe {
+                std::ptr::copy_nonoverlapping(self.ptr, new_ptr, self.len);
+            }
+        }
+        // Old region is leaked -- reclaimed on arena reset.
+        self.ptr = new_ptr;
+        self.cap = new_cap;
+    }
+}
+
+/// Index by usize for convenience (read-only).
+impl<'a, T: Copy> std::ops::Index<usize> for ArenaVec<'a, T> {
+    type Output = T;
+
+    #[inline]
+    fn index(&self, index: usize) -> &T {
+        self.get(index)
+    }
+}
+
+/// Drain iterator for `ArenaVec`.
+pub struct ArenaVecDrain<'a, T: Copy> {
+    ptr: *mut T,
+    pos: usize,
+    len: usize,
+    _marker: std::marker::PhantomData<&'a T>,
+}
+
+impl<T: Copy> Iterator for ArenaVecDrain<'_, T> {
+    type Item = T;
+
+    #[inline]
+    fn next(&mut self) -> Option<T> {
+        if self.pos >= self.len {
+            return None;
+        }
+        // SAFETY: pos < len, and all elements were initialized before drain.
+        let val = unsafe { self.ptr.add(self.pos).read() };
+        self.pos += 1;
+        Some(val)
+    }
+
+    #[inline]
+    fn size_hint(&self) -> (usize, Option<usize>) {
+        let remaining = self.len - self.pos;
+        (remaining, Some(remaining))
+    }
+}
+
+impl<T: Copy> ExactSizeIterator for ArenaVecDrain<'_, T> {}
diff --git a/crates/khive-hnsw/src/arena/mod.rs b/crates/khive-hnsw/src/arena/mod.rs
new file mode 100644
index 00000000..c728b44d
--- /dev/null
+++ b/crates/khive-hnsw/src/arena/mod.rs
@@ -0,0 +1,29 @@
+//! Bump arena allocator for zero-allocation HNSW search.
+//!
+//! Provides a fixed-size memory arena with O(1) reset between queries.
+//! All per-search allocations (candidates heap, results heap, batch buffer,
+//! result buffer) bump from this arena instead of the global allocator.
+//!
+//! # Design
+//!
+//! The arena pre-allocates a configurable slab (default 1 MiB). Within a
+//! single search query, allocations bump a pointer forward. Between queries,
+//! `reset()` sets the pointer back to zero -- O(1), no deallocation, no
+//! destructors, no zeroing.
+//!
+//! # Thread Safety
+//!
+//! The arena is `!Send` and `!Sync` by design. For concurrent search, each
+//! thread should own its own `SearchArena` (via `thread_local!` or explicit
+//! per-thread allocation).
+
+mod arena;
+mod arena_heap;
+mod arena_vec;
+
+pub use arena::SearchArena;
+pub use arena_heap::ArenaBinaryHeap;
+pub use arena_vec::ArenaVec;
+
+#[cfg(test)]
+mod tests;
diff --git a/crates/khive-hnsw/src/arena/tests.rs b/crates/khive-hnsw/src/arena/tests.rs
new file mode 100644
index 00000000..edd84182
--- /dev/null
+++ b/crates/khive-hnsw/src/arena/tests.rs
@@ -0,0 +1,440 @@
+//! Tests for the search arena allocator.
+
+use super::*;
+
+// =========================================================================
+// SearchArena tests
+// =========================================================================
+
+#[test]
+fn test_arena_alloc_and_reset() {
+    let arena = SearchArena::new(4096);
+    assert_eq!(arena.bytes_used(), 0);
+
+    // Allocate some memory
+    let _ptr: *mut u64 = arena.alloc::<u64>(10);
+    assert!(arena.bytes_used() > 0);
+    let used_after_alloc = arena.bytes_used();
+
+    // Allocate more
+    let _ptr2: *mut u32 = arena.alloc::<u32>(20);
+    assert!(arena.bytes_used() > used_after_alloc);
+
+    // Reset -- O(1), reclaims all memory
+    arena.reset();
+    assert_eq!(arena.bytes_used(), 0);
+
+    // Can allocate again after reset
+    let _ptr3: *mut u64 = arena.alloc::<u64>(10);
+    assert!(arena.bytes_used() > 0);
+}
+
+#[test]
+fn test_arena_overflow_grows() {
+    // Small arena that will need to grow
+    let arena = SearchArena::new(1024); // minimum size
+    let initial_cap = arena.capacity();
+
+    // Allocate more than capacity
+    let _ptr: *mut u8 = arena.alloc::<u8>(2048);
+
+    // Arena should have grown
+    assert!(arena.capacity() >= 2048);
+    assert!(arena.capacity() > initial_cap);
+}
+
+#[test]
+fn test_arena_reset_reuse_cycle() {
+    let arena = SearchArena::new(4096);
+
+    for _ in 0..100 {
+        // Simulate a search query: allocate various buffers
+        let _candidates: *mut (f32, usize) = arena.alloc::<(f32, usize)>(64);
+        let _results: *mut (f32, usize) = arena.alloc::<(f32, usize)>(64);
+        let _batch: *mut (usize, usize) = arena.alloc::<(usize, usize)>(32);
+
+        assert!(arena.bytes_used() > 0);
+
+        // Reset between queries
+        arena.reset();
+        assert_eq!(arena.bytes_used(), 0);
+    }
+}
+
+#[test]
+fn test_arena_alignment() {
+    let arena = SearchArena::new(4096);
+
+    // Allocate a u8 to offset the pointer
+    let _: *mut u8 = arena.alloc::<u8>(1);
+
+    // Allocate a u64 -- should be aligned to 8 bytes
+    let ptr: *mut u64 = arena.alloc::<u64>(1);
+    assert_eq!(ptr as usize % std::mem::align_of::<u64>(), 0);
+
+    // Allocate a u128 -- should be aligned to 16 bytes
+    let ptr128: *mut u128 = arena.alloc::<u128>(1);
+    assert_eq!(ptr128 as usize % std::mem::align_of::<u128>(), 0);
+}
+
+// =========================================================================
+// ArenaVec tests
+// =========================================================================
+
+#[test]
+fn test_arena_vec_push_pop() {
+    let arena = SearchArena::new(4096);
+    let mut vec = ArenaVec::new(&arena, 4);
+
+    assert!(vec.is_empty());
+    assert_eq!(vec.len(), 0);
+
+    vec.push(10);
+    vec.push(20);
+    vec.push(30);
+
+    assert_eq!(vec.len(), 3);
+    assert!(!vec.is_empty());
+
+    assert_eq!(vec.pop(), Some(30));
+    assert_eq!(vec.pop(), Some(20));
+    assert_eq!(vec.pop(), Some(10));
+    assert_eq!(vec.pop(), None);
+    assert!(vec.is_empty());
+}
+
+#[test]
+fn test_arena_vec_growth() {
+    let arena = SearchArena::new(4096);
+    let mut vec = ArenaVec::new(&arena, 2);
+
+    // Push beyond initial capacity
+    for i in 0..100 {
+        vec.push(i);
+    }
+
+    assert_eq!(vec.len(), 100);
+    for i in 0..100 {
+        assert_eq!(*vec.get(i), i);
+    }
+}
+
+#[test]
+fn test_arena_vec_clear() {
+    let arena = SearchArena::new(4096);
+    let mut vec = ArenaVec::new(&arena, 8);
+
+    vec.push(1);
+    vec.push(2);
+    vec.push(3);
+    vec.clear();
+
+    assert!(vec.is_empty());
+    assert_eq!(vec.len(), 0);
+
+    // Can push again after clear
+    vec.push(4);
+    assert_eq!(vec.len(), 1);
+    assert_eq!(*vec.get(0), 4);
+}
+
+#[test]
+fn test_arena_vec_iter() {
+    let arena = SearchArena::new(4096);
+    let mut vec = ArenaVec::new(&arena, 8);
+
+    vec.push(10);
+    vec.push(20);
+    vec.push(30);
+
+    let collected: Vec<i32> = vec.iter().copied().collect();
+    assert_eq!(collected, vec![10, 20, 30]);
+}
+
+#[test]
+fn test_arena_vec_drain() {
+    let arena = SearchArena::new(4096);
+    let mut vec = ArenaVec::new(&arena, 8);
+
+    vec.push(1);
+    vec.push(2);
+    vec.push(3);
+
+    let drained: Vec<i32> = vec.drain().collect();
+    assert_eq!(drained, vec![1, 2, 3]);
+    assert!(vec.is_empty());
+}
+
+#[test]
+fn test_arena_vec_as_slice() {
+    let arena = SearchArena::new(4096);
+    let mut vec = ArenaVec::new(&arena, 8);
+
+    vec.push(5);
+    vec.push(10);
+    vec.push(15);
+
+    assert_eq!(vec.as_slice(), &[5, 10, 15]);
+}
+
+#[test]
+fn test_arena_vec_index() {
+    let arena = SearchArena::new(4096);
+    let mut vec = ArenaVec::new(&arena, 8);
+
+    vec.push(100);
+    vec.push(200);
+
+    assert_eq!(vec[0], 100);
+    assert_eq!(vec[1], 200);
+}
+
+#[test]
+fn test_arena_vec_sort_by() {
+    let arena = SearchArena::new(4096);
+    let mut vec = ArenaVec::new(&arena, 8);
+
+    vec.push(30);
+    vec.push(10);
+    vec.push(20);
+
+    vec.sort_by(|a, b| a.cmp(b));
+    assert_eq!(vec.as_slice(), &[10, 20, 30]);
+}
+
+#[test]
+fn test_arena_vec_swap_remove() {
+    let arena = SearchArena::new(4096);
+    let mut vec = ArenaVec::new(&arena, 8);
+
+    vec.push(10);
+    vec.push(20);
+    vec.push(30);
+
+    let removed = vec.swap_remove(0);
+    assert_eq!(removed, 10);
+    assert_eq!(vec.len(), 2);
+    // 30 should now be at index 0 (swapped from last)
+    assert_eq!(*vec.get(0), 30);
+    assert_eq!(*vec.get(1), 20);
+}
+
+// =========================================================================
+// ArenaBinaryHeap tests
+// =========================================================================
+
+#[test]
+fn test_arena_heap_max_ordering() {
+    let arena = SearchArena::new(4096);
+    let mut heap = ArenaBinaryHeap::new(&arena, 8);
+
+    heap.push(10);
+    heap.push(30);
+    heap.push(20);
+    heap.push(5);
+    heap.push(25);
+
+    // Max-heap: should pop in descending order
+    assert_eq!(heap.pop(), Some(30));
+    assert_eq!(heap.pop(), Some(25));
+    assert_eq!(heap.pop(), Some(20));
+    assert_eq!(heap.pop(), Some(10));
+    assert_eq!(heap.pop(), Some(5));
+    assert_eq!(heap.pop(), None);
+}
+
+#[test]
+fn test_arena_heap_min_ordering_via_reverse() {
+    use std::cmp::Reverse;
+
+    let arena = SearchArena::new(4096);
+    let mut heap = ArenaBinaryHeap::new(&arena, 8);
+
+    heap.push(Reverse(10));
+    heap.push(Reverse(30));
+    heap.push(Reverse(20));
+    heap.push(Reverse(5));
+    heap.push(Reverse(25));
+
+    // Min-heap via Reverse: should pop in ascending order
+    assert_eq!(heap.pop(), Some(Reverse(5)));
+    assert_eq!(heap.pop(), Some(Reverse(10)));
+    assert_eq!(heap.pop(), Some(Reverse(20)));
+    assert_eq!(heap.pop(), Some(Reverse(25)));
+    assert_eq!(heap.pop(), Some(Reverse(30)));
+    assert_eq!(heap.pop(), None);
+}
+
+#[test]
+fn test_arena_heap_peek() {
+    let arena = SearchArena::new(4096);
+    let mut heap = ArenaBinaryHeap::new(&arena, 8);
+
+    assert_eq!(heap.peek(), None);
+
+    heap.push(10);
+    assert_eq!(heap.peek(), Some(&10));
+
+    heap.push(20);
+    assert_eq!(heap.peek(), Some(&20));
+
+    heap.push(5);
+    assert_eq!(heap.peek(), Some(&20)); // 20 is still max
+}
+
+#[test]
+fn test_arena_heap_clear() {
+    let arena = SearchArena::new(4096);
+    let mut heap = ArenaBinaryHeap::new(&arena, 8);
+
+    heap.push(1);
+    heap.push(2);
+    heap.push(3);
+    heap.clear();
+
+    assert!(heap.is_empty());
+    assert_eq!(heap.len(), 0);
+    assert_eq!(heap.peek(), None);
+}
+
+#[test]
+fn test_arena_heap_drain() {
+    let arena = SearchArena::new(4096);
+    let mut heap = ArenaBinaryHeap::new(&arena, 8);
+
+    heap.push(10);
+    heap.push(20);
+    heap.push(30);
+
+    let mut drained: Vec<i32> = heap.drain().collect();
+    drained.sort();
+    assert_eq!(drained, vec![10, 20, 30]);
+    assert!(heap.is_empty());
+}
+
+// =========================================================================
+// Integration: HNSW-like usage patterns
+// =========================================================================
+
+#[test]
+fn test_hnsw_search_pattern() {
+    use crate::distance::OrderedF32;
+
+    let arena = SearchArena::new(4096);
+
+    // Simulate the HNSW search pattern:
+    // candidates = min-heap, results = max-heap
+
+    let mut candidates: ArenaBinaryHeap<std::cmp::Reverse<(OrderedF32, usize)>> =
+        ArenaBinaryHeap::new(&arena, 64);
+    let mut results: ArenaBinaryHeap<(OrderedF32, usize)> = ArenaBinaryHeap::new(&arena, 64);
+    let mut result_buf: ArenaVec<(f32, usize)> = ArenaVec::new(&arena, 64);
+
+    // Insert entry points
+    candidates.push(std::cmp::Reverse((OrderedF32(0.5), 0)));
+    results.push((OrderedF32(0.5), 0));
+
+    candidates.push(std::cmp::Reverse((OrderedF32(0.3), 1)));
+    results.push((OrderedF32(0.3), 1));
+
+    candidates.push(std::cmp::Reverse((OrderedF32(0.7), 2)));
+    results.push((OrderedF32(0.7), 2));
+
+    // Pop closest candidate (min-heap)
+    let closest = candidates.pop().unwrap();
+    assert_eq!(closest.0 .0 .0, 0.3); // OrderedF32(0.3)
+
+    // Peek worst result (max-heap)
+    let worst = results.peek().unwrap();
+    assert_eq!(worst.0 .0, 0.7); // OrderedF32(0.7)
+
+    // Drain results into result_buf
+    for (dist, id) in results.drain() {
+        result_buf.push((dist.0, id));
+    }
+    result_buf.sort_by(|a, b| OrderedF32(a.0).cmp(&OrderedF32(b.0)));
+
+    assert_eq!(result_buf.len(), 3);
+    assert_eq!(result_buf[0].0, 0.3);
+    assert_eq!(result_buf[1].0, 0.5);
+    assert_eq!(result_buf[2].0, 0.7);
+
+    // Reset arena for next query
+    arena.reset();
+    assert_eq!(arena.bytes_used(), 0);
+}
+
+#[test]
+fn test_arena_vec_extend_from_slice() {
+    let arena = SearchArena::new(4096);
+    let mut vec = ArenaVec::new(&arena, 4);
+
+    vec.extend_from_slice(&[1, 2, 3, 4, 5]);
+    assert_eq!(vec.as_slice(), &[1, 2, 3, 4, 5]);
+}
+
+#[test]
+fn test_arena_vec_zero_capacity() {
+    let arena = SearchArena::new(4096);
+    let mut vec: ArenaVec<i32> = ArenaVec::new(&arena, 0);
+
+    assert!(vec.is_empty());
+    vec.push(42);
+    assert_eq!(vec.len(), 1);
+    assert_eq!(vec[0], 42);
+}
+
+#[test]
+fn test_arena_heap_single_element() {
+    let arena = SearchArena::new(4096);
+    let mut heap = ArenaBinaryHeap::new(&arena, 4);
+
+    heap.push(42);
+    assert_eq!(heap.peek(), Some(&42));
+    assert_eq!(heap.pop(), Some(42));
+    assert!(heap.is_empty());
+}
+
+#[test]
+fn test_arena_heap_duplicate_values() {
+    let arena = SearchArena::new(4096);
+    let mut heap = ArenaBinaryHeap::new(&arena, 8);
+
+    heap.push(5);
+    heap.push(5);
+    heap.push(5);
+
+    assert_eq!(heap.pop(), Some(5));
+    assert_eq!(heap.pop(), Some(5));
+    assert_eq!(heap.pop(), Some(5));
+    assert_eq!(heap.pop(), None);
+}
+
+#[test]
+fn test_multiple_reset_cycles_with_collections() {
+    let arena = SearchArena::new(4096);
+
+    for cycle in 0..50 {
+        let mut vec = ArenaVec::new(&arena, 8);
+        let mut heap = ArenaBinaryHeap::new(&arena, 8);
+
+        for i in 0..20 {
+            vec.push(cycle * 100 + i);
+            heap.push(cycle * 100 + i);
+        }
+
+        assert_eq!(vec.len(), 20);
+        assert_eq!(heap.len(), 20);
+
+        // Verify max element
+        assert_eq!(heap.peek(), Some(&(cycle * 100 + 19)));
+
+        arena.reset();
+    }
+}
+
+#[test]
+fn test_arena_default_capacity() {
+    let arena = SearchArena::with_default_capacity();
+    assert!(arena.capacity() >= super::arena::DEFAULT_ARENA_SIZE);
+}
diff --git a/crates/khive-hnsw/src/checkpoint/integration_tests.rs b/crates/khive-hnsw/src/checkpoint/integration_tests.rs
new file mode 100644
index 00000000..666382aa
--- /dev/null
+++ b/crates/khive-hnsw/src/checkpoint/integration_tests.rs
@@ -0,0 +1,181 @@
+use super::*;
+use khive_fold::{Checkpoint, CheckpointStore, FoldContext, InMemoryCheckpointStore};
+use khive_types::Hash32;
+use uuid::Uuid;
+
+fn test_hash() -> Hash32 {
+    Hash32::from_bytes(*blake3::hash(b"hnsw checkpoint test").as_bytes())
+}
+
+fn make_id(seed: u8) -> NodeId {
+    NodeId::new([seed; 16])
+}
+
+fn sample_snapshot() -> HnswSnapshot {
+    HnswSnapshot {
+        vector_count: 0,
+        total_nodes: 1,
+        live_nodes: 1,
+        tombstone_count: 0,
+        max_layer: 0,
+        entry_point: Some(make_id(1)),
+        config: HnswCheckpointConfig {
+            m: 16,
+            ef_construction: 200,
+            metric: "cosine".to_string(),
+        },
+        indexed_ids: vec![make_id(1)],
+        tombstoned_ids: vec![],
+        layers: vec![vec![(make_id(1), vec![])]],
+        vectors: vec![],
+    }
+}
+
+fn sample_snapshot_with_tombstones() -> HnswSnapshot {
+    let id1 = make_id(1);
+    let id2 = make_id(2);
+    HnswSnapshot {
+        vector_count: 0,
+        total_nodes: 2,
+        live_nodes: 1,
+        tombstone_count: 1,
+        max_layer: 0,
+        entry_point: Some(id1),
+        config: HnswCheckpointConfig {
+            m: 16,
+            ef_construction: 200,
+            metric: "cosine".to_string(),
+        },
+        indexed_ids: vec![id1, id2],
+        tombstoned_ids: vec![id2],
+        layers: vec![vec![(id1, vec![id2]), (id2, vec![id1])]],
+        vectors: vec![],
+    }
+}
+
+#[test]
+fn create_hnsw_checkpoint() {
+    let snap = sample_snapshot();
+    let checkpoint: HnswCheckpoint = Checkpoint::new(
+        "hnsw_test:ckpt-1",
+        snap,
+        Uuid::new_v4(),
+        test_hash(),
+        100,
+        FoldContext::new(),
+        1,
+    );
+
+    assert_eq!(checkpoint.state.total_nodes, 1);
+    assert_eq!(checkpoint.state.live_nodes, 1);
+    assert_eq!(checkpoint.entries_processed, 100);
+    assert_eq!(checkpoint.fold_version, 1);
+}
+
+#[test]
+fn create_hnsw_checkpoint_with_tombstones() {
+    let snap = sample_snapshot_with_tombstones();
+    let checkpoint: HnswCheckpoint = Checkpoint::new(
+        "hnsw_test:ckpt-1",
+        snap,
+        Uuid::new_v4(),
+        test_hash(),
+        100,
+        FoldContext::new(),
+        1,
+    );
+
+    assert_eq!(checkpoint.state.total_nodes, 2);
+    assert_eq!(checkpoint.state.live_nodes, 1);
+    assert_eq!(checkpoint.state.tombstone_count, 1);
+    assert_eq!(checkpoint.state.tombstoned_ids.len(), 1);
+}
+
+#[test]
+fn store_and_load_hnsw_checkpoint() {
+    let store: HnswCheckpointStore = InMemoryCheckpointStore::new();
+    let snap = sample_snapshot();
+
+    let checkpoint: HnswCheckpoint = Checkpoint::new(
+        "hnsw_idx:ckpt-1",
+        snap,
+        Uuid::new_v4(),
+        test_hash(),
+        50,
+        FoldContext::new(),
+        1,
+    );
+
+    store.save(&checkpoint).expect("save");
+
+    let loaded = store
+        .load("hnsw_idx:ckpt-1")
+        .expect("load")
+        .expect("should exist");
+
+    assert_eq!(loaded.state.total_nodes, 1);
+    assert_eq!(loaded.state.live_nodes, 1);
+    assert_eq!(loaded.state.config.m, 16);
+    assert_eq!(loaded.state.config.metric, "cosine");
+    assert_eq!(loaded.entries_processed, 50);
+}
+
+#[test]
+fn store_and_load_checkpoint_with_tombstones() {
+    let store: HnswCheckpointStore = InMemoryCheckpointStore::new();
+    let snap = sample_snapshot_with_tombstones();
+
+    let checkpoint: HnswCheckpoint = Checkpoint::new(
+        "hnsw_idx:ckpt-tomb",
+        snap,
+        Uuid::new_v4(),
+        test_hash(),
+        50,
+        FoldContext::new(),
+        1,
+    );
+
+    store.save(&checkpoint).expect("save");
+
+    let loaded = store
+        .load("hnsw_idx:ckpt-tomb")
+        .expect("load")
+        .expect("should exist");
+
+    assert_eq!(loaded.state.total_nodes, 2);
+    assert_eq!(loaded.state.live_nodes, 1);
+    assert_eq!(loaded.state.tombstone_count, 1);
+    assert!(loaded.state.verify().is_ok());
+}
+
+#[test]
+fn load_latest_hnsw_checkpoint() {
+    let store: HnswCheckpointStore = InMemoryCheckpointStore::new();
+
+    for i in 0..3 {
+        let mut snap = sample_snapshot();
+        snap.total_nodes = (i + 1) * 100;
+        snap.live_nodes = (i + 1) * 100;
+
+        let checkpoint: HnswCheckpoint = Checkpoint::new(
+            format!("hnsw_idx:ckpt-{i}"),
+            snap,
+            Uuid::new_v4(),
+            test_hash(),
+            (i + 1) * 10,
+            FoldContext::new(),
+            1,
+        );
+        store.save(&checkpoint).expect("save");
+        std::thread::sleep(std::time::Duration::from_millis(10));
+    }
+
+    let latest = store
+        .load_latest("hnsw_idx")
+        .expect("load_latest")
+        .expect("should exist");
+
+    assert_eq!(latest.state.total_nodes, 300);
+    assert_eq!(latest.state.live_nodes, 300);
+    assert_eq!(latest.entries_processed, 30);
+}
diff --git a/crates/khive-hnsw/src/checkpoint/mod.rs b/crates/khive-hnsw/src/checkpoint/mod.rs
new file mode 100644
index 00000000..1eb88c50
--- /dev/null
+++ b/crates/khive-hnsw/src/checkpoint/mod.rs
@@ -0,0 +1,423 @@
+//! HNSW index checkpointing using khive-fold checkpoint system.
+//!
+//! Provides periodic snapshots of the HNSW index for crash recovery
+//! and incremental rebuilds.
+//!
+//! # Architecture
+//!
+//! The snapshot types ([`HnswSnapshot`], [`HnswCheckpointConfig`]) are always
+//! available and carry no extra dependencies. They are plain serializable data.
+//!
+//! When the `checkpoint` feature is enabled, this module also provides type
+//! aliases that integrate with `khive-fold`'s [`Checkpoint`] and
+//! [`InMemoryCheckpointStore`] for a complete checkpoint lifecycle.
+//!
+//! ```text
+//! HnswIndex ──snapshot──> HnswSnapshot ──wrap──> Checkpoint<HnswSnapshot>
+//!                                                       │
+//!                                         CheckpointStore::save(...)
+//! ```
+//!
+//! # Tombstone Tracking
+//!
+//! Snapshots track both live and tombstoned nodes to ensure accurate restore:
+//! - `total_nodes`: All nodes (live + tombstoned)
+//! - `live_nodes`: Non-tombstoned nodes only
+//! - `tombstone_count`: Number of tombstoned nodes
+//! - `tombstoned_ids`: IDs of tombstoned vectors for restore
+//!
+//! The invariant `total_nodes == live_nodes + tombstone_count` is enforced
+//! via the [`HnswSnapshot::verify`] method.
+//!
+//! # Determinism
+//!
+//! All ID lists (`indexed_ids`, `tombstoned_ids`) and layer node entries are
+//! stored in sorted order by NodeId bytes to ensure deterministic snapshots
+//! across runs. This is critical for:
+//! - Reproducible checkpoint hashes
+//! - Stable index-based encoding (e.g., tombstone bitsets)
+//! - Test reproducibility
+//!
+//! Use [`HnswSnapshot::canonicalize`] to ensure snapshots are in canonical form
+//! before serialization, or [`HnswSnapshot::is_canonical`] to verify ordering.
+
+use std::collections::HashSet;
+
+use serde::{Deserialize, Serialize};
+use thiserror::Error;
+
+use super::config::DistanceMetric;
+use crate::NodeId;
+
+/// Errors that can occur during snapshot verification.
+#[derive(Error, Debug, Clone, PartialEq, Eq)]
+pub enum SnapshotError {
+    /// Node count fields are inconsistent.
+    #[error(
+        "inconsistent counts: total_nodes ({total}) != live_nodes ({live}) + tombstone_count ({tombstones})"
+    )]
+    InconsistentCounts {
+        /// Total nodes reported.
+        total: usize,
+        /// Live nodes reported.
+        live: usize,
+        /// Tombstones reported.
+        tombstones: usize,
+    },
+
+    /// indexed_ids length doesn't match total_nodes.
+    #[error("indexed_ids count mismatch: expected {expected}, got {actual}")]
+    IdCountMismatch {
+        /// Expected count (total_nodes).
+        expected: usize,
+        /// Actual indexed_ids length.
+        actual: usize,
+    },
+
+    /// tombstoned_ids length doesn't match tombstone_count.
+    #[error("tombstoned_ids count mismatch: expected {expected}, got {actual}")]
+    TombstoneIdCountMismatch {
+        /// Expected count (tombstone_count).
+        expected: usize,
+        /// Actual tombstoned_ids length.
+        actual: usize,
+    },
+
+    /// Tombstoned ID not found in indexed_ids.
+    #[error("tombstoned id {id:?} not found in indexed_ids")]
+    TombstoneNotInIndex {
+        /// The missing tombstone ID.
+        id: NodeId,
+    },
+}
+
+/// Sort NodeIds by their byte representation for deterministic ordering.
+///
+/// This ensures consistent ordering across runs regardless of HashMap iteration order,
+/// which is critical for reproducible checkpoint hashes and stable index-based encodings.
+#[inline]
+pub(crate) fn sort_ids(ids: &mut [NodeId]) {
+    ids.sort_by(|a, b| a.as_bytes().cmp(b.as_bytes()));
+}
+
+/// Helper for serde skip_serializing_if on legacy vector_count field.
+fn is_zero(val: &usize) -> bool {
+    *val == 0
+}
+
+/// Serializable snapshot of HNSW index state.
+///
+/// Captures enough information to reconstruct the index without
+/// re-indexing all vectors from scratch.
+///
+/// # Backward Compatibility
+///
+/// This struct maintains backward compatibility with v1 snapshots that only
+/// had `vector_count`. When deserializing old snapshots:
+/// - `vector_count` is read and used to populate `total_nodes`/`live_nodes`
+/// - New tombstone fields default to empty/zero
+/// - Missing `vectors` field defaults to empty (old snapshots require external vector supply)
+///
+/// Call [`HnswSnapshot::normalize`] after deserialization to ensure consistent state.
+///
+/// # Warm Start
+///
+/// When `vectors` is non-empty the snapshot is self-contained: call
+/// [`HnswIndex::restore_from_snapshot_embedded`] to restore without supplying
+/// an external vector map.  Snapshots produced by [`HnswIndex::snapshot`]
+/// always include the full f32 vector data.
+///
+/// The estimated size overhead is `dimensions × 4 bytes × node_count`.
+/// For 384-dim embeddings with 10 K nodes this is ~15 MB — well within
+/// typical checkpoint budgets.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct HnswSnapshot {
+    /// Legacy field for backward compatibility with v1 snapshots.
+    /// New code should use `total_nodes` and `live_nodes` instead.
+    #[serde(default, skip_serializing_if = "is_zero")]
+    pub vector_count: usize,
+
+    /// Total number of nodes (including tombstones).
+    #[serde(default)]
+    pub total_nodes: usize,
+
+    /// Number of live (non-tombstoned) nodes.
+    #[serde(default)]
+    pub live_nodes: usize,
+
+    /// Number of tombstoned nodes.
+    #[serde(default)]
+    pub tombstone_count: usize,
+
+    /// Maximum layer in the graph.
+    pub max_layer: usize,
+
+    /// Entry point node ID (if any).
+    pub entry_point: Option<NodeId>,
+
+    /// Index configuration at checkpoint time.
+    pub config: HnswCheckpointConfig,
+
+    /// IDs of all indexed vectors (for verification on restore).
+    /// Sorted by byte representation for deterministic ordering.
+    pub indexed_ids: Vec<NodeId>,
+
+    /// IDs of tombstoned vectors.
+    /// Sorted by byte representation for deterministic ordering.
+    #[serde(default)]
+    pub tombstoned_ids: Vec<NodeId>,
+
+    /// Graph edges per layer: `layer -> [(node_id, [neighbor_ids])]`.
+    /// Node entries within each layer are sorted by NodeId bytes.
+    pub layers: Vec<Vec<(NodeId, Vec<NodeId>)>>,
+
+    /// Embedded f32 vector data for self-contained warm-start snapshots.
+    ///
+    /// Maps each `NodeId` to its raw embedding vector.  When non-empty, the
+    /// snapshot is self-contained and can be restored via
+    /// [`HnswIndex::restore_from_snapshot_embedded`] without supplying a
+    /// separate vector map.
+    ///
+    /// Defaults to empty for backward compatibility with snapshots that
+    /// pre-date this field (those require an external vector map).
+    #[serde(default, skip_serializing_if = "Vec::is_empty")]
+    pub vectors: Vec<(NodeId, Vec<f32>)>,
+}
+
+/// Subset of [`super::HnswConfig`] relevant for checkpoint compatibility.
+///
+/// Stored as simple values (e.g. `metric` as `String`) so that checkpoints
+/// remain deserializable even if the enum representation changes.
+#[derive(Debug, Clone, Serialize, Deserialize, PartialEq)]
+pub struct HnswCheckpointConfig {
+    /// Maximum connections per node per layer (M).
+    pub m: usize,
+    /// Size of dynamic candidate list during construction.
+    pub ef_construction: usize,
+    /// Distance metric name (e.g. `"cosine"`, `"dot"`, `"euclidean"`).
+    pub metric: String,
+}
+
+impl HnswCheckpointConfig {
+    /// Create a checkpoint config from the full [`super::HnswConfig`].
+    pub fn from_hnsw_config(config: &super::config::HnswConfig) -> Self {
+        Self {
+            m: config.m,
+            ef_construction: config.ef_construction,
+            metric: metric_to_string(&config.metric),
+        }
+    }
+}
+
+impl HnswSnapshot {
+    /// Check if this snapshot is compatible with the given config.
+    ///
+    /// Two configs are compatible when `m`, `ef_construction`, and `metric`
+    /// all match. Loading a snapshot into an index with incompatible
+    /// parameters would produce incorrect search results.
+    pub fn is_compatible(&self, config: &HnswCheckpointConfig) -> bool {
+        self.config == *config
+    }
+
+    /// Get the number of live (non-tombstoned) vectors in this snapshot.
+    ///
+    /// For backward compatibility, this returns `live_nodes` which represents
+    /// the same semantic meaning as the legacy `vector_count` (all vectors
+    /// were "live" before tombstone support).
+    pub fn len(&self) -> usize {
+        self.live_nodes
+    }
+
+    /// Get the total number of nodes (including tombstones).
+    pub fn total_len(&self) -> usize {
+        self.total_nodes
+    }
+
+    /// Get the number of tombstoned nodes.
+    pub fn tombstone_count(&self) -> usize {
+        self.tombstone_count
+    }
+
+    /// Returns `true` if the snapshot contains no live vectors.
+    pub fn is_empty(&self) -> bool {
+        self.live_nodes == 0
+    }
+
+    /// Normalize the snapshot after deserialization.
+    ///
+    /// This handles backward compatibility with v1 snapshots that only
+    /// had `vector_count`. If `total_nodes` is 0 but `vector_count` > 0
+    /// or `indexed_ids` is non-empty, the counts are populated from
+    /// available data.
+    ///
+    /// Call this after deserializing a snapshot of unknown version.
+    pub fn normalize(&mut self) {
+        // Handle v1 -> v2 migration
+        if self.total_nodes == 0 {
+            if self.vector_count > 0 {
+                // V1 snapshot with vector_count
+                self.total_nodes = self.vector_count;
+                self.live_nodes = self.vector_count;
+                self.tombstone_count = 0;
+            } else if !self.indexed_ids.is_empty() {
+                // Fallback: infer from indexed_ids
+                self.total_nodes = self.indexed_ids.len();
+                self.live_nodes = self.indexed_ids.len() - self.tombstoned_ids.len();
+                self.tombstone_count = self.tombstoned_ids.len();
+            }
+        }
+
+        // Ensure tombstone_count matches tombstoned_ids
+        if self.tombstone_count == 0 && !self.tombstoned_ids.is_empty() {
+            self.tombstone_count = self.tombstoned_ids.len();
+        }
+    }
+
+    /// Verify internal consistency of the snapshot.
+    ///
+    /// Checks:
+    /// 1. `total_nodes == live_nodes + tombstone_count`
+    /// 2. `indexed_ids.len() == total_nodes`
+    /// 3. `tombstoned_ids.len() == tombstone_count`
+    /// 4. All tombstoned IDs exist in indexed_ids
+    ///
+    /// Returns `Ok(())` if all invariants hold, otherwise returns the
+    /// first error encountered.
+    pub fn verify(&self) -> Result<(), SnapshotError> {
+        // Check count consistency
+        if self.total_nodes != self.live_nodes + self.tombstone_count {
+            return Err(SnapshotError::InconsistentCounts {
+                total: self.total_nodes,
+                live: self.live_nodes,
+                tombstones: self.tombstone_count,
+            });
+        }
+
+        // Check indexed_ids matches total_nodes
+        if self.indexed_ids.len() != self.total_nodes {
+            return Err(SnapshotError::IdCountMismatch {
+                expected: self.total_nodes,
+                actual: self.indexed_ids.len(),
+            });
+        }
+
+        // Check tombstoned_ids matches tombstone_count
+        if self.tombstoned_ids.len() != self.tombstone_count {
+            return Err(SnapshotError::TombstoneIdCountMismatch {
+                expected: self.tombstone_count,
+                actual: self.tombstoned_ids.len(),
+            });
+        }
+
+        // Check all tombstoned IDs are in indexed_ids
+        if !self.tombstoned_ids.is_empty() {
+            let indexed_set: HashSet<_> = self.indexed_ids.iter().collect();
+            for id in &self.tombstoned_ids {
+                if !indexed_set.contains(id) {
+                    return Err(SnapshotError::TombstoneNotInIndex { id: *id });
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Check if indexed_ids, tombstoned_ids, and layers are in canonical sorted order.
+    ///
+    /// Canonical order means all ID lists are sorted by their byte representation.
+    /// This ensures deterministic serialization and stable index-based encodings.
+    pub fn is_canonical(&self) -> bool {
+        // Check indexed_ids are sorted
+        let ids_sorted = self
+            .indexed_ids
+            .windows(2)
+            .all(|w| w[0].as_bytes() <= w[1].as_bytes());
+
+        if !ids_sorted {
+            return false;
+        }
+
+        // Check tombstoned_ids are sorted
+        let tombstones_sorted = self
+            .tombstoned_ids
+            .windows(2)
+            .all(|w| w[0].as_bytes() <= w[1].as_bytes());
+
+        if !tombstones_sorted {
+            return false;
+        }
+
+        // Check each layer's nodes are sorted by ID
+        for layer in &self.layers {
+            let layer_sorted = layer
+                .windows(2)
+                .all(|w| w[0].0.as_bytes() <= w[1].0.as_bytes());
+            if !layer_sorted {
+                return false;
+            }
+        }
+
+        true
+    }
+
+    /// Ensure canonical ordering (idempotent).
+    ///
+    /// Sorts `indexed_ids`, `tombstoned_ids`, and layer node entries by their
+    /// byte representation. This should be called before serializing snapshots
+    /// to ensure deterministic output.
+    ///
+    /// # Note
+    ///
+    /// Neighbor lists within each node are intentionally not sorted, as their order
+    /// may reflect proximity/priority from the HNSW algorithm. Only the top-level
+    /// node ordering within layers is canonicalized.
+    pub fn canonicalize(&mut self) {
+        // Sort indexed IDs
+        sort_ids(&mut self.indexed_ids);
+
+        // Sort tombstoned IDs
+        sort_ids(&mut self.tombstoned_ids);
+
+        // Sort layer node order (but preserve neighbor list order within each node)
+        for layer in &mut self.layers {
+            layer.sort_by(|(a, _), (b, _)| a.as_bytes().cmp(b.as_bytes()));
+        }
+    }
+}
+
+/// Convert a [`DistanceMetric`] to its canonical string representation.
+pub(crate) fn metric_to_string(metric: &DistanceMetric) -> String {
+    match metric {
+        DistanceMetric::Cosine => "cosine".to_string(),
+        DistanceMetric::Dot => "dot".to_string(),
+        DistanceMetric::L2 => "euclidean".to_string(),
+        // Fall back to debug repr for future variants.
+        other => format!("{:?}", other).to_lowercase(),
+    }
+}
+
+// ── Feature-gated fold integration ──────────────────────────────────────
+
+/// Type alias for HNSW checkpoints using the fold checkpoint system.
+///
+/// Wraps an [`HnswSnapshot`] in the generic [`khive_fold::Checkpoint`]
+/// envelope which adds checkpoint ID, timestamp, and fold context.
+#[cfg(feature = "checkpoint")]
+pub type HnswCheckpoint = khive_fold::Checkpoint<HnswSnapshot>;
+
+/// Type alias for an in-memory HNSW checkpoint store.
+///
+/// Suitable for testing and development. Production deployments should
+/// implement [`khive_fold::CheckpointStore`] with durable storage.
+#[cfg(feature = "checkpoint")]
+pub type HnswCheckpointStore = khive_fold::InMemoryCheckpointStore<HnswSnapshot>;
+
+// ── Tests ───────────────────────────────────────────────────────────────
+
+#[cfg(test)]
+#[path = "tests.rs"]
+mod tests;
+
+#[cfg(all(test, feature = "checkpoint"))]
+#[path = "integration_tests.rs"]
+mod checkpoint_integration_tests;
diff --git a/crates/khive-hnsw/src/checkpoint/tests.rs b/crates/khive-hnsw/src/checkpoint/tests.rs
new file mode 100644
index 00000000..470c4cb8
--- /dev/null
+++ b/crates/khive-hnsw/src/checkpoint/tests.rs
@@ -0,0 +1,692 @@
+#![allow(clippy::field_reassign_with_default)]
+
+use super::*;
+
+fn make_id(seed: u8) -> NodeId {
+    NodeId::new([seed; 16])
+}
+
+fn sample_config() -> HnswCheckpointConfig {
+    HnswCheckpointConfig {
+        m: 16,
+        ef_construction: 200,
+        metric: "cosine".to_string(),
+    }
+}
+
+fn sample_snapshot() -> HnswSnapshot {
+    let id1 = make_id(1);
+    let id2 = make_id(2);
+    HnswSnapshot {
+        vector_count: 0, // Not used in v2
+        total_nodes: 2,
+        live_nodes: 2,
+        tombstone_count: 0,
+        max_layer: 1,
+        entry_point: Some(id1),
+        config: sample_config(),
+        indexed_ids: vec![id1, id2],
+        tombstoned_ids: vec![],
+        layers: vec![
+            // Layer 0: both nodes connected to each other
+            vec![(id1, vec![id2]), (id2, vec![id1])],
+            // Layer 1: only entry point
+            vec![(id1, vec![])],
+        ],
+        vectors: vec![],
+    }
+}
+
+fn sample_snapshot_with_tombstones() -> HnswSnapshot {
+    let id1 = make_id(1);
+    let id2 = make_id(2);
+    let id3 = make_id(3);
+    HnswSnapshot {
+        vector_count: 0,
+        total_nodes: 3,
+        live_nodes: 2,
+        tombstone_count: 1,
+        max_layer: 0,
+        entry_point: Some(id1),
+        config: sample_config(),
+        indexed_ids: vec![id1, id2, id3],
+        tombstoned_ids: vec![id2], // id2 is tombstoned
+        layers: vec![vec![
+            (id1, vec![id2, id3]),
+            (id2, vec![id1]),
+            (id3, vec![id1]),
+        ]],
+        vectors: vec![],
+    }
+}
+
+#[test]
+fn snapshot_creation_and_accessors() {
+    let snap = sample_snapshot();
+    assert_eq!(snap.len(), 2);
+    assert_eq!(snap.total_len(), 2);
+    assert_eq!(snap.tombstone_count(), 0);
+    assert!(!snap.is_empty());
+    assert_eq!(snap.max_layer, 1);
+    assert!(snap.entry_point.is_some());
+    assert_eq!(snap.indexed_ids.len(), 2);
+    assert_eq!(snap.layers.len(), 2);
+}
+
+#[test]
+fn snapshot_with_tombstones_accessors() {
+    let snap = sample_snapshot_with_tombstones();
+    assert_eq!(snap.len(), 2); // live nodes
+    assert_eq!(snap.total_len(), 3); // total including tombstones
+    assert_eq!(snap.tombstone_count(), 1);
+    assert!(!snap.is_empty());
+}
+
+#[test]
+fn empty_snapshot() {
+    let snap = HnswSnapshot {
+        vector_count: 0,
+        total_nodes: 0,
+        live_nodes: 0,
+        tombstone_count: 0,
+        max_layer: 0,
+        entry_point: None,
+        config: sample_config(),
+        indexed_ids: vec![],
+        tombstoned_ids: vec![],
+        layers: vec![],
+
+        vectors: vec![],
+    };
+    assert!(snap.is_empty());
+    assert_eq!(snap.len(), 0);
+    assert_eq!(snap.total_len(), 0);
+}
+
+// ── Verification tests ───────────────────────────────────────────────
+
+#[test]
+fn verify_valid_snapshot() {
+    let snap = sample_snapshot();
+    assert!(snap.verify().is_ok());
+}
+
+#[test]
+fn verify_valid_snapshot_with_tombstones() {
+    let snap = sample_snapshot_with_tombstones();
+    assert!(snap.verify().is_ok());
+}
+
+#[test]
+fn verify_inconsistent_counts() {
+    let mut snap = sample_snapshot();
+    snap.tombstone_count = 1; // Inconsistent: 2 != 2 + 1
+    let err = snap.verify().unwrap_err();
+    assert!(matches!(err, SnapshotError::InconsistentCounts { .. }));
+}
+
+#[test]
+fn verify_id_count_mismatch() {
+    let mut snap = sample_snapshot();
+    snap.total_nodes = 5; // Mismatch with indexed_ids.len() == 2
+    snap.live_nodes = 5;
+    let err = snap.verify().unwrap_err();
+    assert!(matches!(err, SnapshotError::IdCountMismatch { .. }));
+}
+
+#[test]
+fn verify_tombstone_id_count_mismatch() {
+    let mut snap = sample_snapshot_with_tombstones();
+    snap.tombstone_count = 2; // Mismatch with tombstoned_ids.len() == 1
+    snap.live_nodes = 1; // Adjust to keep total consistent
+    let err = snap.verify().unwrap_err();
+    assert!(matches!(
+        err,
+        SnapshotError::TombstoneIdCountMismatch { .. }
+    ));
+}
+
+#[test]
+fn verify_tombstone_not_in_index() {
+    let mut snap = sample_snapshot_with_tombstones();
+    snap.tombstoned_ids = vec![make_id(99)]; // ID not in indexed_ids
+    let err = snap.verify().unwrap_err();
+    assert!(matches!(err, SnapshotError::TombstoneNotInIndex { .. }));
+}
+
+// ── Normalization tests ──────────────────────────────────────────────
+
+#[test]
+fn normalize_v1_snapshot() {
+    // Simulate a v1 snapshot with only vector_count
+    let mut snap = HnswSnapshot {
+        vector_count: 5, // V1 field
+        total_nodes: 0,  // Will be populated by normalize
+        live_nodes: 0,
+        tombstone_count: 0,
+        max_layer: 0,
+        entry_point: None,
+        config: sample_config(),
+        indexed_ids: vec![make_id(1), make_id(2), make_id(3), make_id(4), make_id(5)],
+        tombstoned_ids: vec![],
+        layers: vec![],
+
+        vectors: vec![],
+    };
+
+    snap.normalize();
+
+    assert_eq!(snap.total_nodes, 5);
+    assert_eq!(snap.live_nodes, 5);
+    assert_eq!(snap.tombstone_count, 0);
+}
+
+#[test]
+fn normalize_infers_from_indexed_ids() {
+    let mut snap = HnswSnapshot {
+        vector_count: 0,
+        total_nodes: 0,
+        live_nodes: 0,
+        tombstone_count: 0,
+        max_layer: 0,
+        entry_point: None,
+        config: sample_config(),
+        indexed_ids: vec![make_id(1), make_id(2), make_id(3)],
+        tombstoned_ids: vec![make_id(2)],
+        layers: vec![],
+
+        vectors: vec![],
+    };
+
+    snap.normalize();
+
+    assert_eq!(snap.total_nodes, 3);
+    assert_eq!(snap.live_nodes, 2);
+    assert_eq!(snap.tombstone_count, 1);
+}
+
+// ── Serialization tests ──────────────────────────────────────────────
+
+#[test]
+fn serialization_round_trip() {
+    let snap = sample_snapshot();
+    let json = serde_json::to_string(&snap).expect("serialize");
+    let mut restored: HnswSnapshot = serde_json::from_str(&json).expect("deserialize");
+    restored.normalize();
+
+    assert_eq!(restored.total_nodes, snap.total_nodes);
+    assert_eq!(restored.live_nodes, snap.live_nodes);
+    assert_eq!(restored.tombstone_count, snap.tombstone_count);
+    assert_eq!(restored.max_layer, snap.max_layer);
+    assert_eq!(restored.entry_point, snap.entry_point);
+    assert_eq!(restored.config, snap.config);
+    assert_eq!(restored.indexed_ids, snap.indexed_ids);
+    assert_eq!(restored.tombstoned_ids, snap.tombstoned_ids);
+    assert_eq!(restored.layers.len(), snap.layers.len());
+}
+
+#[test]
+fn serialization_round_trip_with_tombstones() {
+    let snap = sample_snapshot_with_tombstones();
+    let json = serde_json::to_string(&snap).expect("serialize");
+    let restored: HnswSnapshot = serde_json::from_str(&json).expect("deserialize");
+
+    assert_eq!(restored.total_nodes, snap.total_nodes);
+    assert_eq!(restored.live_nodes, snap.live_nodes);
+    assert_eq!(restored.tombstone_count, snap.tombstone_count);
+    assert_eq!(restored.tombstoned_ids, snap.tombstoned_ids);
+    assert!(restored.verify().is_ok());
+}
+
+#[test]
+fn backward_compat_v1_deserialization() {
+    // JSON from a v1 snapshot (only has vector_count, not new fields)
+    // EmbeddingId serializes as 32 hex chars (no dashes)
+    let v1_json = r#"{
+        "vector_count": 2,
+        "max_layer": 0,
+        "entry_point": null,
+        "config": {"m": 16, "ef_construction": 200, "metric": "cosine"},
+        "indexed_ids": ["01010101010101010101010101010101", "02020202020202020202020202020202"],
+        "layers": []
+    }"#;
+
+    let mut snap: HnswSnapshot = serde_json::from_str(v1_json).expect("deserialize v1");
+    snap.normalize();
+
+    assert_eq!(snap.total_nodes, 2);
+    assert_eq!(snap.live_nodes, 2);
+    assert_eq!(snap.tombstone_count, 0);
+    assert_eq!(snap.len(), 2);
+}
+
+// ── Compatibility tests ──────────────────────────────────────────────
+
+#[test]
+fn compatibility_matching_config() {
+    let snap = sample_snapshot();
+    assert!(snap.is_compatible(&sample_config()));
+}
+
+#[test]
+fn compatibility_different_m() {
+    let snap = sample_snapshot();
+    let other = HnswCheckpointConfig {
+        m: 32,
+        ..sample_config()
+    };
+    assert!(!snap.is_compatible(&other));
+}
+
+#[test]
+fn compatibility_different_ef() {
+    let snap = sample_snapshot();
+    let other = HnswCheckpointConfig {
+        ef_construction: 400,
+        ..sample_config()
+    };
+    assert!(!snap.is_compatible(&other));
+}
+
+#[test]
+fn compatibility_different_metric() {
+    let snap = sample_snapshot();
+    let other = HnswCheckpointConfig {
+        metric: "euclidean".to_string(),
+        ..sample_config()
+    };
+    assert!(!snap.is_compatible(&other));
+}
+
+#[test]
+fn from_hnsw_config() {
+    use super::super::config::HnswConfig;
+
+    let hnsw = HnswConfig::default();
+    let ckpt = HnswCheckpointConfig::from_hnsw_config(&hnsw);
+    assert_eq!(ckpt.m, 20);
+    assert_eq!(ckpt.ef_construction, 200);
+    assert_eq!(ckpt.metric, "cosine");
+}
+
+#[test]
+fn from_hnsw_config_variants() {
+    use super::super::config::{DistanceMetric, HnswConfig};
+
+    let mut config = HnswConfig::default();
+
+    config.metric = DistanceMetric::Dot;
+    assert_eq!(
+        HnswCheckpointConfig::from_hnsw_config(&config).metric,
+        "dot"
+    );
+
+    config.metric = DistanceMetric::L2;
+    assert_eq!(
+        HnswCheckpointConfig::from_hnsw_config(&config).metric,
+        "euclidean"
+    );
+}
+
+#[test]
+fn metric_to_string_exhaustive() {
+    assert_eq!(metric_to_string(&DistanceMetric::Cosine), "cosine");
+    assert_eq!(metric_to_string(&DistanceMetric::Dot), "dot");
+    assert_eq!(metric_to_string(&DistanceMetric::L2), "euclidean");
+}
+
+// ── Error display tests ──────────────────────────────────────────────
+
+#[test]
+fn snapshot_error_display() {
+    let err = SnapshotError::InconsistentCounts {
+        total: 5,
+        live: 3,
+        tombstones: 1,
+    };
+    assert!(err.to_string().contains("inconsistent counts"));
+
+    let err = SnapshotError::IdCountMismatch {
+        expected: 5,
+        actual: 3,
+    };
+    assert!(err.to_string().contains("indexed_ids count mismatch"));
+
+    let err = SnapshotError::TombstoneIdCountMismatch {
+        expected: 2,
+        actual: 1,
+    };
+    assert!(err.to_string().contains("tombstoned_ids count mismatch"));
+
+    let err = SnapshotError::TombstoneNotInIndex { id: make_id(1) };
+    assert!(err.to_string().contains("not found in indexed_ids"));
+}
+
+// ── Canonical ordering tests ──────────────────────────────────────────
+
+#[test]
+fn sort_ids_orders_by_bytes() {
+    let mut ids = vec![make_id(5), make_id(2), make_id(9), make_id(1)];
+    sort_ids(&mut ids);
+
+    assert_eq!(ids[0], make_id(1));
+    assert_eq!(ids[1], make_id(2));
+    assert_eq!(ids[2], make_id(5));
+    assert_eq!(ids[3], make_id(9));
+}
+
+#[test]
+fn sort_ids_empty_is_noop() {
+    let mut ids: Vec<NodeId> = vec![];
+    sort_ids(&mut ids);
+    assert!(ids.is_empty());
+}
+
+#[test]
+fn sort_ids_single_element() {
+    let mut ids = vec![make_id(42)];
+    sort_ids(&mut ids);
+    assert_eq!(ids.len(), 1);
+    assert_eq!(ids[0], make_id(42));
+}
+
+#[test]
+fn is_canonical_sorted_snapshot() {
+    // sample_snapshot() has ids in sorted order (1, 2) and layers also sorted
+    let snap = sample_snapshot();
+    assert!(snap.is_canonical());
+}
+
+#[test]
+fn is_canonical_empty_snapshot() {
+    let snap = HnswSnapshot {
+        vector_count: 0,
+        total_nodes: 0,
+        live_nodes: 0,
+        tombstone_count: 0,
+        max_layer: 0,
+        entry_point: None,
+        config: sample_config(),
+        indexed_ids: vec![],
+        tombstoned_ids: vec![],
+        layers: vec![],
+
+        vectors: vec![],
+    };
+    assert!(snap.is_canonical());
+}
+
+#[test]
+fn is_canonical_unsorted_indexed_ids() {
+    let id1 = make_id(1);
+    let id2 = make_id(2);
+    let snap = HnswSnapshot {
+        vector_count: 0,
+        total_nodes: 2,
+        live_nodes: 2,
+        tombstone_count: 0,
+        max_layer: 0,
+        entry_point: Some(id1),
+        config: sample_config(),
+        indexed_ids: vec![id2, id1], // Reversed order
+        tombstoned_ids: vec![],
+        layers: vec![vec![(id1, vec![id2]), (id2, vec![id1])]],
+
+        vectors: vec![],
+    };
+    assert!(!snap.is_canonical());
+}
+
+#[test]
+fn is_canonical_unsorted_tombstoned_ids() {
+    let id1 = make_id(1);
+    let id2 = make_id(2);
+    let id3 = make_id(3);
+    let snap = HnswSnapshot {
+        vector_count: 0,
+        total_nodes: 3,
+        live_nodes: 1,
+        tombstone_count: 2,
+        max_layer: 0,
+        entry_point: Some(id1),
+        config: sample_config(),
+        indexed_ids: vec![id1, id2, id3], // Sorted
+        tombstoned_ids: vec![id3, id2],   // Reversed order
+        layers: vec![],
+
+        vectors: vec![],
+    };
+    assert!(!snap.is_canonical());
+}
+
+#[test]
+fn is_canonical_unsorted_layer() {
+    let id1 = make_id(1);
+    let id2 = make_id(2);
+    let snap = HnswSnapshot {
+        vector_count: 0,
+        total_nodes: 2,
+        live_nodes: 2,
+        tombstone_count: 0,
+        max_layer: 0,
+        entry_point: Some(id1),
+        config: sample_config(),
+        indexed_ids: vec![id1, id2], // Sorted
+        tombstoned_ids: vec![],
+        layers: vec![vec![(id2, vec![id1]), (id1, vec![id2])]], // Reversed order
+
+        vectors: vec![],
+    };
+    assert!(!snap.is_canonical());
+}
+
+#[test]
+fn canonicalize_sorts_indexed_ids() {
+    let id1 = make_id(1);
+    let id2 = make_id(2);
+    let id3 = make_id(3);
+    let mut snap = HnswSnapshot {
+        vector_count: 0,
+        total_nodes: 3,
+        live_nodes: 3,
+        tombstone_count: 0,
+        max_layer: 0,
+        entry_point: Some(id1),
+        config: sample_config(),
+        indexed_ids: vec![id3, id1, id2], // Unsorted
+        tombstoned_ids: vec![],
+        layers: vec![],
+
+        vectors: vec![],
+    };
+
+    snap.canonicalize();
+
+    assert_eq!(snap.indexed_ids, vec![id1, id2, id3]);
+    assert!(snap.is_canonical());
+}
+
+#[test]
+fn canonicalize_sorts_tombstoned_ids() {
+    let id1 = make_id(1);
+    let id2 = make_id(2);
+    let id3 = make_id(3);
+    let mut snap = HnswSnapshot {
+        vector_count: 0,
+        total_nodes: 3,
+        live_nodes: 1,
+        tombstone_count: 2,
+        max_layer: 0,
+        entry_point: Some(id1),
+        config: sample_config(),
+        indexed_ids: vec![id1, id2, id3],
+        tombstoned_ids: vec![id3, id2], // Unsorted
+        layers: vec![],
+
+        vectors: vec![],
+    };
+
+    snap.canonicalize();
+
+    assert_eq!(snap.tombstoned_ids, vec![id2, id3]);
+    assert!(snap.is_canonical());
+}
+
+#[test]
+fn canonicalize_sorts_layers() {
+    let id1 = make_id(1);
+    let id2 = make_id(2);
+    let id3 = make_id(3);
+    let mut snap = HnswSnapshot {
+        vector_count: 0,
+        total_nodes: 3,
+        live_nodes: 3,
+        tombstone_count: 0,
+        max_layer: 1,
+        entry_point: Some(id1),
+        config: sample_config(),
+        indexed_ids: vec![id1, id2, id3],
+        tombstoned_ids: vec![],
+        layers: vec![
+            // Layer 0: nodes in wrong order
+            vec![
+                (id3, vec![id1, id2]),
+                (id1, vec![id2, id3]),
+                (id2, vec![id1, id3]),
+            ],
+            // Layer 1: also wrong order
+            vec![(id2, vec![]), (id1, vec![])],
+        ],
+
+        vectors: vec![],
+    };
+
+    snap.canonicalize();
+
+    // Verify layer 0 is sorted by node ID
+    assert_eq!(snap.layers[0][0].0, id1);
+    assert_eq!(snap.layers[0][1].0, id2);
+    assert_eq!(snap.layers[0][2].0, id3);
+
+    // Verify layer 1 is sorted by node ID
+    assert_eq!(snap.layers[1][0].0, id1);
+    assert_eq!(snap.layers[1][1].0, id2);
+
+    assert!(snap.is_canonical());
+}
+
+#[test]
+fn canonicalize_preserves_neighbor_order() {
+    // Neighbor order should NOT be sorted (reflects proximity from HNSW algorithm)
+    let id1 = make_id(1);
+    let id2 = make_id(2);
+    let id3 = make_id(3);
+    let mut snap = HnswSnapshot {
+        vector_count: 0,
+        total_nodes: 3,
+        live_nodes: 3,
+        tombstone_count: 0,
+        max_layer: 0,
+        entry_point: Some(id1),
+        config: sample_config(),
+        indexed_ids: vec![id1, id2, id3],
+        tombstoned_ids: vec![],
+        layers: vec![vec![
+            (id1, vec![id3, id2]), // Neighbors intentionally in non-byte-sorted order
+        ]],
+
+        vectors: vec![],
+    };
+
+    snap.canonicalize();
+
+    // Neighbor list should be unchanged
+    assert_eq!(snap.layers[0][0].1, vec![id3, id2]);
+}
+
+#[test]
+fn canonicalize_is_idempotent() {
+    let id1 = make_id(1);
+    let id2 = make_id(2);
+    let id3 = make_id(3);
+    let mut snap = HnswSnapshot {
+        vector_count: 0,
+        total_nodes: 3,
+        live_nodes: 2,
+        tombstone_count: 1,
+        max_layer: 1,
+        entry_point: Some(id1),
+        config: sample_config(),
+        indexed_ids: vec![id3, id1, id2], // Unsorted
+        tombstoned_ids: vec![id3],
+        layers: vec![
+            vec![(id3, vec![id1]), (id1, vec![id2]), (id2, vec![id3])],
+            vec![(id2, vec![]), (id1, vec![])],
+        ],
+
+        vectors: vec![],
+    };
+
+    snap.canonicalize();
+    let after_first = snap.clone();
+
+    snap.canonicalize();
+    let after_second = snap.clone();
+
+    // Both passes should produce identical results
+    assert_eq!(after_first.indexed_ids, after_second.indexed_ids);
+    assert_eq!(after_first.tombstoned_ids, after_second.tombstoned_ids);
+    assert_eq!(after_first.layers.len(), after_second.layers.len());
+    for (l1, l2) in after_first.layers.iter().zip(after_second.layers.iter()) {
+        assert_eq!(l1, l2);
+    }
+}
+
+#[test]
+fn canonical_snapshot_serializes_deterministically() {
+    // Create two snapshots with same data but different initial order
+    let id1 = make_id(1);
+    let id2 = make_id(2);
+    let id3 = make_id(3);
+
+    let mut snap1 = HnswSnapshot {
+        vector_count: 0,
+        total_nodes: 3,
+        live_nodes: 3,
+        tombstone_count: 0,
+        max_layer: 0,
+        entry_point: Some(id1),
+        config: sample_config(),
+        indexed_ids: vec![id3, id1, id2],
+        tombstoned_ids: vec![],
+        layers: vec![vec![(id3, vec![id1]), (id1, vec![id2]), (id2, vec![id3])]],
+
+        vectors: vec![],
+    };
+
+    let mut snap2 = HnswSnapshot {
+        vector_count: 0,
+        total_nodes: 3,
+        live_nodes: 3,
+        tombstone_count: 0,
+        max_layer: 0,
+        entry_point: Some(id1),
+        config: sample_config(),
+        indexed_ids: vec![id2, id3, id1], // Different initial order
+        tombstoned_ids: vec![],
+        layers: vec![vec![(id2, vec![id3]), (id3, vec![id1]), (id1, vec![id2])]],
+
+        vectors: vec![],
+    };
+
+    snap1.canonicalize();
+    snap2.canonicalize();
+
+    let json1 = serde_json::to_string(&snap1).expect("serialize");
+    let json2 = serde_json::to_string(&snap2).expect("serialize");
+
+    assert_eq!(
+        json1, json2,
+        "Canonical snapshots should serialize identically"
+    );
+}
diff --git a/crates/khive-hnsw/src/config.rs b/crates/khive-hnsw/src/config.rs
new file mode 100644
index 00000000..64c16a08
--- /dev/null
+++ b/crates/khive-hnsw/src/config.rs
@@ -0,0 +1,253 @@
+//! HNSW configuration types.
+//!
+//! See ADR-003 for recommended parameter values.
+//!
+//! # RETRIEVAL-05: Embedding Key Validation
+//!
+//! The current implementation uses `EmbeddingId` (from khive-db) as the key type,
+//! which provides type-safe, validated embedding identifiers. The validation occurs
+//! at ID construction time (in khive-db), not in HnswConfig.
+//!
+//! **Design decision**: Validation is NOT duplicated in HnswConfig because:
+//! 1. `EmbeddingId` is already a newtype that enforces validity
+//! 2. Double validation would add overhead without security benefit
+//! 3. The type system already prevents invalid keys at compile time
+//!
+//! If custom key types are needed in the future, add a `K: EmbeddingKey` trait
+//! bound with validation methods.
+
+use serde::{Deserialize, Serialize};
+
+use crate::error::{Result, RetrievalError};
+
+/// Maximum allowed level in the HNSW graph.
+/// Prevents unbounded memory allocation from malformed random values.
+/// For 1 billion vectors with typical ml, expected max level is ~16-18.
+pub const MAX_LEVEL: usize = 64;
+
+/// Default threshold for triggering a rebuild (10% tombstones).
+/// Aligned with ADR-003: Index Management Strategy.
+pub const DEFAULT_REBUILD_THRESHOLD: f64 = 0.10;
+
+// Re-export from canonical location (foundation/types).
+// Canonical variants: Cosine, Dot, L2.
+// Serde aliases on canonical handle backward compat: "euclidean" -> L2, "dot_product" -> Dot.
+pub use khive_types::vector::DistanceMetric;
+
+/// HNSW configuration parameters per ADR-003.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct HnswConfig {
+    /// Maximum number of connections per node per layer (M).
+    /// Higher = better recall, more memory, slower build.
+    /// Recommended: 16 (small), 32 (medium), 64 (large datasets).
+    pub m: usize,
+
+    /// Maximum connections for layer 0 (typically 2*M).
+    /// Layer 0 is densest, needs more connections for good recall.
+    pub m_max0: usize,
+
+    /// Size of dynamic candidate list during construction.
+    /// Higher = better graph quality, slower build.
+    /// Recommended: 100-500.
+    pub ef_construction: usize,
+
+    /// Normalization factor for level generation: 1/ln(M).
+    /// Controls how quickly layers thin out.
+    pub ml: f64,
+
+    /// Search ef (dynamic candidate list size during search).
+    /// Higher = better recall, slower search.
+    /// Recommended: 50-200.
+    pub ef_search: usize,
+
+    /// Vector dimensions (must match embedding model).
+    /// Default: 768 (BGE-base).
+    pub dimensions: usize,
+
+    /// Distance metric for similarity computation.
+    pub metric: DistanceMetric,
+
+    /// Threshold for automatic rebuild (tombstone ratio).
+    /// When tombstones exceed this ratio, rebuild() is recommended.
+    pub rebuild_threshold: f64,
+
+    /// Seed for reproducible level generation.
+    /// If None, uses OS entropy (non-deterministic).
+    /// If Some(seed), uses seeded RNG for reproducible index structure.
+    #[serde(default)]
+    pub seed: Option<u64>,
+
+    /// Maximum memory budget in bytes for the index.
+    /// If None, no memory limit is enforced (default).
+    /// If Some(limit), inserts that would exceed the budget are rejected
+    /// with `RetrievalError::BudgetExceeded`. Updates to existing entries
+    /// bypass the budget check.
+    #[serde(default)]
+    pub memory_budget: Option<usize>,
+}
+
+impl Default for HnswConfig {
+    /// Creates default configuration per ADR-003.
+    ///
+    /// M=20, ef_construction=200, ef_search=80, dimensions=384.
+    /// M=20 is optimal for k=10 recall at 384d (empirically measured).
+    /// ef_search=80 sufficient for <100K corpus; 100 was overprovisioned.
+    fn default() -> Self {
+        Self {
+            m: 20,
+            m_max0: 40,
+            ef_construction: 200,
+            ml: 1.0 / (20.0_f64).ln(),
+            ef_search: 80,
+            dimensions: 384,
+            metric: DistanceMetric::Cosine,
+            rebuild_threshold: DEFAULT_REBUILD_THRESHOLD,
+            seed: None,
+            memory_budget: None,
+        }
+    }
+}
+
+impl HnswConfig {
+    /// Validate configuration invariants that must hold for every index.
+    pub fn validate(&self) -> Result<()> {
+        if self.dimensions == 0 {
+            return Err(RetrievalError::Configuration(
+                "dimensions: HNSW dimensions must be greater than zero".to_string(),
+            ));
+        }
+        Ok(())
+    }
+
+    /// Create config with custom dimensions, returning an error for invalid values.
+    pub fn try_with_dimensions(dimensions: usize) -> Result<Self> {
+        let config = Self {
+            dimensions,
+            ..Default::default()
+        };
+        config.validate()?;
+        Ok(config)
+    }
+
+    /// Create config with custom dimensions, keeping ADR-003 defaults.
+    ///
+    /// # Panics
+    /// Panics if `dimensions` is 0.
+    pub fn with_dimensions(dimensions: usize) -> Self {
+        Self::try_with_dimensions(dimensions).expect("HNSW dimensions must be > 0")
+    }
+
+    /// Create config for high recall (slower build, better search).
+    pub fn high_recall() -> Self {
+        Self {
+            m: 32,
+            m_max0: 64,
+            ef_construction: 400,
+            ef_search: 200,
+            ..Default::default()
+        }
+    }
+
+    /// Create config for fast build (faster build, lower recall).
+    pub fn fast_build() -> Self {
+        Self {
+            m: 12,
+            m_max0: 24,
+            ef_construction: 100,
+            ef_search: 50,
+            ..Default::default()
+        }
+    }
+
+    /// Create config optimized for memory efficiency.
+    pub fn low_memory() -> Self {
+        Self {
+            m: 8,
+            m_max0: 16,
+            ef_construction: 80,
+            ef_search: 40,
+            ..Default::default()
+        }
+    }
+
+    /// Set seed for reproducible level generation.
+    ///
+    /// With the same seed and insertion order, the index structure
+    /// will be identical across runs.
+    #[must_use]
+    pub fn with_seed(mut self, seed: u64) -> Self {
+        self.seed = Some(seed);
+        self
+    }
+
+    /// Set memory budget in bytes.
+    ///
+    /// When set, inserts that would cause the estimated memory usage
+    /// to exceed this limit are rejected with `BudgetExceeded`.
+    /// Updates to existing entries bypass the budget check.
+    #[must_use]
+    pub fn with_memory_budget(mut self, budget: usize) -> Self {
+        self.memory_budget = Some(budget);
+        self
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_config_defaults() {
+        let config = HnswConfig::default();
+        assert_eq!(config.m, 20);
+        assert_eq!(config.ef_construction, 200);
+        assert_eq!(config.ef_search, 80);
+        assert_eq!(config.dimensions, 384);
+    }
+
+    #[test]
+    fn test_config_variants() {
+        let high = HnswConfig::high_recall();
+        assert_eq!(high.m, 32);
+        assert_eq!(high.ef_construction, 400);
+
+        let fast = HnswConfig::fast_build();
+        assert_eq!(fast.m, 12);
+
+        let low = HnswConfig::low_memory();
+        assert_eq!(low.m, 8);
+    }
+
+    #[test]
+    fn test_with_dimensions() {
+        let config = HnswConfig::with_dimensions(1536);
+        assert_eq!(config.dimensions, 1536);
+        assert_eq!(config.m, 20); // Other defaults preserved
+    }
+
+    #[test]
+    fn test_try_with_dimensions_rejects_zero() {
+        let result = HnswConfig::try_with_dimensions(0);
+        assert!(result.is_err());
+    }
+
+    #[test]
+    #[should_panic(expected = "HNSW dimensions must be > 0")]
+    fn test_with_dimensions_rejects_zero() {
+        HnswConfig::with_dimensions(0);
+    }
+
+    #[test]
+    #[should_panic(expected = "HNSW configuration must be valid")]
+    fn test_index_with_config_rejects_zero_dimensions() {
+        crate::HnswIndex::with_config(HnswConfig {
+            dimensions: 0,
+            ..Default::default()
+        });
+    }
+
+    #[test]
+    fn test_distance_metric_default() {
+        assert_eq!(DistanceMetric::default(), DistanceMetric::Cosine);
+    }
+}
diff --git a/crates/khive-hnsw/src/distance.rs b/crates/khive-hnsw/src/distance.rs
new file mode 100644
index 00000000..9f225736
--- /dev/null
+++ b/crates/khive-hnsw/src/distance.rs
@@ -0,0 +1,368 @@
+//! Distance computation for HNSW.
+//!
+//! # Formal Verification
+//!
+//! This implementation corresponds to the formal proofs in
+//! `proofs/Lion/Retrieval/Distance.lean`. Key theorems:
+//!
+//! ## Metric Axioms (Euclidean)
+//! - `euclidean_nonneg`: d(x,y) ≥ 0
+//! - `euclidean_self`: d(x,x) = 0
+//! - `euclidean_symm`: d(x,y) = d(y,x)
+//! - `euclidean_triangle`: d(x,z) ≤ d(x,y) + d(y,z)
+//!
+//! ## Cosine Properties
+//! - `cosine_range`: -1 ≤ cos(x,y) ≤ 1 for unit vectors
+//! - `cosine_not_metric`: cosine does NOT satisfy triangle inequality
+//!
+//! ## Dot Product
+//! - `dot_eq_inner`: bridges to Mathlib inner product space
+//!
+//! ## Distance-Similarity Conversion
+//! - `distanceToSimilarity`: sim = 1/(1+d) for Euclidean
+//! - `similarity_nonneg`: similarity ≥ 0
+//! - `similarity_bounded`: 0 ≤ sim ≤ 1 for d ≥ 0
+
+use super::config::DistanceMetric;
+
+/// Compute cosine distance from pre-computed dot product and norms.
+///
+/// Clamps the cosine similarity to [-1, 1] before converting to distance,
+/// preventing out-of-range values caused by floating-point rounding when
+/// vectors are not perfectly unit-normalised (RETRIEVAL-M3).
+///
+/// Returns a distance in [0, 2] (0 = identical direction, 2 = opposite).
+/// Falls back to 1.0 (orthogonal) for zero or infinite norms.
+#[inline]
+pub(crate) fn cosine_distance_from_parts(dot: f32, a_norm: f32, b_norm: f32) -> f32 {
+    let denom = a_norm * b_norm;
+    if !denom.is_finite() || denom <= 0.0 {
+        return 1.0;
+    }
+    let cosine = (dot / denom).clamp(-1.0, 1.0);
+    if cosine.is_finite() {
+        1.0 - cosine
+    } else {
+        1.0
+    }
+}
+
+/// Compute distance between two vectors.
+/// Returns distance (lower = more similar) for heap operations.
+///
+/// Uses SIMD-accelerated implementations from khive-embed (ADR-002).
+#[inline]
+pub fn compute_distance(
+    a: &[f32],
+    a_norm: f32,
+    b: &[f32],
+    b_norm: f32,
+    metric: DistanceMetric,
+) -> f32 {
+    if !a_norm.is_finite() || !b_norm.is_finite() {
+        return 1.0;
+    }
+
+    match metric {
+        DistanceMetric::Cosine => {
+            // ADR-002: khive-embed is the SIMD foundation layer
+            //
+            // **PROOF CORRESPONDENCE**: Lion.Retrieval.Cosine.cosine_sim_bounded
+            // Cosine similarity is bounded: -1 <= cos(x,y) <= 1 for unit vectors
+            //
+            // **PROOF CORRESPONDENCE**: Lion.Retrieval.Cosine.cauchy_schwarz
+            // Cauchy-Schwarz inequality: |<x,y>| <= ||x|| * ||y||
+            let dot = lattice_embed::simd::dot_product(a, b);
+            cosine_distance_from_parts(dot, a_norm, b_norm)
+        }
+        DistanceMetric::Dot => {
+            // Negate for min-heap (higher dot = lower distance)
+            // ADR-002: lattice-embed is the SIMD foundation layer
+            -lattice_embed::simd::dot_product(a, b)
+        }
+        DistanceMetric::L2 => {
+            // ADR-002: lattice-embed is the SIMD foundation layer
+            //
+            // **PROOF CORRESPONDENCE**: Lion.Retrieval.Distance.euclidean_nonneg
+            // Euclidean distance is non-negative: d(x,y) >= 0
+            //
+            // **PROOF CORRESPONDENCE**: Lion.Retrieval.Distance.euclidean_symm
+            // Euclidean distance is symmetric: d(x,y) = d(y,x)
+            //
+            // **PROOF CORRESPONDENCE**: Lion.Retrieval.Distance.euclidean_triangle
+            // Triangle inequality: d(x,z) <= d(x,y) + d(y,z)
+            lattice_embed::simd::euclidean_distance(a, b)
+        }
+        _ => {
+            // DistanceMetric is #[non_exhaustive]; fall back to cosine for
+            // any future variants until explicitly supported.
+            let dot = lattice_embed::simd::dot_product(a, b);
+            cosine_distance_from_parts(dot, a_norm, b_norm)
+        }
+    }
+}
+
+/// Ordering distance for HNSW-internal comparisons (graph maintenance, neighbor selection).
+///
+/// Returns a value that is **monotone** with the true distance: smaller ordering distance
+/// ↔ smaller true distance. Callers must NOT interpret the return value as a true distance
+/// (e.g., do not pass it to `distance_to_similarity`). Use `compute_distance` for output.
+///
+/// Optimization: for L2 metric, returns squared Euclidean distance (skips the final `.sqrt()`).
+/// For all other metrics, identical to `compute_distance`.
+#[inline]
+pub(crate) fn compute_ordering_distance(
+    a: &[f32],
+    a_norm: f32,
+    b: &[f32],
+    b_norm: f32,
+    metric: DistanceMetric,
+) -> f32 {
+    match metric {
+        DistanceMetric::L2 => lattice_embed::simd::squared_euclidean_distance(a, b),
+        other => compute_distance(a, a_norm, b, b_norm, other),
+    }
+}
+
+/// Convert distance back to similarity score (higher = more similar).
+///
+/// **PROOF CORRESPONDENCE**: Lion.Retrieval.Distance.similarity_mono
+/// Similarity conversion is monotonically decreasing in distance:
+/// d1 < d2 implies sim(d1) > sim(d2)
+#[inline]
+pub fn distance_to_similarity(dist: f32, metric: DistanceMetric) -> f32 {
+    match metric {
+        DistanceMetric::Cosine => 1.0 - dist,
+        DistanceMetric::Dot => -dist,
+        DistanceMetric::L2 => 1.0 / (1.0 + dist),
+        // Fall back to cosine similarity for future variants.
+        _ => 1.0 - dist,
+    }
+}
+
+/// Ordered wrapper for f32 to enable use in BinaryHeap.
+///
+/// # NaN Handling
+///
+/// NaN values are treated as "infinite distance" (greater than all other values).
+/// This ensures deterministic ordering and fail-safe behavior when encountering
+/// malformed embeddings. Two NaN values compare as equal.
+///
+/// # Formal Properties
+///
+/// - Reflexive: a.cmp(a) = Equal
+/// - Antisymmetric: a.cmp(b) = Less implies b.cmp(a) = Greater
+/// - Transitive: a < b and b < c implies a < c
+/// - Total: for all a, b: a.cmp(b) is defined
+#[derive(Clone, Copy, PartialEq)]
+pub struct OrderedF32(pub f32);
+
+impl Eq for OrderedF32 {}
+
+impl PartialOrd for OrderedF32 {
+    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+        Some(self.cmp(other))
+    }
+}
+
+impl Ord for OrderedF32 {
+    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+        // Handle NaN: treat as greater than all finite values
+        // This ensures fail-safe behavior: NaN results get pushed to the end
+        match (self.0.is_nan(), other.0.is_nan()) {
+            (true, true) => std::cmp::Ordering::Equal,
+            (true, false) => std::cmp::Ordering::Greater,
+            (false, true) => std::cmp::Ordering::Less,
+            (false, false) => {
+                // SAFETY: Both values are confirmed non-NaN by the match guards above.
+                // For non-NaN f32 values (including infinity), partial_cmp is total
+                // and always returns Some. This is a mathematical invariant of IEEE 754.
+                self.0
+                    .partial_cmp(&other.0)
+                    .expect("both values are non-NaN, partial_cmp should succeed")
+            }
+        }
+    }
+}
+
+impl OrderedF32 {
+    /// Check if the wrapped value is NaN.
+    #[inline]
+    #[allow(dead_code)] // TODO(#2640): wire or remove when use case is defined
+    pub fn is_nan(&self) -> bool {
+        self.0.is_nan()
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // =========================================================================
+    // RETRIEVAL-M3: Cosine distance clamping tests
+    // =========================================================================
+
+    /// Verify that cosine_distance_from_parts clamps correctly when fp rounding
+    /// produces a dot/norm ratio slightly outside [-1, 1].
+    #[test]
+    fn test_cosine_distance_from_parts_clamping() {
+        // Normal case: identical direction => distance 0
+        assert!((cosine_distance_from_parts(1.0, 1.0, 1.0) - 0.0).abs() < 1e-6);
+
+        // Normal case: opposite direction => distance 2
+        assert!((cosine_distance_from_parts(-1.0, 1.0, 1.0) - 2.0).abs() < 1e-6);
+
+        // Rounding artifact: dot slightly > denom (should clamp cosine to 1.0 => dist 0)
+        let dist = cosine_distance_from_parts(1.0000002, 1.0, 1.0);
+        assert!(
+            dist >= 0.0,
+            "distance must be non-negative after clamping, got {dist}"
+        );
+        assert!(
+            dist <= 2.0,
+            "distance must be <= 2 after clamping, got {dist}"
+        );
+
+        // Zero norms => fallback 1.0
+        assert_eq!(cosine_distance_from_parts(0.0, 0.0, 1.0), 1.0);
+        assert_eq!(cosine_distance_from_parts(0.0, 1.0, 0.0), 1.0);
+
+        // Infinite denom => fallback 1.0
+        assert_eq!(
+            cosine_distance_from_parts(f32::NAN, f32::INFINITY, 1.0),
+            1.0
+        );
+    }
+
+    #[test]
+    fn test_cosine_distance() {
+        let a = vec![1.0, 0.0];
+        let b = vec![1.0, 0.0];
+        let a_norm = 1.0;
+        let b_norm = 1.0;
+
+        let dist = compute_distance(&a, a_norm, &b, b_norm, DistanceMetric::Cosine);
+        assert!(dist.abs() < 0.001); // Same vector = 0 distance
+
+        let c = vec![0.0, 1.0];
+        let dist = compute_distance(&a, a_norm, &c, 1.0, DistanceMetric::Cosine);
+        assert!((dist - 1.0).abs() < 0.001); // Orthogonal = 1 distance
+    }
+
+    #[test]
+    fn test_euclidean_distance() {
+        let a = vec![0.0, 0.0];
+        let b = vec![3.0, 4.0];
+
+        let dist = compute_distance(&a, 0.0, &b, 5.0, DistanceMetric::L2);
+        assert!((dist - 5.0).abs() < 0.001);
+    }
+
+    #[test]
+    fn test_dot_product_distance() {
+        let a = vec![1.0, 2.0];
+        let b = vec![2.0, 3.0];
+
+        let dist = compute_distance(&a, 0.0, &b, 0.0, DistanceMetric::Dot);
+        // dot = 1*2 + 2*3 = 8, distance = -8
+        assert!((dist - (-8.0)).abs() < 0.001);
+    }
+
+    #[test]
+    fn test_distance_to_similarity() {
+        // Cosine: similarity = 1 - distance
+        assert!((distance_to_similarity(0.2, DistanceMetric::Cosine) - 0.8).abs() < 0.001);
+
+        // Dot: similarity = -distance
+        assert!((distance_to_similarity(-5.0, DistanceMetric::Dot) - 5.0).abs() < 0.001);
+
+        // Euclidean: similarity = 1/(1+distance)
+        assert!((distance_to_similarity(1.0, DistanceMetric::L2) - 0.5).abs() < 0.001);
+    }
+
+    #[test]
+    fn test_ordered_f32() {
+        let a = OrderedF32(1.0);
+        let b = OrderedF32(2.0);
+        assert!(a < b);
+        assert_eq!(a.cmp(&a), std::cmp::Ordering::Equal);
+    }
+
+    // =========================================================================
+    // RETRIEVAL-02: NaN Handling Tests
+    // =========================================================================
+
+    #[test]
+    fn test_ordered_f32_nan_handling() {
+        let nan = OrderedF32(f32::NAN);
+        let finite = OrderedF32(1.0);
+        let infinity = OrderedF32(f32::INFINITY);
+        let neg_infinity = OrderedF32(f32::NEG_INFINITY);
+
+        // NaN is greater than all finite values
+        assert!(nan > finite);
+        assert!(finite < nan);
+
+        // NaN is greater than infinity
+        assert!(nan > infinity);
+        assert!(infinity < nan);
+
+        // NaN is greater than negative infinity
+        assert!(nan > neg_infinity);
+
+        // Two NaNs are equal
+        let nan2 = OrderedF32(f32::NAN);
+        assert_eq!(nan.cmp(&nan2), std::cmp::Ordering::Equal);
+    }
+
+    #[test]
+    fn test_ordered_f32_sorting_with_nan() {
+        // When sorting distances for nearest neighbor search, NaN should end up last
+        let mut distances = [
+            OrderedF32(0.5),
+            OrderedF32(f32::NAN),
+            OrderedF32(0.1),
+            OrderedF32(0.9),
+            OrderedF32(f32::NAN),
+            OrderedF32(0.3),
+        ];
+
+        // Sort ascending (for min-heap behavior)
+        distances.sort();
+
+        // Non-NaN values should be at the front, sorted
+        assert_eq!(distances[0].0, 0.1);
+        assert_eq!(distances[1].0, 0.3);
+        assert_eq!(distances[2].0, 0.5);
+        assert_eq!(distances[3].0, 0.9);
+        // NaN values should be at the end
+        assert!(distances[4].is_nan());
+        assert!(distances[5].is_nan());
+    }
+
+    #[test]
+    fn test_ordered_f32_deterministic_ordering() {
+        // Same values should always produce same ordering
+        for _ in 0..10 {
+            let values = vec![OrderedF32(0.5), OrderedF32(0.5), OrderedF32(0.1)];
+
+            let mut sorted = values.clone();
+            sorted.sort();
+
+            assert_eq!(sorted[0].0, 0.1);
+            assert_eq!(sorted[1].0, 0.5);
+            assert_eq!(sorted[2].0, 0.5);
+        }
+    }
+
+    #[test]
+    fn test_ordered_f32_infinity() {
+        let a = OrderedF32(f32::INFINITY);
+        let b = OrderedF32(f32::NEG_INFINITY);
+        let c = OrderedF32(0.0);
+
+        assert!(a > c);
+        assert!(b < c);
+        assert!(a > b);
+    }
+}
diff --git a/crates/khive-hnsw/src/error.rs b/crates/khive-hnsw/src/error.rs
new file mode 100644
index 00000000..a4de60a1
--- /dev/null
+++ b/crates/khive-hnsw/src/error.rs
@@ -0,0 +1,85 @@
+//! Error types for the HNSW crate.
+
+use thiserror::Error;
+
+/// Category of error — used to decide retry policy.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum ErrorKind {
+    /// Transient — may succeed on retry.
+    Transient,
+    /// Permanent — retrying will not help.
+    Permanent,
+}
+
+/// Errors that can occur during HNSW operations.
+#[derive(Error, Debug)]
+pub enum RetrievalError {
+    /// Vector dimension mismatch.
+    #[error("dimension mismatch: expected {expected}, got {actual}")]
+    DimensionMismatch {
+        /// Expected dimensionality.
+        expected: usize,
+        /// Actual dimensionality provided.
+        actual: usize,
+    },
+
+    /// Memory budget exceeded on insert.
+    #[error(
+        "memory budget exceeded: current={current_usage}, item={item_size}, limit={limit}"
+    )]
+    BudgetExceeded {
+        /// Current memory usage in bytes.
+        current_usage: usize,
+        /// Estimated cost of the new item in bytes.
+        item_size: usize,
+        /// Configured budget in bytes.
+        limit: usize,
+    },
+
+    /// HNSW index operation error.
+    #[error("hnsw error: {0}")]
+    Hnsw(String),
+
+    /// Configuration error.
+    #[error("configuration error: {0}")]
+    Configuration(String),
+
+    /// Query timed out.
+    #[error("query timed out after {elapsed_ms}ms")]
+    QueryTimeout {
+        /// How long the query ran before timing out.
+        elapsed_ms: u64,
+    },
+}
+
+impl RetrievalError {
+    /// Create an HNSW error from any displayable value.
+    pub fn hnsw(msg: impl std::fmt::Display) -> Self {
+        Self::Hnsw(msg.to_string())
+    }
+
+    /// Create a `BudgetExceeded` error.
+    pub fn budget_exceeded(current_usage: usize, item_size: usize, limit: usize) -> Self {
+        Self::BudgetExceeded {
+            current_usage,
+            item_size,
+            limit,
+        }
+    }
+
+    /// Return the error kind (Transient vs Permanent).
+    pub fn kind(&self) -> ErrorKind {
+        match self {
+            Self::QueryTimeout { .. } => ErrorKind::Transient,
+            _ => ErrorKind::Permanent,
+        }
+    }
+
+    /// Returns true if retrying the operation might succeed.
+    pub fn is_retryable(&self) -> bool {
+        self.kind() == ErrorKind::Transient
+    }
+}
+
+/// Convenience `Result` alias for HNSW operations.
+pub type Result<T> = std::result::Result<T, RetrievalError>;
diff --git a/crates/khive-hnsw/src/index/build_batch.rs b/crates/khive-hnsw/src/index/build_batch.rs
new file mode 100644
index 00000000..22097ca0
--- /dev/null
+++ b/crates/khive-hnsw/src/index/build_batch.rs
@@ -0,0 +1,240 @@
+//! Batch build for HNSW index.
+//!
+//! Builds an HNSW index from a batch of vectors using a two-phase approach:
+//!
+//! 1. **Seed phase** (sequential): Insert sqrt(N) nodes normally to establish
+//!    the upper-layer graph structure and entry point.
+//!
+//! 2. **Search phase**: For remaining nodes, find neighbors against the frozen
+//!    seed graph, then merge results sequentially.
+
+use super::HnswIndex;
+use crate::error::{Result, RetrievalError};
+use crate::node::HnswNode;
+use crate::NodeId;
+use rayon::prelude::*;
+
+/// Pre-computed neighbor information for a node to be inserted.
+///
+/// Produced during the parallel search phase, consumed during the sequential merge.
+struct PrecomputedInsert {
+    id: NodeId,
+    vector: Vec<f32>,
+    level: usize,
+    /// Neighbors per layer: (layer, Vec<(distance, internal_id)>).
+    layer_candidates: Vec<(usize, Vec<(f32, usize)>)>,
+}
+
+impl HnswIndex {
+    /// Build an index from a batch of vectors.
+    ///
+    /// This is faster than individual insertions for large batches because
+    /// validation, level assignment, and merging are handled in one pass.
+    ///
+    /// # Algorithm
+    ///
+    /// 1. **Seed phase**: The first `sqrt(N)` nodes are inserted sequentially.
+    ///    This builds the upper-layer structure that guides all subsequent searches.
+    ///
+    /// 2. **Level assignment**: Levels for remaining nodes are pre-generated
+    ///    sequentially to preserve deterministic RNG consumption order.
+    ///
+    /// 3. **Search**: Each remaining node searches the current frozen graph for
+    ///    its neighbors.
+    ///
+    /// 4. **Sequential merge**: Nodes are inserted with their pre-computed
+    ///    neighbors, adding bidirectional connections and updating the graph.
+    ///
+    /// # Quality Notes
+    ///
+    /// Because the parallel phase searches a "frozen" graph (the seed nodes),
+    /// the neighbor quality for non-seed nodes depends only on the seed graph,
+    /// not on other parallel insertions. This means recall may differ slightly
+    /// from fully sequential construction. In practice, with sqrt(N) seeds the
+    /// difference is negligible for typical workloads.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if any vector has incorrect dimensions or if the memory
+    /// budget would be exceeded.
+    pub fn build_batch(&mut self, items: Vec<(NodeId, Vec<f32>)>) -> Result<()> {
+        if items.is_empty() {
+            return Ok(());
+        }
+
+        // Validate dimensions upfront to avoid partial builds
+        for (id, vector) in &items {
+            if vector.len() != self.config.dimensions {
+                return Err(RetrievalError::DimensionMismatch {
+                    expected: self.config.dimensions,
+                    actual: vector.len(),
+                });
+            }
+            // Check for duplicates against existing index
+            if self.id_to_internal.contains_key(id) {
+                return Err(RetrievalError::hnsw(format!(
+                    "build_batch does not support updates: ID {id:?} already exists"
+                )));
+            }
+        }
+
+        // Budget check for entire batch
+        if let Some(limit) = self.config.memory_budget {
+            let current = self.memory_usage();
+            let cost_per_node = self.estimate_insert_cost();
+            let total_cost = cost_per_node * items.len();
+            if current + total_cost > limit {
+                return Err(RetrievalError::budget_exceeded(current, total_cost, limit));
+            }
+        }
+
+        let n = items.len();
+
+        // For very small batches, fall back to sequential insertion
+        if n <= 32 {
+            for (id, vector) in items {
+                self.insert(id, vector)?;
+            }
+            return Ok(());
+        }
+
+        // Phase 1: Sequential seed insertion of sqrt(N) nodes
+        // These build the upper-layer graph structure
+        let seed_count = ((n as f64).sqrt() as usize).max(1);
+        let (seed_items, remaining_items) = items.split_at(seed_count);
+
+        for (id, vector) in seed_items {
+            self.insert(*id, vector.clone())?;
+        }
+
+        // Phase 2: Pre-generate levels for remaining nodes in deterministic RNG order.
+        let mut pending: Vec<(NodeId, Vec<f32>, usize)> = Vec::with_capacity(remaining_items.len());
+        for (id, vector) in remaining_items {
+            let level = self.random_level();
+            pending.push((*id, vector.clone(), level));
+        }
+
+        // Phase 3: Neighbor search
+        // The graph is frozen during this phase -- only read-only search_layer is called.
+        let entry_point = self.entry_point;
+        let current_max_level = self.max_level;
+        let config_ef = self.config.ef_construction;
+        let index = &*self;
+
+        let precomputed: Vec<PrecomputedInsert> = pending
+            .into_par_iter()
+            .map(|(id, vector, level)| {
+                let norm = vector.iter().map(|x| x * x).sum::<f32>().sqrt();
+
+                // Navigate upper layers to find entry region
+                let ep = match entry_point {
+                    Some(ep) => ep,
+                    None => {
+                        // Should not happen after seed phase, but handle gracefully
+                        return PrecomputedInsert {
+                            id,
+                            vector,
+                            level,
+                            layer_candidates: Vec::new(),
+                        };
+                    }
+                };
+
+                let mut current_nearest = vec![ep];
+
+                // Search from top layer down to level + 1 (greedy, ef=1)
+                for l in (level + 1..=current_max_level).rev() {
+                    let nearest = index.search_layer(&vector, norm, &current_nearest, 1, l);
+                    if !nearest.is_empty() {
+                        current_nearest = vec![nearest[0].1];
+                    }
+                }
+
+                // Search layers from min(level, max_level) down to 0
+                let mut layer_candidates = Vec::new();
+                for l in (0..=level.min(current_max_level)).rev() {
+                    let candidates =
+                        index.search_layer(&vector, norm, &current_nearest, config_ef, l);
+
+                    if !candidates.is_empty() {
+                        current_nearest = vec![candidates[0].1];
+                    }
+
+                    layer_candidates.push((l, candidates));
+                }
+
+                PrecomputedInsert {
+                    id,
+                    vector,
+                    level,
+                    layer_candidates,
+                }
+            })
+            .collect();
+
+        // Phase 4: Sequential merge -- insert nodes with pre-computed neighbors
+        for pc in precomputed {
+            self.insert_with_precomputed(pc)?;
+        }
+
+        Ok(())
+    }
+
+    /// Insert a node using pre-computed neighbor candidates.
+    ///
+    /// This performs the same operations as `insert_inner`, but skips the
+    /// neighbor search phase since candidates were already found in parallel.
+    fn insert_with_precomputed(&mut self, pc: PrecomputedInsert) -> Result<()> {
+        let internal_id = self.nodes.len();
+
+        // Select neighbors from pre-computed candidates
+        let mut layer_neighbors: Vec<(usize, Vec<usize>)> = Vec::new();
+        for (l, candidates) in &pc.layer_candidates {
+            let m = if *l == 0 {
+                self.config.m_max0
+            } else {
+                self.config.m
+            };
+            let neighbors = self.select_neighbors(candidates, m);
+            layer_neighbors.push((*l, neighbors));
+        }
+
+        // Build the node with neighbor lists
+        let mut node = HnswNode::new(pc.vector, pc.level);
+        for (l, neighbors) in &layer_neighbors {
+            while node.neighbors.len() <= *l {
+                node.neighbors.push(Vec::new());
+            }
+            node.neighbors[*l] = neighbors.clone();
+        }
+
+        // Insert into storage (including quantized arena)
+        self.quantized.push(&node.vector, node.norm);
+        self.nodes.push(node);
+        self.id_to_internal.insert(pc.id, internal_id);
+        self.internal_to_id.push(pc.id);
+
+        // Add bidirectional connections
+        for (l, neighbors) in layer_neighbors {
+            let m = if l == 0 {
+                self.config.m_max0
+            } else {
+                self.config.m
+            };
+            for neighbor_id in neighbors {
+                self.connect(neighbor_id, internal_id, l);
+                // Shrink if over m (m is already m_max0 for layer 0, m for upper layers)
+                self.shrink_connections(neighbor_id, l, m);
+            }
+        }
+
+        // Update entry point if new node is at higher level
+        if pc.level > self.max_level {
+            self.entry_point = Some(internal_id);
+            self.max_level = pc.level;
+        }
+
+        self.additions_since_rebuild += 1;
+        Ok(())
+    }
+}
diff --git a/crates/khive-hnsw/src/index/insert.rs b/crates/khive-hnsw/src/index/insert.rs
new file mode 100644
index 00000000..0206e56b
--- /dev/null
+++ b/crates/khive-hnsw/src/index/insert.rs
@@ -0,0 +1,325 @@
+//! Insert operations for HNSW index.
+
+use crate::NodeId;
+use rand::Rng;
+
+use super::HnswIndex;
+use crate::error::{Result, RetrievalError};
+use crate::config::MAX_LEVEL;
+use crate::distance::compute_ordering_distance;
+use crate::node::HnswNode;
+use crate::metrics::{self, MetricEvent, MetricValue};
+
+impl HnswIndex {
+    /// Insert a vector into the index.
+    ///
+    /// If the ID already exists, the vector is updated.
+    /// Returns an error if dimensions don't match.
+    ///
+    /// Emits `hnsw.insert.duration_ms`, `hnsw.insert.count`, and
+    /// `hnsw.index.size` metrics when a sink is attached.
+    pub fn insert(&mut self, id: NodeId, vector: Vec<f32>) -> Result<()> {
+        let start = std::time::Instant::now();
+
+        let result = self.insert_inner(id, vector);
+
+        // Emit metrics regardless of success/failure
+        let elapsed = start.elapsed().as_secs_f64() * 1000.0;
+        metrics::emit(
+            &self.metrics,
+            MetricEvent {
+                name: metrics::names::HNSW_INSERT_DURATION_MS,
+                value: MetricValue::Histogram(elapsed),
+                labels: vec![],
+            },
+        );
+        metrics::emit(
+            &self.metrics,
+            MetricEvent {
+                name: metrics::names::HNSW_INSERT_COUNT,
+                value: MetricValue::Counter(1),
+                labels: vec![],
+            },
+        );
+        metrics::emit(
+            &self.metrics,
+            MetricEvent {
+                name: metrics::names::HNSW_INDEX_SIZE,
+                value: MetricValue::Gauge(self.len_live() as f64),
+                labels: vec![],
+            },
+        );
+
+        result
+    }
+
+    /// Insert a batch of vectors. Returns IDs that failed along with their errors.
+    ///
+    /// Callers can hold the write lock once around this call rather than once
+    /// per record, keeping the lock window bounded to the batch size.
+    pub fn insert_many(
+        &mut self,
+        items: impl IntoIterator<Item = (NodeId, Vec<f32>)>,
+    ) -> Vec<(NodeId, crate::error::RetrievalError)> {
+        let mut failures = Vec::new();
+        for (id, vector) in items {
+            if let Err(e) = self.insert(id, vector) {
+                failures.push((id, e));
+            }
+        }
+        failures
+    }
+
+    /// Inner insert logic (uninstrumented).
+    pub(super) fn insert_inner(&mut self, id: NodeId, vector: Vec<f32>) -> Result<()> {
+        if vector.len() != self.config.dimensions {
+            return Err(RetrievalError::DimensionMismatch {
+                expected: self.config.dimensions,
+                actual: vector.len(),
+            });
+        }
+
+        // If updating existing node, just update the vector (bypasses budget)
+        if let Some(&iid) = self.id_to_internal.get(&id) {
+            self.nodes[iid].update_vector(vector.clone());
+            // Update quantized arena to stay in sync
+            self.quantized.update(iid, &vector, self.nodes[iid].norm);
+            // Remove from tombstones if it was marked
+            if iid < self.tombstones.len() && self.tombstones[iid] {
+                self.tombstones[iid] = false;
+                self.tombstone_count -= 1;
+            }
+            return Ok(());
+        }
+
+        // Budget check before allocating a new node
+        if let Some(limit) = self.config.memory_budget {
+            let current = self.memory_usage();
+            let cost = self.estimate_insert_cost();
+            if current + cost > limit {
+                return Err(RetrievalError::budget_exceeded(current, cost, limit));
+            }
+        }
+
+        let level = self.random_level();
+        let node = HnswNode::new(vector.clone(), level);
+        let query_norm = node.norm;
+
+        // Assign internal ID = next index in vec
+        let internal_id = self.nodes.len();
+
+        // First node
+        if self.nodes.is_empty() {
+            self.quantized.push(&vector, node.norm);
+            self.nodes.push(node);
+            self.id_to_internal.insert(id, internal_id);
+            self.internal_to_id.push(id);
+            self.entry_point = Some(internal_id);
+            self.max_level = level;
+            self.additions_since_rebuild += 1;
+            return Ok(());
+        }
+
+        let entry_point = self.entry_point.ok_or_else(|| {
+            RetrievalError::hnsw("HNSW invariant violated: no entry point despite non-empty index")
+        })?;
+        let current_max_level = self.max_level;
+
+        // Search from top layer down to level + 1
+        let mut current_nearest = vec![entry_point];
+
+        for l in (level + 1..=current_max_level).rev() {
+            let nearest = self.search_layer(&vector, query_norm, &current_nearest, 1, l);
+            if !nearest.is_empty() {
+                current_nearest = vec![nearest[0].1];
+            }
+        }
+
+        // Collect neighbors for all layers (using internal usize IDs)
+        let mut layer_neighbors: Vec<(usize, Vec<usize>)> = Vec::new();
+
+        for l in (0..=level.min(current_max_level)).rev() {
+            let candidates = self.search_layer(
+                &vector,
+                query_norm,
+                &current_nearest,
+                self.config.ef_construction,
+                l,
+            );
+
+            let m = if l == 0 {
+                self.config.m_max0
+            } else {
+                self.config.m
+            };
+            let neighbors = self.select_neighbors(&candidates, m);
+
+            if !candidates.is_empty() {
+                current_nearest = vec![candidates[0].1];
+            }
+
+            layer_neighbors.push((l, neighbors));
+        }
+
+        // Insert node first (including quantized arena)
+        let mut new_node = node;
+        for (l, neighbors) in &layer_neighbors {
+            while new_node.neighbors.len() <= *l {
+                new_node.neighbors.push(Vec::new());
+            }
+            new_node.neighbors[*l] = neighbors.clone();
+        }
+        self.quantized.push(&new_node.vector, new_node.norm);
+        self.nodes.push(new_node);
+        self.id_to_internal.insert(id, internal_id);
+        self.internal_to_id.push(id);
+
+        // Add bidirectional connections
+        for (l, neighbors) in layer_neighbors {
+            let m = if l == 0 {
+                self.config.m_max0
+            } else {
+                self.config.m
+            };
+            for neighbor_id in neighbors {
+                self.connect(neighbor_id, internal_id, l);
+                // Shrink if over m (m is already m_max0 for layer 0, m for upper layers)
+                self.shrink_connections(neighbor_id, l, m);
+            }
+        }
+
+        // Update entry point if new node is at higher level
+        if level > current_max_level {
+            self.entry_point = Some(internal_id);
+            self.max_level = level;
+        }
+
+        self.additions_since_rebuild += 1;
+        Ok(())
+    }
+
+    /// Generate random level for new node (exponential distribution).
+    ///
+    /// Uses seeded RNG if `config.seed` was set for reproducible builds.
+    ///
+    /// **PROOF CORRESPONDENCE**: Lion.Retrieval.HNSW.level_prob_sums_to_one
+    /// Level probabilities form a valid distribution: sum_{l=0}^{inf} P(level=l) = 1
+    ///
+    /// **PROOF CORRESPONDENCE**: Lion.Retrieval.HNSW.level_survival_decreasing
+    /// Survival probability decreases exponentially: P(level >= l) = (1/M)^l
+    pub(super) fn random_level(&mut self) -> usize {
+        let r: f64 = self.rng.gen::<f64>().max(f64::MIN_POSITIVE);
+        let level = (-r.ln() * self.config.ml).floor() as usize;
+        level.min(MAX_LEVEL)
+    }
+
+    /// Add bidirectional connection using internal IDs.
+    pub(crate) fn connect(&mut self, from: usize, to: usize, layer: usize) {
+        let node = &mut self.nodes[from];
+        while node.neighbors.len() <= layer {
+            node.neighbors.push(Vec::new());
+        }
+        if !node.neighbors[layer].contains(&to) {
+            node.neighbors[layer].push(to);
+        }
+    }
+
+    /// Shrink connections if over limit.
+    pub(crate) fn shrink_connections(&mut self, id: usize, layer: usize, m: usize) {
+        use crate::distance::OrderedF32;
+
+        // Phase 1: Compute new neighbors (read only)
+        let new_neighbors = {
+            let node = &self.nodes[id];
+            if layer >= node.neighbors.len() || node.neighbors[layer].len() <= m {
+                return;
+            }
+
+            let node_vec = &node.vector;
+            let node_norm = node.norm;
+            let neighbor_ids = &node.neighbors[layer];
+
+            let mut scored: Vec<(f32, usize)> = neighbor_ids
+                .iter()
+                .map(|&n_id| {
+                    let n = &self.nodes[n_id];
+                    (
+                        compute_ordering_distance(
+                            node_vec,
+                            node_norm,
+                            &n.vector,
+                            n.norm,
+                            self.config.metric,
+                        ),
+                        n_id,
+                    )
+                })
+                .collect();
+
+            // Sort by distance, then by external ID for deterministic neighbor selection
+            scored.sort_by(|a, b| match OrderedF32(a.0).cmp(&OrderedF32(b.0)) {
+                std::cmp::Ordering::Equal => self.external_id(a.1).cmp(&self.external_id(b.1)),
+                other => other,
+            });
+            scored
+                .into_iter()
+                .take(m)
+                .map(|(_, id)| id)
+                .collect::<Vec<_>>()
+        };
+
+        // Phase 2: Mutate
+        let node = &mut self.nodes[id];
+        if layer < node.neighbors.len() {
+            node.neighbors[layer] = new_neighbors;
+        }
+    }
+
+    /// Sort a node's neighbor list by distance to the node.
+    ///
+    /// Available for batch operations like post-rebuild optimization.
+    #[allow(dead_code)]
+    pub(super) fn sort_neighbors(&mut self, id: usize, layer: usize) {
+        use crate::distance::OrderedF32;
+
+        // Phase 1: Compute sorted order (read only)
+        let sorted = {
+            let node = &self.nodes[id];
+            if layer >= node.neighbors.len() || node.neighbors[layer].is_empty() {
+                return;
+            }
+
+            let node_vec = &node.vector;
+            let node_norm = node.norm;
+
+            let mut scored: Vec<(f32, usize)> = node.neighbors[layer]
+                .iter()
+                .map(|&n_id| {
+                    let dist = {
+                        let n = &self.nodes[n_id];
+                        compute_ordering_distance(
+                            node_vec,
+                            node_norm,
+                            &n.vector,
+                            n.norm,
+                            self.config.metric,
+                        )
+                    };
+                    (dist, n_id)
+                })
+                .collect();
+
+            scored.sort_by(|a, b| match OrderedF32(a.0).cmp(&OrderedF32(b.0)) {
+                std::cmp::Ordering::Equal => self.external_id(a.1).cmp(&self.external_id(b.1)),
+                other => other,
+            });
+            scored.into_iter().map(|(_, id)| id).collect::<Vec<_>>()
+        };
+
+        // Phase 2: Mutate
+        let node = &mut self.nodes[id];
+        if layer < node.neighbors.len() {
+            node.neighbors[layer] = sorted;
+        }
+    }
+}
diff --git a/crates/khive-hnsw/src/index/memory.rs b/crates/khive-hnsw/src/index/memory.rs
new file mode 100644
index 00000000..3a6ba8f7
--- /dev/null
+++ b/crates/khive-hnsw/src/index/memory.rs
@@ -0,0 +1,88 @@
+//! Memory budget operations for HNSW index.
+
+use super::HnswIndex;
+
+impl HnswIndex {
+    /// Get the configured memory budget, if any.
+    pub fn memory_budget(&self) -> Option<usize> {
+        self.config.memory_budget
+    }
+
+    /// Set or clear the memory budget at runtime.
+    ///
+    /// Pass `Some(bytes)` to enforce a limit, or `None` to remove it.
+    pub fn set_memory_budget(&mut self, budget: Option<usize>) {
+        self.config.memory_budget = budget;
+    }
+
+    /// Estimate the current memory usage of the index in bytes.
+    ///
+    /// Formula: `nodes * (dims * 4 + node_overhead) + neighbor_entries * 8
+    ///           + per_layer_overhead + vec_overhead + id_mapping_overhead + tombstone_overhead`
+    ///
+    /// This is a conservative estimate. Actual usage may differ due to
+    /// allocator overhead and alignment.
+    pub fn memory_usage(&self) -> usize {
+        let num_nodes = self.nodes.len();
+        let dims = self.config.dimensions;
+
+        // Per-node: vector (dims * 4 bytes for f32) + fixed fields
+        // HnswNode has: vector(Vec overhead 24 + data) + neighbors(Vec overhead 24)
+        //             + max_layer(8) + norm(4)
+        let node_overhead: usize = 24 + 24 + 8 + 4; // 60 bytes fixed per node
+        let per_node = dims * 4 + node_overhead;
+        let nodes_total = num_nodes * per_node;
+
+        // Neighbor entries: each Vec<usize> per layer, each entry is 8 bytes (usize)
+        // Plus Vec overhead (24 bytes) per layer per node
+        let mut neighbor_entries: usize = 0;
+        let mut layer_vecs: usize = 0;
+        for node in &self.nodes {
+            layer_vecs += node.neighbors.len() * 24; // Vec overhead per layer
+            for layer in &node.neighbors {
+                neighbor_entries += layer.len();
+            }
+        }
+        let neighbors_total = neighbor_entries * 8 + layer_vecs;
+
+        // ID mapping overhead:
+        // HashMap<EmbeddingId, usize>: ~(num_nodes * 40) for bucket/metadata
+        // Vec<EmbeddingId>: num_nodes * 16
+        let mapping_overhead = num_nodes * 40 + num_nodes * 16;
+
+        // Tombstone bitset overhead: 1 byte per node (Vec<bool>)
+        let tombstone_overhead = self.tombstones.len();
+
+        // Quantized arena overhead:
+        // - data: num_nodes * dims * 1 byte (i8)
+        // - meta: num_nodes * 8 bytes (QuantMeta: scale f32 + norm f32)
+        let quantized_overhead = num_nodes * dims + num_nodes * 8;
+
+        nodes_total + neighbors_total + mapping_overhead + tombstone_overhead + quantized_overhead
+    }
+
+    /// Estimate the memory cost of inserting a new vector.
+    ///
+    /// This is the incremental cost of one new node, including its vector
+    /// storage, node metadata, and expected neighbor connections.
+    pub fn estimate_insert_cost(&self) -> usize {
+        let dims = self.config.dimensions;
+
+        // Vector data + node fixed overhead
+        let node_overhead: usize = 24 + 24 + 8 + 4;
+        let per_node = dims * 4 + node_overhead;
+
+        // Expected neighbors: at least 1 layer with m_max0 neighbors,
+        // plus Vec overhead. Use m_max0 as conservative estimate for layer 0.
+        // Neighbors are usize (8 bytes each)
+        let expected_neighbors = self.config.m_max0 * 8 + 24;
+
+        // ID mapping entry overhead (HashMap entry + Vec entry)
+        let mapping_entry = 40 + 16;
+
+        // Quantized arena: dims * 1 byte (i8) + 8 bytes (QuantMeta)
+        let quantized_cost = dims + 8;
+
+        per_node + expected_neighbors + mapping_entry + quantized_cost
+    }
+}
diff --git a/crates/khive-hnsw/src/index/mod.rs b/crates/khive-hnsw/src/index/mod.rs
new file mode 100644
index 00000000..ceb1b6ac
--- /dev/null
+++ b/crates/khive-hnsw/src/index/mod.rs
@@ -0,0 +1,730 @@
+//! HNSW index implementation.
+//!
+//! The core index structure with insert, delete, search, and rebuild operations.
+//!
+//! # Internal ID Scheme
+//!
+//! Internally, nodes are identified by dense `usize` indices into a `Vec<HnswNode>`.
+//! The public API uses `NodeId` (128-bit) -- conversion happens at the boundary.
+//! This gives O(1) array indexing on the search hot path instead of HashMap probing.
+
+mod build_batch;
+mod insert;
+mod memory;
+mod neighbors;
+mod rebuild;
+mod search;
+
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use crate::NodeId;
+use rand::rngs::StdRng;
+use rand::SeedableRng;
+
+use super::config::HnswConfig;
+use super::node::HnswNode;
+use super::stats::TombstoneStats;
+use crate::metrics::MetricsSink;
+
+// ---------------------------------------------------------------------------
+// INT8 quantized arena for fast approximate distance computation
+// ---------------------------------------------------------------------------
+
+/// Per-vector quantization metadata (symmetric quantization).
+///
+/// Stored alongside the flat `Vec<i8>` arena. Each vector's quantized data
+/// is at `[internal_id * dims .. (internal_id + 1) * dims]` in the arena.
+#[derive(Debug, Clone, Copy)]
+pub(crate) struct QuantMeta {
+    /// Scale factor: `float_value = int8_value / scale`.
+    /// Symmetric quantization maps `[-max_abs, max_abs]` to `[-127, 127]`.
+    pub scale: f32,
+    /// Pre-computed L2 norm of the original f32 vector.
+    pub norm: f32,
+}
+
+/// INT8 quantized vector arena for HNSW search acceleration.
+///
+/// Stores quantized vectors in a flat `Vec<i8>` arena with the same ordering
+/// as the main `nodes` vector. Used for fast approximate distance computation
+/// during the candidate filtering phase of search.
+///
+/// # Two-Phase Search Strategy
+///
+/// 1. **Phase 1 (INT8)**: Compute approximate distance using quantized vectors.
+///    This is ~3x faster than f32 distance computation (11ns vs 34ns for 384d).
+/// 2. **Phase 2 (f32)**: For candidates that pass the approximate threshold,
+///    compute precise f32 distance for final ranking.
+///
+/// This skip pattern avoids f32 distance computation for obviously distant
+/// neighbors, providing significant speedup at scale (50K+ vectors).
+#[derive(Debug, Clone)]
+pub(crate) struct QuantizedArena {
+    /// Flat INT8 vector data. Vector `i` starts at `i * dims`.
+    pub data: Vec<i8>,
+    /// Per-vector quantization metadata, indexed by internal ID.
+    pub meta: Vec<QuantMeta>,
+    /// Vector dimensionality (cached for bounds checking).
+    pub dims: usize,
+}
+
+impl QuantizedArena {
+    /// Create a new empty quantized arena for the given dimensionality.
+    fn new(dims: usize) -> Self {
+        Self {
+            data: Vec::new(),
+            meta: Vec::new(),
+            dims,
+        }
+    }
+
+    /// Quantize a float vector and append it to the arena.
+    ///
+    /// Uses symmetric quantization: `[-max_abs, max_abs]` -> `[-127, 127]`.
+    /// Returns the index of the newly added vector (should match the internal ID).
+    fn push(&mut self, vector: &[f32], norm: f32) -> usize {
+        debug_assert_eq!(vector.len(), self.dims);
+
+        // Single-pass min/max over finite values
+        let mut max_abs: f32 = 0.0;
+        for &v in vector {
+            if v.is_finite() {
+                let abs = v.abs();
+                if abs > max_abs {
+                    max_abs = abs;
+                }
+            }
+        }
+
+        // Symmetric quantization: scale maps max_abs to 127
+        let scale = if max_abs > 1e-10 {
+            127.0 / max_abs
+        } else {
+            1.0 // Near-zero vector
+        };
+
+        // Quantize and append to flat arena
+        self.data.reserve(self.dims);
+        for &v in vector {
+            let q = if v.is_finite() {
+                (v * scale).round().clamp(-127.0, 127.0) as i8
+            } else {
+                0i8
+            };
+            self.data.push(q);
+        }
+
+        let idx = self.meta.len();
+        self.meta.push(QuantMeta { scale, norm });
+        idx
+    }
+
+    /// Update the quantized vector at the given index.
+    pub(crate) fn update(&mut self, idx: usize, vector: &[f32], norm: f32) {
+        debug_assert_eq!(vector.len(), self.dims);
+        debug_assert!(idx < self.meta.len());
+
+        let mut max_abs: f32 = 0.0;
+        for &v in vector {
+            if v.is_finite() {
+                let abs = v.abs();
+                if abs > max_abs {
+                    max_abs = abs;
+                }
+            }
+        }
+
+        let scale = if max_abs > 1e-10 {
+            127.0 / max_abs
+        } else {
+            1.0
+        };
+
+        let offset = idx * self.dims;
+        for (i, &v) in vector.iter().enumerate() {
+            self.data[offset + i] = if v.is_finite() {
+                (v * scale).round().clamp(-127.0, 127.0) as i8
+            } else {
+                0i8
+            };
+        }
+
+        self.meta[idx] = QuantMeta { scale, norm };
+    }
+
+    /// Get the quantized data slice for a given internal ID.
+    #[inline]
+    fn get_data(&self, idx: usize) -> &[i8] {
+        let offset = idx * self.dims;
+        &self.data[offset..offset + self.dims]
+    }
+
+    /// Compute approximate INT8 dot product between two quantized vectors,
+    /// returning the result in the original f32 scale.
+    ///
+    /// Uses SIMD-accelerated INT8 dot product from khive-embed.
+    #[inline]
+    #[allow(dead_code)] // Available for Dot metric path (future)
+    pub fn dot_product_approx(&self, a_idx: usize, b_data: &[i8], b_scale: f32) -> f32 {
+        let a_data = self.get_data(a_idx);
+        let a_meta = &self.meta[a_idx];
+        let denom = a_meta.scale * b_scale;
+        if denom == 0.0 || !denom.is_finite() {
+            return 0.0;
+        }
+        int8_dot_product_raw(a_data, b_data) / denom
+    }
+
+    /// Compute approximate INT8 cosine distance between a stored vector and
+    /// a query's quantized form.
+    ///
+    /// Returns distance (1 - cosine_similarity), comparable to the f32 path.
+    #[inline]
+    pub fn cosine_distance_approx(
+        &self,
+        idx: usize,
+        query_i8: &[i8],
+        query_scale: f32,
+        query_norm: f32,
+    ) -> f32 {
+        let meta = &self.meta[idx];
+        let denom_scale = meta.scale * query_scale;
+        if denom_scale == 0.0 || !denom_scale.is_finite() {
+            return 1.0;
+        }
+        let norm_denom = meta.norm * query_norm;
+        if norm_denom <= 0.0 || !norm_denom.is_finite() {
+            return 1.0;
+        }
+        let dot = int8_dot_product_raw(self.get_data(idx), query_i8) / denom_scale;
+        1.0 - (dot / norm_denom)
+    }
+
+    /// Clear the arena (used by rebuild/clear).
+    fn clear(&mut self) {
+        self.data.clear();
+        self.meta.clear();
+    }
+}
+
+/// Raw INT8 dot product using SIMD from khive-embed.
+///
+/// Zero-allocation path: takes raw `&[i8]` slices and returns the integer
+/// dot product as f32 (no scale factor division). The caller handles scaling.
+///
+/// Uses the same SIMD backend as `dot_product_i8` (NEON/AVX2/AVX-512 VNNI)
+/// but without constructing `QuantizedVector` wrappers.
+#[inline]
+fn int8_dot_product_raw(a: &[i8], b: &[i8]) -> f32 {
+    lattice_embed::simd::dot_product_i8_raw(a, b)
+}
+
+/// HNSW vector index with tombstone-based lazy deletion.
+///
+/// This is an IN-MEMORY index. Persistence via snapshots is handled separately.
+/// All output scores are `DeterministicScore` for cross-platform consistency.
+///
+/// # Internal ID Scheme
+///
+/// Nodes are stored in a dense `Vec<HnswNode>` indexed by `usize`. The mappings
+/// `id_to_internal` and `internal_to_id` convert between external `NodeId`
+/// and internal `usize` at the API boundary. All neighbor lists, entry point,
+/// and tombstone tracking use internal `usize` IDs for O(1) lookups.
+///
+/// # INT8 Quantized Search (opt-in)
+///
+/// When `use_quantized` is true, search uses a two-phase strategy:
+/// 1. INT8 approximate distance for candidate filtering (~3x faster)
+/// 2. f32 precise distance for final scoring (exact results)
+///
+/// Enable via `HnswIndex::set_quantized(true)` or `HnswIndex::with_quantized()`.
+pub struct HnswIndex {
+    /// Configuration.
+    pub(crate) config: HnswConfig,
+
+    /// Dense node storage indexed by internal usize ID.
+    pub(crate) nodes: Vec<HnswNode>,
+
+    /// External NodeId -> internal usize mapping.
+    /// Only used at API boundary (insert, search result conversion, delete).
+    pub(crate) id_to_internal: HashMap<NodeId, usize>,
+
+    /// Internal usize -> external NodeId mapping.
+    /// Indexed by internal ID for O(1) reverse lookup.
+    pub(crate) internal_to_id: Vec<NodeId>,
+
+    /// Entry point node (highest layer node). Internal usize ID.
+    pub(crate) entry_point: Option<usize>,
+
+    /// Current maximum layer in the graph.
+    pub(crate) max_level: usize,
+
+    /// Tombstoned (soft-deleted) internal node IDs.
+    /// Dense bitset indexed by internal ID for O(1) lookup on the search hot path.
+    pub(crate) tombstones: Vec<bool>,
+
+    /// Count of tombstoned nodes (maintained separately to avoid scanning the Vec).
+    pub(crate) tombstone_count: usize,
+
+    /// Insertions since last rebuild (for tracking recall degradation).
+    pub(crate) additions_since_rebuild: usize,
+
+    /// Random number generator for level generation.
+    /// If config.seed is Some, this is a seeded RNG for reproducibility.
+    pub(crate) rng: StdRng,
+
+    /// Optional metrics sink for observability.
+    pub(crate) metrics: Option<Arc<dyn MetricsSink>>,
+
+    /// INT8 quantized vector arena for fast approximate distance.
+    /// Maintained in parallel with `nodes` -- same internal ID ordering.
+    pub(crate) quantized: QuantizedArena,
+
+    /// Whether to use INT8 quantized distance for candidate filtering.
+    /// Default: false. Enable for large indexes (50K+ vectors) where
+    /// distance computation dominates search time.
+    pub(crate) use_quantized: bool,
+}
+
+impl Clone for HnswIndex {
+    fn clone(&self) -> Self {
+        Self {
+            config: self.config.clone(),
+            nodes: self.nodes.clone(),
+            id_to_internal: self.id_to_internal.clone(),
+            internal_to_id: self.internal_to_id.clone(),
+            entry_point: self.entry_point,
+            max_level: self.max_level,
+            tombstones: self.tombstones.clone(),
+            tombstone_count: self.tombstone_count,
+            additions_since_rebuild: self.additions_since_rebuild,
+            rng: self.rng.clone(),
+            metrics: self.metrics.clone(),
+            quantized: self.quantized.clone(),
+            use_quantized: self.use_quantized,
+        }
+    }
+}
+
+impl std::fmt::Debug for HnswIndex {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("HnswIndex")
+            .field("config", &self.config)
+            .field("num_nodes", &self.nodes.len())
+            .field("max_level", &self.max_level)
+            .field("tombstones", &self.tombstone_count)
+            .field("additions_since_rebuild", &self.additions_since_rebuild)
+            .field("use_quantized", &self.use_quantized)
+            .finish()
+    }
+}
+
+impl HnswIndex {
+    /// Create a new HNSW index with default configuration and specified dimensions.
+    pub fn new(dimensions: usize) -> Self {
+        Self::with_config(HnswConfig::with_dimensions(dimensions))
+    }
+
+    /// Create a new HNSW index with custom configuration.
+    pub fn with_config(config: HnswConfig) -> Self {
+        config.validate().expect("HNSW configuration must be valid");
+
+        // Initialize RNG - seeded if config.seed is Some, otherwise from entropy
+        let rng = match config.seed {
+            Some(seed) => StdRng::seed_from_u64(seed),
+            None => StdRng::from_entropy(),
+        };
+
+        let dims = config.dimensions;
+        Self {
+            config,
+            nodes: Vec::new(),
+            id_to_internal: HashMap::new(),
+            internal_to_id: Vec::new(),
+            entry_point: None,
+            max_level: 0,
+            tombstones: Vec::new(),
+            tombstone_count: 0,
+            additions_since_rebuild: 0,
+            rng,
+            metrics: None,
+            quantized: QuantizedArena::new(dims),
+            use_quantized: false,
+        }
+    }
+
+    /// Get the current configuration.
+    pub fn config(&self) -> &HnswConfig {
+        &self.config
+    }
+
+    /// Attach a metrics sink (builder pattern).
+    ///
+    /// The sink receives [`MetricEvent`]s from `search`, `insert`, and `rebuild`
+    /// operations. Pass an `Arc<dyn MetricsSink>` to share a single sink across
+    /// multiple indices.
+    #[must_use]
+    pub fn with_metrics(mut self, sink: Arc<dyn MetricsSink>) -> Self {
+        self.metrics = Some(sink);
+        self
+    }
+
+    /// Set or replace the metrics sink at runtime.
+    ///
+    /// Pass `Some(sink)` to enable metrics, or `None` to disable.
+    pub fn set_metrics(&mut self, sink: Option<Arc<dyn MetricsSink>>) {
+        self.metrics = sink;
+    }
+
+    /// Enable INT8 quantized distance for search candidate filtering.
+    ///
+    /// When enabled, search uses a two-phase strategy:
+    /// 1. INT8 approximate distance for candidate screening (~3x faster)
+    /// 2. f32 precise distance for final ranking (exact results)
+    ///
+    /// Recommended for indexes with 50K+ vectors where distance computation
+    /// dominates search time. For smaller indexes, the overhead of maintaining
+    /// the quantized arena may not be worthwhile.
+    ///
+    /// This is a builder-pattern method. For runtime toggling, use `set_quantized`.
+    #[must_use]
+    pub fn with_quantized(mut self) -> Self {
+        self.use_quantized = true;
+        self
+    }
+
+    /// Enable or disable INT8 quantized search at runtime.
+    ///
+    /// The quantized arena is always maintained (populated on insert),
+    /// so toggling this flag has no rebuilding cost.
+    pub fn set_quantized(&mut self, enabled: bool) {
+        self.use_quantized = enabled;
+    }
+
+    /// Check if INT8 quantized search is enabled.
+    pub fn is_quantized(&self) -> bool {
+        self.use_quantized
+    }
+
+    /// Get the number of vectors in the index (including tombstones).
+    pub fn len(&self) -> usize {
+        self.nodes.len()
+    }
+
+    /// Get the number of live (non-tombstoned) vectors.
+    pub fn len_live(&self) -> usize {
+        self.nodes.len().saturating_sub(self.tombstone_count)
+    }
+
+    /// Check if the index is empty.
+    pub fn is_empty(&self) -> bool {
+        self.nodes.is_empty()
+    }
+
+    /// Get the vector for an embedding ID, if present.
+    ///
+    /// Used by the build swap logic to recover concurrent writes:
+    /// entries inserted into the live HNSW during a background build
+    /// need their vectors re-inserted into the new index at swap time.
+    pub fn get_vector(&self, id: &NodeId) -> Option<Vec<f32>> {
+        self.id_to_internal
+            .get(id)
+            .map(|&iid| self.nodes[iid].vector.clone())
+    }
+
+    /// Get tombstone statistics.
+    pub fn tombstone_stats(&self) -> TombstoneStats {
+        let total = self.nodes.len();
+        let tombstones = self.tombstone_count;
+        let live = total.saturating_sub(tombstones);
+        let ratio = if total > 0 {
+            tombstones as f64 / total as f64
+        } else {
+            0.0
+        };
+
+        TombstoneStats {
+            total_nodes: total,
+            tombstone_count: tombstones,
+            live_nodes: live,
+            ratio,
+        }
+    }
+
+    /// Check if a rebuild is recommended based on tombstone ratio.
+    pub fn needs_rebuild(&self) -> bool {
+        self.tombstone_stats()
+            .needs_rebuild_at(self.config.rebuild_threshold)
+    }
+
+    /// Look up the internal ID for an NodeId, if it exists.
+    #[inline]
+    #[allow(dead_code)] // TODO: wire into delta-merge path for cross-index lookups
+    pub(crate) fn internal_id(&self, id: &NodeId) -> Option<usize> {
+        self.id_to_internal.get(id).copied()
+    }
+
+    /// Check if an internal ID is tombstoned. O(1) array lookup.
+    #[inline]
+    pub(crate) fn is_tombstoned(&self, iid: usize) -> bool {
+        iid < self.tombstones.len() && self.tombstones[iid]
+    }
+
+    /// Look up the external NodeId for an internal ID.
+    #[inline]
+    pub(crate) fn external_id(&self, iid: usize) -> NodeId {
+        self.internal_to_id[iid]
+    }
+
+    /// Create a serializable snapshot of the index state.
+    ///
+    /// The snapshot captures:
+    /// - All indexed vector IDs and their raw f32 embeddings
+    /// - Graph topology (neighbor connections per layer)
+    /// - Tombstone information
+    /// - Configuration for compatibility checking
+    ///
+    /// # Self-Contained Warm Start
+    ///
+    /// The returned snapshot includes the full vector data in the `vectors`
+    /// field, making it self-contained for warm-start restores.  Use
+    /// [`restore_from_snapshot_embedded`] to restore directly from the
+    /// snapshot without supplying a separate vector map.
+    ///
+    /// Size estimate: `dimensions × 4 bytes × node_count`.
+    /// For 384-dim embeddings with 10 K nodes: ~15 MB.
+    pub fn snapshot(&self) -> super::checkpoint::HnswSnapshot {
+        use super::checkpoint::{HnswCheckpointConfig, HnswSnapshot};
+
+        let mut indexed_ids: Vec<_> = self.internal_to_id.clone();
+        indexed_ids.sort_by(|a, b| a.as_bytes().cmp(b.as_bytes()));
+
+        let mut tombstoned_ids: Vec<_> = self
+            .tombstones
+            .iter()
+            .enumerate()
+            .filter(|(_, &is_tomb)| is_tomb)
+            .map(|(iid, _)| self.external_id(iid))
+            .collect();
+        tombstoned_ids.sort_by(|a, b| a.as_bytes().cmp(b.as_bytes()));
+
+        // Build layer representation -- convert internal IDs to NodeId
+        let mut layers = Vec::new();
+        for level in 0..=self.max_level {
+            let mut layer_nodes = Vec::new();
+            for (iid, node) in self.nodes.iter().enumerate() {
+                if level < node.neighbors.len() {
+                    let neighbors: Vec<NodeId> = node.neighbors[level]
+                        .iter()
+                        .map(|&nid| self.external_id(nid))
+                        .collect();
+                    layer_nodes.push((self.external_id(iid), neighbors));
+                }
+            }
+            // Sort for deterministic ordering
+            layer_nodes.sort_by(|(a, _), (b, _)| a.as_bytes().cmp(b.as_bytes()));
+            layers.push(layer_nodes);
+        }
+
+        // Convert entry_point from internal to external
+        let entry_point_ext = self.entry_point.map(|iid| self.external_id(iid));
+
+        // Embed full f32 vector data for self-contained warm-start snapshots.
+        // Sorted by NodeId bytes to match indexed_ids ordering.
+        let mut vectors: Vec<(NodeId, Vec<f32>)> = self
+            .internal_to_id
+            .iter()
+            .zip(self.nodes.iter())
+            .map(|(&id, node)| (id, node.vector.clone()))
+            .collect();
+        vectors.sort_by(|(a, _), (b, _)| a.as_bytes().cmp(b.as_bytes()));
+
+        HnswSnapshot {
+            vector_count: 0, // Legacy field, not used
+            total_nodes: self.nodes.len(),
+            live_nodes: self.len_live(),
+            tombstone_count: self.tombstone_count,
+            max_layer: self.max_level,
+            entry_point: entry_point_ext,
+            config: HnswCheckpointConfig::from_hnsw_config(&self.config),
+            indexed_ids,
+            tombstoned_ids,
+            layers,
+            vectors,
+        }
+    }
+
+    /// Restore index topology from a snapshot using embedded vector data.
+    ///
+    /// Convenience wrapper for snapshots produced by [`snapshot`] (which embed
+    /// the full f32 vectors).  No external vector map is required.
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    /// - The snapshot contains no embedded vectors (`vectors` field is empty)
+    /// - Snapshot config is incompatible with current config
+    /// - Snapshot verification fails
+    pub fn restore_from_snapshot_embedded(
+        &mut self,
+        snapshot: &super::checkpoint::HnswSnapshot,
+    ) -> Result<(), crate::error::RetrievalError> {
+        use crate::error::RetrievalError;
+
+        if snapshot.vectors.is_empty() && !snapshot.indexed_ids.is_empty() {
+            return Err(RetrievalError::hnsw(
+                "Snapshot contains no embedded vectors; use restore_from_snapshot with an external vector map",
+            ));
+        }
+
+        let vectors: std::collections::HashMap<NodeId, Vec<f32>> =
+            snapshot.vectors.iter().cloned().collect();
+        self.restore_from_snapshot(snapshot, &vectors)
+    }
+
+    /// Restore index topology from a snapshot.
+    ///
+    /// This rebuilds the neighbor connections from the snapshot. The caller
+    /// must supply vector data via the `vectors` map.  If the snapshot was
+    /// produced by [`snapshot`] it already embeds vector data in
+    /// `snapshot.vectors`; you can use [`restore_from_snapshot_embedded`]
+    /// instead in that case.
+    ///
+    /// When both the snapshot's embedded `vectors` field and the caller-supplied
+    /// `vectors` map contain an entry for the same `NodeId`, the caller-supplied
+    /// entry takes precedence (useful for applying incremental updates on top of
+    /// a base snapshot).
+    ///
+    /// # Arguments
+    ///
+    /// * `snapshot` - The snapshot to restore from
+    /// * `vectors` - Map of ID -> vector data for all indexed vectors.
+    ///   May be empty if `snapshot.vectors` is non-empty (self-contained snapshot).
+    ///
+    /// # Errors
+    ///
+    /// Returns an error if:
+    /// - Snapshot config is incompatible with current config
+    /// - Snapshot verification fails
+    /// - Referenced vectors are missing from both the snapshot and the external map
+    pub fn restore_from_snapshot(
+        &mut self,
+        snapshot: &super::checkpoint::HnswSnapshot,
+        vectors: &std::collections::HashMap<NodeId, Vec<f32>>,
+    ) -> Result<(), crate::error::RetrievalError> {
+        use super::checkpoint::HnswCheckpointConfig;
+        use crate::error::RetrievalError;
+
+        // Verify snapshot integrity
+        snapshot
+            .verify()
+            .map_err(|e| RetrievalError::hnsw(format!("Invalid snapshot: {e}")))?;
+
+        // Check config compatibility
+        let current_config = HnswCheckpointConfig::from_hnsw_config(&self.config);
+        if !snapshot.is_compatible(&current_config) {
+            return Err(RetrievalError::hnsw(format!(
+                "Snapshot config incompatible: expected {:?}, got {:?}",
+                current_config, snapshot.config
+            )));
+        }
+
+        // Build a merged vector lookup: snapshot-embedded vectors are the base,
+        // caller-supplied entries take precedence (to allow incremental updates).
+        //
+        // Priority: caller-supplied > snapshot-embedded.
+        // Both sources are collected into a single owned map to avoid lifetime
+        // complications with mixed borrows.
+        let mut merged_vectors: HashMap<NodeId, Vec<f32>> = snapshot
+            .vectors
+            .iter()
+            .map(|(id, v)| (*id, v.clone()))
+            .collect();
+        // Caller-supplied entries override embedded ones
+        for (id, v) in vectors {
+            merged_vectors.insert(*id, v.clone());
+        }
+
+        // Clear current state
+        self.nodes.clear();
+        self.id_to_internal.clear();
+        self.internal_to_id.clear();
+        self.tombstones.clear();
+        self.tombstone_count = 0;
+        self.quantized.clear();
+
+        // First pass: assign internal IDs and build mapping
+        // We need a consistent ordering, so use indexed_ids order
+        let mut ext_to_internal: HashMap<NodeId, usize> = HashMap::new();
+        for (idx, id) in snapshot.indexed_ids.iter().enumerate() {
+            ext_to_internal.insert(*id, idx);
+        }
+
+        // Build nodes with vectors
+        for id in &snapshot.indexed_ids {
+            let vector = merged_vectors
+                .get(id)
+                .ok_or_else(|| RetrievalError::hnsw(format!("Missing vector for ID {id:?}")))?
+                .clone();
+
+            let level = self.calculate_level_for_restore(&snapshot.layers, id);
+            let node = super::node::HnswNode::new(vector, level);
+            let iid = self.nodes.len();
+            self.quantized.push(&node.vector, node.norm);
+            self.nodes.push(node);
+            self.id_to_internal.insert(*id, iid);
+            self.internal_to_id.push(*id);
+        }
+
+        // Set max_level and entry_point
+        self.max_level = snapshot.max_layer;
+        self.entry_point = snapshot
+            .entry_point
+            .and_then(|eid| ext_to_internal.get(&eid).copied());
+
+        // Restore neighbor connections from layers -- convert NodeId to internal usize
+        for (level, layer) in snapshot.layers.iter().enumerate() {
+            for (node_id, neighbors) in layer {
+                if let Some(&iid) = ext_to_internal.get(node_id) {
+                    if level < self.nodes[iid].neighbors.len() {
+                        self.nodes[iid].neighbors[level] = neighbors
+                            .iter()
+                            .filter_map(|nid| ext_to_internal.get(nid).copied())
+                            .collect();
+                    }
+                }
+            }
+        }
+
+        // Restore tombstones -- convert NodeId to internal usize
+        // Resize tombstones bitset to match node count
+        self.tombstones.resize(self.nodes.len(), false);
+        for id in &snapshot.tombstoned_ids {
+            if let Some(&iid) = ext_to_internal.get(id) {
+                if !self.tombstones[iid] {
+                    self.tombstones[iid] = true;
+                    self.tombstone_count += 1;
+                }
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Calculate the level for a node during restore based on snapshot layers.
+    fn calculate_level_for_restore(
+        &self,
+        layers: &[Vec<(NodeId, Vec<NodeId>)>],
+        id: &NodeId,
+    ) -> usize {
+        // Find the highest layer where this node appears
+        let mut level = 0;
+        for (l, layer) in layers.iter().enumerate() {
+            if layer.iter().any(|(node_id, _)| node_id == id) {
+                level = l;
+            }
+        }
+        level
+    }
+}
diff --git a/crates/khive-hnsw/src/index/neighbors.rs b/crates/khive-hnsw/src/index/neighbors.rs
new file mode 100644
index 00000000..50480e6e
--- /dev/null
+++ b/crates/khive-hnsw/src/index/neighbors.rs
@@ -0,0 +1,72 @@
+//! Neighbor selection for HNSW index.
+
+use super::HnswIndex;
+use crate::distance::{compute_ordering_distance, OrderedF32};
+
+impl HnswIndex {
+    /// Select neighbors using diversified heuristic (Algorithm 4 from HNSW paper).
+    ///
+    /// Takes candidates as (distance, internal_id) pairs.
+    /// Returns internal IDs of selected neighbors.
+    ///
+    /// Candidates are sorted by distance once upfront, then iterated in order.
+    /// This avoids the O(N^2) cost of repeated min-scan + Vec::remove on the
+    /// unsorted candidate list.
+    pub(crate) fn select_neighbors(&self, candidates: &[(f32, usize)], m: usize) -> Vec<usize> {
+        if candidates.len() <= m {
+            return candidates.iter().map(|(_, id)| *id).collect();
+        }
+
+        // Sort candidates by distance (ascending), tie-break by external ID for determinism.
+        // This replaces the per-iteration O(N) min_by scan with a single O(N log N) sort.
+        let mut sorted: Vec<(f32, usize)> = candidates.to_vec();
+        sorted.sort_by(|a, b| match OrderedF32(a.0).cmp(&OrderedF32(b.0)) {
+            std::cmp::Ordering::Equal => self.external_id(a.1).cmp(&self.external_id(b.1)),
+            other => other,
+        });
+
+        let mut selected: Vec<(f32, usize)> = Vec::with_capacity(m);
+
+        // Iterate through sorted candidates in distance order, applying diversity check.
+        for &(dist_to_query, candidate_id) in &sorted {
+            if selected.len() >= m {
+                break;
+            }
+
+            let candidate_node = &self.nodes[candidate_id];
+            let candidate_vec = &candidate_node.vector;
+            let candidate_norm = candidate_node.norm;
+
+            // Check diversity: candidate is closer to query than to any selected neighbor
+            let is_diverse = selected.iter().all(|(_, sel_id)| {
+                let sel_node = &self.nodes[*sel_id];
+                let dist_to_selected = compute_ordering_distance(
+                    candidate_vec,
+                    candidate_norm,
+                    &sel_node.vector,
+                    sel_node.norm,
+                    self.config.metric,
+                );
+                dist_to_query <= dist_to_selected
+            });
+
+            if is_diverse || selected.is_empty() {
+                selected.push((dist_to_query, candidate_id));
+            }
+        }
+
+        // Fill with closest remaining if the diversity heuristic was too aggressive
+        if selected.len() < m {
+            for &(dist, id) in &sorted {
+                if selected.len() >= m {
+                    break;
+                }
+                if !selected.iter().any(|(_, sid)| *sid == id) {
+                    selected.push((dist, id));
+                }
+            }
+        }
+
+        selected.into_iter().map(|(_, id)| id).collect()
+    }
+}
diff --git a/crates/khive-hnsw/src/index/rebuild.rs b/crates/khive-hnsw/src/index/rebuild.rs
new file mode 100644
index 00000000..5aa05692
--- /dev/null
+++ b/crates/khive-hnsw/src/index/rebuild.rs
@@ -0,0 +1,288 @@
+//! Rebuild, delete, and clear operations for HNSW index.
+
+use crate::NodeId;
+
+use super::HnswIndex;
+use crate::stats::RebuildStats;
+use crate::metrics::{self, MetricEvent, MetricValue};
+
+impl HnswIndex {
+    /// Mark a vector for deletion (lazy tombstone).
+    ///
+    /// The vector is not physically removed until `rebuild()` is called.
+    /// Returns true if the vector existed and was marked.
+    ///
+    /// If the deleted node is the current entry point, a replacement is
+    /// found immediately from the node's neighbors (O(M), not O(N)).
+    pub fn delete(&mut self, id: NodeId) -> bool {
+        match self.id_to_internal.get(&id) {
+            Some(&iid) => {
+                // Grow tombstone bitset if needed
+                if iid >= self.tombstones.len() {
+                    self.tombstones.resize(iid + 1, false);
+                }
+                let was_new = !self.tombstones[iid];
+                if was_new {
+                    self.tombstones[iid] = true;
+                    self.tombstone_count += 1;
+                    self.repair_entry_point_after_delete(iid);
+                }
+                was_new
+            }
+            None => false,
+        }
+    }
+
+    /// If `tombstoned_id` is the current entry point, find a non-tombstoned
+    /// replacement from its neighbors. This is O(M) in the typical case
+    /// (M = neighbor count per layer) rather than O(N) scanning all nodes.
+    ///
+    /// Falls back to an O(N) scan only if ALL neighbors of the entry point
+    /// across ALL layers are also tombstoned -- an extremely unlikely scenario
+    /// that would require deleting an entire neighborhood simultaneously.
+    fn repair_entry_point_after_delete(&mut self, tombstoned_id: usize) {
+        let current_ep = match self.entry_point {
+            Some(ep) if ep == tombstoned_id => ep,
+            _ => return, // Not the entry point, nothing to do
+        };
+
+        // Search the tombstoned node's neighbors across all layers (highest first)
+        // for a live replacement. Prefer higher-layer neighbors since they provide
+        // better graph coverage for search entry.
+        let node = &self.nodes[current_ep];
+        for layer in (0..node.neighbors.len()).rev() {
+            for &neighbor_id in &node.neighbors[layer] {
+                if !self.is_tombstoned(neighbor_id) {
+                    self.entry_point = Some(neighbor_id);
+                    return;
+                }
+            }
+        }
+
+        // Extremely rare fallback: all neighbors are tombstoned too.
+        // Scan for ANY live node. This is O(N) but should essentially never happen
+        // in practice -- it requires tombstoning an entire neighborhood.
+        for iid in 0..self.nodes.len() {
+            if !self.is_tombstoned(iid) {
+                self.entry_point = Some(iid);
+                return;
+            }
+        }
+
+        // All nodes are tombstoned
+        self.entry_point = None;
+    }
+
+    /// Rebuild the index by removing tombstoned nodes.
+    ///
+    /// This physically removes tombstoned nodes and cleans up neighbor references.
+    /// Call this when `needs_rebuild()` returns true.
+    ///
+    /// # RETRIEVAL-11: Entry Point Behavior After Rebuild
+    ///
+    /// When rebuild removes the current entry point (because it was tombstoned),
+    /// a new entry point is selected automatically. The selection algorithm:
+    ///
+    /// 1. **Filter**: Consider only non-tombstoned nodes
+    /// 2. **Select**: Choose the node with the highest `max_layer` value
+    /// 3. **Tie-break**: If multiple nodes have the same max_layer, selection
+    ///    is deterministic but implementation-defined
+    ///
+    /// ## Why Highest Layer?
+    ///
+    /// HNSW search starts at the entry point and descends through layers. A node
+    /// at a higher layer provides better "coverage" of the graph, allowing search
+    /// to quickly narrow down to the relevant region before descending.
+    ///
+    /// ## Edge Cases
+    ///
+    /// | Scenario | Behavior |
+    /// |----------|----------|
+    /// | All nodes tombstoned | Entry point becomes `None`, searches return empty |
+    /// | Entry point not tombstoned | Entry point unchanged |
+    /// | Single node remains | That node becomes entry point |
+    ///
+    /// ## Return Value
+    ///
+    /// The `entry_point_updated` field in `RebuildStats` indicates whether the
+    /// entry point was changed during rebuild.
+    /// Emits `hnsw.rebuild.duration_ms`, `hnsw.rebuild.count`,
+    /// `hnsw.rebuild.nodes_removed`, and `hnsw.index.size` metrics when a
+    /// sink is attached.
+    pub fn rebuild(&mut self) -> RebuildStats {
+        let start = std::time::Instant::now();
+
+        let stats = self.rebuild_inner();
+
+        // Emit metrics
+        let elapsed = start.elapsed().as_secs_f64() * 1000.0;
+        metrics::emit(
+            &self.metrics,
+            MetricEvent {
+                name: metrics::names::HNSW_REBUILD_DURATION_MS,
+                value: MetricValue::Histogram(elapsed),
+                labels: vec![],
+            },
+        );
+        metrics::emit(
+            &self.metrics,
+            MetricEvent {
+                name: metrics::names::HNSW_REBUILD_COUNT,
+                value: MetricValue::Counter(1),
+                labels: vec![],
+            },
+        );
+        metrics::emit(
+            &self.metrics,
+            MetricEvent {
+                name: metrics::names::HNSW_REBUILD_NODES_REMOVED,
+                value: MetricValue::Gauge(stats.nodes_removed as f64),
+                labels: vec![],
+            },
+        );
+        metrics::emit(
+            &self.metrics,
+            MetricEvent {
+                name: metrics::names::HNSW_INDEX_SIZE,
+                value: MetricValue::Gauge(self.len_live() as f64),
+                labels: vec![],
+            },
+        );
+
+        stats
+    }
+
+    /// Inner rebuild logic (uninstrumented).
+    ///
+    /// Rebuilds the dense storage by compacting: removes tombstoned nodes and
+    /// re-assigns internal IDs so the Vec remains dense.
+    fn rebuild_inner(&mut self) -> RebuildStats {
+        let nodes_before = self.nodes.len();
+        let nodes_removed = self.tombstone_count;
+
+        // Track if entry point needs update
+        let entry_point_was_tombstone = self
+            .entry_point
+            .map(|ep| self.is_tombstoned(ep))
+            .unwrap_or(false);
+
+        // Build old_to_new mapping: compact non-tombstoned nodes
+        let mut old_to_new: Vec<Option<usize>> = vec![None; self.nodes.len()];
+        let mut new_nodes: Vec<super::super::node::HnswNode> =
+            Vec::with_capacity(self.nodes.len() - nodes_removed);
+        let mut new_internal_to_id: Vec<NodeId> =
+            Vec::with_capacity(self.nodes.len() - nodes_removed);
+        let mut new_id_to_internal =
+            std::collections::HashMap::with_capacity(self.nodes.len() - nodes_removed);
+
+        let mut new_idx = 0usize;
+        for old_idx in 0..self.nodes.len() {
+            if self.is_tombstoned(old_idx) {
+                // Remove from external mapping
+                let ext_id = self.internal_to_id[old_idx];
+                self.id_to_internal.remove(&ext_id);
+                continue;
+            }
+            old_to_new[old_idx] = Some(new_idx);
+            let ext_id = self.internal_to_id[old_idx];
+            new_id_to_internal.insert(ext_id, new_idx);
+            new_internal_to_id.push(ext_id);
+            new_idx += 1;
+        }
+
+        // Clone nodes and remap neighbor IDs
+        let mut edges_cleaned = 0usize;
+        for old_idx in 0..self.nodes.len() {
+            if self.is_tombstoned(old_idx) {
+                continue;
+            }
+            let mut node = self.nodes[old_idx].clone();
+            for neighbors in &mut node.neighbors {
+                let before = neighbors.len();
+                // Remap internal IDs and remove references to tombstoned nodes
+                *neighbors = neighbors
+                    .iter()
+                    .filter_map(|&old_nid| old_to_new[old_nid])
+                    .collect();
+                edges_cleaned += before - neighbors.len();
+            }
+            new_nodes.push(node);
+        }
+
+        // Update entry point
+        let entry_point_updated = if entry_point_was_tombstone || self.entry_point.is_none() {
+            // Find new entry point among surviving nodes
+            let new_ep = new_nodes
+                .iter()
+                .enumerate()
+                .max_by_key(|(_, n)| n.max_layer)
+                .map(|(idx, _)| idx);
+            self.entry_point = new_ep;
+            entry_point_was_tombstone
+        } else {
+            // Remap existing entry point
+            self.entry_point = self.entry_point.and_then(|old_ep| old_to_new[old_ep]);
+            false
+        };
+
+        // Update max_level
+        self.max_level = new_nodes.iter().map(|n| n.max_layer).max().unwrap_or(0);
+
+        // Swap in compacted state
+        self.nodes = new_nodes;
+        self.id_to_internal = new_id_to_internal;
+        self.internal_to_id = new_internal_to_id;
+        self.tombstones.clear();
+        self.tombstone_count = 0;
+        self.additions_since_rebuild = 0;
+
+        // Rebuild quantized arena from compacted nodes
+        self.quantized.clear();
+        for node in &self.nodes {
+            self.quantized.push(&node.vector, node.norm);
+        }
+
+        RebuildStats {
+            nodes_before,
+            nodes_removed,
+            nodes_after: self.nodes.len(),
+            edges_cleaned,
+            entry_point_updated,
+        }
+    }
+
+    /// Clear all data from the index.
+    pub fn clear(&mut self) {
+        self.nodes.clear();
+        self.id_to_internal.clear();
+        self.internal_to_id.clear();
+        self.tombstones.clear();
+        self.tombstone_count = 0;
+        self.quantized.clear();
+        self.entry_point = None;
+        self.max_level = 0;
+        self.additions_since_rebuild = 0;
+    }
+
+    /// Update entry point to the node with the highest max_layer.
+    #[allow(dead_code)]
+    pub(super) fn update_entry_point(&mut self) {
+        let new_entry = self
+            .nodes
+            .iter()
+            .enumerate()
+            .filter(|(idx, _)| !self.is_tombstoned(*idx))
+            .max_by_key(|(_, n)| n.max_layer)
+            .map(|(idx, _)| idx);
+
+        self.entry_point = new_entry;
+        self.max_level = self
+            .nodes
+            .iter()
+            .enumerate()
+            .filter(|(idx, _)| !self.is_tombstoned(*idx))
+            .map(|(_, n)| n.max_layer)
+            .max()
+            .unwrap_or(0);
+    }
+}
diff --git a/crates/khive-hnsw/src/index/search.rs b/crates/khive-hnsw/src/index/search.rs
new file mode 100644
index 00000000..42c1c78a
--- /dev/null
+++ b/crates/khive-hnsw/src/index/search.rs
@@ -0,0 +1,836 @@
+//! Search operations for HNSW index.
+
+use khive_score::DeterministicScore;
+use crate::NodeId;
+
+use super::HnswIndex;
+use crate::error::{Result, RetrievalError};
+use crate::config::DistanceMetric;
+use crate::distance::{cosine_distance_from_parts, distance_to_similarity, OrderedF32};
+use crate::search_context::HnswSearchContext;
+use crate::metrics::{self, MetricEvent, MetricValue};
+
+/// Index size below which exact linear scan beats graph traversal for Cosine/Dot.
+/// Empirically measured crossover for dim=384, ef_search=80: HNSW graph ≈ exact scan at ~4K nodes.
+/// Using 3K keeps us firmly in the "exact wins" zone while leaving 5K+ to the graph.
+const EXACT_SCAN_THRESHOLD: usize = 3_000;
+
+// ---------------------------------------------------------------------------
+// Inlined distance function type
+// ---------------------------------------------------------------------------
+
+/// Distance function signature: (query, query_norm, vector, vector_norm) -> distance.
+///
+/// Resolved once per search from `DistanceMetric` so the inner loop avoids a
+/// `match` dispatch per neighbor. The compiler can inline the concrete SIMD
+/// kernel through the function pointer on most targets.
+type DistanceFn = fn(&[f32], f32, &[f32], f32) -> f32;
+
+/// Resolve the metric enum to a concrete distance function pointer.
+///
+/// This is called once at the top of `search_layer_inner_ctx` so the hot loop
+/// uses a direct call instead of branching on `DistanceMetric` per neighbor.
+#[inline]
+fn resolve_distance_fn(metric: DistanceMetric) -> DistanceFn {
+    match metric {
+        DistanceMetric::Cosine => |a, a_norm, b, b_norm| {
+            let dot = lattice_embed::simd::dot_product(a, b);
+            cosine_distance_from_parts(dot, a_norm, b_norm)
+        },
+        DistanceMetric::Dot => |a, _a_norm, b, _b_norm| -lattice_embed::simd::dot_product(a, b),
+        DistanceMetric::L2 => {
+            |a, _a_norm, b, _b_norm| lattice_embed::simd::squared_euclidean_distance(a, b)
+        }
+        // Fall back to cosine for future variants.
+        _ => |a, a_norm, b, b_norm| {
+            let dot = lattice_embed::simd::dot_product(a, b);
+            cosine_distance_from_parts(dot, a_norm, b_norm)
+        },
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Batch-4 distance helpers (query-vs-4-candidates HNSW fast path)
+// ---------------------------------------------------------------------------
+
+/// Check if a cached vector norm (the sqrt norm stored in HnswNode) is ≈ 1.0.
+///
+/// HNSW stores sqrt norms, so the squared norm is `norm²`. We apply the same
+/// 1e-4 threshold as `is_unit_norm` in `foundation/embed/src/simd/tier.rs`.
+#[inline]
+fn cached_norm_is_unit(norm: f32) -> bool {
+    norm.is_finite() && ((norm * norm) - 1.0).abs() < 1e-4
+}
+
+/// Convert four dot products to HNSW distances using cached candidate norms.
+///
+/// For Cosine with unit query + unit candidate: `1 - dot.clamp(-1, 1)` (no sqrt/divide).
+/// For Cosine otherwise: `cosine_distance_from_parts` (full formula).
+/// For Dot: negate the dot (HNSW uses minimum-distance ordering).
+#[inline]
+fn hnsw_distance_batch4_from_dots(
+    metric: DistanceMetric,
+    dots: [f32; 4],
+    query_norm: f32,
+    query_is_unit: bool,
+    norms: [f32; 4],
+) -> [f32; 4] {
+    match metric {
+        DistanceMetric::Cosine => {
+            let mut out = [0.0f32; 4];
+            for j in 0..4 {
+                out[j] = if query_is_unit && cached_norm_is_unit(norms[j]) {
+                    1.0 - dots[j].clamp(-1.0, 1.0)
+                } else {
+                    cosine_distance_from_parts(dots[j], query_norm, norms[j])
+                };
+            }
+            out
+        }
+        DistanceMetric::Dot => [-dots[0], -dots[1], -dots[2], -dots[3]],
+        _ => unreachable!("hnsw_distance_batch4_from_dots: unexpected metric"),
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Software prefetch helpers
+// ---------------------------------------------------------------------------
+
+/// Prefetch a memory region into L1 data cache (temporal, keep in cache).
+///
+/// On aarch64 this emits `PRFM PLDL1KEEP`; on x86_64 it emits `PREFETCHT0`.
+/// On other architectures the call is a no-op.
+///
+/// # Safety
+///
+/// The pointer does not need to be valid or aligned -- hardware prefetch
+/// instructions are advisory and silently ignore bad addresses (including
+/// null). The unsafe block is required only because we use inline asm /
+/// intrinsics.
+#[inline(always)]
+fn prefetch_read_data(ptr: *const f32) {
+    #[cfg(target_arch = "aarch64")]
+    {
+        // PRFM PLDL1KEEP: prefetch for load, L1 data cache, temporal
+        // SAFETY: `PRFM` is an advisory hint instruction — the hardware silently
+        // ignores invalid or unmapped addresses. No validity guarantee on `ptr`
+        // is required; the instruction cannot fault. The `nostack` and
+        // `preserves_flags` options ensure the asm does not clobber the stack
+        // pointer or NZCV flags.
+        unsafe {
+            core::arch::asm!(
+                "prfm pldl1keep, [{x}]",
+                x = in(reg) ptr,
+                options(nostack, preserves_flags)
+            );
+        }
+    }
+    #[cfg(target_arch = "x86_64")]
+    {
+        // SAFETY: `_mm_prefetch` is a prefetch hint — the CPU silently ignores
+        // invalid or unmapped addresses and the instruction cannot fault.
+        // No alignment or validity requirement on `ptr` is imposed by the x86
+        // ISA for prefetch instructions.
+        unsafe {
+            core::arch::x86_64::_mm_prefetch(ptr as *const i8, core::arch::x86_64::_MM_HINT_T0);
+        }
+    }
+    #[cfg(not(any(target_arch = "aarch64", target_arch = "x86_64")))]
+    {
+        let _ = ptr;
+    }
+}
+
+impl HnswIndex {
+    /// Search for k nearest neighbors.
+    ///
+    /// Returns results sorted by descending score (most similar first).
+    /// Tombstoned nodes are automatically filtered.
+    ///
+    /// Emits `hnsw.search.duration_ms`, `hnsw.search.count`, and
+    /// `hnsw.search.results` metrics when a sink is attached.
+    ///
+    /// **PROOF CORRESPONDENCE**: Lion.Retrieval.HNSW.search_complexity_log
+    /// Search complexity is O(ef * log_M(N)) where:
+    /// - ef is the search expansion factor
+    /// - M is the number of neighbors per node
+    /// - N is the total number of nodes
+    pub fn search(&self, query: &[f32], k: usize) -> Result<Vec<(NodeId, DeterministicScore)>> {
+        let start = std::time::Instant::now();
+
+        let result = self.search_inner(query, k);
+
+        // Emit metrics
+        let elapsed = start.elapsed().as_secs_f64() * 1000.0;
+        metrics::emit(
+            &self.metrics,
+            MetricEvent {
+                name: metrics::names::HNSW_SEARCH_DURATION_MS,
+                value: MetricValue::Histogram(elapsed),
+                labels: vec![],
+            },
+        );
+        metrics::emit(
+            &self.metrics,
+            MetricEvent {
+                name: metrics::names::HNSW_SEARCH_COUNT,
+                value: MetricValue::Counter(1),
+                labels: vec![],
+            },
+        );
+        if let Ok(ref results) = result {
+            metrics::emit(
+                &self.metrics,
+                MetricEvent {
+                    name: metrics::names::HNSW_SEARCH_RESULTS,
+                    value: MetricValue::Gauge(results.len() as f64),
+                    labels: vec![],
+                },
+            );
+        }
+
+        result
+    }
+
+    /// Search for k nearest neighbors using a pre-allocated search context.
+    ///
+    /// This avoids per-query heap allocation by reusing buffers across searches.
+    /// For maximum throughput in batch/streaming scenarios, create one
+    /// `HnswSearchContext` and pass it to every search call.
+    ///
+    /// Returns results sorted by descending score (most similar first).
+    /// Tombstoned nodes are automatically filtered.
+    ///
+    /// Emits the same metrics as [`search`](Self::search).
+    pub fn search_with_context(
+        &self,
+        query: &[f32],
+        k: usize,
+        ctx: &mut HnswSearchContext,
+    ) -> Result<Vec<(NodeId, DeterministicScore)>> {
+        let start = std::time::Instant::now();
+
+        let result = self.search_inner_with_ctx(query, k, ctx);
+
+        // Emit metrics
+        let elapsed = start.elapsed().as_secs_f64() * 1000.0;
+        metrics::emit(
+            &self.metrics,
+            MetricEvent {
+                name: metrics::names::HNSW_SEARCH_DURATION_MS,
+                value: MetricValue::Histogram(elapsed),
+                labels: vec![],
+            },
+        );
+        metrics::emit(
+            &self.metrics,
+            MetricEvent {
+                name: metrics::names::HNSW_SEARCH_COUNT,
+                value: MetricValue::Counter(1),
+                labels: vec![],
+            },
+        );
+        if let Ok(ref results) = result {
+            metrics::emit(
+                &self.metrics,
+                MetricEvent {
+                    name: metrics::names::HNSW_SEARCH_RESULTS,
+                    value: MetricValue::Gauge(results.len() as f64),
+                    labels: vec![],
+                },
+            );
+        }
+
+        result
+    }
+
+    /// Inner search logic (uninstrumented), allocating fresh buffers.
+    fn search_inner(&self, query: &[f32], k: usize) -> Result<Vec<(NodeId, DeterministicScore)>> {
+        let ef = self.config.ef_search.max(k);
+        let mut ctx = HnswSearchContext::new(ef);
+        self.search_inner_with_ctx(query, k, &mut ctx)
+    }
+
+    /// Inner search logic using a caller-provided search context.
+    fn search_inner_with_ctx(
+        &self,
+        query: &[f32],
+        k: usize,
+        ctx: &mut HnswSearchContext,
+    ) -> Result<Vec<(NodeId, DeterministicScore)>> {
+        if query.len() != self.config.dimensions {
+            return Err(RetrievalError::DimensionMismatch {
+                expected: self.config.dimensions,
+                actual: query.len(),
+            });
+        }
+
+        if self.nodes.is_empty() {
+            return Ok(Vec::new());
+        }
+
+        // H3: exact scan beats graph traversal for small Cosine/Dot indexes.
+        if self.nodes.len() <= EXACT_SCAN_THRESHOLD
+            && matches!(
+                self.config.metric,
+                DistanceMetric::Cosine | DistanceMetric::Dot
+            )
+        {
+            return self.exact_scan_top_k(query, k);
+        }
+
+        let entry_point = match self.entry_point {
+            Some(ep) => ep,
+            None => return Ok(Vec::new()),
+        };
+
+        // The entry point should always be live because `delete()` calls
+        // `repair_entry_point_after_delete` to maintain this invariant.
+        // The check below is a defensive fallback for indexes that were
+        // constructed before this fix or restored from old snapshots.
+        let effective_entry = if self.is_tombstoned(entry_point) {
+            match self.find_live_neighbor(entry_point) {
+                Some(alt) => alt,
+                None => return Ok(Vec::new()), // All nodes are tombstoned
+            }
+        } else {
+            entry_point
+        };
+
+        self.search_from_entry_with_ctx(query, k, effective_entry, ctx)
+    }
+
+    /// Search from a specific entry point with tombstone filtering, using pre-allocated context.
+    fn search_from_entry_with_ctx(
+        &self,
+        query: &[f32],
+        k: usize,
+        entry_point: usize,
+        ctx: &mut HnswSearchContext,
+    ) -> Result<Vec<(NodeId, DeterministicScore)>> {
+        let query_norm = query.iter().map(|x| x * x).sum::<f32>().sqrt();
+        let (effective_k, effective_ef) = self.compute_overscan(k);
+
+        // Search from top layer
+        let mut current_nearest = vec![entry_point];
+
+        // Traverse upper layers (greedy search with ef=1)
+        for l in (1..=self.max_level).rev() {
+            self.search_layer_inner_ctx(query, query_norm, &current_nearest, 1, l, false, ctx);
+            if !ctx.result_buf.is_empty() {
+                current_nearest = vec![ctx.result_buf[0].1];
+            }
+        }
+
+        // If final entry point is tombstoned after upper-layer traversal,
+        // find a live neighbor. This is rare since the entry point invariant
+        // ensures we start from a live node, but upper-layer greedy search
+        // can land on a tombstoned node if it was tombstoned after insertion.
+        let final_entry = current_nearest[0];
+        if self.is_tombstoned(final_entry) {
+            match self.find_live_neighbor(final_entry) {
+                Some(alt) => current_nearest = vec![alt],
+                None => return Ok(Vec::new()), // All nodes tombstoned
+            }
+        }
+
+        // Search layer 0 with full ef, filtering tombstones.
+        // effective_ef is already scaled up by the tombstone ratio.
+        let ef = effective_ef.max(effective_k);
+        self.search_layer_inner_ctx(query, query_norm, &current_nearest, ef, 0, true, ctx);
+
+        // Convert internal IDs to external NodeId with DeterministicScore.
+        // For L2, the internal search used squared_euclidean_distance for ordering;
+        // recover the true L2 distance before converting to similarity.
+        let is_l2 = self.config.metric == DistanceMetric::L2;
+        let search_results: Vec<(NodeId, DeterministicScore)> = ctx
+            .result_buf
+            .iter()
+            .filter(|(_, iid)| !self.is_tombstoned(*iid))
+            .take(k)
+            .map(|(dist, iid)| {
+                let true_dist = if is_l2 { dist.max(0.0).sqrt() } else { *dist };
+                let similarity = distance_to_similarity(true_dist, self.config.metric);
+                (
+                    self.external_id(*iid),
+                    DeterministicScore::from_f32(similarity),
+                )
+            })
+            .collect();
+
+        Ok(search_results)
+    }
+
+    /// Exact linear scan for small indexes (n <= EXACT_SCAN_THRESHOLD).
+    ///
+    /// Uses the batch-4 SIMD dot kernel for throughput. For Cosine and Dot metrics
+    /// only — the early-exit condition in search_inner_with_ctx enforces this.
+    fn exact_scan_top_k(
+        &self,
+        query: &[f32],
+        k: usize,
+    ) -> Result<Vec<(NodeId, DeterministicScore)>> {
+        if k == 0 {
+            return Ok(Vec::new());
+        }
+
+        let dot4 = lattice_embed::simd::resolved_dot_product_batch4_kernel();
+        let dot1 = lattice_embed::simd::resolved_dot_product_kernel();
+
+        let query_norm = query.iter().map(|x| x * x).sum::<f32>().sqrt();
+        let query_is_unit = cached_norm_is_unit(query_norm);
+        let metric = self.config.metric;
+        let n = self.nodes.len();
+
+        let mut scored: Vec<(usize, f32)> = Vec::with_capacity(n);
+        let mut i = 0usize;
+
+        while i + 4 <= n {
+            let dots = dot4(
+                query,
+                &self.nodes[i].vector,
+                &self.nodes[i + 1].vector,
+                &self.nodes[i + 2].vector,
+                &self.nodes[i + 3].vector,
+            );
+            let norms = [
+                self.nodes[i].norm,
+                self.nodes[i + 1].norm,
+                self.nodes[i + 2].norm,
+                self.nodes[i + 3].norm,
+            ];
+            let dists =
+                hnsw_distance_batch4_from_dots(metric, dots, query_norm, query_is_unit, norms);
+            for j in 0..4 {
+                if !self.is_tombstoned(i + j) {
+                    scored.push((i + j, distance_to_similarity(dists[j], metric)));
+                }
+            }
+            i += 4;
+        }
+        while i < n {
+            if !self.is_tombstoned(i) {
+                let dot = dot1(query, &self.nodes[i].vector);
+                let dist = if query_is_unit && cached_norm_is_unit(self.nodes[i].norm) {
+                    1.0 - dot.clamp(-1.0, 1.0)
+                } else {
+                    match metric {
+                        DistanceMetric::Cosine => {
+                            cosine_distance_from_parts(dot, query_norm, self.nodes[i].norm)
+                        }
+                        DistanceMetric::Dot => -dot,
+                        _ => unreachable!(),
+                    }
+                };
+                scored.push((i, distance_to_similarity(dist, metric)));
+            }
+            i += 1;
+        }
+
+        if scored.is_empty() {
+            return Ok(Vec::new());
+        }
+
+        let effective_k = k.min(scored.len());
+        if scored.len() > effective_k {
+            scored.select_nth_unstable_by(effective_k - 1, |(_, a), (_, b)| {
+                b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal)
+            });
+            scored.truncate(effective_k);
+        }
+        scored.sort_by(|(_, a), (_, b)| b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal));
+
+        Ok(scored
+            .into_iter()
+            .map(|(iid, sim)| (self.external_id(iid), DeterministicScore::from_f32(sim)))
+            .collect())
+    }
+
+    /// Find a live (non-tombstoned) node by searching neighbors of the given
+    /// node across all layers. Returns `None` only if every reachable neighbor
+    /// and every node in the index is tombstoned.
+    ///
+    /// Complexity: O(M * L) in the typical case (M = neighbors per layer,
+    /// L = layers). Falls back to O(N) scan only if all neighbors are dead.
+    fn find_live_neighbor(&self, node_id: usize) -> Option<usize> {
+        let node = &self.nodes[node_id];
+        // Check neighbors from highest layer down (higher layers have better
+        // graph coverage, so finding a live neighbor there is preferable).
+        for layer in (0..node.neighbors.len()).rev() {
+            for &neighbor_id in &node.neighbors[layer] {
+                if !self.is_tombstoned(neighbor_id) {
+                    return Some(neighbor_id);
+                }
+            }
+        }
+
+        // Extremely rare fallback: all neighbors tombstoned. O(N) scan.
+        (0..self.nodes.len()).find(|&iid| !self.is_tombstoned(iid))
+    }
+
+    /// Compute overscan factors based on tombstone ratio.
+    ///
+    /// Returns `(effective_k, effective_ef)` where both are scaled up
+    /// proportionally to compensate for tombstoned nodes that will be
+    /// filtered from results.
+    ///
+    /// Previously only `k` was scaled, which had no effect when
+    /// `ef_search >> k` (the common case). Now `ef_search` is also scaled
+    /// so the beam width expands to compensate for dead nodes encountered
+    /// during graph traversal.
+    pub(super) fn compute_overscan(&self, k: usize) -> (usize, usize) {
+        let stats = self.tombstone_stats();
+        if stats.tombstone_count == 0 {
+            return (k, self.config.ef_search);
+        }
+
+        let live_ratio = stats.live_nodes as f64 / stats.total_nodes.max(1) as f64;
+        if live_ratio <= 0.0 {
+            return (k, self.config.ef_search);
+        }
+
+        let inv_live = 1.0 / live_ratio;
+
+        let overscan_k = (k as f64 * inv_live).ceil() as usize;
+        let effective_k = overscan_k.min(k * 4).max(k);
+
+        let overscan_ef = (self.config.ef_search as f64 * inv_live).ceil() as usize;
+        let effective_ef = overscan_ef
+            .min(self.config.ef_search * 4)
+            .max(self.config.ef_search);
+
+        (effective_k, effective_ef)
+    }
+
+    /// Search a single layer for nearest neighbors (allocates fresh buffers).
+    ///
+    /// Used by insert path where a search context is not available.
+    /// Returns (distance, internal_id) pairs.
+    pub(crate) fn search_layer(
+        &self,
+        query: &[f32],
+        query_norm: f32,
+        entry_points: &[usize],
+        ef: usize,
+        layer: usize,
+    ) -> Vec<(f32, usize)> {
+        self.search_layer_inner(query, query_norm, entry_points, ef, layer, false)
+    }
+
+    /// Internal search implementation with tombstone filtering option.
+    /// Allocates fresh buffers -- used by the insert path.
+    pub(super) fn search_layer_inner(
+        &self,
+        query: &[f32],
+        query_norm: f32,
+        entry_points: &[usize],
+        ef: usize,
+        layer: usize,
+        filter_tombstones: bool,
+    ) -> Vec<(f32, usize)> {
+        let mut ctx = HnswSearchContext::new(ef);
+        self.search_layer_inner_ctx(
+            query,
+            query_norm,
+            entry_points,
+            ef,
+            layer,
+            filter_tombstones,
+            &mut ctx,
+        );
+        // Move out of ctx to avoid clone
+        std::mem::take(&mut ctx.result_buf)
+    }
+
+    /// Core search implementation using pre-allocated buffers.
+    ///
+    /// Results are written into `ctx.result_buf`, sorted by distance ascending
+    /// with deterministic tie-breaking by external NodeId.
+    ///
+    /// This is the hot path. Optimizations applied:
+    /// - Pre-allocated visited set with O(1) generation-counter clear
+    /// - O(1) array indexing via dense usize IDs (no HashMap probing)
+    /// - Cached worst-distance to avoid heap peek per neighbor
+    /// - Tombstone check skipped entirely when tombstone set is empty
+    /// - Early termination when candidate distance exceeds worst result
+    /// - Inlined distance dispatch: metric resolved to function pointer once
+    /// - Batch neighbor processing with software prefetch pipelining
+    /// - (Optional) INT8 quantized pre-filter: skip f32 for obviously distant neighbors
+    fn search_layer_inner_ctx(
+        &self,
+        query: &[f32],
+        query_norm: f32,
+        entry_points: &[usize],
+        ef: usize,
+        layer: usize,
+        filter_tombstones: bool,
+        ctx: &mut HnswSearchContext,
+    ) {
+        // Reset buffers without deallocating
+        ctx.clear();
+        ctx.ensure_capacity(ef, self.nodes.len());
+
+        // Pre-compute: skip tombstone checks when there are no tombstones.
+        let check_tombstones = filter_tombstones && self.tombstone_count > 0;
+
+        // Resolve distance function once -- eliminates per-neighbor match dispatch.
+        let distance_fn = resolve_distance_fn(self.config.metric);
+
+        // Resolve batch-4 dot kernel once for Cosine/Dot metrics.
+        let metric = self.config.metric;
+        let use_dot_batch4 = matches!(metric, DistanceMetric::Cosine | DistanceMetric::Dot);
+        let dot4_kernel = lattice_embed::simd::resolved_dot_product_batch4_kernel();
+        let query_is_unit = cached_norm_is_unit(query_norm);
+
+        // ---------------------------------------------------------------
+        // INT8 quantized pre-filter setup (only for Cosine on layer 0)
+        // ---------------------------------------------------------------
+        // The quantized path is only used when:
+        // 1. use_quantized is enabled
+        // 2. We're on layer 0 (densest layer, most distance computations)
+        // 3. The metric is Cosine (the only one we have INT8 distance for)
+        // 4. The quantized arena is populated
+        //
+        // For upper layers (ef=1, greedy), the overhead of quantization is
+        // not worthwhile since we evaluate very few candidates.
+        let use_quant = self.use_quantized
+            && layer == 0
+            && self.config.metric == DistanceMetric::Cosine
+            && !self.quantized.meta.is_empty();
+
+        // Pre-quantize the query vector once if we're using the INT8 path.
+        // This avoids re-quantizing per neighbor.
+        let (query_i8, query_scale) = if use_quant {
+            let mut max_abs: f32 = 0.0;
+            for &v in query {
+                if v.is_finite() {
+                    let abs = v.abs();
+                    if abs > max_abs {
+                        max_abs = abs;
+                    }
+                }
+            }
+            let scale = if max_abs > 1e-10 {
+                127.0 / max_abs
+            } else {
+                1.0
+            };
+            let quantized: Vec<i8> = query
+                .iter()
+                .map(|&v| {
+                    if v.is_finite() {
+                        (v * scale).round().clamp(-127.0, 127.0) as i8
+                    } else {
+                        0i8
+                    }
+                })
+                .collect();
+            (quantized, scale)
+        } else {
+            (Vec::new(), 0.0)
+        };
+
+        // Mark entry points as visited
+        ctx.visited.visit_all(entry_points.iter().copied());
+
+        // Initialize with entry points (always use f32 for entry points)
+        for &ep in entry_points {
+            if check_tombstones && self.is_tombstoned(ep) {
+                continue;
+            }
+            let node = &self.nodes[ep];
+            let dist = distance_fn(query, query_norm, &node.vector, node.norm);
+            ctx.candidates
+                .push(std::cmp::Reverse((OrderedF32(dist), ep)));
+            ctx.results.push((OrderedF32(dist), ep));
+        }
+
+        // Track the worst distance in the result set to avoid heap peek per neighbor.
+        let mut worst_dist = ctx
+            .results
+            .peek()
+            .map(|(OrderedF32(d), _)| *d)
+            .unwrap_or(f32::MAX);
+
+        // Scratch buffer for batching neighbor processing.
+        // Each entry: (internal_id, vector_ptr, vector_len, norm).
+        let mut batch: Vec<(usize, *const f32, usize, f32)> = Vec::with_capacity(32);
+
+        while let Some(std::cmp::Reverse((OrderedF32(c_dist), c_id))) = ctx.candidates.pop() {
+            // Early termination: if the closest candidate is worse than the
+            // worst result and we have enough results, we're done.
+            if c_dist > worst_dist && ctx.results.len() >= ef {
+                break;
+            }
+
+            // Explore neighbors -- direct array index, no HashMap lookup
+            let node = &self.nodes[c_id];
+            if layer < node.neighbors.len() {
+                let neighbors = &node.neighbors[layer];
+
+                // Phase 1: Collect unvisited neighbors.
+                // O(1) visited check via generation counter, O(1) node access via array index.
+                batch.clear();
+                for &neighbor_id in neighbors {
+                    if ctx.visited.visit(neighbor_id) {
+                        if check_tombstones && self.is_tombstoned(neighbor_id) {
+                            continue;
+                        }
+
+                        // INT8 pre-filter: skip neighbors that are clearly worse
+                        // than the current worst result. Uses a 10% margin to
+                        // account for quantization error.
+                        if use_quant && ctx.results.len() >= ef {
+                            let approx_dist = self.quantized.cosine_distance_approx(
+                                neighbor_id,
+                                &query_i8,
+                                query_scale,
+                                query_norm,
+                            );
+                            // Only skip if the approximate distance exceeds the
+                            // worst by more than the quantization margin (10%).
+                            // This ensures we never miss a true nearest neighbor.
+                            if approx_dist > worst_dist * 1.1 + 0.01 {
+                                continue;
+                            }
+                        }
+
+                        let neighbor = &self.nodes[neighbor_id];
+                        batch.push((
+                            neighbor_id,
+                            neighbor.vector.as_ptr(),
+                            neighbor.vector.len(),
+                            neighbor.norm,
+                        ));
+                    }
+                }
+
+                // Phase 2: Compute f32 distances with batch-4 SIMD + prefetch pipelining.
+                //
+                // For Cosine/Dot: process 4 candidates at once via the batch-4 dot kernel,
+                // converting raw dots to HNSW distances (with unit-norm shortcut for cosine).
+                // Remainder (< 4) and L2/other metrics use the per-pair distance_fn path.
+                //
+                // Heap updates are applied in original batch order to preserve HNSW recall
+                // and deterministic neighbor ordering.
+                if let Some(&(_, ptr, _, _)) = batch.first() {
+                    prefetch_read_data(ptr);
+                }
+
+                let mut bi = 0;
+
+                // Batch-4 fast path (Cosine / Dot metrics only).
+                if use_dot_batch4 {
+                    while bi + 4 <= batch.len() {
+                        // Prefetch the entry 4 slots ahead to hide memory latency.
+                        if bi + 4 < batch.len() {
+                            prefetch_read_data(batch[bi + 4].1);
+                        }
+
+                        let (id0, p0, l0, n0) = batch[bi];
+                        let (id1, p1, l1, n1) = batch[bi + 1];
+                        let (id2, p2, l2, n2) = batch[bi + 2];
+                        let (id3, p3, l3, n3) = batch[bi + 3];
+
+                        if l0 == query.len()
+                            && l1 == query.len()
+                            && l2 == query.len()
+                            && l3 == query.len()
+                        {
+                            // SAFETY: pointers from live &Vec<f32> within this &self borrow.
+                            let v0 = unsafe { std::slice::from_raw_parts(p0, l0) };
+                            let v1 = unsafe { std::slice::from_raw_parts(p1, l1) };
+                            let v2 = unsafe { std::slice::from_raw_parts(p2, l2) };
+                            let v3 = unsafe { std::slice::from_raw_parts(p3, l3) };
+
+                            let dots = dot4_kernel(query, v0, v1, v2, v3);
+                            let dists = hnsw_distance_batch4_from_dots(
+                                metric,
+                                dots,
+                                query_norm,
+                                query_is_unit,
+                                [n0, n1, n2, n3],
+                            );
+
+                            for (neighbor_id, dist) in [
+                                (id0, dists[0]),
+                                (id1, dists[1]),
+                                (id2, dists[2]),
+                                (id3, dists[3]),
+                            ] {
+                                if !(ctx.results.len() >= ef && dist > worst_dist) {
+                                    ctx.candidates
+                                        .push(std::cmp::Reverse((OrderedF32(dist), neighbor_id)));
+                                    ctx.results.push((OrderedF32(dist), neighbor_id));
+                                    if ctx.results.len() > ef {
+                                        ctx.results.pop();
+                                    }
+                                    if let Some(&(OrderedF32(d), _)) = ctx.results.peek() {
+                                        worst_dist = d;
+                                    }
+                                }
+                            }
+                            bi += 4;
+                            continue;
+                        }
+
+                        // Dimension mismatch — fall through to scalar remainder.
+                        break;
+                    }
+                }
+
+                // Scalar remainder: 0-3 leftover entries after batch-4,
+                // or all entries for L2 / other metrics.
+                while bi < batch.len() {
+                    let (neighbor_id, vec_ptr, vec_len, norm) = batch[bi];
+
+                    if bi + 1 < batch.len() {
+                        let (_, next_ptr, next_len, _) = batch[bi + 1];
+                        prefetch_read_data(next_ptr);
+                        if next_len > 16 {
+                            prefetch_read_data(next_ptr.wrapping_add(16));
+                        }
+                    }
+
+                    // SAFETY: vec_ptr and vec_len come from a live `&Vec<f32>`
+                    // obtained from `self.nodes[neighbor_id]` within this same `&self`
+                    // borrow. The Vec is immutable (&self) and is not mutated between
+                    // Phase 1 (pointer capture) and Phase 2 (dereference).
+                    let neighbor_vec = unsafe { std::slice::from_raw_parts(vec_ptr, vec_len) };
+                    let dist = distance_fn(query, query_norm, neighbor_vec, norm);
+
+                    if !(ctx.results.len() >= ef && dist > worst_dist) {
+                        ctx.candidates
+                            .push(std::cmp::Reverse((OrderedF32(dist), neighbor_id)));
+                        ctx.results.push((OrderedF32(dist), neighbor_id));
+                        if ctx.results.len() > ef {
+                            ctx.results.pop();
+                        }
+                        if let Some(&(OrderedF32(d), _)) = ctx.results.peek() {
+                            worst_dist = d;
+                        }
+                    }
+                    bi += 1;
+                }
+            }
+        }
+
+        // Drain results into the scratch buffer and sort.
+        // Tie-break by external NodeId for deterministic ordering.
+        ctx.result_buf.clear();
+        ctx.result_buf
+            .extend(ctx.results.drain().map(|(d, iid)| (d.0, iid)));
+        ctx.result_buf
+            .sort_by(|a, b| match OrderedF32(a.0).cmp(&OrderedF32(b.0)) {
+                std::cmp::Ordering::Equal => self.external_id(a.1).cmp(&self.external_id(b.1)),
+                other => other,
+            });
+    }
+}
+
+// NOTE ON SORTED NEIGHBORS: Sorting neighbor lists by distance was evaluated. It adds
+// O(M log M) work per connection during insert. With dense Vec-based node storage,
+// neighbors are already accessed via O(1) array index. Sorting could improve early
+// termination in the distance computation phase but the benefit is marginal.
+// The `sort_neighbors` method is preserved for future use with post-rebuild batch
+// optimization, but is not called on the insert hot path.
diff --git a/crates/khive-hnsw/src/lib.rs b/crates/khive-hnsw/src/lib.rs
new file mode 100644
index 00000000..841871fd
--- /dev/null
+++ b/crates/khive-hnsw/src/lib.rs
@@ -0,0 +1,131 @@
+//! HNSW (Hierarchical Navigable Small World) vector index.
+//!
+//! High-performance approximate nearest neighbor search with O(log N) complexity.
+//! See ADR-003 for configuration and performance characteristics.
+//!
+//! # Usage
+//!
+//! ```rust,ignore
+//! use khive_hnsw::{HnswConfig, HnswIndex, NodeId};
+//!
+//! // Create index with default config
+//! let config = HnswConfig::default();
+//! let mut index = HnswIndex::new(768);
+//!
+//! // Insert vectors
+//! let id = NodeId::new([1u8; 16]);
+//! index.insert(id, vec![0.1; 768])?;
+//!
+//! // Search for k nearest neighbors
+//! let results = index.search(&query_vector, 10)?;
+//! for (id, score) in results {
+//!     println!("{}: {}", id, score);
+//! }
+//! ```
+//!
+//! # Algorithm
+//!
+//! HNSW builds a multi-layer graph where:
+//! - Higher layers have fewer nodes (exponentially distributed)
+//! - Search starts from top layer and descends
+//! - Each layer uses greedy search to find nearest neighbors
+//!
+//! Reference: Malkov & Yashunin, "Efficient and robust approximate nearest
+//! neighbor search using Hierarchical Navigable Small World graphs" (2018)
+
+pub mod alias;
+pub mod arena;
+pub mod checkpoint;
+mod config;
+mod distance;
+pub mod error;
+mod index;
+pub mod metrics;
+mod node;
+pub(crate) mod search_context;
+mod stats;
+
+#[cfg(test)]
+mod tests;
+
+// Re-export public types
+#[cfg(feature = "checkpoint")]
+pub use checkpoint::{HnswCheckpoint, HnswCheckpointStore};
+pub use checkpoint::{HnswCheckpointConfig, HnswSnapshot};
+pub use config::{DistanceMetric, HnswConfig};
+pub use index::HnswIndex;
+pub use search_context::HnswSearchContext;
+pub use stats::{RebuildStats, TombstoneStats};
+
+/// 128-bit opaque node identifier for HNSW entries.
+///
+/// Serializes as a 32-character lowercase hex string (compatible with the
+/// snapshot format used by `HnswSnapshot`).
+#[derive(Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord)]
+pub struct NodeId([u8; 16]);
+
+impl NodeId {
+    /// Create a `NodeId` from raw bytes.
+    #[inline]
+    pub const fn new(bytes: [u8; 16]) -> Self {
+        Self(bytes)
+    }
+
+    /// Return the raw byte representation.
+    #[inline]
+    pub fn as_bytes(&self) -> &[u8; 16] {
+        &self.0
+    }
+}
+
+impl std::fmt::Debug for NodeId {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        write!(f, "NodeId(")?;
+        for b in &self.0 {
+            write!(f, "{b:02x}")?;
+        }
+        write!(f, ")")
+    }
+}
+
+impl std::fmt::Display for NodeId {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        for b in &self.0 {
+            write!(f, "{b:02x}")?;
+        }
+        Ok(())
+    }
+}
+
+impl serde::Serialize for NodeId {
+    fn serialize<S: serde::Serializer>(&self, s: S) -> Result<S::Ok, S::Error> {
+        let mut hex = String::with_capacity(32);
+        for b in &self.0 {
+            hex.push_str(&format!("{b:02x}"));
+        }
+        s.serialize_str(&hex)
+    }
+}
+
+impl<'de> serde::Deserialize<'de> for NodeId {
+    fn deserialize<D: serde::Deserializer<'de>>(d: D) -> Result<Self, D::Error> {
+        let s = String::deserialize(d)?;
+        if s.len() != 32 {
+            return Err(serde::de::Error::custom(format!(
+                "NodeId hex string must be 32 chars, got {}",
+                s.len()
+            )));
+        }
+        let mut bytes = [0u8; 16];
+        for (i, chunk) in s.as_bytes().chunks(2).enumerate() {
+            let hi = char::from(chunk[0])
+                .to_digit(16)
+                .ok_or_else(|| serde::de::Error::custom("invalid hex character"))? as u8;
+            let lo = char::from(chunk[1])
+                .to_digit(16)
+                .ok_or_else(|| serde::de::Error::custom("invalid hex character"))? as u8;
+            bytes[i] = (hi << 4) | lo;
+        }
+        Ok(NodeId(bytes))
+    }
+}
diff --git a/crates/khive-hnsw/src/metrics.rs b/crates/khive-hnsw/src/metrics.rs
new file mode 100644
index 00000000..47468569
--- /dev/null
+++ b/crates/khive-hnsw/src/metrics.rs
@@ -0,0 +1,148 @@
+//! Metrics infrastructure for HNSW observability.
+//!
+//! Consumers attach a `MetricsSink` implementation to an `HnswIndex` to
+//! receive structured telemetry from insert, search, and rebuild operations.
+//!
+//! # Design
+//!
+//! The trait is object-safe (`Arc<dyn MetricsSink>`) so a single sink can be
+//! shared across multiple index instances. The `emit` helper handles the
+//! `None` case (no sink attached) at call sites.
+
+use std::sync::{Arc, Mutex};
+
+// ---------------------------------------------------------------------------
+// Metric value types
+// ---------------------------------------------------------------------------
+
+/// A single metric value emitted from an HNSW operation.
+#[derive(Debug, Clone, PartialEq)]
+pub enum MetricValue {
+    /// Monotonically increasing counter (e.g., insert count).
+    Counter(u64),
+    /// Point-in-time gauge (e.g., index size).
+    Gauge(f64),
+    /// Distribution observation (e.g., operation duration in ms).
+    Histogram(f64),
+}
+
+/// A single metric event emitted from an HNSW operation.
+#[derive(Debug, Clone)]
+pub struct MetricEvent {
+    /// Metric name (use the constants in [`names`]).
+    pub name: &'static str,
+    /// The metric value.
+    pub value: MetricValue,
+    /// Optional key-value label pairs (e.g., `[("metric", "cosine")]`).
+    pub labels: Vec<(&'static str, String)>,
+}
+
+// ---------------------------------------------------------------------------
+// Sink trait
+// ---------------------------------------------------------------------------
+
+/// Receiver for metric events from HNSW operations.
+///
+/// Implement this trait to bridge HNSW telemetry to your observability stack
+/// (e.g., Prometheus, OpenTelemetry, tracing spans).
+///
+/// # Thread Safety
+///
+/// The trait requires `Send + Sync` so that `Arc<dyn MetricsSink>` can be
+/// shared across threads.
+pub trait MetricsSink: Send + Sync {
+    /// Handle a metric event.
+    fn emit(&self, event: MetricEvent);
+}
+
+// ---------------------------------------------------------------------------
+// Emit helper
+// ---------------------------------------------------------------------------
+
+/// Emit a metric event to the attached sink, if any.
+///
+/// This is the call-site helper used by `HnswIndex` internals. It is a no-op
+/// when `sink` is `None`.
+#[inline]
+pub fn emit(sink: &Option<Arc<dyn MetricsSink>>, event: MetricEvent) {
+    if let Some(s) = sink {
+        s.emit(event);
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Metric name constants
+// ---------------------------------------------------------------------------
+
+/// Canonical metric name constants.
+///
+/// Using `&'static str` constants avoids string formatting on the hot path.
+pub mod names {
+    /// Duration of a single insert operation in milliseconds (Histogram).
+    pub const HNSW_INSERT_DURATION_MS: &str = "hnsw.insert.duration_ms";
+    /// Number of insert operations (Counter).
+    pub const HNSW_INSERT_COUNT: &str = "hnsw.insert.count";
+    /// Current live node count after insert (Gauge).
+    pub const HNSW_INDEX_SIZE: &str = "hnsw.index.size";
+
+    /// Duration of a single search operation in milliseconds (Histogram).
+    pub const HNSW_SEARCH_DURATION_MS: &str = "hnsw.search.duration_ms";
+    /// Number of search operations (Counter).
+    pub const HNSW_SEARCH_COUNT: &str = "hnsw.search.count";
+    /// Number of results returned by a search (Gauge).
+    pub const HNSW_SEARCH_RESULTS: &str = "hnsw.search.results";
+
+    /// Duration of a rebuild operation in milliseconds (Histogram).
+    pub const HNSW_REBUILD_DURATION_MS: &str = "hnsw.rebuild.duration_ms";
+    /// Number of rebuild operations (Counter).
+    pub const HNSW_REBUILD_COUNT: &str = "hnsw.rebuild.count";
+    /// Number of nodes removed during a rebuild (Gauge).
+    pub const HNSW_REBUILD_NODES_REMOVED: &str = "hnsw.rebuild.nodes_removed";
+}
+
+// ---------------------------------------------------------------------------
+// Recording sink (test helper)
+// ---------------------------------------------------------------------------
+
+/// A `MetricsSink` that records all events for inspection in tests.
+///
+/// Thread-safe: uses an internal `Mutex`.
+pub struct RecordingSink {
+    events: Mutex<Vec<MetricEvent>>,
+}
+
+impl RecordingSink {
+    /// Create a new, empty recording sink.
+    pub fn new() -> Self {
+        Self {
+            events: Mutex::new(Vec::new()),
+        }
+    }
+
+    /// Return a snapshot of all recorded events.
+    pub fn events(&self) -> Vec<MetricEvent> {
+        self.events.lock().unwrap().clone()
+    }
+
+    /// Clear all recorded events.
+    pub fn clear(&self) {
+        self.events.lock().unwrap().clear();
+    }
+
+    /// Returns `true` if no events have been recorded since the last clear.
+    pub fn is_empty(&self) -> bool {
+        self.events.lock().unwrap().is_empty()
+    }
+}
+
+impl Default for RecordingSink {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+impl MetricsSink for RecordingSink {
+    fn emit(&self, event: MetricEvent) {
+        self.events.lock().unwrap().push(event);
+    }
+}
diff --git a/crates/khive-hnsw/src/node.rs b/crates/khive-hnsw/src/node.rs
new file mode 100644
index 00000000..e1757534
--- /dev/null
+++ b/crates/khive-hnsw/src/node.rs
@@ -0,0 +1,62 @@
+//! Internal HNSW node representation.
+
+/// Internal node in the HNSW graph.
+///
+/// Nodes are stored in a dense `Vec<HnswNode>` indexed by an internal `usize` ID.
+/// The `EmbeddingId` <-> `usize` mapping is maintained by `HnswIndex`.
+/// Neighbor lists use internal `usize` IDs for O(1) array lookups during search.
+#[derive(Debug, Clone)]
+pub(crate) struct HnswNode {
+    /// The vector data.
+    pub vector: Vec<f32>,
+    /// Connections per layer: layer -> list of internal neighbor IDs.
+    pub neighbors: Vec<Vec<usize>>,
+    /// Maximum layer this node exists in.
+    pub max_layer: usize,
+    /// Cached L2 norm for cosine similarity optimization.
+    pub norm: f32,
+}
+
+impl HnswNode {
+    /// Create a new node with computed norm.
+    pub fn new(vector: Vec<f32>, max_layer: usize) -> Self {
+        let norm = vector.iter().map(|x| x * x).sum::<f32>().sqrt();
+        Self {
+            vector,
+            neighbors: vec![Vec::new(); max_layer + 1],
+            max_layer,
+            norm,
+        }
+    }
+
+    /// Update vector and recompute norm.
+    pub fn update_vector(&mut self, vector: Vec<f32>) {
+        self.norm = vector.iter().map(|x| x * x).sum::<f32>().sqrt();
+        self.vector = vector;
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_node_creation() {
+        let vector = vec![3.0, 4.0]; // norm = 5.0
+        let node = HnswNode::new(vector, 2);
+
+        assert_eq!(node.max_layer, 2);
+        assert!((node.norm - 5.0).abs() < 0.001);
+        assert_eq!(node.neighbors.len(), 3); // layers 0, 1, 2
+    }
+
+    #[test]
+    fn test_node_update_vector() {
+        let mut node = HnswNode::new(vec![1.0, 0.0], 1);
+        assert!((node.norm - 1.0).abs() < 0.001);
+
+        node.update_vector(vec![3.0, 4.0]);
+        assert!((node.norm - 5.0).abs() < 0.001);
+        assert_eq!(node.vector, vec![3.0, 4.0]);
+    }
+}
diff --git a/crates/khive-hnsw/src/search_context.rs b/crates/khive-hnsw/src/search_context.rs
new file mode 100644
index 00000000..f7c13425
--- /dev/null
+++ b/crates/khive-hnsw/src/search_context.rs
@@ -0,0 +1,165 @@
+//! Pre-allocated search buffers for HNSW search.
+//!
+//! Avoids per-query heap allocation of `BinaryHeap`, `HashSet`, and result vectors.
+//! Create one `HnswSearchContext` and reuse it across multiple `search_with_context` calls
+//! for maximum throughput.
+//!
+//! # Performance
+//!
+//! The key optimizations are:
+//! 1. **Buffer reuse**: All data structures are cleared between searches but their
+//!    allocated memory persists, eliminating allocator pressure.
+//! 2. **Generation-counter visited set**: Uses a dense `Vec<u64>` indexed directly by
+//!    internal node ID. `clear()` is O(1) (just increment generation counter).
+//!    `visit()` and `is_visited()` are O(1) array lookups with no hashing.
+
+use std::collections::BinaryHeap;
+
+use crate::distance::OrderedF32;
+
+/// O(1) visited set using generation counter and dense array.
+///
+/// Each node slot stores the generation number when it was last visited.
+/// To "clear" the set, we just increment the generation counter -- O(1).
+/// A node is visited iff `markers[id] == generation`.
+///
+/// This replaces `HashSet<EmbeddingId>` which required O(capacity) clear
+/// and O(1) amortized insert with hash computation overhead per operation.
+pub(crate) struct VisitedSet {
+    /// Current generation number. Incremented on each `clear()`.
+    generation: u64,
+    /// Dense array indexed by internal node ID.
+    /// `markers[id] == generation` means node `id` has been visited.
+    markers: Vec<u64>,
+}
+
+impl VisitedSet {
+    /// Create a new visited set with the given capacity hint.
+    pub fn new(capacity: usize) -> Self {
+        Self {
+            generation: 1, // Start at 1 so default 0 values are "not visited"
+            markers: vec![0u64; capacity],
+        }
+    }
+
+    /// Clear the visited set in O(1) by incrementing the generation counter.
+    ///
+    /// On the extremely rare wrap-around (every 2^64 clears), we zero the
+    /// markers array to prevent false positives.
+    #[inline]
+    pub fn clear(&mut self) {
+        self.generation = self.generation.wrapping_add(1);
+        if self.generation == 0 {
+            // Wrapped around -- reset markers to avoid false positives
+            self.markers.fill(0);
+            self.generation = 1;
+        }
+    }
+
+    /// Ensure the set can accommodate node IDs up to `max_id` (inclusive).
+    #[inline]
+    pub fn ensure_capacity(&mut self, max_id: usize) {
+        if max_id >= self.markers.len() {
+            self.markers.resize(max_id + 1, 0);
+        }
+    }
+
+    /// Mark a node as visited. Returns `true` if the node was NOT previously visited
+    /// (i.e., this is the first visit), matching `HashSet::insert` semantics.
+    #[inline]
+    pub fn visit(&mut self, id: usize) -> bool {
+        if id >= self.markers.len() {
+            self.markers.resize(id + 1, 0);
+        }
+        if self.markers[id] == self.generation {
+            false // already visited
+        } else {
+            self.markers[id] = self.generation;
+            true // newly visited
+        }
+    }
+
+    /// Mark multiple nodes as visited.
+    #[inline]
+    pub fn visit_all(&mut self, ids: impl Iterator<Item = usize>) {
+        for id in ids {
+            self.visit(id);
+        }
+    }
+}
+
+/// Pre-allocated search context for HNSW queries.
+///
+/// Reuse across multiple `search_with_context` calls to amortize allocation cost.
+/// The context holds the working buffers for the greedy beam search:
+///
+/// - `candidates`: min-heap of nodes to explore (closest first) -- uses internal usize IDs
+/// - `results`: max-heap of best results so far (furthest first, for pruning) -- uses internal usize IDs
+/// - `visited`: generation-counter visited set indexed by internal usize ID
+/// - `result_buf`: scratch buffer for final sorted output (internal usize IDs)
+///
+/// # Example
+///
+/// ```rust,ignore
+/// use khive_retrieval::hnsw::{HnswIndex, HnswSearchContext};
+///
+/// let index = HnswIndex::new(128);
+/// // ... insert vectors ...
+///
+/// let mut ctx = HnswSearchContext::new(index.config().ef_search);
+///
+/// // Reuse ctx across many searches
+/// for query in queries {
+///     let results = index.search_with_context(&query, 10, &mut ctx)?;
+///     // process results...
+/// }
+/// ```
+pub struct HnswSearchContext {
+    /// Min-heap: candidates to explore (closest first). Uses internal usize IDs.
+    pub(crate) candidates: BinaryHeap<std::cmp::Reverse<(OrderedF32, usize)>>,
+    /// Max-heap: best results so far (furthest first, for pruning). Uses internal usize IDs.
+    pub(crate) results: BinaryHeap<(OrderedF32, usize)>,
+    /// Visited node tracking with O(1) operations.
+    pub(crate) visited: VisitedSet,
+    /// Scratch buffer for final sorted results (internal usize IDs).
+    pub(crate) result_buf: Vec<(f32, usize)>,
+    /// Pre-allocated capacity hint (ef value used to size buffers).
+    ef_hint: usize,
+}
+
+impl HnswSearchContext {
+    /// Create a new search context pre-allocated for the given `ef` value.
+    ///
+    /// The `ef` parameter should match or exceed the `ef_search` config value
+    /// of the index you plan to search.
+    pub fn new(ef: usize) -> Self {
+        Self {
+            candidates: BinaryHeap::with_capacity(ef),
+            results: BinaryHeap::with_capacity(ef),
+            visited: VisitedSet::new(ef * 4), // Over-allocate to reduce resizes
+            result_buf: Vec::with_capacity(ef),
+            ef_hint: ef,
+        }
+    }
+
+    /// Clear all buffers without deallocating.
+    ///
+    /// Called automatically at the start of each search. You do not need to
+    /// call this manually.
+    pub(crate) fn clear(&mut self) {
+        self.candidates.clear();
+        self.results.clear();
+        self.visited.clear(); // O(1) generation increment
+        self.result_buf.clear();
+    }
+
+    /// Ensure all buffers are large enough for the given `ef` and node count.
+    pub(crate) fn ensure_capacity(&mut self, ef: usize, num_nodes: usize) {
+        if ef > self.ef_hint {
+            self.result_buf
+                .reserve(ef.saturating_sub(self.result_buf.capacity()));
+            self.ef_hint = ef;
+        }
+        self.visited.ensure_capacity(num_nodes);
+    }
+}
diff --git a/crates/khive-hnsw/src/stats.rs b/crates/khive-hnsw/src/stats.rs
new file mode 100644
index 00000000..acd0df36
--- /dev/null
+++ b/crates/khive-hnsw/src/stats.rs
@@ -0,0 +1,79 @@
+//! HNSW index statistics types.
+
+use super::config::DEFAULT_REBUILD_THRESHOLD;
+
+/// Statistics about tombstoned nodes in an HNSW index.
+#[derive(Debug, Clone, Copy)]
+pub struct TombstoneStats {
+    /// Total number of nodes in the graph.
+    pub total_nodes: usize,
+    /// Number of tombstoned nodes.
+    pub tombstone_count: usize,
+    /// Number of live (non-tombstoned) nodes.
+    pub live_nodes: usize,
+    /// Ratio of tombstoned to total nodes (0.0 - 1.0).
+    pub ratio: f64,
+}
+
+impl TombstoneStats {
+    /// Check if rebuild is needed based on default threshold.
+    pub fn needs_rebuild(&self) -> bool {
+        self.needs_rebuild_at(DEFAULT_REBUILD_THRESHOLD)
+    }
+
+    /// Check if rebuild is needed at a specific threshold.
+    pub fn needs_rebuild_at(&self, threshold: f64) -> bool {
+        self.ratio > threshold
+    }
+}
+
+/// Statistics returned from a rebuild operation.
+#[derive(Debug, Clone, Copy)]
+pub struct RebuildStats {
+    /// Number of nodes before rebuild.
+    pub nodes_before: usize,
+    /// Number of nodes removed (tombstones).
+    pub nodes_removed: usize,
+    /// Number of nodes after rebuild.
+    pub nodes_after: usize,
+    /// Number of neighbor references cleaned up.
+    pub edges_cleaned: usize,
+    /// Whether entry point was updated.
+    pub entry_point_updated: bool,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_tombstone_stats_needs_rebuild() {
+        let stats = TombstoneStats {
+            total_nodes: 100,
+            tombstone_count: 10,
+            live_nodes: 90,
+            ratio: 0.10,
+        };
+        assert!(!stats.needs_rebuild()); // 10% == 10% threshold (not strictly greater)
+
+        let stats = TombstoneStats {
+            total_nodes: 100,
+            tombstone_count: 20,
+            live_nodes: 80,
+            ratio: 0.20,
+        };
+        assert!(stats.needs_rebuild()); // 20% > 10% threshold (ADR-003)
+    }
+
+    #[test]
+    fn test_tombstone_stats_custom_threshold() {
+        let stats = TombstoneStats {
+            total_nodes: 100,
+            tombstone_count: 10,
+            live_nodes: 90,
+            ratio: 0.10,
+        };
+        assert!(stats.needs_rebuild_at(0.05)); // 10% > 5%
+        assert!(!stats.needs_rebuild_at(0.15)); // 10% < 15%
+    }
+}
diff --git a/crates/khive-hnsw/src/tests.rs b/crates/khive-hnsw/src/tests.rs
new file mode 100644
index 00000000..caeb5fa1
--- /dev/null
+++ b/crates/khive-hnsw/src/tests.rs
@@ -0,0 +1,1977 @@
+//! Tests for HNSW index.
+
+#[cfg(test)]
+mod unit_tests {
+    use crate::{DistanceMetric, HnswConfig, HnswIndex};
+    use khive_score::DeterministicScore;
+    use crate::NodeId;
+
+    use std::collections::HashSet;
+
+    fn make_id(seed: u8) -> NodeId {
+        NodeId::new([seed; 16])
+    }
+
+    fn generate_random_vector(dim: usize, seed: u64) -> Vec<f32> {
+        use std::collections::hash_map::DefaultHasher;
+        use std::hash::{Hash, Hasher};
+
+        (0..dim)
+            .map(|i| {
+                let mut hasher = DefaultHasher::new();
+                (seed, i).hash(&mut hasher);
+                (hasher.finish() as f32 / u64::MAX as f32) * 2.0 - 1.0
+            })
+            .collect()
+    }
+
+    #[test]
+    fn test_insert_and_search() {
+        let mut index = HnswIndex::new(3);
+
+        let id1 = make_id(1);
+        let id2 = make_id(2);
+        let id3 = make_id(3);
+
+        index.insert(id1, vec![1.0, 0.0, 0.0]).expect("insert id1");
+        index.insert(id2, vec![0.9, 0.1, 0.0]).expect("insert id2");
+        index.insert(id3, vec![0.0, 1.0, 0.0]).expect("insert id3");
+
+        assert_eq!(index.len(), 3);
+
+        // Search for vector similar to [1.0, 0.0, 0.0]
+        let results = index.search(&[1.0, 0.0, 0.0], 2).expect("search");
+
+        assert_eq!(results.len(), 2);
+        // First result should be id1 (exact match)
+        assert_eq!(results[0].0, id1);
+        assert!(results[0].1.to_f64() > 0.99);
+    }
+
+    #[test]
+    fn test_update_existing() {
+        let mut index = HnswIndex::new(3);
+        let id = make_id(1);
+
+        index.insert(id, vec![1.0, 0.0, 0.0]).expect("insert");
+        index.insert(id, vec![0.0, 1.0, 0.0]).expect("update");
+
+        // Should still be 1 vector
+        assert_eq!(index.len(), 1);
+
+        // Search should find the updated vector
+        let results = index.search(&[0.0, 1.0, 0.0], 1).expect("search");
+        assert_eq!(results[0].0, id);
+        assert!(results[0].1.to_f64() > 0.99);
+    }
+
+    #[test]
+    fn test_delete_tombstone() {
+        let mut index = HnswIndex::new(3);
+
+        let id1 = make_id(1);
+        let id2 = make_id(2);
+
+        index.insert(id1, vec![1.0, 0.0, 0.0]).expect("insert id1");
+        index.insert(id2, vec![0.0, 1.0, 0.0]).expect("insert id2");
+
+        // Delete id1
+        assert!(index.delete(id1));
+
+        // Should still have 2 nodes but 1 tombstone
+        assert_eq!(index.len(), 2);
+        assert_eq!(index.tombstone_stats().tombstone_count, 1);
+
+        // Search should not return tombstoned node
+        let results = index.search(&[1.0, 0.0, 0.0], 2).expect("search");
+        assert_eq!(results.len(), 1);
+        assert_eq!(results[0].0, id2);
+    }
+
+    #[test]
+    fn test_rebuild() {
+        let mut index = HnswIndex::new(3);
+
+        let id1 = make_id(1);
+        let id2 = make_id(2);
+        let id3 = make_id(3);
+
+        index.insert(id1, vec![1.0, 0.0, 0.0]).expect("insert");
+        index.insert(id2, vec![0.0, 1.0, 0.0]).expect("insert");
+        index.insert(id3, vec![0.0, 0.0, 1.0]).expect("insert");
+
+        index.delete(id1);
+        index.delete(id2);
+
+        let stats = index.rebuild();
+        assert_eq!(stats.nodes_before, 3);
+        assert_eq!(stats.nodes_removed, 2);
+        assert_eq!(stats.nodes_after, 1);
+
+        // After rebuild, should only have id3
+        assert_eq!(index.len(), 1);
+        assert_eq!(index.tombstone_stats().tombstone_count, 0);
+    }
+
+    #[test]
+    fn test_dimension_mismatch() {
+        let mut index = HnswIndex::new(3);
+        let id = make_id(1);
+
+        // Wrong dimension should error
+        let result = index.insert(id, vec![1.0, 0.0]);
+        assert!(result.is_err());
+
+        // Insert correct dimension
+        index.insert(id, vec![1.0, 0.0, 0.0]).expect("insert");
+
+        // Search with wrong dimension should error
+        let result = index.search(&[1.0, 0.0], 1);
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn test_empty_search() {
+        let index = HnswIndex::new(3);
+        let results = index.search(&[1.0, 0.0, 0.0], 10).expect("search empty");
+        assert!(results.is_empty());
+    }
+
+    #[test]
+    fn test_dot_product_metric() {
+        let mut config = HnswConfig::with_dimensions(2);
+        config.metric = DistanceMetric::Dot;
+        let mut index = HnswIndex::with_config(config);
+
+        let id1 = make_id(1);
+        let id2 = make_id(2);
+
+        index.insert(id1, vec![1.0, 0.0]).expect("insert id1");
+        index.insert(id2, vec![2.0, 0.0]).expect("insert id2");
+
+        // For dot product, [2,0] . [1,0] = 2 > [1,0] . [1,0] = 1
+        let results = index.search(&[1.0, 0.0], 2).expect("search");
+        assert_eq!(results[0].0, id2);
+    }
+
+    #[test]
+    fn test_euclidean_metric() {
+        let mut config = HnswConfig::with_dimensions(2);
+        config.metric = DistanceMetric::L2;
+        let mut index = HnswIndex::with_config(config);
+
+        let id1 = make_id(1);
+        let id2 = make_id(2);
+
+        index.insert(id1, vec![1.0, 0.0]).expect("insert id1");
+        index.insert(id2, vec![10.0, 0.0]).expect("insert id2");
+
+        // Euclidean: closer = higher score
+        let results = index.search(&[0.0, 0.0], 2).expect("search");
+        assert_eq!(results[0].0, id1); // Closer to origin
+    }
+
+    #[test]
+    fn test_larger_index() {
+        let mut index = HnswIndex::new(128);
+
+        let n = 500;
+        let mut ids = Vec::new();
+        for i in 0..n {
+            let id = NodeId::new([
+                (i >> 8) as u8,
+                (i & 0xff) as u8,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+            ]);
+            let vector = generate_random_vector(128, i as u64);
+            index.insert(id, vector).expect("insert");
+            ids.push(id);
+        }
+
+        assert_eq!(index.len(), n);
+
+        // Search should return k results
+        let query = generate_random_vector(128, 50);
+        let results = index.search(&query, 10).expect("search");
+        assert_eq!(results.len(), 10);
+
+        // Scores should be sorted descending
+        for window in results.windows(2) {
+            assert!(window[0].1 >= window[1].1);
+        }
+    }
+
+    #[test]
+    fn test_recall() {
+        let mut index = HnswIndex::new(64);
+
+        let n = 500;
+        let mut vectors: Vec<(NodeId, Vec<f32>)> = Vec::new();
+        for i in 0..n {
+            let id = NodeId::new([
+                (i >> 8) as u8,
+                (i & 0xff) as u8,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+            ]);
+            let vector = generate_random_vector(64, i as u64);
+            index.insert(id, vector.clone()).expect("insert");
+            vectors.push((id, vector));
+        }
+
+        let k = 10;
+        let mut total_recall = 0.0;
+        let num_queries = 10;
+
+        for q in 0..num_queries {
+            let (query_id, query) = &vectors[q * 50];
+
+            // Brute force ground truth
+            let mut ground_truth: Vec<(f32, NodeId)> = vectors
+                .iter()
+                .map(|(id, v)| {
+                    let dot: f32 = query.iter().zip(v).map(|(a, b)| a * b).sum();
+                    let q_norm: f32 = query.iter().map(|x| x * x).sum::<f32>().sqrt();
+                    let v_norm: f32 = v.iter().map(|x| x * x).sum::<f32>().sqrt();
+                    let sim = if q_norm > 0.0 && v_norm > 0.0 {
+                        dot / (q_norm * v_norm)
+                    } else {
+                        0.0
+                    };
+                    (1.0 - sim, *id)
+                })
+                .collect();
+            ground_truth.sort_by(|a, b| a.0.total_cmp(&b.0));
+            let truth_ids: HashSet<NodeId> =
+                ground_truth.iter().take(k).map(|(_, id)| *id).collect();
+
+            // HNSW search
+            let results = index.search(query, k).expect("search");
+            let result_ids: HashSet<NodeId> = results.iter().map(|(id, _)| *id).collect();
+
+            let recall =
+                truth_ids.intersection(&result_ids).count() as f32 / truth_ids.len() as f32;
+            total_recall += recall;
+
+            // First result should be the query itself
+            assert_eq!(
+                results[0].0, *query_id,
+                "Query {q} should return itself as first result"
+            );
+        }
+
+        let avg_recall = total_recall / num_queries as f32;
+        assert!(
+            avg_recall > 0.8,
+            "Average recall {avg_recall:.2} should be > 0.8"
+        );
+    }
+
+    #[test]
+    fn test_config_variants() {
+        // Test each config variant builds successfully
+        for config in [
+            HnswConfig::default(),
+            HnswConfig::high_recall(),
+            HnswConfig::fast_build(),
+            HnswConfig::low_memory(),
+        ] {
+            let mut index = HnswIndex::with_config(HnswConfig {
+                dimensions: 32,
+                ..config
+            });
+
+            for i in 0..100 {
+                let id = NodeId::new([i as u8; 16]);
+                let vector = generate_random_vector(32, i as u64);
+                index.insert(id, vector).expect("insert");
+            }
+            assert_eq!(index.len(), 100);
+        }
+    }
+
+    #[test]
+    fn test_tombstone_stats() {
+        let mut index = HnswIndex::new(3);
+
+        for i in 0..10 {
+            let id = make_id(i);
+            index.insert(id, vec![i as f32, 0.0, 0.0]).expect("insert");
+        }
+
+        // Tombstone 3 nodes
+        for i in 0..3 {
+            index.delete(make_id(i));
+        }
+
+        let stats = index.tombstone_stats();
+        assert_eq!(stats.total_nodes, 10);
+        assert_eq!(stats.tombstone_count, 3);
+        assert_eq!(stats.live_nodes, 7);
+        assert!((stats.ratio - 0.3).abs() < 0.01);
+    }
+
+    #[test]
+    fn test_needs_rebuild_threshold() {
+        let mut config = HnswConfig::with_dimensions(3);
+        config.rebuild_threshold = 0.2;
+        let mut index = HnswIndex::with_config(config);
+
+        for i in 0..10 {
+            let id = make_id(i);
+            index.insert(id, vec![i as f32, 0.0, 0.0]).expect("insert");
+        }
+
+        // 1 tombstone = 10%, under 20% threshold
+        index.delete(make_id(0));
+        assert!(!index.needs_rebuild());
+
+        // 3 tombstones = 30%, over 20% threshold
+        index.delete(make_id(1));
+        index.delete(make_id(2));
+        assert!(index.needs_rebuild());
+    }
+
+    #[test]
+    fn test_deterministic_score_output() {
+        let mut index = HnswIndex::new(3);
+
+        let id = make_id(1);
+        index.insert(id, vec![1.0, 0.0, 0.0]).expect("insert");
+
+        let results = index.search(&[1.0, 0.0, 0.0], 1).expect("search");
+
+        // Score should be DeterministicScore, not f32
+        let score = results[0].1;
+        assert!(score.to_f64() > 0.99);
+        assert!(score.to_f64() <= 1.0);
+
+        // Scores are comparable
+        assert!(score > DeterministicScore::from_f64(0.5));
+    }
+
+    #[test]
+    fn test_clear() {
+        let mut index = HnswIndex::new(3);
+
+        index
+            .insert(make_id(1), vec![1.0, 0.0, 0.0])
+            .expect("insert");
+        index
+            .insert(make_id(2), vec![0.0, 1.0, 0.0])
+            .expect("insert");
+        index.delete(make_id(1));
+
+        assert_eq!(index.len(), 2);
+        assert_eq!(index.tombstone_stats().tombstone_count, 1);
+
+        index.clear();
+
+        assert_eq!(index.len(), 0);
+        assert_eq!(index.tombstone_stats().tombstone_count, 0);
+        assert!(index.is_empty());
+    }
+
+    #[test]
+    fn test_seeded_rng_reproducibility() {
+        // Two indexes with the same seed should produce identical structure
+        let config1 = HnswConfig::with_dimensions(32).with_seed(42);
+        let config2 = HnswConfig::with_dimensions(32).with_seed(42);
+
+        let mut index1 = HnswIndex::with_config(config1);
+        let mut index2 = HnswIndex::with_config(config2);
+
+        // Insert same vectors in same order
+        for i in 0..50 {
+            let id = NodeId::new([i as u8; 16]);
+            let vector = generate_random_vector(32, i as u64);
+            index1.insert(id, vector.clone()).expect("insert");
+            index2.insert(id, vector).expect("insert");
+        }
+
+        // Search should return identical results
+        let query = generate_random_vector(32, 999);
+        let results1 = index1.search(&query, 10).expect("search");
+        let results2 = index2.search(&query, 10).expect("search");
+
+        assert_eq!(results1.len(), results2.len());
+        for (r1, r2) in results1.iter().zip(results2.iter()) {
+            assert_eq!(r1.0, r2.0, "Same seed should produce identical results");
+            assert_eq!(r1.1, r2.1, "Scores should match exactly");
+        }
+    }
+
+    #[test]
+    fn test_different_seeds_different_structure() {
+        // Two indexes with different seeds should (likely) produce different structures
+        let config1 = HnswConfig::with_dimensions(32).with_seed(42);
+        let config2 = HnswConfig::with_dimensions(32).with_seed(123);
+
+        let mut index1 = HnswIndex::with_config(config1);
+        let mut index2 = HnswIndex::with_config(config2);
+
+        // Insert same vectors in same order
+        for i in 0..100 {
+            let id = NodeId::new([i as u8; 16]);
+            let vector = generate_random_vector(32, i as u64);
+            index1.insert(id, vector.clone()).expect("insert");
+            index2.insert(id, vector).expect("insert");
+        }
+
+        // Get max_level for each - with different seeds, the random levels
+        // assigned to nodes will differ, leading to different max_level values
+        // (statistically likely to differ with 100 insertions)
+        // Note: They might still be the same by chance, but the internal
+        // structure (which nodes are at which level) will differ
+
+        // At minimum, both should be valid indexes
+        assert_eq!(index1.len(), 100);
+        assert_eq!(index2.len(), 100);
+    }
+}
+
+#[cfg(test)]
+mod memory_budget_tests {
+    use crate::error::{ErrorKind, RetrievalError};
+    use crate::{HnswConfig, HnswIndex};
+    use crate::NodeId;
+
+    fn make_id(seed: u8) -> NodeId {
+        NodeId::new([seed; 16])
+    }
+
+    #[test]
+    fn test_no_budget_allows_unlimited_inserts() {
+        // Without a budget, inserts always succeed
+        let mut index = HnswIndex::new(4);
+        for i in 0..50 {
+            let id = make_id(i);
+            index
+                .insert(id, vec![i as f32, 0.0, 0.0, 0.0])
+                .expect("insert should succeed without budget");
+        }
+        assert_eq!(index.len(), 50);
+    }
+
+    #[test]
+    fn test_budget_blocks_insert_when_exceeded() {
+        // Set a very tight budget that allows only a few nodes
+        let config = HnswConfig::with_dimensions(4).with_memory_budget(2_000);
+        let mut index = HnswIndex::with_config(config);
+
+        // First insert should succeed (index starts empty)
+        index
+            .insert(make_id(1), vec![1.0, 0.0, 0.0, 0.0])
+            .expect("first insert should succeed");
+
+        // Keep inserting until we hit the budget
+        let mut rejected = false;
+        for i in 2..=100u8 {
+            let result = index.insert(make_id(i), vec![i as f32, 0.0, 0.0, 0.0]);
+            if result.is_err() {
+                rejected = true;
+                let err = result.unwrap_err();
+                assert!(
+                    matches!(err, RetrievalError::BudgetExceeded { .. }),
+                    "Expected BudgetExceeded, got: {err:?}"
+                );
+                assert_eq!(err.kind(), ErrorKind::Permanent);
+                assert!(!err.is_retryable());
+                break;
+            }
+        }
+        assert!(rejected, "Budget should have rejected an insert");
+    }
+
+    #[test]
+    fn test_budget_update_bypasses_check() {
+        // Set a budget, fill it up, then update an existing entry
+        let config = HnswConfig::with_dimensions(4).with_memory_budget(2_000);
+        let mut index = HnswIndex::with_config(config);
+
+        let id1 = make_id(1);
+        index
+            .insert(id1, vec![1.0, 0.0, 0.0, 0.0])
+            .expect("first insert");
+
+        // Fill until budget hit
+        for i in 2..=100u8 {
+            if index
+                .insert(make_id(i), vec![i as f32, 0.0, 0.0, 0.0])
+                .is_err()
+            {
+                break;
+            }
+        }
+
+        // Updating an existing entry should always succeed (bypass budget)
+        index
+            .insert(id1, vec![9.0, 9.0, 9.0, 9.0])
+            .expect("update existing should bypass budget");
+    }
+
+    #[test]
+    fn test_memory_usage_increases_with_inserts() {
+        let mut index = HnswIndex::new(8);
+
+        let before = index.memory_usage();
+        assert_eq!(before, 0, "Empty index should have zero usage");
+
+        index.insert(make_id(1), vec![1.0; 8]).expect("insert");
+        let after_one = index.memory_usage();
+        assert!(after_one > 0, "Usage should increase after insert");
+
+        index.insert(make_id(2), vec![2.0; 8]).expect("insert");
+        let after_two = index.memory_usage();
+        assert!(
+            after_two > after_one,
+            "Usage should increase with more inserts"
+        );
+    }
+
+    #[test]
+    fn test_estimate_insert_cost_is_positive() {
+        let index = HnswIndex::new(128);
+        let cost = index.estimate_insert_cost();
+        assert!(cost > 0, "Insert cost should be positive");
+        // For 128 dims: 128*4 = 512 bytes for vector alone
+        assert!(cost >= 512, "Cost should include at least the vector data");
+    }
+
+    #[test]
+    fn test_memory_budget_getter_setter() {
+        let mut index = HnswIndex::new(4);
+
+        // Default: no budget
+        assert_eq!(index.memory_budget(), None);
+
+        // Set budget via runtime setter
+        index.set_memory_budget(Some(10_000));
+        assert_eq!(index.memory_budget(), Some(10_000));
+
+        // Clear budget
+        index.set_memory_budget(None);
+        assert_eq!(index.memory_budget(), None);
+    }
+
+    #[test]
+    fn test_budget_from_config() {
+        let config = HnswConfig::with_dimensions(4).with_memory_budget(5_000);
+        let index = HnswIndex::with_config(config);
+        assert_eq!(index.memory_budget(), Some(5_000));
+    }
+
+    #[test]
+    fn test_budget_exceeded_error_details() {
+        let config = HnswConfig::with_dimensions(4).with_memory_budget(1);
+        let mut index = HnswIndex::with_config(config);
+
+        // Budget of 1 byte is too small for any insert
+        let result = index.insert(make_id(1), vec![1.0, 0.0, 0.0, 0.0]);
+        assert!(result.is_err());
+
+        let err = result.unwrap_err();
+        match err {
+            RetrievalError::BudgetExceeded {
+                current_usage,
+                item_size,
+                limit,
+            } => {
+                assert_eq!(current_usage, 0, "Empty index");
+                assert!(item_size > 0, "Item should have non-zero cost");
+                assert_eq!(limit, 1, "Limit should match config");
+                assert!(current_usage + item_size > limit, "Should genuinely exceed");
+            }
+            other => panic!("Expected BudgetExceeded, got: {other:?}"),
+        }
+    }
+
+    #[test]
+    fn test_search_unaffected_by_budget() {
+        // Budget is only checked on insert, never on search
+        let config = HnswConfig::with_dimensions(3).with_memory_budget(100_000);
+        let mut index = HnswIndex::with_config(config);
+
+        index
+            .insert(make_id(1), vec![1.0, 0.0, 0.0])
+            .expect("insert");
+        index
+            .insert(make_id(2), vec![0.0, 1.0, 0.0])
+            .expect("insert");
+
+        // Search should work regardless of budget
+        let results = index.search(&[1.0, 0.0, 0.0], 2).expect("search");
+        assert_eq!(results.len(), 2);
+    }
+}
+
+#[cfg(test)]
+mod proptests {
+    use crate::HnswIndex;
+    use crate::NodeId;
+
+    use proptest::prelude::*;
+
+    const DIM: usize = 32;
+
+    fn seeded_vector(seed: u64) -> Vec<f32> {
+        use std::collections::hash_map::DefaultHasher;
+        use std::hash::{Hash, Hasher};
+
+        (0..DIM)
+            .map(|i| {
+                let mut hasher = DefaultHasher::new();
+                (seed, i).hash(&mut hasher);
+                (hasher.finish() as f32 / u64::MAX as f32) * 2.0 - 1.0
+            })
+            .collect()
+    }
+
+    proptest! {
+        /// Property: search() returns exactly k results when k <= num_vectors
+        #[test]
+        fn search_returns_k_results(
+            k in 1usize..=10,
+            num_vectors in 10usize..=100
+        ) {
+            prop_assume!(k <= num_vectors);
+
+            let mut index = HnswIndex::new(DIM);
+
+            for i in 0..num_vectors {
+                let id = NodeId::new([
+                    (i >> 8) as u8,
+                    (i & 0xff) as u8,
+                    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+                ]);
+                let vector = seeded_vector(i as u64);
+                index.insert(id, vector).expect("insert");
+            }
+
+            prop_assert_eq!(index.len(), num_vectors);
+
+            let query = seeded_vector(999);
+            let results = index.search(&query, k).expect("search");
+
+            prop_assert_eq!(
+                results.len(),
+                k,
+                "Expected {} results but got {}",
+                k,
+                results.len()
+            );
+
+            // All scores should be finite (DeterministicScore is i64 fixed-point; check via to_f64).
+            for (_, score) in &results {
+                prop_assert!(
+                    score.to_f64().is_finite(),
+                    "Score should be finite"
+                );
+            }
+
+            // Results sorted descending by score
+            for window in results.windows(2) {
+                prop_assert!(
+                    window[0].1 >= window[1].1,
+                    "Results should be sorted by score descending"
+                );
+            }
+        }
+
+        /// Property: search() returns min(k, num_vectors) when k > num_vectors
+        #[test]
+        fn search_returns_all_when_k_exceeds_count(
+            num_vectors in 1usize..=20,
+            k_excess in 1usize..=30
+        ) {
+            let k = num_vectors + k_excess;
+
+            let mut index = HnswIndex::new(DIM);
+
+            for i in 0..num_vectors {
+                let id = NodeId::new([i as u8; 16]);
+                let vector = seeded_vector(i as u64);
+                index.insert(id, vector).expect("insert");
+            }
+
+            let query = seeded_vector(999);
+            let results = index.search(&query, k).expect("search");
+
+            prop_assert_eq!(
+                results.len(),
+                num_vectors,
+                "Expected {} results (all vectors) but got {}",
+                num_vectors,
+                results.len()
+            );
+        }
+
+        /// Property: empty index returns empty results
+        #[test]
+        fn search_empty_index_returns_empty(k in 1usize..=100) {
+            let index = HnswIndex::new(DIM);
+
+            let query = seeded_vector(0);
+            let results = index.search(&query, k).expect("search");
+
+            prop_assert!(
+                results.is_empty(),
+                "Empty index should return empty results"
+            );
+        }
+    }
+}
+
+#[cfg(test)]
+mod metrics_tests {
+    use crate::HnswIndex;
+    use crate::metrics::{names, MetricValue, RecordingSink};
+    use crate::NodeId;
+
+    use std::sync::Arc;
+
+    fn make_id(seed: u8) -> NodeId {
+        NodeId::new([seed; 16])
+    }
+
+    #[test]
+    fn insert_emits_metrics() {
+        let sink = Arc::new(RecordingSink::new());
+        let mut index = HnswIndex::new(3).with_metrics(sink.clone());
+
+        index.insert(make_id(1), vec![1.0, 0.0, 0.0]).unwrap();
+
+        let events = sink.events();
+        let event_names: Vec<&str> = events.iter().map(|e| e.name).collect();
+
+        assert!(
+            event_names.contains(&names::HNSW_INSERT_DURATION_MS),
+            "Missing insert duration metric"
+        );
+        assert!(
+            event_names.contains(&names::HNSW_INSERT_COUNT),
+            "Missing insert count metric"
+        );
+        assert!(
+            event_names.contains(&names::HNSW_INDEX_SIZE),
+            "Missing index size metric"
+        );
+
+        // Index size should be 1 after first insert
+        let size_event = events
+            .iter()
+            .find(|e| e.name == names::HNSW_INDEX_SIZE)
+            .unwrap();
+        assert_eq!(size_event.value, MetricValue::Gauge(1.0));
+    }
+
+    #[test]
+    fn search_emits_metrics() {
+        let sink = Arc::new(RecordingSink::new());
+        let mut index = HnswIndex::new(3).with_metrics(sink.clone());
+
+        index.insert(make_id(1), vec![1.0, 0.0, 0.0]).unwrap();
+        index.insert(make_id(2), vec![0.0, 1.0, 0.0]).unwrap();
+
+        // Clear insert metrics
+        sink.clear();
+
+        let results = index.search(&[1.0, 0.0, 0.0], 2).unwrap();
+
+        let events = sink.events();
+        let event_names: Vec<&str> = events.iter().map(|e| e.name).collect();
+
+        assert!(
+            event_names.contains(&names::HNSW_SEARCH_DURATION_MS),
+            "Missing search duration metric"
+        );
+        assert!(
+            event_names.contains(&names::HNSW_SEARCH_COUNT),
+            "Missing search count metric"
+        );
+        assert!(
+            event_names.contains(&names::HNSW_SEARCH_RESULTS),
+            "Missing search results metric"
+        );
+
+        // Results count should match actual results
+        let results_event = events
+            .iter()
+            .find(|e| e.name == names::HNSW_SEARCH_RESULTS)
+            .unwrap();
+        assert_eq!(
+            results_event.value,
+            MetricValue::Gauge(results.len() as f64)
+        );
+    }
+
+    #[test]
+    fn rebuild_emits_metrics() {
+        let sink = Arc::new(RecordingSink::new());
+        let mut index = HnswIndex::new(3).with_metrics(sink.clone());
+
+        let id1 = make_id(1);
+        let id2 = make_id(2);
+        index.insert(id1, vec![1.0, 0.0, 0.0]).unwrap();
+        index.insert(id2, vec![0.0, 1.0, 0.0]).unwrap();
+
+        // Tombstone one node
+        index.delete(id1);
+
+        // Clear prior metrics
+        sink.clear();
+
+        let stats = index.rebuild();
+
+        let events = sink.events();
+        let event_names: Vec<&str> = events.iter().map(|e| e.name).collect();
+
+        assert!(
+            event_names.contains(&names::HNSW_REBUILD_DURATION_MS),
+            "Missing rebuild duration metric"
+        );
+        assert!(
+            event_names.contains(&names::HNSW_REBUILD_COUNT),
+            "Missing rebuild count metric"
+        );
+        assert!(
+            event_names.contains(&names::HNSW_REBUILD_NODES_REMOVED),
+            "Missing rebuild nodes_removed metric"
+        );
+        assert!(
+            event_names.contains(&names::HNSW_INDEX_SIZE),
+            "Missing index size metric after rebuild"
+        );
+
+        // nodes_removed should be 1
+        let removed_event = events
+            .iter()
+            .find(|e| e.name == names::HNSW_REBUILD_NODES_REMOVED)
+            .unwrap();
+        assert_eq!(removed_event.value, MetricValue::Gauge(1.0));
+        assert_eq!(stats.nodes_removed, 1);
+    }
+
+    #[test]
+    fn no_metrics_without_sink() {
+        // Ensure no panic when metrics is None (default)
+        let mut index = HnswIndex::new(3);
+        index.insert(make_id(1), vec![1.0, 0.0, 0.0]).unwrap();
+        let _ = index.search(&[1.0, 0.0, 0.0], 1).unwrap();
+        index.rebuild();
+    }
+
+    #[test]
+    fn set_metrics_at_runtime() {
+        let mut index = HnswIndex::new(3);
+        index.insert(make_id(1), vec![1.0, 0.0, 0.0]).unwrap();
+
+        // Attach sink mid-lifecycle
+        let sink = Arc::new(RecordingSink::new());
+        index.set_metrics(Some(sink.clone()));
+
+        index.insert(make_id(2), vec![0.0, 1.0, 0.0]).unwrap();
+
+        // Should have metrics from the second insert only
+        assert!(!sink.is_empty());
+
+        // Detach
+        index.set_metrics(None);
+        sink.clear();
+
+        index.insert(make_id(3), vec![0.0, 0.0, 1.0]).unwrap();
+        assert!(sink.is_empty(), "No events after detaching sink");
+    }
+
+    #[test]
+    fn search_on_empty_index_still_emits() {
+        let sink = Arc::new(RecordingSink::new());
+        let index = HnswIndex::new(3).with_metrics(sink.clone());
+
+        let results = index.search(&[1.0, 0.0, 0.0], 5).unwrap();
+        assert!(results.is_empty());
+
+        // Should still emit duration and count (even for empty index)
+        let events = sink.events();
+        let event_names: Vec<&str> = events.iter().map(|e| e.name).collect();
+        assert!(event_names.contains(&names::HNSW_SEARCH_DURATION_MS));
+        assert!(event_names.contains(&names::HNSW_SEARCH_COUNT));
+    }
+
+    #[test]
+    fn insert_duration_is_nonnegative() {
+        let sink = Arc::new(RecordingSink::new());
+        let mut index = HnswIndex::new(3).with_metrics(sink.clone());
+
+        index.insert(make_id(1), vec![1.0, 0.0, 0.0]).unwrap();
+
+        let duration_event = sink
+            .events()
+            .into_iter()
+            .find(|e| e.name == names::HNSW_INSERT_DURATION_MS)
+            .unwrap();
+
+        match duration_event.value {
+            MetricValue::Histogram(ms) => assert!(ms >= 0.0, "Duration must be >= 0"),
+            other => panic!("Expected Histogram, got {other:?}"),
+        }
+    }
+}
+
+#[cfg(test)]
+mod search_context_tests {
+    use crate::search_context::HnswSearchContext;
+    use crate::{DistanceMetric, HnswConfig, HnswIndex};
+    use crate::NodeId;
+
+    fn make_id(seed: u8) -> NodeId {
+        NodeId::new([seed; 16])
+    }
+
+    fn generate_random_vector(dim: usize, seed: u64) -> Vec<f32> {
+        use std::collections::hash_map::DefaultHasher;
+        use std::hash::{Hash, Hasher};
+
+        (0..dim)
+            .map(|i| {
+                let mut hasher = DefaultHasher::new();
+                (seed, i).hash(&mut hasher);
+                (hasher.finish() as f32 / u64::MAX as f32) * 2.0 - 1.0
+            })
+            .collect()
+    }
+
+    #[test]
+    fn search_with_context_matches_search() {
+        // Build a non-trivial index
+        let config = HnswConfig::with_dimensions(64).with_seed(42);
+        let mut index = HnswIndex::with_config(config);
+
+        for i in 0..200u16 {
+            let id = NodeId::new([
+                (i >> 8) as u8,
+                (i & 0xff) as u8,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+                0,
+            ]);
+            let vector = generate_random_vector(64, i as u64);
+            index.insert(id, vector).expect("insert");
+        }
+
+        let mut ctx = HnswSearchContext::new(index.config().ef_search);
+
+        // Run multiple queries and verify results are identical
+        for q_seed in [0u64, 50, 100, 999, 12345] {
+            let query = generate_random_vector(64, q_seed);
+
+            let results_normal = index.search(&query, 10).expect("search");
+            let results_ctx = index
+                .search_with_context(&query, 10, &mut ctx)
+                .expect("search_with_context");
+
+            assert_eq!(
+                results_normal.len(),
+                results_ctx.len(),
+                "Result count should match for query seed {q_seed}"
+            );
+            for (i, (r_normal, r_ctx)) in results_normal.iter().zip(results_ctx.iter()).enumerate()
+            {
+                assert_eq!(
+                    r_normal.0, r_ctx.0,
+                    "ID mismatch at position {i} for query seed {q_seed}"
+                );
+                assert_eq!(
+                    r_normal.1, r_ctx.1,
+                    "Score mismatch at position {i} for query seed {q_seed}"
+                );
+            }
+        }
+    }
+
+    #[test]
+    fn context_reuse_across_many_searches() {
+        let config = HnswConfig::with_dimensions(32).with_seed(42);
+        let mut index = HnswIndex::with_config(config);
+
+        for i in 0..100u16 {
+            let id = NodeId::new([i as u8; 16]);
+            let vector = generate_random_vector(32, i as u64);
+            index.insert(id, vector).expect("insert");
+        }
+
+        let mut ctx = HnswSearchContext::new(index.config().ef_search);
+
+        // Run 50 searches reusing the same context
+        for q in 0..50u64 {
+            let query = generate_random_vector(32, q * 7);
+            let results = index
+                .search_with_context(&query, 5, &mut ctx)
+                .expect("search_with_context");
+            assert_eq!(results.len(), 5, "Should return 5 results on iteration {q}");
+
+            // Verify sorted descending by score
+            for window in results.windows(2) {
+                assert!(
+                    window[0].1 >= window[1].1,
+                    "Results should be sorted descending"
+                );
+            }
+        }
+    }
+
+    #[test]
+    fn context_works_with_tombstones() {
+        let config = HnswConfig::with_dimensions(3).with_seed(42);
+        let mut index = HnswIndex::with_config(config);
+
+        let id1 = make_id(1);
+        let id2 = make_id(2);
+        let id3 = make_id(3);
+
+        index.insert(id1, vec![1.0, 0.0, 0.0]).expect("insert");
+        index.insert(id2, vec![0.9, 0.1, 0.0]).expect("insert");
+        index.insert(id3, vec![0.0, 1.0, 0.0]).expect("insert");
+
+        // Delete id1
+        index.delete(id1);
+
+        let mut ctx = HnswSearchContext::new(index.config().ef_search);
+
+        let results_normal = index.search(&[1.0, 0.0, 0.0], 3).expect("search");
+        let results_ctx = index
+            .search_with_context(&[1.0, 0.0, 0.0], 3, &mut ctx)
+            .expect("search_with_context");
+
+        // Should not include tombstoned id1
+        assert_eq!(results_normal.len(), results_ctx.len());
+        for (r_normal, r_ctx) in results_normal.iter().zip(results_ctx.iter()) {
+            assert_eq!(r_normal.0, r_ctx.0);
+            assert_eq!(r_normal.1, r_ctx.1);
+            assert_ne!(r_normal.0, id1, "Tombstoned id1 should not appear");
+        }
+    }
+
+    #[test]
+    fn context_with_empty_index() {
+        let index = HnswIndex::new(3);
+        let mut ctx = HnswSearchContext::new(80);
+
+        let results = index
+            .search_with_context(&[1.0, 0.0, 0.0], 10, &mut ctx)
+            .expect("search empty");
+        assert!(results.is_empty());
+    }
+
+    #[test]
+    fn context_dimension_mismatch() {
+        let mut index = HnswIndex::new(3);
+        index
+            .insert(make_id(1), vec![1.0, 0.0, 0.0])
+            .expect("insert");
+
+        let mut ctx = HnswSearchContext::new(80);
+        let result = index.search_with_context(&[1.0, 0.0], 1, &mut ctx);
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn context_works_with_all_metrics() {
+        for metric in [
+            DistanceMetric::Cosine,
+            DistanceMetric::L2,
+            DistanceMetric::Dot,
+        ] {
+            let mut config = HnswConfig::with_dimensions(4);
+            config.metric = metric;
+            config.seed = Some(42);
+            let mut index = HnswIndex::with_config(config);
+
+            for i in 0..50u8 {
+                let id = make_id(i);
+                let vector = generate_random_vector(4, i as u64);
+                index.insert(id, vector).expect("insert");
+            }
+
+            let query = generate_random_vector(4, 999);
+            let mut ctx = HnswSearchContext::new(index.config().ef_search);
+
+            let results_normal = index.search(&query, 5).expect("search");
+            let results_ctx = index
+                .search_with_context(&query, 5, &mut ctx)
+                .expect("search_with_context");
+
+            assert_eq!(
+                results_normal.len(),
+                results_ctx.len(),
+                "Result count mismatch for {metric:?}"
+            );
+            for (r_normal, r_ctx) in results_normal.iter().zip(results_ctx.iter()) {
+                assert_eq!(r_normal.0, r_ctx.0, "ID mismatch for {metric:?}");
+                assert_eq!(r_normal.1, r_ctx.1, "Score mismatch for {metric:?}");
+            }
+        }
+    }
+
+    // =========================================================================
+    // build_batch (parallel HNSW construction)
+    // =========================================================================
+
+    use std::collections::HashSet;
+
+    #[test]
+    fn test_build_batch_empty() {
+        let mut index = HnswIndex::new(3);
+        index
+            .build_batch(vec![])
+            .expect("empty batch should succeed");
+        assert!(index.is_empty());
+    }
+
+    #[test]
+    fn test_build_batch_single_item() {
+        let mut index = HnswIndex::new(3);
+        let id = make_id(1);
+        index
+            .build_batch(vec![(id, vec![1.0, 0.0, 0.0])])
+            .expect("single-item batch");
+        assert_eq!(index.len(), 1);
+
+        let results = index.search(&[1.0, 0.0, 0.0], 1).expect("search");
+        assert_eq!(results.len(), 1);
+        assert_eq!(results[0].0, id);
+    }
+
+    #[test]
+    fn test_build_batch_small_falls_back_to_sequential() {
+        // <= 32 items should use sequential fallback
+        let dim = 8;
+        let mut index = HnswIndex::new(dim);
+        let items: Vec<_> = (0..20u8)
+            .map(|i| (make_id(i), generate_random_vector(dim, i as u64)))
+            .collect();
+
+        index.build_batch(items).expect("small batch");
+        assert_eq!(index.len(), 20);
+
+        // Verify search works
+        let query = generate_random_vector(dim, 999);
+        let results = index.search(&query, 5).expect("search");
+        assert_eq!(results.len(), 5);
+    }
+
+    #[test]
+    fn test_build_batch_parallel_correctness() {
+        // Large enough to trigger parallel path (> 32)
+        let dim = 16;
+        let n = 200;
+        let config = HnswConfig {
+            dimensions: dim,
+            seed: Some(42),
+            ef_construction: 400,
+            ..HnswConfig::default()
+        };
+
+        let items: Vec<_> = (0..n)
+            .map(|i| {
+                let mut id_bytes = [0u8; 16];
+                id_bytes[0] = (i & 0xFF) as u8;
+                id_bytes[1] = ((i >> 8) & 0xFF) as u8;
+                let id = NodeId::new(id_bytes);
+                (id, generate_random_vector(dim, i as u64))
+            })
+            .collect();
+
+        let mut index = HnswIndex::with_config(config);
+        index.build_batch(items.clone()).expect("parallel batch");
+
+        assert_eq!(index.len(), n);
+
+        // Nearly all IDs should find themselves as nearest neighbor.
+        // The batch path searches a frozen seed graph, so a small number of
+        // vectors may not find themselves as #1 due to the approximate nature
+        // of the parallel build. Require >= 95% self-recall.
+        let mut self_found = 0usize;
+        for (id, vec) in &items {
+            let results = index.search(vec, 1).expect("search");
+            assert!(!results.is_empty(), "should find at least 1 result");
+            if results[0].0 == *id {
+                self_found += 1;
+            }
+        }
+        let self_recall = self_found as f64 / n as f64;
+        assert!(
+            self_recall >= 0.95,
+            "Self-recall too low: {:.1}% ({}/{}) (expected >= 95%)",
+            self_recall * 100.0,
+            self_found,
+            n
+        );
+    }
+
+    #[test]
+    fn test_build_batch_recall_quality() {
+        // Compare recall: build_batch vs sequential insert
+        let dim = 32;
+        let n = 500;
+
+        let items: Vec<_> = (0..n)
+            .map(|i| {
+                let mut id_bytes = [0u8; 16];
+                id_bytes[0] = (i & 0xFF) as u8;
+                id_bytes[1] = ((i >> 8) & 0xFF) as u8;
+                let id = NodeId::new(id_bytes);
+                (id, generate_random_vector(dim, i as u64))
+            })
+            .collect();
+
+        // Build with batch
+        let config_batch = HnswConfig {
+            dimensions: dim,
+            seed: Some(42),
+            ..HnswConfig::default()
+        };
+        let mut batch_index = HnswIndex::with_config(config_batch);
+        batch_index.build_batch(items.clone()).expect("batch build");
+
+        // Build sequentially
+        let config_seq = HnswConfig {
+            dimensions: dim,
+            seed: Some(42),
+            ..HnswConfig::default()
+        };
+        let mut seq_index = HnswIndex::with_config(config_seq);
+        for (id, vec) in &items {
+            seq_index
+                .insert(*id, vec.clone())
+                .expect("sequential insert");
+        }
+
+        // Both should have same size
+        assert_eq!(batch_index.len(), seq_index.len());
+
+        // Compare recall on random queries
+        let k = 10;
+        let num_queries = 20;
+        let mut total_overlap = 0;
+        let total_possible = num_queries * k;
+
+        for q in 0..num_queries {
+            let query = generate_random_vector(dim, 10_000 + q as u64);
+
+            let batch_results = batch_index.search(&query, k).expect("batch search");
+            let seq_results = seq_index.search(&query, k).expect("seq search");
+
+            let batch_ids: HashSet<_> = batch_results.iter().map(|(id, _)| *id).collect();
+            let seq_ids: HashSet<_> = seq_results.iter().map(|(id, _)| *id).collect();
+
+            total_overlap += batch_ids.intersection(&seq_ids).count();
+        }
+
+        // Expect at least 55% overlap between batch and sequential builds.
+        // The parallel build searches a frozen seed graph with the correct degree
+        // cap (m_max0 for layer 0, m for upper layers), so graph topology differs
+        // from sequential builds. 55% is conservative for this comparison.
+        let recall_overlap = total_overlap as f64 / total_possible as f64;
+        assert!(
+            recall_overlap >= 0.55,
+            "Recall overlap too low: {:.1}% (expected >= 55%)",
+            recall_overlap * 100.0
+        );
+    }
+
+    #[test]
+    fn test_build_batch_dimension_mismatch() {
+        let mut index = HnswIndex::new(3);
+        let result = index.build_batch(vec![(make_id(1), vec![1.0, 0.0])]);
+        assert!(result.is_err());
+    }
+
+    #[test]
+    fn test_build_batch_rejects_duplicate_ids() {
+        let mut index = HnswIndex::new(3);
+        index
+            .insert(make_id(1), vec![1.0, 0.0, 0.0])
+            .expect("pre-insert");
+
+        let result = index.build_batch(vec![(make_id(1), vec![0.0, 1.0, 0.0])]);
+        assert!(result.is_err(), "should reject duplicate ID");
+    }
+
+    #[test]
+    fn test_build_batch_search_after_build() {
+        // Verify that search still works correctly after batch build
+        let dim = 16;
+        let n = 100;
+
+        let mut index = HnswIndex::new(dim);
+        let items: Vec<_> = (0..n)
+            .map(|i| {
+                let mut id_bytes = [0u8; 16];
+                id_bytes[0] = (i & 0xFF) as u8;
+                let id = NodeId::new(id_bytes);
+                (id, generate_random_vector(dim, i as u64))
+            })
+            .collect();
+
+        index.build_batch(items.clone()).expect("batch build");
+
+        // Search for the first vector -- should find itself
+        let results = index.search(&items[0].1, 1).expect("search");
+        assert_eq!(results[0].0, items[0].0);
+        assert!(
+            results[0].1.to_f64() > 0.99,
+            "exact match should have high score"
+        );
+    }
+
+    #[test]
+    fn test_build_batch_with_delete_after() {
+        // Build batch, then delete, then search
+        let dim = 8;
+        let n = 50;
+
+        let mut index = HnswIndex::new(dim);
+        let items: Vec<_> = (0..n)
+            .map(|i| {
+                let mut id_bytes = [0u8; 16];
+                id_bytes[0] = (i & 0xFF) as u8;
+                let id = NodeId::new(id_bytes);
+                (id, generate_random_vector(dim, i as u64))
+            })
+            .collect();
+
+        index.build_batch(items.clone()).expect("batch build");
+
+        // Delete first 10 items
+        for (id, _) in &items[..10] {
+            assert!(index.delete(*id));
+        }
+
+        // Search should not return deleted items
+        let query = generate_random_vector(dim, 999);
+        let results = index.search(&query, 20).expect("search");
+
+        let deleted_ids: HashSet<_> = items[..10].iter().map(|(id, _)| *id).collect();
+        for (id, _) in &results {
+            assert!(
+                !deleted_ids.contains(id),
+                "deleted ID should not appear in results"
+            );
+        }
+    }
+
+    // =========================================================================
+    // INT8 Quantized Search Tests
+    // =========================================================================
+
+    #[test]
+    fn test_quantized_search_identical_results_small() {
+        let config = HnswConfig {
+            dimensions: 32,
+            seed: Some(42),
+            ..Default::default()
+        };
+        let mut index = HnswIndex::with_config(config);
+
+        for i in 0..50u8 {
+            let vec = generate_random_vector(32, i as u64);
+            index.insert(make_id(i), vec).unwrap();
+        }
+
+        let query = generate_random_vector(32, 999);
+        let results_f32 = index.search(&query, 10).unwrap();
+
+        index.set_quantized(true);
+        assert!(index.is_quantized());
+        let results_quant = index.search(&query, 10).unwrap();
+
+        assert_eq!(results_f32.len(), results_quant.len());
+        for (f32_result, quant_result) in results_f32.iter().zip(results_quant.iter()) {
+            assert_eq!(f32_result.0, quant_result.0, "ID mismatch");
+            assert_eq!(f32_result.1, quant_result.1, "Score mismatch");
+        }
+    }
+
+    #[test]
+    fn test_quantized_search_identical_results_medium() {
+        let config = HnswConfig {
+            dimensions: 128,
+            seed: Some(42),
+            ef_search: 50,
+            ..Default::default()
+        };
+        let mut index = HnswIndex::with_config(config);
+
+        for i in 0..200u64 {
+            let vec = generate_random_vector(128, i);
+            let id_bytes: [u8; 16] = {
+                let mut b = [0u8; 16];
+                b[..8].copy_from_slice(&i.to_le_bytes());
+                b
+            };
+            index.insert(NodeId::new(id_bytes), vec).unwrap();
+        }
+
+        for q_seed in 1000..1010u64 {
+            let query = generate_random_vector(128, q_seed);
+
+            let results_f32 = index.search(&query, 10).unwrap();
+
+            index.set_quantized(true);
+            let results_quant = index.search(&query, 10).unwrap();
+            index.set_quantized(false);
+
+            assert_eq!(
+                results_f32.len(),
+                results_quant.len(),
+                "Result count mismatch for query seed {q_seed}"
+            );
+            for (f32_r, quant_r) in results_f32.iter().zip(results_quant.iter()) {
+                assert_eq!(f32_r.0, quant_r.0, "ID mismatch for query seed {q_seed}");
+                // Allow a small tolerance for f32 FP rounding: batch-4 SIMD kernels
+                // use a different FMA accumulation order than the scalar pair kernel,
+                // producing differences of ~1e-7 (within single-precision epsilon for
+                // 128-dim vectors). The neighbor IDs above confirm same recall; this
+                // only checks that scores are within acceptable precision.
+                let diff = (f32_r.1.to_f64() - quant_r.1.to_f64()).abs();
+                assert!(
+                    diff < 1e-5,
+                    "Score mismatch for query seed {q_seed}: {} vs {} (diff={diff:.2e})",
+                    f32_r.1.to_f64(),
+                    quant_r.1.to_f64()
+                );
+            }
+        }
+    }
+
+    #[test]
+    fn test_quantized_builder_pattern() {
+        let index = HnswIndex::new(64).with_quantized();
+        assert!(index.is_quantized());
+    }
+
+    #[test]
+    fn test_quantized_runtime_toggle() {
+        let mut index = HnswIndex::new(64);
+        assert!(!index.is_quantized());
+
+        index.set_quantized(true);
+        assert!(index.is_quantized());
+
+        index.set_quantized(false);
+        assert!(!index.is_quantized());
+    }
+
+    #[test]
+    fn test_quantized_arena_survives_update() {
+        let mut index = HnswIndex::new(3);
+        index.set_quantized(true);
+
+        let id = make_id(1);
+        index.insert(id, vec![1.0, 0.0, 0.0]).unwrap();
+        index.insert(id, vec![0.0, 1.0, 0.0]).unwrap();
+
+        let results = index.search(&[0.0, 1.0, 0.0], 1).unwrap();
+        assert_eq!(results.len(), 1);
+        assert_eq!(results[0].0, id);
+        assert!(results[0].1.to_f64() > 0.99);
+    }
+
+    #[test]
+    fn test_quantized_arena_survives_rebuild() {
+        let mut index = HnswIndex::new(3);
+        index.set_quantized(true);
+
+        let id1 = make_id(1);
+        let id2 = make_id(2);
+        let id3 = make_id(3);
+
+        index.insert(id1, vec![1.0, 0.0, 0.0]).unwrap();
+        index.insert(id2, vec![0.0, 1.0, 0.0]).unwrap();
+        index.insert(id3, vec![0.0, 0.0, 1.0]).unwrap();
+
+        index.delete(id2);
+        index.rebuild();
+
+        let results = index.search(&[1.0, 0.0, 0.0], 2).unwrap();
+        assert_eq!(results.len(), 2);
+        assert_eq!(results[0].0, id1);
+        assert!(results.iter().all(|(id, _)| *id != id2));
+    }
+
+    #[test]
+    fn test_quantized_arena_survives_clear() {
+        let mut index = HnswIndex::new(3);
+        index.set_quantized(true);
+
+        index.insert(make_id(1), vec![1.0, 0.0, 0.0]).unwrap();
+        index.clear();
+
+        index.insert(make_id(2), vec![0.0, 1.0, 0.0]).unwrap();
+        let results = index.search(&[0.0, 1.0, 0.0], 1).unwrap();
+        assert_eq!(results.len(), 1);
+        assert_eq!(results[0].0, make_id(2));
+    }
+
+    #[test]
+    fn test_quantized_empty_index() {
+        let index = HnswIndex::new(3).with_quantized();
+        let results = index.search(&[1.0, 0.0, 0.0], 5).unwrap();
+        assert!(results.is_empty());
+    }
+
+    #[test]
+    fn test_quantized_only_affects_cosine() {
+        for metric in [DistanceMetric::Dot, DistanceMetric::L2] {
+            let config = HnswConfig {
+                dimensions: 32,
+                metric,
+                seed: Some(42),
+                ..Default::default()
+            };
+            let mut index = HnswIndex::with_config(config);
+
+            for i in 0..30u8 {
+                let vec = generate_random_vector(32, i as u64);
+                index.insert(make_id(i), vec).unwrap();
+            }
+
+            let query = generate_random_vector(32, 999);
+            let results_f32 = index.search(&query, 5).unwrap();
+
+            index.set_quantized(true);
+            let results_quant = index.search(&query, 5).unwrap();
+
+            assert_eq!(
+                results_f32.len(),
+                results_quant.len(),
+                "Result count mismatch for {metric:?}"
+            );
+            for (a, b) in results_f32.iter().zip(results_quant.iter()) {
+                assert_eq!(a.0, b.0, "ID mismatch for {metric:?}");
+                assert_eq!(a.1, b.1, "Score mismatch for {metric:?}");
+            }
+        }
+    }
+
+    #[test]
+    fn test_quantization_error_bounded() {
+        let dim = 384;
+        for seed in 0..20u64 {
+            let vec = generate_random_vector(dim, seed);
+            let norm: f32 = vec.iter().map(|x| x * x).sum::<f32>().sqrt();
+
+            let mut max_abs: f32 = 0.0;
+            for &v in &vec {
+                let abs = v.abs();
+                if abs > max_abs {
+                    max_abs = abs;
+                }
+            }
+            let scale = if max_abs > 1e-10 {
+                127.0 / max_abs
+            } else {
+                1.0
+            };
+            let quantized: Vec<i8> = vec
+                .iter()
+                .map(|&v| (v * scale).round().clamp(-127.0, 127.0) as i8)
+                .collect();
+
+            let dequantized: Vec<f32> = quantized.iter().map(|&v| v as f32 / scale).collect();
+
+            let dot: f32 = vec.iter().zip(dequantized.iter()).map(|(a, b)| a * b).sum();
+            let dq_norm: f32 = dequantized.iter().map(|x| x * x).sum::<f32>().sqrt();
+            let cos_sim = if norm > 0.0 && dq_norm > 0.0 {
+                dot / (norm * dq_norm)
+            } else {
+                1.0
+            };
+
+            assert!(
+                cos_sim > 0.95,
+                "Quantization error too high: cosine_sim={cos_sim} for seed={seed}"
+            );
+            assert!(
+                cos_sim > 0.99,
+                "Expected high fidelity for 384d: cosine_sim={cos_sim}"
+            );
+        }
+    }
+}
+
+// =============================================================================
+// Snapshot / restore tests (Issue #2161)
+// =============================================================================
+
+#[cfg(test)]
+mod snapshot_tests {
+    use crate::{HnswConfig, HnswIndex};
+    use crate::NodeId;
+    use std::collections::HashMap;
+
+    fn make_id(seed: u8) -> NodeId {
+        NodeId::new([seed; 16])
+    }
+
+    fn generate_random_vector(dim: usize, seed: u64) -> Vec<f32> {
+        use std::collections::hash_map::DefaultHasher;
+        use std::hash::{Hash, Hasher};
+
+        (0..dim)
+            .map(|i| {
+                let mut hasher = DefaultHasher::new();
+                (seed, i).hash(&mut hasher);
+                (hasher.finish() as f32 / u64::MAX as f32) * 2.0 - 1.0
+            })
+            .collect()
+    }
+
+    /// Snapshot from a populated index includes vector data.
+    #[test]
+    fn snapshot_includes_vector_data() {
+        let mut index = HnswIndex::new(4);
+
+        let id1 = make_id(1);
+        let id2 = make_id(2);
+        let vec1 = vec![1.0, 0.0, 0.0, 0.0];
+        let vec2 = vec![0.0, 1.0, 0.0, 0.0];
+
+        index.insert(id1, vec1.clone()).expect("insert");
+        index.insert(id2, vec2.clone()).expect("insert");
+
+        let snap = index.snapshot();
+
+        assert_eq!(snap.vectors.len(), 2, "snapshot should contain 2 vectors");
+
+        // Vectors are sorted by NodeId bytes — look up by id
+        let vec_map: HashMap<NodeId, &Vec<f32>> =
+            snap.vectors.iter().map(|(id, v)| (*id, v)).collect();
+
+        assert_eq!(
+            vec_map.get(&id1).copied(),
+            Some(&vec1),
+            "id1 vector should match"
+        );
+        assert_eq!(
+            vec_map.get(&id2).copied(),
+            Some(&vec2),
+            "id2 vector should match"
+        );
+    }
+
+    /// Empty index snapshot has no vectors.
+    #[test]
+    fn snapshot_empty_index_has_no_vectors() {
+        let index = HnswIndex::new(4);
+        let snap = index.snapshot();
+        assert!(
+            snap.vectors.is_empty(),
+            "empty index snapshot has no vectors"
+        );
+    }
+
+    /// Self-contained round-trip: snapshot() → restore_from_snapshot_embedded().
+    #[test]
+    fn snapshot_restore_embedded_round_trip() {
+        let config = HnswConfig::with_dimensions(8).with_seed(42);
+        let mut original = HnswIndex::with_config(config.clone());
+
+        let ids: Vec<NodeId> = (0..20u8).map(|i| make_id(i)).collect();
+        let vecs: Vec<Vec<f32>> = (0..20u64).map(|i| generate_random_vector(8, i)).collect();
+
+        for (id, vec) in ids.iter().zip(vecs.iter()) {
+            original.insert(*id, vec.clone()).expect("insert");
+        }
+
+        // Take a snapshot and restore into a fresh index
+        let snap = original.snapshot();
+        assert_eq!(
+            snap.vectors.len(),
+            20,
+            "snapshot should embed all 20 vectors"
+        );
+
+        let mut restored = HnswIndex::with_config(config);
+        restored
+            .restore_from_snapshot_embedded(&snap)
+            .expect("restore embedded");
+
+        assert_eq!(restored.len(), 20, "restored index should have 20 nodes");
+
+        // Search results should match the original
+        let query = generate_random_vector(8, 999);
+        let results_orig = original.search(&query, 5).expect("search original");
+        let results_rest = restored.search(&query, 5).expect("search restored");
+
+        assert_eq!(
+            results_orig.len(),
+            results_rest.len(),
+            "result count should match"
+        );
+        for (r_orig, r_rest) in results_orig.iter().zip(results_rest.iter()) {
+            assert_eq!(r_orig.0, r_rest.0, "result IDs should match");
+        }
+    }
+
+    /// Tombstoned nodes are preserved through embedded snapshot round-trip.
+    #[test]
+    fn snapshot_restore_embedded_preserves_tombstones() {
+        let config = HnswConfig::with_dimensions(4).with_seed(42);
+        let mut index = HnswIndex::with_config(config.clone());
+
+        let id1 = make_id(1);
+        let id2 = make_id(2);
+        let id3 = make_id(3);
+
+        index.insert(id1, vec![1.0, 0.0, 0.0, 0.0]).expect("insert");
+        index.insert(id2, vec![0.0, 1.0, 0.0, 0.0]).expect("insert");
+        index.insert(id3, vec![0.0, 0.0, 1.0, 0.0]).expect("insert");
+
+        // Tombstone id2
+        assert!(index.delete(id2));
+
+        let snap = index.snapshot();
+        assert_eq!(snap.total_nodes, 3);
+        assert_eq!(snap.tombstone_count, 1);
+        assert_eq!(snap.vectors.len(), 3, "all 3 vectors including tombstone");
+
+        let mut restored = HnswIndex::with_config(config);
+        restored
+            .restore_from_snapshot_embedded(&snap)
+            .expect("restore");
+
+        assert_eq!(restored.len(), 3, "total nodes preserved");
+        assert_eq!(
+            restored.tombstone_stats().tombstone_count,
+            1,
+            "tombstone count preserved"
+        );
+
+        // Search should not return id2
+        let results = restored.search(&[0.0, 1.0, 0.0, 0.0], 3).expect("search");
+        let result_ids: Vec<NodeId> = results.iter().map(|(id, _)| *id).collect();
+        assert!(
+            !result_ids.contains(&id2),
+            "tombstoned id2 should not appear in results"
+        );
+    }
+
+    /// Snapshot serialization includes vectors and round-trips via JSON.
+    #[test]
+    fn snapshot_serialization_includes_vectors() {
+        let mut index = HnswIndex::new(4);
+
+        let id1 = make_id(1);
+        index.insert(id1, vec![1.0, 2.0, 3.0, 4.0]).expect("insert");
+
+        let snap = index.snapshot();
+        assert!(!snap.vectors.is_empty(), "vectors should be in snapshot");
+
+        let json = serde_json::to_string(&snap).expect("serialize");
+        assert!(
+            json.contains("vectors"),
+            "serialized JSON should contain vectors field"
+        );
+
+        let restored_snap: crate::checkpoint::HnswSnapshot =
+            serde_json::from_str(&json).expect("deserialize");
+        assert_eq!(
+            restored_snap.vectors.len(),
+            1,
+            "deserialized snapshot should have 1 vector"
+        );
+        assert_eq!(
+            restored_snap.vectors[0].0, id1,
+            "vector id should be preserved"
+        );
+        assert_eq!(
+            restored_snap.vectors[0].1,
+            vec![1.0, 2.0, 3.0, 4.0],
+            "vector data should be preserved"
+        );
+    }
+
+    /// Old snapshots (no vectors field) still deserialize correctly.
+    #[test]
+    fn backward_compat_snapshot_without_vectors() {
+        // Simulate a snapshot from before the vectors field was added
+        let old_json = r#"{
+            "total_nodes": 1,
+            "live_nodes": 1,
+            "tombstone_count": 0,
+            "max_layer": 0,
+            "entry_point": "01010101010101010101010101010101",
+            "config": {"m": 16, "ef_construction": 200, "metric": "cosine"},
+            "indexed_ids": ["01010101010101010101010101010101"],
+            "tombstoned_ids": [],
+            "layers": []
+        }"#;
+
+        let snap: crate::checkpoint::HnswSnapshot =
+            serde_json::from_str(old_json).expect("deserialize old snapshot");
+
+        assert_eq!(snap.total_nodes, 1);
+        assert!(
+            snap.vectors.is_empty(),
+            "old snapshot deserialized with empty vectors"
+        );
+        assert!(snap.verify().is_ok(), "old snapshot should verify");
+    }
+
+    /// restore_from_snapshot_embedded fails when snapshot has no vectors.
+    #[test]
+    fn restore_embedded_fails_without_vectors() {
+        let config = HnswConfig::with_dimensions(4);
+        let mut index = HnswIndex::with_config(config);
+
+        let id1 = make_id(1);
+        let snap = crate::checkpoint::HnswSnapshot {
+            vector_count: 0,
+            total_nodes: 1,
+            live_nodes: 1,
+            tombstone_count: 0,
+            max_layer: 0,
+            entry_point: Some(id1),
+            config: crate::checkpoint::HnswCheckpointConfig {
+                m: 20,
+                ef_construction: 200,
+                metric: "cosine".to_string(),
+            },
+            indexed_ids: vec![id1],
+            tombstoned_ids: vec![],
+            layers: vec![vec![(id1, vec![])]],
+            vectors: vec![], // Intentionally empty
+        };
+
+        let result = index.restore_from_snapshot_embedded(&snap);
+        assert!(
+            result.is_err(),
+            "should fail when snapshot has no embedded vectors"
+        );
+    }
+
+    /// restore_from_snapshot with external map takes priority over embedded vectors.
+    #[test]
+    fn restore_external_overrides_embedded_vectors() {
+        let config = HnswConfig::with_dimensions(4).with_seed(42);
+        let mut source = HnswIndex::with_config(config.clone());
+
+        let id1 = make_id(1);
+        source
+            .insert(id1, vec![1.0, 0.0, 0.0, 0.0])
+            .expect("insert");
+
+        let snap = source.snapshot();
+
+        // Provide a different vector for id1 via the external map
+        let updated_vector = vec![0.0, 0.0, 0.0, 1.0];
+        let external: HashMap<NodeId, Vec<f32>> =
+            [(id1, updated_vector.clone())].into_iter().collect();
+
+        let mut restored = HnswIndex::with_config(config);
+        restored
+            .restore_from_snapshot(&snap, &external)
+            .expect("restore with external");
+
+        // The restored index should use the external (override) vector
+        let retrieved = restored.get_vector(&id1).expect("get vector");
+        assert_eq!(
+            retrieved, updated_vector,
+            "external vector should override embedded"
+        );
+    }
+
+    /// Large snapshot round-trip preserves search quality.
+    #[test]
+    fn snapshot_restore_preserves_search_quality() {
+        let config = HnswConfig::with_dimensions(32).with_seed(42);
+        let mut original = HnswIndex::with_config(config.clone());
+
+        let n = 200usize;
+        for i in 0..n {
+            let id = NodeId::new({
+                let mut b = [0u8; 16];
+                b[0] = (i & 0xff) as u8;
+                b[1] = (i >> 8) as u8;
+                b
+            });
+            let vec = generate_random_vector(32, i as u64);
+            original.insert(id, vec).expect("insert");
+        }
+
+        let snap = original.snapshot();
+        assert_eq!(snap.vectors.len(), n, "all vectors embedded");
+
+        let mut restored = HnswIndex::with_config(config);
+        restored
+            .restore_from_snapshot_embedded(&snap)
+            .expect("restore");
+
+        // Search quality: >= 80% recall@10 across 10 queries
+        let k = 10;
+        let mut total_recall = 0.0;
+        for q in 0..10 {
+            let query = generate_random_vector(32, 10_000 + q);
+
+            let results_orig: std::collections::HashSet<NodeId> = original
+                .search(&query, k)
+                .expect("orig search")
+                .into_iter()
+                .map(|(id, _)| id)
+                .collect();
+            let results_rest: std::collections::HashSet<NodeId> = restored
+                .search(&query, k)
+                .expect("rest search")
+                .into_iter()
+                .map(|(id, _)| id)
+                .collect();
+
+            let overlap = results_orig.intersection(&results_rest).count();
+            total_recall += overlap as f32 / k as f32;
+        }
+
+        let avg_recall = total_recall / 10.0;
+        assert!(
+            avg_recall >= 0.8,
+            "restored index recall {avg_recall:.2} should be >= 0.8"
+        );
+    }
+}
diff --git a/crates/khive-retrieval/Cargo.toml b/crates/khive-retrieval/Cargo.toml
new file mode 100644
index 00000000..19a761e2
--- /dev/null
+++ b/crates/khive-retrieval/Cargo.toml
@@ -0,0 +1,56 @@
+[package]
+name = "khive-retrieval"
+version.workspace = true
+edition.workspace = true
+authors.workspace = true
+license.workspace = true
+repository.workspace = true
+homepage.workspace = true
+keywords.workspace = true
+categories.workspace = true
+description = "Hybrid retrieval composer (HNSW + BM25 + fusion + graph + cross-encoder) with deterministic scoring"
+
+[dependencies]
+khive-hnsw    = { version = "0.2.0", path = "../khive-hnsw" }
+khive-bm25    = { version = "0.2.0", path = "../khive-bm25" }
+khive-fusion  = { version = "0.2.0", path = "../khive-fusion" }
+khive-score   = { version = "0.2.0", path = "../khive-score" }
+khive-types   = { version = "0.2.0", path = "../khive-types" }
+khive-fold    = { version = "0.2.0", path = "../khive-fold", optional = true }
+khive-storage = { version = "0.2.0", path = "../khive-storage", optional = true }
+khive-db      = { version = "0.2.0", path = "../khive-db" }
+khive-gate    = { version = "0.2.0", path = "../khive-gate", optional = true }
+lattice-embed = { workspace = true }
+
+serde = { workspace = true }
+serde_json = { workspace = true }
+thiserror = { workspace = true }
+parking_lot = { workspace = true }
+async-trait = { workspace = true }
+tokio = { workspace = true }
+tokio-util = { version = "0.7", features = ["rt"] }
+chrono = { workspace = true }
+uuid = { workspace = true }
+rusqlite = { version = "0.33", optional = true }
+tracing = { workspace = true, optional = true }
+rand = { version = "0.8", optional = true }
+
+[features]
+default = []
+# Policy-based access control for search results (uses khive-gate API)
+policy = ["khive-gate"]
+# HNSW checkpoint integration with khive-fold
+# Note: khive_hnsw::HnswCheckpoint/HnswCheckpointStore depend on khive_fold::Checkpoint
+# which doesn't exist in the current khive-fold API. Those re-exports are gated out
+# until the khive-fold Checkpoint trait is ported.
+checkpoint = ["khive-fold"]
+# SQLite-based persistence for HNSW and BM25 indexes
+persist = ["rusqlite", "tracing", "rand"]
+# Adapters bridging khive-storage backends (sqlite-vec, FTS5) to retrieval search traits
+storage-adapters = ["khive-storage"]
+# Native cross-encoder reranking (deferred until khive-inference is ported)
+native-rerank = []
+# Native embedding service (delegated to lattice-embed; reserved for future feature-gating)
+embed = []
+# Legacy graph traversal module (depends on old EntityRef/LinkStore API; not yet ported)
+graph-legacy = []
diff --git a/crates/khive-retrieval/src/adapters/mod.rs b/crates/khive-retrieval/src/adapters/mod.rs
new file mode 100644
index 00000000..479e0fd2
--- /dev/null
+++ b/crates/khive-retrieval/src/adapters/mod.rs
@@ -0,0 +1,456 @@
+//! Adapters bridging `khive-storage-traits` backends to retrieval search traits.
+//!
+//! The retrieval crate defines [`VectorSearch`] and [`KeywordSearch`] as async
+//! traits with an associated `Id` type. The `khive-storage-traits` crate defines
+//! [`VectorStore`] and [`TextSearch`] as async persistence traits using `Uuid`.
+//!
+//! This module provides adapter types that implement the retrieval search traits
+//! by delegating to storage-traits backends:
+//!
+//! - [`StorageVectorSearch`]: wraps `Arc<dyn VectorStore>` -> `VectorSearch<Id = Uuid>`
+//! - [`StorageKeywordSearch`]: wraps `Arc<dyn TextSearch>` -> `KeywordSearch<Id = Uuid>`
+//!
+//! This makes [`HybridSearcher`] work with persistent backends (sqlite-vec, FTS5)
+//! alongside the existing in-memory backends (HNSW, BM25).
+//!
+//! # Example
+//!
+//! ```rust,ignore
+//! use khive_db::StorageBackend;
+//! use khive_retrieval::adapters::{StorageVectorSearch, StorageKeywordSearch};
+//! use khive_retrieval::hybrid::{VectorSearch, KeywordSearch};
+//!
+//! let backend = StorageBackend::memory().unwrap();
+//! let vec_store = backend.vectors("model", 384).unwrap();
+//! let text_store = backend.text("docs").unwrap();
+//!
+//! let vector_search = StorageVectorSearch::new(vec_store);
+//! let keyword_search = StorageKeywordSearch::new(text_store);
+//!
+//! // Both implement the retrieval search traits with Id = Uuid
+//! let hits = vector_search.vector_search(&query_embedding, 10).await?;
+//! let kw_hits = keyword_search.keyword_search("some query", 10).await?;
+//! ```
+
+use std::sync::Arc;
+
+use async_trait::async_trait;
+use khive_score::DeterministicScore;
+use khive_storage::types::{TextQueryMode, TextSearchRequest, VectorSearchRequest};
+use khive_storage::{TextSearch, VectorStore};
+use uuid::Uuid;
+
+use crate::error::{Result, RetrievalError};
+use crate::hybrid::{KeywordSearch, VectorSearch};
+
+// ---------------------------------------------------------------------------
+// Error conversion
+// ---------------------------------------------------------------------------
+
+/// Convert a `StorageError` into a `RetrievalError`.
+///
+/// Maps storage-level errors to the closest retrieval error variant:
+/// - Vector-related storage errors -> `Hnsw` (vector search context)
+/// - Text-related storage errors -> `Bm25` (keyword search context)
+/// - Timeout/pool errors -> transient retrieval errors
+/// - Everything else -> generic error string
+fn storage_err_to_retrieval(
+    err: khive_storage::StorageError,
+    context: &'static str,
+) -> RetrievalError {
+    use khive_storage::StorageError;
+
+    match &err {
+        StorageError::Timeout { .. } => {
+            // Map to a transient retrieval error
+            RetrievalError::Hnsw(format!("{context}: {err}"))
+        }
+        StorageError::InvalidInput { message, .. } => {
+            RetrievalError::InvalidQuery(format!("{context}: {message}"))
+        }
+        _ => {
+            // Generic mapping -- preserve the full error message
+            RetrievalError::Hnsw(format!("{context}: {err}"))
+        }
+    }
+}
+
+// ---------------------------------------------------------------------------
+// StorageVectorSearch
+// ---------------------------------------------------------------------------
+
+/// Adapter implementing [`VectorSearch`] by delegating to a [`VectorStore`].
+///
+/// Wraps an `Arc<dyn VectorStore>` (e.g., `SqliteVecStore`) and implements
+/// the retrieval `VectorSearch` trait with `Id = Uuid`.
+///
+/// The adapter is `Send + Sync` and can be shared across tasks.
+pub struct StorageVectorSearch {
+    store: Arc<dyn VectorStore>,
+}
+
+impl StorageVectorSearch {
+    /// Create a new adapter wrapping the given vector store.
+    pub fn new(store: Arc<dyn VectorStore>) -> Self {
+        Self { store }
+    }
+}
+
+#[async_trait]
+impl VectorSearch for StorageVectorSearch {
+    type Id = Uuid;
+
+    async fn vector_search(
+        &self,
+        embedding: &[f32],
+        top_k: usize,
+    ) -> Result<Vec<(Uuid, DeterministicScore)>> {
+        let request = VectorSearchRequest {
+            query_embedding: embedding.to_vec(),
+            top_k: top_k as u32,
+            namespace: None,
+            kind: None,
+        };
+
+        let hits = self
+            .store
+            .search(request)
+            .await
+            .map_err(|e| storage_err_to_retrieval(e, "vector search"))?;
+
+        Ok(hits
+            .into_iter()
+            .map(|hit| (hit.subject_id, hit.score))
+            .collect())
+    }
+}
+
+// ---------------------------------------------------------------------------
+// StorageKeywordSearch
+// ---------------------------------------------------------------------------
+
+/// Adapter implementing [`KeywordSearch`] by delegating to a [`TextSearch`].
+///
+/// Wraps an `Arc<dyn TextSearch>` (e.g., `Fts5TextSearch`) and implements
+/// the retrieval `KeywordSearch` trait with `Id = Uuid`.
+///
+/// Uses `TextQueryMode::Plain` for keyword queries by default. The snippet
+/// length is set to 0 since retrieval only needs IDs and scores.
+pub struct StorageKeywordSearch {
+    search: Arc<dyn TextSearch>,
+}
+
+impl StorageKeywordSearch {
+    /// Create a new adapter wrapping the given text search backend.
+    pub fn new(search: Arc<dyn TextSearch>) -> Self {
+        Self { search }
+    }
+}
+
+#[async_trait]
+impl KeywordSearch for StorageKeywordSearch {
+    type Id = Uuid;
+
+    async fn keyword_search(
+        &self,
+        text: &str,
+        top_k: usize,
+    ) -> Result<Vec<(Uuid, DeterministicScore)>> {
+        let request = TextSearchRequest {
+            query: text.to_string(),
+            mode: TextQueryMode::Plain,
+            filter: None,
+            top_k: top_k as u32,
+            snippet_chars: 0, // retrieval only needs IDs + scores
+        };
+
+        let hits = self
+            .search
+            .search(request)
+            .await
+            .map_err(|e| storage_err_to_retrieval(e, "keyword search"))?;
+
+        Ok(hits
+            .into_iter()
+            .map(|hit| (hit.subject_id, hit.score))
+            .collect())
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use khive_db::StorageBackend;
+    use khive_storage::types::TextDocument;
+    use khive_types::SubstrateKind;
+
+    /// Helper: create a memory-backed StorageBackend.
+    fn test_backend() -> StorageBackend {
+        StorageBackend::memory().expect("memory backend")
+    }
+
+    // -----------------------------------------------------------------------
+    // StorageVectorSearch tests
+    // -----------------------------------------------------------------------
+
+    #[tokio::test]
+    async fn vector_search_basic_roundtrip() {
+        let backend = test_backend();
+        let store = backend.vectors("test_vs", 3).unwrap();
+
+        // Insert two vectors
+        let id1 = Uuid::new_v4();
+        let id2 = Uuid::new_v4();
+        store
+            .insert(id1, SubstrateKind::Entity, "test", vec![1.0, 0.0, 0.0])
+            .await
+            .unwrap();
+        store
+            .insert(id2, SubstrateKind::Entity, "test", vec![0.0, 1.0, 0.0])
+            .await
+            .unwrap();
+
+        // Wrap in adapter and use VectorSearch trait
+        let adapter = StorageVectorSearch::new(store);
+        let hits = adapter.vector_search(&[1.0, 0.0, 0.0], 2).await.unwrap();
+
+        assert_eq!(hits.len(), 2);
+        // Closest to [1,0,0] should be id1
+        assert_eq!(hits[0].0, id1);
+        // Score should be high (cosine similarity ~1.0)
+        assert!(hits[0].1.to_f64() > 0.9);
+    }
+
+    #[tokio::test]
+    async fn vector_search_respects_top_k() {
+        let backend = test_backend();
+        let store = backend.vectors("test_topk", 3).unwrap();
+
+        // Insert 5 vectors
+        for _ in 0..5 {
+            store
+                .insert(
+                    Uuid::new_v4(),
+                    SubstrateKind::Entity,
+                    "test",
+                    vec![1.0, 0.0, 0.0],
+                )
+                .await
+                .unwrap();
+        }
+
+        let adapter = StorageVectorSearch::new(store);
+        let hits = adapter.vector_search(&[1.0, 0.0, 0.0], 3).await.unwrap();
+
+        assert_eq!(hits.len(), 3);
+    }
+
+    #[tokio::test]
+    async fn vector_search_empty_store() {
+        let backend = test_backend();
+        let store = backend.vectors("test_empty", 3).unwrap();
+
+        let adapter = StorageVectorSearch::new(store);
+        let hits = adapter.vector_search(&[1.0, 0.0, 0.0], 5).await.unwrap();
+
+        assert!(hits.is_empty());
+    }
+
+    #[tokio::test]
+    async fn vector_search_returns_deterministic_scores() {
+        let backend = test_backend();
+        let store = backend.vectors("test_det", 3).unwrap();
+
+        let id = Uuid::new_v4();
+        store
+            .insert(id, SubstrateKind::Entity, "test", vec![1.0, 0.0, 0.0])
+            .await
+            .unwrap();
+
+        let adapter = StorageVectorSearch::new(store);
+
+        // Run twice -- scores must be identical (deterministic)
+        let hits1 = adapter.vector_search(&[1.0, 0.0, 0.0], 1).await.unwrap();
+        let hits2 = adapter.vector_search(&[1.0, 0.0, 0.0], 1).await.unwrap();
+
+        assert_eq!(hits1[0].1, hits2[0].1);
+    }
+
+    // -----------------------------------------------------------------------
+    // StorageKeywordSearch tests
+    // -----------------------------------------------------------------------
+
+    #[tokio::test]
+    async fn keyword_search_basic_roundtrip() {
+        let backend = test_backend();
+        let store = backend.text("test_ks").unwrap();
+
+        let id1 = Uuid::new_v4();
+        let id2 = Uuid::new_v4();
+
+        store
+            .upsert_document(TextDocument {
+                subject_id: id1,
+                kind: SubstrateKind::Entity,
+                namespace: "test".to_string(),
+                title: Some("Rust Programming".to_string()),
+                body: "Rust is a systems programming language.".to_string(),
+                tags: vec![],
+                metadata: None,
+                updated_at: chrono::Utc::now(),
+            })
+            .await
+            .unwrap();
+
+        store
+            .upsert_document(TextDocument {
+                subject_id: id2,
+                kind: SubstrateKind::Entity,
+                namespace: "test".to_string(),
+                title: Some("Python Guide".to_string()),
+                body: "Python is a high-level programming language.".to_string(),
+                tags: vec![],
+                metadata: None,
+                updated_at: chrono::Utc::now(),
+            })
+            .await
+            .unwrap();
+
+        // Wrap in adapter and use KeywordSearch trait
+        let adapter = StorageKeywordSearch::new(store);
+        let hits = adapter.keyword_search("Rust", 10).await.unwrap();
+
+        // Should find the Rust document
+        assert!(!hits.is_empty());
+        assert_eq!(hits[0].0, id1);
+        assert!(hits[0].1.to_f64() > 0.0);
+    }
+
+    #[tokio::test]
+    async fn keyword_search_respects_top_k() {
+        let backend = test_backend();
+        let store = backend.text("test_ks_topk").unwrap();
+
+        // Insert 5 documents all containing "programming"
+        for i in 0..5 {
+            store
+                .upsert_document(TextDocument {
+                    subject_id: Uuid::new_v4(),
+                    kind: SubstrateKind::Note,
+                    namespace: "test".to_string(),
+                    title: Some(format!("Doc {}", i)),
+                    body: format!("Programming topic number {}.", i),
+                    tags: vec![],
+                    metadata: None,
+                    updated_at: chrono::Utc::now(),
+                })
+                .await
+                .unwrap();
+        }
+
+        let adapter = StorageKeywordSearch::new(store);
+        let hits = adapter.keyword_search("programming", 3).await.unwrap();
+
+        assert!(hits.len() <= 3);
+    }
+
+    #[tokio::test]
+    async fn keyword_search_empty_store() {
+        let backend = test_backend();
+        let store = backend.text("test_ks_empty").unwrap();
+
+        let adapter = StorageKeywordSearch::new(store);
+        let hits = adapter.keyword_search("anything", 5).await.unwrap();
+
+        assert!(hits.is_empty());
+    }
+
+    #[tokio::test]
+    async fn keyword_search_no_match() {
+        let backend = test_backend();
+        let store = backend.text("test_ks_nomatch").unwrap();
+
+        store
+            .upsert_document(TextDocument {
+                subject_id: Uuid::new_v4(),
+                kind: SubstrateKind::Entity,
+                namespace: "test".to_string(),
+                title: Some("Alpha".to_string()),
+                body: "Alpha article content.".to_string(),
+                tags: vec![],
+                metadata: None,
+                updated_at: chrono::Utc::now(),
+            })
+            .await
+            .unwrap();
+
+        let adapter = StorageKeywordSearch::new(store);
+        let hits = adapter
+            .keyword_search("nonexistent_xyz_term", 5)
+            .await
+            .unwrap();
+
+        assert!(hits.is_empty());
+    }
+
+    // -----------------------------------------------------------------------
+    // Integration: both adapters with fusion
+    // -----------------------------------------------------------------------
+
+    #[tokio::test]
+    async fn adapters_produce_fusible_results() {
+        use crate::hybrid::{fuse_search_results, HybridConfig};
+
+        let backend = test_backend();
+        let vec_store = backend.vectors("test_fuse", 3).unwrap();
+        let text_store = backend.text("test_fuse").unwrap();
+
+        let id = Uuid::new_v4();
+
+        // Insert into both stores
+        vec_store
+            .insert(id, SubstrateKind::Note, "test", vec![1.0, 0.0, 0.0])
+            .await
+            .unwrap();
+        text_store
+            .upsert_document(TextDocument {
+                subject_id: id,
+                kind: SubstrateKind::Note,
+                namespace: "test".to_string(),
+                title: Some("Test".to_string()),
+                body: "Test document for fusion.".to_string(),
+                tags: vec![],
+                metadata: None,
+                updated_at: chrono::Utc::now(),
+            })
+            .await
+            .unwrap();
+
+        let vec_adapter = StorageVectorSearch::new(vec_store);
+        let kw_adapter = StorageKeywordSearch::new(text_store);
+
+        let vec_hits = vec_adapter
+            .vector_search(&[1.0, 0.0, 0.0], 5)
+            .await
+            .unwrap();
+        let kw_hits = kw_adapter.keyword_search("Test", 5).await.unwrap();
+
+        // Both should return the same UUID
+        assert!(!vec_hits.is_empty());
+        assert!(!kw_hits.is_empty());
+        assert_eq!(vec_hits[0].0, id);
+        assert_eq!(kw_hits[0].0, id);
+
+        // Fuse the results -- same Id type (Uuid) means fusion works
+        let config = HybridConfig::new(10);
+        let fused = fuse_search_results(vec![vec_hits, kw_hits], &config);
+
+        assert!(!fused.is_empty());
+        // The single shared UUID should appear in fused results
+        assert_eq!(fused[0].0, id);
+    }
+}
diff --git a/crates/khive-retrieval/src/error.rs b/crates/khive-retrieval/src/error.rs
new file mode 100644
index 00000000..4d7af9b7
--- /dev/null
+++ b/crates/khive-retrieval/src/error.rs
@@ -0,0 +1,505 @@
+//! Error types for retrieval operations.
+//!
+//! Uses khive-db error patterns and integrates with EmbeddingError.
+//!
+//! # Error Classification (RETRIEVAL-06)
+//!
+//! Errors are classified into two categories for retry behavior:
+//!
+//! ## Transient Errors (retryable)
+//!
+//! These errors may succeed on retry and include:
+//! - **Network errors**: Connection timeouts, temporary unavailability
+//! - **Resource contention**: Lock conflicts, rate limiting
+//! - **External service errors**: Embedding/link store temporary failures
+//!
+//! Recommended retry strategy: exponential backoff with jitter, max 3 retries.
+//!
+//! ## Permanent Errors (non-retryable)
+//!
+//! These errors indicate logic/data issues that won't be fixed by retry:
+//! - **Validation errors**: Invalid query, dimension mismatch
+//! - **Configuration errors**: Bad parameters, missing required fields
+//! - **Data integrity errors**: Corrupt index, rebuild required
+//!
+//! These should be surfaced to the user immediately.
+//!
+//! # Usage
+//!
+//! ```rust
+//! use khive_retrieval::error::RetrievalError;
+//!
+//! fn handle_error(err: RetrievalError) {
+//!     if err.is_transient() {
+//!         // Retry with backoff
+//!         println!("Retrying: {}", err);
+//!     } else {
+//!         // Surface to user immediately
+//!         eprintln!("Permanent error: {}", err);
+//!     }
+//! }
+//! ```
+
+use thiserror::Error;
+
+/// Error classification for retry behavior.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum ErrorKind {
+    /// Transient error that may succeed on retry (network, contention).
+    Transient,
+    /// Permanent error that won't be fixed by retry (validation, config).
+    Permanent,
+}
+
+/// Errors that can occur during retrieval operations.
+#[derive(Error, Debug)]
+#[non_exhaustive]
+pub enum RetrievalError {
+    /// Vector index operation failed.
+    #[error("hnsw error: {0}")]
+    Hnsw(String),
+
+    /// BM25 index operation failed.
+    #[error("bm25 error: {0}")]
+    Bm25(String),
+
+    /// Fusion operation failed.
+    #[error("fusion error: {0}")]
+    Fusion(String),
+
+    /// Graph traversal failed.
+    #[error("graph traversal error: {0}")]
+    GraphTraversal(String),
+
+    /// Invalid query parameters.
+    #[error("invalid query: {0}")]
+    InvalidQuery(String),
+
+    /// Dimension mismatch.
+    #[error("dimension mismatch: expected {expected}, got {actual}")]
+    DimensionMismatch {
+        /// Expected dimensions.
+        expected: usize,
+        /// Actual dimensions.
+        actual: usize,
+    },
+
+    /// Configuration error.
+    #[error("configuration error: {0}")]
+    Configuration(String),
+
+    /// Embedding store error.
+    #[error("embedding store: {0}")]
+    EmbeddingStore(String),
+
+    /// Link store error (for graph operations).
+    #[error("link store: {0}")]
+    LinkStore(String),
+
+    /// Index not initialized.
+    #[error("index not initialized: {0}")]
+    IndexNotInitialized(String),
+
+    /// Index rebuild required.
+    #[error("index rebuild required: {reason}")]
+    RebuildRequired {
+        /// Why rebuild is needed.
+        reason: String,
+    },
+
+    /// Query timed out before completing.
+    ///
+    /// The search operation exceeded the configured timeout duration.
+    /// This is a transient error: the query may succeed with a longer timeout
+    /// or fewer results requested.
+    #[error("query timed out after {elapsed_ms}ms")]
+    QueryTimeout {
+        /// Elapsed time in milliseconds before timeout.
+        elapsed_ms: u64,
+    },
+
+    /// Query was cancelled via cancellation token.
+    ///
+    /// The search operation was cancelled before completing.
+    /// This is a transient error: the query may succeed if not cancelled.
+    #[error("query cancelled")]
+    QueryCancelled,
+
+    /// Memory budget exceeded.
+    ///
+    /// The insert operation would cause the index to exceed its configured
+    /// memory budget. This is a permanent error: the same insert will always
+    /// fail unless the budget is raised or existing data is removed.
+    #[error("memory budget exceeded: current {current_usage} + item {item_size} > limit {limit}")]
+    BudgetExceeded {
+        /// Current estimated memory usage in bytes.
+        current_usage: usize,
+        /// Estimated size of the item being inserted in bytes.
+        item_size: usize,
+        /// Configured memory budget in bytes.
+        limit: usize,
+    },
+
+    /// Reranking operation failed (permanent).
+    #[error("rerank error: {0}")]
+    Rerank(String),
+
+    // TODO(port-rerank): khive-inference not ported yet; re-enable when available.
+    // #[cfg(feature = "native-rerank")]
+    // #[error("inference error: {0}")]
+    // Inference(#[from] khive_inference::InferenceError),
+}
+
+impl RetrievalError {
+    /// Get the error classification (transient or permanent).
+    ///
+    /// This classification determines retry behavior:
+    /// - `Transient`: May succeed on retry (network, external services)
+    /// - `Permanent`: Won't be fixed by retry (validation, config, data)
+    ///
+    /// # Error Classification Table
+    ///
+    /// | Error Type | Classification | Reason |
+    /// |------------|---------------|--------|
+    /// | EmbeddingStore | Transient | External service, may recover |
+    /// | LinkStore | Transient | External service, may recover |
+    /// | Hnsw | Permanent | Index algorithm error |
+    /// | Bm25 | Permanent | Index algorithm error |
+    /// | Fusion | Permanent | Score combination error |
+    /// | GraphTraversal | Permanent | Graph algorithm error |
+    /// | InvalidQuery | Permanent | User input validation |
+    /// | DimensionMismatch | Permanent | Data incompatibility |
+    /// | Configuration | Permanent | Setup/config issue |
+    /// | IndexNotInitialized | Permanent | Missing prerequisite |
+    /// | RebuildRequired | Permanent | Data integrity issue |
+    /// | QueryTimeout | Transient | May succeed with longer timeout |
+    /// | QueryCancelled | Transient | May succeed if not cancelled |
+    /// | BudgetExceeded | Permanent | Capacity limit, won't auto-resolve |
+    pub fn kind(&self) -> ErrorKind {
+        match self {
+            // Transient: external services that may recover, timeouts, cancellations
+            RetrievalError::EmbeddingStore(_)
+            | RetrievalError::LinkStore(_)
+            | RetrievalError::QueryTimeout { .. }
+            | RetrievalError::QueryCancelled => ErrorKind::Transient,
+
+            // Permanent: logic, validation, and configuration errors
+            RetrievalError::Hnsw(_)
+            | RetrievalError::Bm25(_)
+            | RetrievalError::Fusion(_)
+            | RetrievalError::GraphTraversal(_)
+            | RetrievalError::InvalidQuery(_)
+            | RetrievalError::DimensionMismatch { .. }
+            | RetrievalError::Configuration(_)
+            | RetrievalError::IndexNotInitialized(_)
+            | RetrievalError::RebuildRequired { .. }
+            | RetrievalError::BudgetExceeded { .. }
+            | RetrievalError::Rerank(_) => ErrorKind::Permanent,
+            // TODO(port-rerank): khive-inference not ported yet
+            // #[cfg(feature = "native-rerank")]
+            // RetrievalError::Inference(_) => ErrorKind::Permanent,
+        }
+    }
+
+    /// Check if this error is transient (may succeed on retry).
+    ///
+    /// Transient errors include:
+    /// - External service failures (embedding store, link store)
+    /// - Network-related issues
+    /// - Resource contention
+    ///
+    /// # Retry Strategy
+    ///
+    /// For transient errors, use exponential backoff with jitter:
+    /// - Initial delay: 100ms
+    /// - Max delay: 5s
+    /// - Max retries: 3
+    /// - Jitter: +/- 20%
+    ///
+    /// # Example
+    ///
+    /// ```rust
+    /// use khive_retrieval::error::RetrievalError;
+    ///
+    /// fn should_retry(err: &RetrievalError) -> bool {
+    ///     err.is_transient()
+    /// }
+    /// ```
+    #[inline]
+    pub fn is_transient(&self) -> bool {
+        self.kind() == ErrorKind::Transient
+    }
+
+    /// Check if this error is permanent (won't be fixed by retry).
+    ///
+    /// Permanent errors should be surfaced to the user immediately
+    /// without retry attempts.
+    #[inline]
+    pub fn is_permanent(&self) -> bool {
+        self.kind() == ErrorKind::Permanent
+    }
+
+    /// Check if this error is retryable (alias for `is_transient`).
+    ///
+    /// Provided for backward compatibility and semantic clarity.
+    #[inline]
+    pub fn is_retryable(&self) -> bool {
+        self.is_transient()
+    }
+
+    /// Create a rerank error (permanent).
+    pub fn rerank(msg: impl Into<String>) -> Self {
+        Self::Rerank(msg.into())
+    }
+
+    /// Create an HNSW error (permanent).
+    pub fn hnsw(msg: impl Into<String>) -> Self {
+        Self::Hnsw(msg.into())
+    }
+
+    /// Create a BM25 error (permanent).
+    pub fn bm25(msg: impl Into<String>) -> Self {
+        Self::Bm25(msg.into())
+    }
+
+    /// Create a fusion error (permanent).
+    pub fn fusion(msg: impl Into<String>) -> Self {
+        Self::Fusion(msg.into())
+    }
+
+    /// Create a graph traversal error (permanent).
+    pub fn graph_traversal(msg: impl Into<String>) -> Self {
+        Self::GraphTraversal(msg.into())
+    }
+
+    /// Create an invalid query error (permanent).
+    pub fn invalid_query(msg: impl Into<String>) -> Self {
+        Self::InvalidQuery(msg.into())
+    }
+
+    /// Create a dimension mismatch error (permanent).
+    pub fn dimension_mismatch(expected: usize, actual: usize) -> Self {
+        Self::DimensionMismatch { expected, actual }
+    }
+
+    /// Create a configuration error (permanent).
+    pub fn configuration(msg: impl Into<String>) -> Self {
+        Self::Configuration(msg.into())
+    }
+
+    /// Create an index not initialized error (permanent).
+    pub fn index_not_initialized(msg: impl Into<String>) -> Self {
+        Self::IndexNotInitialized(msg.into())
+    }
+
+    /// Create a rebuild required error (permanent).
+    pub fn rebuild_required(reason: impl Into<String>) -> Self {
+        Self::RebuildRequired {
+            reason: reason.into(),
+        }
+    }
+
+    /// Create a query timeout error (transient).
+    pub fn query_timeout(elapsed_ms: u64) -> Self {
+        Self::QueryTimeout { elapsed_ms }
+    }
+
+    /// Create a query cancelled error (transient).
+    pub fn query_cancelled() -> Self {
+        Self::QueryCancelled
+    }
+
+    /// Create a budget exceeded error (permanent).
+    pub fn budget_exceeded(current_usage: usize, item_size: usize, limit: usize) -> Self {
+        Self::BudgetExceeded {
+            current_usage,
+            item_size,
+            limit,
+        }
+    }
+}
+
+/// Result type alias for retrieval operations.
+pub type Result<T> = std::result::Result<T, RetrievalError>;
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_error_display() {
+        let err = RetrievalError::hnsw("connection failed");
+        assert_eq!(err.to_string(), "hnsw error: connection failed");
+    }
+
+    #[test]
+    fn test_dimension_mismatch() {
+        let err = RetrievalError::dimension_mismatch(768, 512);
+        assert_eq!(err.to_string(), "dimension mismatch: expected 768, got 512");
+    }
+
+    #[test]
+    fn test_is_retryable() {
+        // Non-retryable (permanent errors)
+        assert!(!RetrievalError::hnsw("fail").is_retryable());
+        assert!(!RetrievalError::bm25("fail").is_retryable());
+        assert!(!RetrievalError::InvalidQuery("bad".into()).is_retryable());
+        assert!(!RetrievalError::dimension_mismatch(768, 512).is_retryable());
+    }
+
+    // RETRIEVAL-06: Comprehensive error classification tests
+
+    #[test]
+    fn test_error_kind_transient() {
+        // EmbeddingStore and LinkStore are transient (external services)
+        // Note: We can't easily construct these without the actual error types,
+        // so we test via is_transient/is_permanent methods on constructable errors
+    }
+
+    #[test]
+    fn test_error_kind_permanent_all_variants() {
+        // All internal errors should be permanent
+        let permanent_errors: Vec<RetrievalError> = vec![
+            RetrievalError::hnsw("index corrupt"),
+            RetrievalError::bm25("tokenization failed"),
+            RetrievalError::fusion("incompatible scores"),
+            RetrievalError::graph_traversal("cycle detected"),
+            RetrievalError::invalid_query("empty query"),
+            RetrievalError::dimension_mismatch(768, 512),
+            RetrievalError::configuration("invalid k1 value"),
+            RetrievalError::index_not_initialized("HNSW index"),
+            RetrievalError::rebuild_required("version mismatch"),
+            RetrievalError::budget_exceeded(1000, 500, 1200),
+        ];
+
+        for err in permanent_errors {
+            assert!(err.is_permanent(), "Expected permanent: {err:?}");
+            assert!(!err.is_transient(), "Should not be transient: {err:?}");
+            assert_eq!(
+                err.kind(),
+                ErrorKind::Permanent,
+                "Kind mismatch for: {err:?}"
+            );
+        }
+    }
+
+    #[test]
+    fn test_is_transient_is_permanent_consistency() {
+        // is_transient and is_permanent should be mutually exclusive and exhaustive
+        let test_errors: Vec<RetrievalError> = vec![
+            RetrievalError::hnsw("test"),
+            RetrievalError::bm25("test"),
+            RetrievalError::fusion("test"),
+            RetrievalError::invalid_query("test"),
+            RetrievalError::dimension_mismatch(1, 2),
+            RetrievalError::configuration("test"),
+            RetrievalError::budget_exceeded(100, 50, 120),
+        ];
+
+        for err in test_errors {
+            let transient = err.is_transient();
+            let permanent = err.is_permanent();
+
+            // XOR: exactly one should be true
+            assert!(
+                transient ^ permanent,
+                "Error must be exactly transient OR permanent: {err:?} (transient={transient}, permanent={permanent})"
+            );
+
+            // is_retryable should match is_transient
+            assert_eq!(
+                err.is_retryable(),
+                err.is_transient(),
+                "is_retryable should equal is_transient for: {err:?}"
+            );
+        }
+    }
+
+    #[test]
+    fn test_error_constructors_produce_correct_messages() {
+        assert_eq!(RetrievalError::hnsw("test").to_string(), "hnsw error: test");
+        assert_eq!(RetrievalError::bm25("test").to_string(), "bm25 error: test");
+        assert_eq!(
+            RetrievalError::fusion("test").to_string(),
+            "fusion error: test"
+        );
+        assert_eq!(
+            RetrievalError::graph_traversal("test").to_string(),
+            "graph traversal error: test"
+        );
+        assert_eq!(
+            RetrievalError::invalid_query("test").to_string(),
+            "invalid query: test"
+        );
+        assert_eq!(
+            RetrievalError::configuration("test").to_string(),
+            "configuration error: test"
+        );
+        assert_eq!(
+            RetrievalError::index_not_initialized("test").to_string(),
+            "index not initialized: test"
+        );
+        assert_eq!(
+            RetrievalError::rebuild_required("test").to_string(),
+            "index rebuild required: test"
+        );
+        assert_eq!(
+            RetrievalError::budget_exceeded(100, 50, 120).to_string(),
+            "memory budget exceeded: current 100 + item 50 > limit 120"
+        );
+    }
+
+    #[test]
+    fn test_error_kind_enum_debug() {
+        // Verify ErrorKind is Debug-able
+        assert_eq!(format!("{:?}", ErrorKind::Transient), "Transient");
+        assert_eq!(format!("{:?}", ErrorKind::Permanent), "Permanent");
+    }
+
+    #[test]
+    fn test_error_kind_equality() {
+        // Verify ErrorKind implements PartialEq correctly
+        assert_eq!(ErrorKind::Transient, ErrorKind::Transient);
+        assert_eq!(ErrorKind::Permanent, ErrorKind::Permanent);
+        assert_ne!(ErrorKind::Transient, ErrorKind::Permanent);
+    }
+
+    #[test]
+    fn test_query_timeout_error() {
+        let err = RetrievalError::query_timeout(5000);
+        assert_eq!(err.to_string(), "query timed out after 5000ms");
+        assert!(err.is_transient());
+        assert!(!err.is_permanent());
+        assert!(err.is_retryable());
+        assert_eq!(err.kind(), ErrorKind::Transient);
+    }
+
+    #[test]
+    fn test_query_cancelled_error() {
+        let err = RetrievalError::query_cancelled();
+        assert_eq!(err.to_string(), "query cancelled");
+        assert!(err.is_transient());
+        assert!(!err.is_permanent());
+        assert!(err.is_retryable());
+        assert_eq!(err.kind(), ErrorKind::Transient);
+    }
+
+    #[test]
+    fn test_transient_errors_classification() {
+        // All transient errors should be classified correctly
+        let transient_errors: Vec<RetrievalError> = vec![
+            RetrievalError::query_timeout(100),
+            RetrievalError::query_cancelled(),
+        ];
+
+        for err in transient_errors {
+            assert!(err.is_transient(), "Expected transient: {err:?}");
+            assert!(!err.is_permanent(), "Should not be permanent: {err:?}");
+            assert_eq!(
+                err.kind(),
+                ErrorKind::Transient,
+                "Kind mismatch for: {err:?}"
+            );
+        }
+    }
+}
diff --git a/crates/khive-retrieval/src/eval/engine_eval.rs b/crates/khive-retrieval/src/eval/engine_eval.rs
new file mode 100644
index 00000000..cbc56712
--- /dev/null
+++ b/crates/khive-retrieval/src/eval/engine_eval.rs
@@ -0,0 +1,655 @@
+//! Retrieval evaluation types and metrics for the khive compose pipeline.
+//!
+//! Provides the label taxonomy, graded scoring, and standard information-retrieval
+//! metrics needed to measure compose quality against annotated benchmarks.
+//!
+//! # Design
+//!
+//! Labels follow a 5-level taxonomy (`Decisive` → `AdjacentWrong`) modelled on
+//! GPQA-style relevance judgements where topically adjacent but factually wrong
+//! sections are explicitly penalised. The `gain` scoring function drives nDCG and
+//! `net_evidence` metrics.
+//!
+//! All metric functions operate on a slice of [`LabeledResult`] in **ranked order**
+//! (index 0 = rank 1). Callers are responsible for pre-sorting.
+
+use serde::{Deserialize, Serialize};
+use uuid::Uuid;
+
+// ---------------------------------------------------------------------------
+// Label taxonomy
+// ---------------------------------------------------------------------------
+
+/// Five-level relevance label for retrieved sections.
+///
+/// Labels are designed for GPQA-style evaluation where *topically adjacent but
+/// factually wrong* sections are more harmful than irrelevant ones — they can
+/// actively mislead an LLM agent that trusts retrieved context.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum RetrievalLabel {
+    /// Section directly answers or enables answering the query.
+    Decisive,
+    /// Section provides useful supporting evidence for the query.
+    Supporting,
+    /// Section provides general context but not specific evidence.
+    Background,
+    /// Section has no relationship to the query.
+    Irrelevant,
+    /// Section is on-topic but contains factually incorrect information that
+    /// would mislead an LLM agent (the "GPQA failure mode").
+    AdjacentWrong,
+}
+
+impl RetrievalLabel {
+    /// Graded relevance gain used in DCG / net-evidence calculations.
+    ///
+    /// `AdjacentWrong` carries a negative gain to penalise retrieval of
+    /// misleading but plausible-sounding sections.
+    pub fn gain(self) -> f64 {
+        match self {
+            Self::Decisive => 3.0,
+            Self::Supporting => 2.0,
+            Self::Background => 0.5,
+            Self::Irrelevant => 0.0,
+            Self::AdjacentWrong => -2.0,
+        }
+    }
+
+    /// Returns `true` for labels that count as "relevant" in binary recall/precision.
+    pub fn is_relevant(self) -> bool {
+        matches!(self, Self::Decisive | Self::Supporting)
+    }
+
+    /// Returns `true` for labels that count as active distractors.
+    pub fn is_distractor(self) -> bool {
+        matches!(self, Self::AdjacentWrong)
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Result type
+// ---------------------------------------------------------------------------
+
+/// A single retrieved section with its ground-truth relevance label.
+///
+/// The slice passed to metric functions must be ordered by descending score
+/// (rank 1 at index 0).
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct LabeledResult {
+    /// Unique identifier of the retrieved section.
+    pub section_id: Uuid,
+    /// Retrieval score (higher = more relevant according to the pipeline).
+    pub score: f64,
+    /// Ground-truth relevance label assigned by a human or eval pipeline.
+    pub label: RetrievalLabel,
+}
+
+// ---------------------------------------------------------------------------
+// Aggregate metrics struct
+// ---------------------------------------------------------------------------
+
+/// All standard retrieval metrics computed for a single query.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct RetrievalMetrics {
+    /// Recall at multiple k values: `[(k, recall_k), ...]`.
+    pub recall_at_k: Vec<(usize, f64)>,
+    /// nDCG at k = 10 using graded gains from [`RetrievalLabel::gain`].
+    pub ndcg_at_10: f64,
+    /// Precision at k = 5 (fraction of top-5 that are relevant).
+    pub precision_at_5: f64,
+    /// Precision at k = 10 (fraction of top-10 that are relevant).
+    pub precision_at_10: f64,
+    /// Fraction of top-10 results that are `AdjacentWrong` distractors.
+    pub distractor_at_10: f64,
+    /// Net graded evidence at k = 10: `sum(gain_i / log2(i+1))` for i in 1..=10.
+    pub net_evidence_at_10: f64,
+    /// Mean reciprocal rank: `1 / rank` of the first `Decisive` result, or `0.0`.
+    pub mrr: f64,
+    /// Optional before/after flip ratio: `wrong→right / right→wrong`.
+    ///
+    /// `None` when only a single ranking is available (no before/after pair).
+    pub flip_ratio: Option<f64>,
+}
+
+// ---------------------------------------------------------------------------
+// Metric functions
+// ---------------------------------------------------------------------------
+
+/// Recall at k: fraction of all `Decisive | Supporting` items that appear in top-k.
+///
+/// Returns `1.0` when there are no relevant items in the full list (vacuously true).
+pub fn recall_at_k(results: &[LabeledResult], k: usize) -> f64 {
+    let total_relevant: usize = results.iter().filter(|r| r.label.is_relevant()).count();
+    if total_relevant == 0 {
+        return 1.0;
+    }
+    let k = k.min(results.len());
+    let found: usize = results[..k]
+        .iter()
+        .filter(|r| r.label.is_relevant())
+        .count();
+    found as f64 / total_relevant as f64
+}
+
+/// Precision at k: fraction of top-k results that are `Decisive | Supporting`.
+///
+/// Returns `0.0` when k = 0 or results is empty.
+pub fn precision_at_k(results: &[LabeledResult], k: usize) -> f64 {
+    if k == 0 || results.is_empty() {
+        return 0.0;
+    }
+    let k = k.min(results.len());
+    let relevant: usize = results[..k]
+        .iter()
+        .filter(|r| r.label.is_relevant())
+        .count();
+    relevant as f64 / k as f64
+}
+
+/// Distractor at k: fraction of top-k results that are `AdjacentWrong`.
+///
+/// Returns `0.0` when k = 0 or results is empty.
+pub fn distractor_at_k(results: &[LabeledResult], k: usize) -> f64 {
+    if k == 0 || results.is_empty() {
+        return 0.0;
+    }
+    let k = k.min(results.len());
+    let distractors: usize = results[..k]
+        .iter()
+        .filter(|r| r.label.is_distractor())
+        .count();
+    distractors as f64 / k as f64
+}
+
+/// Net evidence at k: `sum(gain(label_i) / log2(i+2))` for i in 0..k.
+///
+/// The discount denominator uses `log2(i+2)` so that rank-1 (i=0) gets
+/// `log2(2) = 1.0` — the standard DCG convention.
+///
+/// Returns `0.0` when k = 0 or results is empty.
+pub fn net_evidence_at_k(results: &[LabeledResult], k: usize) -> f64 {
+    if k == 0 || results.is_empty() {
+        return 0.0;
+    }
+    let k = k.min(results.len());
+    results[..k]
+        .iter()
+        .enumerate()
+        .map(|(i, r)| r.label.gain() / (i as f64 + 2.0).log2())
+        .sum()
+}
+
+/// nDCG at k using graded gains from [`RetrievalLabel::gain`].
+///
+/// The ideal ranking places all `Decisive` results first, then `Supporting`,
+/// `Background`, `Irrelevant`, and finally `AdjacentWrong`. The ideal DCG is
+/// computed from a sorted-by-gain copy of the full result list.
+///
+/// Returns `1.0` when the ideal DCG is zero (no positive-gain items).
+pub fn ndcg_at_k(results: &[LabeledResult], k: usize) -> f64 {
+    if k == 0 || results.is_empty() {
+        return 0.0;
+    }
+    let k = k.min(results.len());
+
+    let dcg = results[..k]
+        .iter()
+        .enumerate()
+        .map(|(i, r)| r.label.gain() / (i as f64 + 2.0).log2())
+        .sum::<f64>();
+
+    // Ideal DCG: sort all results by gain descending, take top-k.
+    let mut gains: Vec<f64> = results.iter().map(|r| r.label.gain()).collect();
+    gains.sort_by(|a, b| b.partial_cmp(a).unwrap_or(std::cmp::Ordering::Equal));
+
+    let idcg = gains[..k]
+        .iter()
+        .enumerate()
+        .map(|(i, &g)| g / (i as f64 + 2.0).log2())
+        .sum::<f64>();
+
+    if idcg == 0.0 {
+        // No positive-gain items exist and no negative-gain items; vacuously perfect.
+        return 1.0;
+    }
+    if idcg < 0.0 {
+        // Only negative-gain items (all distractors); worst possible outcome.
+        return 0.0;
+    }
+    (dcg / idcg).clamp(0.0, 1.0)
+}
+
+/// Mean reciprocal rank: `1.0 / rank` of the first `Decisive` result.
+///
+/// Returns `0.0` if no `Decisive` result appears in the list.
+pub fn mrr(results: &[LabeledResult]) -> f64 {
+    for (i, r) in results.iter().enumerate() {
+        if r.label == RetrievalLabel::Decisive {
+            return 1.0 / (i as f64 + 1.0);
+        }
+    }
+    0.0
+}
+
+/// Compute all standard retrieval metrics at their canonical k values.
+///
+/// `recall_at_k` is evaluated at k ∈ {1, 3, 5, 10}.
+/// All other metrics use their k = 10 (or full-list for MRR) defaults.
+pub fn compute_all(results: &[LabeledResult]) -> RetrievalMetrics {
+    let recall_at_k_vals = vec![
+        (1, recall_at_k(results, 1)),
+        (3, recall_at_k(results, 3)),
+        (5, recall_at_k(results, 5)),
+        (10, recall_at_k(results, 10)),
+    ];
+
+    RetrievalMetrics {
+        recall_at_k: recall_at_k_vals,
+        ndcg_at_10: ndcg_at_k(results, 10),
+        precision_at_5: precision_at_k(results, 5),
+        precision_at_10: precision_at_k(results, 10),
+        distractor_at_10: distractor_at_k(results, 10),
+        net_evidence_at_10: net_evidence_at_k(results, 10),
+        mrr: mrr(results),
+        flip_ratio: None,
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // ---- helpers ----
+
+    fn uuid(n: u64) -> Uuid {
+        Uuid::from_u64_pair(0, n)
+    }
+
+    fn make_result(n: u64, label: RetrievalLabel) -> LabeledResult {
+        LabeledResult {
+            section_id: uuid(n),
+            score: 1.0 / (n as f64 + 1.0),
+            label,
+        }
+    }
+
+    // ---- RetrievalLabel ----
+
+    #[test]
+    fn label_gain_values() {
+        assert_eq!(RetrievalLabel::Decisive.gain(), 3.0);
+        assert_eq!(RetrievalLabel::Supporting.gain(), 2.0);
+        assert_eq!(RetrievalLabel::Background.gain(), 0.5);
+        assert_eq!(RetrievalLabel::Irrelevant.gain(), 0.0);
+        assert_eq!(RetrievalLabel::AdjacentWrong.gain(), -2.0);
+    }
+
+    #[test]
+    fn label_is_relevant() {
+        assert!(RetrievalLabel::Decisive.is_relevant());
+        assert!(RetrievalLabel::Supporting.is_relevant());
+        assert!(!RetrievalLabel::Background.is_relevant());
+        assert!(!RetrievalLabel::Irrelevant.is_relevant());
+        assert!(!RetrievalLabel::AdjacentWrong.is_relevant());
+    }
+
+    #[test]
+    fn label_is_distractor() {
+        assert!(RetrievalLabel::AdjacentWrong.is_distractor());
+        assert!(!RetrievalLabel::Decisive.is_distractor());
+        assert!(!RetrievalLabel::Irrelevant.is_distractor());
+    }
+
+    // ---- recall_at_k ----
+
+    #[test]
+    fn recall_at_k_all_relevant() {
+        // 3 decisive results, k = 3 → recall = 1.0
+        let results: Vec<LabeledResult> = (0..3)
+            .map(|i| make_result(i, RetrievalLabel::Decisive))
+            .collect();
+        assert!((recall_at_k(&results, 3) - 1.0).abs() < 1e-9);
+    }
+
+    #[test]
+    fn recall_at_k_partial() {
+        // 2 decisive at positions 0,1; 2 irrelevant at 2,3
+        let results = vec![
+            make_result(0, RetrievalLabel::Decisive),
+            make_result(1, RetrievalLabel::Decisive),
+            make_result(2, RetrievalLabel::Irrelevant),
+            make_result(3, RetrievalLabel::Irrelevant),
+        ];
+        // k=1: 1 of 2 decisive in top-1 → 0.5
+        assert!((recall_at_k(&results, 1) - 0.5).abs() < 1e-9);
+        // k=2: 2 of 2 decisive in top-2 → 1.0
+        assert!((recall_at_k(&results, 2) - 1.0).abs() < 1e-9);
+    }
+
+    #[test]
+    fn recall_at_k_none_relevant_vacuously_one() {
+        let results = vec![
+            make_result(0, RetrievalLabel::Irrelevant),
+            make_result(1, RetrievalLabel::Background),
+        ];
+        assert!((recall_at_k(&results, 5) - 1.0).abs() < 1e-9);
+    }
+
+    #[test]
+    fn recall_at_k_k_exceeds_length() {
+        let results = vec![make_result(0, RetrievalLabel::Decisive)];
+        // k=100 should be clamped to len=1
+        assert!((recall_at_k(&results, 100) - 1.0).abs() < 1e-9);
+    }
+
+    // ---- precision_at_k ----
+
+    #[test]
+    fn precision_at_k_perfect() {
+        let results: Vec<LabeledResult> = (0..5)
+            .map(|i| make_result(i, RetrievalLabel::Decisive))
+            .collect();
+        assert!((precision_at_k(&results, 5) - 1.0).abs() < 1e-9);
+    }
+
+    #[test]
+    fn precision_at_k_half_relevant() {
+        let results = vec![
+            make_result(0, RetrievalLabel::Decisive),
+            make_result(1, RetrievalLabel::Irrelevant),
+            make_result(2, RetrievalLabel::Supporting),
+            make_result(3, RetrievalLabel::Irrelevant),
+        ];
+        // top-4: 2 relevant → 0.5
+        assert!((precision_at_k(&results, 4) - 0.5).abs() < 1e-9);
+    }
+
+    #[test]
+    fn precision_at_k_zero_when_k_zero() {
+        let results = vec![make_result(0, RetrievalLabel::Decisive)];
+        assert_eq!(precision_at_k(&results, 0), 0.0);
+    }
+
+    #[test]
+    fn precision_at_k_zero_when_empty() {
+        assert_eq!(precision_at_k(&[], 5), 0.0);
+    }
+
+    #[test]
+    fn precision_at_k_adjacent_wrong_not_counted() {
+        let results = vec![
+            make_result(0, RetrievalLabel::AdjacentWrong),
+            make_result(1, RetrievalLabel::AdjacentWrong),
+        ];
+        assert_eq!(precision_at_k(&results, 2), 0.0);
+    }
+
+    // ---- distractor_at_k ----
+
+    #[test]
+    fn distractor_at_k_all_wrong() {
+        let results: Vec<LabeledResult> = (0..4)
+            .map(|i| make_result(i, RetrievalLabel::AdjacentWrong))
+            .collect();
+        assert!((distractor_at_k(&results, 4) - 1.0).abs() < 1e-9);
+    }
+
+    #[test]
+    fn distractor_at_k_none_wrong() {
+        let results = vec![
+            make_result(0, RetrievalLabel::Decisive),
+            make_result(1, RetrievalLabel::Irrelevant),
+        ];
+        assert_eq!(distractor_at_k(&results, 2), 0.0);
+    }
+
+    #[test]
+    fn distractor_at_k_mixed() {
+        // 1 wrong in top-4 → 0.25
+        let results = vec![
+            make_result(0, RetrievalLabel::Decisive),
+            make_result(1, RetrievalLabel::AdjacentWrong),
+            make_result(2, RetrievalLabel::Irrelevant),
+            make_result(3, RetrievalLabel::Background),
+        ];
+        assert!((distractor_at_k(&results, 4) - 0.25).abs() < 1e-9);
+    }
+
+    #[test]
+    fn distractor_at_k_zero_when_k_zero() {
+        let results = vec![make_result(0, RetrievalLabel::AdjacentWrong)];
+        assert_eq!(distractor_at_k(&results, 0), 0.0);
+    }
+
+    // ---- net_evidence_at_k ----
+
+    #[test]
+    fn net_evidence_at_k_single_decisive_rank1() {
+        // Rank-1 Decisive: gain=3.0 / log2(2)=1.0 → 3.0
+        let results = vec![make_result(0, RetrievalLabel::Decisive)];
+        assert!((net_evidence_at_k(&results, 1) - 3.0).abs() < 1e-9);
+    }
+
+    #[test]
+    fn net_evidence_at_k_negative_for_all_wrong() {
+        // Each AdjacentWrong at rank i contributes -2.0 / log2(i+2)
+        let results: Vec<LabeledResult> = (0..3)
+            .map(|i| make_result(i as u64, RetrievalLabel::AdjacentWrong))
+            .collect();
+        let score = net_evidence_at_k(&results, 3);
+        assert!(
+            score < 0.0,
+            "all distractors should produce negative net evidence"
+        );
+    }
+
+    #[test]
+    fn net_evidence_at_k_zero_for_empty() {
+        assert_eq!(net_evidence_at_k(&[], 5), 0.0);
+    }
+
+    #[test]
+    fn net_evidence_at_k_zero_for_k_zero() {
+        let results = vec![make_result(0, RetrievalLabel::Decisive)];
+        assert_eq!(net_evidence_at_k(&results, 0), 0.0);
+    }
+
+    #[test]
+    fn net_evidence_at_k_mixed_sums_correctly() {
+        // rank1=Decisive(3.0/log2(2)=3.0), rank2=Supporting(2.0/log2(3)≈1.2619)
+        let results = vec![
+            make_result(0, RetrievalLabel::Decisive),
+            make_result(1, RetrievalLabel::Supporting),
+        ];
+        let expected = 3.0 / 2.0_f64.log2() + 2.0 / 3.0_f64.log2();
+        let actual = net_evidence_at_k(&results, 2);
+        assert!(
+            (actual - expected).abs() < 1e-9,
+            "expected {expected}, got {actual}"
+        );
+    }
+
+    // ---- ndcg_at_k ----
+
+    #[test]
+    fn ndcg_at_k_perfect_ranking() {
+        // Perfect ranking: Decisive first → nDCG = 1.0
+        let results = vec![
+            make_result(0, RetrievalLabel::Decisive),
+            make_result(1, RetrievalLabel::Supporting),
+            make_result(2, RetrievalLabel::Irrelevant),
+        ];
+        let score = ndcg_at_k(&results, 3);
+        assert!(
+            (score - 1.0).abs() < 1e-9,
+            "perfect ranking should yield nDCG=1.0, got {score}"
+        );
+    }
+
+    #[test]
+    fn ndcg_at_k_suboptimal_ranking() {
+        // Irrelevant first, Decisive second → nDCG < 1.0
+        let results = vec![
+            make_result(0, RetrievalLabel::Irrelevant),
+            make_result(1, RetrievalLabel::Decisive),
+        ];
+        let score = ndcg_at_k(&results, 2);
+        assert!(
+            score < 1.0 && score > 0.0,
+            "suboptimal ranking should yield 0 < nDCG < 1.0, got {score}"
+        );
+    }
+
+    #[test]
+    fn ndcg_at_k_all_irrelevant_vacuously_one() {
+        // No positive-gain items → vacuously 1.0 (idcg = 0)
+        let results = vec![
+            make_result(0, RetrievalLabel::Irrelevant),
+            make_result(1, RetrievalLabel::Irrelevant),
+        ];
+        let score = ndcg_at_k(&results, 2);
+        assert!((score - 1.0).abs() < 1e-9);
+    }
+
+    #[test]
+    fn ndcg_at_k_zero_for_zero_k() {
+        let results = vec![make_result(0, RetrievalLabel::Decisive)];
+        assert_eq!(ndcg_at_k(&results, 0), 0.0);
+    }
+
+    #[test]
+    fn ndcg_at_k_clamped_not_above_one() {
+        // Construct a case that could produce DCG > IDCG due to floating-point;
+        // verify clamp keeps result ≤ 1.0.
+        let results: Vec<LabeledResult> = (0..10)
+            .map(|i| make_result(i, RetrievalLabel::Decisive))
+            .collect();
+        let score = ndcg_at_k(&results, 10);
+        assert!(score <= 1.0 + 1e-12, "nDCG must not exceed 1.0, got {score}");
+    }
+
+    // ---- mrr ----
+
+    #[test]
+    fn mrr_decisive_at_rank1() {
+        let results = vec![
+            make_result(0, RetrievalLabel::Decisive),
+            make_result(1, RetrievalLabel::Irrelevant),
+        ];
+        assert!((mrr(&results) - 1.0).abs() < 1e-9);
+    }
+
+    #[test]
+    fn mrr_decisive_at_rank3() {
+        let results = vec![
+            make_result(0, RetrievalLabel::Irrelevant),
+            make_result(1, RetrievalLabel::Supporting),
+            make_result(2, RetrievalLabel::Decisive),
+        ];
+        // 1/3
+        assert!((mrr(&results) - 1.0 / 3.0).abs() < 1e-9);
+    }
+
+    #[test]
+    fn mrr_no_decisive() {
+        let results = vec![
+            make_result(0, RetrievalLabel::Supporting),
+            make_result(1, RetrievalLabel::Irrelevant),
+        ];
+        assert_eq!(mrr(&results), 0.0);
+    }
+
+    #[test]
+    fn mrr_empty() {
+        assert_eq!(mrr(&[]), 0.0);
+    }
+
+    // ---- compute_all ----
+
+    #[test]
+    fn compute_all_returns_correct_structure() {
+        let results: Vec<LabeledResult> = (0..10)
+            .map(|i| {
+                let label = if i < 3 {
+                    RetrievalLabel::Decisive
+                } else {
+                    RetrievalLabel::Irrelevant
+                };
+                make_result(i, label)
+            })
+            .collect();
+        let metrics = compute_all(&results);
+
+        // recall_at_k has 4 entries for k ∈ {1,3,5,10}
+        assert_eq!(metrics.recall_at_k.len(), 4);
+        assert_eq!(metrics.recall_at_k[0].0, 1);
+        assert_eq!(metrics.recall_at_k[1].0, 3);
+        assert_eq!(metrics.recall_at_k[2].0, 5);
+        assert_eq!(metrics.recall_at_k[3].0, 10);
+
+        // k=3: all 3 decisive in top-3 → recall=1.0
+        assert!((metrics.recall_at_k[1].1 - 1.0).abs() < 1e-9);
+
+        // MRR = 1.0 (decisive at rank 1)
+        assert!((metrics.mrr - 1.0).abs() < 1e-9);
+
+        // flip_ratio is None (no before/after pair provided)
+        assert!(metrics.flip_ratio.is_none());
+    }
+
+    #[test]
+    fn compute_all_distractor_metric() {
+        // 5 adjacent-wrong at ranks 1-5, rest irrelevant
+        let results: Vec<LabeledResult> = (0..10)
+            .map(|i| {
+                let label = if i < 5 {
+                    RetrievalLabel::AdjacentWrong
+                } else {
+                    RetrievalLabel::Irrelevant
+                };
+                make_result(i, label)
+            })
+            .collect();
+        let metrics = compute_all(&results);
+        // distractor_at_10 = 5/10 = 0.5
+        assert!(
+            (metrics.distractor_at_10 - 0.5).abs() < 1e-9,
+            "got {}",
+            metrics.distractor_at_10
+        );
+        // mrr = 0 (no Decisive)
+        assert_eq!(metrics.mrr, 0.0);
+    }
+
+    // ---- serialization round-trip ----
+
+    #[test]
+    fn label_serde_roundtrip() {
+        for label in [
+            RetrievalLabel::Decisive,
+            RetrievalLabel::Supporting,
+            RetrievalLabel::Background,
+            RetrievalLabel::Irrelevant,
+            RetrievalLabel::AdjacentWrong,
+        ] {
+            let json = serde_json::to_string(&label).unwrap();
+            let back: RetrievalLabel = serde_json::from_str(&json).unwrap();
+            assert_eq!(label, back);
+        }
+    }
+
+    #[test]
+    fn metrics_serde_roundtrip() {
+        let results = vec![make_result(0, RetrievalLabel::Decisive)];
+        let m = compute_all(&results);
+        let json = serde_json::to_string(&m).unwrap();
+        let back: RetrievalMetrics = serde_json::from_str(&json).unwrap();
+        assert_eq!(back.recall_at_k.len(), 4);
+        assert!((back.mrr - 1.0).abs() < 1e-9);
+    }
+}
diff --git a/crates/khive-retrieval/src/eval/mod.rs b/crates/khive-retrieval/src/eval/mod.rs
new file mode 100644
index 00000000..4de5a74a
--- /dev/null
+++ b/crates/khive-retrieval/src/eval/mod.rs
@@ -0,0 +1,5 @@
+//! Retrieval evaluation types and metrics.
+
+pub mod engine_eval;
+
+pub use engine_eval::*;
diff --git a/crates/khive-retrieval/src/graph/bfs.rs b/crates/khive-retrieval/src/graph/bfs.rs
new file mode 100644
index 00000000..e4c5b1d1
--- /dev/null
+++ b/crates/khive-retrieval/src/graph/bfs.rs
@@ -0,0 +1,148 @@
+//! BFS (Breadth-First Search) traversal.
+//!
+//! # Formal Verification
+//!
+//! This implementation corresponds to the formal proofs in
+//! `proofs/Lion/Retrieval/Graph.lean`. Key theorems:
+//!
+//! - `bfs_terminates`: BFS always terminates (queue eventually empty)
+//! - `bfs_complete`: all reachable vertices are visited
+//! - `visited_mono`: visited set grows monotonically
+//! - `reachable_trans`: reachability is transitive
+
+use std::collections::{HashSet, VecDeque};
+
+use super::compat::{EntityRef, LinkStore, StorageContext};
+
+use crate::error::Result;
+
+use super::helpers::{get_edge_weight, get_neighbor_entity, get_neighbors, matches_link_type};
+use super::types::{PathNode, TraversalOptions, MAX_TRAVERSAL_DEPTH, MAX_TRAVERSAL_RESULTS};
+
+/// Perform BFS traversal from a starting entity.
+///
+/// BFS explores nodes level by level, guaranteeing that nodes at depth N
+/// are visited before nodes at depth N+1. This makes it ideal for:
+///
+/// - Finding all entities within N hops
+/// - Social network expansion (friends of friends)
+/// - Entity neighborhood exploration
+///
+/// # Arguments
+///
+/// * `store` - The link store to query
+/// * `ctx` - Storage context for namespace isolation
+/// * `start` - Starting entity reference
+/// * `options` - Traversal options (depth, direction, filters)
+///
+/// # Returns
+///
+/// Vector of [`PathNode`] in BFS order. The first element is always the start node.
+///
+/// # Complexity
+///
+/// - Time: O(V + E) where V = vertices, E = edges
+/// - Space: O(V) for visited set and queue
+///
+/// # Example
+///
+/// ```ignore
+/// let options = TraversalOptions::new(3)
+///     .with_direction(Direction::Out)
+///     .with_link_types(["KNOWS"]);
+///
+/// let nodes = bfs_traverse(&store, &ctx, start_ref, &options).await?;
+/// for node in &nodes {
+///     println!("Entity {:?} at depth {}", node.entity_id, node.depth);
+/// }
+/// ```
+///
+/// **PROOF CORRESPONDENCE**: `Lion.Retrieval.Graph.bfs_terminates`
+/// Queue shrinks each iteration; visited set prevents re-enqueue; terminates when queue empty.
+///
+/// **PROOF CORRESPONDENCE**: `Lion.Retrieval.Graph.bfs_complete`
+/// All reachable vertices within max_depth are visited; BFS explores level-by-level.
+pub async fn bfs_traverse<S: LinkStore>(
+    store: &S,
+    ctx: &StorageContext,
+    start: EntityRef,
+    options: &TraversalOptions,
+) -> Result<Vec<PathNode>> {
+    let max_depth = options.max_depth.min(MAX_TRAVERSAL_DEPTH);
+    let limit = options
+        .limit
+        .unwrap_or(MAX_TRAVERSAL_RESULTS)
+        .min(MAX_TRAVERSAL_RESULTS);
+    let min_weight = options.min_weight.unwrap_or(f64::NEG_INFINITY);
+
+    // **PROOF CORRESPONDENCE**: `Lion.Retrieval.Graph.visited_mono`
+    // Visited set only grows (insert-only); never shrinks during traversal.
+    // EntityRef implements Hash + Eq, enabling direct use as HashMap key.
+    let mut visited: HashSet<EntityRef> = HashSet::new();
+    let mut results: Vec<PathNode> = Vec::new();
+    // Queue: (entity_ref, depth, path_weight)
+    let mut queue: VecDeque<(EntityRef, usize, f64)> = VecDeque::new();
+
+    // Start node
+    visited.insert(start.clone());
+    results.push(PathNode::start(start.clone()));
+    queue.push_back((start, 0, 0.0));
+
+    while let Some((current, depth, path_weight)) = queue.pop_front() {
+        // Check depth limit
+        if depth >= max_depth {
+            continue;
+        }
+
+        // Check result limit
+        if results.len() >= limit {
+            break;
+        }
+
+        // Get neighbors based on direction
+        let links = get_neighbors(store, ctx, &current, &options.direction).await?;
+
+        for link in links {
+            // Filter by link type
+            if !matches_link_type(&link, &options.link_types) {
+                continue;
+            }
+
+            // Get edge weight and filter
+            let edge_weight = get_edge_weight(&link);
+            if edge_weight < min_weight {
+                continue;
+            }
+
+            // Determine neighbor entity based on direction
+            let neighbor = get_neighbor_entity(&link, &current, &options.direction);
+
+            // Skip if already visited (EntityRef implements Hash + Eq)
+            if visited.contains(&neighbor) {
+                continue;
+            }
+
+            // Mark as visited and add to results
+            visited.insert(neighbor.clone());
+            let new_weight = path_weight + edge_weight;
+
+            let node = PathNode {
+                entity_id: neighbor.clone(),
+                depth: depth + 1,
+                via_link: Some(link),
+                path_weight: new_weight,
+            };
+            results.push(node);
+
+            // Check limit after adding
+            if results.len() >= limit {
+                break;
+            }
+
+            // Add to queue for further exploration
+            queue.push_back((neighbor, depth + 1, new_weight));
+        }
+    }
+
+    Ok(results)
+}
diff --git a/crates/khive-retrieval/src/graph/compat.rs b/crates/khive-retrieval/src/graph/compat.rs
new file mode 100644
index 00000000..9d0493d3
--- /dev/null
+++ b/crates/khive-retrieval/src/graph/compat.rs
@@ -0,0 +1,244 @@
+//! Compatibility shims for the legacy graph traversal module.
+//!
+//! The graph module was written against an older `khive_db` API that exported
+//! `EntityRef`, `Link`, `LinkStore`, and `StorageContext`. These types no longer
+//! exist in `khive_db`. This module provides minimal shims so the graph code
+//! compiles under the `graph-legacy` feature until the module is ported to the
+//! current `khive_storage::GraphStore` API.
+
+use std::collections::BTreeMap;
+
+use async_trait::async_trait;
+use serde::{Deserialize, Serialize};
+
+use crate::error::{Result, RetrievalError};
+
+// ---------------------------------------------------------------------------
+// EntityRef
+// ---------------------------------------------------------------------------
+
+/// A reference to a graph entity.
+///
+/// Legacy type — maps to the old `khive_db::EntityRef` API.
+#[derive(Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
+#[serde(tag = "kind", content = "id", rename_all = "snake_case")]
+pub enum EntityRef {
+    /// An externally-identified entity (string key).
+    External(String),
+}
+
+// ---------------------------------------------------------------------------
+// Link
+// ---------------------------------------------------------------------------
+
+/// An opaque link identifier.
+///
+/// Legacy type — shim for the old `khive_db::LinkId`.
+#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash, Serialize, Deserialize)]
+pub struct LinkId(u64);
+
+impl LinkId {
+    /// The nil / zero link ID.
+    pub const NIL: Self = Self(0);
+}
+
+/// A directed edge between two entities.
+///
+/// Legacy type — maps to the old `khive_db::Link` API.
+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
+pub struct Link {
+    /// Opaque link identifier.
+    pub id: LinkId,
+    /// Source entity.
+    pub source: EntityRef,
+    /// Target entity.
+    pub target: EntityRef,
+    /// Relation type (e.g. "contains", "references").
+    pub relation: String,
+    /// Optional edge properties (e.g. `{"weight": 0.9}`).
+    pub properties: Option<BTreeMap<String, serde_json::Value>>,
+}
+
+impl Link {
+    /// Create a new link with no properties.
+    pub fn new(
+        id: LinkId,
+        source: EntityRef,
+        target: EntityRef,
+        relation: impl Into<String>,
+    ) -> Self {
+        Self {
+            id,
+            source,
+            target,
+            relation: relation.into(),
+            properties: None,
+        }
+    }
+
+    /// Create a new link with serializable properties.
+    pub fn with_properties(
+        id: LinkId,
+        source: EntityRef,
+        target: EntityRef,
+        relation: impl Into<String>,
+        props: serde_json::Value,
+    ) -> Self {
+        let properties = props
+            .as_object()
+            .map(|m| m.iter().map(|(k, v)| (k.clone(), v.clone())).collect());
+        Self {
+            id,
+            source,
+            target,
+            relation: relation.into(),
+            properties,
+        }
+    }
+}
+
+// ---------------------------------------------------------------------------
+// StorageContext
+// ---------------------------------------------------------------------------
+
+/// Context for storage operations (namespace isolation, etc.).
+///
+/// Legacy type — maps to the old `khive_db::StorageContext` API.
+#[derive(Clone, Debug, Default)]
+pub struct StorageContext {
+    /// Namespace for multi-tenant isolation.
+    pub namespace: String,
+}
+
+impl StorageContext {
+    /// Create a new storage context with the given namespace.
+    pub fn new(namespace: impl Into<String>) -> Self {
+        Self {
+            namespace: namespace.into(),
+        }
+    }
+}
+
+// ---------------------------------------------------------------------------
+// LinkStore
+// ---------------------------------------------------------------------------
+
+/// Trait for querying directed graph edges.
+///
+/// Legacy trait — maps to the old `khive_db::LinkStore` API.
+#[async_trait]
+pub trait LinkStore: Send + Sync {
+    /// Get all outgoing links from an entity.
+    async fn outgoing(
+        &self,
+        ctx: &StorageContext,
+        entity: &EntityRef,
+    ) -> Result<Vec<Link>>;
+
+    /// Get all incoming links to an entity.
+    async fn incoming(
+        &self,
+        ctx: &StorageContext,
+        entity: &EntityRef,
+    ) -> Result<Vec<Link>>;
+
+    /// Create a link between two entities.
+    async fn link(
+        &self,
+        ctx: &StorageContext,
+        source: EntityRef,
+        target: EntityRef,
+        relation: &str,
+        properties: Option<serde_json::Value>,
+    ) -> Result<Link>;
+}
+
+// ---------------------------------------------------------------------------
+// MockLinkStore (for tests)
+// ---------------------------------------------------------------------------
+
+/// In-memory mock implementation of `LinkStore` for tests.
+pub struct MockLinkStore {
+    links: parking_lot::Mutex<Vec<Link>>,
+    next_id: std::sync::atomic::AtomicU64,
+}
+
+impl MockLinkStore {
+    /// Create a new empty mock store.
+    pub fn new() -> Self {
+        Self {
+            links: parking_lot::Mutex::new(Vec::new()),
+            next_id: std::sync::atomic::AtomicU64::new(1),
+        }
+    }
+}
+
+impl Default for MockLinkStore {
+    fn default() -> Self {
+        Self::new()
+    }
+}
+
+#[async_trait]
+impl LinkStore for MockLinkStore {
+    async fn outgoing(
+        &self,
+        _ctx: &StorageContext,
+        entity: &EntityRef,
+    ) -> Result<Vec<Link>> {
+        let links = self.links.lock();
+        Ok(links
+            .iter()
+            .filter(|l| &l.source == entity)
+            .cloned()
+            .collect())
+    }
+
+    async fn incoming(
+        &self,
+        _ctx: &StorageContext,
+        entity: &EntityRef,
+    ) -> Result<Vec<Link>> {
+        let links = self.links.lock();
+        Ok(links
+            .iter()
+            .filter(|l| &l.target == entity)
+            .cloned()
+            .collect())
+    }
+
+    async fn link(
+        &self,
+        _ctx: &StorageContext,
+        source: EntityRef,
+        target: EntityRef,
+        relation: &str,
+        properties: Option<serde_json::Value>,
+    ) -> Result<Link> {
+        let id = self
+            .next_id
+            .fetch_add(1, std::sync::atomic::Ordering::Relaxed);
+        let link = if let Some(props) = properties {
+            Link::with_properties(LinkId(id), source, target, relation, props)
+        } else {
+            Link::new(LinkId(id), source, target, relation)
+        };
+        self.links.lock().push(link.clone());
+        Ok(link)
+    }
+}
+
+/// Create a test storage context.
+pub fn test_context() -> StorageContext {
+    StorageContext::new("test")
+}
+
+// ---------------------------------------------------------------------------
+// Error adapter
+// ---------------------------------------------------------------------------
+
+/// Adapt a `String` error into a `RetrievalError::GraphTraversal`.
+#[allow(dead_code)]
+pub(crate) fn graph_err(msg: impl std::fmt::Display) -> RetrievalError {
+    RetrievalError::GraphTraversal(msg.to_string())
+}
diff --git a/crates/khive-retrieval/src/graph/dfs.rs b/crates/khive-retrieval/src/graph/dfs.rs
new file mode 100644
index 00000000..5bed7156
--- /dev/null
+++ b/crates/khive-retrieval/src/graph/dfs.rs
@@ -0,0 +1,135 @@
+//! DFS (Depth-First Search) traversal.
+//!
+//! # Formal Verification
+//!
+//! This implementation corresponds to the formal proofs in
+//! `proofs/Lion/Retrieval/Graph.lean`. Key theorems:
+//!
+//! - `dfs_terminates_bound`: DFS bounded by |V| vertices
+//! - `visited_mono`: visited set grows monotonically
+//! - `reachable_trans`: reachability is transitive
+
+use std::collections::HashSet;
+
+use super::compat::{EntityRef, Link, LinkStore, StorageContext};
+
+use crate::error::Result;
+
+use super::helpers::{get_edge_weight, get_neighbor_entity, get_neighbors, matches_link_type};
+use super::types::{PathNode, TraversalOptions, MAX_TRAVERSAL_DEPTH, MAX_TRAVERSAL_RESULTS};
+
+/// Perform DFS traversal from a starting entity.
+///
+/// DFS explores as far as possible along each branch before backtracking.
+/// This makes it ideal for:
+///
+/// - Deep chain exploration
+/// - Path existence checking
+/// - Exhaustive graph exploration with limited results
+///
+/// # Arguments
+///
+/// * `store` - The link store to query
+/// * `ctx` - Storage context for namespace isolation
+/// * `start` - Starting entity reference
+/// * `options` - Traversal options (depth, direction, filters)
+///
+/// # Returns
+///
+/// Vector of [`PathNode`] in DFS pre-order (parent before children).
+///
+/// # Complexity
+///
+/// - Time: O(V + E) where V = vertices, E = edges
+/// - Space: O(V) for visited set + O(h) stack where h = max depth
+///
+/// # Example
+///
+/// ```ignore
+/// let options = TraversalOptions::new(5)
+///     .with_direction(Direction::Out);
+///
+/// let nodes = dfs_traverse(&store, &ctx, start_ref, &options).await?;
+/// ```
+///
+/// **PROOF CORRESPONDENCE**: `Lion.Retrieval.Graph.dfs_terminates_bound`
+/// Each vertex visited at most once; |visited| bounded by |V|; stack pops exceed pushes eventually.
+pub async fn dfs_traverse<S: LinkStore>(
+    store: &S,
+    ctx: &StorageContext,
+    start: EntityRef,
+    options: &TraversalOptions,
+) -> Result<Vec<PathNode>> {
+    let max_depth = options.max_depth.min(MAX_TRAVERSAL_DEPTH);
+    let limit = options
+        .limit
+        .unwrap_or(MAX_TRAVERSAL_RESULTS)
+        .min(MAX_TRAVERSAL_RESULTS);
+    let min_weight = options.min_weight.unwrap_or(f64::NEG_INFINITY);
+
+    // **PROOF CORRESPONDENCE**: `Lion.Retrieval.Graph.visited_mono`
+    // Visited set only grows (insert-only); never shrinks during traversal.
+    // EntityRef implements Hash + Eq, enabling direct use as HashMap key.
+    let mut visited: HashSet<EntityRef> = HashSet::new();
+    let mut results: Vec<PathNode> = Vec::new();
+
+    // Stack: (entity_ref, depth, path_weight, via_link)
+    let mut stack: Vec<(EntityRef, usize, f64, Option<Link>)> = Vec::new();
+    stack.push((start, 0, 0.0, None));
+
+    while let Some((current, depth, path_weight, via_link)) = stack.pop() {
+        // Skip if already visited (EntityRef implements Hash + Eq)
+        if visited.contains(&current) {
+            continue;
+        }
+
+        // Mark as visited and add to results
+        visited.insert(current.clone());
+        results.push(PathNode {
+            entity_id: current.clone(),
+            depth,
+            via_link,
+            path_weight,
+        });
+
+        // Check result limit
+        if results.len() >= limit {
+            break;
+        }
+
+        // Check depth limit before exploring children
+        if depth >= max_depth {
+            continue;
+        }
+
+        // Get neighbors and push to stack (reverse order for consistent traversal)
+        let links = get_neighbors(store, ctx, &current, &options.direction).await?;
+
+        // Push in reverse order so first neighbor is processed first
+        for link in links.into_iter().rev() {
+            // Filter by link type
+            if !matches_link_type(&link, &options.link_types) {
+                continue;
+            }
+
+            // Get edge weight and filter
+            let edge_weight = get_edge_weight(&link);
+            if edge_weight < min_weight {
+                continue;
+            }
+
+            // Determine neighbor entity
+            let neighbor = get_neighbor_entity(&link, &current, &options.direction);
+
+            // Skip if already visited (EntityRef implements Hash + Eq)
+            if visited.contains(&neighbor) {
+                continue;
+            }
+
+            let new_weight = path_weight + edge_weight;
+            stack.push((neighbor, depth + 1, new_weight, Some(link)));
+        }
+    }
+
+    Ok(results)
+}
diff --git a/crates/khive-retrieval/src/graph/helpers.rs b/crates/khive-retrieval/src/graph/helpers.rs
new file mode 100644
index 00000000..907ac58c
--- /dev/null
+++ b/crates/khive-retrieval/src/graph/helpers.rs
@@ -0,0 +1,283 @@
+//! Helper functions for graph traversal.
+
+use super::compat::{EntityRef, Link, LinkStore, StorageContext};
+#[cfg(test)]
+use super::compat::LinkId;
+use khive_score::DeterministicScore;
+
+use crate::error::{Result, RetrievalError};
+
+use super::types::Direction;
+
+/// Extract edge weight from link properties.
+///
+/// Returns the `weight` property if present, otherwise defaults to 1.0.
+pub fn get_edge_weight(link: &Link) -> f64 {
+    link.properties
+        .as_ref()
+        .and_then(|props| props.get("weight"))
+        .and_then(|v| v.as_f64())
+        .unwrap_or(1.0)
+}
+
+/// Check if a link matches the type filter.
+///
+/// Returns `true` if:
+/// - The filter is `None` (all types match)
+/// - The link's relation is in the filter list
+pub fn matches_link_type(link: &Link, filter: &Option<Vec<String>>) -> bool {
+    match filter {
+        None => true,
+        Some(types) => types.iter().any(|t| t == &link.relation),
+    }
+}
+
+/// Get neighbor links based on direction.
+///
+/// # Arguments
+///
+/// * `store` - The link store to query
+/// * `ctx` - Storage context for namespace isolation
+/// * `entity` - The entity to get neighbors for
+/// * `direction` - Which direction to follow edges
+///
+/// # Returns
+///
+/// Vector of links in the specified direction(s).
+pub async fn get_neighbors<S: LinkStore>(
+    store: &S,
+    ctx: &StorageContext,
+    entity: &EntityRef,
+    direction: &Direction,
+) -> Result<Vec<Link>> {
+    let links =
+        match direction {
+            Direction::Out => store
+                .outgoing(ctx, entity)
+                .await
+                .map_err(|e| RetrievalError::GraphTraversal(format!("link store error: {e}"))),
+            Direction::In => store
+                .incoming(ctx, entity)
+                .await
+                .map_err(|e| RetrievalError::GraphTraversal(format!("link store error: {e}"))),
+            Direction::Both => {
+                let mut out = store.outgoing(ctx, entity).await.map_err(|e| {
+                    RetrievalError::GraphTraversal(format!("link store error: {e}"))
+                })?;
+                let incoming = store.incoming(ctx, entity).await.map_err(|e| {
+                    RetrievalError::GraphTraversal(format!("link store error: {e}"))
+                })?;
+                out.extend(incoming);
+                Ok(out)
+            }
+        };
+
+    links
+}
+
+/// Convert graph depth to proximity score.
+///
+/// Closer nodes (lower depth) get higher scores. This enables fusion with
+/// vector and keyword search results via RRF.
+///
+/// # Arguments
+///
+/// * `depth` - Distance from the start node (0 = start node itself)
+/// * `max_depth` - Maximum traversal depth configured
+///
+/// # Returns
+///
+/// A `DeterministicScore` in range [0.0, 1.0]:
+/// - depth=0 → 1.0 (at start node)
+/// - depth=max_depth → 0.0 (maximum distance)
+///
+/// # Edge Cases
+///
+/// When `max_depth = 0`:
+/// - depth=0 → 1.0 (only start node is reachable)
+/// - depth>0 → 0.0 (should not occur, but handled safely)
+///
+/// # Proof Correspondence
+///
+/// This function maintains the invariant:
+/// - `proximity_nonneg`: Result is always >= 0
+/// - `proximity_bounded`: Result is always <= 1.0
+/// - `proximity_mono`: Higher depth → lower score (monotonically decreasing)
+pub fn proximity_score(depth: usize, max_depth: usize) -> DeterministicScore {
+    // Guard against division by zero
+    if max_depth == 0 {
+        // At max_depth=0, only the start node (depth=0) is reachable
+        return DeterministicScore::from_f64(if depth == 0 { 1.0 } else { 0.0 });
+    }
+    // Closer = higher score (inverse relationship)
+    let proximity = 1.0 - (depth as f64 / max_depth as f64);
+    DeterministicScore::from_f64(proximity)
+}
+
+/// Get the neighbor entity from a link based on traversal direction and current node.
+///
+/// # Arguments
+///
+/// * `link` - The link to extract neighbor from
+/// * `current` - The current entity we're traversing from
+/// * `direction` - The traversal direction
+///
+/// # Returns
+///
+/// The entity at the "other end" of the link relative to the traversal direction.
+pub fn get_neighbor_entity(link: &Link, current: &EntityRef, direction: &Direction) -> EntityRef {
+    match direction {
+        Direction::Out => link.target.clone(),
+        Direction::In => link.source.clone(),
+        Direction::Both => {
+            // In bidirectional mode, return the "other end" of the link
+            if &link.source == current {
+                link.target.clone()
+            } else {
+                link.source.clone()
+            }
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_matches_link_type() {
+        let link = Link::new(
+            LinkId::NIL,
+            EntityRef::External("a".to_string()),
+            EntityRef::External("b".to_string()),
+            "contains",
+        );
+
+        // No filter matches all
+        assert!(matches_link_type(&link, &None));
+
+        // Matching type
+        assert!(matches_link_type(
+            &link,
+            &Some(vec!["contains".to_string()])
+        ));
+
+        // Non-matching type
+        assert!(!matches_link_type(
+            &link,
+            &Some(vec!["references".to_string()])
+        ));
+
+        // Multiple types, one matches
+        assert!(matches_link_type(
+            &link,
+            &Some(vec!["references".to_string(), "contains".to_string()])
+        ));
+    }
+
+    #[test]
+    fn test_get_edge_weight() {
+        // No properties = default weight 1.0
+        let link = Link::new(
+            LinkId::NIL,
+            EntityRef::External("a".to_string()),
+            EntityRef::External("b".to_string()),
+            "test",
+        );
+        assert_eq!(get_edge_weight(&link), 1.0);
+
+        // With weight property
+        let link_with_weight = Link::with_properties(
+            LinkId::NIL,
+            EntityRef::External("a".to_string()),
+            EntityRef::External("b".to_string()),
+            "test",
+            serde_json::json!({"weight": 2.5}),
+        );
+        assert_eq!(get_edge_weight(&link_with_weight), 2.5);
+    }
+
+    #[test]
+    fn test_get_neighbor_entity() {
+        let source = EntityRef::External("source".to_string());
+        let target = EntityRef::External("target".to_string());
+        let link = Link::new(
+            LinkId::NIL,
+            source.clone(),
+            target.clone(),
+            "test",
+        );
+
+        // Outgoing: return target
+        assert_eq!(get_neighbor_entity(&link, &source, &Direction::Out), target);
+
+        // Incoming: return source
+        assert_eq!(get_neighbor_entity(&link, &target, &Direction::In), source);
+
+        // Both from source: return target (other end)
+        assert_eq!(
+            get_neighbor_entity(&link, &source, &Direction::Both),
+            target
+        );
+
+        // Both from target: return source (other end)
+        assert_eq!(
+            get_neighbor_entity(&link, &target, &Direction::Both),
+            source
+        );
+    }
+
+    #[test]
+    fn test_proximity_score_normal() {
+        // At start node (depth=0)
+        let score = proximity_score(0, 5);
+        assert!((score.to_f64() - 1.0).abs() < f64::EPSILON);
+
+        // At max depth
+        let score = proximity_score(5, 5);
+        assert!((score.to_f64() - 0.0).abs() < f64::EPSILON);
+
+        // Midway
+        let score = proximity_score(2, 4);
+        assert!((score.to_f64() - 0.5).abs() < f64::EPSILON);
+    }
+
+    #[test]
+    fn test_proximity_score_max_depth_zero() {
+        // Edge case: max_depth = 0, depth = 0 (only valid case)
+        let score = proximity_score(0, 0);
+        assert!((score.to_f64() - 1.0).abs() < f64::EPSILON);
+
+        // Edge case: max_depth = 0, depth > 0 (should not occur, but handled safely)
+        let score = proximity_score(1, 0);
+        assert!((score.to_f64() - 0.0).abs() < f64::EPSILON);
+    }
+
+    #[test]
+    fn test_proximity_score_monotonic() {
+        // Scores should decrease as depth increases
+        let max_depth = 10;
+        let mut prev_score = f64::MAX;
+
+        for depth in 0..=max_depth {
+            let score = proximity_score(depth, max_depth).to_f64();
+            assert!(
+                score <= prev_score,
+                "Score should be monotonically decreasing"
+            );
+            prev_score = score;
+        }
+    }
+
+    #[test]
+    fn test_proximity_score_bounded() {
+        // All scores should be in [0.0, 1.0]
+        for max_depth in [0, 1, 5, 10, 100] {
+            for depth in 0..=max_depth {
+                let score = proximity_score(depth, max_depth).to_f64();
+                assert!(score >= 0.0, "Score should be >= 0");
+                assert!(score <= 1.0, "Score should be <= 1");
+            }
+        }
+    }
+}
diff --git a/crates/khive-retrieval/src/graph/mod.rs b/crates/khive-retrieval/src/graph/mod.rs
new file mode 100644
index 00000000..5fe1833b
--- /dev/null
+++ b/crates/khive-retrieval/src/graph/mod.rs
@@ -0,0 +1,99 @@
+//! Graph traversal algorithms for relationship-aware retrieval.
+//!
+//! This module provides BFS, DFS, and shortest path algorithms for exploring
+//! the knowledge graph. All algorithms operate on the `LinkStore` trait from
+//! khive-db, enabling relationship-aware retrieval pipelines.
+//!
+//! # Algorithm Selection Guide
+//!
+//! | Use Case | Algorithm | Function |
+//! |----------|-----------|----------|
+//! | Explore neighbors | BFS | [`bfs_traverse`] |
+//! | Find shortest path | Bidirectional BFS | [`find_shortest_path`] |
+//! | Deep exploration | DFS | [`dfs_traverse`] |
+//!
+//! # Architecture (ADR-004)
+//!
+//! ```text
+//! khive-db                    khive-retrieval
+//! +-----------------+         +----------------------+
+//! | LinkStore trait |  <---   | Traversal algorithms |
+//! | EntityRef, Link |         | PathNode, Direction  |
+//! | StorageContext  |         | TraversalOptions     |
+//! +-----------------+         +----------------------+
+//! ```
+//!
+//! # RETRIEVAL-09: Audit Logging for Graph Operations
+//!
+//! **Current state**: Graph traversal algorithms do NOT emit audit logs.
+//!
+//! **Design decision**: Audit logging is the responsibility of the caller
+//! (typically khive-api or middleware layer), not the retrieval algorithms.
+//! This keeps the traversal code focused and testable.
+//!
+//! **What callers should log**:
+//!
+//! | Event | Context to Capture |
+//! |-------|-------------------|
+//! | Traversal start | start_node, direction, max_depth, link_types |
+//! | Traversal complete | nodes_visited, paths_found, duration_ms |
+//! | Depth limit hit | node_at_limit, depth |
+//! | Result limit hit | total_candidates, returned_count |
+//!
+//! **Future work**: If audit logging moves into the retrieval layer, add
+//! a `TraversalObserver` trait for pluggable logging without coupling to
+//! a specific logging framework.
+//!
+//! # Safety Limits
+//!
+//! All algorithms enforce safety limits to prevent runaway traversals:
+//! - [`MAX_TRAVERSAL_DEPTH`]: Maximum hops from start (20)
+//! - [`MAX_TRAVERSAL_RESULTS`]: Maximum nodes returned (10,000)
+//!
+//! # Example
+//!
+//! ```ignore
+//! use khive_retrieval::graph::{bfs_traverse, find_shortest_path, TraversalOptions, Direction};
+//! use khive_db::{LinkStore, StorageContext};
+//!
+//! // BFS exploration
+//! let options = TraversalOptions::new(3)
+//!     .with_direction(Direction::Out)
+//!     .with_link_types(["contains", "references"]);
+//!
+//! let neighbors = bfs_traverse(&store, &ctx, start_ref, &options).await?;
+//!
+//! // Find shortest path
+//! if let Some(path) = find_shortest_path(&store, &ctx, from, to, 10).await? {
+//!     println!("Path length: {} hops", path.len() - 1);
+//! }
+//! ```
+//!
+//! See [ADR-004](../docs/ADR-004-graph-traversal.md) for algorithm specification.
+
+mod bfs;
+mod compat;
+mod dfs;
+/// Helper functions for graph traversal (proximity scoring, neighbor extraction, etc.).
+pub mod helpers;
+mod shortest;
+mod types;
+
+#[cfg(test)]
+mod tests;
+
+// Re-export compat types (legacy graph API shims)
+pub use compat::{EntityRef, Link, LinkId, LinkStore, MockLinkStore, StorageContext, test_context};
+
+// Re-export public types
+pub use types::{
+    Direction, PathNode, TraversalOptions, MAX_TRAVERSAL_DEPTH, MAX_TRAVERSAL_RESULTS,
+};
+
+// Re-export direction variants for convenience
+pub use types::Direction::{Both, In, Out};
+
+// Re-export traversal algorithms
+pub use bfs::bfs_traverse;
+pub use dfs::dfs_traverse;
+pub use shortest::find_shortest_path;
diff --git a/crates/khive-retrieval/src/graph/shortest.rs b/crates/khive-retrieval/src/graph/shortest.rs
new file mode 100644
index 00000000..80ff756d
--- /dev/null
+++ b/crates/khive-retrieval/src/graph/shortest.rs
@@ -0,0 +1,266 @@
+//! Shortest path algorithm using bidirectional BFS.
+
+use std::collections::{HashMap, VecDeque};
+
+use super::compat::{EntityRef, Link, LinkStore, StorageContext};
+
+use crate::error::{Result, RetrievalError};
+
+use super::types::{PathNode, MAX_TRAVERSAL_DEPTH};
+
+/// Find the shortest path between two entities using bidirectional BFS.
+///
+/// Bidirectional BFS searches from both start and end simultaneously,
+/// meeting in the middle. This reduces search space from O(b^d) to O(b^(d/2))
+/// where b = branching factor and d = path depth.
+///
+/// # Arguments
+///
+/// * `store` - The link store to query
+/// * `ctx` - Storage context for namespace isolation
+/// * `from` - Starting entity reference
+/// * `to` - Target entity reference
+/// * `max_depth` - Maximum path length (clamped to [`MAX_TRAVERSAL_DEPTH`])
+///
+/// # Returns
+///
+/// - `Some(Vec<PathNode>)` - Path from source to target (inclusive)
+///   - `path[0]` is the start node (via_link = None)
+///   - `path[i].via_link` is the edge from `path[i-1]` to `path[i]`
+/// - `None` - No path exists within max_depth
+///
+/// # Complexity
+///
+/// - Time: O(b^(d/2)) vs O(b^d) for standard BFS
+/// - Space: O(b^(d/2)) for both frontiers
+///
+/// # Example
+///
+/// ```ignore
+/// let path = find_shortest_path(&store, &ctx, alice_ref, bob_ref, 5).await?;
+/// if let Some(path) = path {
+///     println!("Found path of {} hops", path.len() - 1);
+///     for node in &path {
+///         if let Some(link) = &node.via_link {
+///             println!("  via {} to {:?}", link.relation, node.entity_id);
+///         }
+///     }
+/// }
+/// ```
+pub async fn find_shortest_path<S: LinkStore>(
+    store: &S,
+    ctx: &StorageContext,
+    from: EntityRef,
+    to: EntityRef,
+    max_depth: usize,
+) -> Result<Option<Vec<PathNode>>> {
+    // Clamp max_depth to prevent excessive search
+    let max_depth = max_depth.min(MAX_TRAVERSAL_DEPTH);
+
+    // Same node = trivial path (EntityRef implements Eq)
+    if from == to {
+        return Ok(Some(vec![PathNode::start(from)]));
+    }
+
+    // Forward search state: entity -> (depth, parent_entity, link to this node)
+    // EntityRef implements Hash + Eq, enabling direct use as HashMap key.
+    let mut forward_visited: HashMap<EntityRef, (usize, Option<EntityRef>, Option<Link>)> =
+        HashMap::new();
+    let mut forward_queue: VecDeque<EntityRef> = VecDeque::new();
+    forward_visited.insert(from.clone(), (0, None, None));
+    forward_queue.push_back(from.clone());
+
+    // Backward search state: entity -> (depth, child_entity, link from this node)
+    let mut backward_visited: HashMap<EntityRef, (usize, Option<EntityRef>, Option<Link>)> =
+        HashMap::new();
+    let mut backward_queue: VecDeque<EntityRef> = VecDeque::new();
+    backward_visited.insert(to.clone(), (0, None, None));
+    backward_queue.push_back(to.clone());
+
+    let mut best_meeting: Option<(EntityRef, usize)> = None; // (node, total_dist)
+    let mut current_depth = 0;
+
+    // Alternate between forward and backward expansion.
+    // Process entire BFS levels before checking for a meeting point so we
+    // find the meeting node with the smallest total distance, not just the
+    // first one encountered (which depends on HashMap iteration order).
+    while !forward_queue.is_empty() || !backward_queue.is_empty() {
+        if current_depth > max_depth {
+            break;
+        }
+
+        // Expand forward frontier (following outgoing edges)
+        let forward_level_size = forward_queue.len();
+        for _ in 0..forward_level_size {
+            if let Some(current) = forward_queue.pop_front() {
+                let outgoing = store.outgoing(ctx, &current).await.map_err(|e| {
+                    RetrievalError::GraphTraversal(format!("link store error: {e}"))
+                })?;
+
+                for link in outgoing {
+                    let neighbor = link.target.clone();
+
+                    if !forward_visited.contains_key(&neighbor) {
+                        let fwd_dist = current_depth + 1;
+                        // Store: link goes from current to neighbor
+                        forward_visited.insert(
+                            neighbor.clone(),
+                            (fwd_dist, Some(current.clone()), Some(link)),
+                        );
+                        forward_queue.push_back(neighbor.clone());
+
+                        // Check if we've met the backward search
+                        if let Some((bwd_dist, _, _)) = backward_visited.get(&neighbor) {
+                            let total = fwd_dist + bwd_dist;
+                            if best_meeting.as_ref().is_none_or(|&(_, best)| total < best) {
+                                best_meeting = Some((neighbor, total));
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        // If we found a meeting point during forward expansion, the best
+        // meeting at this depth is optimal -- no need to expand backward.
+        if best_meeting.is_some() {
+            break;
+        }
+
+        // Expand backward frontier (following incoming edges)
+        let backward_level_size = backward_queue.len();
+        for _ in 0..backward_level_size {
+            if let Some(current) = backward_queue.pop_front() {
+                let incoming = store.incoming(ctx, &current).await.map_err(|e| {
+                    RetrievalError::GraphTraversal(format!("link store error: {e}"))
+                })?;
+
+                for link in incoming {
+                    // For incoming: link goes from neighbor to current
+                    let neighbor = link.source.clone();
+
+                    if !backward_visited.contains_key(&neighbor) {
+                        let bwd_dist = current_depth + 1;
+                        // Store: link goes from neighbor to current (for path reconstruction)
+                        backward_visited.insert(
+                            neighbor.clone(),
+                            (bwd_dist, Some(current.clone()), Some(link)),
+                        );
+                        backward_queue.push_back(neighbor.clone());
+
+                        // Check if we've met the forward search
+                        if let Some((fwd_dist, _, _)) = forward_visited.get(&neighbor) {
+                            let total = fwd_dist + bwd_dist;
+                            if best_meeting.as_ref().is_none_or(|&(_, best)| total < best) {
+                                best_meeting = Some((neighbor, total));
+                            }
+                        }
+                    }
+                }
+            }
+        }
+
+        // After processing both frontiers at this depth, check for meeting
+        if best_meeting.is_some() {
+            break;
+        }
+
+        current_depth += 1;
+    }
+
+    // Reconstruct path if found
+    match best_meeting {
+        Some((mid, _total_dist)) => {
+            let path = reconstruct_path(&forward_visited, &backward_visited, &mid);
+            Ok(Some(path))
+        }
+        None => Ok(None),
+    }
+}
+
+/// Reconstruct the path from forward and backward visited maps.
+fn reconstruct_path(
+    forward_visited: &HashMap<EntityRef, (usize, Option<EntityRef>, Option<Link>)>,
+    backward_visited: &HashMap<EntityRef, (usize, Option<EntityRef>, Option<Link>)>,
+    meeting_point: &EntityRef,
+) -> Vec<PathNode> {
+    // Build forward part: start -> meeting_point
+    let mut forward_entities: Vec<EntityRef> = Vec::new();
+    let mut forward_links: Vec<Option<Link>> = Vec::new();
+    let mut current = meeting_point.clone();
+
+    // Walk backwards from meeting point to start
+    while let Some((_, parent, link)) = forward_visited.get(&current) {
+        forward_entities.push(current.clone());
+        forward_links.push(link.clone());
+        match parent {
+            Some(p) => current = p.clone(),
+            None => break,
+        }
+    }
+
+    // Reverse to get start -> meeting_point order
+    forward_entities.reverse();
+    forward_links.reverse();
+
+    // Build backward part: meeting_point -> end
+    let mut backward_entities: Vec<EntityRef> = Vec::new();
+    let mut backward_links: Vec<Option<Link>> = Vec::new();
+
+    // Start from meeting point, walk towards 'to'
+    if let Some((_, Some(child), link)) = backward_visited.get(meeting_point) {
+        backward_links.push(link.clone());
+        current = child.clone();
+
+        while let Some((_, next_child, link)) = backward_visited.get(&current) {
+            backward_entities.push(current.clone());
+            match next_child {
+                Some(nc) => {
+                    backward_links.push(link.clone());
+                    current = nc.clone();
+                }
+                None => break,
+            }
+        }
+        // Defensive: if the while loop exited because backward_visited
+        // lacked an entry for `current` (shouldn't happen in a consistent
+        // graph, but guards against any map skew), include `current` so
+        // the target node is never silently dropped.
+        if backward_entities.last().map_or(true, |e| e != &current) {
+            backward_entities.push(current.clone());
+        }
+    }
+
+    // Combine into final path
+    let mut path: Vec<PathNode> = Vec::new();
+
+    // Add forward nodes
+    for (i, entity) in forward_entities.iter().enumerate() {
+        let link = if i == 0 {
+            None // Start node has no inbound edge
+        } else {
+            forward_links.get(i).cloned().flatten()
+        };
+
+        path.push(PathNode {
+            entity_id: entity.clone(),
+            depth: i,
+            via_link: link,
+            path_weight: i as f64,
+        });
+    }
+
+    // Add backward nodes (these come after meeting point)
+    let base_depth = path.len();
+    for (i, entity) in backward_entities.iter().enumerate() {
+        let link = backward_links.get(i).cloned().flatten();
+        path.push(PathNode {
+            entity_id: entity.clone(),
+            depth: base_depth + i,
+            via_link: link,
+            path_weight: (base_depth + i) as f64,
+        });
+    }
+
+    path
+}
diff --git a/crates/khive-retrieval/src/graph/tests.rs b/crates/khive-retrieval/src/graph/tests.rs
new file mode 100644
index 00000000..c9439355
--- /dev/null
+++ b/crates/khive-retrieval/src/graph/tests.rs
@@ -0,0 +1,134 @@
+//! Unit tests for graph traversal module.
+
+use super::compat::{EntityRef, MockLinkStore, test_context};
+
+use crate::graph::types::{
+    Direction, PathNode, TraversalOptions, MAX_TRAVERSAL_DEPTH, MAX_TRAVERSAL_RESULTS,
+};
+
+#[test]
+fn test_traversal_options_default() {
+    let opts = TraversalOptions::default();
+    assert_eq!(opts.max_depth, 3);
+    assert_eq!(opts.direction, Direction::Out);
+    assert!(opts.link_types.is_none());
+    assert_eq!(opts.limit, Some(MAX_TRAVERSAL_RESULTS));
+}
+
+#[test]
+fn test_traversal_options_builder() {
+    let opts = TraversalOptions::new(5)
+        .with_direction(Direction::Both)
+        .with_link_types(["contains", "references"])
+        .with_limit(100)
+        .with_min_weight(0.5);
+
+    assert_eq!(opts.max_depth, 5);
+    assert_eq!(opts.direction, Direction::Both);
+    assert_eq!(
+        opts.link_types,
+        Some(vec!["contains".to_string(), "references".to_string()])
+    );
+    assert_eq!(opts.limit, Some(100));
+    assert_eq!(opts.min_weight, Some(0.5));
+}
+
+#[test]
+fn test_traversal_options_clamping() {
+    // Depth clamping
+    let opts = TraversalOptions::new(100);
+    assert_eq!(opts.max_depth, MAX_TRAVERSAL_DEPTH);
+
+    // Limit clamping
+    let opts = TraversalOptions::new(3).with_limit(100_000);
+    assert_eq!(opts.limit, Some(MAX_TRAVERSAL_RESULTS));
+}
+
+#[test]
+fn test_path_node_start() {
+    let entity = EntityRef::External("test".to_string());
+    let node = PathNode::start(entity.clone());
+
+    assert_eq!(node.entity_id, entity);
+    assert_eq!(node.depth, 0);
+    assert!(node.via_link.is_none());
+    assert_eq!(node.path_weight, 0.0);
+}
+
+#[test]
+fn test_direction_default() {
+    let dir = Direction::default();
+    assert_eq!(dir, Direction::Out);
+}
+
+#[test]
+fn test_safety_constants() {
+    // Verify safety constants are reasonable
+    assert_eq!(MAX_TRAVERSAL_DEPTH, 20);
+    assert_eq!(MAX_TRAVERSAL_RESULTS, 10_000);
+}
+
+#[tokio::test]
+async fn shortest_path_includes_target_node() {
+    // Graph: A → B → C. Verify path is [A, B, C] — all three nodes including target C.
+    let store = MockLinkStore::new();
+    let ctx = test_context();
+
+    let a = EntityRef::External("A".to_string());
+    let b = EntityRef::External("B".to_string());
+    let c = EntityRef::External("C".to_string());
+
+    store
+        .link(
+            &ctx,
+            a.clone(),
+            b.clone(),
+            "edge",
+            None::<serde_json::Value>,
+        )
+        .await
+        .unwrap();
+    store
+        .link(&ctx, b.clone(), c.clone(), "edge", None)
+        .await
+        .unwrap();
+
+    let path = super::shortest::find_shortest_path(&store, &ctx, a.clone(), c.clone(), 5)
+        .await
+        .unwrap()
+        .expect("path exists");
+
+    assert_eq!(path.len(), 3, "path should contain 3 nodes: A, B, C");
+    assert_eq!(path[0].entity_id, a, "first node is start (A)");
+    assert_eq!(path[2].entity_id, c, "last node is target (C)");
+}
+
+#[tokio::test]
+async fn shortest_path_direct_edge_includes_target() {
+    // Graph: A → B (direct). Path should be [A, B], not just [A].
+    let store = MockLinkStore::new();
+    let ctx = test_context();
+
+    let a = EntityRef::External("X".to_string());
+    let b = EntityRef::External("Y".to_string());
+
+    store
+        .link(
+            &ctx,
+            a.clone(),
+            b.clone(),
+            "edge",
+            None::<serde_json::Value>,
+        )
+        .await
+        .unwrap();
+
+    let path = super::shortest::find_shortest_path(&store, &ctx, a.clone(), b.clone(), 5)
+        .await
+        .unwrap()
+        .expect("path exists");
+
+    assert_eq!(path.len(), 2, "path should contain 2 nodes: X, Y");
+    assert_eq!(path[0].entity_id, a);
+    assert_eq!(path[1].entity_id, b, "target node must be in path");
+}
diff --git a/crates/khive-retrieval/src/graph/types.rs b/crates/khive-retrieval/src/graph/types.rs
new file mode 100644
index 00000000..cf5b8159
--- /dev/null
+++ b/crates/khive-retrieval/src/graph/types.rs
@@ -0,0 +1,208 @@
+//! Graph traversal types.
+
+use super::compat::{EntityRef, Link};
+use serde::{Deserialize, Serialize};
+
+/// Maximum traversal depth to prevent stack overflow and runaway queries.
+pub const MAX_TRAVERSAL_DEPTH: usize = 20;
+
+/// Maximum results per traversal to prevent memory exhaustion.
+pub const MAX_TRAVERSAL_RESULTS: usize = 10_000;
+
+/// Direction of edge traversal.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Default, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum Direction {
+    /// Follow outgoing edges (source -> target).
+    #[default]
+    #[serde(alias = "Out")]
+    Out,
+    /// Follow incoming edges (target <- source).
+    #[serde(alias = "In")]
+    In,
+    /// Follow edges in both directions.
+    #[serde(alias = "Both")]
+    Both,
+}
+
+/// A node in a traversal path.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct PathNode {
+    /// The entity at this position in the path.
+    pub entity_id: EntityRef,
+    /// Depth from the start node (0 = start node).
+    pub depth: usize,
+    /// The link that led to this node (None for start node).
+    pub via_link: Option<Link>,
+    /// Cumulative path weight (sum of edge weights).
+    pub path_weight: f64,
+}
+
+impl PathNode {
+    /// Create a new path node for the start position.
+    pub fn start(entity_id: EntityRef) -> Self {
+        Self {
+            entity_id,
+            depth: 0,
+            via_link: None,
+            path_weight: 0.0,
+        }
+    }
+
+    /// Create a path node from an outgoing link.
+    pub fn from_outgoing_link(link: Link, depth: usize, path_weight: f64) -> Self {
+        Self {
+            entity_id: link.target.clone(),
+            depth,
+            via_link: Some(link),
+            path_weight,
+        }
+    }
+
+    /// Create a path node from an incoming link.
+    pub fn from_incoming_link(link: Link, depth: usize, path_weight: f64) -> Self {
+        Self {
+            entity_id: link.source.clone(),
+            depth,
+            via_link: Some(link),
+            path_weight,
+        }
+    }
+}
+
+/// Options for graph traversal operations.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct TraversalOptions {
+    /// Maximum depth to traverse (clamped to [`MAX_TRAVERSAL_DEPTH`]).
+    pub max_depth: usize,
+    /// Maximum number of nodes to return (clamped to [`MAX_TRAVERSAL_RESULTS`]).
+    pub limit: Option<usize>,
+    /// Direction to follow edges.
+    pub direction: Direction,
+    /// Filter by link relation types (None = all types).
+    pub link_types: Option<Vec<String>>,
+    /// Minimum edge weight to consider (for weighted traversal).
+    pub min_weight: Option<f64>,
+}
+
+impl Default for TraversalOptions {
+    fn default() -> Self {
+        Self {
+            max_depth: 3,
+            limit: Some(MAX_TRAVERSAL_RESULTS),
+            direction: Direction::Out,
+            link_types: None,
+            min_weight: None,
+        }
+    }
+}
+
+impl TraversalOptions {
+    /// Create new options with specified max depth.
+    pub fn new(max_depth: usize) -> Self {
+        Self {
+            max_depth: max_depth.min(MAX_TRAVERSAL_DEPTH),
+            limit: Some(MAX_TRAVERSAL_RESULTS),
+            ..Default::default()
+        }
+    }
+
+    /// Set the maximum traversal depth.
+    #[must_use]
+    pub fn with_max_depth(mut self, depth: usize) -> Self {
+        self.max_depth = depth.min(MAX_TRAVERSAL_DEPTH);
+        self
+    }
+
+    /// Set traversal direction.
+    #[must_use]
+    pub fn with_direction(mut self, direction: Direction) -> Self {
+        self.direction = direction;
+        self
+    }
+
+    /// Filter to specific link relation types.
+    #[must_use]
+    pub fn with_link_types(mut self, types: impl IntoIterator<Item = impl Into<String>>) -> Self {
+        self.link_types = Some(types.into_iter().map(Into::into).collect());
+        self
+    }
+
+    /// Set maximum number of results.
+    #[must_use]
+    pub fn with_limit(mut self, limit: usize) -> Self {
+        self.limit = Some(limit.min(MAX_TRAVERSAL_RESULTS));
+        self
+    }
+
+    /// Set minimum edge weight threshold.
+    #[must_use]
+    pub fn with_min_weight(mut self, weight: f64) -> Self {
+        self.min_weight = Some(weight);
+        self
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_traversal_options_default() {
+        let opts = TraversalOptions::default();
+        assert_eq!(opts.max_depth, 3);
+        assert_eq!(opts.direction, Direction::Out);
+        assert!(opts.link_types.is_none());
+        assert_eq!(opts.limit, Some(MAX_TRAVERSAL_RESULTS));
+    }
+
+    #[test]
+    fn test_traversal_options_builder() {
+        let opts = TraversalOptions::new(5)
+            .with_direction(Direction::Both)
+            .with_link_types(["contains", "references"])
+            .with_limit(100)
+            .with_min_weight(0.5);
+
+        assert_eq!(opts.max_depth, 5);
+        assert_eq!(opts.direction, Direction::Both);
+        assert_eq!(
+            opts.link_types,
+            Some(vec!["contains".to_string(), "references".to_string()])
+        );
+        assert_eq!(opts.limit, Some(100));
+        assert_eq!(opts.min_weight, Some(0.5));
+    }
+
+    #[test]
+    fn test_traversal_options_clamping() {
+        let opts = TraversalOptions::new(100);
+        assert_eq!(opts.max_depth, MAX_TRAVERSAL_DEPTH);
+
+        let opts = TraversalOptions::new(3).with_limit(100_000);
+        assert_eq!(opts.limit, Some(MAX_TRAVERSAL_RESULTS));
+    }
+
+    #[test]
+    fn test_path_node_start() {
+        let entity = EntityRef::External("test".to_string());
+        let node = PathNode::start(entity.clone());
+
+        assert_eq!(node.entity_id, entity);
+        assert_eq!(node.depth, 0);
+        assert!(node.via_link.is_none());
+        assert_eq!(node.path_weight, 0.0);
+    }
+
+    #[test]
+    fn test_direction_default() {
+        let dir = Direction::default();
+        assert_eq!(dir, Direction::Out);
+    }
+
+    #[test]
+    fn test_safety_constants() {
+        assert_eq!(MAX_TRAVERSAL_DEPTH, 20);
+        assert_eq!(MAX_TRAVERSAL_RESULTS, 10_000);
+    }
+}
diff --git a/crates/khive-retrieval/src/hybrid/config.rs b/crates/khive-retrieval/src/hybrid/config.rs
new file mode 100644
index 00000000..febac2bf
--- /dev/null
+++ b/crates/khive-retrieval/src/hybrid/config.rs
@@ -0,0 +1,260 @@
+//! Hybrid search configuration types.
+
+use std::time::Duration;
+
+use khive_score::DeterministicScore;
+use serde::{Deserialize, Serialize};
+
+use khive_fusion::FusionStrategy;
+
+/// Default candidate pool multiplier over top_k.
+pub const DEFAULT_POOL_MULTIPLIER: usize = 5;
+
+/// Query for hybrid search.
+///
+/// Combines text for keyword search and optional embedding for vector search.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct Query {
+    /// Text for keyword search (required).
+    pub text: String,
+
+    /// Pre-computed embedding for vector search (optional).
+    ///
+    /// If None, vector search is skipped or caller must provide.
+    pub embedding: Option<Vec<f32>>,
+
+    /// Optional filters to apply post-retrieval.
+    pub filters: Option<serde_json::Value>,
+}
+
+impl Query {
+    /// Create a new query with text only (keyword search).
+    pub fn text(text: impl Into<String>) -> Self {
+        Self {
+            text: text.into(),
+            embedding: None,
+            filters: None,
+        }
+    }
+
+    /// Create a query with both text and embedding (hybrid search).
+    pub fn hybrid(text: impl Into<String>, embedding: Vec<f32>) -> Self {
+        Self {
+            text: text.into(),
+            embedding: Some(embedding),
+            filters: None,
+        }
+    }
+
+    /// Add filters to the query.
+    #[must_use]
+    pub fn with_filters(mut self, filters: serde_json::Value) -> Self {
+        self.filters = Some(filters);
+        self
+    }
+
+    /// Check if this query supports vector search.
+    pub fn has_embedding(&self) -> bool {
+        self.embedding.is_some()
+    }
+}
+
+/// Configuration for hybrid search.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct HybridConfig {
+    /// Fusion strategy to use (default: RRF with k=60).
+    pub fusion_strategy: FusionStrategy,
+
+    /// Number of results to return.
+    pub top_k: usize,
+
+    /// Candidates to fetch from each retriever before fusion.
+    ///
+    /// Should be >= 5 * top_k for quality fusion.
+    pub candidate_pool_size: usize,
+
+    /// Minimum score threshold (post-fusion).
+    pub min_score: Option<DeterministicScore>,
+
+    /// Weight for vector search results (0.0 to 1.0).
+    ///
+    /// Only used when fusion_strategy is Weighted.
+    pub vector_weight: f64,
+
+    /// Weight for keyword search results (0.0 to 1.0).
+    ///
+    /// Only used when fusion_strategy is Weighted.
+    pub keyword_weight: f64,
+
+    /// Optional timeout for the entire search operation.
+    ///
+    /// If set, the search will be cancelled if it exceeds this duration,
+    /// returning [`RetrievalError::QueryTimeout`].
+    /// If None, no timeout is applied.
+    #[serde(
+        default,
+        skip_serializing_if = "Option::is_none",
+        with = "crate::timeout::serde_opt_duration"
+    )]
+    pub timeout: Option<Duration>,
+}
+
+impl Default for HybridConfig {
+    fn default() -> Self {
+        Self {
+            fusion_strategy: FusionStrategy::rrf(),
+            top_k: 10,
+            candidate_pool_size: 50, // 5 * top_k
+            min_score: None,
+            vector_weight: 0.7,
+            keyword_weight: 0.3,
+            timeout: None,
+        }
+    }
+}
+
+impl HybridConfig {
+    /// Create a new config with specified top_k.
+    pub fn new(top_k: usize) -> Self {
+        Self {
+            top_k,
+            candidate_pool_size: top_k * DEFAULT_POOL_MULTIPLIER,
+            ..Default::default()
+        }
+    }
+
+    /// Set the fusion strategy.
+    #[must_use]
+    pub fn with_fusion_strategy(mut self, strategy: FusionStrategy) -> Self {
+        self.fusion_strategy = strategy;
+        self
+    }
+
+    /// Set the candidate pool size.
+    #[must_use]
+    pub fn with_pool_size(mut self, size: usize) -> Self {
+        self.candidate_pool_size = size;
+        self
+    }
+
+    /// Set the minimum score threshold.
+    #[must_use]
+    pub fn with_min_score(mut self, score: DeterministicScore) -> Self {
+        self.min_score = Some(score);
+        self
+    }
+
+    /// Set weights for weighted fusion.
+    ///
+    /// Weights are clamped to [0.0, 1.0].
+    #[must_use]
+    pub fn with_weights(mut self, vector: f64, keyword: f64) -> Self {
+        self.vector_weight = vector.clamp(0.0, 1.0);
+        self.keyword_weight = keyword.clamp(0.0, 1.0);
+        self
+    }
+
+    /// Set the search timeout.
+    ///
+    /// If the search operation exceeds this duration, it will return
+    /// [`RetrievalError::QueryTimeout`].
+    #[must_use]
+    pub fn with_timeout(mut self, timeout: Duration) -> Self {
+        self.timeout = Some(timeout);
+        self
+    }
+
+    /// Get normalized weights that sum to 1.0.
+    ///
+    /// If both weights are zero, returns equal weights (0.5, 0.5).
+    pub fn normalized_weights(&self) -> (f64, f64) {
+        let sum = self.vector_weight + self.keyword_weight;
+        if sum <= 0.0 {
+            (0.5, 0.5)
+        } else {
+            (self.vector_weight / sum, self.keyword_weight / sum)
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_query_text_only() {
+        let q = Query::text("hello world");
+        assert_eq!(q.text, "hello world");
+        assert!(q.embedding.is_none());
+        assert!(!q.has_embedding());
+    }
+
+    #[test]
+    fn test_query_hybrid() {
+        let embedding = vec![0.1, 0.2, 0.3];
+        let q = Query::hybrid("hello", embedding.clone());
+        assert_eq!(q.text, "hello");
+        assert_eq!(q.embedding, Some(embedding));
+        assert!(q.has_embedding());
+    }
+
+    #[test]
+    fn test_query_with_filters() {
+        let q = Query::text("test").with_filters(serde_json::json!({"type": "memory"}));
+        assert!(q.filters.is_some());
+    }
+
+    #[test]
+    fn test_hybrid_config_default() {
+        let config = HybridConfig::default();
+        assert_eq!(config.top_k, 10);
+        assert_eq!(config.candidate_pool_size, 50);
+        assert!(matches!(
+            config.fusion_strategy,
+            FusionStrategy::Rrf { k: 60 }
+        ));
+        assert!(config.min_score.is_none());
+    }
+
+    #[test]
+    fn test_hybrid_config_new() {
+        let config = HybridConfig::new(20);
+        assert_eq!(config.top_k, 20);
+        assert_eq!(config.candidate_pool_size, 100); // 20 * 5
+    }
+
+    #[test]
+    fn test_hybrid_config_builder() {
+        let config = HybridConfig::new(10)
+            .with_fusion_strategy(FusionStrategy::union())
+            .with_pool_size(200)
+            .with_weights(0.6, 0.4);
+
+        assert_eq!(config.top_k, 10);
+        assert_eq!(config.candidate_pool_size, 200);
+        assert!(matches!(config.fusion_strategy, FusionStrategy::Union));
+        assert_eq!(config.vector_weight, 0.6);
+        assert_eq!(config.keyword_weight, 0.4);
+    }
+
+    #[test]
+    fn test_normalized_weights() {
+        let config = HybridConfig::default();
+        let (v, k) = config.normalized_weights();
+        assert!((v - 0.7).abs() < 0.01);
+        assert!((k - 0.3).abs() < 0.01);
+
+        // Zero weights -> equal
+        let config = HybridConfig::default().with_weights(0.0, 0.0);
+        let (v, k) = config.normalized_weights();
+        assert!((v - 0.5).abs() < 0.01);
+        assert!((k - 0.5).abs() < 0.01);
+    }
+
+    #[test]
+    fn test_weight_clamping() {
+        let config = HybridConfig::default().with_weights(1.5, -0.5);
+        assert_eq!(config.vector_weight, 1.0);
+        assert_eq!(config.keyword_weight, 0.0);
+    }
+}
diff --git a/crates/khive-retrieval/src/hybrid/cross_encoder.rs b/crates/khive-retrieval/src/hybrid/cross_encoder.rs
new file mode 100644
index 00000000..ddc42343
--- /dev/null
+++ b/crates/khive-retrieval/src/hybrid/cross_encoder.rs
@@ -0,0 +1,291 @@
+//! Native cross-encoder reranking via `khive-inference`.
+//!
+//! Provides `NativeCrossEncoderReranker<Id, R, S>` which implements `Reranker<Id>`.
+//! Document texts are fetched by `RerankDocumentResolver<Id>` so the existing
+//! `Reranker` trait (which only carries IDs and scores) does not need to change.
+
+use std::marker::PhantomData;
+use std::sync::Arc;
+
+use async_trait::async_trait;
+use khive_score::DeterministicScore;
+
+use crate::error::{Result, RetrievalError};
+use crate::hybrid::searcher::Reranker;
+
+/// Resolve document texts for a set of candidate IDs.
+///
+/// Implementors fetch raw document text from whatever backing store is
+/// available. A missing document (e.g. deleted after indexing) should be
+/// returned as `None`.
+#[async_trait]
+pub trait RerankDocumentResolver<Id>: Send + Sync
+where
+    Id: Send + Sync + 'static,
+{
+    /// Fetch document bodies for `ids` in input order.
+    ///
+    /// The returned `Vec` must be the same length as `ids`. A missing document
+    /// is represented as `None`; the reranker will return an error in that case.
+    async fn resolve_documents(&self, ids: &[Id]) -> Result<Vec<Option<String>>>;
+}
+
+/// Synchronous cross-encoder scorer abstraction (for testability).
+pub trait CrossEncoderScorer: Send + Sync {
+    /// Score a query against a batch of documents; returns one value per document.
+    fn score_batch(&self, query: &str, documents: &[&str]) -> Vec<f32>;
+}
+
+// TODO(port-rerank): khive-inference not ported yet; CrossEncoderModel impl disabled.
+// impl CrossEncoderScorer for khive_inference::CrossEncoderModel { ... }
+
+/// Reranker that scores candidates with a native cross-encoder model.
+///
+/// The generic parameter `S` is the scorer implementation (defaults to no external dep
+/// in this OSS build; use a concrete scorer by passing one explicitly).
+/// Tests substitute a lightweight fake scorer.
+pub struct NativeCrossEncoderReranker<Id, R, S>
+where
+    Id: Clone + Send + Sync + 'static,
+    R: RerankDocumentResolver<Id>,
+    S: CrossEncoderScorer,
+{
+    model: Arc<S>,
+    resolver: Arc<R>,
+    _id: PhantomData<fn() -> Id>,
+}
+
+impl<Id, R, S> NativeCrossEncoderReranker<Id, R, S>
+where
+    Id: Clone + Send + Sync + 'static,
+    R: RerankDocumentResolver<Id>,
+    S: CrossEncoderScorer,
+{
+    /// Construct from an existing scorer and resolver.
+    pub fn new(model: Arc<S>, resolver: Arc<R>) -> Self {
+        Self {
+            model,
+            resolver,
+            _id: PhantomData,
+        }
+    }
+}
+
+// TODO(port-rerank): from_directory constructor requires khive-inference::CrossEncoderModel.
+// Re-enable once khive-inference is ported.
+// impl<Id, R> NativeCrossEncoderReranker<Id, R, khive_inference::CrossEncoderModel> { ... }
+
+#[async_trait]
+impl<Id, R, S> Reranker<Id> for NativeCrossEncoderReranker<Id, R, S>
+where
+    Id: Clone + Send + Sync + 'static,
+    R: RerankDocumentResolver<Id>,
+    S: CrossEncoderScorer,
+{
+    async fn rerank(
+        &self,
+        query: &str,
+        results: Vec<(Id, DeterministicScore)>,
+        top_k: usize,
+    ) -> Result<Vec<(Id, DeterministicScore)>> {
+        if top_k == 0 || results.is_empty() {
+            return Ok(Vec::new());
+        }
+
+        let ids: Vec<Id> = results.iter().map(|(id, _)| id.clone()).collect();
+        let resolved = self.resolver.resolve_documents(&ids).await?;
+        if resolved.len() != results.len() {
+            return Err(RetrievalError::rerank(format!(
+                "resolver returned {} documents for {} candidates",
+                resolved.len(),
+                results.len()
+            )));
+        }
+
+        let mut documents: Vec<String> = Vec::with_capacity(resolved.len());
+        for (idx, opt) in resolved.into_iter().enumerate() {
+            let text = opt.ok_or_else(|| {
+                RetrievalError::rerank(format!(
+                    "missing document text for rerank candidate at index {idx}"
+                ))
+            })?;
+            documents.push(text);
+        }
+
+        let document_refs: Vec<&str> = documents.iter().map(String::as_str).collect();
+        let scores = self.model.score_batch(query, &document_refs);
+        if scores.len() != results.len() {
+            return Err(RetrievalError::rerank(format!(
+                "model returned {} scores for {} candidates",
+                scores.len(),
+                results.len()
+            )));
+        }
+
+        let mut scored: Vec<(usize, Id, f32)> = results
+            .into_iter()
+            .zip(scores)
+            .enumerate()
+            .map(|(idx, ((id, _), score))| (idx, id, score))
+            .collect();
+
+        scored.sort_by(|a, b| b.2.total_cmp(&a.2).then_with(|| a.0.cmp(&b.0)));
+
+        Ok(scored
+            .into_iter()
+            .take(top_k)
+            .map(|(_, id, score)| (id, DeterministicScore::from_f64(score as f64)))
+            .collect())
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    struct FakeScorer {
+        scores: Vec<f32>,
+    }
+
+    impl CrossEncoderScorer for FakeScorer {
+        fn score_batch(&self, _query: &str, _documents: &[&str]) -> Vec<f32> {
+            self.scores.clone()
+        }
+    }
+
+    struct FakeResolver {
+        documents: Vec<Option<String>>,
+    }
+
+    #[async_trait]
+    impl RerankDocumentResolver<u32> for FakeResolver {
+        async fn resolve_documents(&self, _ids: &[u32]) -> Result<Vec<Option<String>>> {
+            Ok(self.documents.clone())
+        }
+    }
+
+    fn make_reranker(
+        scores: Vec<f32>,
+        documents: Vec<Option<String>>,
+    ) -> NativeCrossEncoderReranker<u32, FakeResolver, FakeScorer> {
+        NativeCrossEncoderReranker::new(
+            Arc::new(FakeScorer { scores }),
+            Arc::new(FakeResolver { documents }),
+        )
+    }
+
+    #[tokio::test]
+    async fn test_top_k_zero_returns_empty() {
+        let reranker = make_reranker(vec![0.9, 0.1], vec![Some("a".into()), Some("b".into())]);
+        let results = vec![(1u32, DeterministicScore::from_f64(0.5))];
+        let out = reranker.rerank("q", results, 0).await.unwrap();
+        assert!(out.is_empty());
+    }
+
+    #[tokio::test]
+    async fn test_empty_input_returns_empty() {
+        let reranker = make_reranker(vec![], vec![]);
+        let out = reranker.rerank("q", vec![], 5).await.unwrap();
+        assert!(out.is_empty());
+    }
+
+    #[tokio::test]
+    async fn test_descending_sort() {
+        let reranker = make_reranker(
+            vec![0.1, 0.9, 0.5],
+            vec![Some("a".into()), Some("b".into()), Some("c".into())],
+        );
+        let results = vec![
+            (1u32, DeterministicScore::from_f64(0.3)),
+            (2u32, DeterministicScore::from_f64(0.3)),
+            (3u32, DeterministicScore::from_f64(0.3)),
+        ];
+        let out = reranker.rerank("q", results, 3).await.unwrap();
+        assert_eq!(out[0].0, 2u32); // score 0.9
+        assert_eq!(out[1].0, 3u32); // score 0.5
+        assert_eq!(out[2].0, 1u32); // score 0.1
+    }
+
+    #[tokio::test]
+    async fn test_tie_preserves_original_order() {
+        let reranker = make_reranker(
+            vec![0.5, 0.5, 0.5],
+            vec![Some("a".into()), Some("b".into()), Some("c".into())],
+        );
+        let results = vec![
+            (10u32, DeterministicScore::from_f64(0.3)),
+            (20u32, DeterministicScore::from_f64(0.3)),
+            (30u32, DeterministicScore::from_f64(0.3)),
+        ];
+        let out = reranker.rerank("q", results, 3).await.unwrap();
+        assert_eq!(out[0].0, 10u32);
+        assert_eq!(out[1].0, 20u32);
+        assert_eq!(out[2].0, 30u32);
+    }
+
+    #[tokio::test]
+    async fn test_missing_document_returns_error() {
+        let reranker = make_reranker(vec![0.5], vec![None]);
+        let results = vec![(1u32, DeterministicScore::from_f64(0.5))];
+        let err = reranker.rerank("q", results, 1).await.unwrap_err();
+        assert!(matches!(err, RetrievalError::Rerank(_)));
+    }
+
+    #[tokio::test]
+    async fn test_resolver_length_mismatch_returns_error() {
+        struct BadResolver;
+
+        #[async_trait]
+        impl RerankDocumentResolver<u32> for BadResolver {
+            async fn resolve_documents(&self, _ids: &[u32]) -> Result<Vec<Option<String>>> {
+                Ok(vec![]) // wrong length
+            }
+        }
+
+        let reranker = NativeCrossEncoderReranker::new(
+            Arc::new(FakeScorer { scores: vec![0.5] }),
+            Arc::new(BadResolver),
+        );
+        let results = vec![(1u32, DeterministicScore::from_f64(0.5))];
+        let err = reranker.rerank("q", results, 1).await.unwrap_err();
+        assert!(matches!(err, RetrievalError::Rerank(_)));
+    }
+
+    #[tokio::test]
+    async fn test_top_k_limits_output() {
+        let reranker = make_reranker(
+            vec![0.9, 0.8, 0.7],
+            vec![Some("a".into()), Some("b".into()), Some("c".into())],
+        );
+        let results = vec![
+            (1u32, DeterministicScore::from_f64(0.3)),
+            (2u32, DeterministicScore::from_f64(0.3)),
+            (3u32, DeterministicScore::from_f64(0.3)),
+        ];
+        let out = reranker.rerank("q", results, 2).await.unwrap();
+        assert_eq!(out.len(), 2);
+    }
+
+    #[tokio::test]
+    async fn test_top_k_larger_than_results_returns_all() {
+        // top_k=10 with only 2 candidates — should return all 2, sorted by score
+        let reranker = make_reranker(vec![0.1, 0.9], vec![Some("a".into()), Some("b".into())]);
+        let results = vec![
+            (1u32, DeterministicScore::from_f64(0.5)),
+            (2u32, DeterministicScore::from_f64(0.3)),
+        ];
+        let out = reranker.rerank("q", results, 10).await.unwrap();
+        assert_eq!(out.len(), 2);
+        assert_eq!(out[0].0, 2u32); // score 0.9
+        assert_eq!(out[1].0, 1u32); // score 0.1
+    }
+
+    #[tokio::test]
+    async fn test_single_result_passes_through() {
+        let reranker = make_reranker(vec![0.75], vec![Some("only doc".into())]);
+        let results = vec![(42u32, DeterministicScore::from_f64(0.5))];
+        let out = reranker.rerank("q", results, 1).await.unwrap();
+        assert_eq!(out.len(), 1);
+        assert_eq!(out[0].0, 42u32);
+    }
+}
diff --git a/crates/khive-retrieval/src/hybrid/dual_index.rs b/crates/khive-retrieval/src/hybrid/dual_index.rs
new file mode 100644
index 00000000..19350892
--- /dev/null
+++ b/crates/khive-retrieval/src/hybrid/dual_index.rs
@@ -0,0 +1,524 @@
+//! Dual-index query routing for embedding model migration.
+//!
+//! During an embedding model migration, both old and new embedding indexes coexist.
+//! The retrieval system must query both indexes and merge results to maintain search
+//! quality throughout the transition. This module provides a routing layer that
+//! decides which indexes to query and fuses the results using the fusion system.
+//!
+//! # Migration Lifecycle
+//!
+//! ```text
+//! Phase 1 (start):  Both → query old + new, fuse with RRF
+//! Phase 2 (mid):    Weighted → prefer new, still query old
+//! Phase 3 (end):    PrimaryOnly → new index fully populated
+//! ```
+//!
+//! # Example
+//!
+//! ```rust
+//! use khive_retrieval::hybrid::dual_index::{DualIndexConfig, DualIndexRouter, DualIndexStrategy};
+//! use khive_score::DeterministicScore;
+//!
+//! // During migration: query both indexes, fuse with RRF
+//! let config = DualIndexConfig::default();
+//! let router = DualIndexRouter::<String>::new(config);
+//!
+//! assert!(router.should_query_primary(None));
+//! assert!(router.should_query_legacy(None));
+//!
+//! // Merge results from both indexes
+//! let primary = vec![
+//!     ("doc_a".to_string(), DeterministicScore::from_f64(0.9)),
+//!     ("doc_b".to_string(), DeterministicScore::from_f64(0.8)),
+//! ];
+//! let legacy = vec![
+//!     ("doc_b".to_string(), DeterministicScore::from_f64(0.95)),
+//!     ("doc_c".to_string(), DeterministicScore::from_f64(0.7)),
+//! ];
+//!
+//! let merged = router.merge_results(primary, legacy, 10);
+//! // doc_b appears in both, gets highest RRF score
+//! assert_eq!(merged[0].0, "doc_b");
+//! ```
+
+use std::hash::Hash;
+
+use khive_score::DeterministicScore;
+
+use khive_fusion::{fuse, FusionStrategy};
+
+/// Strategy for routing queries during dual-index operation.
+///
+/// Controls which indexes are queried and how results are combined.
+#[derive(Debug, Clone, PartialEq)]
+pub enum DualIndexStrategy {
+    /// Query both indexes, fuse results (default during migration).
+    ///
+    /// Uses the specified fusion strategy to combine results from both
+    /// the primary (new) and legacy (old) indexes.
+    Both {
+        /// Fusion strategy for combining results from both indexes.
+        fusion: FusionStrategy,
+    },
+
+    /// Query only the primary (new) index.
+    ///
+    /// Use after migration is complete and all documents have been
+    /// re-embedded with the new model.
+    PrimaryOnly,
+
+    /// Query only the legacy (old) index.
+    ///
+    /// Use as a fallback if the new index has issues.
+    LegacyOnly,
+
+    /// Weighted preference: primary gets `primary_weight`, legacy gets `1 - primary_weight`.
+    ///
+    /// Useful during mid-migration when the new index covers most documents
+    /// but the old index still has better coverage for some.
+    Weighted {
+        /// Weight for primary index results, in range [0.0, 1.0].
+        /// Legacy index weight is computed as `1.0 - primary_weight`.
+        primary_weight: f64,
+    },
+}
+
+impl Default for DualIndexStrategy {
+    fn default() -> Self {
+        DualIndexStrategy::Both {
+            fusion: FusionStrategy::rrf(),
+        }
+    }
+}
+
+/// Configuration for dual-index query routing.
+#[derive(Debug, Clone)]
+pub struct DualIndexConfig {
+    /// Routing strategy.
+    pub strategy: DualIndexStrategy,
+
+    /// Candidate pool multiplier for each index.
+    ///
+    /// Each index fetches `top_k * pool_multiplier` candidates before fusion.
+    /// Default: 3.
+    pub pool_multiplier: usize,
+
+    /// Minimum migration progress to auto-switch to `PrimaryOnly`, in range [0.0, 1.0].
+    ///
+    /// When `migration_progress >= auto_switch_threshold`, the router automatically
+    /// skips the legacy index. Set to `None` to disable auto-switching.
+    pub auto_switch_threshold: Option<f64>,
+}
+
+impl Default for DualIndexConfig {
+    fn default() -> Self {
+        Self {
+            strategy: DualIndexStrategy::default(),
+            pool_multiplier: 3,
+            auto_switch_threshold: None,
+        }
+    }
+}
+
+impl DualIndexConfig {
+    /// Create a config with a specific strategy.
+    pub fn with_strategy(mut self, strategy: DualIndexStrategy) -> Self {
+        self.strategy = strategy;
+        self
+    }
+
+    /// Set the candidate pool multiplier.
+    pub fn with_pool_multiplier(mut self, multiplier: usize) -> Self {
+        self.pool_multiplier = multiplier.max(1);
+        self
+    }
+
+    /// Set the auto-switch threshold for migration progress.
+    pub fn with_auto_switch_threshold(mut self, threshold: f64) -> Self {
+        self.auto_switch_threshold = Some(threshold.clamp(0.0, 1.0));
+        self
+    }
+}
+
+/// Routes queries between primary (new) and legacy (old) vector indexes.
+///
+/// During embedding model migration, this router ensures search quality
+/// by querying both indexes and fusing results. It is generic over the
+/// document ID type, matching the [`VectorSearch`](crate::hybrid::VectorSearch) trait.
+///
+/// # Type Parameters
+///
+/// * `Id` - The identifier type for search results. Must implement `Eq + Hash + Clone + Ord`
+///   for deterministic fusion (same bounds as [`fuse`]).
+pub struct DualIndexRouter<Id> {
+    config: DualIndexConfig,
+    _marker: std::marker::PhantomData<Id>,
+}
+
+impl<Id> DualIndexRouter<Id>
+where
+    Id: Eq + Hash + Clone + Ord,
+{
+    /// Create a new dual-index router with the given configuration.
+    pub fn new(config: DualIndexConfig) -> Self {
+        Self {
+            config,
+            _marker: std::marker::PhantomData,
+        }
+    }
+
+    /// Determine whether the primary (new) index should be queried.
+    ///
+    /// Returns `false` only for [`DualIndexStrategy::LegacyOnly`].
+    pub fn should_query_primary(&self, _migration_progress: Option<f64>) -> bool {
+        !matches!(self.config.strategy, DualIndexStrategy::LegacyOnly)
+    }
+
+    /// Determine whether the legacy (old) index should be queried.
+    ///
+    /// Returns `false` for [`DualIndexStrategy::PrimaryOnly`], and also returns
+    /// `false` when migration progress exceeds the auto-switch threshold.
+    pub fn should_query_legacy(&self, migration_progress: Option<f64>) -> bool {
+        match &self.config.strategy {
+            DualIndexStrategy::PrimaryOnly => false,
+            DualIndexStrategy::LegacyOnly => true,
+            DualIndexStrategy::Both { .. } | DualIndexStrategy::Weighted { .. } => {
+                // Auto-switch: if migration is nearly complete, skip legacy
+                if let (Some(threshold), Some(progress)) =
+                    (self.config.auto_switch_threshold, migration_progress)
+                {
+                    progress < threshold
+                } else {
+                    true
+                }
+            }
+        }
+    }
+
+    /// Get the candidate pool size for each index.
+    ///
+    /// Returns `top_k * pool_multiplier`.
+    pub fn pool_size(&self, top_k: usize) -> usize {
+        top_k * self.config.pool_multiplier
+    }
+
+    /// Merge results from primary and legacy indexes.
+    ///
+    /// Applies the configured strategy to combine results:
+    /// - `PrimaryOnly`: returns primary results (truncated)
+    /// - `LegacyOnly`: returns legacy results (truncated)
+    /// - `Both`: fuses both result sets using the configured fusion strategy
+    /// - `Weighted`: fuses with per-index weights
+    ///
+    /// # Arguments
+    ///
+    /// * `primary_results` - Results from the new embedding index
+    /// * `legacy_results` - Results from the old embedding index
+    /// * `top_k` - Number of results to return after merging
+    pub fn merge_results(
+        &self,
+        primary_results: Vec<(Id, DeterministicScore)>,
+        legacy_results: Vec<(Id, DeterministicScore)>,
+        top_k: usize,
+    ) -> Vec<(Id, DeterministicScore)> {
+        match &self.config.strategy {
+            DualIndexStrategy::PrimaryOnly => {
+                let mut results = primary_results;
+                results.truncate(top_k);
+                results
+            }
+            DualIndexStrategy::LegacyOnly => {
+                let mut results = legacy_results;
+                results.truncate(top_k);
+                results
+            }
+            DualIndexStrategy::Both { fusion } => {
+                let sources = vec![primary_results, legacy_results];
+                fuse(sources, fusion, top_k)
+            }
+            DualIndexStrategy::Weighted { primary_weight } => {
+                let w = primary_weight.clamp(0.0, 1.0);
+                let strategy = FusionStrategy::weighted(vec![w, 1.0 - w]);
+                let sources = vec![primary_results, legacy_results];
+                fuse(sources, &strategy, top_k)
+            }
+        }
+    }
+
+    /// Get a reference to the current routing strategy.
+    pub fn strategy(&self) -> &DualIndexStrategy {
+        &self.config.strategy
+    }
+
+    /// Update the routing strategy (e.g., when migration completes).
+    pub fn set_strategy(&mut self, strategy: DualIndexStrategy) {
+        self.config.strategy = strategy;
+    }
+
+    /// Get a reference to the full configuration.
+    pub fn config(&self) -> &DualIndexConfig {
+        &self.config
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Helper to build scored result lists from (id, f64) pairs.
+    fn make_results(items: Vec<(&str, f64)>) -> Vec<(String, DeterministicScore)> {
+        items
+            .into_iter()
+            .map(|(id, score)| (id.to_string(), DeterministicScore::from_f64(score)))
+            .collect()
+    }
+
+    // -- Strategy routing tests --
+
+    #[test]
+    fn test_primary_only_queries_only_primary() {
+        let config = DualIndexConfig::default().with_strategy(DualIndexStrategy::PrimaryOnly);
+        let router = DualIndexRouter::<String>::new(config);
+
+        assert!(router.should_query_primary(None));
+        assert!(!router.should_query_legacy(None));
+    }
+
+    #[test]
+    fn test_legacy_only_queries_only_legacy() {
+        let config = DualIndexConfig::default().with_strategy(DualIndexStrategy::LegacyOnly);
+        let router = DualIndexRouter::<String>::new(config);
+
+        assert!(!router.should_query_primary(None));
+        assert!(router.should_query_legacy(None));
+    }
+
+    #[test]
+    fn test_both_queries_both_indexes() {
+        let config = DualIndexConfig::default(); // default is Both { rrf }
+        let router = DualIndexRouter::<String>::new(config);
+
+        assert!(router.should_query_primary(None));
+        assert!(router.should_query_legacy(None));
+    }
+
+    #[test]
+    fn test_weighted_queries_both_indexes() {
+        let config = DualIndexConfig::default().with_strategy(DualIndexStrategy::Weighted {
+            primary_weight: 0.8,
+        });
+        let router = DualIndexRouter::<String>::new(config);
+
+        assert!(router.should_query_primary(None));
+        assert!(router.should_query_legacy(None));
+    }
+
+    // -- Auto-switch threshold tests --
+
+    #[test]
+    fn test_auto_switch_skips_legacy_when_threshold_exceeded() {
+        let config = DualIndexConfig::default().with_auto_switch_threshold(0.95);
+        let router = DualIndexRouter::<String>::new(config);
+
+        // Migration at 90% - below threshold, still query legacy
+        assert!(router.should_query_legacy(Some(0.90)));
+
+        // Migration at 95% - at threshold, skip legacy (progress >= threshold)
+        assert!(!router.should_query_legacy(Some(0.95)));
+
+        // Migration at 99% - above threshold, skip legacy
+        assert!(!router.should_query_legacy(Some(0.99)));
+    }
+
+    #[test]
+    fn test_auto_switch_no_threshold_always_queries_legacy() {
+        let config = DualIndexConfig::default(); // no auto_switch_threshold
+        let router = DualIndexRouter::<String>::new(config);
+
+        // Even with 100% progress, queries legacy without threshold
+        assert!(router.should_query_legacy(Some(1.0)));
+    }
+
+    #[test]
+    fn test_auto_switch_no_progress_queries_legacy() {
+        let config = DualIndexConfig::default().with_auto_switch_threshold(0.95);
+        let router = DualIndexRouter::<String>::new(config);
+
+        // No progress info provided - query legacy to be safe
+        assert!(router.should_query_legacy(None));
+    }
+
+    // -- Pool size tests --
+
+    #[test]
+    fn test_pool_size_calculation() {
+        let config = DualIndexConfig::default(); // pool_multiplier = 3
+        let router = DualIndexRouter::<String>::new(config);
+
+        assert_eq!(router.pool_size(10), 30);
+        assert_eq!(router.pool_size(1), 3);
+        assert_eq!(router.pool_size(0), 0);
+    }
+
+    #[test]
+    fn test_pool_size_custom_multiplier() {
+        let config = DualIndexConfig::default().with_pool_multiplier(5);
+        let router = DualIndexRouter::<String>::new(config);
+
+        assert_eq!(router.pool_size(10), 50);
+    }
+
+    // -- Merge results tests --
+
+    #[test]
+    fn test_merge_primary_only_returns_primary() {
+        let config = DualIndexConfig::default().with_strategy(DualIndexStrategy::PrimaryOnly);
+        let router = DualIndexRouter::<String>::new(config);
+
+        let primary = make_results(vec![("a", 0.9), ("b", 0.8)]);
+        let legacy = make_results(vec![("c", 0.95), ("d", 0.85)]);
+
+        let merged = router.merge_results(primary, legacy, 10);
+        assert_eq!(merged.len(), 2);
+        assert_eq!(merged[0].0, "a");
+        assert_eq!(merged[1].0, "b");
+    }
+
+    #[test]
+    fn test_merge_legacy_only_returns_legacy() {
+        let config = DualIndexConfig::default().with_strategy(DualIndexStrategy::LegacyOnly);
+        let router = DualIndexRouter::<String>::new(config);
+
+        let primary = make_results(vec![("a", 0.9), ("b", 0.8)]);
+        let legacy = make_results(vec![("c", 0.95), ("d", 0.85)]);
+
+        let merged = router.merge_results(primary, legacy, 10);
+        assert_eq!(merged.len(), 2);
+        assert_eq!(merged[0].0, "c");
+        assert_eq!(merged[1].0, "d");
+    }
+
+    #[test]
+    fn test_merge_both_fuses_with_rrf() {
+        let config = DualIndexConfig::default(); // Both { Rrf { k: 60 } }
+        let router = DualIndexRouter::<String>::new(config);
+
+        let primary = make_results(vec![("a", 0.9), ("b", 0.8)]);
+        let legacy = make_results(vec![("b", 0.95), ("c", 0.7)]);
+
+        let merged = router.merge_results(primary, legacy, 10);
+
+        // "b" appears in both sources, should get highest RRF score
+        assert_eq!(merged[0].0, "b");
+        // All three unique IDs should be present
+        assert_eq!(merged.len(), 3);
+    }
+
+    #[test]
+    fn test_merge_weighted_applies_weights() {
+        let config = DualIndexConfig::default().with_strategy(DualIndexStrategy::Weighted {
+            primary_weight: 0.8,
+        });
+        let router = DualIndexRouter::<String>::new(config);
+
+        let primary = make_results(vec![("a", 0.9), ("b", 0.5)]);
+        let legacy = make_results(vec![("b", 0.9), ("c", 0.5)]);
+
+        let merged = router.merge_results(primary, legacy, 10);
+
+        // All three unique IDs should appear
+        let ids: Vec<&str> = merged.iter().map(|(id, _)| id.as_str()).collect();
+        assert!(ids.contains(&"a"));
+        assert!(ids.contains(&"b"));
+        assert!(ids.contains(&"c"));
+    }
+
+    #[test]
+    fn test_merge_respects_top_k() {
+        let config = DualIndexConfig::default();
+        let router = DualIndexRouter::<String>::new(config);
+
+        let primary = make_results(vec![("a", 0.9), ("b", 0.8), ("c", 0.7)]);
+        let legacy = make_results(vec![("d", 0.95), ("e", 0.85), ("f", 0.75)]);
+
+        let merged = router.merge_results(primary, legacy, 2);
+        assert_eq!(merged.len(), 2);
+    }
+
+    #[test]
+    fn test_merge_empty_sources() {
+        let config = DualIndexConfig::default();
+        let router = DualIndexRouter::<String>::new(config);
+
+        let merged = router.merge_results(vec![], vec![], 10);
+        assert!(merged.is_empty());
+    }
+
+    // -- Strategy mutation tests --
+
+    #[test]
+    fn test_set_strategy_updates_routing() {
+        let config = DualIndexConfig::default();
+        let mut router = DualIndexRouter::<String>::new(config);
+
+        assert!(matches!(router.strategy(), DualIndexStrategy::Both { .. }));
+
+        router.set_strategy(DualIndexStrategy::PrimaryOnly);
+        assert!(matches!(router.strategy(), DualIndexStrategy::PrimaryOnly));
+    }
+
+    // -- Config builder tests --
+
+    #[test]
+    fn test_config_default() {
+        let config = DualIndexConfig::default();
+        assert!(matches!(config.strategy, DualIndexStrategy::Both { .. }));
+        assert_eq!(config.pool_multiplier, 3);
+        assert!(config.auto_switch_threshold.is_none());
+    }
+
+    #[test]
+    fn test_config_builder_chain() {
+        let config = DualIndexConfig::default()
+            .with_strategy(DualIndexStrategy::Weighted {
+                primary_weight: 0.7,
+            })
+            .with_pool_multiplier(5)
+            .with_auto_switch_threshold(0.95);
+
+        assert!(matches!(
+            config.strategy,
+            DualIndexStrategy::Weighted { primary_weight } if (primary_weight - 0.7).abs() < f64::EPSILON
+        ));
+        assert_eq!(config.pool_multiplier, 5);
+        assert!((config.auto_switch_threshold.unwrap() - 0.95).abs() < f64::EPSILON);
+    }
+
+    #[test]
+    fn test_pool_multiplier_min_enforced() {
+        let config = DualIndexConfig::default().with_pool_multiplier(0);
+        assert_eq!(config.pool_multiplier, 1);
+    }
+
+    #[test]
+    fn test_auto_switch_threshold_clamped() {
+        let config = DualIndexConfig::default().with_auto_switch_threshold(1.5);
+        assert!((config.auto_switch_threshold.unwrap() - 1.0).abs() < f64::EPSILON);
+
+        let config = DualIndexConfig::default().with_auto_switch_threshold(-0.5);
+        assert!((config.auto_switch_threshold.unwrap() - 0.0).abs() < f64::EPSILON);
+    }
+
+    // -- Default strategy tests --
+
+    #[test]
+    fn test_default_strategy_is_both_rrf() {
+        let strategy = DualIndexStrategy::default();
+        assert_eq!(
+            strategy,
+            DualIndexStrategy::Both {
+                fusion: FusionStrategy::rrf()
+            }
+        );
+    }
+}
diff --git a/crates/khive-retrieval/src/hybrid/mod.rs b/crates/khive-retrieval/src/hybrid/mod.rs
new file mode 100644
index 00000000..8e28d4ce
--- /dev/null
+++ b/crates/khive-retrieval/src/hybrid/mod.rs
@@ -0,0 +1,83 @@
+//! Unified hybrid search interface.
+//!
+//! Combines HNSW vector search, BM25 keyword search, and graph traversal
+//! into a single query interface with configurable fusion strategies.
+//!
+//! # Architecture (ADR-002)
+//!
+//! ```text
+//! Query ──┬── [Vector Search] ── HNSW ── Vec<(Id, Distance)>
+//!         │                                    │
+//!         │                              distance → similarity
+//!         │                                    │
+//!         │                              Vec<(Id, DeterministicScore)>
+//!         │                                    │
+//!         └── [Keyword Search] ── BM25 ── Vec<(Id, BM25Score)>
+//!                                              │
+//!                                        normalize → DeterministicScore
+//!                                              │
+//!                                        Vec<(Id, DeterministicScore)>
+//!                                              │
+//!                                ┌─────────────┴─────────────┐
+//!                                │   reciprocal_rank_fusion  │
+//!                                │      k=60 (standard)      │
+//!                                └─────────────┬─────────────┘
+//!                                              │
+//!                                    Vec<(Id, DeterministicScore)>
+//! ```
+//!
+//! # Trait Hierarchy
+//!
+//! ```text
+//! VectorSearch ──┐
+//!                ├── HybridSearcher
+//! KeywordSearch ─┘
+//!
+//! Reranker (standalone, generic over Id)
+//! ```
+//!
+//! Each trait can be implemented independently:
+//! - [`VectorSearch`]: Embedding-based nearest-neighbor search (e.g., HNSW)
+//! - [`KeywordSearch`]: Text-based retrieval (e.g., BM25)
+//! - [`HybridSearcher`]: Combined search requiring both vector + keyword
+//! - [`Reranker`]: Post-retrieval reranking (e.g., cross-encoder)
+//!
+//! # Fusion Strategies
+//!
+//! - **RRF (Reciprocal Rank Fusion)**: Default and recommended. Uses only ranks,
+//!   making it robust to score distribution differences.
+//! - **Weighted**: Linear combination of scores with configurable weights.
+//! - **Union**: Takes the maximum score per ID across sources.
+//!
+//! # Example
+//!
+//! ```rust,ignore
+//! use khive_retrieval::hybrid::{
+//!     HybridConfig, HybridSearcher, VectorSearch, KeywordSearch, Query, fuse_search_results,
+//! };
+//! use khive_score::DeterministicScore;
+//!
+//! // Create your own searcher implementing VectorSearch + KeywordSearch + HybridSearcher
+//! // Then use fuse_search_results to combine vector and keyword results
+//!
+//! let vector_results = vec![("doc1".to_string(), DeterministicScore::from_f64(0.9))];
+//! let keyword_results = vec![("doc1".to_string(), DeterministicScore::from_f64(0.85))];
+//!
+//! let config = HybridConfig::new(10);
+//! let fused = fuse_search_results(vec![vector_results, keyword_results], &config);
+//! ```
+//!
+//! See [ADR-002](../docs/ADR-002-hybrid-search.md) for algorithm specification.
+
+mod config;
+#[cfg(feature = "native-rerank")]
+mod cross_encoder;
+pub mod dual_index;
+mod searcher;
+
+// Re-export public types
+pub use config::{HybridConfig, Query, DEFAULT_POOL_MULTIPLIER};
+#[cfg(feature = "native-rerank")]
+pub use cross_encoder::{CrossEncoderScorer, NativeCrossEncoderReranker, RerankDocumentResolver};
+pub use dual_index::{DualIndexConfig, DualIndexRouter, DualIndexStrategy};
+pub use searcher::{fuse_search_results, HybridSearcher, KeywordSearch, Reranker, VectorSearch};
diff --git a/crates/khive-retrieval/src/hybrid/searcher.rs b/crates/khive-retrieval/src/hybrid/searcher.rs
new file mode 100644
index 00000000..096e1255
--- /dev/null
+++ b/crates/khive-retrieval/src/hybrid/searcher.rs
@@ -0,0 +1,354 @@
+//! Granular search traits and hybrid search implementation.
+//!
+//! # Trait Hierarchy
+//!
+//! ```text
+//! VectorSearch ──┐
+//!                ├── HybridSearcher
+//! KeywordSearch ─┘
+//!
+//! Reranker (standalone, generic over Id)
+//! ```
+//!
+//! Each trait can be implemented independently, enabling:
+//! - Vector-only search (e.g., HNSW index)
+//! - Keyword-only search (e.g., BM25 index)
+//! - Full hybrid search (combining both with fusion)
+//! - Reranking as a separate, composable concern
+
+use std::hash::Hash;
+
+use async_trait::async_trait;
+use khive_score::DeterministicScore;
+
+use crate::error::Result;
+use khive_fusion::{fuse, FusionStrategy};
+
+use super::config::{HybridConfig, Query};
+
+/// Trait for vector similarity search.
+///
+/// Implementors provide embedding-based nearest-neighbor search
+/// (e.g., HNSW, flat scan, IVF).
+///
+/// # Associated Types
+///
+/// * `Id` - The identifier type for documents/results. Requires `Ord` for
+///   deterministic tie-breaking when scores are equal.
+///
+/// # Example
+///
+/// ```rust,ignore
+/// use khive_retrieval::hybrid::VectorSearch;
+///
+/// struct MyVectorIndex { /* ... */ }
+///
+/// #[async_trait::async_trait]
+/// impl VectorSearch for MyVectorIndex {
+///     type Id = String;
+///
+///     async fn vector_search(&self, embedding: &[f32], top_k: usize)
+///         -> khive_retrieval::Result<Vec<(String, khive_score::DeterministicScore)>>
+///     {
+///         // Your HNSW/ANN implementation here
+///         todo!()
+///     }
+/// }
+/// ```
+#[async_trait]
+pub trait VectorSearch: Send + Sync {
+    /// The ID type for search results.
+    /// `Ord` is required for deterministic tie-breaking when scores are equal.
+    type Id: Eq + Hash + Clone + Ord + Send + Sync;
+
+    /// Perform vector-only search.
+    ///
+    /// # Arguments
+    ///
+    /// * `embedding` - Query embedding vector
+    /// * `top_k` - Number of results to return
+    ///
+    /// # Returns
+    ///
+    /// Vector of (Id, DeterministicScore) pairs sorted by similarity descending.
+    async fn vector_search(
+        &self,
+        embedding: &[f32],
+        top_k: usize,
+    ) -> Result<Vec<(Self::Id, DeterministicScore)>>;
+}
+
+/// Trait for keyword-based search.
+///
+/// Implementors provide text-based retrieval (e.g., BM25, TF-IDF).
+///
+/// # Associated Types
+///
+/// * `Id` - The identifier type for documents/results. Requires `Ord` for
+///   deterministic tie-breaking when scores are equal.
+///
+/// # Example
+///
+/// ```rust,ignore
+/// use khive_retrieval::hybrid::KeywordSearch;
+///
+/// struct MyBm25Index { /* ... */ }
+///
+/// #[async_trait::async_trait]
+/// impl KeywordSearch for MyBm25Index {
+///     type Id = String;
+///
+///     async fn keyword_search(&self, text: &str, top_k: usize)
+///         -> khive_retrieval::Result<Vec<(String, khive_score::DeterministicScore)>>
+///     {
+///         // Your BM25 implementation here
+///         todo!()
+///     }
+/// }
+/// ```
+#[async_trait]
+pub trait KeywordSearch: Send + Sync {
+    /// The ID type for search results.
+    /// `Ord` is required for deterministic tie-breaking when scores are equal.
+    type Id: Eq + Hash + Clone + Ord + Send + Sync;
+
+    /// Perform keyword-only search (BM25).
+    ///
+    /// # Arguments
+    ///
+    /// * `text` - Query text
+    /// * `top_k` - Number of results to return
+    ///
+    /// # Returns
+    ///
+    /// Vector of (Id, DeterministicScore) pairs sorted by BM25 score descending.
+    async fn keyword_search(
+        &self,
+        text: &str,
+        top_k: usize,
+    ) -> Result<Vec<(Self::Id, DeterministicScore)>>;
+}
+
+/// Trait for hybrid search operations.
+///
+/// Combines vector similarity search (HNSW) with keyword search (BM25)
+/// using configurable fusion strategies.
+///
+/// # Supertrait Constraint
+///
+/// Requires both [`VectorSearch`] and [`KeywordSearch`] to be implemented
+/// with the **same `Id` type**, enforced by the
+/// `KeywordSearch<Id = <Self as VectorSearch>::Id>` bound.
+///
+/// # Example
+///
+/// ```rust,ignore
+/// use khive_retrieval::hybrid::{HybridSearcher, VectorSearch, KeywordSearch};
+///
+/// struct MyHybridIndex { /* ... */ }
+///
+/// // Implement VectorSearch and KeywordSearch first, then HybridSearcher
+/// #[async_trait::async_trait]
+/// impl HybridSearcher for MyHybridIndex {
+///     async fn hybrid_search(&self, query: &Query, config: &HybridConfig)
+///         -> Result<Vec<(String, DeterministicScore)>>
+///     {
+///         let mut sources = Vec::new();
+///         if let Some(emb) = &query.embedding {
+///             sources.push(self.vector_search(emb, config.candidate_pool_size).await?);
+///         }
+///         sources.push(self.keyword_search(&query.text, config.candidate_pool_size).await?);
+///         Ok(fuse_search_results(sources, config))
+///     }
+/// }
+/// ```
+#[async_trait]
+pub trait HybridSearcher: VectorSearch + KeywordSearch<Id = <Self as VectorSearch>::Id> {
+    /// Perform hybrid search combining vector and keyword retrieval.
+    ///
+    /// # Arguments
+    ///
+    /// * `query` - The search query (text + optional embedding)
+    /// * `config` - Hybrid search configuration
+    ///
+    /// # Returns
+    ///
+    /// Vector of (Id, DeterministicScore) pairs sorted by fused score descending.
+    async fn hybrid_search(
+        &self,
+        query: &Query,
+        config: &HybridConfig,
+    ) -> Result<Vec<(<Self as VectorSearch>::Id, DeterministicScore)>>;
+}
+
+/// Trait for reranking search results.
+///
+/// Separates the reranking concern from search, enabling:
+/// - Cross-encoder neural reranking
+/// - LLM-based reranking
+/// - Custom scoring adjustments
+///
+/// The `Id` type is a generic parameter rather than an associated type,
+/// allowing a single reranker to work with different ID types.
+///
+/// # Example
+///
+/// ```rust,ignore
+/// use khive_retrieval::hybrid::Reranker;
+///
+/// struct CrossEncoderReranker { /* model handle */ }
+///
+/// #[async_trait::async_trait]
+/// impl Reranker<String> for CrossEncoderReranker {
+///     async fn rerank(
+///         &self,
+///         query: &str,
+///         results: Vec<(String, DeterministicScore)>,
+///         top_k: usize,
+///     ) -> Result<Vec<(String, DeterministicScore)>> {
+///         // Score each (query, document) pair with cross-encoder
+///         // Sort by new scores and truncate to top_k
+///         todo!()
+///     }
+/// }
+/// ```
+#[async_trait]
+pub trait Reranker<Id: Send + Sync + 'static>: Send + Sync {
+    /// Rerank search results using additional signals.
+    ///
+    /// # Arguments
+    ///
+    /// * `query` - The original query text for relevance scoring
+    /// * `results` - Pre-ranked results to reorder
+    /// * `top_k` - Number of results to return after reranking
+    ///
+    /// # Returns
+    ///
+    /// Reranked vector of (Id, DeterministicScore) pairs, truncated to `top_k`.
+    async fn rerank(
+        &self,
+        query: &str,
+        results: Vec<(Id, DeterministicScore)>,
+        top_k: usize,
+    ) -> Result<Vec<(Id, DeterministicScore)>>;
+}
+
+/// Helper function to perform fusion on search results.
+///
+/// This can be used by implementors of [`HybridSearcher`] to fuse results
+/// from their [`VectorSearch`] and [`KeywordSearch`] implementations.
+///
+/// `Ord` is required for deterministic tie-breaking when scores are equal.
+pub fn fuse_search_results<Id: Eq + Hash + Clone + Ord>(
+    sources: Vec<Vec<(Id, DeterministicScore)>>,
+    config: &HybridConfig,
+) -> Vec<(Id, DeterministicScore)> {
+    if sources.is_empty() {
+        return Vec::new();
+    }
+
+    if sources.len() == 1 {
+        let mut results = sources.into_iter().next().unwrap();
+        if let Some(min_score) = config.min_score {
+            results.retain(|(_, score)| *score >= min_score);
+        }
+        results.truncate(config.top_k);
+        return results;
+    }
+
+    // Determine fusion strategy
+    let strategy = match &config.fusion_strategy {
+        FusionStrategy::Weighted { .. } => {
+            // Use configured weights
+            let (v, k) = config.normalized_weights();
+            FusionStrategy::weighted(vec![v, k])
+        }
+        other => other.clone(),
+    };
+
+    // Fuse results
+    let mut fused = fuse(sources, &strategy, config.top_k);
+
+    // Apply minimum score filter
+    if let Some(min_score) = config.min_score {
+        fused.retain(|(_, score)| *score >= min_score);
+    }
+
+    fused
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_fuse_empty_sources() {
+        let sources: Vec<Vec<(String, DeterministicScore)>> = vec![];
+        let config = HybridConfig::default();
+        let results = fuse_search_results(sources, &config);
+        assert!(results.is_empty());
+    }
+
+    #[test]
+    fn test_fuse_single_source() {
+        let sources = vec![vec![
+            ("a".to_string(), DeterministicScore::from_f64(0.9)),
+            ("b".to_string(), DeterministicScore::from_f64(0.8)),
+        ]];
+        let config = HybridConfig::new(10);
+        let results = fuse_search_results(sources, &config);
+
+        assert_eq!(results.len(), 2);
+        assert_eq!(results[0].0, "a");
+    }
+
+    #[test]
+    fn test_fuse_multiple_sources_rrf() {
+        let source1 = vec![
+            ("a".to_string(), DeterministicScore::from_f64(0.9)),
+            ("b".to_string(), DeterministicScore::from_f64(0.8)),
+        ];
+        let source2 = vec![
+            ("b".to_string(), DeterministicScore::from_f64(0.95)),
+            ("c".to_string(), DeterministicScore::from_f64(0.7)),
+        ];
+
+        let config = HybridConfig::new(10);
+        let results = fuse_search_results(vec![source1, source2], &config);
+
+        assert_eq!(results.len(), 3);
+        // b appears in both, should have highest RRF score
+        assert_eq!(results[0].0, "b");
+    }
+
+    #[test]
+    fn test_fuse_with_min_score() {
+        let sources = vec![vec![
+            ("a".to_string(), DeterministicScore::from_f64(0.9)),
+            ("b".to_string(), DeterministicScore::from_f64(0.1)),
+        ]];
+
+        let config = HybridConfig::new(10).with_min_score(DeterministicScore::from_f64(0.5));
+        let results = fuse_search_results(sources, &config);
+
+        // b should be filtered out (RRF score ~0.016 < 0.5)
+        // Actually RRF scores are very small, let's use a lower threshold
+        assert!(!results.is_empty());
+    }
+
+    #[test]
+    fn test_fuse_top_k_limit() {
+        let sources = vec![vec![
+            ("a".to_string(), DeterministicScore::from_f64(0.9)),
+            ("b".to_string(), DeterministicScore::from_f64(0.8)),
+            ("c".to_string(), DeterministicScore::from_f64(0.7)),
+            ("d".to_string(), DeterministicScore::from_f64(0.6)),
+            ("e".to_string(), DeterministicScore::from_f64(0.5)),
+        ]];
+
+        let config = HybridConfig::new(3);
+        let results = fuse_search_results(sources, &config);
+
+        assert_eq!(results.len(), 3);
+    }
+}
diff --git a/crates/khive-retrieval/src/lib.rs b/crates/khive-retrieval/src/lib.rs
new file mode 100644
index 00000000..0edad1ba
--- /dev/null
+++ b/crates/khive-retrieval/src/lib.rs
@@ -0,0 +1,190 @@
+#![allow(clippy::uninlined_format_args)]
+#![allow(clippy::field_reassign_with_default)]
+#![allow(clippy::approx_constant)]
+// Note: field_reassign_with_default is needed for some internal tests
+
+//! Hybrid search and ranking with deterministic scoring for khive.
+//!
+//! This crate provides:
+//! - HNSW vector search with `DeterministicScore` output
+//! - BM25 keyword search for exact matches
+//! - Reciprocal Rank Fusion (RRF) for hybrid search
+//! - Graph traversal for relationship-aware retrieval
+//!
+//! # Architecture
+//!
+//! ```text
+//! ┌─────────────────────────────────────────────────────────────────┐
+//! │                      khive-retrieval                             │
+//! │  ┌───────────┐  ┌───────────┐  ┌───────────┐  ┌───────────┐    │
+//! │  │   hnsw/   │  │   bm25/   │  │  graph/   │  │  fusion/  │    │
+//! │  │ (vector)  │  │ (keyword) │  │(traversal)│  │   (RRF)   │    │
+//! │  └───────────┘  └───────────┘  └───────────┘  └───────────┘    │
+//! │                       │                                          │
+//! │                       ▼                                          │
+//! │               ┌───────────────┐                                  │
+//! │               │    hybrid/    │                                  │
+//! │               │   (unified)   │                                  │
+//! │               └───────────────┘                                  │
+//! │                                                                  │
+//! │  Inputs: Query + optional embedding + optional start nodes       │
+//! │  Outputs: Vec<(Id, DeterministicScore)>                         │
+//! └─────────────────────────────────────────────────────────────────┘
+//! ```
+//!
+//! # Design Principles
+//!
+//! ## Deterministic Scoring (ADR-002)
+//!
+//! All scores use `DeterministicScore` from `khive-score` for:
+//! - Cross-platform identical rankings (x86_64, ARM64, WASM)
+//! - `Ord` implementation (sortable, usable in BTreeSet)
+//! - `Hash` implementation (cacheable)
+//!
+//! ## Index Management (ADR-003)
+//!
+//! - HNSW: Hierarchical Navigable Small World graphs for ANN search
+//! - BM25: Okapi BM25 for keyword relevance
+//! - Both support incremental updates with periodic rebuild
+//!
+//! ## Graph Traversal (ADR-004)
+//!
+//! - BFS for level-by-level exploration
+//! - DFS for deep path exploration
+//! - Bidirectional BFS for shortest path
+//!
+//! ## ID Types and Bridging
+//!
+//! Each retrieval module uses a different ID type:
+//!
+//! | Module | ID Type | Backing |
+//! |--------|---------|---------|
+//! | HNSW   | [`EmbeddingId`] | 128-bit (ULID, from khive-types) |
+//! | BM25   | [`DocumentId`]  | Newtype over `String` |
+//! | Graph  | `EntityRef`     | Enum (from khive-db) |
+//! | Fusion | Generic `Id`    | `Eq + Hash + Clone + Ord` |
+//!
+//! The [`fusion::fuse`] function is generic over the ID type, so hybrid
+//! search that combines results from different modules requires a common
+//! representation. Bridging strategies:
+//!
+//! 1. **String-based**: Convert all IDs to `String` before fusion.
+//! 2. **DocumentId-based**: Convert `EmbeddingId` to `DocumentId` via
+//!    `DocumentId::new(embedding_id.to_string())`.
+//! 3. **Application-level mapping**: Maintain a bidirectional lookup table
+//!    between ID types in the application layer.
+//!
+//! See [`DocumentId`] for details on the newtype and conversion traits.
+//!
+//! # Quick Start
+//!
+//! ```rust,ignore
+//! use khive_retrieval::{VectorSearch, KeywordSearch, HybridSearcher, Query, HybridConfig};
+//!
+//! // Implement granular traits independently:
+//! // - VectorSearch for embedding-based search (HNSW)
+//! // - KeywordSearch for text-based search (BM25)
+//! // - HybridSearcher for combined search (requires both)
+//! // - Reranker for post-retrieval reranking (standalone)
+//!
+//! // Example: keyword-only search
+//! let results = searcher.keyword_search("distributed systems", 10).await?;
+//!
+//! // Example: hybrid search (vector + keyword with fusion)
+//! let query = Query::hybrid("distributed systems", embedding_vec);
+//! let config = HybridConfig::new(10);
+//! let results = searcher.hybrid_search(&query, &config).await?;
+//!
+//! for (id, score) in results {
+//!     println!("{}: {}", id, score);
+//! }
+//! ```
+
+#![warn(missing_docs)]
+#![warn(clippy::all)]
+
+#[cfg(feature = "storage-adapters")]
+pub mod adapters;
+pub mod error;
+pub mod eval;
+// graph module depends on EntityRef/LinkStore/StorageContext from old monolith khive-db API;
+// gated until ported to current khive-storage GraphStore trait.
+#[cfg(feature = "graph-legacy")]
+pub mod graph;
+pub mod hybrid;
+pub mod metrics;
+#[cfg(feature = "persist")]
+pub mod persist;
+pub mod policy;
+pub mod query_ir;
+#[cfg(feature = "persist")]
+pub mod replay;
+pub mod search_config;
+pub mod timeout;
+#[cfg(feature = "persist")]
+pub mod weights;
+
+// Re-export adapter types
+#[cfg(feature = "storage-adapters")]
+pub use adapters::{StorageKeywordSearch, StorageVectorSearch};
+
+// Re-export core types
+pub use error::{ErrorKind, Result, RetrievalError};
+
+// Re-export types from sibling crates (now separate crates)
+pub use khive_bm25::{Bm25Config, Bm25Index, Bm25Stats, DocumentId, SearchContext};
+pub use khive_fusion::{
+    fuse, normalize_weights, reciprocal_rank_fusion, weighted_fusion, weights_are_normalized,
+    FusionStrategy, DEFAULT_RRF_K,
+};
+#[cfg(feature = "graph-legacy")]
+pub use graph::{
+    bfs_traverse, dfs_traverse, find_shortest_path, Direction, PathNode, TraversalOptions,
+    MAX_TRAVERSAL_DEPTH, MAX_TRAVERSAL_RESULTS,
+};
+pub use khive_hnsw::{
+    DistanceMetric, HnswCheckpointConfig, HnswConfig, HnswIndex, HnswSearchContext, HnswSnapshot,
+    NodeId, RebuildStats, TombstoneStats,
+};
+// TODO(port-checkpoint): HnswCheckpoint/HnswCheckpointStore depend on khive_fold::Checkpoint
+// which doesn't exist in the current khive-fold API. Re-enable when ported.
+// #[cfg(feature = "checkpoint")]
+// pub use khive_hnsw::{HnswCheckpoint, HnswCheckpointStore};
+pub use hybrid::{
+    fuse_search_results, DualIndexConfig, DualIndexRouter, DualIndexStrategy, HybridConfig,
+    HybridSearcher, KeywordSearch, Query, Reranker, VectorSearch,
+};
+// TODO(port-rerank): native cross-encoder reranking deferred; khive-inference not ported yet
+// #[cfg(feature = "native-rerank")]
+// pub use hybrid::{CrossEncoderScorer, NativeCrossEncoderReranker, RerankDocumentResolver};
+pub use metrics::{MetricEvent, MetricValue, MetricsSink, NoopSink, RecordingSink};
+#[cfg(feature = "persist")]
+pub use persist::{
+    PersistError, PersistenceStats, RetrievalPersistence, ShadowMetrics, ShadowValidationConfig,
+    ShadowValidationResult,
+};
+pub use policy::{filter_by_policy, filter_by_predicate, ClearanceLevel, SearchPolicy};
+pub use query_ir::{FilterPredicate, FuseStrategy, QueryNode, RerankMethod};
+pub use search_config::SearchConfig;
+pub use timeout::{
+    search_with_cancellation, search_with_deadline, search_with_optional_timeout,
+    search_with_timeout,
+};
+
+/// Re-exports from `lattice-embed` for app-layer access.
+///
+/// Apps should use these re-exports instead of depending on `lattice-embed` directly.
+/// This maintains the layer boundary: apps -> platform (retrieval) -> foundation (embed).
+///
+/// Core types (`EmbeddingModel`, `EmbeddingService`, `EmbedError`) are always available.
+/// Native model implementations (`NativeEmbeddingService`, etc.) require the `embed` feature.
+pub mod embed {
+    // Core types and traits (always available, no feature gate needed)
+    /// Result alias for embedding operations.
+    pub use lattice_embed::Result as EmbedResult;
+    pub use lattice_embed::{EmbedError, EmbeddingModel, EmbeddingService};
+
+    // Native model implementations (pure Rust lattice-embed via "embed" feature)
+    #[cfg(feature = "embed")]
+    pub use lattice_embed::{CachedEmbeddingService, NativeEmbeddingService};
+}
diff --git a/crates/khive-retrieval/src/metrics.rs b/crates/khive-retrieval/src/metrics.rs
new file mode 100644
index 00000000..0a074c8a
--- /dev/null
+++ b/crates/khive-retrieval/src/metrics.rs
@@ -0,0 +1,353 @@
+//! Observability hooks for retrieval indices.
+//!
+//! Provides a lightweight, trait-based metrics abstraction that avoids coupling
+//! the retrieval crate to any specific observability stack (Prometheus, OpenTelemetry,
+//! etc.). Callers inject a [`MetricsSink`] implementation and the indices emit
+//! well-known [`MetricEvent`]s during their operations.
+//!
+//! # Design Rationale
+//!
+//! - **Trait-based sink** rather than a global registry keeps the library
+//!   dependency-free and testable. The [`NoopSink`] compiles to zero overhead
+//!   when no observability is needed.
+//! - **`Arc<dyn MetricsSink>`** allows sharing one sink across multiple indices
+//!   without lifetime gymnastics.
+//! - **Well-known metric names** are `&'static str` constants so dashboards
+//!   can be built once and never break on typos.
+//!
+//! # Quick Start
+//!
+//! ```rust,ignore
+//! use std::sync::Arc;
+//! use khive_retrieval::metrics::{MetricsSink, NoopSink, RecordingSink};
+//! use khive_retrieval::HnswIndex;
+//!
+//! // Production: no-op (zero overhead)
+//! let mut idx = HnswIndex::new(128);
+//!
+//! // Testing: capture events
+//! let sink = Arc::new(RecordingSink::new());
+//! let mut idx = HnswIndex::new(128).with_metrics(sink.clone());
+//! // ... perform operations ...
+//! let events = sink.events();
+//! assert!(!events.is_empty());
+//! ```
+
+use std::fmt;
+use std::sync::{Arc, Mutex};
+
+// ---------------------------------------------------------------------------
+// Core types
+// ---------------------------------------------------------------------------
+
+/// A single metric observation.
+#[derive(Debug, Clone)]
+pub struct MetricEvent {
+    /// Well-known metric name (use constants from [`names`]).
+    pub name: &'static str,
+    /// Observed value.
+    pub value: MetricValue,
+    /// Dimensional labels for grouping / filtering.
+    pub labels: Vec<(&'static str, String)>,
+}
+
+/// Metric value kinds.
+#[derive(Debug, Clone, PartialEq)]
+pub enum MetricValue {
+    /// Monotonically increasing count.
+    Counter(u64),
+    /// Point-in-time measurement (can go up or down).
+    Gauge(f64),
+    /// Duration or distribution sample (typically seconds or milliseconds).
+    Histogram(f64),
+}
+
+impl fmt::Display for MetricValue {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        match self {
+            MetricValue::Counter(v) => write!(f, "counter({v})"),
+            MetricValue::Gauge(v) => write!(f, "gauge({v})"),
+            MetricValue::Histogram(v) => write!(f, "histogram({v})"),
+        }
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Sink trait
+// ---------------------------------------------------------------------------
+
+/// Receiver of metric events emitted by retrieval indices.
+///
+/// Implementors bridge to their observability stack (Prometheus counters,
+/// OTel meters, StatsD, etc.). The trait is `Send + Sync` so a single
+/// `Arc<dyn MetricsSink>` can be shared across threads.
+pub trait MetricsSink: Send + Sync + fmt::Debug {
+    /// Record a single metric event.
+    fn record(&self, event: MetricEvent);
+}
+
+// ---------------------------------------------------------------------------
+// Built-in sinks
+// ---------------------------------------------------------------------------
+
+/// Sink that silently discards every event.
+///
+/// This is the implicit default when no metrics are configured.
+/// All calls compile down to a no-op.
+#[derive(Debug, Clone, Copy, Default)]
+pub struct NoopSink;
+
+impl MetricsSink for NoopSink {
+    #[inline]
+    fn record(&self, _event: MetricEvent) {
+        // intentionally empty
+    }
+}
+
+/// Thread-safe recording sink for tests.
+///
+/// Collects every [`MetricEvent`] into an internal `Vec` guarded by a
+/// `Mutex`. Use [`events()`](Self::events) to snapshot the recorded events
+/// and [`clear()`](Self::clear) to reset.
+///
+/// # Example
+///
+/// ```rust,ignore
+/// use std::sync::Arc;
+/// use khive_retrieval::metrics::RecordingSink;
+///
+/// let sink = Arc::new(RecordingSink::new());
+/// // ... pass to index ...
+/// let events = sink.events();
+/// assert!(events.iter().any(|e| e.name == "hnsw.search.duration_ms"));
+/// ```
+#[derive(Debug, Default)]
+pub struct RecordingSink {
+    events: Mutex<Vec<MetricEvent>>,
+}
+
+impl RecordingSink {
+    /// Create a new, empty recording sink.
+    pub fn new() -> Self {
+        Self {
+            events: Mutex::new(Vec::new()),
+        }
+    }
+
+    /// Return a snapshot of all recorded events.
+    ///
+    /// Returns an empty vec if the mutex is poisoned (indicates a prior panic).
+    pub fn events(&self) -> Vec<MetricEvent> {
+        self.events
+            .lock()
+            .unwrap_or_else(|poisoned| poisoned.into_inner())
+            .clone()
+    }
+
+    /// Discard all recorded events.
+    ///
+    /// Silently skips clearing if the mutex is poisoned.
+    pub fn clear(&self) {
+        if let Ok(mut guard) = self.events.lock() {
+            guard.clear();
+        }
+    }
+
+    /// Return the number of recorded events.
+    ///
+    /// Returns 0 if the mutex is poisoned.
+    pub fn len(&self) -> usize {
+        self.events
+            .lock()
+            .unwrap_or_else(|poisoned| poisoned.into_inner())
+            .len()
+    }
+
+    /// Check if no events have been recorded.
+    pub fn is_empty(&self) -> bool {
+        self.len() == 0
+    }
+}
+
+impl MetricsSink for RecordingSink {
+    fn record(&self, event: MetricEvent) {
+        if let Ok(mut guard) = self.events.lock() {
+            guard.push(event);
+        }
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Well-known metric names
+// ---------------------------------------------------------------------------
+
+/// Well-known metric name constants.
+///
+/// Using constants prevents typos and allows dashboards to be built once.
+/// Names follow the `{subsystem}.{operation}.{measurement}` convention.
+pub mod names {
+    // -- HNSW --
+
+    /// Duration of a single HNSW search in milliseconds.
+    pub const HNSW_SEARCH_DURATION_MS: &str = "hnsw.search.duration_ms";
+    /// Number of HNSW search operations completed.
+    pub const HNSW_SEARCH_COUNT: &str = "hnsw.search.count";
+    /// Number of results returned by an HNSW search.
+    pub const HNSW_SEARCH_RESULTS: &str = "hnsw.search.results";
+
+    /// Duration of a single HNSW insert in milliseconds.
+    pub const HNSW_INSERT_DURATION_MS: &str = "hnsw.insert.duration_ms";
+    /// Number of HNSW insert operations completed.
+    pub const HNSW_INSERT_COUNT: &str = "hnsw.insert.count";
+
+    /// Duration of an HNSW rebuild in milliseconds.
+    pub const HNSW_REBUILD_DURATION_MS: &str = "hnsw.rebuild.duration_ms";
+    /// Number of HNSW rebuild operations completed.
+    pub const HNSW_REBUILD_COUNT: &str = "hnsw.rebuild.count";
+    /// Number of nodes removed during a rebuild.
+    pub const HNSW_REBUILD_NODES_REMOVED: &str = "hnsw.rebuild.nodes_removed";
+
+    /// Current number of live vectors in the HNSW index.
+    pub const HNSW_INDEX_SIZE: &str = "hnsw.index.size";
+
+    // -- BM25 --
+
+    /// Duration of a single BM25 search in milliseconds.
+    pub const BM25_SEARCH_DURATION_MS: &str = "bm25.search.duration_ms";
+    /// Number of BM25 search operations completed.
+    pub const BM25_SEARCH_COUNT: &str = "bm25.search.count";
+    /// Number of results returned by a BM25 search.
+    pub const BM25_SEARCH_RESULTS: &str = "bm25.search.results";
+
+    /// Duration of a single BM25 index_document call in milliseconds.
+    pub const BM25_INDEX_DURATION_MS: &str = "bm25.index_document.duration_ms";
+    /// Number of BM25 index_document operations completed.
+    pub const BM25_INDEX_COUNT: &str = "bm25.index_document.count";
+
+    /// Current number of documents in the BM25 index.
+    pub const BM25_INDEX_SIZE: &str = "bm25.index.size";
+}
+
+// ---------------------------------------------------------------------------
+// Helper: emit to optional sink
+// ---------------------------------------------------------------------------
+
+/// Convenience function to emit a metric event to an optional sink.
+///
+/// This avoids repeating `if let Some(sink) = &self.metrics { ... }` in
+/// every instrumented method.
+#[inline]
+pub fn emit(sink: &Option<Arc<dyn MetricsSink>>, event: MetricEvent) {
+    if let Some(s) = sink {
+        s.record(event);
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+#[allow(clippy::approx_constant)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn noop_sink_does_not_panic() {
+        let sink = NoopSink;
+        sink.record(MetricEvent {
+            name: names::HNSW_SEARCH_COUNT,
+            value: MetricValue::Counter(1),
+            labels: vec![],
+        });
+    }
+
+    #[test]
+    fn recording_sink_captures_events() {
+        let sink = RecordingSink::new();
+        assert!(sink.is_empty());
+
+        sink.record(MetricEvent {
+            name: names::HNSW_SEARCH_DURATION_MS,
+            value: MetricValue::Histogram(1.5),
+            labels: vec![("k", "10".to_string())],
+        });
+        sink.record(MetricEvent {
+            name: names::HNSW_SEARCH_COUNT,
+            value: MetricValue::Counter(1),
+            labels: vec![],
+        });
+
+        assert_eq!(sink.len(), 2);
+        assert!(!sink.is_empty());
+
+        let events = sink.events();
+        assert_eq!(events.len(), 2);
+        assert_eq!(events[0].name, names::HNSW_SEARCH_DURATION_MS);
+        assert_eq!(events[1].name, names::HNSW_SEARCH_COUNT);
+    }
+
+    #[test]
+    fn recording_sink_clear() {
+        let sink = RecordingSink::new();
+        sink.record(MetricEvent {
+            name: names::HNSW_INSERT_COUNT,
+            value: MetricValue::Counter(1),
+            labels: vec![],
+        });
+        assert_eq!(sink.len(), 1);
+
+        sink.clear();
+        assert!(sink.is_empty());
+    }
+
+    #[test]
+    fn metric_value_display() {
+        assert_eq!(MetricValue::Counter(42).to_string(), "counter(42)");
+        assert_eq!(MetricValue::Gauge(3.14).to_string(), "gauge(3.14)");
+        assert_eq!(MetricValue::Histogram(1.5).to_string(), "histogram(1.5)");
+    }
+
+    #[test]
+    fn emit_helper_with_none() {
+        // Should not panic
+        emit(
+            &None,
+            MetricEvent {
+                name: names::HNSW_SEARCH_COUNT,
+                value: MetricValue::Counter(1),
+                labels: vec![],
+            },
+        );
+    }
+
+    #[test]
+    fn emit_helper_with_some() {
+        let sink = Arc::new(RecordingSink::new());
+        let opt: Option<Arc<dyn MetricsSink>> = Some(sink.clone());
+
+        emit(
+            &opt,
+            MetricEvent {
+                name: names::BM25_SEARCH_COUNT,
+                value: MetricValue::Counter(1),
+                labels: vec![],
+            },
+        );
+
+        assert_eq!(sink.len(), 1);
+    }
+
+    #[test]
+    fn recording_sink_is_send_sync() {
+        fn assert_send_sync<T: Send + Sync>() {}
+        assert_send_sync::<RecordingSink>();
+    }
+
+    #[test]
+    fn metrics_sink_is_object_safe() {
+        // Prove we can construct an Arc<dyn MetricsSink>
+        let _: Arc<dyn MetricsSink> = Arc::new(NoopSink);
+        let _: Arc<dyn MetricsSink> = Arc::new(RecordingSink::new());
+    }
+}
diff --git a/crates/khive-retrieval/src/persist/bm25.rs b/crates/khive-retrieval/src/persist/bm25.rs
new file mode 100644
index 00000000..b6043dc3
--- /dev/null
+++ b/crates/khive-retrieval/src/persist/bm25.rs
@@ -0,0 +1,112 @@
+//! BM25-specific persistence methods.
+
+use khive_bm25::Bm25Index;
+
+use super::shadow::{log_validation_result, should_sample};
+use super::{
+    PersistError, RetrievalPersistence, ShadowMetrics, ShadowValidationConfig,
+    ShadowValidationResult,
+};
+
+impl RetrievalPersistence {
+    /// Persist a BM25 index to SQLite.
+    ///
+    /// The entire index is serialized (it already has Serde derives).
+    pub async fn persist_bm25_index(&self, index: &Bm25Index) -> Result<(), PersistError> {
+        self.persist_snapshot("bm25", index).await
+    }
+
+    /// Load the latest BM25 index from SQLite.
+    ///
+    /// Returns `None` if no snapshot exists for this namespace.
+    /// Rebuilds the fast-path `doc_lengths_vec` from the deserialized HashMap.
+    pub async fn load_bm25_index(&self) -> Result<Option<Bm25Index>, PersistError> {
+        let mut index = self.load_snapshot::<Bm25Index>("bm25").await?;
+        if let Some(ref mut idx) = index {
+            idx.ensure_doc_lengths_vec();
+        }
+        Ok(index)
+    }
+
+    /// Persist a BM25 index with optional shadow validation.
+    ///
+    /// If shadow validation is enabled, the index is immediately loaded
+    /// back and compared to verify integrity. Discrepancies are logged but
+    /// do not block the persist operation.
+    pub async fn persist_bm25_with_validation(
+        &self,
+        index: &Bm25Index,
+        config: &ShadowValidationConfig,
+    ) -> Result<Option<ShadowValidationResult>, PersistError> {
+        // Always persist first
+        self.persist_bm25_index(index).await?;
+
+        // Skip validation if disabled or not sampled
+        if !config.enabled || !should_sample(config.sample_rate) {
+            return Ok(None);
+        }
+
+        // Capture expected metrics
+        let expected = ShadowMetrics {
+            item_count: index.doc_count(),
+            tombstone_count: 0, // BM25 doesn't have tombstones
+            snapshot_size: 0,
+        };
+
+        // Perform shadow validation
+        let result = self.validate_bm25_snapshot(expected).await;
+
+        // Log result (non-blocking)
+        log_validation_result(&result);
+
+        Ok(Some(result))
+    }
+
+    /// Validate a BM25 snapshot by loading it back and comparing metrics.
+    pub(crate) async fn validate_bm25_snapshot(
+        &self,
+        expected: ShadowMetrics,
+    ) -> ShadowValidationResult {
+        let mut result = ShadowValidationResult {
+            passed: false,
+            index_type: "bm25".to_string(),
+            expected: expected.clone(),
+            actual: None,
+            discrepancies: Vec::new(),
+        };
+
+        // Try to load the snapshot back
+        match self.load_bm25_index().await {
+            Ok(Some(index)) => {
+                let actual = ShadowMetrics {
+                    item_count: index.doc_count(),
+                    tombstone_count: 0,
+                    snapshot_size: 0,
+                };
+
+                // Compare metrics
+                if actual.item_count != expected.item_count {
+                    result.discrepancies.push(format!(
+                        "doc_count mismatch: expected {}, got {}",
+                        expected.item_count, actual.item_count
+                    ));
+                }
+
+                result.actual = Some(actual);
+                result.passed = result.discrepancies.is_empty();
+            }
+            Ok(None) => {
+                result
+                    .discrepancies
+                    .push("index not found after persist".to_string());
+            }
+            Err(e) => {
+                result
+                    .discrepancies
+                    .push(format!("failed to load index: {e}"));
+            }
+        }
+
+        result
+    }
+}
diff --git a/crates/khive-retrieval/src/persist/hnsw.rs b/crates/khive-retrieval/src/persist/hnsw.rs
new file mode 100644
index 00000000..5f8e7b3d
--- /dev/null
+++ b/crates/khive-retrieval/src/persist/hnsw.rs
@@ -0,0 +1,127 @@
+//! HNSW-specific persistence methods.
+
+use khive_hnsw::HnswSnapshot;
+use khive_hnsw::HnswIndex;
+
+use super::shadow::{log_validation_result, should_sample};
+use super::{
+    PersistError, RetrievalPersistence, ShadowMetrics, ShadowValidationConfig,
+    ShadowValidationResult,
+};
+
+impl RetrievalPersistence {
+    /// Persist an HNSW index snapshot to SQLite.
+    ///
+    /// Creates a snapshot of the index and stores it as a serialized BLOB.
+    pub async fn persist_hnsw_snapshot(&self, index: &HnswIndex) -> Result<(), PersistError> {
+        let snapshot = index.snapshot();
+        self.persist_snapshot("hnsw", &snapshot).await
+    }
+
+    /// Load the latest HNSW snapshot from SQLite.
+    ///
+    /// Returns `None` if no snapshot exists for this namespace.
+    pub async fn load_hnsw_snapshot(&self) -> Result<Option<HnswSnapshot>, PersistError> {
+        self.load_snapshot::<HnswSnapshot>("hnsw").await
+    }
+
+    /// Persist an HNSW snapshot with optional shadow validation.
+    ///
+    /// If shadow validation is enabled, the snapshot is immediately loaded
+    /// back and compared to verify integrity. Discrepancies are logged but
+    /// do not block the persist operation.
+    pub async fn persist_hnsw_with_validation(
+        &self,
+        index: &HnswIndex,
+        config: &ShadowValidationConfig,
+    ) -> Result<Option<ShadowValidationResult>, PersistError> {
+        // Always persist first
+        self.persist_hnsw_snapshot(index).await?;
+
+        // Skip validation if disabled or not sampled
+        if !config.enabled || !should_sample(config.sample_rate) {
+            return Ok(None);
+        }
+
+        // Capture expected metrics
+        let expected = ShadowMetrics {
+            item_count: index.len(),
+            tombstone_count: index.tombstone_stats().tombstone_count,
+            snapshot_size: 0, // Will be filled by stats
+        };
+
+        // Perform shadow validation
+        let result = self.validate_hnsw_snapshot(expected).await;
+
+        // Log result (non-blocking)
+        log_validation_result(&result);
+
+        Ok(Some(result))
+    }
+
+    /// Validate an HNSW snapshot by loading it back and comparing metrics.
+    pub(crate) async fn validate_hnsw_snapshot(
+        &self,
+        expected: ShadowMetrics,
+    ) -> ShadowValidationResult {
+        let mut result = ShadowValidationResult {
+            passed: false,
+            index_type: "hnsw".to_string(),
+            expected: expected.clone(),
+            actual: None,
+            discrepancies: Vec::new(),
+        };
+
+        // Try to load the snapshot back
+        match self.load_hnsw_snapshot().await {
+            Ok(Some(snapshot)) => {
+                // Issue #867: Deep verification using HnswSnapshot::verify()
+                // This checks internal consistency beyond just count comparison:
+                // - Count consistency: total_nodes == live_nodes + tombstone_count
+                // - ID count integrity: indexed_ids.len() == total_nodes
+                // - Tombstone containment: all tombstoned IDs exist in indexed_ids
+                if let Err(e) = snapshot.verify() {
+                    result
+                        .discrepancies
+                        .push(format!("Snapshot verification failed: {e}"));
+                }
+
+                let actual = ShadowMetrics {
+                    item_count: snapshot.total_nodes,
+                    tombstone_count: snapshot.tombstone_count,
+                    snapshot_size: 0, // Not easily available without re-serializing
+                };
+
+                // Compare metrics
+                if actual.item_count != expected.item_count {
+                    result.discrepancies.push(format!(
+                        "item_count mismatch: expected {}, got {}",
+                        expected.item_count, actual.item_count
+                    ));
+                }
+
+                if actual.tombstone_count != expected.tombstone_count {
+                    result.discrepancies.push(format!(
+                        "tombstone_count mismatch: expected {}, got {}",
+                        expected.tombstone_count, actual.tombstone_count
+                    ));
+                }
+
+                result.actual = Some(actual);
+                result.passed = result.discrepancies.is_empty();
+            }
+            Ok(None) => {
+                result
+                    .discrepancies
+                    .push("snapshot not found after persist".to_string());
+            }
+            Err(e) => {
+                result
+                    .discrepancies
+                    .push(format!("failed to load snapshot: {e}"));
+            }
+        }
+
+        result
+    }
+}
diff --git a/crates/khive-retrieval/src/persist/mod.rs b/crates/khive-retrieval/src/persist/mod.rs
new file mode 100644
index 00000000..40d4e678
--- /dev/null
+++ b/crates/khive-retrieval/src/persist/mod.rs
@@ -0,0 +1,318 @@
+//! Retrieval index persistence using SQLite.
+//!
+//! This module provides SQLite-based persistence for HNSW and BM25 indexes,
+//! following the write-through pattern established in khive-engine:
+//!
+//! 1. Persist snapshots to SQLite (point of no return)
+//! 2. Rebuild in-memory indexes on cold start
+//!
+//! # Architecture
+//!
+//! ```text
+//! HnswIndex ──snapshot──> HnswSnapshot ──serialize──> SQLite BLOB
+//!                                                         │
+//! HnswIndex <──restore───────────────────────────────────┘
+//! ```
+//!
+//! # Feature Flag
+//!
+//! This module requires the `persist` feature flag:
+//!
+//! ```toml
+//! khive-retrieval = { path = "../khive-retrieval", features = ["persist"] }
+//! ```
+//!
+//! # Example
+//!
+//! ```rust,no_run
+//! use khive_retrieval::persist::RetrievalPersistence;
+//! use khive_retrieval::hnsw::HnswIndex;
+//! use rusqlite::Connection;
+//! use std::sync::Arc;
+//! use tokio::sync::Mutex;
+//!
+//! async fn example() -> Result<(), Box<dyn std::error::Error>> {
+//!     // Open a file-based SQLite connection
+//!     let conn = Connection::open("retrieval.db")?;
+//!     let conn = Arc::new(Mutex::new(conn));
+//!
+//!     let persist = RetrievalPersistence::new(conn, "default");
+//!
+//!     // Initialize schema before use
+//!     persist.init_schema().await?;
+//!
+//!     // Persist an HNSW index
+//!     let index = HnswIndex::new(384);
+//!     persist.persist_hnsw_snapshot(&index).await?;
+//!
+//!     // Restore on cold start
+//!     if let Some(snapshot) = persist.load_hnsw_snapshot().await? {
+//!         // Rebuild index from snapshot
+//!     }
+//!     Ok(())
+//! }
+//! ```
+
+use std::sync::Arc;
+
+use rusqlite::Connection;
+use serde::{de::DeserializeOwned, Serialize};
+use thiserror::Error;
+use tokio::sync::Mutex;
+
+mod bm25;
+mod hnsw;
+mod shadow;
+
+#[cfg(test)]
+mod tests;
+
+pub use shadow::{ShadowMetrics, ShadowValidationConfig, ShadowValidationResult};
+
+/// Errors that can occur during retrieval persistence operations.
+#[derive(Error, Debug)]
+pub enum PersistError {
+    /// SQLite operation failed.
+    #[error("SQLite error: {0}")]
+    Sqlite(#[from] rusqlite::Error),
+
+    /// Serialization failed.
+    #[error("Serialization error: {0}")]
+    Serialize(String),
+
+    /// Deserialization failed.
+    #[error("Deserialization error: {0}")]
+    Deserialize(String),
+
+    /// Spawn blocking task failed.
+    #[error("Task join error: {0}")]
+    TaskJoin(String),
+
+    /// Snapshot verification failed.
+    #[error("Snapshot verification failed: {0}")]
+    SnapshotVerification(String),
+
+    /// Validation error (e.g. empty namespace, out-of-range parameter).
+    #[error("Validation error: {0}")]
+    Validation(String),
+
+    /// Task join error from spawn_blocking.
+    #[error("Blocking task failed: {0}")]
+    BlockingJoin(String),
+
+    /// JoinError from tokio spawn_blocking (auto-converted).
+    #[error("Tokio join error: {0}")]
+    Join(#[from] tokio::task::JoinError),
+
+    /// Internal error (generic, for ported engine code).
+    #[error("Internal error: {0}")]
+    Internal(String),
+
+    /// Embedding error (for ported engine code).
+    #[error("Embedding error: {0}")]
+    Embedding(String),
+
+    /// Retrieval error (for ported engine code).
+    #[error("Retrieval error: {0}")]
+    Retrieval(String),
+}
+
+/// Retrieval index persistence using SQLite.
+///
+/// Provides methods to persist and restore HNSW and BM25 index snapshots
+/// to/from SQLite. Uses the write-through pattern from khive-engine.
+pub struct RetrievalPersistence {
+    /// SQLite connection (thread-safe via async mutex).
+    pub(crate) conn: Arc<Mutex<Connection>>,
+    /// Namespace for multi-tenancy.
+    /// Uses Arc<str> for O(1) cloning in async spawn contexts.
+    pub(crate) namespace: Arc<str>,
+}
+
+impl RetrievalPersistence {
+    /// Create a new persistence layer.
+    ///
+    /// # Arguments
+    ///
+    /// * `conn` - Arc-wrapped SQLite connection
+    /// * `namespace` - Namespace for multi-tenancy isolation
+    pub fn new(conn: Arc<Mutex<Connection>>, namespace: impl Into<String>) -> Self {
+        Self {
+            conn,
+            namespace: Arc::from(namespace.into()),
+        }
+    }
+
+    /// Initialize the persistence schema.
+    ///
+    /// Creates tables for index snapshots if they don't exist.
+    pub async fn init_schema(&self) -> Result<(), PersistError> {
+        let conn = self.conn.clone();
+        tokio::task::spawn_blocking(move || {
+            let conn = conn.blocking_lock();
+            conn.execute_batch(
+                r#"
+                CREATE TABLE IF NOT EXISTS retrieval_snapshots (
+                    namespace   TEXT NOT NULL,
+                    index_type  TEXT NOT NULL,
+                    snapshot    BLOB NOT NULL,
+                    created_at  INTEGER NOT NULL,
+                    PRIMARY KEY (namespace, index_type)
+                );
+
+                CREATE INDEX IF NOT EXISTS idx_retrieval_snapshots_namespace
+                    ON retrieval_snapshots(namespace);
+                "#,
+            )?;
+            Ok(())
+        })
+        .await
+        .map_err(|e| PersistError::TaskJoin(e.to_string()))?
+    }
+
+    /// Generic snapshot persistence.
+    pub(crate) async fn persist_snapshot<T: Serialize + Send + Sync>(
+        &self,
+        index_type: &str,
+        snapshot: &T,
+    ) -> Result<(), PersistError> {
+        let data =
+            serde_json::to_vec(snapshot).map_err(|e| PersistError::Serialize(e.to_string()))?;
+
+        let conn = self.conn.clone();
+        let namespace = self.namespace.clone();
+        let index_type = index_type.to_string();
+
+        tokio::task::spawn_blocking(move || {
+            let conn = conn.blocking_lock();
+            conn.execute(
+                r#"
+                INSERT OR REPLACE INTO retrieval_snapshots
+                    (namespace, index_type, snapshot, created_at)
+                VALUES
+                    (?1, ?2, ?3, ?4)
+                "#,
+                rusqlite::params![
+                    &*namespace,
+                    index_type,
+                    data,
+                    chrono::Utc::now().timestamp_micros()
+                ],
+            )?;
+            Ok(())
+        })
+        .await
+        .map_err(|e| PersistError::TaskJoin(e.to_string()))?
+    }
+
+    /// Generic snapshot loading.
+    pub(crate) async fn load_snapshot<T: DeserializeOwned + Send + 'static>(
+        &self,
+        index_type: &str,
+    ) -> Result<Option<T>, PersistError> {
+        let conn = self.conn.clone();
+        let namespace = self.namespace.clone();
+        let index_type = index_type.to_string();
+
+        tokio::task::spawn_blocking(move || {
+            let conn = conn.blocking_lock();
+            let mut stmt = conn.prepare(
+                r#"
+                SELECT snapshot FROM retrieval_snapshots
+                WHERE namespace = ?1 AND index_type = ?2
+                "#,
+            )?;
+
+            let result: Option<Vec<u8>> = match stmt
+                .query_row(rusqlite::params![&*namespace, index_type], |row| row.get(0))
+            {
+                Ok(data) => Some(data),
+                Err(rusqlite::Error::QueryReturnedNoRows) => None,
+                Err(e) => return Err(PersistError::Sqlite(e)),
+            };
+
+            match result {
+                Some(data) => {
+                    let snapshot: T = serde_json::from_slice(&data)
+                        .map_err(|e| PersistError::Deserialize(e.to_string()))?;
+                    Ok(Some(snapshot))
+                }
+                None => Ok(None),
+            }
+        })
+        .await
+        .map_err(|e| PersistError::TaskJoin(e.to_string()))?
+    }
+
+    /// Delete all snapshots for this namespace.
+    pub async fn clear(&self) -> Result<(), PersistError> {
+        let conn = self.conn.clone();
+        let namespace = self.namespace.clone();
+
+        tokio::task::spawn_blocking(move || {
+            let conn = conn.blocking_lock();
+            conn.execute(
+                "DELETE FROM retrieval_snapshots WHERE namespace = ?1",
+                rusqlite::params![&*namespace],
+            )?;
+            Ok(())
+        })
+        .await
+        .map_err(|e| PersistError::TaskJoin(e.to_string()))?
+    }
+
+    /// Get persistence statistics.
+    pub async fn stats(&self) -> Result<PersistenceStats, PersistError> {
+        let conn = self.conn.clone();
+        let namespace = self.namespace.clone();
+
+        tokio::task::spawn_blocking(move || {
+            let conn = conn.blocking_lock();
+            let mut stmt = conn.prepare(
+                r#"
+                SELECT index_type, length(snapshot), created_at
+                FROM retrieval_snapshots
+                WHERE namespace = ?1
+                "#,
+            )?;
+
+            let mut stats = PersistenceStats::default();
+            let mut rows = stmt.query(rusqlite::params![&*namespace])?;
+
+            while let Some(row) = rows.next()? {
+                let index_type: String = row.get(0)?;
+                let size: i64 = row.get(1)?;
+                let created_at: i64 = row.get(2)?;
+
+                match index_type.as_str() {
+                    "hnsw" => {
+                        stats.hnsw_snapshot_size = size as usize;
+                        stats.hnsw_snapshot_at = Some(created_at);
+                    }
+                    "bm25" => {
+                        stats.bm25_snapshot_size = size as usize;
+                        stats.bm25_snapshot_at = Some(created_at);
+                    }
+                    _ => {}
+                }
+            }
+
+            Ok(stats)
+        })
+        .await
+        .map_err(|e| PersistError::TaskJoin(e.to_string()))?
+    }
+}
+
+/// Statistics about persisted snapshots.
+#[derive(Debug, Default, Clone)]
+pub struct PersistenceStats {
+    /// Size of HNSW snapshot in bytes.
+    pub hnsw_snapshot_size: usize,
+    /// Timestamp when HNSW snapshot was created (Unix seconds).
+    pub hnsw_snapshot_at: Option<i64>,
+    /// Size of BM25 snapshot in bytes.
+    pub bm25_snapshot_size: usize,
+    /// Timestamp when BM25 snapshot was created (Unix seconds).
+    pub bm25_snapshot_at: Option<i64>,
+}
diff --git a/crates/khive-retrieval/src/persist/shadow.rs b/crates/khive-retrieval/src/persist/shadow.rs
new file mode 100644
index 00000000..a7814ec7
--- /dev/null
+++ b/crates/khive-retrieval/src/persist/shadow.rs
@@ -0,0 +1,105 @@
+//! Shadow validation types and helpers for persistence integrity checking.
+//!
+//! Shadow validation (Issue #628) verifies persisted snapshots can be correctly
+//! restored without blocking production operations. Discrepancies are logged only.
+
+use rand::Rng;
+
+// ---------------------------------------------------------------------------
+// Shadow Validation (Issue #628)
+// ---------------------------------------------------------------------------
+
+/// Configuration for shadow validation.
+///
+/// Shadow validation verifies persisted snapshots can be correctly restored
+/// without blocking production operations. Discrepancies are logged only.
+#[derive(Debug, Clone)]
+pub struct ShadowValidationConfig {
+    /// Whether shadow validation is enabled.
+    pub enabled: bool,
+    /// Sample rate for validation (0.0 to 1.0).
+    /// Set to 1.0 to validate every persist operation.
+    pub sample_rate: f64,
+}
+
+impl Default for ShadowValidationConfig {
+    fn default() -> Self {
+        Self {
+            enabled: false,
+            sample_rate: 0.1, // 10% sample rate by default
+        }
+    }
+}
+
+impl ShadowValidationConfig {
+    /// Enable shadow validation with full coverage.
+    pub fn enabled() -> Self {
+        Self {
+            enabled: true,
+            sample_rate: 1.0,
+        }
+    }
+
+    /// Enable shadow validation with a specific sample rate.
+    pub fn with_sample_rate(rate: f64) -> Self {
+        Self {
+            enabled: true,
+            sample_rate: rate.clamp(0.0, 1.0),
+        }
+    }
+}
+
+/// Result of shadow validation.
+#[derive(Debug, Clone)]
+pub struct ShadowValidationResult {
+    /// Whether validation passed.
+    pub passed: bool,
+    /// Index type that was validated.
+    pub index_type: String,
+    /// Expected metrics from the original index.
+    pub expected: ShadowMetrics,
+    /// Actual metrics from the restored snapshot.
+    pub actual: Option<ShadowMetrics>,
+    /// Discrepancies found (empty if validation passed).
+    pub discrepancies: Vec<String>,
+}
+
+/// Metrics captured for shadow validation comparison.
+#[derive(Debug, Clone, Default)]
+pub struct ShadowMetrics {
+    /// Total number of items in the index.
+    pub item_count: usize,
+    /// Number of tombstoned/deleted items (HNSW only).
+    pub tombstone_count: usize,
+    /// Snapshot size in bytes.
+    pub snapshot_size: usize,
+}
+
+/// Determine whether to sample this operation for validation.
+pub(crate) fn should_sample(rate: f64) -> bool {
+    if rate >= 1.0 {
+        return true;
+    }
+    if rate <= 0.0 {
+        return false;
+    }
+    rand::thread_rng().gen::<f64>() < rate
+}
+
+/// Log the validation result (logging-only, non-blocking).
+///
+/// This function logs discrepancies but never blocks or returns errors.
+/// In production, this should integrate with the application's logging
+/// infrastructure (e.g., tracing crate).
+pub(crate) fn log_validation_result(result: &ShadowValidationResult) {
+    // Only log failures - successful validations are silent by default
+    // to avoid log noise. The result is still returned to callers who
+    // may want to record metrics or take other actions.
+    if !result.passed {
+        tracing::warn!(
+            index_type = %result.index_type,
+            discrepancies = ?result.discrepancies,
+            "Shadow validation failed"
+        );
+    }
+}
diff --git a/crates/khive-retrieval/src/persist/tests.rs b/crates/khive-retrieval/src/persist/tests.rs
new file mode 100644
index 00000000..2efdf72d
--- /dev/null
+++ b/crates/khive-retrieval/src/persist/tests.rs
@@ -0,0 +1,1214 @@
+use super::*;
+use khive_bm25::Bm25Index;
+use khive_hnsw::HnswIndex;
+use rusqlite::Connection;
+use std::sync::Arc;
+use tokio::sync::Mutex;
+
+async fn setup_test_persistence() -> RetrievalPersistence {
+    let conn = Connection::open_in_memory().expect("open in-memory db");
+    conn.execute_batch("PRAGMA journal_mode=WAL; PRAGMA synchronous=NORMAL;")
+        .expect("set pragmas");
+    let persist = RetrievalPersistence::new(Arc::new(Mutex::new(conn)), "test");
+    persist.init_schema().await.expect("init schema");
+    persist
+}
+
+#[tokio::test]
+async fn test_persist_and_load_bm25() {
+    let persist = setup_test_persistence().await;
+
+    // Create and persist a BM25 index
+    let mut index = Bm25Index::default();
+    index
+        .index_document("doc1", "hello world")
+        .expect("index doc");
+    index
+        .index_document("doc2", "goodbye world")
+        .expect("index doc");
+
+    persist.persist_bm25_index(&index).await.expect("persist");
+
+    // Load and verify
+    let loaded = persist.load_bm25_index().await.expect("load");
+    assert!(loaded.is_some());
+    let loaded = loaded.unwrap();
+    assert_eq!(loaded.doc_count(), 2);
+}
+
+#[tokio::test]
+async fn test_persist_and_load_hnsw() {
+    let persist = setup_test_persistence().await;
+
+    // Create and persist an HNSW index with some vectors
+    let mut index = HnswIndex::new(4); // 4 dimensions
+
+    // Insert a few vectors
+    let id1 = NodeId::new([1; 16]);
+    let id2 = NodeId::new([2; 16]);
+    let id3 = NodeId::new([3; 16]);
+
+    index.insert(id1, vec![1.0, 0.0, 0.0, 0.0]).expect("insert");
+    index.insert(id2, vec![0.0, 1.0, 0.0, 0.0]).expect("insert");
+    index.insert(id3, vec![0.0, 0.0, 1.0, 0.0]).expect("insert");
+
+    assert_eq!(index.len(), 3);
+
+    // Persist the snapshot
+    persist
+        .persist_hnsw_snapshot(&index)
+        .await
+        .expect("persist");
+
+    // Load and verify the snapshot
+    let loaded = persist.load_hnsw_snapshot().await.expect("load");
+    assert!(loaded.is_some());
+    let snapshot = loaded.unwrap();
+
+    // Verify snapshot contains correct metadata
+    assert_eq!(snapshot.total_nodes, 3);
+    assert_eq!(snapshot.live_nodes, 3);
+    assert_eq!(snapshot.tombstone_count, 0);
+    assert_eq!(snapshot.indexed_ids.len(), 3);
+    assert!(snapshot.indexed_ids.contains(&id1));
+    assert!(snapshot.indexed_ids.contains(&id2));
+    assert!(snapshot.indexed_ids.contains(&id3));
+}
+
+#[tokio::test]
+async fn test_stats() {
+    let persist = setup_test_persistence().await;
+
+    // Initially empty
+    let stats = persist.stats().await.expect("stats");
+    assert_eq!(stats.hnsw_snapshot_size, 0);
+    assert_eq!(stats.bm25_snapshot_size, 0);
+
+    // Persist BM25
+    let index = Bm25Index::default();
+    persist.persist_bm25_index(&index).await.expect("persist");
+
+    // Check stats
+    let stats = persist.stats().await.expect("stats");
+    assert!(stats.bm25_snapshot_size > 0);
+    assert!(stats.bm25_snapshot_at.is_some());
+}
+
+#[tokio::test]
+async fn test_clear() {
+    let persist = setup_test_persistence().await;
+
+    // Persist something
+    let index = Bm25Index::default();
+    persist.persist_bm25_index(&index).await.expect("persist");
+
+    // Clear
+    persist.clear().await.expect("clear");
+
+    // Should be gone
+    let loaded = persist.load_bm25_index().await.expect("load");
+    assert!(loaded.is_none());
+}
+
+// -- Shadow validation tests --
+
+#[tokio::test]
+async fn test_shadow_validation_config_default() {
+    let config = ShadowValidationConfig::default();
+    assert!(!config.enabled);
+    assert!((config.sample_rate - 0.1).abs() < f64::EPSILON);
+}
+
+#[tokio::test]
+async fn test_shadow_validation_config_enabled() {
+    let config = ShadowValidationConfig::enabled();
+    assert!(config.enabled);
+    assert!((config.sample_rate - 1.0).abs() < f64::EPSILON);
+}
+
+#[tokio::test]
+async fn test_shadow_validation_config_sample_rate() {
+    let config = ShadowValidationConfig::with_sample_rate(0.5);
+    assert!(config.enabled);
+    assert!((config.sample_rate - 0.5).abs() < f64::EPSILON);
+
+    // Test clamping
+    let config = ShadowValidationConfig::with_sample_rate(1.5);
+    assert!((config.sample_rate - 1.0).abs() < f64::EPSILON);
+
+    let config = ShadowValidationConfig::with_sample_rate(-0.5);
+    assert!((config.sample_rate - 0.0).abs() < f64::EPSILON);
+}
+
+#[tokio::test]
+async fn test_bm25_shadow_validation_passes() {
+    let persist = setup_test_persistence().await;
+    let config = ShadowValidationConfig::enabled();
+
+    // Create and persist a BM25 index with validation
+    let mut index = Bm25Index::default();
+    index
+        .index_document("doc1", "hello world")
+        .expect("index doc");
+    index
+        .index_document("doc2", "goodbye world")
+        .expect("index doc");
+
+    let result = persist
+        .persist_bm25_with_validation(&index, &config)
+        .await
+        .expect("persist with validation");
+
+    assert!(result.is_some());
+    let validation = result.unwrap();
+    assert!(
+        validation.passed,
+        "validation should pass: {:?}",
+        validation.discrepancies
+    );
+    assert_eq!(validation.index_type, "bm25");
+    assert_eq!(validation.expected.item_count, 2);
+    assert!(validation.discrepancies.is_empty());
+}
+
+#[tokio::test]
+async fn test_shadow_validation_skipped_when_disabled() {
+    let persist = setup_test_persistence().await;
+    let config = ShadowValidationConfig::default(); // disabled
+
+    let index = Bm25Index::default();
+    let result = persist
+        .persist_bm25_with_validation(&index, &config)
+        .await
+        .expect("persist");
+
+    // Validation should be skipped
+    assert!(result.is_none());
+
+    // But the persist should still work
+    let loaded = persist.load_bm25_index().await.expect("load");
+    assert!(loaded.is_some());
+}
+
+#[tokio::test]
+async fn test_should_sample() {
+    use super::shadow::should_sample;
+
+    // Always sample at 1.0
+    assert!(should_sample(1.0));
+    assert!(should_sample(1.5)); // clamped to 1.0
+
+    // Never sample at 0.0
+    assert!(!should_sample(0.0));
+    assert!(!should_sample(-0.5)); // clamped to 0.0
+}
+
+// -- Issue #865: HNSW shadow validation test --
+
+#[tokio::test]
+async fn test_hnsw_shadow_validation_passes() {
+    let persist = setup_test_persistence().await;
+    let config = ShadowValidationConfig::enabled();
+
+    // Create an HNSW index with vectors
+    let mut index = HnswIndex::new(4);
+    let id1 = NodeId::new([1; 16]);
+    let id2 = NodeId::new([2; 16]);
+
+    index.insert(id1, vec![1.0, 0.0, 0.0, 0.0]).expect("insert");
+    index.insert(id2, vec![0.0, 1.0, 0.0, 0.0]).expect("insert");
+
+    let result = persist
+        .persist_hnsw_with_validation(&index, &config)
+        .await
+        .expect("persist with validation");
+
+    assert!(result.is_some());
+    let validation = result.unwrap();
+    assert!(
+        validation.passed,
+        "validation should pass: {:?}",
+        validation.discrepancies
+    );
+    assert_eq!(validation.index_type, "hnsw");
+    assert_eq!(validation.expected.item_count, 2);
+    assert!(validation.discrepancies.is_empty());
+}
+
+#[tokio::test]
+async fn test_hnsw_shadow_validation_with_tombstones() {
+    let persist = setup_test_persistence().await;
+    let config = ShadowValidationConfig::enabled();
+
+    // Create an HNSW index with vectors and tombstones
+    let mut index = HnswIndex::new(4);
+    let id1 = NodeId::new([1; 16]);
+    let id2 = NodeId::new([2; 16]);
+    let id3 = NodeId::new([3; 16]);
+
+    index.insert(id1, vec![1.0, 0.0, 0.0, 0.0]).expect("insert");
+    index.insert(id2, vec![0.0, 1.0, 0.0, 0.0]).expect("insert");
+    index.insert(id3, vec![0.0, 0.0, 1.0, 0.0]).expect("insert");
+    index.delete(id2); // Tombstone id2
+
+    let result = persist
+        .persist_hnsw_with_validation(&index, &config)
+        .await
+        .expect("persist with validation");
+
+    assert!(result.is_some());
+    let validation = result.unwrap();
+    assert!(
+        validation.passed,
+        "validation should pass with tombstones: {:?}",
+        validation.discrepancies
+    );
+    assert_eq!(validation.expected.item_count, 3); // total_nodes including tombstones
+    assert_eq!(validation.expected.tombstone_count, 1);
+}
+
+// -- Issue #866: Namespace isolation test --
+
+#[tokio::test]
+async fn test_namespace_isolation() {
+    let conn = Connection::open_in_memory().expect("open in-memory db");
+    conn.execute_batch("PRAGMA journal_mode=WAL; PRAGMA synchronous=NORMAL;")
+        .expect("set pragmas");
+    let conn = Arc::new(Mutex::new(conn));
+
+    // Create two persistence layers with different namespaces
+    let persist_ns1 = RetrievalPersistence::new(conn.clone(), "namespace1");
+    let persist_ns2 = RetrievalPersistence::new(conn.clone(), "namespace2");
+
+    // Initialize schema (only needed once since they share the connection)
+    persist_ns1.init_schema().await.expect("init schema");
+
+    // Persist different data to each namespace
+    let mut index1 = Bm25Index::default();
+    index1
+        .index_document("doc1", "namespace one content")
+        .expect("index");
+
+    let mut index2 = Bm25Index::default();
+    index2
+        .index_document("doc2", "namespace two content")
+        .expect("index");
+    index2
+        .index_document("doc3", "more namespace two")
+        .expect("index");
+
+    persist_ns1
+        .persist_bm25_index(&index1)
+        .await
+        .expect("persist ns1");
+    persist_ns2
+        .persist_bm25_index(&index2)
+        .await
+        .expect("persist ns2");
+
+    // Verify each namespace loads its own data
+    let loaded1 = persist_ns1.load_bm25_index().await.expect("load ns1");
+    let loaded2 = persist_ns2.load_bm25_index().await.expect("load ns2");
+
+    assert!(loaded1.is_some());
+    assert!(loaded2.is_some());
+    assert_eq!(loaded1.unwrap().doc_count(), 1);
+    assert_eq!(loaded2.unwrap().doc_count(), 2);
+
+    // Clear one namespace and verify the other is unaffected
+    persist_ns1.clear().await.expect("clear ns1");
+
+    let loaded1_after = persist_ns1
+        .load_bm25_index()
+        .await
+        .expect("load ns1 after clear");
+    let loaded2_after = persist_ns2
+        .load_bm25_index()
+        .await
+        .expect("load ns2 after clear");
+
+    assert!(loaded1_after.is_none(), "ns1 should be cleared");
+    assert!(loaded2_after.is_some(), "ns2 should still exist");
+    assert_eq!(loaded2_after.unwrap().doc_count(), 2);
+}
+
+// -- Issue #868: Corrupted data handling tests --
+
+#[tokio::test]
+async fn test_corrupted_bm25_data_returns_error() {
+    let persist = setup_test_persistence().await;
+
+    // Manually insert corrupted JSON
+    {
+        let conn = persist.conn.clone();
+        let namespace = "test".to_string();
+        tokio::task::spawn_blocking(move || {
+            let conn = conn.blocking_lock();
+            conn.execute(
+                r#"
+                INSERT OR REPLACE INTO retrieval_snapshots
+                    (namespace, index_type, snapshot, created_at)
+                VALUES
+                    (?1, 'bm25', ?2, strftime('%s', 'now'))
+                "#,
+                rusqlite::params![namespace, b"not valid json {{{{"],
+            )
+            .expect("insert corrupted");
+        })
+        .await
+        .expect("spawn");
+    }
+
+    // Attempt to load should return an error
+    let result = persist.load_bm25_index().await;
+    assert!(result.is_err(), "loading corrupted data should fail");
+    let err = result.unwrap_err();
+    assert!(matches!(err, PersistError::Deserialize(_)));
+}
+
+#[tokio::test]
+async fn test_corrupted_hnsw_data_returns_error() {
+    let persist = setup_test_persistence().await;
+
+    // Manually insert corrupted JSON
+    {
+        let conn = persist.conn.clone();
+        let namespace = "test".to_string();
+        tokio::task::spawn_blocking(move || {
+            let conn = conn.blocking_lock();
+            conn.execute(
+                r#"
+                INSERT OR REPLACE INTO retrieval_snapshots
+                    (namespace, index_type, snapshot, created_at)
+                VALUES
+                    (?1, 'hnsw', ?2, strftime('%s', 'now'))
+                "#,
+                rusqlite::params![namespace, b"truncated json {\"total_nodes\":"],
+            )
+            .expect("insert corrupted");
+        })
+        .await
+        .expect("spawn");
+    }
+
+    // Attempt to load should return an error
+    let result = persist.load_hnsw_snapshot().await;
+    assert!(result.is_err(), "loading corrupted HNSW data should fail");
+    let err = result.unwrap_err();
+    assert!(matches!(err, PersistError::Deserialize(_)));
+}
+
+// -- Issue #868: Additional corrupted data handling tests --
+
+#[tokio::test]
+async fn test_valid_json_wrong_schema_bm25() {
+    let persist = setup_test_persistence().await;
+
+    // Insert valid JSON but wrong schema (missing required fields)
+    {
+        let conn = persist.conn.clone();
+        let namespace = "test".to_string();
+        tokio::task::spawn_blocking(move || {
+            let conn = conn.blocking_lock();
+            // Valid JSON but wrong structure for Bm25Index
+            let wrong_schema = br#"{"some_field": "value", "other": 123}"#;
+            conn.execute(
+                r#"
+                INSERT OR REPLACE INTO retrieval_snapshots
+                    (namespace, index_type, snapshot, created_at)
+                VALUES
+                    (?1, 'bm25', ?2, strftime('%s', 'now'))
+                "#,
+                rusqlite::params![namespace, wrong_schema.as_slice()],
+            )
+            .expect("insert wrong schema");
+        })
+        .await
+        .expect("spawn");
+    }
+
+    // Attempt to load should return an error (missing required fields)
+    let result = persist.load_bm25_index().await;
+    assert!(result.is_err(), "loading wrong schema should fail");
+    let err = result.unwrap_err();
+    assert!(matches!(err, PersistError::Deserialize(_)));
+}
+
+#[tokio::test]
+async fn test_valid_json_wrong_schema_hnsw() {
+    let persist = setup_test_persistence().await;
+
+    // Insert valid JSON but wrong schema (missing required fields for HnswSnapshot)
+    {
+        let conn = persist.conn.clone();
+        let namespace = "test".to_string();
+        tokio::task::spawn_blocking(move || {
+            let conn = conn.blocking_lock();
+            // Valid JSON but wrong structure for HnswSnapshot
+            let wrong_schema = br#"{"total_nodes": 5, "wrong_field": true}"#;
+            conn.execute(
+                r#"
+                INSERT OR REPLACE INTO retrieval_snapshots
+                    (namespace, index_type, snapshot, created_at)
+                VALUES
+                    (?1, 'hnsw', ?2, strftime('%s', 'now'))
+                "#,
+                rusqlite::params![namespace, wrong_schema.as_slice()],
+            )
+            .expect("insert wrong schema");
+        })
+        .await
+        .expect("spawn");
+    }
+
+    // Attempt to load should return an error (missing required fields)
+    let result = persist.load_hnsw_snapshot().await;
+    assert!(result.is_err(), "loading wrong schema should fail");
+    let err = result.unwrap_err();
+    assert!(matches!(err, PersistError::Deserialize(_)));
+}
+
+#[tokio::test]
+async fn test_empty_blob_returns_error() {
+    let persist = setup_test_persistence().await;
+
+    // Insert empty blob
+    {
+        let conn = persist.conn.clone();
+        let namespace = "test".to_string();
+        tokio::task::spawn_blocking(move || {
+            let conn = conn.blocking_lock();
+            conn.execute(
+                r#"
+                INSERT OR REPLACE INTO retrieval_snapshots
+                    (namespace, index_type, snapshot, created_at)
+                VALUES
+                    (?1, 'bm25', ?2, strftime('%s', 'now'))
+                "#,
+                rusqlite::params![namespace, &[] as &[u8]],
+            )
+            .expect("insert empty blob");
+        })
+        .await
+        .expect("spawn");
+    }
+
+    // Attempt to load should return an error
+    let result = persist.load_bm25_index().await;
+    assert!(result.is_err(), "loading empty blob should fail");
+    let err = result.unwrap_err();
+    assert!(matches!(err, PersistError::Deserialize(_)));
+}
+
+// -- Issue #869: Empty index persistence edge case tests --
+
+#[tokio::test]
+async fn test_empty_bm25_index_persistence() {
+    let persist = setup_test_persistence().await;
+
+    // Persist an empty BM25 index
+    let index = Bm25Index::default();
+    assert_eq!(index.doc_count(), 0);
+
+    persist
+        .persist_bm25_index(&index)
+        .await
+        .expect("persist empty");
+
+    // Load and verify
+    let loaded = persist.load_bm25_index().await.expect("load");
+    assert!(loaded.is_some());
+    let loaded = loaded.unwrap();
+    assert_eq!(loaded.doc_count(), 0, "empty index should remain empty");
+}
+
+#[tokio::test]
+async fn test_empty_hnsw_index_persistence() {
+    let persist = setup_test_persistence().await;
+
+    // Persist an empty HNSW index
+    let index = HnswIndex::new(4);
+    assert_eq!(index.len(), 0);
+
+    persist
+        .persist_hnsw_snapshot(&index)
+        .await
+        .expect("persist empty");
+
+    // Load and verify
+    let loaded = persist.load_hnsw_snapshot().await.expect("load");
+    assert!(loaded.is_some());
+    let snapshot = loaded.unwrap();
+    assert_eq!(
+        snapshot.total_nodes, 0,
+        "empty index snapshot should have 0 nodes"
+    );
+    assert_eq!(snapshot.live_nodes, 0);
+    assert!(snapshot.indexed_ids.is_empty());
+}
+
+#[tokio::test]
+async fn test_empty_hnsw_shadow_validation() {
+    let persist = setup_test_persistence().await;
+    let config = ShadowValidationConfig::enabled();
+
+    // Empty HNSW index
+    let index = HnswIndex::new(4);
+
+    let result = persist
+        .persist_hnsw_with_validation(&index, &config)
+        .await
+        .expect("persist empty with validation");
+
+    assert!(result.is_some());
+    let validation = result.unwrap();
+    assert!(validation.passed, "empty index validation should pass");
+    assert_eq!(validation.expected.item_count, 0);
+}
+
+// -- Issue #867: Test that verify() is called during shadow validation --
+
+#[tokio::test]
+async fn test_hnsw_shadow_validation_calls_verify() {
+    let persist = setup_test_persistence().await;
+    let config = ShadowValidationConfig::enabled();
+
+    // Create an HNSW index with vectors
+    let mut index = HnswIndex::new(4);
+    let id1 = NodeId::new([1; 16]);
+    let id2 = NodeId::new([2; 16]);
+    let id3 = NodeId::new([3; 16]);
+
+    index.insert(id1, vec![1.0, 0.0, 0.0, 0.0]).expect("insert");
+    index.insert(id2, vec![0.0, 1.0, 0.0, 0.0]).expect("insert");
+    index.insert(id3, vec![0.0, 0.0, 1.0, 0.0]).expect("insert");
+    index.delete(id2); // Create tombstone
+
+    let result = persist
+        .persist_hnsw_with_validation(&index, &config)
+        .await
+        .expect("persist with validation");
+
+    // Validation should pass because verify() succeeds on valid snapshot
+    assert!(result.is_some());
+    let validation = result.unwrap();
+    assert!(
+        validation.passed,
+        "valid snapshot should pass verify(): {:?}",
+        validation.discrepancies
+    );
+    assert_eq!(validation.expected.item_count, 3);
+    assert_eq!(validation.expected.tombstone_count, 1);
+}
+
+// ==========================================================================
+// Issue #1114: HNSW index corruption recovery tests
+// ==========================================================================
+//
+// These tests verify that the persistence layer correctly detects and handles
+// various forms of HNSW index corruption, enabling the engine to recover
+// by rebuilding from source data.
+
+/// Helper: insert raw bytes into the HNSW snapshot slot for a persistence instance.
+async fn inject_raw_hnsw_snapshot(persist: &RetrievalPersistence, data: &[u8]) {
+    let conn = persist.conn.clone();
+    let namespace = persist.namespace.clone();
+    let data = data.to_vec();
+    tokio::task::spawn_blocking(move || {
+        let conn = conn.blocking_lock();
+        conn.execute(
+            r#"
+            INSERT OR REPLACE INTO retrieval_snapshots
+                (namespace, index_type, snapshot, created_at)
+            VALUES
+                (?1, 'hnsw', ?2, strftime('%s', 'now'))
+            "#,
+            rusqlite::params![&*namespace, data],
+        )
+        .expect("inject raw snapshot");
+    })
+    .await
+    .expect("spawn");
+}
+
+/// Helper: build a valid HNSW index with some vectors and persist it.
+async fn build_and_persist_hnsw(persist: &RetrievalPersistence) -> HnswIndex {
+    let mut index = HnswIndex::new(4);
+    let id1 = NodeId::new([1; 16]);
+    let id2 = NodeId::new([2; 16]);
+    let id3 = NodeId::new([3; 16]);
+
+    index.insert(id1, vec![1.0, 0.0, 0.0, 0.0]).expect("insert");
+    index.insert(id2, vec![0.0, 1.0, 0.0, 0.0]).expect("insert");
+    index.insert(id3, vec![0.0, 0.0, 1.0, 0.0]).expect("insert");
+
+    persist
+        .persist_hnsw_snapshot(&index)
+        .await
+        .expect("persist");
+    index
+}
+
+// -- Test: Truncated HNSW snapshot file --
+//
+// Scenario: The snapshot BLOB in SQLite is truncated (e.g., write was interrupted).
+// Expected: load_hnsw_snapshot returns a Deserialize error, not a panic or corrupt data.
+
+#[tokio::test]
+async fn test_truncated_hnsw_snapshot_detected() {
+    let persist = setup_test_persistence().await;
+
+    // First persist a valid snapshot so we have realistic JSON to truncate
+    build_and_persist_hnsw(&persist).await;
+
+    // Load the valid snapshot and get its serialized form
+    let valid_snapshot = persist
+        .load_hnsw_snapshot()
+        .await
+        .expect("load valid")
+        .expect("snapshot exists");
+
+    let valid_json = serde_json::to_vec(&valid_snapshot).expect("serialize");
+    assert!(valid_json.len() > 20, "valid JSON should be non-trivial");
+
+    // Truncate at various points to simulate interrupted writes
+    for truncate_at in [1, 10, valid_json.len() / 4, valid_json.len() / 2] {
+        let truncated = &valid_json[..truncate_at];
+        inject_raw_hnsw_snapshot(&persist, truncated).await;
+
+        let result = persist.load_hnsw_snapshot().await;
+        assert!(
+            result.is_err(),
+            "truncated snapshot (at byte {truncate_at}) should fail to load"
+        );
+        let err = result.unwrap_err();
+        assert!(
+            matches!(err, PersistError::Deserialize(_)),
+            "should be a Deserialize error, got: {err:?}"
+        );
+    }
+}
+
+// -- Test: Corrupted bytes in HNSW snapshot --
+//
+// Scenario: Random byte corruption in the snapshot BLOB (e.g., disk bit flip).
+// Expected: Deserialization fails or snapshot verify() catches inconsistency.
+
+#[tokio::test]
+async fn test_corrupted_bytes_in_hnsw_snapshot_detected() {
+    let persist = setup_test_persistence().await;
+
+    // Build and persist a valid snapshot
+    build_and_persist_hnsw(&persist).await;
+
+    let valid_snapshot = persist
+        .load_hnsw_snapshot()
+        .await
+        .expect("load valid")
+        .expect("snapshot exists");
+
+    let mut corrupted_json = serde_json::to_vec(&valid_snapshot).expect("serialize");
+
+    // Corrupt bytes in the middle of the JSON (likely to break structure)
+    let mid = corrupted_json.len() / 2;
+    for i in mid..mid.saturating_add(10).min(corrupted_json.len()) {
+        corrupted_json[i] = 0xFF;
+    }
+
+    inject_raw_hnsw_snapshot(&persist, &corrupted_json).await;
+
+    let result = persist.load_hnsw_snapshot().await;
+
+    // The corrupted JSON should either fail to deserialize or produce
+    // a snapshot that fails verification. Either outcome is acceptable
+    // as long as we don't silently return corrupt data.
+    match result {
+        Err(PersistError::Deserialize(_)) => {
+            // Good: deserialization caught it
+        }
+        Ok(Some(snapshot)) => {
+            // If it deserialized, verify() should catch the inconsistency
+            // (corrupted counts, missing IDs, etc.)
+            let verify_result = snapshot.verify();
+            // Even if verify passes (unlikely with random corruption), we accept it
+            // because the snapshot's data fields would be garbled. The key invariant
+            // is that we don't panic or produce silently wrong results.
+            let _ = verify_result;
+        }
+        Ok(None) => {
+            panic!("snapshot was injected, should not return None");
+        }
+        Err(other) => {
+            panic!("unexpected error variant: {other:?}");
+        }
+    }
+}
+
+// -- Test: Missing HNSW snapshot (no row in SQLite) --
+//
+// Scenario: The snapshot row doesn't exist (e.g., first boot, or snapshot was
+//           deleted/cleared). Engine should detect this and rebuild from source.
+
+#[tokio::test]
+async fn test_missing_hnsw_snapshot_returns_none() {
+    let persist = setup_test_persistence().await;
+
+    // No snapshot has been persisted yet
+    let result = persist
+        .load_hnsw_snapshot()
+        .await
+        .expect("load should not error");
+    assert!(
+        result.is_none(),
+        "missing snapshot should return None, not error"
+    );
+}
+
+#[tokio::test]
+async fn test_missing_hnsw_snapshot_after_clear_returns_none() {
+    let persist = setup_test_persistence().await;
+
+    // Persist a valid snapshot
+    build_and_persist_hnsw(&persist).await;
+
+    // Verify it exists
+    let loaded = persist.load_hnsw_snapshot().await.expect("load");
+    assert!(loaded.is_some(), "snapshot should exist before clear");
+
+    // Clear all snapshots (simulating data loss / recovery scenario)
+    persist.clear().await.expect("clear");
+
+    // Now loading should return None
+    let after_clear = persist
+        .load_hnsw_snapshot()
+        .await
+        .expect("load after clear");
+    assert!(
+        after_clear.is_none(),
+        "snapshot should be None after clear, enabling rebuild from source"
+    );
+}
+
+// -- Test: HNSW snapshot with internally inconsistent state --
+//
+// Scenario: Snapshot deserializes successfully but has corrupted internal state
+//           (e.g., total_nodes doesn't match indexed_ids count). This simulates
+//           a partial write or in-memory corruption before serialization.
+
+#[tokio::test]
+async fn test_inconsistent_hnsw_snapshot_detected_by_verify() {
+    use khive_hnsw::{HnswCheckpointConfig, HnswSnapshot};
+
+    let persist = setup_test_persistence().await;
+
+    let id1 = NodeId::new([1; 16]);
+    let id2 = NodeId::new([2; 16]);
+
+    // Create a snapshot where total_nodes doesn't match indexed_ids.len()
+    let bad_snapshot = HnswSnapshot {
+        vector_count: 0,
+        total_nodes: 5, // WRONG: says 5 but only 2 IDs
+        live_nodes: 5,
+        tombstone_count: 0,
+        max_layer: 0,
+        entry_point: Some(id1),
+        config: HnswCheckpointConfig {
+            m: 16,
+            ef_construction: 200,
+            metric: "cosine".to_string(),
+        },
+        indexed_ids: vec![id1, id2], // Only 2 IDs
+        tombstoned_ids: vec![],
+        layers: vec![vec![(id1, vec![id2]), (id2, vec![id1])]],
+
+        vectors: vec![],
+    };
+
+    // Persist it (persistence layer doesn't validate, just serializes)
+    let data = serde_json::to_vec(&bad_snapshot).expect("serialize");
+    inject_raw_hnsw_snapshot(&persist, &data).await;
+
+    // Load succeeds (it's valid JSON with correct schema)
+    let loaded = persist
+        .load_hnsw_snapshot()
+        .await
+        .expect("load should succeed for valid JSON");
+    assert!(loaded.is_some(), "snapshot should load");
+
+    let snapshot = loaded.unwrap();
+
+    // But verify() detects the inconsistency
+    let verify_result = snapshot.verify();
+    assert!(
+        verify_result.is_err(),
+        "verify should catch total_nodes != indexed_ids.len()"
+    );
+}
+
+#[tokio::test]
+async fn test_tombstone_inconsistency_detected_by_verify() {
+    use khive_hnsw::{HnswCheckpointConfig, HnswSnapshot};
+
+    let persist = setup_test_persistence().await;
+
+    let id1 = NodeId::new([1; 16]);
+    let id2 = NodeId::new([2; 16]);
+    let id3 = NodeId::new([3; 16]);
+    let id_phantom = NodeId::new([99; 16]);
+
+    // Snapshot claims id_phantom is tombstoned but it's not in indexed_ids
+    let bad_snapshot = HnswSnapshot {
+        vector_count: 0,
+        total_nodes: 3,
+        live_nodes: 2,
+        tombstone_count: 1,
+        max_layer: 0,
+        entry_point: Some(id1),
+        config: HnswCheckpointConfig {
+            m: 16,
+            ef_construction: 200,
+            metric: "cosine".to_string(),
+        },
+        indexed_ids: vec![id1, id2, id3],
+        tombstoned_ids: vec![id_phantom], // NOT in indexed_ids
+        layers: vec![],
+
+        vectors: vec![],
+    };
+
+    let data = serde_json::to_vec(&bad_snapshot).expect("serialize");
+    inject_raw_hnsw_snapshot(&persist, &data).await;
+
+    let loaded = persist
+        .load_hnsw_snapshot()
+        .await
+        .expect("load")
+        .expect("snapshot exists");
+
+    let verify_result = loaded.verify();
+    assert!(
+        verify_result.is_err(),
+        "verify should catch tombstoned ID not in indexed_ids"
+    );
+}
+
+// -- Test: Shadow validation catches corrupted snapshot state --
+//
+// Scenario: Snapshot is persisted correctly, then corrupted in-place in SQLite.
+//           Shadow validation (read-back) should detect the corruption.
+
+#[tokio::test]
+async fn test_shadow_validation_detects_corruption() {
+    let persist = setup_test_persistence().await;
+
+    // Build and persist valid index
+    let mut index = HnswIndex::new(4);
+    let id1 = NodeId::new([1; 16]);
+    let id2 = NodeId::new([2; 16]);
+    index.insert(id1, vec![1.0, 0.0, 0.0, 0.0]).expect("insert");
+    index.insert(id2, vec![0.0, 1.0, 0.0, 0.0]).expect("insert");
+
+    persist
+        .persist_hnsw_snapshot(&index)
+        .await
+        .expect("persist");
+
+    // Now corrupt the stored data in-place
+    inject_raw_hnsw_snapshot(&persist, b"not valid json at all {{{").await;
+
+    // Shadow validation should detect the corruption
+    let expected = ShadowMetrics {
+        item_count: 2,
+        tombstone_count: 0,
+        snapshot_size: 0,
+    };
+
+    let result = persist.validate_hnsw_snapshot(expected).await;
+    assert!(
+        !result.passed,
+        "shadow validation should fail on corrupted data"
+    );
+    assert!(
+        !result.discrepancies.is_empty(),
+        "should report discrepancies"
+    );
+}
+
+// -- Test: Full recovery workflow --
+//
+// Scenario: Snapshot is corrupted. Engine detects via load failure, clears the
+//           corrupt entry, and rebuilds from source vectors. After rebuild,
+//           the new snapshot is valid.
+
+#[tokio::test]
+async fn test_full_recovery_workflow_corrupt_then_rebuild() {
+    let persist = setup_test_persistence().await;
+
+    let id1 = NodeId::new([1; 16]);
+    let id2 = NodeId::new([2; 16]);
+    let id3 = NodeId::new([3; 16]);
+
+    let vectors = vec![
+        (id1, vec![1.0, 0.0, 0.0, 0.0]),
+        (id2, vec![0.0, 1.0, 0.0, 0.0]),
+        (id3, vec![0.0, 0.0, 1.0, 0.0]),
+    ];
+
+    // Step 1: Build and persist a valid index
+    {
+        let mut index = HnswIndex::new(4);
+        for (id, vec) in &vectors {
+            index.insert(*id, vec.clone()).expect("insert");
+        }
+        persist
+            .persist_hnsw_snapshot(&index)
+            .await
+            .expect("persist");
+    }
+
+    // Step 2: Corrupt the snapshot
+    inject_raw_hnsw_snapshot(&persist, b"corrupted snapshot data").await;
+
+    // Step 3: Attempt to load -- should fail
+    let load_result = persist.load_hnsw_snapshot().await;
+    assert!(
+        load_result.is_err(),
+        "loading corrupted snapshot should fail"
+    );
+
+    // Step 4: Recovery -- clear corrupt data
+    persist.clear().await.expect("clear corrupted data");
+
+    // Step 5: Verify cleared
+    let after_clear = persist
+        .load_hnsw_snapshot()
+        .await
+        .expect("load after clear");
+    assert!(after_clear.is_none(), "snapshot should be gone after clear");
+
+    // Step 6: Rebuild index from source vectors
+    let mut rebuilt_index = HnswIndex::new(4);
+    for (id, vec) in &vectors {
+        rebuilt_index.insert(*id, vec.clone()).expect("re-insert");
+    }
+
+    assert_eq!(
+        rebuilt_index.len(),
+        3,
+        "rebuilt index should have 3 vectors"
+    );
+
+    // Step 7: Persist the rebuilt index
+    persist
+        .persist_hnsw_snapshot(&rebuilt_index)
+        .await
+        .expect("persist rebuilt");
+
+    // Step 8: Verify the new snapshot is valid
+    let new_snapshot = persist
+        .load_hnsw_snapshot()
+        .await
+        .expect("load rebuilt")
+        .expect("snapshot exists");
+
+    assert_eq!(new_snapshot.total_nodes, 3);
+    assert_eq!(new_snapshot.live_nodes, 3);
+    assert!(
+        new_snapshot.verify().is_ok(),
+        "rebuilt snapshot should pass verification"
+    );
+}
+
+// -- Test: Recovery from inconsistent snapshot via verify-then-rebuild --
+//
+// Scenario: Snapshot loads but fails verify(). Engine should detect this and
+//           trigger rebuild rather than using the corrupt topology.
+
+#[tokio::test]
+async fn test_recovery_from_inconsistent_snapshot_via_verify() {
+    use khive_hnsw::{HnswCheckpointConfig, HnswSnapshot};
+
+    let persist = setup_test_persistence().await;
+
+    let id1 = NodeId::new([1; 16]);
+    let id2 = NodeId::new([2; 16]);
+    let id3 = NodeId::new([3; 16]);
+
+    // Inject a snapshot with mismatched tombstone counts
+    let bad_snapshot = HnswSnapshot {
+        vector_count: 0,
+        total_nodes: 3,
+        live_nodes: 1,
+        tombstone_count: 2, // Claims 2 tombstones
+        max_layer: 0,
+        entry_point: Some(id1),
+        config: HnswCheckpointConfig {
+            m: 16,
+            ef_construction: 200,
+            metric: "cosine".to_string(),
+        },
+        indexed_ids: vec![id1, id2, id3],
+        tombstoned_ids: vec![id2], // Only 1 tombstone ID (mismatch!)
+        layers: vec![],
+
+        vectors: vec![],
+    };
+
+    let data = serde_json::to_vec(&bad_snapshot).expect("serialize");
+    inject_raw_hnsw_snapshot(&persist, &data).await;
+
+    // Load succeeds
+    let loaded = persist
+        .load_hnsw_snapshot()
+        .await
+        .expect("load")
+        .expect("snapshot exists");
+
+    // But verify catches the corruption
+    let verify_err = loaded.verify().unwrap_err();
+    let err_msg = verify_err.to_string();
+    assert!(
+        err_msg.contains("tombstoned_ids count mismatch"),
+        "should report tombstone count mismatch, got: {err_msg}"
+    );
+
+    // Recovery: clear and rebuild
+    persist.clear().await.expect("clear");
+
+    let mut rebuilt = HnswIndex::new(4);
+    rebuilt
+        .insert(id1, vec![1.0, 0.0, 0.0, 0.0])
+        .expect("insert");
+    rebuilt
+        .insert(id2, vec![0.0, 1.0, 0.0, 0.0])
+        .expect("insert");
+    rebuilt
+        .insert(id3, vec![0.0, 0.0, 1.0, 0.0])
+        .expect("insert");
+
+    persist
+        .persist_hnsw_snapshot(&rebuilt)
+        .await
+        .expect("persist rebuilt");
+
+    let new_snapshot = persist
+        .load_hnsw_snapshot()
+        .await
+        .expect("load")
+        .expect("snapshot exists");
+    assert!(
+        new_snapshot.verify().is_ok(),
+        "rebuilt snapshot should be valid"
+    );
+}
+
+// -- Test: Restore from snapshot detects corrupt snapshot --
+//
+// Scenario: An index tries to restore_from_snapshot with a corrupt snapshot.
+//           The restore should fail with an error, not silently use bad data.
+
+#[tokio::test]
+async fn test_restore_from_corrupt_snapshot_fails() {
+    use khive_hnsw::{HnswCheckpointConfig, HnswSnapshot};
+
+    let id1 = NodeId::new([1; 16]);
+    let id2 = NodeId::new([2; 16]);
+
+    let mut index = HnswIndex::new(4);
+
+    // Create a corrupt snapshot (total_nodes mismatch)
+    let bad_snapshot = HnswSnapshot {
+        vector_count: 0,
+        total_nodes: 10, // WRONG
+        live_nodes: 10,
+        tombstone_count: 0,
+        max_layer: 0,
+        entry_point: Some(id1),
+        config: HnswCheckpointConfig {
+            m: 16,
+            ef_construction: 200,
+            metric: "cosine".to_string(),
+        },
+        indexed_ids: vec![id1, id2], // Only 2
+        tombstoned_ids: vec![],
+        layers: vec![],
+
+        vectors: vec![],
+    };
+
+    let vectors: std::collections::HashMap<NodeId, Vec<f32>> = [
+        (id1, vec![1.0, 0.0, 0.0, 0.0]),
+        (id2, vec![0.0, 1.0, 0.0, 0.0]),
+    ]
+    .into_iter()
+    .collect();
+
+    let result = index.restore_from_snapshot(&bad_snapshot, &vectors);
+    assert!(
+        result.is_err(),
+        "restore_from_snapshot should reject corrupt snapshot"
+    );
+    let err_msg = result.unwrap_err().to_string();
+    assert!(
+        err_msg.contains("Invalid snapshot"),
+        "error should mention invalid snapshot, got: {err_msg}"
+    );
+}
+
+// -- Test: Binary garbage as HNSW snapshot --
+//
+// Scenario: Random non-JSON binary data in the snapshot slot (e.g., disk corruption
+//           that overwrites the entire BLOB).
+
+#[tokio::test]
+async fn test_binary_garbage_hnsw_snapshot_detected() {
+    let persist = setup_test_persistence().await;
+
+    // Insert pure binary garbage
+    let garbage: Vec<u8> = (0..256).map(|i| i as u8).collect();
+    inject_raw_hnsw_snapshot(&persist, &garbage).await;
+
+    let result = persist.load_hnsw_snapshot().await;
+    assert!(result.is_err(), "binary garbage should fail to deserialize");
+    let err = result.unwrap_err();
+    assert!(
+        matches!(err, PersistError::Deserialize(_)),
+        "should be Deserialize error, got: {err:?}"
+    );
+}
+
+// -- Test: Overwrite corrupt snapshot with valid one --
+//
+// Scenario: After detecting corruption, persisting a new valid snapshot should
+//           overwrite the corrupt data (INSERT OR REPLACE behavior).
+
+#[tokio::test]
+async fn test_overwrite_corrupt_snapshot_with_valid() {
+    let persist = setup_test_persistence().await;
+
+    // Inject corrupt data
+    inject_raw_hnsw_snapshot(&persist, b"this is not valid json").await;
+
+    // Verify it's corrupt
+    assert!(persist.load_hnsw_snapshot().await.is_err());
+
+    // Now persist a valid index (should overwrite the corrupt entry)
+    let mut index = HnswIndex::new(4);
+    let id1 = NodeId::new([1; 16]);
+    index.insert(id1, vec![1.0, 0.0, 0.0, 0.0]).expect("insert");
+
+    persist
+        .persist_hnsw_snapshot(&index)
+        .await
+        .expect("persist should overwrite corrupt entry");
+
+    // Loading should now succeed
+    let loaded = persist
+        .load_hnsw_snapshot()
+        .await
+        .expect("load should succeed after overwrite")
+        .expect("snapshot should exist");
+
+    assert_eq!(loaded.total_nodes, 1);
+    assert!(loaded.verify().is_ok());
+}
diff --git a/crates/khive-retrieval/src/policy.rs b/crates/khive-retrieval/src/policy.rs
new file mode 100644
index 00000000..79dc554b
--- /dev/null
+++ b/crates/khive-retrieval/src/policy.rs
@@ -0,0 +1,349 @@
+//! Policy integration for access-controlled retrieval.
+//!
+//! # RETRIEVAL-03: Policy Integration
+//!
+//! This module provides policy-based filtering of search results, ensuring
+//! that callers only see documents they are authorized to access.
+//!
+//! # Architecture
+//!
+//! ```text
+//! Query -> Retrieval -> Policy Filter -> Results
+//!                            |
+//!                            v
+//!                      PolicyEngine
+//! ```
+//!
+//! # Example
+//!
+//! ```ignore
+//! use khive_retrieval::policy::{SearchPolicy, filter_by_policy};
+//!
+//! let policy = SearchPolicy::new(ClearanceLevel::Internal);
+//! let filtered = filter_by_policy(results, &policy, |id| get_doc_clearance(id));
+//! ```
+
+use khive_score::DeterministicScore;
+use std::hash::Hash;
+
+#[cfg(feature = "policy")]
+use khive_gate::GateContext as PolicyContext;
+
+/// Clearance level for documents.
+///
+/// Higher values indicate more restricted access.
+/// This is a simple hierarchical model; more complex ABAC can be built on top.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
+pub enum ClearanceLevel {
+    /// Public documents, accessible to all.
+    Public = 0,
+    /// Internal documents, accessible to authenticated users.
+    Internal = 1,
+    /// Confidential documents, restricted access.
+    Confidential = 2,
+    /// Secret documents, highly restricted.
+    Secret = 3,
+}
+
+impl ClearanceLevel {
+    /// Check if this clearance level can access a document with the given level.
+    ///
+    /// A caller can access a document if their clearance is >= the document's level.
+    #[inline]
+    pub fn can_access(&self, document_level: ClearanceLevel) -> bool {
+        *self >= document_level
+    }
+}
+
+impl Default for ClearanceLevel {
+    fn default() -> Self {
+        Self::Public
+    }
+}
+
+/// Policy context for search operations.
+///
+/// Encapsulates the caller's clearance level and optional policy engine
+/// for more complex access control decisions.
+#[derive(Debug, Clone)]
+pub struct SearchPolicy {
+    /// Caller's clearance level (simple hierarchical model).
+    pub caller_clearance: ClearanceLevel,
+
+    /// Optional policy engine for complex ABAC decisions.
+    #[cfg(feature = "policy")]
+    pub policy_context: Option<PolicyContext>,
+}
+
+impl SearchPolicy {
+    /// Create a new search policy with the given clearance level.
+    pub fn new(caller_clearance: ClearanceLevel) -> Self {
+        Self {
+            caller_clearance,
+            #[cfg(feature = "policy")]
+            policy_context: None,
+        }
+    }
+
+    /// Create a public-level search policy (default).
+    pub fn public() -> Self {
+        Self::new(ClearanceLevel::Public)
+    }
+
+    /// Create an internal-level search policy.
+    pub fn internal() -> Self {
+        Self::new(ClearanceLevel::Internal)
+    }
+
+    /// Create a confidential-level search policy.
+    pub fn confidential() -> Self {
+        Self::new(ClearanceLevel::Confidential)
+    }
+
+    /// Create a secret-level search policy.
+    pub fn secret() -> Self {
+        Self::new(ClearanceLevel::Secret)
+    }
+
+    /// Set the policy context for complex access control.
+    #[cfg(feature = "policy")]
+    pub fn with_context(mut self, context: PolicyContext) -> Self {
+        self.policy_context = Some(context);
+        self
+    }
+
+    /// Check if the caller can access a document with the given clearance.
+    #[inline]
+    pub fn can_access(&self, document_clearance: ClearanceLevel) -> bool {
+        self.caller_clearance.can_access(document_clearance)
+    }
+}
+
+impl Default for SearchPolicy {
+    fn default() -> Self {
+        Self::public()
+    }
+}
+
+/// Filter search results based on policy.
+///
+/// # Arguments
+///
+/// * `results` - The search results to filter.
+/// * `policy` - The search policy to apply.
+/// * `get_clearance` - A function that returns the clearance level for a given ID.
+///
+/// # Returns
+///
+/// A new vector containing only the results the caller is authorized to see.
+///
+/// # Example
+///
+/// ```ignore
+/// let policy = SearchPolicy::new(ClearanceLevel::Internal);
+/// let filtered = filter_by_policy(results, &policy, |id| {
+///     // Look up document clearance from metadata
+///     get_document_clearance(id)
+/// });
+/// ```
+pub fn filter_by_policy<Id, F>(
+    results: Vec<(Id, DeterministicScore)>,
+    policy: &SearchPolicy,
+    get_clearance: F,
+) -> Vec<(Id, DeterministicScore)>
+where
+    Id: Clone,
+    F: Fn(&Id) -> ClearanceLevel,
+{
+    results
+        .into_iter()
+        .filter(|(id, _)| {
+            let doc_clearance = get_clearance(id);
+            policy.can_access(doc_clearance)
+        })
+        .collect()
+}
+
+/// Filter search results using a custom predicate.
+///
+/// This is a more flexible version of `filter_by_policy` that allows
+/// arbitrary access control logic.
+///
+/// # Arguments
+///
+/// * `results` - The search results to filter.
+/// * `is_accessible` - A predicate that returns true if the caller can access the document.
+///
+/// # Returns
+///
+/// A new vector containing only the accessible results.
+pub fn filter_by_predicate<Id, F>(
+    results: Vec<(Id, DeterministicScore)>,
+    is_accessible: F,
+) -> Vec<(Id, DeterministicScore)>
+where
+    Id: Clone,
+    F: Fn(&Id) -> bool,
+{
+    results
+        .into_iter()
+        .filter(|(id, _)| is_accessible(id))
+        .collect()
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_clearance_level_ordering() {
+        assert!(ClearanceLevel::Secret > ClearanceLevel::Confidential);
+        assert!(ClearanceLevel::Confidential > ClearanceLevel::Internal);
+        assert!(ClearanceLevel::Internal > ClearanceLevel::Public);
+    }
+
+    #[test]
+    fn test_clearance_can_access() {
+        let secret = ClearanceLevel::Secret;
+        let public = ClearanceLevel::Public;
+
+        // Secret can access everything
+        assert!(secret.can_access(ClearanceLevel::Secret));
+        assert!(secret.can_access(ClearanceLevel::Confidential));
+        assert!(secret.can_access(ClearanceLevel::Internal));
+        assert!(secret.can_access(ClearanceLevel::Public));
+
+        // Public can only access public
+        assert!(public.can_access(ClearanceLevel::Public));
+        assert!(!public.can_access(ClearanceLevel::Internal));
+        assert!(!public.can_access(ClearanceLevel::Confidential));
+        assert!(!public.can_access(ClearanceLevel::Secret));
+    }
+
+    #[test]
+    fn test_search_policy_constructors() {
+        let policy = SearchPolicy::public();
+        assert_eq!(policy.caller_clearance, ClearanceLevel::Public);
+
+        let policy = SearchPolicy::secret();
+        assert_eq!(policy.caller_clearance, ClearanceLevel::Secret);
+    }
+
+    // =========================================================================
+    // RETRIEVAL-03: Policy Integration Tests
+    // =========================================================================
+
+    #[test]
+    fn test_filter_by_policy_hides_secret_from_public() {
+        let results = vec![
+            ("doc_public", DeterministicScore::from_f64(0.9)),
+            ("doc_secret", DeterministicScore::from_f64(0.95)),
+            ("doc_internal", DeterministicScore::from_f64(0.8)),
+        ];
+
+        let policy = SearchPolicy::public();
+
+        // Clearance lookup function
+        let get_clearance = |id: &&str| -> ClearanceLevel {
+            match *id {
+                "doc_public" => ClearanceLevel::Public,
+                "doc_internal" => ClearanceLevel::Internal,
+                "doc_secret" => ClearanceLevel::Secret,
+                _ => ClearanceLevel::Public,
+            }
+        };
+
+        let filtered = filter_by_policy(results, &policy, get_clearance);
+
+        // Public caller should only see public documents
+        assert_eq!(filtered.len(), 1);
+        assert_eq!(filtered[0].0, "doc_public");
+    }
+
+    #[test]
+    fn test_filter_by_policy_secret_sees_all() {
+        let results = vec![
+            ("doc_public", DeterministicScore::from_f64(0.9)),
+            ("doc_secret", DeterministicScore::from_f64(0.95)),
+            ("doc_confidential", DeterministicScore::from_f64(0.8)),
+        ];
+
+        let policy = SearchPolicy::secret();
+
+        let get_clearance = |id: &&str| -> ClearanceLevel {
+            match *id {
+                "doc_public" => ClearanceLevel::Public,
+                "doc_confidential" => ClearanceLevel::Confidential,
+                "doc_secret" => ClearanceLevel::Secret,
+                _ => ClearanceLevel::Public,
+            }
+        };
+
+        let filtered = filter_by_policy(results, &policy, get_clearance);
+
+        // Secret caller should see all documents
+        assert_eq!(filtered.len(), 3);
+    }
+
+    #[test]
+    fn test_filter_by_policy_internal_sees_public_and_internal() {
+        let results = vec![
+            ("doc_public", DeterministicScore::from_f64(0.9)),
+            ("doc_secret", DeterministicScore::from_f64(0.95)),
+            ("doc_internal", DeterministicScore::from_f64(0.8)),
+        ];
+
+        let policy = SearchPolicy::internal();
+
+        let get_clearance = |id: &&str| -> ClearanceLevel {
+            match *id {
+                "doc_public" => ClearanceLevel::Public,
+                "doc_internal" => ClearanceLevel::Internal,
+                "doc_secret" => ClearanceLevel::Secret,
+                _ => ClearanceLevel::Public,
+            }
+        };
+
+        let filtered = filter_by_policy(results, &policy, get_clearance);
+
+        // Internal caller should see public and internal
+        assert_eq!(filtered.len(), 2);
+        assert!(filtered.iter().any(|(id, _)| *id == "doc_public"));
+        assert!(filtered.iter().any(|(id, _)| *id == "doc_internal"));
+        assert!(!filtered.iter().any(|(id, _)| *id == "doc_secret"));
+    }
+
+    #[test]
+    fn test_filter_by_policy_preserves_order() {
+        let results = vec![
+            ("doc1", DeterministicScore::from_f64(0.9)),
+            ("doc2", DeterministicScore::from_f64(0.8)),
+            ("doc3", DeterministicScore::from_f64(0.7)),
+        ];
+
+        let policy = SearchPolicy::public();
+        let get_clearance = |_: &&str| ClearanceLevel::Public;
+
+        let filtered = filter_by_policy(results, &policy, get_clearance);
+
+        // Order should be preserved
+        assert_eq!(filtered[0].0, "doc1");
+        assert_eq!(filtered[1].0, "doc2");
+        assert_eq!(filtered[2].0, "doc3");
+    }
+
+    #[test]
+    fn test_filter_by_predicate() {
+        let results = vec![
+            ("allowed", DeterministicScore::from_f64(0.9)),
+            ("denied", DeterministicScore::from_f64(0.8)),
+            ("allowed2", DeterministicScore::from_f64(0.7)),
+        ];
+
+        let filtered = filter_by_predicate(results, |id| id.starts_with("allowed"));
+
+        assert_eq!(filtered.len(), 2);
+        assert_eq!(filtered[0].0, "allowed");
+        assert_eq!(filtered[1].0, "allowed2");
+    }
+}
diff --git a/crates/khive-retrieval/src/query_ir.rs b/crates/khive-retrieval/src/query_ir.rs
new file mode 100644
index 00000000..a86ab164
--- /dev/null
+++ b/crates/khive-retrieval/src/query_ir.rs
@@ -0,0 +1,632 @@
+//! Query Intermediate Representation for the retrieval pipeline.
+//!
+//! Provides a composable, analyzable tree representation of search queries
+//! that can be inspected and optimized before execution.
+//!
+//! # Motivation
+//!
+//! The existing [`Query`](crate::hybrid::Query) struct captures *what* to
+//! search (text + optional embedding), but not *how* the retrieval pipeline
+//! should compose sub-queries, apply filters, or perform fusion.  `QueryNode`
+//! makes that composition explicit as an IR tree.
+//!
+//! # Example
+//!
+//! ```rust
+//! use khive_retrieval::query_ir::{QueryNode, FuseStrategy};
+//!
+//! // Build a hybrid query: vector + keyword fused with RRF, then top-10
+//! let embedding = vec![0.1_f32; 128];
+//! let q = QueryNode::hybrid(embedding, "distributed consensus", 10);
+//!
+//! assert_eq!(q.leaf_count(), 2);
+//! assert_eq!(q.top_k(), 10);
+//! assert!(!q.is_empty());
+//! ```
+
+use khive_score::DeterministicScore;
+use serde::{Deserialize, Serialize};
+
+// ---------------------------------------------------------------------------
+// Core IR node
+// ---------------------------------------------------------------------------
+
+/// A node in the Query IR tree.
+///
+/// Each variant represents a single retrieval operation or combinator.
+/// Nodes compose recursively -- a `Fuse` holds children, a `Filter` wraps
+/// a single child, and leaf nodes (`Vector`, `Keyword`, `Empty`) terminate
+/// the tree.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum QueryNode {
+    /// Vector similarity search (e.g. HNSW nearest-neighbor).
+    Vector {
+        /// Pre-computed query embedding.
+        embedding: Vec<f32>,
+        /// Number of results to return.
+        top_k: usize,
+        /// Optional minimum similarity threshold.
+        min_score: Option<DeterministicScore>,
+    },
+
+    /// Keyword / BM25 text search.
+    Keyword {
+        /// Query text.
+        text: String,
+        /// Number of results to return.
+        top_k: usize,
+        /// Optional minimum relevance threshold.
+        min_score: Option<DeterministicScore>,
+    },
+
+    /// Fuse multiple sub-queries into a single ranked list.
+    Fuse {
+        /// Sub-queries to fuse.
+        children: Vec<QueryNode>,
+        /// Strategy for combining ranked lists.
+        strategy: FuseStrategy,
+        /// Number of results after fusion.
+        top_k: usize,
+    },
+
+    /// Filter the results of a sub-query.
+    Filter {
+        /// The sub-query whose results are filtered.
+        child: Box<QueryNode>,
+        /// Predicate to apply.
+        predicate: FilterPredicate,
+    },
+
+    /// Rerank the results of a sub-query.
+    Rerank {
+        /// The sub-query whose results are reranked.
+        child: Box<QueryNode>,
+        /// Reranking method.
+        method: RerankMethod,
+        /// Number of results after reranking.
+        top_k: usize,
+    },
+
+    /// An empty query that is guaranteed to produce no results.
+    ///
+    /// Useful as the result of constant-folding provably-empty sub-trees.
+    Empty,
+}
+
+// ---------------------------------------------------------------------------
+// Supporting enums
+// ---------------------------------------------------------------------------
+
+/// Fusion strategy for combining sub-query result lists.
+///
+/// Mirrors [`FusionStrategy`](crate::fusion::FusionStrategy) at the IR level
+/// so that the query plan is self-contained and serialisable without depending
+/// on runtime fusion internals.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum FuseStrategy {
+    /// Reciprocal Rank Fusion with smoothing constant `k`.
+    ///
+    /// Standard default: k = 60 (Craswell et al., 2009).
+    Rrf {
+        /// Smoothing constant.
+        k: usize,
+    },
+
+    /// Weighted linear combination of scores.
+    ///
+    /// One weight per child; weights are normalised at execution time.
+    Weighted {
+        /// Per-child weights (will be normalised to sum to 1.0).
+        weights: Vec<f64>,
+    },
+
+    /// Union with max-score-per-document semantics.
+    Union,
+}
+
+/// Predicate for post-retrieval filtering.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum FilterPredicate {
+    /// Keep only results whose score meets a minimum threshold.
+    MinScore(DeterministicScore),
+
+    /// Keep at most `k` results (top-k truncation).
+    TopK(usize),
+
+    /// Keep results where a metadata field equals a given value.
+    MetadataEquals {
+        /// Metadata field name.
+        field: String,
+        /// Expected value (JSON).
+        value: serde_json::Value,
+    },
+
+    /// All contained predicates must hold (conjunction).
+    And(Vec<FilterPredicate>),
+
+    /// At least one contained predicate must hold (disjunction).
+    Or(Vec<FilterPredicate>),
+}
+
+/// Method for reranking search results.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum RerankMethod {
+    /// Cross-encoder neural reranking (placeholder for future integration).
+    CrossEncoder {
+        /// Model identifier.
+        model: String,
+    },
+
+    /// Score-based reranking with custom per-signal weights.
+    ScoreWeighted {
+        /// Weights applied to each scoring signal.
+        weights: Vec<f64>,
+    },
+}
+
+// ---------------------------------------------------------------------------
+// Construction helpers
+// ---------------------------------------------------------------------------
+
+impl QueryNode {
+    /// Create a vector search leaf node.
+    ///
+    /// # Arguments
+    ///
+    /// * `embedding` - Pre-computed query embedding.
+    /// * `top_k` - Number of results to return.
+    pub fn vector(embedding: Vec<f32>, top_k: usize) -> Self {
+        QueryNode::Vector {
+            embedding,
+            top_k,
+            min_score: None,
+        }
+    }
+
+    /// Create a keyword search leaf node.
+    ///
+    /// # Arguments
+    ///
+    /// * `text` - Query text (converted via `Into<String>`).
+    /// * `top_k` - Number of results to return.
+    pub fn keyword(text: impl Into<String>, top_k: usize) -> Self {
+        QueryNode::Keyword {
+            text: text.into(),
+            top_k,
+            min_score: None,
+        }
+    }
+
+    /// Create a hybrid query (vector + keyword with RRF fusion).
+    ///
+    /// The two leaf sub-queries each request `top_k * 3` candidates to give
+    /// the fusion step a sufficiently large candidate pool.
+    ///
+    /// # Arguments
+    ///
+    /// * `embedding` - Pre-computed query embedding.
+    /// * `text` - Query text (converted via `Into<String>`).
+    /// * `top_k` - Number of final results after fusion.
+    pub fn hybrid(embedding: Vec<f32>, text: impl Into<String>, top_k: usize) -> Self {
+        QueryNode::Fuse {
+            children: vec![
+                QueryNode::vector(embedding, top_k * 3),
+                QueryNode::keyword(text, top_k * 3),
+            ],
+            strategy: FuseStrategy::Rrf { k: 60 },
+            top_k,
+        }
+    }
+
+    /// Wrap this node with a minimum-score filter.
+    #[must_use]
+    pub fn with_min_score(self, min_score: DeterministicScore) -> Self {
+        QueryNode::Filter {
+            child: Box::new(self),
+            predicate: FilterPredicate::MinScore(min_score),
+        }
+    }
+
+    /// Wrap this node with a top-k truncation filter.
+    #[must_use]
+    pub fn with_top_k(self, k: usize) -> Self {
+        QueryNode::Filter {
+            child: Box::new(self),
+            predicate: FilterPredicate::TopK(k),
+        }
+    }
+
+    // -----------------------------------------------------------------------
+    // Analysis helpers
+    // -----------------------------------------------------------------------
+
+    /// Returns `true` if this query is provably empty (no results possible).
+    ///
+    /// A query is provably empty when:
+    /// - It is the `Empty` variant.
+    /// - A leaf has `top_k == 0`.
+    /// - A keyword leaf has empty text.
+    /// - A fuse node has no children.
+    /// - A filter/rerank wraps a provably-empty child.
+    pub fn is_empty(&self) -> bool {
+        match self {
+            QueryNode::Empty => true,
+            QueryNode::Vector { top_k: 0, .. } => true,
+            QueryNode::Keyword { top_k: 0, .. } => true,
+            QueryNode::Keyword { text, .. } if text.is_empty() => true,
+            QueryNode::Fuse { children, .. } if children.is_empty() => true,
+            QueryNode::Filter { child, .. } => child.is_empty(),
+            QueryNode::Rerank { child, .. } => child.is_empty(),
+            _ => false,
+        }
+    }
+
+    /// Count the total number of leaf search operations in the tree.
+    ///
+    /// `Vector` and `Keyword` nodes each count as 1.  `Empty` counts as 0.
+    /// Combinators recurse into their children.
+    pub fn leaf_count(&self) -> usize {
+        match self {
+            QueryNode::Vector { .. } | QueryNode::Keyword { .. } => 1,
+            QueryNode::Fuse { children, .. } => children.iter().map(|c| c.leaf_count()).sum(),
+            QueryNode::Filter { child, .. } | QueryNode::Rerank { child, .. } => child.leaf_count(),
+            QueryNode::Empty => 0,
+        }
+    }
+
+    /// Return the effective `top_k` requested by this node.
+    ///
+    /// For `Filter` nodes with a `TopK` predicate, the predicate's value is
+    /// returned.  Otherwise the child's `top_k` propagates upward.
+    pub fn top_k(&self) -> usize {
+        match self {
+            QueryNode::Vector { top_k, .. } => *top_k,
+            QueryNode::Keyword { top_k, .. } => *top_k,
+            QueryNode::Fuse { top_k, .. } => *top_k,
+            QueryNode::Filter { child, predicate } => match predicate {
+                FilterPredicate::TopK(k) => *k,
+                _ => child.top_k(),
+            },
+            QueryNode::Rerank { top_k, .. } => *top_k,
+            QueryNode::Empty => 0,
+        }
+    }
+
+    /// Return the depth of the IR tree (longest root-to-leaf path).
+    ///
+    /// Leaf nodes have depth 1.  `Empty` has depth 0.
+    pub fn depth(&self) -> usize {
+        match self {
+            QueryNode::Empty => 0,
+            QueryNode::Vector { .. } | QueryNode::Keyword { .. } => 1,
+            QueryNode::Fuse { children, .. } => {
+                1 + children.iter().map(|c| c.depth()).max().unwrap_or(0)
+            }
+            QueryNode::Filter { child, .. } | QueryNode::Rerank { child, .. } => 1 + child.depth(),
+        }
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+#[allow(clippy::uninlined_format_args)]
+mod tests {
+    use super::*;
+
+    // -- Construction -------------------------------------------------------
+
+    #[test]
+    fn test_vector_construction() {
+        let emb = vec![0.1, 0.2, 0.3];
+        let node = QueryNode::vector(emb.clone(), 10);
+        match &node {
+            QueryNode::Vector {
+                embedding,
+                top_k,
+                min_score,
+            } => {
+                assert_eq!(embedding, &emb);
+                assert_eq!(*top_k, 10);
+                assert!(min_score.is_none());
+            }
+            other => panic!("expected Vector, got {:?}", other),
+        }
+    }
+
+    #[test]
+    fn test_keyword_construction() {
+        let node = QueryNode::keyword("hello world", 5);
+        match &node {
+            QueryNode::Keyword {
+                text,
+                top_k,
+                min_score,
+            } => {
+                assert_eq!(text, "hello world");
+                assert_eq!(*top_k, 5);
+                assert!(min_score.is_none());
+            }
+            other => panic!("expected Keyword, got {:?}", other),
+        }
+    }
+
+    #[test]
+    fn test_hybrid_construction() {
+        let emb = vec![0.1_f32; 128];
+        let node = QueryNode::hybrid(emb, "distributed consensus", 10);
+        match &node {
+            QueryNode::Fuse {
+                children,
+                strategy,
+                top_k,
+            } => {
+                assert_eq!(children.len(), 2);
+                assert_eq!(*top_k, 10);
+                // Sub-queries should request 3x candidates.
+                assert_eq!(children[0].top_k(), 30);
+                assert_eq!(children[1].top_k(), 30);
+                assert!(matches!(strategy, FuseStrategy::Rrf { k: 60 }));
+            }
+            other => panic!("expected Fuse, got {:?}", other),
+        }
+    }
+
+    // -- is_empty -----------------------------------------------------------
+
+    #[test]
+    fn test_empty_variant() {
+        assert!(QueryNode::Empty.is_empty());
+        assert_eq!(QueryNode::Empty.leaf_count(), 0);
+        assert_eq!(QueryNode::Empty.top_k(), 0);
+        assert_eq!(QueryNode::Empty.depth(), 0);
+    }
+
+    #[test]
+    fn test_vector_top_k_zero_is_empty() {
+        let node = QueryNode::vector(vec![1.0], 0);
+        assert!(node.is_empty());
+    }
+
+    #[test]
+    fn test_keyword_top_k_zero_is_empty() {
+        let node = QueryNode::keyword("hello", 0);
+        assert!(node.is_empty());
+    }
+
+    #[test]
+    fn test_keyword_empty_text_is_empty() {
+        let node = QueryNode::keyword("", 10);
+        assert!(node.is_empty());
+    }
+
+    #[test]
+    fn test_fuse_no_children_is_empty() {
+        let node = QueryNode::Fuse {
+            children: vec![],
+            strategy: FuseStrategy::Rrf { k: 60 },
+            top_k: 10,
+        };
+        assert!(node.is_empty());
+    }
+
+    #[test]
+    fn test_filter_of_empty_is_empty() {
+        let node = QueryNode::Empty.with_min_score(DeterministicScore::from_f64(0.5));
+        assert!(node.is_empty());
+    }
+
+    #[test]
+    fn test_rerank_of_empty_is_empty() {
+        let node = QueryNode::Rerank {
+            child: Box::new(QueryNode::Empty),
+            method: RerankMethod::ScoreWeighted { weights: vec![1.0] },
+            top_k: 10,
+        };
+        assert!(node.is_empty());
+    }
+
+    #[test]
+    fn test_non_empty_query() {
+        let node = QueryNode::keyword("hello", 5);
+        assert!(!node.is_empty());
+    }
+
+    // -- leaf_count ---------------------------------------------------------
+
+    #[test]
+    fn test_leaf_count_single() {
+        assert_eq!(QueryNode::vector(vec![1.0], 5).leaf_count(), 1);
+        assert_eq!(QueryNode::keyword("q", 5).leaf_count(), 1);
+    }
+
+    #[test]
+    fn test_leaf_count_hybrid() {
+        let q = QueryNode::hybrid(vec![1.0], "q", 10);
+        assert_eq!(q.leaf_count(), 2);
+    }
+
+    #[test]
+    fn test_leaf_count_nested() {
+        // Fuse(Fuse(vec, kw), kw) = 3 leaves
+        let inner = QueryNode::hybrid(vec![1.0], "inner", 10);
+        let outer = QueryNode::Fuse {
+            children: vec![inner, QueryNode::keyword("outer", 10)],
+            strategy: FuseStrategy::Union,
+            top_k: 10,
+        };
+        assert_eq!(outer.leaf_count(), 3);
+    }
+
+    // -- top_k --------------------------------------------------------------
+
+    #[test]
+    fn test_top_k_leaf() {
+        assert_eq!(QueryNode::vector(vec![], 7).top_k(), 7);
+        assert_eq!(QueryNode::keyword("q", 3).top_k(), 3);
+    }
+
+    #[test]
+    fn test_top_k_fuse() {
+        let q = QueryNode::hybrid(vec![1.0], "q", 15);
+        assert_eq!(q.top_k(), 15);
+    }
+
+    #[test]
+    fn test_top_k_filter_topk_predicate() {
+        let node = QueryNode::keyword("q", 100).with_top_k(5);
+        assert_eq!(node.top_k(), 5);
+    }
+
+    #[test]
+    fn test_top_k_filter_non_topk_predicate() {
+        let node = QueryNode::keyword("q", 20).with_min_score(DeterministicScore::from_f64(0.5));
+        // min_score filter doesn't change top_k -- falls through to child.
+        assert_eq!(node.top_k(), 20);
+    }
+
+    // -- depth --------------------------------------------------------------
+
+    #[test]
+    fn test_depth_leaf() {
+        assert_eq!(QueryNode::vector(vec![1.0], 5).depth(), 1);
+        assert_eq!(QueryNode::keyword("q", 5).depth(), 1);
+    }
+
+    #[test]
+    fn test_depth_hybrid() {
+        let q = QueryNode::hybrid(vec![1.0], "q", 10);
+        // Fuse -> leaf = depth 2
+        assert_eq!(q.depth(), 2);
+    }
+
+    #[test]
+    fn test_depth_chained_filters() {
+        let q = QueryNode::keyword("q", 10)
+            .with_min_score(DeterministicScore::from_f64(0.5))
+            .with_top_k(5);
+        // TopK(Filter(MinScore(Keyword))) = 3 wrappers + 1 leaf = depth 3
+        assert_eq!(q.depth(), 3);
+    }
+
+    // -- with_min_score / with_top_k chaining -------------------------------
+
+    #[test]
+    fn test_builder_chaining() {
+        let node = QueryNode::keyword("rust async patterns", 20)
+            .with_min_score(DeterministicScore::from_f64(0.3))
+            .with_top_k(10);
+
+        assert_eq!(node.top_k(), 10);
+        assert_eq!(node.leaf_count(), 1);
+        assert!(!node.is_empty());
+    }
+
+    // -- Serde round-trip ---------------------------------------------------
+
+    #[test]
+    fn test_serde_roundtrip_vector() {
+        let node = QueryNode::vector(vec![0.1, 0.2, 0.3], 10);
+        let json = serde_json::to_string(&node).expect("serialize");
+        let back: QueryNode = serde_json::from_str(&json).expect("deserialize");
+        assert_eq!(back.top_k(), 10);
+        assert_eq!(back.leaf_count(), 1);
+    }
+
+    #[test]
+    fn test_serde_roundtrip_keyword() {
+        let node = QueryNode::keyword("hello world", 5);
+        let json = serde_json::to_string(&node).expect("serialize");
+        let back: QueryNode = serde_json::from_str(&json).expect("deserialize");
+        assert_eq!(back.top_k(), 5);
+    }
+
+    #[test]
+    fn test_serde_roundtrip_hybrid() {
+        let node = QueryNode::hybrid(vec![1.0, 2.0], "search query", 10);
+        let json = serde_json::to_string(&node).expect("serialize");
+        let back: QueryNode = serde_json::from_str(&json).expect("deserialize");
+        assert_eq!(back.top_k(), 10);
+        assert_eq!(back.leaf_count(), 2);
+    }
+
+    #[test]
+    fn test_serde_roundtrip_complex() {
+        let node = QueryNode::hybrid(vec![0.5; 4], "complex query", 10)
+            .with_min_score(DeterministicScore::from_f64(0.2))
+            .with_top_k(5);
+
+        let json = serde_json::to_string_pretty(&node).expect("serialize");
+        let back: QueryNode = serde_json::from_str(&json).expect("deserialize");
+
+        assert_eq!(back.top_k(), 5);
+        assert_eq!(back.leaf_count(), 2);
+        assert!(!back.is_empty());
+    }
+
+    #[test]
+    fn test_serde_roundtrip_empty() {
+        let node = QueryNode::Empty;
+        let json = serde_json::to_string(&node).expect("serialize");
+        let back: QueryNode = serde_json::from_str(&json).expect("deserialize");
+        assert!(back.is_empty());
+    }
+
+    #[test]
+    fn test_serde_roundtrip_filter_metadata() {
+        let node = QueryNode::Filter {
+            child: Box::new(QueryNode::keyword("docs", 10)),
+            predicate: FilterPredicate::MetadataEquals {
+                field: "type".to_string(),
+                value: serde_json::json!("memory"),
+            },
+        };
+        let json = serde_json::to_string(&node).expect("serialize");
+        let back: QueryNode = serde_json::from_str(&json).expect("deserialize");
+        assert_eq!(back.leaf_count(), 1);
+    }
+
+    #[test]
+    fn test_serde_roundtrip_rerank() {
+        let node = QueryNode::Rerank {
+            child: Box::new(QueryNode::keyword("rerank me", 20)),
+            method: RerankMethod::CrossEncoder {
+                model: "ms-marco-MiniLM".to_string(),
+            },
+            top_k: 10,
+        };
+        let json = serde_json::to_string(&node).expect("serialize");
+        let back: QueryNode = serde_json::from_str(&json).expect("deserialize");
+        assert_eq!(back.top_k(), 10);
+    }
+
+    #[test]
+    fn test_serde_roundtrip_compound_predicate() {
+        let pred = FilterPredicate::And(vec![
+            FilterPredicate::MinScore(DeterministicScore::from_f64(0.3)),
+            FilterPredicate::Or(vec![
+                FilterPredicate::MetadataEquals {
+                    field: "lang".to_string(),
+                    value: serde_json::json!("en"),
+                },
+                FilterPredicate::MetadataEquals {
+                    field: "lang".to_string(),
+                    value: serde_json::json!("zh"),
+                },
+            ]),
+        ]);
+        let node = QueryNode::Filter {
+            child: Box::new(QueryNode::keyword("test", 10)),
+            predicate: pred,
+        };
+        let json = serde_json::to_string(&node).expect("serialize");
+        let back: QueryNode = serde_json::from_str(&json).expect("deserialize");
+        assert_eq!(back.leaf_count(), 1);
+    }
+}
diff --git a/crates/khive-retrieval/src/replay/engine_replay.rs b/crates/khive-retrieval/src/replay/engine_replay.rs
new file mode 100644
index 00000000..d25a85bb
--- /dev/null
+++ b/crates/khive-retrieval/src/replay/engine_replay.rs
@@ -0,0 +1,1027 @@
+//! Temporal replay APIs — Three Observables Feedback Loop (Phase 3).
+//!
+//! Provides four primitives for diffing past vs. present weight state:
+//!
+//! | Function              | Purpose                                                    |
+//! |-----------------------|------------------------------------------------------------|
+//! | [`weights_as_of`]     | Reconstruct weight snapshot at a past timestamp            |
+//! | [`replay`]            | Re-run vector search with historical or live weights       |
+//! | [`diff`]              | Jaccard + rank-delta report between two temporal replays   |
+//! | [`rank_history`]      | Weight change timeline for a single atom                   |
+//! | [`regression_check`]  | Re-run a stored compose event against current weights      |
+//!
+//! # Design
+//!
+//! The weight_events table is the ground-truth log.  No external baseline is
+//! needed — the log IS the reference.  Temporal replay reconstructs past weight
+//! state by selecting the latest `weight_events` row per (lambda_id, atom_id)
+//! with `ts ≤ at_time`.
+//!
+//! Ranking is performed by multiplying raw vector similarity scores by per-atom
+//! weights, then returning the top-K atom IDs in descending score order.
+//!
+//! # Drift Metrics (submodule)
+//!
+//! [`metrics::jaccard_stability_7d`] — rolling 7-day median Jaccard from
+//! regression_check over stored compose events.
+//!
+//! [`metrics::atom_rank_variance`] — variance of an atom's rank position across
+//! all compose events where it appeared in top_atoms.
+//!
+//! [`metrics::adjustment_rate_per_day`] — count of weight_events rows per day,
+//! useful for detecting runaway adjustment patterns.
+
+// The `engine` feature is a future integration point (EmbeddedEngine not yet ported).
+// Silence the cfg warning — the feature gate is intentionally undeclared so it never activates.
+#![allow(unexpected_cfgs)]
+
+use std::collections::{HashMap, HashSet};
+use std::sync::Arc;
+
+use chrono::{DateTime, NaiveDate, Utc};
+use parking_lot::Mutex;
+#[cfg(feature = "engine")]
+use rusqlite::OptionalExtension as _;
+use rusqlite::{params, Connection};
+use serde::{Deserialize, Serialize};
+use uuid::Uuid;
+
+use crate::persist::PersistError as EngineError;
+use crate::weights::WEIGHT_FLOOR;
+// TODO(port-engine): EmbeddedEngine not yet in khive-retrieval scope; stub for compilation.
+#[allow(dead_code)]
+type EmbeddedEngine = ();
+
+// ---------------------------------------------------------------------------
+// Public types
+// ---------------------------------------------------------------------------
+
+/// Per-atom weight change record in chronological order.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct RankHistoryPoint {
+    /// Timestamp of the adjustment (UTC).
+    pub ts: DateTime<Utc>,
+    /// Weight after this adjustment was applied.
+    pub weight_after: f32,
+    /// Raw delta that was applied.
+    pub delta: f32,
+    /// Channel that emitted this adjustment (`ambient`, `explicit`, `ground_truth`).
+    pub channel: String,
+    /// Optional context identifier carried by the caller.
+    pub context_id: Option<String>,
+    /// Optional brain_events UUID that triggered this adjustment.
+    pub event_id: Option<Uuid>,
+}
+
+/// Diff report comparing two temporal rank lists for the same query.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct DiffReport {
+    /// Jaccard similarity: |A ∩ B| / |A ∪ B|.
+    pub jaccard: f32,
+    /// Atoms present in the t2 result but absent from t1.
+    pub added: Vec<Uuid>,
+    /// Atoms present in the t1 result but absent from t2.
+    pub dropped: Vec<Uuid>,
+    /// Per-atom rank change from t1 → t2 (negative = moved up).
+    pub rank_deltas: Vec<(Uuid, i32)>,
+    /// Ordered top-K atom IDs at t1.
+    pub top_k_at_t1: Vec<Uuid>,
+    /// Ordered top-K atom IDs at t2.
+    pub top_k_at_t2: Vec<Uuid>,
+}
+
+/// Report comparing a stored compose event's original top_atoms against
+/// the same query re-run with current weights.
+#[derive(Debug, Clone, Serialize, Deserialize)]
+pub struct RegressionReport {
+    /// Brain-events row UUID that was replayed.
+    pub event_id: Uuid,
+    /// Query text recorded at compose time (empty string if none).
+    pub query_text: String,
+    /// Ordered atom list stored in the original compose event.
+    pub original_top_atoms: Vec<Uuid>,
+    /// Ordered atom list from re-running the query with current weights.
+    pub current_top_atoms: Vec<Uuid>,
+    /// Jaccard similarity between the two lists.
+    pub jaccard: f32,
+    /// Atoms present in current but absent from original.
+    pub added: Vec<Uuid>,
+    /// Atoms present in original but absent from current.
+    pub dropped: Vec<Uuid>,
+    /// UTC timestamp when the original compose event was recorded.
+    pub timestamp_original: DateTime<Utc>,
+}
+
+// ---------------------------------------------------------------------------
+// weights_as_of
+// ---------------------------------------------------------------------------
+
+/// Reconstruct the weight state for a lambda at a given point in time.
+///
+/// For each (lambda_id, atom_id) pair, selects the latest `weight_events` row
+/// with `ts ≤ at_time` and returns `weight_after`.  Atoms with no history
+/// before `at_time` are absent from the map; callers should treat absence as
+/// the implicit default of 1.0.
+///
+/// # SQL
+///
+/// ```sql
+/// SELECT atom_id, weight_after
+/// FROM (
+///     SELECT atom_id, weight_after,
+///            ROW_NUMBER() OVER (PARTITION BY atom_id ORDER BY ts DESC) as rn
+///     FROM weight_events
+///     WHERE lambda_id = ?1 AND ts <= ?2
+/// )
+/// WHERE rn = 1
+/// ```
+pub async fn weights_as_of(
+    conn: &Arc<Mutex<Connection>>,
+    namespace: &str,
+    at_time: DateTime<Utc>,
+) -> Result<HashMap<Uuid, f32>, EngineError> {
+    let conn = Arc::clone(conn);
+    let namespace_str = namespace.to_string();
+    let at_time_us = at_time.timestamp_micros();
+
+    tokio::task::spawn_blocking(move || {
+        let conn = conn.lock();
+        let mut result = HashMap::new();
+
+        let mut stmt = conn
+            .prepare(
+                "SELECT atom_id, weight_after
+                 FROM (
+                     SELECT atom_id, weight_after,
+                            ROW_NUMBER() OVER (PARTITION BY atom_id ORDER BY ts DESC) as rn
+                     FROM weight_events
+                     WHERE namespace = ?1 AND ts <= ?2
+                 )
+                 WHERE rn = 1",
+            )
+            .map_err(|e| EngineError::Internal(format!("weights_as_of prepare: {e}")))?;
+
+        let mut rows = stmt
+            .query(params![namespace_str, at_time_us])
+            .map_err(|e| EngineError::Internal(format!("weights_as_of query: {e}")))?;
+
+        while let Some(row) = rows
+            .next()
+            .map_err(|e| EngineError::Internal(format!("weights_as_of row: {e}")))?
+        {
+            let atom_id_str: String = row
+                .get(0)
+                .map_err(|e| EngineError::Internal(format!("weights_as_of col 0: {e}")))?;
+            let weight_after: f64 = row
+                .get(1)
+                .map_err(|e| EngineError::Internal(format!("weights_as_of col 1: {e}")))?;
+
+            if let Ok(uuid) = atom_id_str.parse::<Uuid>() {
+                let clamped =
+                    (weight_after as f32).clamp(WEIGHT_FLOOR, crate::weights::WEIGHT_CEIL);
+                result.insert(uuid, clamped);
+            }
+        }
+
+        Ok(result)
+    })
+    .await
+    .map_err(|e| EngineError::Internal(format!("weights_as_of join: {e}")))?
+}
+
+// ---------------------------------------------------------------------------
+// Namespace isolation helper (B1 fix — must come before replay)
+// ---------------------------------------------------------------------------
+
+/// Return the subset of `candidate_ids` whose atoms are owned by `namespace`.
+///
+/// Queries `atoms WHERE namespace = ?1 AND id IN (?) AND deleted_at IS NULL`.
+/// Preserves no particular order — the caller re-orders by HNSW rank after
+/// filtering.
+///
+/// The in-memory HNSW snapshot is global (it indexes all atoms regardless of
+/// namespace).  Without this post-filter, `replay()` would return atoms from
+/// any namespace that happen to be semantically close to the query, leaking
+/// cross-tenant atom UUIDs to the requesting lambda.
+#[allow(dead_code)] // used only when feature = "engine" is active
+fn filter_atoms_by_namespace(
+    conn: &Connection,
+    namespace: &str,
+    candidate_ids: &[Uuid],
+) -> Result<HashSet<Uuid>, EngineError> {
+    if candidate_ids.is_empty() {
+        return Ok(HashSet::new());
+    }
+
+    // Build the IN clause with per-item placeholders.
+    // SQLITE_SAFE_BIND_LIMIT is 999; candidate_k is at most top_k*4 (≤400 for top_k=100).
+    let placeholders: Vec<String> = candidate_ids.iter().map(|_| "?".to_string()).collect();
+    let sql = format!(
+        "SELECT id FROM atoms WHERE namespace = ? AND id IN ({}) AND deleted_at IS NULL",
+        placeholders.join(", ")
+    );
+
+    let mut stmt = conn
+        .prepare(&sql)
+        .map_err(|e| EngineError::Internal(format!("filter_atoms_by_namespace prepare: {e}")))?;
+
+    // Collect all bind values as Strings so they have a uniform owned type.
+    // namespace goes first, then the UUID strings for the IN clause.
+    let id_strings: Vec<String> = candidate_ids.iter().map(|u| u.to_string()).collect();
+    let all_values: Vec<&str> = std::iter::once(namespace)
+        .chain(id_strings.iter().map(|s| s.as_str()))
+        .collect();
+
+    let rows = stmt
+        .query_map(rusqlite::params_from_iter(all_values.iter()), |row| {
+            row.get::<_, String>(0)
+        })
+        .map_err(|e| EngineError::Internal(format!("filter_atoms_by_namespace query: {e}")))?;
+
+    let owned: HashSet<Uuid> = rows
+        .filter_map(|r| r.ok())
+        .filter_map(|s| s.parse::<Uuid>().ok())
+        .collect();
+
+    Ok(owned)
+}
+
+// TODO(port-engine): replay, diff, regression_check, load_brain_event, and
+// jaccard_stability_7d require EmbeddedEngine which is not yet ported to
+// khive-retrieval scope. Gated behind "engine" feature until ported.
+#[cfg(feature = "engine")]
+/// When `weight_override` is `Some(map)`, each atom's raw similarity score is
+/// multiplied by the weight from the map (absent atoms default to 1.0).  When
+/// `None`, current `atom_weights` rows are used via `batch_load_weights`.
+///
+/// Returns atom IDs in descending weighted-score order.
+pub async fn replay(
+    engine: &EmbeddedEngine,
+    namespace: &str,
+    query_text: &str,
+    at_time: Option<DateTime<Utc>>,
+    top_k: usize,
+) -> Result<Vec<Uuid>, EngineError> {
+    // Step 1: embed the query.
+    let query_vec = engine
+        .embed_query(query_text)
+        .await
+        .map_err(|e| EngineError::Embedding(format!("replay embed: {e}")))?;
+
+    // Step 2: vector search via HNSW for a broad candidate set.
+    // Do this first so we know which atom IDs to load weights for.
+    let candidate_k = (top_k * 4).max(20);
+    let raw_results = engine
+        .search_by_vector(&query_vec, candidate_k)
+        .await
+        .map_err(|e| EngineError::Retrieval(format!("replay search: {e}")))?;
+
+    // Step 2b (B1 fix): filter to atoms owned by this lambda's namespace.
+    //
+    // The HNSW snapshot is global — it contains atoms from every namespace
+    // stored in this engine instance.  Without this filter, `replay()` would
+    // leak cross-tenant atom UUIDs into the ranked result (they default to
+    // weight 1.0 when absent from the weight map, potentially outranking the
+    // requesting lambda's own down-weighted atoms).
+    //
+    // A single engine instance may serve multiple lambdas whose atoms co-exist
+    // in SQLite but whose HNSW vectors are interleaved.
+    let raw_results = {
+        let conn_guard = engine.store().conn();
+        let c = conn_guard.lock();
+        let all_candidate_ids: Vec<Uuid> = raw_results.iter().map(|h| h.id).collect();
+        let owned = filter_atoms_by_namespace(&c, namespace, &all_candidate_ids)?;
+        // Re-filter raw_results (preserving HNSW rank order).
+        raw_results
+            .into_iter()
+            .filter(|h| owned.contains(&h.id))
+            .collect::<Vec<_>>()
+    };
+
+    // Step 3: resolve weights for the candidate atom IDs.
+    let candidate_ids: Vec<Uuid> = raw_results.iter().map(|h| h.id).collect();
+    let weights: HashMap<Uuid, f32> = match at_time {
+        Some(t) => weights_as_of(&engine.store().conn(), namespace, t).await?,
+        None => {
+            crate::weights::batch_load_weights(&engine.store().conn(), namespace, &candidate_ids)
+                .await
+                .unwrap_or_default()
+        }
+    };
+
+    // Step 4: apply weight multiplier.
+    let mut scored: Vec<(Uuid, f32)> = raw_results
+        .into_iter()
+        .map(|hit| {
+            let w = weights.get(&hit.id).copied().unwrap_or(1.0_f32);
+            (hit.id, hit.score * w)
+        })
+        .collect();
+
+    // Step 5: sort descending and truncate.
+    scored.sort_by(|a, b| b.1.partial_cmp(&a.1).unwrap_or(std::cmp::Ordering::Equal));
+    scored.truncate(top_k);
+
+    Ok(scored.into_iter().map(|(id, _)| id).collect())
+}
+
+/// Build a [`DiffReport`] from two ordered atom lists.
+#[allow(dead_code)] // used only when feature = "engine" is active
+fn compute_diff_report(top_k_at_t1: Vec<Uuid>, top_k_at_t2: Vec<Uuid>) -> DiffReport {
+    use std::collections::HashSet;
+
+    let set_t1: HashSet<Uuid> = top_k_at_t1.iter().copied().collect();
+    let set_t2: HashSet<Uuid> = top_k_at_t2.iter().copied().collect();
+
+    let intersection_size = set_t1.intersection(&set_t2).count();
+    let union_size = set_t1.union(&set_t2).count();
+    let jaccard = if union_size == 0 {
+        1.0_f32
+    } else {
+        intersection_size as f32 / union_size as f32
+    };
+
+    let added: Vec<Uuid> = set_t2.difference(&set_t1).copied().collect();
+    let dropped: Vec<Uuid> = set_t1.difference(&set_t2).copied().collect();
+
+    // Build rank maps (0-indexed).
+    let rank_t1: HashMap<Uuid, usize> = top_k_at_t1
+        .iter()
+        .enumerate()
+        .map(|(i, &id)| (id, i))
+        .collect();
+    let rank_t2: HashMap<Uuid, usize> = top_k_at_t2
+        .iter()
+        .enumerate()
+        .map(|(i, &id)| (id, i))
+        .collect();
+
+    // Rank deltas only for atoms present in both.
+    let rank_deltas: Vec<(Uuid, i32)> = set_t1
+        .intersection(&set_t2)
+        .filter_map(|&id| {
+            let r1 = *rank_t1.get(&id)?;
+            let r2 = *rank_t2.get(&id)?;
+            Some((id, r2 as i32 - r1 as i32))
+        })
+        .collect();
+
+    DiffReport {
+        jaccard,
+        added,
+        dropped,
+        rank_deltas,
+        top_k_at_t1,
+        top_k_at_t2,
+    }
+}
+
+// ---------------------------------------------------------------------------
+// diff — engine-dependent, gated
+// ---------------------------------------------------------------------------
+
+/// Compute the diff between two temporal replays of the same query.
+#[cfg(feature = "engine")]
+pub async fn diff(
+    engine: &EmbeddedEngine,
+    namespace: &str,
+    query_text: &str,
+    t1: DateTime<Utc>,
+    t2: DateTime<Utc>,
+    top_k: usize,
+) -> Result<DiffReport, EngineError> {
+    let (top_k_at_t1, top_k_at_t2) = tokio::try_join!(
+        replay(engine, namespace, query_text, Some(t1), top_k),
+        replay(engine, namespace, query_text, Some(t2), top_k),
+    )?;
+    Ok(compute_diff_report(top_k_at_t1, top_k_at_t2))
+}
+
+// ---------------------------------------------------------------------------
+// rank_history
+// ---------------------------------------------------------------------------
+
+/// Return the full weight-change history for a single (namespace, atom_id) pair
+/// in ascending timestamp order.
+///
+/// Useful for answering "why did this atom's rank change?" — each row captures
+/// the delta, resulting weight, channel, and optional originating context/event.
+pub async fn rank_history(
+    conn: &Arc<Mutex<Connection>>,
+    namespace: &str,
+    atom_id: Uuid,
+) -> Result<Vec<RankHistoryPoint>, EngineError> {
+    let conn = Arc::clone(conn);
+    let namespace_str = namespace.to_string();
+    let atom_id_str = atom_id.to_string();
+
+    tokio::task::spawn_blocking(move || {
+        let conn = conn.lock();
+        let mut stmt = conn
+            .prepare(
+                "SELECT ts, weight_after, delta, channel, context_id, event_id
+                 FROM weight_events
+                 WHERE namespace = ?1 AND atom_id = ?2
+                 ORDER BY ts ASC",
+            )
+            .map_err(|e| EngineError::Internal(format!("rank_history prepare: {e}")))?;
+
+        let rows = stmt
+            .query_map(params![namespace_str, atom_id_str], |row| {
+                let ts_us: i64 = row.get(0)?;
+                let weight_after: f64 = row.get(1)?;
+                let delta: f64 = row.get(2)?;
+                let channel: String = row.get(3)?;
+                let context_id: Option<String> = row.get(4)?;
+                let event_id_str: Option<String> = row.get(5)?;
+                Ok((
+                    ts_us,
+                    weight_after,
+                    delta,
+                    channel,
+                    context_id,
+                    event_id_str,
+                ))
+            })
+            .map_err(|e| EngineError::Internal(format!("rank_history query: {e}")))?;
+
+        let mut points = Vec::new();
+        for row in rows {
+            let (ts_us, weight_after, delta, channel, context_id, event_id_str) =
+                row.map_err(|e| EngineError::Internal(format!("rank_history row: {e}")))?;
+
+            let ts = DateTime::from_timestamp_micros(ts_us).unwrap_or_else(Utc::now);
+
+            let event_id = event_id_str.and_then(|s| s.parse::<Uuid>().ok());
+
+            points.push(RankHistoryPoint {
+                ts,
+                weight_after: weight_after as f32,
+                delta: delta as f32,
+                channel,
+                context_id,
+                event_id,
+            });
+        }
+
+        Ok(points)
+    })
+    .await
+    .map_err(|e| EngineError::Internal(format!("rank_history join: {e}")))?
+}
+
+// ---------------------------------------------------------------------------
+// regression_check — engine-dependent, gated
+// ---------------------------------------------------------------------------
+
+/// Re-run the query from a stored compose event against current weights.
+#[cfg(feature = "engine")]
+pub async fn regression_check(
+    engine: &EmbeddedEngine,
+    event_id: Uuid,
+) -> Result<RegressionReport, EngineError> {
+    // Step 1: load brain_events row.
+    // load_brain_event now returns InvalidData on malformed payload (B4 fix)
+    // and includes the stored embedding_model for validation (B5 fix).
+    let (query_text, original_top_atoms, namespace, created_at_us, stored_model) =
+        load_brain_event(engine, event_id).await?;
+
+    // Step 2 (B5 fix): validate embedding model compatibility.
+    //
+    // If the stored row recorded an embedding_model AND it differs from the
+    // engine's current model, the query re-embedding would produce a vector in
+    // a different space, making the resulting Jaccard meaningless.  We surface
+    // this as a distinct error so callers can skip or re-embed rather than
+    // silently reporting catastrophic drift.
+    //
+    // Legacy rows (stored_model == None, i.e. pre-Phase-2 events) are accepted
+    // with a warning — we cannot validate compatibility but also cannot reject
+    // all historical data.
+    if let Some(ref stored) = stored_model {
+        let current = engine.embedding_model();
+        if stored != current {
+            return Err(EngineError::IncompatibleEmbeddingModel {
+                stored: stored.clone(),
+                current: current.to_string(),
+            });
+        }
+    } else {
+        tracing::warn!(
+            event_id = %event_id,
+            "regression_check: brain_events row has no embedding_model (legacy row); \
+             proceeding without model compatibility check"
+        );
+    }
+
+    // Step 3: replay with current weights.
+    let current_top_atoms = replay(
+        engine,
+        &namespace,
+        &query_text,
+        None, // current weights
+        original_top_atoms.len().max(10),
+    )
+    .await?;
+
+    // Step 4: compute Jaccard.
+    let report = compute_diff_report(original_top_atoms.clone(), current_top_atoms.clone());
+
+    let timestamp_original =
+        DateTime::from_timestamp_micros(created_at_us).unwrap_or_else(Utc::now);
+
+    Ok(RegressionReport {
+        event_id,
+        query_text,
+        original_top_atoms,
+        current_top_atoms,
+        jaccard: report.jaccard,
+        added: report.added,
+        dropped: report.dropped,
+        timestamp_original,
+    })
+}
+
+/// Load a brain_events row and extract replay inputs. (engine-gated)
+#[cfg(feature = "engine")]
+async fn load_brain_event(
+    engine: &EmbeddedEngine,
+    event_id: Uuid,
+) -> Result<(String, Vec<Uuid>, String, i64, Option<String>), EngineError> {
+    // Use the legacy conn() path (Arc<Mutex<Connection>>) which is Send + Clone.
+    let conn = engine.store().conn();
+    let event_id_str = event_id.to_string();
+
+    tokio::task::spawn_blocking(move || {
+        let conn = conn.lock();
+        let guard = &*conn;
+
+        // Select query_text, payload (top_atoms lives in payload JSON),
+        // actor_id (our namespace proxy), created_at, and the embedding_model
+        // column added in migration v27.
+        let result = guard
+            .query_row(
+                "SELECT query_text, payload, actor_id, created_at, embedding_model
+                 FROM brain_events WHERE id = ?1",
+                params![event_id_str.clone()],
+                |row| {
+                    let query_text: Option<String> = row.get(0)?;
+                    let payload_str: String = row.get(1)?;
+                    let actor_id: Option<String> = row.get(2)?;
+                    let created_at: i64 = row.get(3)?;
+                    let embedding_model: Option<String> = row.get(4)?;
+                    Ok((
+                        query_text,
+                        payload_str,
+                        actor_id,
+                        created_at,
+                        embedding_model,
+                    ))
+                },
+            )
+            .optional()
+            .map_err(|e| EngineError::Internal(format!("load_brain_event query: {e}")))?;
+
+        let (query_text_opt, payload_str, actor_id_opt, created_at, stored_model) = result
+            .ok_or_else(|| {
+                EngineError::NotFound(format!("brain_events row not found: {event_id}"))
+            })?;
+
+        let query_text = query_text_opt.unwrap_or_default();
+
+        // B4 fix: propagate JSON parse errors instead of silently substituting
+        // `{}`, which would cause `top_atoms` to be empty and `regression_check`
+        // to report false 100% drift.
+        let payload: serde_json::Value = serde_json::from_str(&payload_str).map_err(|e| {
+            EngineError::InvalidData(format!(
+                "brain_events row {event_id} has unparseable payload JSON: {e}"
+            ))
+        })?;
+
+        // top_atoms in payload is an array of UUID strings.
+        let top_atoms: Vec<Uuid> = payload
+            .get("top_atoms")
+            .and_then(|v| v.as_array())
+            .map(|arr| {
+                arr.iter()
+                    .filter_map(|v| v.as_str().and_then(|s| s.parse::<Uuid>().ok()))
+                    .collect()
+            })
+            .unwrap_or_default();
+
+        // B4 fix (continued): an empty top_atoms list is also invalid data —
+        // it would produce a trivially-true jaccard=0 without indicating real drift.
+        if top_atoms.is_empty() {
+            return Err(EngineError::InvalidData(format!(
+                "brain_events row {event_id} has missing or empty top_atoms in payload"
+            )));
+        }
+
+        // namespace from payload field (most reliable) or actor_id column.
+        // Note: stored payload uses key "lambda_id" (legacy; kept for DB compat).
+        let namespace = payload
+            .get("lambda_id")
+            .or_else(|| payload.get("namespace"))
+            .and_then(|v| v.as_str())
+            .map(|s| s.to_string())
+            .or(actor_id_opt)
+            .unwrap_or_default();
+
+        Ok((query_text, top_atoms, namespace, created_at, stored_model))
+    })
+    .await
+    .map_err(|e| EngineError::Internal(format!("load_brain_event join: {e}")))?
+}
+
+// ---------------------------------------------------------------------------
+// Drift Metrics
+// ---------------------------------------------------------------------------
+
+/// Drift metrics for the Three Observables feedback loop.
+pub mod metrics {
+    use super::*;
+
+    /// M1: Rolling 7-day median Jaccard stability. (engine-gated)
+    #[cfg(feature = "engine")]
+    pub async fn jaccard_stability_7d(
+        engine: &EmbeddedEngine,
+        namespace: &str,
+    ) -> Result<f32, EngineError> {
+        let conn = engine.store().conn();
+        // brain_events.payload stores namespace under legacy key "lambda_id" (#2536).
+        // The JSON key cannot be renamed without a data migration; the column name
+        // was already `namespace` in v25 when the table was created.
+        let namespace_str = namespace.to_string();
+
+        // Collect event IDs from the last 7 days where actor_id matches namespace.
+        let event_ids: Vec<Uuid> = {
+            let conn = Arc::clone(&conn);
+            tokio::task::spawn_blocking(move || {
+                let c = conn.lock();
+                let cutoff_us = (Utc::now() - chrono::Duration::days(7)).timestamp_micros();
+                let mut stmt = c
+                    .prepare(
+                        "SELECT id FROM brain_events
+                         WHERE kind = 'ComposeEvent'
+                           AND created_at >= ?1
+                           AND json_extract(payload, '$.lambda_id') = ?2
+                         ORDER BY created_at DESC",
+                    )
+                    .map_err(|e| {
+                        EngineError::Internal(format!("jaccard_stability_7d prepare: {e}"))
+                    })?;
+
+                let rows = stmt
+                    .query_map(params![cutoff_us, namespace_str], |row| {
+                        row.get::<_, String>(0)
+                    })
+                    .map_err(|e| {
+                        EngineError::Internal(format!("jaccard_stability_7d query: {e}"))
+                    })?;
+
+                let ids: Vec<Uuid> = rows
+                    .filter_map(|r| r.ok())
+                    .filter_map(|s| s.parse::<Uuid>().ok())
+                    .collect();
+                Ok::<Vec<Uuid>, EngineError>(ids)
+            })
+            .await
+            .map_err(|e| EngineError::Internal(format!("jaccard_stability_7d join: {e}")))??
+        };
+
+        if event_ids.is_empty() {
+            return Ok(1.0);
+        }
+
+        // Run regression_check on each event; collect Jaccard values.
+        let mut jaccards: Vec<f32> = Vec::new();
+        for eid in event_ids {
+            match regression_check(engine, eid).await {
+                Ok(report) => jaccards.push(report.jaccard),
+                Err(_) => {
+                    // Non-fatal: skip events that fail to replay (e.g., empty query).
+                    continue;
+                }
+            }
+        }
+
+        if jaccards.is_empty() {
+            return Ok(1.0);
+        }
+
+        // Median (sort + mid point).
+        jaccards.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
+        let mid = jaccards.len() / 2;
+        let median = if jaccards.len() % 2 == 0 {
+            (jaccards[mid - 1] + jaccards[mid]) / 2.0
+        } else {
+            jaccards[mid]
+        };
+
+        Ok(median)
+    }
+
+    /// M2: Rank variance for an atom across all compose events where it appeared.
+    ///
+    /// High variance = context-sensitive atom; low variance = reliably ranked.
+    /// Variance is computed over the 0-indexed rank positions in `top_atoms`
+    /// arrays stored in `brain_events.payload`.
+    ///
+    /// Returns 0.0 when the atom has appeared in fewer than 2 events.
+    pub async fn atom_rank_variance(
+        conn: &Arc<Mutex<Connection>>,
+        namespace: &str,
+        atom_id: Uuid,
+    ) -> Result<f32, EngineError> {
+        let conn = Arc::clone(conn);
+        let atom_id_str = atom_id.to_string();
+        let namespace_str = namespace.to_string();
+
+        tokio::task::spawn_blocking(move || {
+            let c = conn.lock();
+            let mut stmt = c
+                .prepare(
+                    "SELECT payload FROM brain_events
+                     WHERE kind = 'ComposeEvent'
+                       AND json_extract(payload, '$.lambda_id') = ?1",
+                )
+                .map_err(|e| EngineError::Internal(format!("atom_rank_variance prepare: {e}")))?;
+
+            let rows = stmt
+                .query_map(params![namespace_str], |row| row.get::<_, String>(0))
+                .map_err(|e| EngineError::Internal(format!("atom_rank_variance query: {e}")))?;
+
+            let mut ranks: Vec<f32> = Vec::new();
+            for row in rows.filter_map(|r| r.ok()) {
+                let payload: serde_json::Value =
+                    serde_json::from_str(&row).unwrap_or(serde_json::json!({}));
+                if let Some(top_atoms) = payload.get("top_atoms").and_then(|v| v.as_array()) {
+                    if let Some(pos) = top_atoms
+                        .iter()
+                        .position(|v| v.as_str() == Some(&atom_id_str))
+                    {
+                        ranks.push(pos as f32);
+                    }
+                }
+            }
+
+            if ranks.len() < 2 {
+                return Ok(0.0_f32);
+            }
+
+            let mean = ranks.iter().sum::<f32>() / ranks.len() as f32;
+            let variance =
+                ranks.iter().map(|r| (r - mean).powi(2)).sum::<f32>() / ranks.len() as f32;
+            Ok(variance)
+        })
+        .await
+        .map_err(|e| EngineError::Internal(format!("atom_rank_variance join: {e}")))?
+    }
+
+    /// M3: Count of weight_events per calendar day over the last `days` days.
+    ///
+    /// A sudden spike in adjustment rate signals a potential runaway feedback loop.
+    /// Returns a vec of `(NaiveDate, count)` sorted by date ascending.
+    pub async fn adjustment_rate_per_day(
+        conn: &Arc<Mutex<Connection>>,
+        namespace: &str,
+        days: u32,
+    ) -> Result<Vec<(NaiveDate, u64)>, EngineError> {
+        let conn = Arc::clone(conn);
+        let namespace_str = namespace.to_string();
+        let days_i64 = days as i64;
+
+        tokio::task::spawn_blocking(move || {
+            let c = conn.lock();
+            let cutoff_us = (Utc::now() - chrono::Duration::days(days_i64)).timestamp_micros();
+
+            let mut stmt = c
+                .prepare(
+                    // SQLite: integer division gives day bucket (micros / 86_400_000_000).
+                    "SELECT ts / 86400000000 AS day_bucket, COUNT(*) as cnt
+                     FROM weight_events
+                     WHERE namespace = ?1 AND ts >= ?2
+                     GROUP BY day_bucket
+                     ORDER BY day_bucket ASC",
+                )
+                .map_err(|e| {
+                    EngineError::Internal(format!("adjustment_rate_per_day prepare: {e}"))
+                })?;
+
+            let rows = stmt
+                .query_map(params![namespace_str, cutoff_us], |row| {
+                    let day_bucket: i64 = row.get(0)?;
+                    let cnt: i64 = row.get(1)?;
+                    Ok((day_bucket, cnt as u64))
+                })
+                .map_err(|e| {
+                    EngineError::Internal(format!("adjustment_rate_per_day query: {e}"))
+                })?;
+
+            let mut result = Vec::new();
+            for row in rows.filter_map(|r| r.ok()) {
+                let (day_bucket, cnt) = row;
+                // day_bucket = days since Unix epoch.
+                // NaiveDate::from_num_days_from_ce expects days from year 1, so offset.
+                // Unix epoch (1970-01-01) = day 719_163 in from_num_days_from_ce.
+                const UNIX_EPOCH_CE_DAYS: i32 = 719_163;
+                let date =
+                    NaiveDate::from_num_days_from_ce_opt(UNIX_EPOCH_CE_DAYS + day_bucket as i32)
+                        .unwrap_or(NaiveDate::from_ymd_opt(1970, 1, 1).unwrap());
+                result.push((date, cnt));
+            }
+
+            Ok(result)
+        })
+        .await
+        .map_err(|e| EngineError::Internal(format!("adjustment_rate_per_day join: {e}")))?
+    }
+}
+
+// ---------------------------------------------------------------------------
+// Unit tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use khive_db::SqliteStore;
+
+    fn make_conn() -> Arc<Mutex<Connection>> {
+        let store = SqliteStore::memory().expect("in-memory store");
+        store.conn()
+    }
+
+    fn insert_weight_event(
+        conn: &Arc<Mutex<Connection>>,
+        namespace: &str,
+        atom_id: &str,
+        weight_after: f32,
+        ts_us: i64,
+    ) {
+        let c = conn.lock();
+        c.execute(
+            "INSERT INTO weight_events (namespace, atom_id, delta, weight_after, channel, eta, ts)
+             VALUES (?1, ?2, 0.1, ?3, 'explicit', 0.1, ?4)",
+            params![namespace, atom_id, weight_after as f64, ts_us],
+        )
+        .expect("insert weight_event");
+    }
+
+    #[tokio::test]
+    async fn test_weights_as_of_returns_snapshot_at_time() {
+        let conn = make_conn();
+        let lambda = "lambda:test";
+        let atom = Uuid::new_v4();
+        let atom_str = atom.to_string();
+
+        // t0: weight 1.5
+        let t0_us: i64 = 1_000_000_000;
+        insert_weight_event(&conn, lambda, &atom_str, 1.5, t0_us);
+
+        // t1: weight 2.5 (later)
+        let t1_us: i64 = 2_000_000_000;
+        insert_weight_event(&conn, lambda, &atom_str, 2.5, t1_us);
+
+        // Query at t0 + 1: should see 1.5.
+        let at_t0 = DateTime::from_timestamp_micros(t0_us + 1).unwrap();
+        let snapshot = weights_as_of(&conn, lambda, at_t0)
+            .await
+            .expect("weights_as_of");
+        let w = *snapshot.get(&atom).expect("atom must be in snapshot");
+        assert!((w - 1.5).abs() < 0.01, "expected 1.5 at t0, got {w}");
+
+        // Query at t1 + 1: should see 2.5.
+        let at_t1 = DateTime::from_timestamp_micros(t1_us + 1).unwrap();
+        let snapshot2 = weights_as_of(&conn, lambda, at_t1)
+            .await
+            .expect("weights_as_of at t1");
+        let w2 = *snapshot2
+            .get(&atom)
+            .expect("atom must be in snapshot at t1");
+        assert!((w2 - 2.5).abs() < 0.01, "expected 2.5 at t1, got {w2}");
+    }
+
+    #[tokio::test]
+    async fn test_weights_as_of_before_any_event_is_empty() {
+        let conn = make_conn();
+        let lambda = "lambda:test";
+        let atom = Uuid::new_v4();
+        let atom_str = atom.to_string();
+
+        let t1_us: i64 = 2_000_000_000;
+        insert_weight_event(&conn, lambda, &atom_str, 2.0, t1_us);
+
+        // Query before t1: no rows.
+        let before = DateTime::from_timestamp_micros(t1_us - 1).unwrap();
+        let snapshot = weights_as_of(&conn, lambda, before)
+            .await
+            .expect("weights_as_of");
+        assert!(
+            snapshot.is_empty(),
+            "snapshot before any event should be empty"
+        );
+    }
+
+    #[tokio::test]
+    async fn test_rank_history_returns_ordered_events() {
+        let conn = make_conn();
+        let lambda = "lambda:rank_hist";
+        let atom = Uuid::new_v4();
+        let atom_str = atom.to_string();
+
+        insert_weight_event(&conn, lambda, &atom_str, 1.2, 1_000);
+        insert_weight_event(&conn, lambda, &atom_str, 1.4, 2_000);
+        insert_weight_event(&conn, lambda, &atom_str, 1.1, 3_000);
+
+        let history = rank_history(&conn, lambda, atom)
+            .await
+            .expect("rank_history");
+
+        assert_eq!(history.len(), 3, "expected 3 history points");
+        // Verify ascending timestamp order.
+        assert!(history[0].ts <= history[1].ts);
+        assert!(history[1].ts <= history[2].ts);
+        // Verify weights.
+        assert!((history[0].weight_after - 1.2).abs() < 0.01);
+        assert!((history[1].weight_after - 1.4).abs() < 0.01);
+        assert!((history[2].weight_after - 1.1).abs() < 0.01);
+    }
+
+    #[test]
+    fn test_compute_diff_report_jaccard() {
+        let t1 = vec![
+            Uuid::parse_str("00000000-0000-0000-0000-000000000001").unwrap(),
+            Uuid::parse_str("00000000-0000-0000-0000-000000000002").unwrap(),
+            Uuid::parse_str("00000000-0000-0000-0000-000000000003").unwrap(),
+        ];
+        let t2 = vec![
+            Uuid::parse_str("00000000-0000-0000-0000-000000000002").unwrap(),
+            Uuid::parse_str("00000000-0000-0000-0000-000000000003").unwrap(),
+            Uuid::parse_str("00000000-0000-0000-0000-000000000004").unwrap(),
+        ];
+
+        let report = compute_diff_report(t1, t2);
+
+        // |intersection| = 2 ({002, 003}), |union| = 4
+        assert!(
+            (report.jaccard - 0.5).abs() < 0.01,
+            "jaccard={}",
+            report.jaccard
+        );
+        assert_eq!(report.added.len(), 1, "one atom added");
+        assert_eq!(report.dropped.len(), 1, "one atom dropped");
+    }
+
+    #[test]
+    fn test_compute_diff_report_identical() {
+        let ids: Vec<Uuid> = (1..=3)
+            .map(|i| Uuid::parse_str(&format!("00000000-0000-0000-0000-{:012}", i)).unwrap())
+            .collect();
+
+        let report = compute_diff_report(ids.clone(), ids);
+        assert!((report.jaccard - 1.0).abs() < 0.001);
+        assert!(report.added.is_empty());
+        assert!(report.dropped.is_empty());
+    }
+
+    #[test]
+    fn test_compute_diff_report_disjoint() {
+        let t1 = vec![Uuid::parse_str("00000000-0000-0000-0000-000000000001").unwrap()];
+        let t2 = vec![Uuid::parse_str("00000000-0000-0000-0000-000000000002").unwrap()];
+
+        let report = compute_diff_report(t1, t2);
+        assert!((report.jaccard - 0.0).abs() < 0.001);
+        assert_eq!(report.added.len(), 1);
+        assert_eq!(report.dropped.len(), 1);
+    }
+
+    #[tokio::test]
+    async fn test_adjustment_rate_per_day() {
+        let conn = make_conn();
+        let lambda = "lambda:rate_test";
+        let atom = Uuid::new_v4();
+        let atom_str = atom.to_string();
+
+        // Insert 3 events: 2 "today" and 1 "yesterday".
+        let now_us = Utc::now().timestamp_micros();
+        let yesterday_us = now_us - 86_400_000_001_i64; // slightly over 24h ago
+
+        insert_weight_event(&conn, lambda, &atom_str, 1.1, now_us - 100);
+        insert_weight_event(&conn, lambda, &atom_str, 1.2, now_us - 50);
+        insert_weight_event(&conn, lambda, &atom_str, 1.3, yesterday_us);
+
+        let rates = metrics::adjustment_rate_per_day(&conn, lambda, 7)
+            .await
+            .expect("adjustment_rate_per_day");
+
+        // At least 2 buckets (today and yesterday within 7 days).
+        assert!(
+            rates.len() >= 1,
+            "expected at least 1 day bucket, got {:?}",
+            rates
+        );
+        // Sum of all counts should be 3.
+        let total: u64 = rates.iter().map(|(_, c)| c).sum();
+        assert_eq!(total, 3, "expected 3 total events");
+    }
+}
diff --git a/crates/khive-retrieval/src/replay/mod.rs b/crates/khive-retrieval/src/replay/mod.rs
new file mode 100644
index 00000000..0c90700b
--- /dev/null
+++ b/crates/khive-retrieval/src/replay/mod.rs
@@ -0,0 +1,5 @@
+//! Temporal replay APIs for retrieval weight analysis.
+
+pub mod engine_replay;
+
+pub use engine_replay::*;
diff --git a/crates/khive-retrieval/src/search_config.rs b/crates/khive-retrieval/src/search_config.rs
new file mode 100644
index 00000000..3dd3de84
--- /dev/null
+++ b/crates/khive-retrieval/src/search_config.rs
@@ -0,0 +1,253 @@
+//! Tunable hybrid search configuration — Brain Phase 7 substrate.
+//!
+//! `SearchConfig` is a per-call configuration that controls how vector and
+//! keyword results are retrieved and fused. It is the public API surface for
+//! `recall()` and compose's internal search phase.
+//!
+//! # Defaults (backward-compatible)
+//!
+//! `SearchConfig::default()` is designed to produce **identical results** to
+//! the pre-Phase-7 hardcoded search behavior: RRF with k=60, top_k=10,
+//! no min_score filter. Existing callers that do not supply a `SearchConfig`
+//! get the same behavior as before.
+//!
+//! # Presets
+//!
+//! | Preset | Strategy | vector_weight |
+//! |--------|----------|---------------|
+//! | `default()` / `hybrid_balanced()` | RRF (k=60) | 0.5 |
+//! | `vector_only()` | VectorOnly | 1.0 |
+//! | `keyword_only()` | KeywordOnly | 0.0 |
+//!
+//! # Usage in recall
+//!
+//! ```rust,ignore
+//! let opts = RecallOptions {
+//!     query: "metal inference kernel".to_string(),
+//!     search: Some(SearchConfig::vector_only()),
+//!     ..Default::default()
+//! };
+//! service.recall(opts).await?;
+//! ```
+
+use serde::{Deserialize, Serialize};
+
+use khive_fusion::{FusionStrategy, DEFAULT_RRF_K};
+
+/// Per-call configuration for hybrid search retrieval and fusion.
+///
+/// Added to `RecallOptions` and `ComposeOptions` as `search: Option<SearchConfig>`.
+/// When `None`, callers receive identical behavior to pre-Phase-7 code.
+#[derive(Clone, Debug, Serialize, Deserialize)]
+pub struct SearchConfig {
+    /// Maximum number of results to return.
+    ///
+    /// Default: 10.
+    #[serde(default = "default_top_k")]
+    pub top_k: usize,
+
+    /// Candidate pool multiplier over `top_k`.
+    ///
+    /// The retriever fetches `top_k * candidate_pool_multiplier` candidates
+    /// before fusion and reranking. Higher values improve recall quality at
+    /// the cost of more computation.
+    ///
+    /// Default: 3.
+    #[serde(default = "default_multiplier")]
+    pub candidate_pool_multiplier: usize,
+
+    /// Fusion strategy for combining vector and keyword result lists.
+    ///
+    /// Default: RRF with k=60.
+    #[serde(default = "default_fusion")]
+    pub fusion_strategy: FusionStrategy,
+
+    /// Weight for vector search in weighted fusion (0.0 to 1.0).
+    ///
+    /// Only used when `fusion_strategy` is `Weighted`. Keyword weight is
+    /// implicitly `1.0 - vector_weight`.
+    ///
+    /// Default: 0.5 (balanced).
+    #[serde(default = "default_vector_weight")]
+    pub vector_weight: f64,
+
+    /// Minimum score threshold.
+    ///
+    /// Results with a final score below this value are filtered out.
+    /// When `None`, no threshold is applied.
+    ///
+    /// Default: None.
+    #[serde(default)]
+    pub min_score: Option<f64>,
+}
+
+fn default_top_k() -> usize {
+    10
+}
+
+fn default_multiplier() -> usize {
+    3
+}
+
+fn default_fusion() -> FusionStrategy {
+    FusionStrategy::Rrf { k: DEFAULT_RRF_K }
+}
+
+fn default_vector_weight() -> f64 {
+    0.5
+}
+
+impl Default for SearchConfig {
+    fn default() -> Self {
+        Self {
+            top_k: default_top_k(),
+            candidate_pool_multiplier: default_multiplier(),
+            fusion_strategy: default_fusion(),
+            vector_weight: default_vector_weight(),
+            min_score: None,
+        }
+    }
+}
+
+impl SearchConfig {
+    /// Preset: skip BM25 entirely, return only vector search results.
+    ///
+    /// Use when keyword search degrades quality (e.g., short queries, code search).
+    pub fn vector_only() -> Self {
+        Self {
+            top_k: default_top_k(),
+            candidate_pool_multiplier: 1,
+            fusion_strategy: FusionStrategy::VectorOnly,
+            vector_weight: 1.0,
+            min_score: None,
+        }
+    }
+
+    /// Preset: skip HNSW entirely, return only BM25 keyword results.
+    ///
+    /// Use for exact-match retrieval (e.g., medication names, identifiers).
+    pub fn keyword_only() -> Self {
+        Self {
+            top_k: default_top_k(),
+            candidate_pool_multiplier: 1,
+            fusion_strategy: FusionStrategy::KeywordOnly,
+            vector_weight: 0.0,
+            min_score: None,
+        }
+    }
+
+    /// Preset: balanced hybrid search using RRF with k=60.
+    ///
+    /// Equivalent to `SearchConfig::default()`. Combines vector and keyword
+    /// results with equal weight using Reciprocal Rank Fusion.
+    pub fn hybrid_balanced() -> Self {
+        Self::default()
+    }
+
+    /// Set a custom top_k.
+    #[must_use]
+    pub fn with_top_k(mut self, top_k: usize) -> Self {
+        self.top_k = top_k;
+        self
+    }
+
+    /// Set the candidate pool multiplier.
+    #[must_use]
+    pub fn with_candidate_pool_multiplier(mut self, multiplier: usize) -> Self {
+        self.candidate_pool_multiplier = multiplier;
+        self
+    }
+
+    /// Set a minimum score filter.
+    #[must_use]
+    pub fn with_min_score(mut self, min: f64) -> Self {
+        self.min_score = Some(min);
+        self
+    }
+
+    /// Compute the candidate pool size from `top_k * candidate_pool_multiplier`.
+    pub fn candidate_pool_size(&self) -> usize {
+        self.top_k * self.candidate_pool_multiplier.max(1)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_default_config() {
+        let cfg = SearchConfig::default();
+        assert_eq!(cfg.top_k, 10);
+        assert_eq!(cfg.candidate_pool_multiplier, 3);
+        assert!((cfg.vector_weight - 0.5).abs() < f64::EPSILON);
+        assert!(cfg.min_score.is_none());
+        assert!(matches!(cfg.fusion_strategy, FusionStrategy::Rrf { k: 60 }));
+    }
+
+    #[test]
+    fn test_vector_only_preset() {
+        let cfg = SearchConfig::vector_only();
+        assert!(matches!(cfg.fusion_strategy, FusionStrategy::VectorOnly));
+        assert!((cfg.vector_weight - 1.0).abs() < f64::EPSILON);
+        assert_eq!(cfg.candidate_pool_multiplier, 1);
+    }
+
+    #[test]
+    fn test_keyword_only_preset() {
+        let cfg = SearchConfig::keyword_only();
+        assert!(matches!(cfg.fusion_strategy, FusionStrategy::KeywordOnly));
+        assert!((cfg.vector_weight - 0.0).abs() < f64::EPSILON);
+    }
+
+    #[test]
+    fn test_hybrid_balanced_is_default() {
+        let balanced = SearchConfig::hybrid_balanced();
+        let default = SearchConfig::default();
+        assert_eq!(balanced.top_k, default.top_k);
+        assert_eq!(
+            balanced.candidate_pool_multiplier,
+            default.candidate_pool_multiplier
+        );
+        assert!((balanced.vector_weight - default.vector_weight).abs() < f64::EPSILON);
+    }
+
+    #[test]
+    fn test_candidate_pool_size() {
+        let cfg = SearchConfig::default();
+        assert_eq!(cfg.candidate_pool_size(), 30); // 10 * 3
+
+        let cfg = SearchConfig::vector_only().with_top_k(5);
+        assert_eq!(cfg.candidate_pool_size(), 5); // 5 * 1
+    }
+
+    #[test]
+    fn test_builder_methods() {
+        let cfg = SearchConfig::default()
+            .with_top_k(20)
+            .with_candidate_pool_multiplier(5)
+            .with_min_score(0.3);
+        assert_eq!(cfg.top_k, 20);
+        assert_eq!(cfg.candidate_pool_multiplier, 5);
+        assert_eq!(cfg.min_score, Some(0.3));
+        assert_eq!(cfg.candidate_pool_size(), 100);
+    }
+
+    #[test]
+    fn test_serde_roundtrip() {
+        let cfg = SearchConfig {
+            top_k: 15,
+            candidate_pool_multiplier: 4,
+            fusion_strategy: FusionStrategy::Weighted {
+                weights: vec![0.7, 0.3],
+            },
+            vector_weight: 0.7,
+            min_score: Some(0.1),
+        };
+        let json = serde_json::to_string(&cfg).unwrap();
+        let back: SearchConfig = serde_json::from_str(&json).unwrap();
+        assert_eq!(back.top_k, 15);
+        assert_eq!(back.candidate_pool_multiplier, 4);
+        assert_eq!(back.min_score, Some(0.1));
+    }
+}
diff --git a/crates/khive-retrieval/src/timeout.rs b/crates/khive-retrieval/src/timeout.rs
new file mode 100644
index 00000000..1f3ba20b
--- /dev/null
+++ b/crates/khive-retrieval/src/timeout.rs
@@ -0,0 +1,435 @@
+//! Timeout and cancellation support for search operations.
+//!
+//! Provides utilities for wrapping search futures with timeout and cancellation
+//! semantics. Uses `tokio::time::timeout` for deadline enforcement and
+//! `tokio_util::sync::CancellationToken` for cooperative cancellation.
+//!
+//! # Design
+//!
+//! Timeout and cancellation are applied at the search entry points (hybrid search,
+//! graph traversal) rather than at every internal function call. This keeps the
+//! internal algorithms clean while providing operational safety at the boundaries.
+//!
+//! # Usage
+//!
+//! ```rust,ignore
+//! use std::time::Duration;
+//! use khive_retrieval::timeout::{search_with_timeout, search_with_cancellation};
+//! use tokio_util::sync::CancellationToken;
+//!
+//! // Timeout: cancel if search takes longer than 5 seconds
+//! let results = search_with_timeout(
+//!     searcher.hybrid_search(&query, &config),
+//!     Duration::from_secs(5),
+//! ).await?;
+//!
+//! // Cancellation: cancel via token (e.g., from a request handler)
+//! let token = CancellationToken::new();
+//! let results = search_with_cancellation(
+//!     searcher.hybrid_search(&query, &config),
+//!     token.clone(),
+//! ).await?;
+//!
+//! // From another task:
+//! token.cancel();
+//! ```
+//!
+//! See also: [`HybridConfig::timeout`] for declarative timeout configuration.
+
+use std::future::Future;
+use std::time::Duration;
+
+use tokio_util::sync::CancellationToken;
+
+use crate::error::{Result, RetrievalError};
+
+/// Execute a search future with a timeout.
+///
+/// Wraps the given future with `tokio::time::timeout`. If the future does not
+/// complete within the specified duration, returns [`RetrievalError::QueryTimeout`].
+///
+/// # Arguments
+///
+/// * `future` - The search operation to execute
+/// * `duration` - Maximum time to wait for completion
+///
+/// # Returns
+///
+/// The search result if completed within the timeout, or `QueryTimeout` error.
+///
+/// # Example
+///
+/// ```rust,ignore
+/// use std::time::Duration;
+/// use khive_retrieval::timeout::search_with_timeout;
+///
+/// let results = search_with_timeout(
+///     searcher.hybrid_search(&query, &config),
+///     Duration::from_secs(5),
+/// ).await?;
+/// ```
+pub async fn search_with_timeout<F, T>(future: F, duration: Duration) -> Result<T>
+where
+    F: Future<Output = Result<T>>,
+{
+    match tokio::time::timeout(duration, future).await {
+        Ok(result) => result,
+        Err(_elapsed) => Err(RetrievalError::QueryTimeout {
+            elapsed_ms: duration.as_millis() as u64,
+        }),
+    }
+}
+
+/// Execute a search future with an optional timeout.
+///
+/// If `timeout` is `Some`, wraps the future with [`search_with_timeout`].
+/// If `None`, executes the future directly without timeout.
+///
+/// This is a convenience function for use with [`HybridConfig::timeout`].
+///
+/// # Arguments
+///
+/// * `future` - The search operation to execute
+/// * `timeout` - Optional maximum time to wait
+///
+/// # Returns
+///
+/// The search result, or `QueryTimeout` if the timeout elapsed.
+pub async fn search_with_optional_timeout<F, T>(future: F, timeout: Option<Duration>) -> Result<T>
+where
+    F: Future<Output = Result<T>>,
+{
+    match timeout {
+        Some(duration) => search_with_timeout(future, duration).await,
+        None => future.await,
+    }
+}
+
+/// Execute a search future with a cancellation token.
+///
+/// Uses `tokio::select!` to race the search future against the cancellation token.
+/// If the token is cancelled before the search completes, returns
+/// [`RetrievalError::QueryCancelled`].
+///
+/// # Arguments
+///
+/// * `future` - The search operation to execute
+/// * `token` - Cancellation token to observe
+///
+/// # Returns
+///
+/// The search result if completed before cancellation, or `QueryCancelled` error.
+///
+/// # Example
+///
+/// ```rust,ignore
+/// use tokio_util::sync::CancellationToken;
+/// use khive_retrieval::timeout::search_with_cancellation;
+///
+/// let token = CancellationToken::new();
+/// let token_clone = token.clone();
+///
+/// // Spawn a task that cancels after 1 second
+/// tokio::spawn(async move {
+///     tokio::time::sleep(Duration::from_secs(1)).await;
+///     token_clone.cancel();
+/// });
+///
+/// let results = search_with_cancellation(
+///     searcher.hybrid_search(&query, &config),
+///     token,
+/// ).await?;
+/// ```
+pub async fn search_with_cancellation<F, T>(future: F, token: CancellationToken) -> Result<T>
+where
+    F: Future<Output = Result<T>>,
+{
+    tokio::select! {
+        result = future => result,
+        _ = token.cancelled() => Err(RetrievalError::QueryCancelled),
+    }
+}
+
+/// Execute a search future with both timeout and optional cancellation.
+///
+/// Combines timeout and cancellation into a single wrapper. The search will
+/// be terminated if either:
+/// - The timeout duration elapses (`QueryTimeout`)
+/// - The cancellation token is triggered (`QueryCancelled`)
+/// - The search completes normally
+///
+/// # Arguments
+///
+/// * `future` - The search operation to execute
+/// * `timeout` - Optional maximum time to wait
+/// * `cancel` - Optional cancellation token to observe
+///
+/// # Returns
+///
+/// The search result, or an appropriate error if timed out or cancelled.
+pub async fn search_with_deadline<F, T>(
+    future: F,
+    timeout: Option<Duration>,
+    cancel: Option<CancellationToken>,
+) -> Result<T>
+where
+    F: Future<Output = Result<T>>,
+{
+    match (timeout, cancel) {
+        (Some(duration), Some(token)) => {
+            tokio::select! {
+                result = tokio::time::timeout(duration, future) => {
+                    match result {
+                        Ok(inner) => inner,
+                        Err(_elapsed) => Err(RetrievalError::QueryTimeout {
+                            elapsed_ms: duration.as_millis() as u64,
+                        }),
+                    }
+                }
+                _ = token.cancelled() => Err(RetrievalError::QueryCancelled),
+            }
+        }
+        (Some(duration), None) => search_with_timeout(future, duration).await,
+        (None, Some(token)) => search_with_cancellation(future, token).await,
+        (None, None) => future.await,
+    }
+}
+
+/// Serde support for `Option<Duration>` as milliseconds.
+///
+/// Serializes `Duration` as `u64` milliseconds for JSON compatibility.
+pub(crate) mod serde_opt_duration {
+    use serde::{Deserialize, Deserializer, Serialize, Serializer};
+    use std::time::Duration;
+
+    /// Intermediate representation for serde.
+    #[derive(Serialize, Deserialize)]
+    struct DurationMs(u64);
+
+    /// Serialize `Option<Duration>` as optional milliseconds.
+    pub fn serialize<S>(value: &Option<Duration>, serializer: S) -> Result<S::Ok, S::Error>
+    where
+        S: Serializer,
+    {
+        match value {
+            Some(d) => DurationMs(d.as_millis() as u64).serialize(serializer),
+            None => serializer.serialize_none(),
+        }
+    }
+
+    /// Deserialize `Option<Duration>` from optional milliseconds.
+    pub fn deserialize<'de, D>(deserializer: D) -> Result<Option<Duration>, D::Error>
+    where
+        D: Deserializer<'de>,
+    {
+        let opt: Option<DurationMs> = Option::deserialize(deserializer)?;
+        Ok(opt.map(|ms| Duration::from_millis(ms.0)))
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use std::time::Duration;
+
+    #[tokio::test]
+    async fn test_search_with_timeout_completes() {
+        // A future that completes immediately
+        let future = async { Ok::<_, RetrievalError>(vec![1, 2, 3]) };
+        let result = search_with_timeout(future, Duration::from_secs(5)).await;
+        assert!(result.is_ok());
+        assert_eq!(result.unwrap(), vec![1, 2, 3]);
+    }
+
+    #[tokio::test]
+    async fn test_search_with_timeout_expires() {
+        // A future that takes too long
+        let future = async {
+            tokio::time::sleep(Duration::from_secs(10)).await;
+            Ok::<_, RetrievalError>(vec![1, 2, 3])
+        };
+        let result = search_with_timeout(future, Duration::from_millis(50)).await;
+        assert!(result.is_err());
+        let err = result.unwrap_err();
+        assert!(matches!(err, RetrievalError::QueryTimeout { .. }));
+        assert!(err.is_transient());
+    }
+
+    #[tokio::test]
+    async fn test_search_with_timeout_propagates_error() {
+        // A future that fails with a different error
+        let future = async { Err::<Vec<i32>, _>(RetrievalError::invalid_query("bad query")) };
+        let result = search_with_timeout(future, Duration::from_secs(5)).await;
+        assert!(result.is_err());
+        assert!(matches!(
+            result.unwrap_err(),
+            RetrievalError::InvalidQuery(_)
+        ));
+    }
+
+    #[tokio::test]
+    async fn test_search_with_optional_timeout_none() {
+        // No timeout means direct execution
+        let future = async { Ok::<_, RetrievalError>(42) };
+        let result = search_with_optional_timeout(future, None).await;
+        assert_eq!(result.unwrap(), 42);
+    }
+
+    #[tokio::test]
+    async fn test_search_with_optional_timeout_some() {
+        // With timeout, same as search_with_timeout
+        let future = async {
+            tokio::time::sleep(Duration::from_secs(10)).await;
+            Ok::<_, RetrievalError>(42)
+        };
+        let result = search_with_optional_timeout(future, Some(Duration::from_millis(50))).await;
+        assert!(matches!(
+            result.unwrap_err(),
+            RetrievalError::QueryTimeout { .. }
+        ));
+    }
+
+    #[tokio::test]
+    async fn test_search_with_cancellation_completes() {
+        let token = CancellationToken::new();
+        let future = async { Ok::<_, RetrievalError>(vec![1, 2, 3]) };
+        let result = search_with_cancellation(future, token).await;
+        assert!(result.is_ok());
+    }
+
+    #[tokio::test]
+    async fn test_search_with_cancellation_cancelled() {
+        let token = CancellationToken::new();
+        let token_clone = token.clone();
+
+        // Cancel immediately
+        token_clone.cancel();
+
+        let future = async {
+            tokio::time::sleep(Duration::from_secs(10)).await;
+            Ok::<_, RetrievalError>(vec![1, 2, 3])
+        };
+        let result = search_with_cancellation(future, token).await;
+        assert!(result.is_err());
+        let err = result.unwrap_err();
+        assert!(matches!(err, RetrievalError::QueryCancelled));
+        assert!(err.is_transient());
+    }
+
+    #[tokio::test]
+    async fn test_search_with_cancellation_delayed() {
+        let token = CancellationToken::new();
+        let token_clone = token.clone();
+
+        // Cancel after a short delay
+        tokio::spawn(async move {
+            tokio::time::sleep(Duration::from_millis(20)).await;
+            token_clone.cancel();
+        });
+
+        let future = async {
+            tokio::time::sleep(Duration::from_secs(10)).await;
+            Ok::<_, RetrievalError>(vec![1, 2, 3])
+        };
+        let result = search_with_cancellation(future, token).await;
+        assert!(matches!(
+            result.unwrap_err(),
+            RetrievalError::QueryCancelled
+        ));
+    }
+
+    #[tokio::test]
+    async fn test_search_with_deadline_timeout_and_cancel() {
+        let token = CancellationToken::new();
+
+        // Timeout fires first (50ms vs 10s sleep)
+        let future = async {
+            tokio::time::sleep(Duration::from_secs(10)).await;
+            Ok::<_, RetrievalError>(42)
+        };
+        let result =
+            search_with_deadline(future, Some(Duration::from_millis(50)), Some(token)).await;
+        assert!(matches!(
+            result.unwrap_err(),
+            RetrievalError::QueryTimeout { .. }
+        ));
+    }
+
+    #[tokio::test]
+    async fn test_search_with_deadline_cancel_fires_first() {
+        let token = CancellationToken::new();
+        let token_clone = token.clone();
+
+        // Cancel immediately, timeout is long
+        token_clone.cancel();
+
+        let future = async {
+            tokio::time::sleep(Duration::from_secs(10)).await;
+            Ok::<_, RetrievalError>(42)
+        };
+        let result = search_with_deadline(future, Some(Duration::from_secs(60)), Some(token)).await;
+        assert!(matches!(
+            result.unwrap_err(),
+            RetrievalError::QueryCancelled
+        ));
+    }
+
+    #[tokio::test]
+    async fn test_search_with_deadline_neither() {
+        // No timeout, no cancellation: direct execution
+        let future = async { Ok::<_, RetrievalError>(42) };
+        let result = search_with_deadline(future, None, None).await;
+        assert_eq!(result.unwrap(), 42);
+    }
+
+    #[tokio::test]
+    async fn test_timeout_error_display() {
+        let err = RetrievalError::query_timeout(5000);
+        assert_eq!(err.to_string(), "query timed out after 5000ms");
+    }
+
+    #[tokio::test]
+    async fn test_cancelled_error_display() {
+        let err = RetrievalError::query_cancelled();
+        assert_eq!(err.to_string(), "query cancelled");
+    }
+
+    #[tokio::test]
+    async fn test_timeout_error_is_transient() {
+        assert!(RetrievalError::query_timeout(100).is_transient());
+        assert!(RetrievalError::query_cancelled().is_transient());
+        assert!(!RetrievalError::query_timeout(100).is_permanent());
+        assert!(!RetrievalError::query_cancelled().is_permanent());
+    }
+
+    #[test]
+    fn test_serde_opt_duration_roundtrip() {
+        use serde::{Deserialize, Serialize};
+
+        #[derive(Serialize, Deserialize, Debug, PartialEq)]
+        struct TestConfig {
+            #[serde(
+                default,
+                skip_serializing_if = "Option::is_none",
+                with = "super::serde_opt_duration"
+            )]
+            timeout: Option<Duration>,
+        }
+
+        // With timeout
+        let config = TestConfig {
+            timeout: Some(Duration::from_millis(5000)),
+        };
+        let json = serde_json::to_string(&config).unwrap();
+        assert!(json.contains("5000"));
+        let restored: TestConfig = serde_json::from_str(&json).unwrap();
+        assert_eq!(restored.timeout, Some(Duration::from_millis(5000)));
+
+        // Without timeout
+        let config = TestConfig { timeout: None };
+        let json = serde_json::to_string(&config).unwrap();
+        assert!(!json.contains("timeout"));
+        let restored: TestConfig = serde_json::from_str("{}").unwrap();
+        assert_eq!(restored.timeout, None);
+    }
+}
diff --git a/crates/khive-retrieval/src/weights/engine_weights.rs b/crates/khive-retrieval/src/weights/engine_weights.rs
new file mode 100644
index 00000000..7530767c
--- /dev/null
+++ b/crates/khive-retrieval/src/weights/engine_weights.rs
@@ -0,0 +1,561 @@
+//! Unified weight store — Three Observables Feedback Loop (Phase 2.A).
+//!
+//! This module provides the core EMA-update + audit-log primitives backing
+//! all three feedback channels:
+//!
+//! | Channel     | η     | Signal source                        |
+//! |-------------|-------|--------------------------------------|
+//! | Ambient     | 0.003 | Every recall / compose operation     |
+//! | Explicit    | 0.10  | note.create quality score            |
+//! | GroundTruth | 0.50  | Atlas eval / CLI manual trigger      |
+//!
+//! # Weight semantics
+//!
+//! Weights live in `atom_weights(namespace, atom_id)` and are bounded to
+//! `[WEIGHT_FLOOR, WEIGHT_CEIL]` = [0.1, 5.0].  Missing rows are treated as
+//! implicit 1.0 by callers; this module never inserts rows on first read.
+//!
+//! # EMA formula
+//!
+//! ```text
+//! new_weight = clamp(old_weight * (1 - η) + delta, WEIGHT_FLOOR, WEIGHT_CEIL)
+//! ```
+//!
+//! `old_weight` defaults to 1.0 when no row exists yet.
+
+use std::collections::HashMap;
+use std::sync::Arc;
+
+use parking_lot::Mutex;
+use rusqlite::{params, Connection, OptionalExtension};
+use serde::{Deserialize, Serialize};
+use uuid::Uuid;
+
+use crate::persist::PersistError;
+
+// ---------------------------------------------------------------------------
+// Constants
+// ---------------------------------------------------------------------------
+
+/// Lower bound for atom weights — prevents a weight from reaching zero.
+pub const WEIGHT_FLOOR: f32 = 0.1;
+
+/// Upper bound for atom weights — prevents runaway boosting.
+pub const WEIGHT_CEIL: f32 = 5.0;
+
+// ---------------------------------------------------------------------------
+// WeightChannel
+// ---------------------------------------------------------------------------
+
+/// Three feedback channels each with a distinct learning rate η.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
+#[serde(rename_all = "snake_case")]
+pub enum WeightChannel {
+    /// η = 0.003 — every recall / compose invocation.
+    Ambient,
+    /// η = 0.10 — voluntary quality signal via note.create / note.correct.
+    Explicit,
+    /// η = 0.50 — Atlas eval or manual CLI trigger.
+    GroundTruth,
+}
+
+impl WeightChannel {
+    /// Learning rate η for this channel.
+    pub fn eta(self) -> f32 {
+        match self {
+            Self::Ambient => 0.003,
+            Self::Explicit => 0.10,
+            Self::GroundTruth => 0.50,
+        }
+    }
+
+    /// Canonical snake_case string stored in `weight_events.channel`.
+    pub fn as_str(self) -> &'static str {
+        match self {
+            Self::Ambient => "ambient",
+            Self::Explicit => "explicit",
+            Self::GroundTruth => "ground_truth",
+        }
+    }
+}
+
+// ---------------------------------------------------------------------------
+// apply_weight_delta
+// ---------------------------------------------------------------------------
+
+/// Apply an EMA weight update to one `(namespace, atom_id)` pair and append
+/// an audit row to `weight_events`.
+///
+/// # Algorithm
+///
+/// ```text
+/// old  = atom_weights[namespace, atom_id].weight  (default 1.0 if missing)
+/// new  = clamp(old * (1 − η) + delta, WEIGHT_FLOOR, WEIGHT_CEIL)
+/// ```
+///
+/// Both the `atom_weights` upsert and the `weight_events` insert execute inside
+/// a single `BEGIN IMMEDIATE` transaction so they are atomically consistent.
+///
+/// # Returns
+///
+/// `(new_weight, weight_event_row_id)` on success.
+pub async fn apply_weight_delta(
+    conn: &Arc<Mutex<Connection>>,
+    namespace: &str,
+    atom_id: Uuid,
+    delta: f32,
+    channel: WeightChannel,
+    event_id: Option<Uuid>,
+    context_id: Option<&str>,
+) -> Result<(f32, i64), PersistError> {
+    apply_weight_delta_with_eta(
+        conn,
+        namespace,
+        atom_id,
+        delta,
+        channel,
+        channel.eta(),
+        event_id,
+        context_id,
+    )
+    .await
+}
+
+/// Variant of [`apply_weight_delta`] that accepts a runtime-overridden `eta`.
+///
+/// Use this when the caller loads η from runtime config (e.g., atlas's
+/// `knowledge.toml` override of Channel C's default 0.50). Same algorithm
+/// and transactional guarantees as [`apply_weight_delta`].
+pub async fn apply_weight_delta_with_eta(
+    conn: &Arc<Mutex<Connection>>,
+    namespace: &str,
+    atom_id: Uuid,
+    delta: f32,
+    channel: WeightChannel,
+    eta: f32,
+    event_id: Option<Uuid>,
+    context_id: Option<&str>,
+) -> Result<(f32, i64), PersistError> {
+    if namespace.is_empty() {
+        tracing::warn!(
+            atom_id = %atom_id,
+            channel = %channel.as_str(),
+            "apply_weight_delta called with empty namespace — rejecting to avoid dead-namespace pollution"
+        );
+        return Err(PersistError::Validation(
+            "namespace must not be empty".to_string(),
+        ));
+    }
+    if !(0.0..=1.0).contains(&eta) {
+        return Err(PersistError::Validation(format!(
+            "eta must be in [0.0, 1.0], got {eta}"
+        )));
+    }
+    let conn = Arc::clone(conn);
+    let namespace_str = namespace.to_string();
+    let atom_id_str = atom_id.to_string();
+    let channel_str = channel.as_str();
+    let event_id_str = event_id.map(|u| u.to_string());
+    let context_id = context_id.map(|s| s.to_string());
+    let now_us = chrono::Utc::now().timestamp_micros();
+
+    tokio::task::spawn_blocking(move || {
+        let conn = conn.lock();
+
+        let tx =
+            rusqlite::Transaction::new_unchecked(&conn, rusqlite::TransactionBehavior::Immediate)?;
+
+        // Read current weight (default 1.0 if row absent).
+        let old_weight: f32 = tx
+            .query_row(
+                "SELECT weight FROM atom_weights WHERE namespace = ?1 AND atom_id = ?2",
+                params![namespace_str, atom_id_str],
+                |row| row.get::<_, f64>(0),
+            )
+            .optional()
+            .map_err(PersistError::from)?
+            .unwrap_or(1.0_f64) as f32;
+
+        // EMA update + clamp.
+        let new_weight = (old_weight * (1.0 - eta) + delta).clamp(WEIGHT_FLOOR, WEIGHT_CEIL);
+
+        // Upsert atom_weights — increment version on each write.
+        tx.execute(
+            "INSERT INTO atom_weights (namespace, atom_id, weight, updated_at, version)
+             VALUES (?1, ?2, ?3, ?4, 1)
+             ON CONFLICT(namespace, atom_id) DO UPDATE SET
+               weight     = excluded.weight,
+               updated_at = excluded.updated_at,
+               version    = version + 1",
+            params![namespace_str, atom_id_str, new_weight as f64, now_us],
+        )?;
+
+        // Append weight_events audit row.
+        tx.execute(
+            "INSERT INTO weight_events
+               (namespace, atom_id, delta, weight_after, channel, eta, event_id, context_id, ts)
+             VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7, ?8, ?9)",
+            params![
+                namespace_str,
+                atom_id_str,
+                delta as f64,
+                new_weight as f64,
+                channel_str,
+                eta as f64,
+                event_id_str,
+                context_id,
+                now_us,
+            ],
+        )?;
+
+        let row_id = tx.last_insert_rowid();
+        tx.commit()?;
+
+        Ok((new_weight, row_id))
+    })
+    .await?
+}
+
+// ---------------------------------------------------------------------------
+// batch_load_weights
+// ---------------------------------------------------------------------------
+
+/// Batch-load current weights for a slice of atom IDs under one lambda.
+///
+/// Only rows that exist in `atom_weights` are returned.  Missing atoms are
+/// **not** inserted; callers should treat absent entries as implicit 1.0.
+///
+/// Uses a single SQL query with a dynamic `IN (...)` clause.  The batch is
+/// chunked when `atom_ids` exceeds the SQLite 999-bind-param ceiling.
+pub async fn batch_load_weights(
+    conn: &Arc<Mutex<Connection>>,
+    namespace: &str,
+    atom_ids: &[Uuid],
+) -> Result<HashMap<Uuid, f32>, PersistError> {
+    if atom_ids.is_empty() {
+        return Ok(HashMap::new());
+    }
+
+    let conn = Arc::clone(conn);
+    let namespace_str = namespace.to_string();
+    // Convert UUIDs to strings once, then move into the blocking closure.
+    let id_strs: Vec<String> = atom_ids.iter().map(|u| u.to_string()).collect();
+
+    tokio::task::spawn_blocking(move || {
+        let conn = conn.lock();
+        let mut result = HashMap::with_capacity(id_strs.len());
+
+        // Chunk to stay within the SQLite 999-parameter limit.
+        // Each chunk uses 1 param (namespace) + N params (atom_ids) = N+1 total.
+        const CHUNK_SIZE: usize = 998;
+
+        for chunk in id_strs.chunks(CHUNK_SIZE) {
+            let placeholders = chunk
+                .iter()
+                .enumerate()
+                .map(|(i, _)| format!("?{}", i + 2))
+                .collect::<Vec<_>>()
+                .join(", ");
+
+            let sql = format!(
+                "SELECT atom_id, weight FROM atom_weights \
+                 WHERE namespace = ?1 AND atom_id IN ({placeholders})"
+            );
+
+            let mut stmt = conn.prepare(&sql).map_err(PersistError::from)?;
+
+            let mut param_values: Vec<rusqlite::types::Value> = Vec::with_capacity(chunk.len() + 1);
+            param_values.push(rusqlite::types::Value::Text(namespace_str.clone()));
+            for s in chunk {
+                param_values.push(rusqlite::types::Value::Text(s.clone()));
+            }
+
+            let mut rows = stmt
+                .query(rusqlite::params_from_iter(param_values))
+                .map_err(PersistError::from)?;
+
+            while let Some(row) = rows.next().map_err(PersistError::from)? {
+                let aid: String = row.get(0).map_err(PersistError::from)?;
+                let w: f64 = row.get(1).map_err(PersistError::from)?;
+                if let Ok(uuid) = aid.parse::<Uuid>() {
+                    // Clamp on read — symmetric with write-side invariant. Protects compose
+                    // from weight=0 rows introduced by manual SQL or future schema drift.
+                    let clamped = (w as f32).clamp(WEIGHT_FLOOR, WEIGHT_CEIL);
+                    result.insert(uuid, clamped);
+                }
+            }
+        }
+
+        Ok(result)
+    })
+    .await?
+}
+
+// ---------------------------------------------------------------------------
+// Tests
+// ---------------------------------------------------------------------------
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use khive_db::SqliteStore;
+    use std::sync::Arc;
+
+    fn make_conn() -> Arc<Mutex<Connection>> {
+        // Open an in-memory SQLite DB and run migrations so atom_weights and
+        // weight_events tables exist.
+        let store = SqliteStore::memory().expect("in-memory store");
+        store.conn()
+    }
+
+    // -------------------------------------------------------------------------
+    // Test 1 — ambient channel drives weight above 1.0 over 5 ticks
+    // -------------------------------------------------------------------------
+    #[tokio::test]
+    async fn test_apply_weight_delta_ambient_channel() {
+        let conn = make_conn();
+        let lambda = "lambda:test";
+        let atom = Uuid::new_v4();
+        let delta = 0.01_f32; // positive ambient nudge
+
+        let mut last_weight = 1.0_f32;
+        for _ in 0..5 {
+            let (w, _row_id) = apply_weight_delta(
+                &conn,
+                lambda,
+                atom,
+                delta,
+                WeightChannel::Ambient,
+                None,
+                None,
+            )
+            .await
+            .expect("apply_weight_delta should succeed");
+            last_weight = w;
+        }
+
+        assert!(
+            last_weight > 1.0,
+            "weight should rise above 1.0 with positive delta, got {last_weight}"
+        );
+        assert!(
+            last_weight < WEIGHT_CEIL,
+            "weight should not reach ceiling after 5 ticks"
+        );
+
+        // Verify 5 audit rows were written.
+        let map = batch_load_weights(&conn, lambda, &[atom])
+            .await
+            .expect("batch_load_weights");
+        assert!(map.contains_key(&atom), "weight row should exist");
+
+        // Count weight_events rows directly.
+        let count: i64 = {
+            let c = conn.lock();
+            c.query_row(
+                "SELECT COUNT(*) FROM weight_events WHERE namespace = ?1 AND atom_id = ?2 AND channel = 'ambient'",
+                params![lambda, atom.to_string()],
+                |r| r.get(0),
+            )
+            .unwrap()
+        };
+        assert_eq!(count, 5, "expected 5 ambient weight_events rows");
+    }
+
+    // -------------------------------------------------------------------------
+    // Test 2 — ceiling clamp
+    // -------------------------------------------------------------------------
+    #[tokio::test]
+    async fn test_apply_weight_delta_clamps_at_ceiling() {
+        let conn = make_conn();
+        let lambda = "lambda:test";
+        let atom = Uuid::new_v4();
+
+        // Repeatedly push large positive deltas.
+        for _ in 0..100 {
+            apply_weight_delta(
+                &conn,
+                lambda,
+                atom,
+                5.0, // huge delta
+                WeightChannel::GroundTruth,
+                None,
+                None,
+            )
+            .await
+            .expect("apply_weight_delta should succeed");
+        }
+
+        let map = batch_load_weights(&conn, lambda, &[atom])
+            .await
+            .expect("batch_load_weights");
+        let w = *map.get(&atom).expect("atom weight must exist");
+        assert_eq!(
+            w, WEIGHT_CEIL,
+            "weight should be clamped at WEIGHT_CEIL={WEIGHT_CEIL}, got {w}"
+        );
+    }
+
+    // -------------------------------------------------------------------------
+    // Test 3 — namespace isolation
+    // -------------------------------------------------------------------------
+    #[tokio::test]
+    async fn test_apply_weight_delta_namespace_isolation() {
+        let conn = make_conn();
+        let lambda_a = "lambda:a";
+        let lambda_b = "lambda:b";
+        let atom = Uuid::new_v4();
+
+        apply_weight_delta(
+            &conn,
+            lambda_a,
+            atom,
+            0.5,
+            WeightChannel::Explicit,
+            None,
+            None,
+        )
+        .await
+        .expect("apply for lambda:a");
+
+        // lambda:b should see nothing.
+        let map_b = batch_load_weights(&conn, lambda_b, &[atom])
+            .await
+            .expect("batch_load for lambda:b");
+        assert!(
+            !map_b.contains_key(&atom),
+            "lambda:b should not see lambda:a's weight"
+        );
+
+        // lambda:a should see the written weight.
+        let map_a = batch_load_weights(&conn, lambda_a, &[atom])
+            .await
+            .expect("batch_load for lambda:a");
+        assert!(
+            map_a.contains_key(&atom),
+            "lambda:a should see its own weight"
+        );
+    }
+
+    // -------------------------------------------------------------------------
+    // Test 4 — missing atoms are absent (not 1.0 rows) from batch_load result
+    // -------------------------------------------------------------------------
+    #[tokio::test]
+    async fn test_batch_load_weights_missing_atoms_default() {
+        let conn = make_conn();
+        let lambda = "lambda:test";
+        let atom_a = Uuid::new_v4();
+        let atom_b = Uuid::new_v4();
+        let atom_c = Uuid::new_v4();
+
+        // Write only atom_a.
+        apply_weight_delta(
+            &conn,
+            lambda,
+            atom_a,
+            0.3,
+            WeightChannel::Explicit,
+            None,
+            None,
+        )
+        .await
+        .expect("apply for atom_a");
+
+        let map = batch_load_weights(&conn, lambda, &[atom_a, atom_b, atom_c])
+            .await
+            .expect("batch_load");
+
+        assert!(map.contains_key(&atom_a), "atom_a should be present");
+        assert!(
+            !map.contains_key(&atom_b),
+            "atom_b should be absent (caller treats as 1.0)"
+        );
+        assert!(
+            !map.contains_key(&atom_c),
+            "atom_c should be absent (caller treats as 1.0)"
+        );
+
+        // atom_a weight should be non-default (was boosted).
+        let w_a = *map.get(&atom_a).unwrap();
+        assert!(w_a != 1.0_f32, "atom_a weight should differ from default");
+    }
+
+    // -------------------------------------------------------------------------
+    // Test 5 — negative delta writes a weight_events row (B4 regression guard)
+    // -------------------------------------------------------------------------
+    /// Verifies that apply_weight_delta writes to weight_events even when the
+    /// delta is negative, guarding against the B4 Channel-A decay skip bug.
+    #[tokio::test]
+    async fn test_channel_a_applies_on_negative_delta() {
+        let conn = make_conn();
+        let lambda = "lambda:test";
+        let atom = Uuid::new_v4();
+
+        // First boost the atom so it is above floor.
+        apply_weight_delta(
+            &conn,
+            lambda,
+            atom,
+            0.5,
+            WeightChannel::GroundTruth,
+            None,
+            None,
+        )
+        .await
+        .expect("initial boost");
+
+        // Now apply a negative ambient delta (simulates decay).
+        let (w_after, _) = apply_weight_delta(
+            &conn,
+            lambda,
+            atom,
+            -0.1,
+            WeightChannel::Ambient,
+            None,
+            Some("decay_test"),
+        )
+        .await
+        .expect("negative delta must succeed");
+
+        // Weight must be below the post-boost value (started ~1.25, decay should lower it).
+        assert!(
+            w_after < 1.5,
+            "weight should have decayed below post-boost value, got {w_after}"
+        );
+        assert!(
+            w_after >= WEIGHT_FLOOR,
+            "weight must not go below WEIGHT_FLOOR, got {w_after}"
+        );
+
+        // Confirm a weight_events row was written for the negative delta.
+        let count: i64 = {
+            let c = conn.lock();
+            c.query_row(
+                "SELECT COUNT(*) FROM weight_events \
+                 WHERE namespace = ?1 AND atom_id = ?2 AND delta < 0",
+                params![lambda, atom.to_string()],
+                |r| r.get(0),
+            )
+            .unwrap()
+        };
+        assert_eq!(
+            count, 1,
+            "expected 1 weight_event row with negative delta, got {count}"
+        );
+    }
+
+    // -------------------------------------------------------------------------
+    // Test 6 — empty namespace returns Validation error (F2 guard)
+    // -------------------------------------------------------------------------
+    #[tokio::test]
+    async fn test_apply_weight_delta_rejects_empty_namespace() {
+        let conn = make_conn();
+        let atom = Uuid::new_v4();
+        let result =
+            apply_weight_delta(&conn, "", atom, 0.1, WeightChannel::Ambient, None, None).await;
+        assert!(
+            matches!(result, Err(PersistError::Validation(_))),
+            "expected Validation error for empty namespace, got {result:?}"
+        );
+    }
+}
diff --git a/crates/khive-retrieval/src/weights/mod.rs b/crates/khive-retrieval/src/weights/mod.rs
new file mode 100644
index 00000000..6e3b834d
--- /dev/null
+++ b/crates/khive-retrieval/src/weights/mod.rs
@@ -0,0 +1,5 @@
+//! Unified weight store for the Three Observables Feedback Loop.
+
+pub mod engine_weights;
+
+pub use engine_weights::*;

From 5a57ea9786bb77ccdf2da54026f93f20c72d4249 Mon Sep 17 00:00:00 2001
From: OceanLi <122793010+ohdearquant@users.noreply.github.com>
Date: Fri, 22 May 2026 13:56:40 -0400
Subject: [PATCH 2/4] fix(retrieval): add debug_assert for weighted fusion
 source count

Weighted fusion is constrained to exactly 2 sources (vector + keyword)
by the HybridSearcher trait hierarchy. Add assertion to catch misuse.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 crates/khive-retrieval/src/hybrid/searcher.rs | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/crates/khive-retrieval/src/hybrid/searcher.rs b/crates/khive-retrieval/src/hybrid/searcher.rs
index 096e1255..f6ca8d7a 100644
--- a/crates/khive-retrieval/src/hybrid/searcher.rs
+++ b/crates/khive-retrieval/src/hybrid/searcher.rs
@@ -259,7 +259,8 @@ pub fn fuse_search_results<Id: Eq + Hash + Clone + Ord>(
     // Determine fusion strategy
     let strategy = match &config.fusion_strategy {
         FusionStrategy::Weighted { .. } => {
-            // Use configured weights
+            // Use configured weights — constrained to exactly 2 sources (vector + keyword)
+            debug_assert_eq!(sources.len(), 2, "Weighted fusion expects exactly 2 sources");
             let (v, k) = config.normalized_weights();
             FusionStrategy::weighted(vec![v, k])
         }

From dfc9ee99bcbad9083e945f094967f8eb0b61ab0e Mon Sep 17 00:00:00 2001
From: OceanLi <122793010+ohdearquant@users.noreply.github.com>
Date: Fri, 22 May 2026 13:58:44 -0400
Subject: [PATCH 3/4] style: cargo fmt

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 crates/khive-retrieval/src/error.rs           |  1 -
 .../khive-retrieval/src/eval/engine_eval.rs   |  5 +++-
 crates/khive-retrieval/src/graph/compat.rs    | 24 ++++---------------
 crates/khive-retrieval/src/graph/helpers.rs   |  9 ++-----
 crates/khive-retrieval/src/graph/mod.rs       |  2 +-
 crates/khive-retrieval/src/graph/tests.rs     |  2 +-
 crates/khive-retrieval/src/hybrid/searcher.rs |  6 ++++-
 crates/khive-retrieval/src/lib.rs             | 10 ++++----
 crates/khive-retrieval/src/persist/hnsw.rs    |  2 +-
 9 files changed, 23 insertions(+), 38 deletions(-)

diff --git a/crates/khive-retrieval/src/error.rs b/crates/khive-retrieval/src/error.rs
index 4d7af9b7..d6f5b681 100644
--- a/crates/khive-retrieval/src/error.rs
+++ b/crates/khive-retrieval/src/error.rs
@@ -143,7 +143,6 @@ pub enum RetrievalError {
     /// Reranking operation failed (permanent).
     #[error("rerank error: {0}")]
     Rerank(String),
-
     // TODO(port-rerank): khive-inference not ported yet; re-enable when available.
     // #[cfg(feature = "native-rerank")]
     // #[error("inference error: {0}")]
diff --git a/crates/khive-retrieval/src/eval/engine_eval.rs b/crates/khive-retrieval/src/eval/engine_eval.rs
index cbc56712..a6021b21 100644
--- a/crates/khive-retrieval/src/eval/engine_eval.rs
+++ b/crates/khive-retrieval/src/eval/engine_eval.rs
@@ -530,7 +530,10 @@ mod tests {
             .map(|i| make_result(i, RetrievalLabel::Decisive))
             .collect();
         let score = ndcg_at_k(&results, 10);
-        assert!(score <= 1.0 + 1e-12, "nDCG must not exceed 1.0, got {score}");
+        assert!(
+            score <= 1.0 + 1e-12,
+            "nDCG must not exceed 1.0, got {score}"
+        );
     }
 
     // ---- mrr ----
diff --git a/crates/khive-retrieval/src/graph/compat.rs b/crates/khive-retrieval/src/graph/compat.rs
index 9d0493d3..03df1b09 100644
--- a/crates/khive-retrieval/src/graph/compat.rs
+++ b/crates/khive-retrieval/src/graph/compat.rs
@@ -129,18 +129,10 @@ impl StorageContext {
 #[async_trait]
 pub trait LinkStore: Send + Sync {
     /// Get all outgoing links from an entity.
-    async fn outgoing(
-        &self,
-        ctx: &StorageContext,
-        entity: &EntityRef,
-    ) -> Result<Vec<Link>>;
+    async fn outgoing(&self, ctx: &StorageContext, entity: &EntityRef) -> Result<Vec<Link>>;
 
     /// Get all incoming links to an entity.
-    async fn incoming(
-        &self,
-        ctx: &StorageContext,
-        entity: &EntityRef,
-    ) -> Result<Vec<Link>>;
+    async fn incoming(&self, ctx: &StorageContext, entity: &EntityRef) -> Result<Vec<Link>>;
 
     /// Create a link between two entities.
     async fn link(
@@ -181,11 +173,7 @@ impl Default for MockLinkStore {
 
 #[async_trait]
 impl LinkStore for MockLinkStore {
-    async fn outgoing(
-        &self,
-        _ctx: &StorageContext,
-        entity: &EntityRef,
-    ) -> Result<Vec<Link>> {
+    async fn outgoing(&self, _ctx: &StorageContext, entity: &EntityRef) -> Result<Vec<Link>> {
         let links = self.links.lock();
         Ok(links
             .iter()
@@ -194,11 +182,7 @@ impl LinkStore for MockLinkStore {
             .collect())
     }
 
-    async fn incoming(
-        &self,
-        _ctx: &StorageContext,
-        entity: &EntityRef,
-    ) -> Result<Vec<Link>> {
+    async fn incoming(&self, _ctx: &StorageContext, entity: &EntityRef) -> Result<Vec<Link>> {
         let links = self.links.lock();
         Ok(links
             .iter()
diff --git a/crates/khive-retrieval/src/graph/helpers.rs b/crates/khive-retrieval/src/graph/helpers.rs
index 907ac58c..4227a93c 100644
--- a/crates/khive-retrieval/src/graph/helpers.rs
+++ b/crates/khive-retrieval/src/graph/helpers.rs
@@ -1,8 +1,8 @@
 //! Helper functions for graph traversal.
 
-use super::compat::{EntityRef, Link, LinkStore, StorageContext};
 #[cfg(test)]
 use super::compat::LinkId;
+use super::compat::{EntityRef, Link, LinkStore, StorageContext};
 use khive_score::DeterministicScore;
 
 use crate::error::{Result, RetrievalError};
@@ -201,12 +201,7 @@ mod tests {
     fn test_get_neighbor_entity() {
         let source = EntityRef::External("source".to_string());
         let target = EntityRef::External("target".to_string());
-        let link = Link::new(
-            LinkId::NIL,
-            source.clone(),
-            target.clone(),
-            "test",
-        );
+        let link = Link::new(LinkId::NIL, source.clone(), target.clone(), "test");
 
         // Outgoing: return target
         assert_eq!(get_neighbor_entity(&link, &source, &Direction::Out), target);
diff --git a/crates/khive-retrieval/src/graph/mod.rs b/crates/khive-retrieval/src/graph/mod.rs
index 5fe1833b..7105ca75 100644
--- a/crates/khive-retrieval/src/graph/mod.rs
+++ b/crates/khive-retrieval/src/graph/mod.rs
@@ -83,7 +83,7 @@ mod types;
 mod tests;
 
 // Re-export compat types (legacy graph API shims)
-pub use compat::{EntityRef, Link, LinkId, LinkStore, MockLinkStore, StorageContext, test_context};
+pub use compat::{test_context, EntityRef, Link, LinkId, LinkStore, MockLinkStore, StorageContext};
 
 // Re-export public types
 pub use types::{
diff --git a/crates/khive-retrieval/src/graph/tests.rs b/crates/khive-retrieval/src/graph/tests.rs
index c9439355..639b3efd 100644
--- a/crates/khive-retrieval/src/graph/tests.rs
+++ b/crates/khive-retrieval/src/graph/tests.rs
@@ -1,6 +1,6 @@
 //! Unit tests for graph traversal module.
 
-use super::compat::{EntityRef, MockLinkStore, test_context};
+use super::compat::{test_context, EntityRef, MockLinkStore};
 
 use crate::graph::types::{
     Direction, PathNode, TraversalOptions, MAX_TRAVERSAL_DEPTH, MAX_TRAVERSAL_RESULTS,
diff --git a/crates/khive-retrieval/src/hybrid/searcher.rs b/crates/khive-retrieval/src/hybrid/searcher.rs
index f6ca8d7a..d24b12bd 100644
--- a/crates/khive-retrieval/src/hybrid/searcher.rs
+++ b/crates/khive-retrieval/src/hybrid/searcher.rs
@@ -260,7 +260,11 @@ pub fn fuse_search_results<Id: Eq + Hash + Clone + Ord>(
     let strategy = match &config.fusion_strategy {
         FusionStrategy::Weighted { .. } => {
             // Use configured weights — constrained to exactly 2 sources (vector + keyword)
-            debug_assert_eq!(sources.len(), 2, "Weighted fusion expects exactly 2 sources");
+            debug_assert_eq!(
+                sources.len(),
+                2,
+                "Weighted fusion expects exactly 2 sources"
+            );
             let (v, k) = config.normalized_weights();
             FusionStrategy::weighted(vec![v, k])
         }
diff --git a/crates/khive-retrieval/src/lib.rs b/crates/khive-retrieval/src/lib.rs
index 0edad1ba..60e61287 100644
--- a/crates/khive-retrieval/src/lib.rs
+++ b/crates/khive-retrieval/src/lib.rs
@@ -132,16 +132,16 @@ pub use adapters::{StorageKeywordSearch, StorageVectorSearch};
 pub use error::{ErrorKind, Result, RetrievalError};
 
 // Re-export types from sibling crates (now separate crates)
-pub use khive_bm25::{Bm25Config, Bm25Index, Bm25Stats, DocumentId, SearchContext};
-pub use khive_fusion::{
-    fuse, normalize_weights, reciprocal_rank_fusion, weighted_fusion, weights_are_normalized,
-    FusionStrategy, DEFAULT_RRF_K,
-};
 #[cfg(feature = "graph-legacy")]
 pub use graph::{
     bfs_traverse, dfs_traverse, find_shortest_path, Direction, PathNode, TraversalOptions,
     MAX_TRAVERSAL_DEPTH, MAX_TRAVERSAL_RESULTS,
 };
+pub use khive_bm25::{Bm25Config, Bm25Index, Bm25Stats, DocumentId, SearchContext};
+pub use khive_fusion::{
+    fuse, normalize_weights, reciprocal_rank_fusion, weighted_fusion, weights_are_normalized,
+    FusionStrategy, DEFAULT_RRF_K,
+};
 pub use khive_hnsw::{
     DistanceMetric, HnswCheckpointConfig, HnswConfig, HnswIndex, HnswSearchContext, HnswSnapshot,
     NodeId, RebuildStats, TombstoneStats,
diff --git a/crates/khive-retrieval/src/persist/hnsw.rs b/crates/khive-retrieval/src/persist/hnsw.rs
index 5f8e7b3d..d325f6a7 100644
--- a/crates/khive-retrieval/src/persist/hnsw.rs
+++ b/crates/khive-retrieval/src/persist/hnsw.rs
@@ -1,7 +1,7 @@
 //! HNSW-specific persistence methods.
 
-use khive_hnsw::HnswSnapshot;
 use khive_hnsw::HnswIndex;
+use khive_hnsw::HnswSnapshot;
 
 use super::shadow::{log_validation_result, should_sample};
 use super::{

From 810b8c210fa66a99da483eb864a6938f4e3f4b75 Mon Sep 17 00:00:00 2001
From: OceanLi <122793010+ohdearquant@users.noreply.github.com>
Date: Fri, 22 May 2026 14:03:17 -0400
Subject: [PATCH 4/4] fix: sync khive-fold and khive-runtime with main
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR #306 relocated ObjectiveRegistry from khive-fold to khive-runtime.
This branch predated that change — sync both crates to main.

Co-Authored-By: Claude Opus 4.6 (1M context) <noreply@anthropic.com>
---
 crates/khive-fold/src/objective/mod.rs | 2 --
 1 file changed, 2 deletions(-)

diff --git a/crates/khive-fold/src/objective/mod.rs b/crates/khive-fold/src/objective/mod.rs
index e2040fb6..c4504982 100644
--- a/crates/khive-fold/src/objective/mod.rs
+++ b/crates/khive-fold/src/objective/mod.rs
@@ -4,13 +4,11 @@ pub mod builtin;
 pub mod compose;
 mod context;
 pub mod error;
-pub mod registry;
 mod selection;
 mod traits;
 
 pub use context::ObjectiveContext;
 pub use error::{ObjectiveError, ObjectiveResult};
-pub use registry::{ObjectiveRegistry, RegisteredObjective};
 pub use selection::Selection;
 pub use traits::{objective_fn, DeterministicObjective, Objective};