devlux76 · devlux76 · Mar 14, 2026 · Mar 13, 2026 · Mar 13, 2026 · Mar 14, 2026
diff --git a/benchmarks/BASELINES.md b/benchmarks/BASELINES.md
@@ -0,0 +1,77 @@
+# CORTEX Benchmark Baselines
+
+> **Status:** Baseline measurements pending a hardware CI run.
+> The values below are illustrative targets; replace with real output from
+> `npm run benchmark:all` on representative hardware.
+
+## Williams Bound H(t) — Sublinear Growth Curve
+
+| Graph mass (t) | H(t) = ceil(0.5 * sqrt(t * log2(1+t))) | H(t)/t ratio |
+|---------------:|----------------------------------------:|-------------:|
+|          1 000 |                                      ~50 |        0.050 |
+|         10 000 |                                     ~183 |        0.018 |
+|        100 000 |                                     ~645 |        0.006 |
+|      1 000 000 |                                   ~2 233 |        0.002 |
+
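+Key invariant: H(t)/t shrinks toward 0 as t grows. Before rounding, the ratio
+is strictly decreasing; `ceil` can cause small upward steps at very small t.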
+
+---
+
+## Dummy Embedder Hotpath
+
+Run: `npm run benchmark:dummy`
+
+| Benchmark               | Mean latency (ms) | Throughput |
+|-------------------------|------------------:|----------:|
+| Single short input       |              TBD  |       TBD |
+| Batch 16 medium inputs   |              TBD  |       TBD |
+| Batch 64 short inputs    |              TBD  |       TBD |
+
+---
+
+## Query Latency vs Corpus Size
+
+Run: `npm run benchmark:query-latency`
+
+| Corpus size | Mean query latency (ms) |
+|------------:|------------------------:|
+|     100 pages |                   TBD  |
+|     500 pages |                   TBD  |
+
+Expected: latency grows sub-linearly because hotpath residents are scored
+first and most queries are served without scanning the full corpus.
+
+---
+
+## Storage Overhead
+
+Run: `npm run benchmark:storage-overhead`
+
+| Page count | Vector store size (bytes) | Bytes per page |
+|-----------:|--------------------------:|---------------:|
+|         50 |                      TBD  |           TBD  |
+|        200 |                      TBD  |           TBD  |
+
+Expected: linear growth (no hidden quadratic allocations).
+
+---
+
+## Hotpath Scaling
+
+Run: `npm run benchmark:hotpath-scaling`
+
+| Graph mass | H(t) capacity | Resident count | Promotion sweep (ms) |
+|-----------:|--------------:|---------------:|---------------------:|
+|      1 000 |           ~50 |           TBD  |                 TBD  |
+|      5 000 |          ~124 |           TBD  |                 TBD  |
+
+Invariant: Resident count never exceeds H(t).
+
+---
+
+## How to Update Baselines
+
+1. Run `npm run benchmark:all` on the target hardware.
+2. Copy the `mean` column values from the Vitest bench output.
+3. Replace every `TBD` cell in this file with the measured value.
+4. Commit with message `chore: update benchmark baselines — <hardware>`.
diff --git a/core/types.ts b/core/types.ts
@@ -64,16 +64,18 @@ export interface Edge {
 }
 
 // ---------------------------------------------------------------------------
-// Metroid nearest-neighbour graph (project term; medoid-inspired)
+// Semantic nearest-neighbor graph
 // ---------------------------------------------------------------------------
 
-export interface MetroidNeighbor {
+/** A single directed proximity edge in the sparse semantic neighbor graph. */
+export interface SemanticNeighbor {
   neighborPageId: Hash;
   cosineSimilarity: number;   // threshold is defined by runtime policy
   distance: number;           // 1 - cosineSimilarity (ready for TSP)
 }
 
-export interface MetroidSubgraph {
+/** Induced subgraph returned by BFS expansion of the semantic neighbor graph. */
+export interface SemanticNeighborSubgraph {
   nodes: Hash[];
   edges: { from: Hash; to: Hash; distance: number }[];
 }
@@ -162,6 +164,13 @@ export interface MetadataStore {
 
   putVolume(volume: Volume): Promise<void>;
   getVolume(volumeId: Hash): Promise<Volume | undefined>;
+  /**
+   * Delete a volume record and clean up all reverse-index entries
+   * (`bookToVolume` for each book in the volume, and the `volumeToShelf` entry).
+   * Callers are responsible for removing the volume from any shelf's `volumeIds`
+   * list before calling this method.
+   */
+  deleteVolume(volumeId: Hash): Promise<void>;
 
   putShelf(shelf: Shelf): Promise<void>;
   getShelf(shelfId: Hash): Promise<Shelf | undefined>;
@@ -175,20 +184,20 @@ export interface MetadataStore {
   getVolumesByBook(bookId: Hash): Promise<Volume[]>;
   getShelvesByVolume(volumeId: Hash): Promise<Shelf[]>;
 
-  // --- Metroid NN radius index ---
-  putMetroidNeighbors(pageId: Hash, neighbors: MetroidNeighbor[]): Promise<void>;
-  getMetroidNeighbors(pageId: Hash, maxDegree?: number): Promise<MetroidNeighbor[]>;
+  // --- Semantic neighbor radius index ---
+  putSemanticNeighbors(pageId: Hash, neighbors: SemanticNeighbor[]): Promise<void>;
+  getSemanticNeighbors(pageId: Hash, maxDegree?: number): Promise<SemanticNeighbor[]>;
 
-  /** BFS expansion of the Metroid subgraph up to `maxHops` levels deep. */
-  getInducedMetroidSubgraph(
+  /** BFS expansion of the semantic neighbor subgraph up to `maxHops` levels deep. */
+  getInducedNeighborSubgraph(
     seedPageIds: Hash[],
     maxHops: number,
-  ): Promise<MetroidSubgraph>;
+  ): Promise<SemanticNeighborSubgraph>;
 
   // --- Dirty-volume recalc flags ---
-  needsMetroidRecalc(volumeId: Hash): Promise<boolean>;
-  flagVolumeForMetroidRecalc(volumeId: Hash): Promise<void>;
-  clearMetroidRecalcFlag(volumeId: Hash): Promise<void>;
+  needsNeighborRecalc(volumeId: Hash): Promise<boolean>;
+  flagVolumeForNeighborRecalc(volumeId: Hash): Promise<void>;
+  clearNeighborRecalcFlag(volumeId: Hash): Promise<void>;
 
   // --- Hotpath index ---
   putHotpathEntry(entry: HotpathEntry): Promise<void>;

diff --git a/cortex/KnowledgeGapDetector.ts b/cortex/KnowledgeGapDetector.ts
@@ -0,0 +1,66 @@
+import type { Hash } from "../core/types";
+import type { ModelProfile } from "../core/ModelProfile";
+import { hashText } from "../core/crypto/hash";
+import type { Metroid } from "./MetroidBuilder";
+
+/**
+ * Record of a query for which a knowledge gap was detected.
+ * Produced by `detectKnowledgeGap` and consumed by `buildCuriosityProbe`.
+ */
+export interface KnowledgeGap {
+  queryText: string;               // query text, stored as passed to detectKnowledgeGap
+  queryEmbedding: Float32Array;    // query embedding, stored by reference (not copied)
+  knowledgeBoundary: Hash | null;  // the metroid's m1, or null when m1 is the empty string
+  detectedAt: string;              // ISO-8601 timestamp taken when the gap was detected
+}
+
+/**
+ * Probe built from a `KnowledgeGap` by `buildCuriosityProbe`.
+ * Carries the gap's query data plus identification and model metadata.
+ */
+export interface CuriosityProbe {
+  probeId: Hash;                   // hashText(queryText + detectedAt) of the source gap
+  queryText: string;               // copied from the gap
+  queryEmbedding: Float32Array;    // copied from the gap by reference
+  knowledgeBoundary: Hash | null;  // copied from the gap
+  mimeType: string;                // "text/plain" unless the caller overrides it
+  modelUrn: string;                // `urn:model:<modelId>` of the supplied model profile
+  createdAt: string;               // ISO-8601 timestamp taken when the probe was built
+}
+
+/**
+ * Turns the gap flag on a Metroid into a KnowledgeGap record.
+ *
+ * The original note on this function describes `metroid.knowledgeGap` as the
+ * signal that m2 (the antithesis for the query) could not be found.
+ *
+ * @param queryText - Query text, copied into the result unchanged.
+ * @param queryEmbedding - Query embedding, passed through by reference (not copied).
+ * @param metroid - Metroid whose `knowledgeGap` flag and `m1` field are inspected.
+ * @param _modelProfile - Unused; reserved for future model-aware gap categorisation.
+ * @returns `null` when `metroid.knowledgeGap` is falsy; otherwise a gap stamped
+ *   with the current time, whose `knowledgeBoundary` is `metroid.m1`, or `null`
+ *   when `m1` is the empty string.
+ */
+export async function detectKnowledgeGap(
+  queryText: string,
+  queryEmbedding: Float32Array,
+  metroid: Metroid,
+  // eslint-disable-next-line @typescript-eslint/no-unused-vars -- reserved for future model-aware gap categorisation
+  _modelProfile: ModelProfile,
+): Promise<KnowledgeGap | null> {
+  if (metroid.knowledgeGap) {
+    // An empty m1 means there is no boundary page to report.
+    const knowledgeBoundary = metroid.m1 === "" ? null : metroid.m1;
+    const detectedAt = new Date().toISOString();
+
+    return { queryText, queryEmbedding, knowledgeBoundary, detectedAt };
+  }
+
+  return null;
+}
+
+/**
+ * Converts a detected KnowledgeGap into a CuriosityProbe.
+ *
+ * The probe id is `hashText(queryText + detectedAt)` (SHA-256 according to the
+ * original note), so the same gap always yields the same id. `createdAt` is
+ * taken when the probe is built and is independent of `gap.detectedAt`.
+ *
+ * @param gap - Gap whose query text, embedding and boundary are copied to the probe.
+ * @param modelProfile - Supplies `modelId` for the `urn:model:<modelId>` URN.
+ * @param mimeType - MIME type recorded on the probe; defaults to "text/plain".
+ * @returns The assembled probe.
+ */
+export async function buildCuriosityProbe(
+  gap: KnowledgeGap,
+  modelProfile: ModelProfile,
+  mimeType = "text/plain",
+): Promise<CuriosityProbe> {
+  const hashInput = gap.queryText + gap.detectedAt;
+  const probeId = await hashText(hashInput);
+
+  // Remaining fields are read after the hash resolves, as in the original.
+  const { queryText, queryEmbedding, knowledgeBoundary } = gap;
+  const modelUrn = `urn:model:${modelProfile.modelId}`;
+  const createdAt = new Date().toISOString();
+
+  return {
+    probeId,
+    queryText,
+    queryEmbedding,
+    knowledgeBoundary,
+    mimeType,
+    modelUrn,
+    createdAt,
+  };
+}