clay-good · clay-good · May 31, 2026 · May 31, 2026 · May 31, 2026 · May 31, 2026
diff --git a/docs/mcp-tools.md b/docs/mcp-tools.md
@@ -49,13 +49,27 @@ Add `--watch-auto` to your MCP config args:
 }
 ```
 
-The watcher starts automatically on the first tool call — no hardcoded path needed. It re-extracts signatures for any changed source file and patches `llm-context.json` within ~500 ms of a save. If an embedding server is reachable, it also re-embeds changed functions into the vector index automatically. The call graph is not rebuilt on every change; it stays current via the [post-commit hook](#cicd-integration) (`openlore analyze --force`).
+The watcher is **on by default** — it starts automatically on the first tool call
+(no hardcoded path needed) and keeps the analysis fresh as you edit. To disable it,
+start the server with `openlore mcp --no-watch-auto`.
+
+Freshness is **O(change), not O(repo)** (Spec 13.1): per-file save events are coalesced
+into a single batched flush, the patched signatures are handed directly to the MCP read
+cache (so the next tool call is a cache hit, not a cold re-parse of `llm-context.json`),
+and the vector index is updated with row-level ops rather than a full-table rewrite.
+A bulk event (branch switch / rebase / formatter) collapses to a single refresh. On large
+repos (> 5000 source files) live embedding auto-degrades to signatures-only (logged once);
+embeddings then refresh at commit. Set `OPENLORE_WATCH_DEBUG=1` for per-file stderr detail
+(default is one summary line per batch). The call graph is not rebuilt on every change; it
+stays current via the [post-commit hook](#cicd-integration) (`openlore analyze --force`).
 
 | Option | Default | Description |
 |---|---|---|
-| `--watch-auto` | off | Auto-detect project root from first tool call |
+| `--watch-auto` | **on** | Auto-detect project root from first tool call |
+| `--no-watch-auto` | — | Disable the auto-watcher (use for one-shot tool calls) |
 | `--watch <dir>` | — | Watch a fixed directory (alternative to `--watch-auto`) |
-| `--watch-debounce <ms>` | 400 | Delay before re-indexing after a file change |
+| `--watch-debounce <ms>` | 400 | Idle delay before a coalesced flush after a change |
+| `--watch-no-embed` | off | Signatures-only: skip live re-embedding (refresh at commit) |
 
 ### Cline / Roo Code / Kilocode
 

diff --git a/docs/specs/openlore-spec-13-context-substrate.md b/docs/specs/openlore-spec-13-context-substrate.md
@@ -26,7 +26,8 @@ Branch: `openlore-spec-13-context-substrate`. Direction locked; claims verified;
 - [x] Competitive + market reality verified against primary sources (2026-05-30)
 - [x] Repo ground-truth established (what actually ships vs. what was claimed)
 - [x] Theses adversarially stress-tested; positioning corrected to survive the strongest attack
-- [ ] **Spec 14** — Agent Token-Efficiency Benchmark Harness (WITH vs WITHOUT). *Do this first.* → [openlore-spec-14-agent-benchmark-harness.md](openlore-spec-14-agent-benchmark-harness.md)
+- [ ] **Spec 13.1** — Make Incremental Freshness Cheap (Watch-Mode Performance). *Urgent regression fix — do this before 14; the watcher that backs this spec's "always-fresh" promise currently taxes every dogfooding session.* → [openlore-spec-13.1-watch-mode-performance.md](openlore-spec-13.1-watch-mode-performance.md)
+- [ ] **Spec 14** — Agent Token-Efficiency Benchmark Harness (WITH vs WITHOUT). *Do this next, immediately after 13.1.* → [openlore-spec-14-agent-benchmark-harness.md](openlore-spec-14-agent-benchmark-harness.md)
 - [ ] **Spec 15** — Decision & Drift Governance Dogfooding (turn the gate on in our own repo). → [openlore-spec-15-governance-dogfooding.md](openlore-spec-15-governance-dogfooding.md)
 - [ ] **Spec 16** — Architectural Decisions as First-Class Graph Nodes (`affects` edges). → [openlore-spec-16-decisions-as-graph-nodes.md](openlore-spec-16-decisions-as-graph-nodes.md)
 - [ ] **Spec 17** — Cross-Domain Impact Analysis (Code ↔ Infrastructure). → [openlore-spec-17-cross-domain-impact.md](openlore-spec-17-cross-domain-impact.md)

diff --git a/docs/specs/openlore-spec-13.1-watch-mode-performance.md b/docs/specs/openlore-spec-13.1-watch-mode-performance.md
diff --git a/package.json b/package.json
@@ -27,6 +27,7 @@
     "view": "tsx src/cli/index.ts view",
     "bench": "tsx scripts/bench.ts",
     "bench:mcp": "tsx scripts/bench-mcp.ts",
+    "bench:watch": "tsx scripts/bench-watch.ts",
     "test": "vitest",
     "test:run": "vitest run",
     "test:coverage": "vitest run --coverage",

diff --git a/scripts/bench-watch.ts b/scripts/bench-watch.ts
@@ -0,0 +1,155 @@
+/**
+ * bench-watch.ts — watch-mode (MCP incremental re-index) microbenchmark.
+ *
+ * Spec 13.1: freshness must be O(change), not O(repo). This measures the
+ * per-save and bulk-burst cost of the watcher pipeline on a fixture with a
+ * ~2 MB llm-context.json, and checks the coalescing/cache guarantees:
+ *
+ *   G1 — a single save triggers ≤ 1 llm-context persistence and the next read
+ *        is a cache HIT (no cold full-file re-parse). Only the read side is
+ *        asserted (it must be faster than a cold parse).
+ *   G2 — a burst of N saves coalesces to ONE flush (asserted).
+ *   G4 — per-save wall-clock stays small relative to the context size
+ *        (reported, not asserted).
+ *
+ * Run:  npm run bench:watch
+ *
+ * This is a manual benchmark (not part of CI). It builds its own throwaway
+ * fixture under the OS temp dir and cleans up afterwards.
+ */
+import { mkdtemp, mkdir, writeFile, readFile, rm, stat } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { performance } from 'node:perf_hooks';
+import { McpWatcher } from '../src/core/services/mcp-watcher.js';
+import { readCachedContext, _resetContextCacheForTesting } from '../src/core/services/mcp-handlers/utils.js';
+
+const FILE_COUNT = 600;       // synthetic source files in the context
+const ENTRIES_PER_FILE = 20;  // signatures per file
+const SINGLE_SAVE_RUNS = 20;
+const BURST_SIZE = 50;
+
+/** Median of `xs` (the input is left unsorted); an even count averages the two middle values. */
+function median(xs: number[]): number {
+  const sorted = xs.slice().sort((lo, hi) => lo - hi), mid = sorted.length >> 1;
+  return sorted.length % 2 === 0 ? (sorted[mid - 1] + sorted[mid]) / 2 : sorted[mid];
+}
+
+/** Builds the synthetic fixture: FILE_COUNT TypeScript files carrying ENTRIES_PER_FILE function signatures each. */
+function synthSignatures(): Array<{ path: string; language: string; entries: Array<{ name: string; signature: string; docstring: string; line: number; kind: string }> }> {
+  return Array.from({ length: FILE_COUNT }, (_file, fileIdx) => ({
+    path: `src/module_${fileIdx}/file_${fileIdx}.ts`,
+    language: 'TypeScript',
+    entries: Array.from({ length: ENTRIES_PER_FILE }, (_entry, entryIdx) => {
+      const fnName = `fn_${fileIdx}_${entryIdx}`;
+      return {
+        name: fnName,
+        signature: `export function ${fnName}(arg0: string, arg1: number, opts?: Record<string, unknown>): Promise<void>`,
+        docstring: `Function ${entryIdx} in module ${fileIdx}. Handles a representative unit of work for the benchmark fixture.`,
+        // Entries sit 7 lines apart, starting at line 1.
+        line: entryIdx * 7 + 1,
+        kind: 'function',
+      };
+    }),
+  }));
+}
+
+/**
+ * Runs the whole benchmark: builds a throwaway fixture under the OS temp dir,
+ * times single saves (plus the read that follows each), times one cold read,
+ * pushes a BURST_SIZE burst through a second watcher, prints a markdown
+ * report, and finally enforces G1/G2.
+ *
+ * Cleanup lives in `finally` blocks so that a failed assertion or a watcher
+ * error neither leaks the fixture directory nor leaves `process.stderr.write`
+ * hooked.
+ *
+ * @throws Error if a post-save read returns nothing, or if G1 or G2 is violated.
+ */
+async function main(): Promise<void> {
+  // Upper bound on how long the burst phase waits for its first summary line.
+  const BURST_TIMEOUT_MS = 5000;
+  // Extra wait after the first summary so a stray second flush is still counted.
+  const BURST_SETTLE_MS = 400;
+
+  const root = await mkdtemp(join(tmpdir(), 'ol-benchwatch-'));
+  try {
+    const analysisDir = join(root, '.openlore', 'analysis');
+    await mkdir(analysisDir, { recursive: true });
+    const contextPath = join(analysisDir, 'llm-context.json');
+
+    // Build a ~2 MB context.
+    const signatures = synthSignatures();
+    await writeFile(contextPath, JSON.stringify({ signatures, callGraph: null }, null, 2), 'utf-8');
+    const ctxBytes = (await stat(contextPath)).size;
+
+    // Write the real source files so the watcher can read them on change.
+    for (let i = 0; i < FILE_COUNT; i++) {
+      const dir = join(root, 'src', `module_${i}`);
+      await mkdir(dir, { recursive: true });
+      await writeFile(join(dir, `file_${i}.ts`), `export function fn_${i}_0() { return ${i}; }\n`, 'utf-8');
+    }
+
+    // embed:false → measure the signature/freshness pipeline (the per-save hot
+    // path the spec flagged: the 2 MB rewrite + the re-parse it used to force).
+    const watcher = new McpWatcher({ rootPath: root, embed: false });
+
+    // ── Single-save latency, including the simulated "next tool call" read ──────
+    const flushTimes: number[] = [];
+    const readTimes: number[] = [];
+    for (let r = 0; r < SINGLE_SAVE_RUNS; r++) {
+      const i = r % FILE_COUNT;
+      const f = join(root, 'src', `module_${i}`, `file_${i}.ts`);
+      await writeFile(f, `export function fn_${i}_0() { return ${i + r * 1000}; }\n`, 'utf-8');
+
+      const t0 = performance.now();
+      await watcher.handleChange(f);
+      flushTimes.push(performance.now() - t0);
+
+      // The next "tool call" read — must be a cache HIT (no 2 MB cold re-parse).
+      const t1 = performance.now();
+      const ctx = await readCachedContext(root);
+      readTimes.push(performance.now() - t1);
+      if (!ctx) throw new Error('readCachedContext returned null after save');
+    }
+
+    // ── Cold read baseline (cache cleared → full 2 MB parse) for contrast ───────
+    _resetContextCacheForTesting();
+    const coldT0 = performance.now();
+    await readCachedContext(root);
+    const coldRead = performance.now() - coldT0;
+
+    // ── Bulk burst: BURST_SIZE files in one window must coalesce to ONE flush ───
+    let summaries = 0;
+    // performance.now() at the first summary line; 0 means "none seen yet".
+    let firstSummaryAt = 0;
+    let burstTime = 0;
+    const origWrite = process.stderr.write.bind(process.stderr);
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    (process.stderr as any).write = (chunk: any, ...rest: any[]): boolean => {
+      if (/\[mcp-watcher\] (updated|coalesced)/.test(String(chunk))) {
+        summaries++;
+        if (summaries === 1) firstSummaryAt = performance.now();
+      }
+      return origWrite(chunk, ...rest);
+    };
+    try {
+      const burst = new McpWatcher({ rootPath: root, embed: false, debounceMs: 50, maxBatchMs: 2000 });
+      const burstFiles: string[] = [];
+      for (let i = 0; i < BURST_SIZE; i++) {
+        const f = join(root, 'src', `module_${i}`, `file_${i}.ts`);
+        await writeFile(f, `export function fn_${i}_0() { return ${i}*2; }\n`, 'utf-8');
+        burstFiles.push(f);
+      }
+      const burstT0 = performance.now();
+      for (const f of burstFiles) (burst as unknown as { enqueue(p: string): void }).enqueue(f);
+      // Poll (bounded) for the first summary line rather than trusting one fixed
+      // sleep: a slow machine then cannot fake a G2 failure, and the wall-clock
+      // below stops being the sleep duration itself.
+      // NOTE(review): presumes the summary line is written once the flush has
+      // finished — confirm against McpWatcher.
+      while (summaries === 0 && performance.now() - burstT0 < BURST_TIMEOUT_MS) {
+        await new Promise((res) => setTimeout(res, 25));
+      }
+      // Linger so that a stray second flush would still be counted against G2.
+      await new Promise((res) => setTimeout(res, BURST_SETTLE_MS));
+      burstTime = (firstSummaryAt || performance.now()) - burstT0;
+    } finally {
+      // Always unhook stderr, even when the burst phase throws.
+      // eslint-disable-next-line @typescript-eslint/no-explicit-any
+      (process.stderr as any).write = origWrite;
+    }
+
+    const report =
+`## Watch-mode benchmark (Spec 13.1)
+
+Fixture: ${FILE_COUNT} files × ${ENTRIES_PER_FILE} signatures, llm-context.json = ${(ctxBytes / 1_048_576).toFixed(2)} MB.
+
+| Metric | Result |
+|--------|--------|
+| Single-save flush (median of ${SINGLE_SAVE_RUNS}) | ${median(flushTimes).toFixed(1)} ms |
+| Next-call read after save (median, cache HIT) | ${median(readTimes).toFixed(2)} ms |
+| Cold read (cache cleared, full parse) | ${coldRead.toFixed(1)} ms |
+| ${BURST_SIZE}-file burst → flushes | ${summaries} (expected 1) |
+| ${BURST_SIZE}-file burst wall-clock | ${burstTime.toFixed(1)} ms |
+
+G1: next-call read is a cache hit — ${median(readTimes).toFixed(2)} ms vs ${coldRead.toFixed(1)} ms cold (${(coldRead / Math.max(median(readTimes), 0.001)).toFixed(0)}× faster).
+G2: ${BURST_SIZE} saves coalesced to ${summaries} flush${summaries === 1 ? '' : 'es'}.
+`;
+
+    // eslint-disable-next-line no-console
+    console.log(report);
+
+    // Assertions (fail loudly in CI-less manual runs).
+    if (summaries !== 1) throw new Error(`G2 violated: expected 1 coalesced flush, got ${summaries}`);
+    if (median(readTimes) >= coldRead) throw new Error('G1 violated: post-save read is not faster than a cold parse');
+  } finally {
+    // Remove the fixture whether the run passed or threw.
+    await rm(root, { recursive: true, force: true });
+  }
+}
+
+// Entry point: print any failure to stderr and exit with a non-zero status.
+main().catch((failure: unknown) => {
+  // eslint-disable-next-line no-console
+  console.error(failure);
+  process.exit(1);
+});
diff --git a/src/cli/commands/mcp.ts b/src/cli/commands/mcp.ts
@@ -1286,6 +1286,7 @@ interface McpServerOptions {
   watch?: string;
   watchAuto?: boolean;
   watchDebounce?: string;
+  watchNoEmbed?: boolean;
   minimal?: boolean;
 }
 
@@ -1353,6 +1354,7 @@ async function startMcpServer(options: McpServerOptions = {}): Promise<void> {
         autoWatcher = new McpWatcher({
           rootPath: resolve(dir),
           debounceMs: isNaN(debounceMs) ? 400 : debounceMs,
+          embed: !options.watchNoEmbed,
         });
         await autoWatcher.start();
         const cleanup = () => autoWatcher!.stop().then(() => process.exit(0));
@@ -1592,6 +1594,7 @@ async function startMcpServer(options: McpServerOptions = {}): Promise<void> {
     const watcher = new McpWatcher({
       rootPath: resolve(options.watch),
       debounceMs: isNaN(debounceMs) ? 400 : debounceMs,
+      embed: !options.watchNoEmbed,
     });
     await watcher.start();
     const cleanup = () => watcher.stop().then(() => process.exit(0));
@@ -1610,5 +1613,6 @@ export const mcpCommand = new Command('mcp')
   .option('--watch-auto', 'Auto-detect the project directory from the first tool call and start watching', true)
   .option('--no-watch-auto', 'Disable auto-watch (use for one-shot tool calls, e.g. the orient skill wrapper)')
   .option('--watch-debounce <ms>', 'Debounce delay in ms before re-indexing after a file change (default: 400)', '400')
+  .option('--watch-no-embed', 'Watch signatures only — skip live vector re-embedding (embeddings refresh at commit). Large repos auto-degrade to this.')
   .option('--minimal', 'Expose only core 5 tools (orient, search_code, record_decision, detect_changes, check_spec_drift). Pair with alwaysLoad: true in Claude Code for always-visible core tools.')
   .action((options: McpServerOptions) => startMcpServer(options));
diff --git a/src/constants.ts b/src/constants.ts
@@ -549,3 +549,36 @@ export const DECISIONS_CONSOLIDATION_MAX_TOKENS = 2_000;
 
 /** Max output tokens for verification LLM call */
 export const DECISIONS_VERIFICATION_MAX_TOKENS = 1_500;
+
+// ============================================================================
+// WATCH MODE (MCP incremental re-index) — Spec 13.1
+// ============================================================================
+// Defaults chosen to keep --watch-auto on by default while making incremental
+// freshness O(change), not O(repo).
+// See docs/specs/openlore-spec-13.1-watch-mode-performance.md.
+
+/**
+ * Idle quiet period (ms) before a coalesced flush after the last file change.
+ * Same value as the `--watch-debounce` CLI default.
+ */
+export const WATCH_DEBOUNCE_MS = 400;
+
+/**
+ * Hard ceiling (ms) that forces a flush even under a continuous change stream,
+ * so a steady drip of edits never starves the queue indefinitely.
+ */
+export const WATCH_MAX_BATCH_MS = 2000;
+
+/**
+ * Number of files in a single coalesced flush that trips VCS-flood handling
+ * (a branch switch / rebase / formatter touching many files at once).
+ */
+export const WATCH_BULK_THRESHOLD = 25;
+
+/**
+ * Above this many watched source files, live embedding auto-degrades to
+ * signatures-only; embeddings refresh at commit (post-commit analyze --embed).
+ */
+export const WATCH_EMBED_FILE_CEILING = 5000;
+
+/**
+ * Quiet period (ms) after a detected VCS bulk operation (.git/HEAD or index
+ * churn) before a single coalesced refresh runs, so the whole op settles first.
+ */
+export const WATCH_VCS_SETTLE_MS = 750;
diff --git a/src/core/analyzer/vector-index-updatefiles.test.ts b/src/core/analyzer/vector-index-updatefiles.test.ts
@@ -0,0 +1,91 @@
+/**
+ * Spec 13.1 — VectorIndex.updateFiles row-level incremental update.
+ *
+ * Proves the watch path replaces ONLY the changed file's rows (delete + add)
+ * instead of the full-table read+overwrite build() performs: a sibling file's
+ * rows survive an update untouched, and the changed file's rows are replaced.
+ * Runs BM25-only (embedSvc = null) so it needs no embedding service.
+ */
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import { mkdtemp, rm } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import {
+  VectorIndex,
+  _resetVectorIndexCachesForTesting,
+} from './vector-index.js';
+import type { FunctionNode } from './call-graph.js';
+
+/**
+ * Minimal FunctionNode fixture for a function `name` declared in `filePath`.
+ * Metrics and source offsets are zeroed; the id is `<filePath>::<name>`.
+ */
+function node(filePath: string, name: string): FunctionNode {
+  const fixture = {
+    id: `${filePath}::${name}`,
+    name,
+    filePath,
+    className: '',
+    language: 'TypeScript',
+    signature: `function ${name}()`,
+    docstring: '',
+    fanIn: 0,
+    fanOut: 0,
+    startIndex: 0,
+    endIndex: 0,
+  };
+  // Only the fields listed above are populated, hence the double cast.
+  return fixture as unknown as FunctionNode;
+}
+
+// Scratch index directory, recreated for every test case.
+let outputDir: string;
+
+// Fresh temp dir first, then reset VectorIndex's test-visible caches.
+beforeEach(async () => {
+  const scratchDir = await mkdtemp(join(tmpdir(), 'ol-vi-update-'));
+  outputDir = scratchDir;
+  _resetVectorIndexCachesForTesting();
+});
+
+// Reset the caches again before deleting the directory they were built from.
+afterEach(async () => {
+  _resetVectorIndexCachesForTesting();
+  await rm(outputDir, { recursive: true, force: true });
+});
+
+/** Searches the index in `outputDir` for `query` (at most 20 hits) and returns the distinct record names. */
+async function names(query: string): Promise<Set<string>> {
+  const found = new Set<string>();
+  const hits = await VectorIndex.search(outputDir, query, null, { limit: 20 });
+  for (const hit of hits) {
+    found.add(hit.record.name);
+  }
+  return found;
+}
+
+describe('VectorIndex.updateFiles — Spec 13.1 (BM25-only)', () => {
+  it('replaces only the changed file rows; sibling file rows survive', async () => {
+    // Arrange: a BM25-only index holding one function in each of two files.
+    const seedNodes = [node('alpha.ts', 'alphafn'), node('beta.ts', 'betafn')];
+    const buildResult = await VectorIndex.build(
+      outputDir,
+      seedNodes,
+      [],
+      new Set(),
+      new Set(),
+      null,
+      undefined,
+      false,
+    );
+    expect(buildResult.hasEmbeddings).toBe(false);
+    expect(buildResult.total).toBe(2);
+    _resetVectorIndexCachesForTesting();
+
+    // Baseline: each seeded function can be found by name.
+    const alphaBefore = await names('alphafn');
+    expect(alphaBefore).toContain('alphafn');
+    const betaBefore = await names('betafn');
+    expect(betaBefore).toContain('betafn');
+    _resetVectorIndexCachesForTesting();
+
+    // Act: alpha.ts now declares gammafn instead of alphafn.
+    const changedNodes = [node('alpha.ts', 'gammafn')];
+    const changedFiles = new Set(['alpha.ts']);
+    const updateResult = await VectorIndex.updateFiles(
+      outputDir, changedNodes, changedFiles, [], new Set(), new Set(), null, undefined,
+    );
+    expect(updateResult.hasEmbeddings).toBe(false);
+    _resetVectorIndexCachesForTesting();
+
+    // Assert: the sibling file's row survived the row-level update …
+    const betaAfter = await names('betafn');
+    expect(betaAfter).toContain('betafn');
+    _resetVectorIndexCachesForTesting();
+    // … the replacement row for alpha.ts is searchable …
+    const gammaAfter = await names('gammafn');
+    expect(gammaAfter).toContain('gammafn');
+    _resetVectorIndexCachesForTesting();
+    // … and the stale alphafn row was really deleted.
+    const alphaAfter = await names('alphafn');
+    expect(alphaAfter).not.toContain('alphafn');
+  });
+});