diff --git a/docs/mcp-tools.md b/docs/mcp-tools.md
index 239bbdf9..696acf6d 100644
--- a/docs/mcp-tools.md
+++ b/docs/mcp-tools.md
@@ -49,13 +49,27 @@ Add `--watch-auto` to your MCP config args:
 }
 ```
 
-The watcher starts automatically on the first tool call — no hardcoded path needed. It re-extracts signatures for any changed source file and patches `llm-context.json` within ~500 ms of a save. If an embedding server is reachable, it also re-embeds changed functions into the vector index automatically. The call graph is not rebuilt on every change; it stays current via the [post-commit hook](#cicd-integration) (`openlore analyze --force`).
+The watcher is **on by default** — it starts automatically on the first tool call
+(no hardcoded path needed) and keeps the analysis fresh as you edit. To disable it,
+start the server with `openlore mcp --no-watch-auto`.
+
+Freshness is **O(change), not O(repo)** (Spec 13.1): per-file save events are coalesced
+into a single batched flush, the patched signatures are handed directly to the MCP read
+cache (so the next tool call is a cache hit, not a cold re-parse of `llm-context.json`),
+and the vector index is updated with row-level ops rather than a full-table rewrite.
+A bulk event (branch switch / rebase / formatter) collapses to a single refresh. On large
+repos (> 5000 source files) live embedding auto-degrades to signatures-only (logged once);
+embeddings then refresh at commit. Set `OPENLORE_WATCH_DEBUG=1` for per-file stderr detail
+(default is one summary line per batch). The call graph is not rebuilt on every change; it
+stays current via the [post-commit hook](#cicd-integration) (`openlore analyze --force`).
 
 | Option | Default | Description |
 |---|---|---|
-| `--watch-auto` | off | Auto-detect project root from first tool call |
+| `--watch-auto` | **on** | Auto-detect project root from first tool call |
+| `--no-watch-auto` | — | Disable the auto-watcher (one-shot tool calls) |
 | `--watch <dir>` | — | Watch a fixed directory (alternative to `--watch-auto`) |
-| `--watch-debounce <ms>` | 400 | Delay before re-indexing after a file change |
+| `--watch-debounce <ms>` | 400 | Idle delay before a coalesced flush after a change |
+| `--watch-no-embed` | off | Signatures-only: skip live re-embedding (refresh at commit) |
 
 ### Cline / Roo Code / Kilocode
 
diff --git a/docs/specs/openlore-spec-13-context-substrate.md b/docs/specs/openlore-spec-13-context-substrate.md
index 4bafde14..780d91f0 100644
--- a/docs/specs/openlore-spec-13-context-substrate.md
+++ b/docs/specs/openlore-spec-13-context-substrate.md
@@ -26,7 +26,8 @@ Branch: `openlore-spec-13-context-substrate`. Direction locked; claims verified;
 - [x] Competitive + market reality verified against primary sources (2026-05-30)
 - [x] Repo ground-truth established (what actually ships vs. what was claimed)
 - [x] Theses adversarially stress-tested; positioning corrected to survive the strongest attack
-- [ ] **Spec 14** — Agent Token-Efficiency Benchmark Harness (WITH vs WITHOUT). *Do this first.* → [openlore-spec-14-agent-benchmark-harness.md](openlore-spec-14-agent-benchmark-harness.md)
+- [ ] **Spec 13.1** — Make Incremental Freshness Cheap (Watch-Mode Performance). *Urgent regression fix — do this before 14; the watcher that backs this spec's "always-fresh" promise currently taxes every dogfooding session.* → [openlore-spec-13.1-watch-mode-performance.md](openlore-spec-13.1-watch-mode-performance.md)
+- [ ] **Spec 14** — Agent Token-Efficiency Benchmark Harness (WITH vs WITHOUT). *Do this first (after 13.1).* → [openlore-spec-14-agent-benchmark-harness.md](openlore-spec-14-agent-benchmark-harness.md)
 - [ ] **Spec 15** — Decision & Drift Governance Dogfooding (turn the gate on in our own repo). → [openlore-spec-15-governance-dogfooding.md](openlore-spec-15-governance-dogfooding.md)
 - [ ] **Spec 16** — Architectural Decisions as First-Class Graph Nodes (`affects` edges). → [openlore-spec-16-decisions-as-graph-nodes.md](openlore-spec-16-decisions-as-graph-nodes.md)
 - [ ] **Spec 17** — Cross-Domain Impact Analysis (Code ↔ Infrastructure). → [openlore-spec-17-cross-domain-impact.md](openlore-spec-17-cross-domain-impact.md)
diff --git a/docs/specs/openlore-spec-13.1-watch-mode-performance.md b/docs/specs/openlore-spec-13.1-watch-mode-performance.md
new file mode 100644
index 00000000..68d8feac
--- /dev/null
+++ b/docs/specs/openlore-spec-13.1-watch-mode-performance.md
@@ -0,0 +1,336 @@
+# OpenLore Spec 13.1 — Make Incremental Freshness Cheap (Watch-Mode Performance)
+
+> **Type:** urgent regression fix, not a feature. This is a child of [Spec 13](openlore-spec-13-context-substrate.md):
+> Spec 13 promises OpenLore "stays fresh **incrementally** so it never carries the staleness tax
+> Cherny rejected." The watcher is that mechanism — and today it imposes *its own* tax. This spec
+> makes incremental freshness actually O(change), so `--watch-auto` can stay **on by default**
+> (owner's decision, 2026-05-31) without degrading the agent session that depends on it.
+>
+> **Do this before Spec 14.** It is a live dogfooding-blocker (see "Why this jumps the queue").
+
+---
+
+## Progress
+
+Branch: `openlore-spec-13.1-watch-mode-performance` (proposed). Root cause confirmed against the code.
+
+- [x] Symptom reproduced from the field: multiple Claude Code sessions across multiple dogfooded
+      repos report *"severe, batched result-delivery latency — commands ran correctly on disk and
+      on the remote, but their output came back in large delayed drains."* Only began once the
+      `openlore` MCP server was registered in those repos.
+- [x] Root cause traced to the watch-mode re-index pipeline (this document, "Root cause").
+- [x] **Step 1** — Coalesce per-file events into a single batched flush (one write per burst).
+      `McpWatcher` now uses one `pending` Set + a single debounce timer + a `maxBatchMs`
+      ceiling; `handleChange(path)` delegates to `handleBatch([path])`.
+- [x] **Step 2** — Make the `llm-context.json` update cheap and stop busting the read cache.
+      Implemented 2a: `primeContextCache` (new export in `mcp-handlers/utils.ts`) hands the
+      patched context to the read cache so the next tool call is a HIT (0 ms vs ~19 ms cold),
+      and the ~2.7 MB disk write is write-behind (deferred + coalesced via `maxBatchMs`).
+- [x] **Step 3** — Make the vector-index update a real incremental row op (no full-table rewrite).
+      New `VectorIndex.updateFiles()` does `delete("filePath" IN …) + add(rows)` for the changed
+      files only and patches the BM25 corpus cache in place; the cold `build()` path is untouched.
+- [x] **Step 4** — Decouple embedding freshness from signature freshness (signatures land instantly).
+      Signatures persist synchronously first; the vector update runs on a separate lower-priority
+      embed lane. Added `--watch-no-embed` + auto-degrade above `WATCH_EMBED_FILE_CEILING` (5000).
+- [x] **Step 5** — Backpressure + VCS-flood detection (branch switch ⟹ one refresh, not N).
+      A `.git` ref watcher (HEAD/index/MERGE_HEAD/ORIG_HEAD) + a `WATCH_BULK_THRESHOLD` (25)
+      batch-size trip collapse a bulk event into one settled refresh; single-flight never interleaves.
+- [x] **Step 6** — stderr discipline (one summary line per batch; verbose behind a debug flag).
+      Default is one `[mcp-watcher] updated/coalesced N … (Mms)` line per batch; per-file/per-embed
+      detail is behind `OPENLORE_WATCH_DEBUG`.
+- [x] **Step 7** — Reconcile docs/install text with the on-by-default reality.
+      `docs/mcp-tools.md`, `README.md`, `src/cli/install/index.ts`, and the orient skill wrapper
+      now state watch is on by default, cheap/batched, and how to disable / run signatures-only.
+- [x] **Step 8** — Watch-mode microbenchmark + regression tests.
+      `scripts/bench-watch.ts` (+ `npm run bench:watch`, recorded in `scripts/BENCHMARKS.md`) plus
+      `mcp-watcher-incremental.test.ts` and `vector-index-updatefiles.test.ts`.
+
+**Measured (`npm run bench:watch`, synthetic 4.03 MB context, signatures-only):** single-save
+flush **4.5 ms**; next-call read after save **0.02 ms** (in-memory cache HIT) vs **4.4 ms** cold
+parse (≈256× on this fixture, widening with context size); 50-file burst → **1** flush (was 50
+full pipelines), coalesced flush **8 ms**. The decisive wins are the eliminated forced re-parse
+(G1) and the single-flush coalescing (G2). Satisfies G1, G2, G3, G4, G5, G6; G7 protected (cold
+`build()`/`analyze` paths untouched; full unit + relevant integration suites green).
+
+> **Not** addressed by PR #83 (Panic Response Layer). PR #83 touches `mcp.ts` and `vector-index.ts`
+> for panic/gryph concerns only; it does not change the `mcp-watcher` re-index pipeline,
+> `handleChange`, or `--watch-auto`. The two are independent — both edit `mcp.ts` but in different
+> regions, so expect a small merge but no logical conflict. **PR #83 does not fix this.**
+
+---
+
+## Symptom (from the field)
+
+Reported verbatim by multiple agent sessions, across multiple repos, only after dogfooding began:
+
+> "The tool-execution environment in this session had severe, batched result-delivery latency —
+> commands ran correctly on disk and on the remote, but their output came back to me in large
+> delayed drains, which is why this took many round-trips. Everything is confirmed landed."
+
+The tells: tool *execution* is fine (writes land), but tool *result delivery* arrives batched and
+late. That is the signature of (a) a background process contending for CPU/IO with the agent's
+session, and/or (b) a flood of child-process stderr the client must drain — not of failing commands.
+
+## What is shared across every affected repo
+
+Exactly one thing changed when dogfooding started: each repo's `.claude/settings.json` now registers
+
+```json
+{ "mcpServers": { "openlore": { "command": "npx", "args": ["--yes", "openlore", "mcp"] } } }
+```
+
+A long-running `openlore mcp` stdio server, started with **no flags**. No git hooks are installed in
+the affected repos (verified in `enklayve/.git/hooks` — empty), so the decisions/commit gate is **not**
+the cause. The cause is in the MCP server's default behavior.
+
+## Root cause (grounded against the code)
+
+**`--watch-auto` defaults to `true`** — [src/cli/commands/mcp.ts:1610](../../src/cli/commands/mcp.ts#L1610):
+
+```ts
+.option('--watch-auto', 'Auto-detect the project directory from the first tool call and start watching', true)
+```
+
+So plain `openlore mcp` silently arms a recursive `chokidar` watcher on the **first** tool call that
+carries a `directory` ([mcp.ts:1347-1362](../../src/cli/commands/mcp.ts#L1347-L1362)). From then on,
+**every file the agent edits** fires `McpWatcher.handleChange`
+([mcp-watcher.ts:191-273](../../src/core/services/mcp-watcher.ts#L191-L273)). The directory pruning
+itself is well guarded (node_modules/dist/target/etc. are excluded — the EMFILE fix at
+[mcp-watcher.ts:60-100](../../src/core/services/mcp-watcher.ts#L60-L100)); the defect is the **per-save
+cost**, which is O(repo), not O(change). On a real dogfood target (`enklayve`: 2.1 MB `call-graph.db`,
+**2.1 MB `llm-context.json`**, a LanceDB `vector-index/`), a single save does all of:
+
+1. **Full `llm-context.json` rewrite.** `handleChange` reads → `JSON.parse` → patches one signature
+   entry → writes the **entire** file back ([mcp-watcher.ts:247-267](../../src/core/services/mcp-watcher.ts#L247-L267)).
+   That is a 2.1 MB parse + 2.1 MB write **per save**, regardless of edit size.
+
+2. **A forced 2.1 MB re-parse on the next tool call.** `readCachedContext` caches the parsed context
+   keyed on file **mtime** ([utils.ts:124-146](../../src/core/services/mcp-handlers/utils.ts#L124-L146)).
+   The rewrite in (1) bumps mtime, so the next MCP query (which `orient`, `analyze_impact`,
+   `get_subgraph`, `search_code`, etc. all depend on) must re-read and re-parse the whole 2.1 MB file
+   cold. The watcher's write therefore taxes the read path too.
+
+3. **A full vector-index read + overwrite.** `reEmbed` ([mcp-watcher.ts:269-319](../../src/core/services/mcp-watcher.ts#L269-L319))
+   calls `VectorIndex.build(..., incremental=true)`. But the "incremental" path still
+   `openTable()` → `table.query().toArray()` — reads the **entire** corpus into memory
+   ([vector-index.ts:413-415](../../src/core/analyzer/vector-index.ts#L413-L415)) — then
+   `createTable(TABLE_NAME, ..., { mode: 'overwrite' })` — rewrites the **whole** table
+   ([vector-index.ts:472](../../src/core/analyzer/vector-index.ts#L472)). `incremental` only avoids
+   *re-embedding* unchanged functions; the storage read and rewrite are full-corpus every time. The
+   BM25-only path is the same shape (overwrite + corpus-cache bust).
+
+4. **A stderr line per change** (and another per embed) — [mcp-watcher.ts:238-239, 267, 311-315](../../src/core/services/mcp-watcher.ts#L238-L239).
+
+5. **No coalescing across files.** The debounce is **per-file** (a `setTimeout` per path,
+   [mcp-watcher.ts:165-183](../../src/core/services/mcp-watcher.ts#L165-L183)); the `running` flag
+   serializes but *reschedules* superseded work rather than dropping it. A bulk file event — `git
+   checkout`/`rebase`/`pull`, a formatter, a project-wide find-replace — touching N source files
+   therefore runs the full O(repo) pipeline **N times back-to-back**. A 50-file branch switch =
+   50 full `llm-context.json` rewrites + 50 full vector-index overwrites, serialized.
+
+**Net:** the freshness mechanism that Spec 13 sells as cheap is, in the field, an O(repo) re-index +
+re-embed pipeline that fires on every keystroke-save and storms on every VCS operation — saturating
+CPU/IO and flooding stderr in the MCP child process while the agent is trying to work. That is the
+"batched result-delivery latency." The call-graph subset rebuild
+([mcp-watcher.ts:206-244](../../src/core/services/mcp-watcher.ts#L206-L244)) is correctly bounded
+(changed file + ≤10 callers) and is **not** the problem — items 1–5 are.
+
+### Why this jumps the queue (ahead of Spec 14)
+
+Spec 13 says "run the benchmark before writing another line of *feature* code." This is not feature
+code — it is a regression that degrades every dogfooding session, and dogfooding is how 14–23 get
+validated. A Spec 14 token/latency benchmark run *through* the MCP server while this is live would
+also be polluted by watcher contention. Fix the substrate's freshness tax first; then benchmark.
+
+---
+
+## Goal & success criteria
+
+**Goal:** incremental freshness is O(change), not O(repo), and never storms — so `--watch-auto`
+stays on by default and a watching session is indistinguishable from a non-watching one in latency.
+
+Verifiable criteria (see Step 8 for the harness):
+
+- **G1** — A single source-file save triggers **≤ 1** `llm-context` persistence and **≤ 1** vector
+  update, and does **not** force a full-file re-parse on the next tool call.
+- **G2** — A burst of N saves within the debounce window coalesces to **1** flush, not N.
+- **G3** — A VCS bulk event (≥ `BULK_THRESHOLD` files, or `.git/HEAD`/`.git/index` churn) produces
+  **at most one** deferred refresh, not one pipeline per file.
+- **G4** — Per-save wall-clock and CPU on a 2 MB-context repo drop by **≥ 10×** vs. today
+  (measured; the benchmark sets the real number).
+- **G5** — Watcher stderr emits **≤ 1** line per batch by default; per-file detail only with a debug flag.
+- **G6** — `orient`/`search_code` still reflect a just-saved edit within the debounce window
+  (freshness preserved — this is the whole point of keeping watch on).
+- **G7** — No regression in the cold `analyze`/`--watch` path or in MCP read latency
+  (`scripts/bench-mcp.ts`).
+
+---
+
+## The fix — detailed steps
+
+> Design principle: **separate the two freshnesses.** *Signature/structure* freshness (what
+> `orient`/`search_code` return as text) must land immediately and cheaply. *Embedding* freshness
+> (semantic re-rank quality) may lag a few seconds and batch. Spec 13's thesis is the structural map;
+> the vector layer is the optional semantic assist (Spec 06), so it can trail.
+
+### Step 1 — Coalesce per-file events into a single batched flush
+
+Replace the per-file timer map + reschedule loop ([mcp-watcher.ts:111, 165-183](../../src/core/services/mcp-watcher.ts#L165-L183))
+with **one** coalescing queue:
+
+- Maintain a `Set<string>` of pending changed paths plus a single debounce timer.
+- On each `change`, add the path and (re)arm one timer (`debounceMs`, default 400). Add a hard
+  **max-batch ceiling** so a continuous stream still flushes periodically
+  (`maxBatchMs`, e.g. 2000) — never starve.
+- On flush, drain the whole Set and process it as **one batch**: one call-graph subset build over
+  all changed files, **one** `llm-context` persistence (Step 2), **one** vector update (Step 3).
+- Keep single-flight: if a flush is running, accumulate into the next Set; do not interleave.
+
+`handleChange(path)` stays exported for unit tests but becomes `handleBatch(paths)` internally; the
+single-file form delegates to a batch of one.
+
+### Step 2 — Make the `llm-context.json` update cheap and stop busting the read cache
+
+The 2.1 MB rewrite-per-save and the mtime-driven re-parse are the two biggest single-save costs.
+Pick **2a** (smallest change, recommended) and add **2c**; consider **2b** as the durable form.
+
+- **2a — Write-behind + in-memory cache handoff (recommended first move).** Keep the patched context
+  in memory; flush to `llm-context.json` at most once per `flushIntervalMs` (e.g. 2000) or on idle,
+  not per save. Crucially, **update the read-path cache in place** so freshness does not require a
+  disk round-trip: expose a setter on `readCachedContext`'s `_contextCache`
+  ([utils.ts:115-146](../../src/core/services/mcp-handlers/utils.ts#L115-L146)) that the watcher calls
+  with the new in-memory context, so the next tool call is a cache **hit** (no 2.1 MB re-parse) even
+  before the disk flush. This satisfies G1, G2, G6 directly.
+- **2b — Stop storing signatures in the monolith (durable form).** Signatures are the only thing the
+  watcher patches into `llm-context.json`. Move per-file signatures to an incrementally updatable
+  store — the `EdgeStore` SQLite already updated incrementally here is the natural home (one-row
+  upsert per file), or a per-file sidecar. Then a single-file change is an O(1) row write, and
+  `llm-context.json` is rebuilt only by `analyze`. Larger blast radius (read paths that consume
+  `context.signatures` must read the new store); schedule after 2a proves the model.
+- **2c — Cache invalidation that survives partial writes.** If any path keeps rewriting
+  `llm-context.json`, make `readCachedContext` invalidation tolerate it: invalidate per-file rather
+  than busting the whole parsed object, or have the watcher push the updated object into the cache
+  (as in 2a) so an mtime bump never forces a cold full re-parse.
+
+### Step 3 — Make the vector-index update a real incremental row op
+
+Stop the full-table read+overwrite on the watch path.
+
+- Replace `query().toArray()` + `createTable(overwrite)` ([vector-index.ts:404-472](../../src/core/analyzer/vector-index.ts#L404-L472))
+  with **row-level** ops for the changed functions only: LanceDB `delete(predicate)` for the changed
+  file's existing rows + `add(newRows)`, or `mergeInsert` keyed on function `id`. Add a dedicated
+  `VectorIndex.updateFiles(outputDir, changedNodes, …)` entry point for the watcher so the cold
+  `build()` path is untouched (protects G7 and the `analyze --embed` contract).
+- For the **BM25-only** path (no embedder): update only the affected documents in the corpus and
+  surgically invalidate just those entries in `_bm25Cache`
+  ([vector-index.ts:191-202](../../src/core/analyzer/vector-index.ts#L191-L202)) instead of rebuilding
+  and dropping the whole corpus cache.
+- Likewise invalidate only the changed rows in `_tableCache`, not the whole table handle.
+
+### Step 4 — Decouple embedding freshness from signature freshness
+
+- On flush, run Step 2 (signatures) **synchronously and first** so `orient`/`search_code` reflect the
+  edit immediately (G6). Schedule Step 3 (embedding/vector) as a **separate, lower-priority** task
+  that may batch across multiple flushes and run on idle. Never block a signature update on an embed.
+- Add a `watchEmbed` switch (config + `--watch-no-embed`) so large repos can run **signatures-only**
+  live freshness and let embeddings refresh at commit (the post-commit `analyze --embed` the
+  watcher header already references, [mcp-watcher.ts:10-11](../../src/core/services/mcp-watcher.ts#L10-L11)).
+- **Auto-degrade on big repos:** if the watched tree exceeds `WATCH_EMBED_FILE_CEILING` source files,
+  default to signatures-only live and log the decision once (no silent cap — state it, per Spec 13's
+  "no claim outruns the code" discipline).
+
+### Step 5 — Backpressure + VCS-flood detection
+
+- **VCS detection:** watch for `.git/HEAD`, `.git/index`, `.git/MERGE_HEAD`, `ORIG_HEAD` churn, or a
+  flush batch ≥ `BULK_THRESHOLD` files. On detection, **cancel** queued per-file work and schedule a
+  **single** coalesced refresh after the operation settles (a quiet period), rather than N pipelines.
+  A branch switch becomes one refresh (G3).
+- **Backpressure:** if flush batches keep arriving faster than they drain (queue depth grows past a
+  bound), degrade to "mark stale + one batched refresh on the next idle window" and emit a single
+  `[mcp-watcher] coalesced N changes` line. Never let the queue grow unbounded.
+
+### Step 6 — stderr discipline
+
+- Default to **one summary line per batch** (`[mcp-watcher] updated N files (Mms)`); move the
+  per-file/per-embed lines ([mcp-watcher.ts:238-239, 267, 311-315](../../src/core/services/mcp-watcher.ts#L238-L239))
+  behind `OPENLORE_WATCH_DEBUG`. This removes the stderr-flood contribution to the client's batched
+  result drain, independent of the CPU/IO win.
+
+### Step 7 — Reconcile the docs/install text with reality
+
+The current behavior contradicts the docs, which compounds the confusion:
+
+- [docs/mcp-tools.md:56](../../docs/mcp-tools.md#L56) lists `--watch-auto` default as **`off`** — it is
+  `true`. Fix the table to "on by default" and describe the new cheap batched behavior + the
+  `--watch-no-embed` / signatures-only auto-degrade.
+- [src/cli/install/index.ts:235](../../src/cli/install/index.ts#L235) and [README.md:183](../../README.md#L183)
+  should state watch is on by default, why (live freshness), and how to disable
+  (`openlore mcp --no-watch-auto`) or run signatures-only.
+- The orient skill's stdio fallback spawns `npx --yes openlore mcp` with no flags
+  ([skills/openlore-orient/scripts/orient-via-mcp.mjs:30](../../skills/openlore-orient/scripts/orient-via-mcp.mjs#L30)),
+  so it too arms the watcher; for one-shot orient it should pass `--no-watch-auto` (the option's own
+  help already claims the orient wrapper does this — make it true).
+
+### Step 8 — Watch-mode microbenchmark + regression tests
+
+- **Benchmark** (`scripts/bench-watch.ts`, sibling to `bench.ts`/`bench-mcp.ts`): on a fixture with a
+  ~2 MB context + populated vector index, measure (a) single-save flush latency + CPU, (b) a 50-file
+  bulk-change burst, asserting G1–G4. Record before/after in `BENCHMARKS.md`.
+- **Tests** (extend `mcp-watcher.test.ts` / `.integration.test.ts`):
+  - N change events in one window ⟹ exactly 1 persistence + 1 vector update (G2).
+  - VCS-flood / ≥ BULK_THRESHOLD batch ⟹ exactly 1 deferred refresh (G3).
+  - A save updates the in-memory read cache: the next `readCachedContext` is a **hit**, no full
+    re-parse (G1).
+  - `VectorIndex.updateFiles` changes only the target file's rows; corpus rows for other files are
+    byte-identical (Step 3).
+  - Signatures reflect a just-saved symbol within the debounce window even when the embedder is
+    absent/slow (G4/G6, signatures-only path).
+  - stderr emits ≤ 1 line per batch unless `OPENLORE_WATCH_DEBUG` (G5).
+
+---
+
+## Tunables (new) — single source of truth
+
+Add to constants and surface in `.openlore/config.json` (and `--watch-*` flags). Defaults chosen to
+keep watch **on** and cheap:
+
+| Knob | Default | Purpose |
+|---|---|---|
+| `watchDebounceMs` | 400 | idle quiet period before a flush (existing) |
+| `watchMaxBatchMs` | 2000 | hard flush ceiling under a continuous stream |
+| `watchBulkThreshold` | 25 | batch size that trips VCS-flood handling |
+| `watchEmbed` | `true` | run vector update live; `false` = signatures-only |
+| `watchEmbedFileCeiling` | e.g. 5000 | above this, auto-degrade to signatures-only |
+| `OPENLORE_WATCH_DEBUG` | unset | enable per-file/per-embed stderr lines |
+
+---
+
+## Compatibility & scope guarantee (per Spec 13's prime constraint)
+
+This is **additive and behavior-preserving** for the frozen contract:
+
+- **`mcp` CLI surface preserved.** `--watch`/`--watch-auto`/`--watch-debounce` keep their meaning;
+  new flags (`--watch-no-embed`) are additive. Default stays on (owner's decision) — but now cheap.
+- **Cold paths untouched.** `analyze` and `analyze --embed` build full `llm-context.json` and the full
+  vector index exactly as today; Step 3 adds a *new* `VectorIndex.updateFiles` reader beside the
+  existing `build()` rather than changing it (protects G7).
+- **`orient()` response shape unchanged.** This is a latency/IO fix; no field is added, removed, or
+  retyped.
+- **`llm-context.json` format unchanged** under 2a/2c. If 2b lands later, signatures move to a store
+  but the artifact stays valid (consumers migrate behind the existing readers); a `SCHEMA_VERSION`
+  bump rebuilds from source — one re-analyze, no migration (the Spec 13 safety property).
+- **Freshness guarantee strengthened, not weakened.** The point of keeping watch on is preserved
+  (G6); we remove only the cost, not the freshness.
+
+---
+
+## Relationship to existing specs
+
+- **Spec 13 (context substrate)** — direct parent. This is the "kept fresh incrementally so it never
+  carries the staleness tax" claim, made true in the field. Add a 13.1 line to Spec 13's Progress
+  list ahead of Spec 14.
+- **Spec 06 (BM25 without embeddings)** — Step 4's signatures-only / `--watch-no-embed` mode is the
+  watch-time expression of the same "deterministic retrieval, network/embeddings optional" floor.
+- **Spec 14 (benchmark harness)** — runs *after* this; a token/latency benchmark through the MCP
+  server is only trustworthy once the watcher no longer contends with the measured session.
+- **PR #83 (Panic Response Layer)** — orthogonal; does not touch this pipeline (see Progress note).
diff --git a/package.json b/package.json
index 88da19eb..0ef8ae26 100644
--- a/package.json
+++ b/package.json
@@ -27,6 +27,7 @@
     "view": "tsx src/cli/index.ts view",
     "bench": "tsx scripts/bench.ts",
     "bench:mcp": "tsx scripts/bench-mcp.ts",
+    "bench:watch": "tsx scripts/bench-watch.ts",
     "test": "vitest",
     "test:run": "vitest run",
     "test:coverage": "vitest run --coverage",
diff --git a/scripts/bench-watch.ts b/scripts/bench-watch.ts
new file mode 100644
index 00000000..f5c2604e
--- /dev/null
+++ b/scripts/bench-watch.ts
@@ -0,0 +1,155 @@
+/**
+ * bench-watch.ts — watch-mode (MCP incremental re-index) microbenchmark.
+ *
+ * Spec 13.1: freshness must be O(change), not O(repo). This measures the
+ * per-save and bulk-burst cost of the watcher pipeline on a fixture with a
+ * ~2 MB llm-context.json, and asserts the coalescing/cache guarantees:
+ *
+ *   G1 — a single save triggers ≤ 1 llm-context persistence and the next read
+ *        is a cache HIT (no cold full-file re-parse).
+ *   G2 — a burst of N saves coalesces to ONE flush.
+ *   G4 — per-save wall-clock stays small relative to the context size.
+ *
+ * Run:  npm run bench:watch
+ *
+ * This is a manual benchmark (not part of CI). It builds its own throwaway
+ * fixture under the OS temp dir and cleans up afterwards.
+ */
+import { mkdtemp, mkdir, writeFile, readFile, rm, stat } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { performance } from 'node:perf_hooks';
+import { McpWatcher } from '../src/core/services/mcp-watcher.js';
+import { readCachedContext, _resetContextCacheForTesting } from '../src/core/services/mcp-handlers/utils.js';
+
+const FILE_COUNT = 600;       // synthetic source files in the context
+const ENTRIES_PER_FILE = 20;  // signatures per file
+const SINGLE_SAVE_RUNS = 20;
+const BURST_SIZE = 50;
+
+function median(xs: number[]): number {
+  const s = [...xs].sort((a, b) => a - b);
+  const m = Math.floor(s.length / 2);
+  return s.length % 2 ? s[m] : (s[m - 1] + s[m]) / 2;
+}
+
+function synthSignatures(): Array<{ path: string; language: string; entries: Array<{ name: string; signature: string; docstring: string; line: number; kind: string }> }> {
+  const sigs = [];
+  for (let i = 0; i < FILE_COUNT; i++) {
+    const path = `src/module_${i}/file_${i}.ts`;
+    const entries = [];
+    for (let j = 0; j < ENTRIES_PER_FILE; j++) {
+      entries.push({
+        name: `fn_${i}_${j}`,
+        signature: `export function fn_${i}_${j}(arg0: string, arg1: number, opts?: Record<string, unknown>): Promise<void>`,
+        docstring: `Function ${j} in module ${i}. Handles a representative unit of work for the benchmark fixture.`,
+        line: j * 7 + 1,
+        kind: 'function',
+      });
+    }
+    sigs.push({ path, language: 'TypeScript', entries });
+  }
+  return sigs;
+}
+
+async function main(): Promise<void> {
+  const root = await mkdtemp(join(tmpdir(), 'ol-benchwatch-'));
+  const analysisDir = join(root, '.openlore', 'analysis');
+  await mkdir(analysisDir, { recursive: true });
+  const contextPath = join(analysisDir, 'llm-context.json');
+
+  // Build a ~2 MB context.
+  const signatures = synthSignatures();
+  await writeFile(contextPath, JSON.stringify({ signatures, callGraph: null }, null, 2), 'utf-8');
+  const ctxBytes = (await stat(contextPath)).size;
+
+  // Write the real source files so the watcher can read them on change.
+  for (let i = 0; i < FILE_COUNT; i++) {
+    const dir = join(root, 'src', `module_${i}`);
+    await mkdir(dir, { recursive: true });
+    await writeFile(join(dir, `file_${i}.ts`), `export function fn_${i}_0() { return ${i}; }\n`, 'utf-8');
+  }
+
+  // embed:false → measure the signature/freshness pipeline (the per-save hot
+  // path the spec flagged: the 2 MB rewrite + the re-parse it used to force).
+  const watcher = new McpWatcher({ rootPath: root, embed: false });
+
+  // ── Single-save latency, including the simulated "next tool call" read ──────
+  const flushTimes: number[] = [];
+  const readTimes: number[] = [];
+  for (let r = 0; r < SINGLE_SAVE_RUNS; r++) {
+    const i = r % FILE_COUNT;
+    const f = join(root, 'src', `module_${i}`, `file_${i}.ts`);
+    await writeFile(f, `export function fn_${i}_0() { return ${i + r * 1000}; }\n`, 'utf-8');
+
+    const t0 = performance.now();
+    await watcher.handleChange(f);
+    flushTimes.push(performance.now() - t0);
+
+    // The next "tool call" read — must be a cache HIT (no 2 MB cold re-parse).
+    const t1 = performance.now();
+    const ctx = await readCachedContext(root);
+    readTimes.push(performance.now() - t1);
+    if (!ctx) throw new Error('readCachedContext returned null after save');
+  }
+
+  // ── Cold read baseline (cache cleared → full 2 MB parse) for contrast ───────
+  _resetContextCacheForTesting();
+  const coldT0 = performance.now();
+  await readCachedContext(root);
+  const coldRead = performance.now() - coldT0;
+
+  // ── Bulk burst: BURST_SIZE files in one window must coalesce to ONE flush ───
+  let summaries = 0;
+  const origWrite = process.stderr.write.bind(process.stderr);
+  // eslint-disable-next-line @typescript-eslint/no-explicit-any
+  (process.stderr as any).write = (chunk: any, ...rest: any[]): boolean => {
+    if (/\[mcp-watcher\] (updated|coalesced)/.test(String(chunk))) summaries++;
+    return origWrite(chunk, ...rest);
+  };
+  const burst = new McpWatcher({ rootPath: root, embed: false, debounceMs: 50, maxBatchMs: 2000 });
+  const burstFiles: string[] = [];
+  for (let i = 0; i < BURST_SIZE; i++) {
+    const f = join(root, 'src', `module_${i}`, `file_${i}.ts`);
+    await writeFile(f, `export function fn_${i}_0() { return ${i}*2; }\n`, 'utf-8');
+    burstFiles.push(f);
+  }
+  const burstT0 = performance.now();
+  for (const f of burstFiles) (burst as unknown as { enqueue(p: string): void }).enqueue(f);
+  // Wait for the single coalesced flush to complete.
+  await new Promise((res) => setTimeout(res, 400));
+  const burstTime = performance.now() - burstT0;
+  (process.stderr as any).write = origWrite;
+
+  const report =
+`## Watch-mode benchmark (Spec 13.1)
+
+Fixture: ${FILE_COUNT} files × ${ENTRIES_PER_FILE} signatures, llm-context.json = ${(ctxBytes / 1_048_576).toFixed(2)} MB.
+
+| Metric | Result |
+|--------|--------|
+| Single-save flush (median of ${SINGLE_SAVE_RUNS}) | ${median(flushTimes).toFixed(1)} ms |
+| Next-call read after save (median, cache HIT) | ${median(readTimes).toFixed(2)} ms |
+| Cold read (cache cleared, full parse) | ${coldRead.toFixed(1)} ms |
+| ${BURST_SIZE}-file burst → flushes | ${summaries} (expected 1) |
+| ${BURST_SIZE}-file burst wall-clock | ${burstTime.toFixed(1)} ms |
+
+G1: next-call read is a cache hit — ${median(readTimes).toFixed(2)} ms vs ${coldRead.toFixed(1)} ms cold (${(coldRead / Math.max(median(readTimes), 0.001)).toFixed(0)}× faster).
+G2: ${BURST_SIZE} saves coalesced to ${summaries} flush${summaries === 1 ? '' : 'es'}.
+`;
+
+  // eslint-disable-next-line no-console
+  console.log(report);
+
+  // Assertions (fail loudly in CI-less manual runs).
+  if (summaries !== 1) throw new Error(`G2 violated: expected 1 coalesced flush, got ${summaries}`);
+  if (median(readTimes) >= coldRead) throw new Error('G1 violated: post-save read is not faster than a cold parse');
+
+  await rm(root, { recursive: true, force: true });
+}
+
+main().catch((err) => {
+  // eslint-disable-next-line no-console
+  console.error(err);
+  process.exit(1);
+});
diff --git a/src/cli/commands/mcp.ts b/src/cli/commands/mcp.ts
index 0840fab2..6e75ed45 100644
--- a/src/cli/commands/mcp.ts
+++ b/src/cli/commands/mcp.ts
@@ -1286,6 +1286,7 @@ interface McpServerOptions {
   watch?: string;
   watchAuto?: boolean;
   watchDebounce?: string;
+  watchNoEmbed?: boolean;
   minimal?: boolean;
 }
 
@@ -1353,6 +1354,7 @@ async function startMcpServer(options: McpServerOptions = {}): Promise<void> {
         autoWatcher = new McpWatcher({
           rootPath: resolve(dir),
           debounceMs: isNaN(debounceMs) ? 400 : debounceMs,
+          embed: !options.watchNoEmbed,
         });
         await autoWatcher.start();
         const cleanup = () => autoWatcher!.stop().then(() => process.exit(0));
@@ -1592,6 +1594,7 @@ async function startMcpServer(options: McpServerOptions = {}): Promise<void> {
     const watcher = new McpWatcher({
       rootPath: resolve(options.watch),
       debounceMs: isNaN(debounceMs) ? 400 : debounceMs,
+      embed: !options.watchNoEmbed,
     });
     await watcher.start();
     const cleanup = () => watcher.stop().then(() => process.exit(0));
@@ -1610,5 +1613,6 @@ export const mcpCommand = new Command('mcp')
   .option('--watch-auto', 'Auto-detect the project directory from the first tool call and start watching', true)
   .option('--no-watch-auto', 'Disable auto-watch (use for one-shot tool calls, e.g. the orient skill wrapper)')
   .option('--watch-debounce <ms>', 'Debounce delay in ms before re-indexing after a file change (default: 400)', '400')
+  .option('--watch-no-embed', 'Watch signatures only — skip live vector re-embedding (embeddings refresh at commit). Large repos auto-degrade to this.')
   .option('--minimal', 'Expose only core 5 tools (orient, search_code, record_decision, detect_changes, check_spec_drift). Pair with alwaysLoad: true in Claude Code for always-visible core tools.')
   .action((options: McpServerOptions) => startMcpServer(options));
diff --git a/src/constants.ts b/src/constants.ts
index 7946bf76..c9355850 100644
--- a/src/constants.ts
+++ b/src/constants.ts
@@ -549,3 +549,36 @@ export const DECISIONS_CONSOLIDATION_MAX_TOKENS = 2_000;
 
 /** Max output tokens for verification LLM call */
 export const DECISIONS_VERIFICATION_MAX_TOKENS = 1_500;
+
+// ============================================================================
+// WATCH MODE (MCP incremental re-index) — Spec 13.1
+// ============================================================================
+// Defaults chosen to keep --watch-auto on by default while making incremental
+// freshness O(change), not O(repo). See docs/specs/openlore-spec-13.1-*.
+
+/** Idle quiet period (ms) before a coalesced flush after the last file change. */
+export const WATCH_DEBOUNCE_MS = 400;
+
+/**
+ * Hard ceiling (ms) that forces a flush even under a continuous change stream,
+ * so a steady drip of edits never starves the queue indefinitely.
+ */
+export const WATCH_MAX_BATCH_MS = 2000;
+
+/**
+ * Number of files in a single coalesced flush that trips VCS-flood handling
+ * (a branch switch / rebase / formatter touching many files at once).
+ */
+export const WATCH_BULK_THRESHOLD = 25;
+
+/**
+ * Above this many watched source files, live embedding auto-degrades to
+ * signatures-only; embeddings refresh at commit (post-commit analyze --embed).
+ */
+export const WATCH_EMBED_FILE_CEILING = 5000;
+
+/**
+ * Quiet period (ms) after a detected VCS bulk operation (.git/HEAD or index
+ * churn) before a single coalesced refresh runs, so the whole op settles first.
+ */
+export const WATCH_VCS_SETTLE_MS = 750;
diff --git a/src/core/analyzer/vector-index-updatefiles.test.ts b/src/core/analyzer/vector-index-updatefiles.test.ts
new file mode 100644
index 00000000..96f22358
--- /dev/null
+++ b/src/core/analyzer/vector-index-updatefiles.test.ts
@@ -0,0 +1,91 @@
+/**
+ * Spec 13.1 — VectorIndex.updateFiles row-level incremental update.
+ *
+ * Proves the watch path replaces ONLY the changed file's rows (delete + add)
+ * instead of the full-table read+overwrite build() performs: a sibling file's
+ * rows survive an update untouched, and the changed file's rows are replaced.
+ * Runs BM25-only (embedSvc = null) so it needs no embedding service.
+ */
+import { describe, it, expect, beforeEach, afterEach } from 'vitest';
+import { mkdtemp, rm } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import {
+  VectorIndex,
+  _resetVectorIndexCachesForTesting,
+} from './vector-index.js';
+import type { FunctionNode } from './call-graph.js';
+
+function node(filePath: string, name: string): FunctionNode {
+  return {
+    id: `${filePath}::${name}`,
+    name,
+    filePath,
+    className: '',
+    language: 'TypeScript',
+    signature: `function ${name}()`,
+    docstring: '',
+    fanIn: 0,
+    fanOut: 0,
+    startIndex: 0,
+    endIndex: 0,
+  } as unknown as FunctionNode;
+}
+
+let outputDir: string;
+
+beforeEach(async () => {
+  outputDir = await mkdtemp(join(tmpdir(), 'ol-vi-update-'));
+  _resetVectorIndexCachesForTesting();
+});
+
+afterEach(async () => {
+  _resetVectorIndexCachesForTesting();
+  await rm(outputDir, { recursive: true, force: true });
+});
+
+async function names(query: string): Promise<Set<string>> {
+  const results = await VectorIndex.search(outputDir, query, null, { limit: 20 });
+  return new Set(results.map((r) => r.record.name));
+}
+
+describe('VectorIndex.updateFiles — Spec 13.1 (BM25-only)', () => {
+  it('replaces only the changed file rows; sibling file rows survive', async () => {
+    // Build a BM25-only index with two files.
+    const initial = [node('alpha.ts', 'alphafn'), node('beta.ts', 'betafn')];
+    const built = await VectorIndex.build(
+      outputDir, initial, [], new Set(), new Set(), null, undefined, false,
+    );
+    expect(built.hasEmbeddings).toBe(false);
+    expect(built.total).toBe(2);
+    _resetVectorIndexCachesForTesting();
+
+    // Sanity: both functions are findable.
+    expect(await names('alphafn')).toContain('alphafn');
+    expect(await names('betafn')).toContain('betafn');
+    _resetVectorIndexCachesForTesting();
+
+    // Rename alpha.ts's function → updateFiles should drop the old row and add new.
+    const result = await VectorIndex.updateFiles(
+      outputDir,
+      [node('alpha.ts', 'gammafn')],
+      new Set(['alpha.ts']),
+      [],
+      new Set(),
+      new Set(),
+      null,
+      undefined,
+    );
+    expect(result.hasEmbeddings).toBe(false);
+    _resetVectorIndexCachesForTesting();
+
+    // beta.ts is untouched (its row survived the row-level op).
+    expect(await names('betafn')).toContain('betafn');
+    _resetVectorIndexCachesForTesting();
+    // alpha.ts now has gammafn …
+    expect(await names('gammafn')).toContain('gammafn');
+    _resetVectorIndexCachesForTesting();
+    // … and the old alphafn row is gone (the delete predicate actually matched).
+    expect(await names('alphafn')).not.toContain('alphafn');
+  });
+});
diff --git a/src/core/analyzer/vector-index.ts b/src/core/analyzer/vector-index.ts
index 4d324c25..f4802103 100644
--- a/src/core/analyzer/vector-index.ts
+++ b/src/core/analyzer/vector-index.ts
@@ -204,6 +204,37 @@ export function _resetVectorIndexCachesForTesting(): void {
   _metaCache.clear();
 }
 
+/**
+ * Surgically patch the cached BM25 corpus for `dbPath` (Spec 13.1): drop the
+ * rows belonging to `changedFilePaths` and splice in `newRows`, then rebuild the
+ * in-memory corpus. No disk read — if nothing is cached yet this is a no-op and
+ * the next search builds the corpus fresh from the table.
+ */
+function patchBm25Cache(dbPath: string, changedFilePaths: Set<string>, newRows: Record<string, unknown>[]): void {
+  const entry = _bm25Cache.get(dbPath);
+  if (!entry) return;
+  const kept = entry.rows.filter((r) => !changedFilePaths.has(r.filePath as string));
+  for (const r of newRows) kept.push(r);
+  const corpus = buildBm25Corpus(kept.map((r) => ({ id: r.id as string, text: r.text as string })));
+  _bm25Cache.set(dbPath, { corpus, rowCount: kept.length, rows: kept });
+}
+
+/**
+ * Build a LanceDB `` `filePath` IN (...) `` predicate, SQL-escaping each path.
+ *
+ * The column identifier MUST be **backtick**-quoted, not double-quoted: LanceDB's
+ * datafusion filter parser treats a double-quoted token as a *string literal*
+ * (so `"filePath" = 'x'` compares the constant string 'filePath' to 'x' and is
+ * always false — a silent no-op delete), and a *bare* `filePath` is lowercased to
+ * `filepath`, which errors (no such column). Backticks are the only form that
+ * binds to the camelCase column. Verified empirically against @lancedb/lancedb.
+ */
+function filePathInPredicate(paths: Set<string>): string | null {
+  if (paths.size === 0) return null;
+  const list = Array.from(paths).map((p) => `'${p.replace(/'/g, "''")}'`).join(', ');
+  return `\`filePath\` IN (${list})`;
+}
+
 // ============================================================================
 // HELPERS
 // ============================================================================
@@ -492,6 +523,157 @@ export class VectorIndex {
     };
   }
 
+  /**
+   * Watch-mode incremental update (Spec 13.1). Replace only the rows for the
+   * changed files with freshly-built records — a row-level delete+add instead of
+   * the full-corpus read+overwrite that build() performs. The cold build() path
+   * is untouched, protecting the `analyze --embed` contract (G7).
+   *
+   *  - Embedded index: reuse existing vectors for rows whose embed-text is
+   *    unchanged (queried for the changed files only, not the whole corpus),
+   *    embed just the new/changed texts, then delete the changed files' old rows
+   *    and add the rebuilt ones. The LanceDB table handle in _tableCache stays
+   *    valid across row ops, so search() does not pay a reconnect.
+   *  - BM25-only index: delete+add the changed files' documents and patch the
+   *    cached BM25 corpus in place rather than dropping the whole corpus cache.
+   */
+  static async updateFiles(
+    outputDir: string,
+    nodes: FunctionNode[],
+    changedFilePaths: Set<string>,
+    signatures: FileSignatureMap[],
+    hubIds: Set<string>,
+    entryPointIds: Set<string>,
+    embedSvc: EmbeddingService | null | undefined,
+    fileContents?: Map<string, string>,
+  ): Promise<{ embedded: number; reused: number; total: number; hasEmbeddings: boolean }> {
+    if (!VectorIndex.exists(outputDir)) {
+      return { embedded: 0, reused: 0, total: 0, hasEmbeddings: false };
+    }
+    const dbPath = join(outputDir, DB_FOLDER);
+    const existingMeta = readMeta(outputDir);
+    const indexHasEmbeddings = existingMeta === null ? true : existingMeta.hasEmbeddings;
+
+    // ── Build candidate records for the changed files' functions ──────────────
+    const sigIndex = buildSignatureIndex(signatures);
+    const nodeIds = new Set(nodes.map((n) => n.id));
+    const candidates: Omit<FunctionRecord, 'vector'>[] = nodes.map((node) => {
+      const cgDoc = node.docstring ?? '';
+      const cgSig = node.signature ?? '';
+      const { signature: regexSig, docstring: regexDoc } = findSignatureEntry(node, sigIndex);
+      const signature = cgSig || regexSig;
+      const docstring = cgDoc || regexDoc;
+      return {
+        id: node.id,
+        name: node.name,
+        filePath: node.filePath,
+        className: node.className ?? '',
+        language: node.language,
+        signature,
+        docstring,
+        fanIn: node.fanIn,
+        fanOut: node.fanOut,
+        isHub: hubIds.has(node.id),
+        isEntryPoint: entryPointIds.has(node.id),
+        text: buildText(node, signature, docstring, fileContents),
+      };
+    });
+    // Synthetic entries (constants / type aliases with no call-graph node) for
+    // the changed files only.
+    for (const fsm of signatures) {
+      if (!changedFilePaths.has(fsm.path)) continue;
+      for (const entry of fsm.entries) {
+        const syntheticId = `${fsm.path}::${entry.name}`;
+        if (nodeIds.has(syntheticId)) continue;
+        if (nodes.some((n) => n.filePath === fsm.path && n.name === entry.name)) continue;
+        const sig = entry.signature ?? '';
+        const doc = entry.docstring ?? '';
+        candidates.push({
+          id: syntheticId,
+          name: entry.name,
+          filePath: fsm.path,
+          className: '',
+          language: fsm.language,
+          signature: sig,
+          docstring: doc,
+          fanIn: 0,
+          fanOut: 0,
+          isHub: false,
+          isEntryPoint: false,
+          text: `[${fsm.language}] ${fsm.path} ${entry.name}\n${sig}${doc ? '\n' + doc : ''}`,
+        });
+      }
+    }
+
+    const { connect } = await import('@lancedb/lancedb');
+    const db = await connect(dbPath);
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    const table: any = await db.openTable(TABLE_NAME);
+    const predicate = filePathInPredicate(changedFilePaths);
+
+    // ── BM25-only index ───────────────────────────────────────────────────────
+    if (!embedSvc || !indexHasEmbeddings) {
+      if (predicate) await table.delete(predicate);
+      if (candidates.length > 0) {
+        await table.add(candidates as unknown as Record<string, unknown>[]);
+      }
+      patchBm25Cache(dbPath, changedFilePaths, candidates as unknown as Record<string, unknown>[]);
+      return { embedded: 0, reused: 0, total: candidates.length, hasEmbeddings: false };
+    }
+
+    // ── Embedded index: reuse unchanged vectors for the changed files only ────
+    const cachedVectors = new Map<string, number[]>(); // "id::text" → vector
+    if (predicate) {
+      try {
+        const existingRows = await table.query().where(predicate).toArray() as Record<string, unknown>[];
+        for (const row of existingRows) {
+          const id = row.id as string;
+          const text = row.text as string;
+          cachedVectors.set(`${id}::${text}`, Array.from(row.vector as ArrayLike<number>));
+        }
+      } catch {
+        // unreadable subset — embed everything fresh
+      }
+    }
+
+    const toEmbed: typeof candidates = [];
+    const toEmbedIdx: number[] = [];
+    const cachedIdx: number[] = [];
+    for (let i = 0; i < candidates.length; i++) {
+      const key = `${candidates[i].id}::${candidates[i].text}`;
+      if (cachedVectors.has(key)) cachedIdx.push(i);
+      else { toEmbed.push(candidates[i]); toEmbedIdx.push(i); }
+    }
+
+    let newVectors: number[][] = [];
+    if (toEmbed.length > 0) {
+      newVectors = await embedSvc.embed(toEmbed.map((r) => r.text));
+      if (newVectors.length !== toEmbed.length) {
+        throw new Error(`Embedding count mismatch: expected ${toEmbed.length}, got ${newVectors.length}`);
+      }
+    }
+
+    const fullRecords: FunctionRecord[] = new Array(candidates.length);
+    for (const idx of cachedIdx) {
+      const r = candidates[idx];
+      fullRecords[idx] = { ...r, vector: cachedVectors.get(`${r.id}::${r.text}`)! };
+    }
+    for (let i = 0; i < toEmbedIdx.length; i++) {
+      fullRecords[toEmbedIdx[i]] = { ...candidates[toEmbedIdx[i]], vector: newVectors[i] };
+    }
+
+    if (predicate) await table.delete(predicate);
+    if (fullRecords.length > 0) {
+      await table.add(fullRecords as unknown as Record<string, unknown>[]);
+    }
+
+    // Keep the table handle (_tableCache) — row ops don't invalidate it. Patch
+    // the BM25 corpus cache in place for the changed files.
+    patchBm25Cache(dbPath, changedFilePaths, fullRecords as unknown as Record<string, unknown>[]);
+
+    return { embedded: toEmbed.length, reused: cachedIdx.length, total: fullRecords.length, hasEmbeddings: true };
+  }
+
   /**
    * Hybrid search over the index: dense (ANN) + sparse (BM25) merged via RRF.
    *
diff --git a/src/core/services/mcp-handlers/utils.ts b/src/core/services/mcp-handlers/utils.ts
index 9a1059ff..c10b4c98 100644
--- a/src/core/services/mcp-handlers/utils.ts
+++ b/src/core/services/mcp-handlers/utils.ts
@@ -121,6 +121,38 @@ export function _resetContextCacheForTesting(): void {
   _contextCache.clear();
 }
 
+/**
+ * Watch-mode handoff (Spec 13.1). Push an updated context into the in-memory
+ * read cache so the next tool call is a cache HIT — no 2.1 MB disk re-parse —
+ * even though the watcher only patched a few signatures. Keyed identically to
+ * {@link readCachedContext} (resolved project directory).
+ *
+ * The cached `mtime` is set to the current on-disk `llm-context.json` mtime so
+ * the entry stays valid until the file genuinely changes on disk again:
+ *   • watcher patches in memory but defers the disk write → disk mtime is
+ *     unchanged → this entry matches → hit returns the patched context;
+ *   • watcher writes the file then primes → disk mtime is the just-written one
+ *     → this entry matches → hit, no cold re-parse of what we just wrote;
+ *   • some other process (e.g. `openlore analyze`) rewrites the file → its mtime
+ *     differs from this entry → next read MISSes and re-reads disk → correct.
+ */
+export async function primeContextCache(directory: string, ctx: CachedContext): Promise<void> {
+  const analysisDir = join(directory, OPENLORE_DIR, OPENLORE_ANALYSIS_SUBDIR);
+  const filePath = join(analysisDir, ARTIFACT_LLM_CONTEXT);
+  let mtime: number;
+  try {
+    mtime = (await stat(filePath)).mtimeMs;
+  } catch {
+    return; // no artifact on disk yet — nothing to stay fresh against
+  }
+  const existing = _contextCache.get(directory);
+  // Preserve an already-open EdgeStore handle if the new ctx doesn't carry one.
+  if (existing?.ctx.edgeStore && !ctx.edgeStore) {
+    ctx.edgeStore = existing.ctx.edgeStore;
+  }
+  _contextCache.set(directory, { ctx, mtime });
+}
+
 export async function readCachedContext(directory: string, timeout?: number): Promise<CachedContext | null> {
   const analysisDir = join(directory, OPENLORE_DIR, OPENLORE_ANALYSIS_SUBDIR);
   const filePath = join(analysisDir, ARTIFACT_LLM_CONTEXT);
diff --git a/src/core/services/mcp-watcher-incremental.test.ts b/src/core/services/mcp-watcher-incremental.test.ts
new file mode 100644
index 00000000..7b6eb971
--- /dev/null
+++ b/src/core/services/mcp-watcher-incremental.test.ts
@@ -0,0 +1,166 @@
+/**
+ * Spec 13.1 — watch-mode performance regression tests.
+ *
+ * These cover the freshness/coalescing guarantees without needing a real
+ * chokidar watcher, an EdgeStore (call-graph.db), or a LanceDB vector index:
+ *   • G1 — primeContextCache makes the next read a HIT (no cold re-parse of
+ *          llm-context.json).
+ *   • G2 — a burst of N events coalesces to exactly ONE flush / persistence.
+ *   • G3 — a batch ≥ BULK_THRESHOLD is reported as a single coalesced refresh.
+ *   • G5 — the watcher emits ≤ 1 summary line per batch by default.
+ *   • G6 — signatures reflect a just-saved symbol after the flush, on disk.
+ */
+import { describe, it, expect, beforeEach, afterEach, vi } from 'vitest';
+import { mkdtemp, mkdir, writeFile, readFile, rm } from 'node:fs/promises';
+import { tmpdir } from 'node:os';
+import { join } from 'node:path';
+import { McpWatcher } from './mcp-watcher.js';
+import {
+  readCachedContext,
+  primeContextCache,
+  _resetContextCacheForTesting,
+} from './mcp-handlers/utils.js';
+
+let root: string;
+let analysisDir: string;
+let contextPath: string;
+
+async function writeContext(signatures: unknown[] = []): Promise<void> {
+  await writeFile(contextPath, JSON.stringify({ signatures, callGraph: null }, null, 2), 'utf-8');
+}
+
+beforeEach(async () => {
+  root = await mkdtemp(join(tmpdir(), 'ol-watch-'));
+  analysisDir = join(root, '.openlore', 'analysis');
+  await mkdir(analysisDir, { recursive: true });
+  contextPath = join(analysisDir, 'llm-context.json');
+  _resetContextCacheForTesting();
+});
+
+afterEach(async () => {
+  _resetContextCacheForTesting();
+  vi.restoreAllMocks();
+  await rm(root, { recursive: true, force: true });
+});
+
+describe('McpWatcher — Spec 13.1 freshness', () => {
+  it('G6: a save patches the just-changed signature into llm-context.json on disk', async () => {
+    await writeContext([]);
+    await readCachedContext(root); // pre-warm
+
+    const fooAbs = join(root, 'foo.ts');
+    await writeFile(fooAbs, 'export function alpha() { return 1; }\n', 'utf-8');
+
+    const watcher = new McpWatcher({ rootPath: root, embed: false });
+    await watcher.handleChange(fooAbs);
+
+    const onDisk = JSON.parse(await readFile(contextPath, 'utf-8')) as { signatures: Array<{ path: string; entries: Array<{ name: string }> }> };
+    const fooEntry = onDisk.signatures.find((s) => s.path === 'foo.ts');
+    expect(fooEntry).toBeDefined();
+    expect(fooEntry!.entries.some((e) => e.name === 'alpha')).toBe(true);
+  });
+
+  it('G6: a save preserves existing signatures (reads ground truth from disk, not a stale cache)', async () => {
+    // Seed an existing entry, then pre-poison the shared read cache with an
+    // EMPTY context for this directory. A writer that patched the cached object
+    // would drop src/existing.ts; reading disk ground truth preserves it.
+    await writeContext([{ path: 'src/existing.ts', entries: [{ name: 'existingFn', signature: '', docstring: '', line: 1, kind: 'function' }] }]);
+    await primeContextCache(root, { signatures: [] } as never);
+
+    const fooAbs = join(root, 'src', 'newmod.ts');
+    await mkdir(join(root, 'src'), { recursive: true });
+    await writeFile(fooAbs, 'export function newFn() { return 42; }\n', 'utf-8');
+
+    const watcher = new McpWatcher({ rootPath: root, embed: false });
+    await watcher.handleChange(fooAbs);
+
+    const onDisk = JSON.parse(await readFile(contextPath, 'utf-8')) as { signatures: Array<{ path: string }> };
+    const paths = onDisk.signatures.map((s) => s.path);
+    expect(paths).toContain('src/existing.ts');
+    expect(paths).toContain('src/newmod.ts');
+  });
+
+  it('G1: primeContextCache makes the next read a HIT — it returns the in-memory object, not what is on disk', async () => {
+    await writeContext([{ path: 'orig.ts', entries: [] }]);
+    const cold = await readCachedContext(root);
+    expect(cold).not.toBeNull();
+
+    // Prime the cache with a DIFFERENT object WITHOUT touching the file → the
+    // on-disk mtime is unchanged, so the entry stays valid. A subsequent read
+    // that hit the cache returns the primed object; a read that went to disk
+    // would return the original on-disk signatures instead.
+    await primeContextCache(root, { signatures: [{ path: 'patched.ts', entries: [{ name: 'beta', signature: '', docstring: '', line: 1, kind: 'function' }] }] } as never);
+
+    const after = await readCachedContext(root);
+    const sigs = (after as { signatures: Array<{ path: string }> }).signatures;
+    expect(sigs.some((s) => s.path === 'patched.ts')).toBe(true);
+    expect(sigs.some((s) => s.path === 'orig.ts')).toBe(false);
+
+    const onDisk = JSON.parse(await readFile(contextPath, 'utf-8')) as { signatures: Array<{ path: string }> };
+    expect(onDisk.signatures.some((s) => s.path === 'orig.ts')).toBe(true);
+  });
+
+  it('G2/G5: a burst of N change events coalesces to exactly ONE flush + ONE summary line', async () => {
+    await writeContext([]);
+    const files = ['a.ts', 'b.ts', 'c.ts', 'd.ts'];
+    for (const f of files) {
+      await writeFile(join(root, f), `export function fn_${f.replace('.ts', '')}() {}\n`, 'utf-8');
+    }
+
+    const summaries: string[] = [];
+    vi.spyOn(process.stderr, 'write').mockImplementation((chunk: string | Uint8Array): boolean => {
+      const s = chunk.toString();
+      if (/\[mcp-watcher\] (updated|coalesced)/.test(s)) summaries.push(s);
+      return true;
+    });
+
+    const watcher = new McpWatcher({ rootPath: root, embed: false, debounceMs: 30, maxBatchMs: 1000 });
+    for (const f of files) (watcher as unknown as { enqueue(p: string): void }).enqueue(join(root, f));
+
+    await new Promise((r) => setTimeout(r, 200));
+
+    expect(summaries.length).toBe(1);
+    expect(summaries[0]).toContain('updated 4 files');
+
+    const ctx = await readCachedContext(root);
+    const paths = new Set((ctx as { signatures: Array<{ path: string }> }).signatures.map((s) => s.path));
+    for (const f of files) expect(paths.has(f)).toBe(true);
+  });
+
+  it('G3: a batch ≥ BULK_THRESHOLD is reported as a single coalesced refresh', async () => {
+    await writeContext([]);
+    const files = ['x.ts', 'y.ts', 'z.ts'];
+    for (const f of files) await writeFile(join(root, f), `export const ${f.replace('.ts', '')} = 1;\n`, 'utf-8');
+
+    const summaries: string[] = [];
+    vi.spyOn(process.stderr, 'write').mockImplementation((chunk: string | Uint8Array): boolean => {
+      const s = chunk.toString();
+      if (/\[mcp-watcher\] (updated|coalesced)/.test(s)) summaries.push(s);
+      return true;
+    });
+
+    const watcher = new McpWatcher({ rootPath: root, embed: false, debounceMs: 30, bulkThreshold: 3 });
+    for (const f of files) (watcher as unknown as { enqueue(p: string): void }).enqueue(join(root, f));
+    await new Promise((r) => setTimeout(r, 200));
+
+    expect(summaries.length).toBe(1);
+    expect(summaries[0]).toContain('coalesced 3 changes');
+  });
+
+  it('the watcher-path flush persists the patched context to disk (freshness survives a process restart)', async () => {
+    await writeContext([]);
+    const fooAbs = join(root, 'foo.ts');
+    await writeFile(fooAbs, 'export function delta() {}\n', 'utf-8');
+
+    const watcher = new McpWatcher({ rootPath: root, embed: false, debounceMs: 20, maxBatchMs: 1000 });
+    (watcher as unknown as { enqueue(p: string): void }).enqueue(fooAbs);
+    await new Promise((r) => setTimeout(r, 150));
+
+    const onDisk = JSON.parse(await readFile(contextPath, 'utf-8')) as { signatures: Array<{ path: string; entries: Array<{ name: string }> }> };
+    const foo = onDisk.signatures.find((s) => s.path === 'foo.ts');
+    expect(foo).toBeDefined();
+    expect(foo!.entries.some((e) => e.name === 'delta')).toBe(true);
+
+    await watcher.stop();
+  });
+});
diff --git a/src/core/services/mcp-watcher.test.ts b/src/core/services/mcp-watcher.test.ts
index 6d905d01..42ad4d28 100644
--- a/src/core/services/mcp-watcher.test.ts
+++ b/src/core/services/mcp-watcher.test.ts
@@ -1,5 +1,12 @@
 /**
- * Tests for McpWatcher — handleChange (unit, no real FS watcher needed)
+ * Tests for McpWatcher — handleChange / handleBatch (unit, no real FS watcher).
+ *
+ * Spec 13.1 reshaped the watcher: a single coalescing queue (enqueue → flush →
+ * handleBatch) replaces the per-file timer map, and the vector update goes
+ * through VectorIndex.updateFiles (row-level) on a decoupled lane rather than
+ * reEmbed → VectorIndex.build. These tests track the new surface; the
+ * freshness/coalescing guarantees themselves live in
+ * mcp-watcher-incremental.test.ts.
  */
 
 import { describe, it, expect, vi, beforeEach, afterEach } from 'vitest';
@@ -10,6 +17,7 @@ import type { LLMContext } from '../analyzer/artifact-generator.js';
 import type { SerializedCallGraph } from '../analyzer/call-graph.js';
 import { EdgeStore } from './edge-store.js';
 import type { CallEdge } from '../analyzer/call-graph.js';
+import { _resetContextCacheForTesting } from './mcp-handlers/utils.js';
 
 // ── chokidar mock (prevents real FS watcher from opening) ────────────────────
 
@@ -61,6 +69,10 @@ async function setupProject(ctx: LLMContext): Promise<{ rootPath: string; output
   return { rootPath, outputPath, contextPath };
 }
 
+beforeEach(() => {
+  _resetContextCacheForTesting();
+});
+
 // ── Tests ─────────────────────────────────────────────────────────────────────
 
 describe('McpWatcher.handleChange', () => {
@@ -72,6 +84,7 @@ describe('McpWatcher.handleChange', () => {
 
   afterEach(() => {
     stderrSpy.mockRestore();
+    _resetContextCacheForTesting();
   });
 
   it('updates signatures for a changed TypeScript file', async () => {
@@ -304,15 +317,16 @@ describe('McpWatcher.handleChange', () => {
     const watcher = new McpWatcher({ rootPath, outputPath });
     await watcher.handleChange(srcFile);
 
-    // llm-context.json must not be written (early return on hash hit)
+    // llm-context.json must not be written: the only changed file was a no-op
+    // autosave (hash hit), so the batch has nothing to persist.
     const after = await readFile(contextPath, 'utf-8');
     expect(after).toBe(before);
   });
 });
 
-// ── reEmbed paths ─────────────────────────────────────────────────────────────
+// ── Vector update path (updateVectors → VectorIndex.updateFiles) ────────────────
 
-describe('McpWatcher.reEmbed', () => {
+describe('McpWatcher vector update (Spec 13.1 — row-level updateFiles)', () => {
   let stderrSpy: ReturnType<typeof vi.spyOn>;
 
   beforeEach(() => {
@@ -322,20 +336,21 @@ describe('McpWatcher.reEmbed', () => {
   afterEach(() => {
     stderrSpy.mockRestore();
     vi.restoreAllMocks();
+    vi.resetModules();
+    _resetContextCacheForTesting();
   });
 
-  it('refreshes the BM25 index (embedSvc=null) when no embedding service is available', async () => {
+  it('calls VectorIndex.updateFiles with a null embedder when no embedding service is available (BM25 refresh)', async () => {
     const cg = makeCallGraph();
     const ctx = makeContext({ callGraph: cg });
     const { rootPath, outputPath } = await setupProject(ctx);
 
-    // Write a fake vector index marker so VectorIndex.exists returns true
     await mkdir(join(outputPath, 'vector-index'), { recursive: true });
     await writeFile(join(outputPath, 'vector-index', '.keep'), '', 'utf-8');
 
-    const mockBuild = vi.fn().mockResolvedValue({ embedded: 0, reused: 0, total: 1, hasEmbeddings: false });
+    const mockUpdate = vi.fn().mockResolvedValue({ embedded: 0, reused: 0, total: 1, hasEmbeddings: false });
     vi.doMock('../analyzer/vector-index.js', () => ({
-      VectorIndex: { exists: vi.fn().mockReturnValue(true), build: mockBuild },
+      VectorIndex: { exists: vi.fn().mockReturnValue(true), updateFiles: mockUpdate },
     }));
     vi.doMock('../analyzer/embedding-service.js', () => ({
       EmbeddingService: {
@@ -354,32 +369,31 @@ describe('McpWatcher.reEmbed', () => {
     const watcher = new McpWatcher({ rootPath, outputPath });
     await watcher.handleChange(srcFile);
 
-    // build is invoked with a null embedder (BM25 refresh), not skipped
-    expect(mockBuild).toHaveBeenCalledWith(
+    expect(mockUpdate).toHaveBeenCalledWith(
       outputPath,
-      cg.nodes,
-      expect.any(Array),
-      expect.any(Set),
-      expect.any(Set),
-      null,
-      expect.any(Map),
-      true,
-    );
-    expect(stderrSpy).toHaveBeenCalledWith(
-      expect.stringContaining('refreshed BM25 index'),
+      expect.any(Array),   // changed nodes (empty here — no edge store)
+      expect.any(Set),     // changed file paths
+      expect.any(Array),   // signatures
+      expect.any(Set),     // hub ids
+      expect.any(Set),     // entry ids
+      null,                // embedder unavailable → BM25 refresh
+      expect.any(Map),     // file contents
     );
   });
 
-  it('calls VectorIndex.build and logs when embedding succeeds', async () => {
+  it('calls VectorIndex.updateFiles with the embedder when one is available', async () => {
     const cg = makeCallGraph();
     const ctx = makeContext({ callGraph: cg });
     const { rootPath, outputPath } = await setupProject(ctx);
 
-    const mockBuild = vi.fn().mockResolvedValue({ embedded: 3, reused: 1, total: 4, hasEmbeddings: true });
+    await mkdir(join(outputPath, 'vector-index'), { recursive: true });
+    await writeFile(join(outputPath, 'vector-index', '.keep'), '', 'utf-8');
+
+    const mockUpdate = vi.fn().mockResolvedValue({ embedded: 3, reused: 1, total: 4, hasEmbeddings: true });
     const mockEmbedSvc = {};
 
     vi.doMock('../analyzer/vector-index.js', () => ({
-      VectorIndex: { exists: vi.fn().mockReturnValue(true), build: mockBuild },
+      VectorIndex: { exists: vi.fn().mockReturnValue(true), updateFiles: mockUpdate },
     }));
     vi.doMock('../analyzer/embedding-service.js', () => ({
       EmbeddingService: {
@@ -398,30 +412,30 @@ describe('McpWatcher.reEmbed', () => {
     const watcher = new McpWatcher({ rootPath, outputPath });
     await watcher.handleChange(srcFile);
 
-    expect(mockBuild).toHaveBeenCalledWith(
+    expect(mockUpdate).toHaveBeenCalledWith(
       outputPath,
-      cg.nodes,
+      expect.any(Array),
+      expect.any(Set),
       expect.any(Array),
       expect.any(Set),
       expect.any(Set),
       mockEmbedSvc,
       expect.any(Map),
-      true,
-    );
-    expect(stderrSpy).toHaveBeenCalledWith(
-      expect.stringContaining('re-embedded'),
     );
   });
 
-  it('logs embed error and does not throw when VectorIndex.build throws', async () => {
+  it('logs an embed error and does not throw when VectorIndex.updateFiles throws', async () => {
     const cg = makeCallGraph();
     const ctx = makeContext({ callGraph: cg });
     const { rootPath, outputPath } = await setupProject(ctx);
 
+    await mkdir(join(outputPath, 'vector-index'), { recursive: true });
+    await writeFile(join(outputPath, 'vector-index', '.keep'), '', 'utf-8');
+
     vi.doMock('../analyzer/vector-index.js', () => ({
       VectorIndex: {
         exists: vi.fn().mockReturnValue(true),
-        build: vi.fn().mockRejectedValue(new Error('LanceDB connection failed')),
+        updateFiles: vi.fn().mockRejectedValue(new Error('LanceDB connection failed')),
       },
     }));
     vi.doMock('../analyzer/embedding-service.js', () => ({
@@ -446,9 +460,9 @@ describe('McpWatcher.reEmbed', () => {
   });
 });
 
-// ── Debounce ──────────────────────────────────────────────────────────────────
+// ── Coalescing queue (Spec 13.1) ───────────────────────────────────────────────
 
-describe('McpWatcher debounce', () => {
+describe('McpWatcher coalescing queue', () => {
   beforeEach(() => {
     vi.useFakeTimers();
   });
@@ -457,79 +471,62 @@ describe('McpWatcher debounce', () => {
     vi.useRealTimers();
   });
 
-  it('coalesces rapid changes to the same file into one handleChange call', async () => {
+  it('coalesces rapid changes to the same file into a single batch flush', async () => {
     const { McpWatcher } = await import('./mcp-watcher.js');
-    const watcher = new McpWatcher({ rootPath: '/tmp/proj', debounceMs: 200 });
-    const spy = vi.spyOn(watcher, 'handleChange').mockResolvedValue(undefined);
+    const watcher = new McpWatcher({ rootPath: '/tmp/proj', debounceMs: 200, embed: false });
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    const spy = vi.spyOn(watcher as any, 'handleBatch').mockResolvedValue(undefined);
+    const enqueue = (watcher as unknown as { enqueue(p: string): void }).enqueue.bind(watcher);
 
-    // Simulate 5 rapid saves
-    for (let i = 0; i < 5; i++) {
-      (watcher as unknown as { scheduleChange(p: string): void }).scheduleChange('/tmp/proj/src/foo.ts');
-    }
+    for (let i = 0; i < 5; i++) enqueue('/tmp/proj/src/foo.ts');
 
     await vi.runAllTimersAsync();
     expect(spy).toHaveBeenCalledTimes(1);
   });
 
-  it('fires separate handleChange for two different files', async () => {
+  it('coalesces changes across DIFFERENT files into ONE batch (G2)', async () => {
     const { McpWatcher } = await import('./mcp-watcher.js');
-    const watcher = new McpWatcher({ rootPath: '/tmp/proj', debounceMs: 200 });
-    const spy = vi.spyOn(watcher, 'handleChange').mockResolvedValue(undefined);
+    const watcher = new McpWatcher({ rootPath: '/tmp/proj', debounceMs: 200, embed: false });
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    const spy = vi.spyOn(watcher as any, 'handleBatch').mockResolvedValue(undefined);
+    const enqueue = (watcher as unknown as { enqueue(p: string): void }).enqueue.bind(watcher);
 
-    (watcher as unknown as { scheduleChange(p: string): void }).scheduleChange('/tmp/proj/src/a.ts');
-    (watcher as unknown as { scheduleChange(p: string): void }).scheduleChange('/tmp/proj/src/b.ts');
+    enqueue('/tmp/proj/src/a.ts');
+    enqueue('/tmp/proj/src/b.ts');
 
     await vi.runAllTimersAsync();
-    expect(spy).toHaveBeenCalledTimes(2);
-  });
-});
-
-describe('McpWatcher reschedule-when-busy', () => {
-  let stderrSpy: ReturnType<typeof vi.spyOn>;
-
-  beforeEach(() => {
-    stderrSpy = vi.spyOn(process.stderr, 'write').mockImplementation(() => true);
-    vi.useFakeTimers();
-  });
-
-  afterEach(() => {
-    stderrSpy.mockRestore();
-    vi.useRealTimers();
+    // One flush carrying both paths — not one flush per file.
+    expect(spy).toHaveBeenCalledTimes(1);
+    const batch = spy.mock.calls[0][0] as string[];
+    expect(new Set(batch)).toEqual(new Set(['/tmp/proj/src/a.ts', '/tmp/proj/src/b.ts']));
   });
 
-  it('reschedules a change instead of dropping it when busy', async () => {
+  it('processes changes that arrive while a flush is in flight (no drop, single-flight)', async () => {
     const { McpWatcher } = await import('./mcp-watcher.js');
-    const watcher = new McpWatcher({ rootPath: '/tmp/proj', debounceMs: 100 });
+    const watcher = new McpWatcher({ rootPath: '/tmp/proj', debounceMs: 100, embed: false });
 
-    // Make handleChange block until we resolve it
     let resolveFirst!: () => void;
     const firstCall = new Promise<void>(r => { resolveFirst = r; });
-    let callCount = 0;
-    vi.spyOn(watcher, 'handleChange').mockImplementation(async () => {
-      callCount++;
-      if (callCount === 1) await firstCall;
+    let calls = 0;
+    // eslint-disable-next-line @typescript-eslint/no-explicit-any
+    vi.spyOn(watcher as any, 'handleBatch').mockImplementation(async () => {
+      calls++;
+      if (calls === 1) await firstCall;
     });
+    const enqueue = (watcher as unknown as { enqueue(p: string): void }).enqueue.bind(watcher);
 
-    const schedule = (watcher as unknown as { scheduleChange(p: string): void }).scheduleChange.bind(watcher);
-
-    // First change — will start processing after debounce
-    schedule('/tmp/proj/src/a.ts');
+    enqueue('/tmp/proj/src/a.ts');
     await vi.advanceTimersByTimeAsync(100);
-    // handleChange is now running (blocked on firstCall)
-    expect(callCount).toBe(1);
+    expect(calls).toBe(1); // first flush running, blocked
 
-    // Second change arrives while busy — should be rescheduled, not dropped
-    schedule('/tmp/proj/src/a.ts');
+    // New change arrives while busy — accumulates in pending, not dropped.
+    enqueue('/tmp/proj/src/b.ts');
     await vi.advanceTimersByTimeAsync(100);
-    // Still blocked — rescheduled change fires but sees busy, reschedules again
-    expect(callCount).toBe(1);
+    expect(calls).toBe(1); // still single-flight
 
-    // Unblock first handleChange
     resolveFirst();
     await vi.advanceTimersByTimeAsync(200);
-
-    // Rescheduled change should now have fired
-    expect(callCount).toBe(2);
+    expect(calls).toBe(2); // pending 'b.ts' flushed after the first finished
   });
 });
 
@@ -614,16 +611,16 @@ describe('McpWatcher — real chokidar prunes build dirs (does not FD-storm targ
   // chokidar (not the module mock above) via a fresh dynamic import in an
   // isolated module registry.
   it('watches source but never opens target/ children', async () => {
-    const { mkdtemp, writeFile, mkdir } = await import('node:fs/promises');
-    const { tmpdir } = await import('node:os');
+    const { mkdtemp: mkdtempReal, writeFile: writeFileReal, mkdir: mkdirReal } = await import('node:fs/promises');
+    const { tmpdir: tmpdirReal } = await import('node:os');
     const { join: pjoin } = await import('node:path');
 
-    const root = await mkdtemp(pjoin(tmpdir(), 'mcp-prune-'));
-    await mkdir(pjoin(root, 'src'), { recursive: true });
-    await mkdir(pjoin(root, 'target', 'debug', 'deps'), { recursive: true });
-    await writeFile(pjoin(root, 'src', 'main.rs'), 'fn main() {}');
+    const root = await mkdtempReal(pjoin(tmpdirReal(), 'mcp-prune-'));
+    await mkdirReal(pjoin(root, 'src'), { recursive: true });
+    await mkdirReal(pjoin(root, 'target', 'debug', 'deps'), { recursive: true });
+    await writeFileReal(pjoin(root, 'src', 'main.rs'), 'fn main() {}');
     for (let i = 0; i < 40; i++) {
-      await writeFile(pjoin(root, 'target', 'debug', 'deps', `f${i}.rs`), '// gen');
+      await writeFileReal(pjoin(root, 'target', 'debug', 'deps', `f${i}.rs`), '// gen');
     }
 
     // Use the real chokidar + the real ignore predicate, not the vi.mock.
diff --git a/src/core/services/mcp-watcher.ts b/src/core/services/mcp-watcher.ts
index 9d39a01b..0629d50f 100644
--- a/src/core/services/mcp-watcher.ts
+++ b/src/core/services/mcp-watcher.ts
@@ -8,19 +8,39 @@
  * The call graph is deliberately excluded — rebuilding it requires full
  * tree-sitter analysis of all call sites and is too expensive for a watch loop.
  * It stays current via the post-commit hook (openlore analyze --force --embed).
+ *
+ * Spec 13.1 (watch-mode performance): freshness is O(change), not O(repo).
+ *   • Per-file events COALESCE into one batched flush (single debounce timer +
+ *     hard max-batch ceiling), so a burst / branch-switch runs the pipeline once,
+ *     not once per file.
+ *   • The patched llm-context is handed to the MCP read cache in place
+ *     (primeContextCache), so the next tool call is a cache HIT — no 2.1 MB
+ *     cold re-parse — even after the disk write.
+ *   • Vector updates are row-level (VectorIndex.updateFiles), not a full-corpus
+ *     read+overwrite, and run on a separate lower-priority lane so signature
+ *     freshness never blocks on embedding.
+ *   • VCS-flood / bulk batches are detected and collapsed to a single refresh.
+ *   • stderr emits one summary line per batch by default (per-file detail behind
+ *     OPENLORE_WATCH_DEBUG).
  */
 
-import { readFile, writeFile } from 'node:fs/promises';
+import { readFile, writeFile, readdir } from 'node:fs/promises';
 import { createHash } from 'node:crypto';
 import { join, relative } from 'node:path';
 import chokidar, { type FSWatcher } from 'chokidar';
 import { extractSignatures, detectLanguage } from '../analyzer/signature-extractor.js';
-import type { LLMContext } from '../analyzer/artifact-generator.js';
+import type { FunctionNode } from '../analyzer/call-graph.js';
 import { EdgeStore } from './edge-store.js';
+import { primeContextCache, type CachedContext } from './mcp-handlers/utils.js';
 import {
   OPENLORE_DIR,
   OPENLORE_ANALYSIS_SUBDIR,
   ARTIFACT_LLM_CONTEXT,
+  WATCH_DEBOUNCE_MS,
+  WATCH_MAX_BATCH_MS,
+  WATCH_BULK_THRESHOLD,
+  WATCH_EMBED_FILE_CEILING,
+  WATCH_VCS_SETTLE_MS,
 } from '../../constants.js';
 
 const CALL_GRAPH_LANGS = new Set([
@@ -36,12 +56,25 @@ export interface McpWatcherOptions {
   rootPath: string;
   /** Absolute path to .openlore/analysis/ — where llm-context.json lives */
   outputPath?: string;
-  /** Milliseconds to debounce file-change events (default: 400) */
+  /** Milliseconds to debounce file-change events (default: WATCH_DEBOUNCE_MS) */
   debounceMs?: number;
+  /** Hard flush ceiling under a continuous change stream (default: WATCH_MAX_BATCH_MS) */
+  maxBatchMs?: number;
+  /** Batch size that trips VCS-flood handling (default: WATCH_BULK_THRESHOLD) */
+  bulkThreshold?: number;
+  /** Run the live vector update; false = signatures-only (default: true) */
+  embed?: boolean;
+  /** Above this many watched source files, auto-degrade to signatures-only */
+  embedFileCeiling?: number;
   /** Extra glob patterns to ignore in addition to defaults */
   ignore?: string[];
 }
 
+interface ChangedFile {
+  rel: string;
+  content: string;
+}
+
 const SOURCE_EXTENSIONS = /\.(ts|tsx|js|jsx|py|go|rs|rb|java|kt|php|cs|cpp|cc|cxx|h|hpp|c|swift)$/;
 
 // Directory NAMES that must never be watched. Build-output and dependency
@@ -104,24 +137,63 @@ export function isIgnoredRelPath(relPath: string): boolean {
 export class McpWatcher {
   private readonly rootPath: string;
   private readonly outputPath: string;
+  private readonly contextPath: string;
   private readonly debounceMs: number;
+  private readonly maxBatchMs: number;
+  private readonly bulkThreshold: number;
+  private readonly embedFileCeiling: number;
   private readonly extraIgnore: string[];
+  private readonly debug: boolean;
 
   private fsWatcher?: FSWatcher;
-  private timers = new Map<string, ReturnType<typeof setTimeout>>();
-  private running = false;
+  private gitWatcher?: FSWatcher;
+
+  // ── Coalescing queue (Step 1) ──────────────────────────────────────────────
+  private pending = new Set<string>();              // absolute paths awaiting a flush
+  private debounceTimer?: ReturnType<typeof setTimeout>;
+  private maxBatchTimer?: ReturnType<typeof setTimeout>;
+  private running = false;                           // single-flight for the signature flush
+  private vcsBulkFlag = false;                       // set by the .git ref watcher
+
+  // ── Embedding lane (Step 4 — decoupled, lower priority) ─────────────────────
+  private embed: boolean;
+  private embedDegraded = false;                     // auto-degraded on a too-large tree
+  private embedFiles = new Map<string, string>();    // rel → content awaiting embed
+  private embedNodes = new Map<string, FunctionNode>(); // id → node awaiting embed
+  private embedTimer?: ReturnType<typeof setTimeout>;
+  private embedRunning = false;
+  private lastEmbedContext?: CachedContext;
 
   constructor(options: McpWatcherOptions) {
     this.rootPath   = options.rootPath;
     this.outputPath = options.outputPath
       ?? join(options.rootPath, OPENLORE_DIR, OPENLORE_ANALYSIS_SUBDIR);
-    this.debounceMs  = options.debounceMs ?? 400;
+    this.contextPath = join(this.outputPath, ARTIFACT_LLM_CONTEXT);
+    this.debounceMs  = options.debounceMs ?? WATCH_DEBOUNCE_MS;
+    this.maxBatchMs  = options.maxBatchMs ?? WATCH_MAX_BATCH_MS;
+    this.bulkThreshold = options.bulkThreshold ?? WATCH_BULK_THRESHOLD;
+    this.embedFileCeiling = options.embedFileCeiling ?? WATCH_EMBED_FILE_CEILING;
+    this.embed       = options.embed ?? true;
     this.extraIgnore = options.ignore ?? [];
+    this.debug       = !!process.env.OPENLORE_WATCH_DEBUG;
   }
 
   // ── Lifecycle ──────────────────────────────────────────────────────────────
 
   async start(): Promise<void> {
+    // Auto-degrade live embedding on very large trees (Step 4). Counting is
+    // bounded — it stops as soon as the ceiling is exceeded.
+    if (this.embed) {
+      const count = await this.countSourceFiles(this.embedFileCeiling + 1);
+      if (count > this.embedFileCeiling) {
+        this.embedDegraded = true;
+        process.stderr.write(
+          `[mcp-watcher] ${count}+ source files exceed the live-embed ceiling ` +
+          `(${this.embedFileCeiling}); running signatures-only — embeddings refresh at commit\n`
+        );
+      }
+    }
+
     await new Promise<void>((resolve, reject) => {
       const extraIgnore = this.extraIgnore;
       const rootPath = this.rootPath;
@@ -142,7 +214,7 @@ export class McpWatcher {
 
       this.fsWatcher.on('change', (absPath: string) => {
         if (SOURCE_EXTENSIONS.test(absPath)) {
-          this.scheduleChange(absPath);
+          this.enqueue(absPath);
         }
       });
 
@@ -150,133 +222,306 @@ export class McpWatcher {
       this.fsWatcher.on('error', (err: unknown) => reject(err));
     });
 
-    process.stderr.write(`[mcp-watcher] watching ${this.rootPath}\n`);
+    // Best-effort VCS-flood detection (Step 5): a branch switch / rebase / merge
+    // bumps these refs. We never recurse into .git (it stays ignored above); we
+    // watch only these specific files, then collapse the churn into one refresh.
+    try {
+      const gitDir = join(this.rootPath, '.git');
+      const refs = ['HEAD', 'index', 'MERGE_HEAD', 'ORIG_HEAD'].map((f) => join(gitDir, f));
+      this.gitWatcher = chokidar.watch(refs, {
+        persistent: true,
+        ignoreInitial: true,
+        followSymlinks: false,
+      });
+      this.gitWatcher.on('all', () => this.onVcsEvent());
+    } catch {
+      // no .git, or watch failed — VCS detection falls back to the batch-size
+      // threshold in handleBatch, which is enough for G3.
+    }
+
+    process.stderr.write(
+      `[mcp-watcher] watching ${this.rootPath}` +
+      `${this.embed && !this.embedDegraded ? '' : ' (signatures-only)'}\n`
+    );
   }
 
   async stop(): Promise<void> {
-    for (const t of this.timers.values()) clearTimeout(t);
-    this.timers.clear();
+    if (this.debounceTimer) clearTimeout(this.debounceTimer);
+    if (this.maxBatchTimer) clearTimeout(this.maxBatchTimer);
+    if (this.embedTimer) clearTimeout(this.embedTimer);
+    this.debounceTimer = this.maxBatchTimer = this.embedTimer = undefined;
+    // Best-effort: persist anything still queued so a save right before shutdown
+    // is not lost.
+    if (this.pending.size > 0 && !this.running) {
+      const batch = Array.from(this.pending);
+      this.pending.clear();
+      try { await this.handleBatch(batch, { syncFlush: true }); } catch { /* ignore */ }
+    }
     await this.fsWatcher?.close();
+    await this.gitWatcher?.close();
     process.stderr.write('[mcp-watcher] stopped\n');
   }
 
-  // ── Debounce ───────────────────────────────────────────────────────────────
+  // ── Coalescing (Step 1) ──────────────────────────────────────────────────────
 
-  private scheduleChange(absPath: string): void {
-    const existing = this.timers.get(absPath);
-    if (existing) clearTimeout(existing);
+  /**
+   * Add a changed path to the pending set and (re)arm a single debounce timer,
+   * plus a one-shot hard ceiling so a continuous stream still flushes.
+   */
+  private enqueue(absPath: string): void {
+    this.pending.add(absPath);
+    if (this.debounceTimer) clearTimeout(this.debounceTimer);
+    this.debounceTimer = setTimeout(() => this.flush(), this.debounceMs);
+    if (!this.maxBatchTimer) {
+      this.maxBatchTimer = setTimeout(() => this.flush(), this.maxBatchMs);
+    }
+  }
 
-    const t = setTimeout(() => {
-      this.timers.delete(absPath);
-      if (this.running) {
-        // Re-schedule instead of dropping — ensures no changes are lost
-        this.scheduleChange(absPath);
-        return;
-      }
-      this.running = true;
-      this.handleChange(absPath)
-        .catch(err => process.stderr.write(`[mcp-watcher] error: ${(err as Error).message}\n`))
-        .finally(() => { this.running = false; });
-    }, this.debounceMs);
+  /** A .git ref changed — settle, then flush whatever changed as one bulk batch. */
+  private onVcsEvent(): void {
+    this.vcsBulkFlag = true;
+    if (this.debounceTimer) clearTimeout(this.debounceTimer);
+    this.debounceTimer = setTimeout(() => this.flush(), WATCH_VCS_SETTLE_MS);
+    if (this.debug) {
+      process.stderr.write('[mcp-watcher] VCS operation detected — coalescing into one refresh\n');
+    }
+  }
 
-    this.timers.set(absPath, t);
+  /**
+   * Drain the pending set into a single batch. Single-flight: if a flush is
+   * already running, leave the new paths in `pending` and reschedule once it
+   * finishes — never interleave two flushes.
+   */
+  private flush(): void {
+    if (this.debounceTimer) { clearTimeout(this.debounceTimer); this.debounceTimer = undefined; }
+    if (this.maxBatchTimer) { clearTimeout(this.maxBatchTimer); this.maxBatchTimer = undefined; }
+    if (this.running) return;            // a follow-up is scheduled in finally{}
+    if (this.pending.size === 0) return;
+
+    const batch = Array.from(this.pending);
+    this.pending.clear();
+    this.running = true;
+    this.handleBatch(batch)
+      .catch((err) => process.stderr.write(`[mcp-watcher] error: ${(err as Error).message}\n`))
+      .finally(() => {
+        this.running = false;
+        if (this.pending.size > 0) {
+          this.debounceTimer = setTimeout(() => this.flush(), this.debounceMs);
+        }
+      });
   }
 
   // ── Core re-index ──────────────────────────────────────────────────────────
 
   /**
-   * Re-index a single changed file.
-   * Exposed for unit testing without needing a real file watcher.
+   * Re-index a single changed file. Exposed for unit testing without needing a
+   * real file watcher; flushes synchronously so callers observe the update on
+   * disk immediately. Internally this is just a batch of one.
    */
   async handleChange(absPath: string): Promise<void> {
-    const rel = relative(this.rootPath, absPath);
-
-    // Skip test files and unsupported languages
-    if (isTestFile(rel)) return;
-    if (detectLanguage(rel) === 'unknown') return;
+    await this.handleBatch([absPath], { syncFlush: true });
+  }
 
-    // Read new file content (needed for hash check and re-parse)
-    let content: string;
-    try {
-      content = await readFile(absPath, 'utf-8');
-    } catch {
-      return; // file may have been deleted between the event and now
+  /**
+   * Process a coalesced batch of changed files as ONE pipeline pass:
+   *   • per-file incremental edge update (content-hash skip), all under one open
+   *     EdgeStore;
+   *   • ONE signature patch + ONE llm-context persist + ONE read-cache handoff;
+   *   • ONE vector update (inline when syncFlush, else on the embed lane).
+   */
+  private async handleBatch(absPaths: string[], opts: { syncFlush?: boolean } = {}): Promise<void> {
+    const t0 = Date.now();
+    const consumedVcsBulk = this.vcsBulkFlag;
+    this.vcsBulkFlag = false;
+
+    // 1. Resolve + read candidate files (skip tests / unknown langs / deleted).
+    const files: Array<{ rel: string; abs: string; content: string }> = [];
+    for (const abs of absPaths) {
+      const rel = relative(this.rootPath, abs);
+      if (isTestFile(rel)) continue;
+      if (detectLanguage(rel) === 'unknown') continue;
+      let content: string;
+      try {
+        content = await readFile(abs, 'utf-8');
+      } catch {
+        continue; // file may have been deleted between the event and now
+      }
+      files.push({ rel, abs, content });
     }
+    if (files.length === 0) return;
 
-    // ── Incremental edge update (CGC _handle_modification algorithm) ──────────
+    // 2. Incremental edge update (CGC _handle_modification algorithm), one open
+    //    store for the whole batch. Content-hash skip drops no-op autosaves.
+    const changedFiles: ChangedFile[] = [];
+    const changedNodes: FunctionNode[] = [];
     if (EdgeStore.exists(this.outputPath)) {
       const store = EdgeStore.open(EdgeStore.dbPath(this.outputPath));
       try {
-        // Content hash — skip entirely on no-op IDE autosaves
-        const newHash = createHash('sha256').update(content).digest('hex');
-        if (store.getFileHash(rel) === newHash) return;
-
-        // Reverse lookup BEFORE delete so we know which files call into this one
-        // callerFiles are relative paths (DB stores relative paths)
-        const callerFiles = store.getCallerFiles(rel);
-
-        // Re-parse BEFORE mutating DB — graph stays readable (old state) during parse.
-        // Seed resolution with all known nodes so the re-parsed caller files'
-        // calls into other files don't degrade to `external::` (they would
-        // otherwise, since the subset trie only holds the re-parsed files).
-        const resolutionNodes = store.getAllInternalNodes();
-        const { edges: newEdges, nodes: newNodes } = await buildGraphSubset(rel, content, callerFiles, this.rootPath, resolutionNodes);
-
-        // Atomic swap: delete stale data and insert fresh data in one transaction
-        // so concurrent MCP reads never see a torn graph
-        store.transaction(() => {
-          store.deleteEdgesForFile(rel);
-          for (const cf of callerFiles.slice(0, CALLER_REPARSE_LIMIT)) {
-            store.deleteOutgoingEdgesForFile(cf);
+        for (const f of files) {
+          const newHash = createHash('sha256').update(f.content).digest('hex');
+          if (store.getFileHash(f.rel) === newHash) continue; // no-op autosave
+
+          // Reverse lookup BEFORE delete so we know which files call into this one.
+          const callerFiles = store.getCallerFiles(f.rel);
+          // Re-parse BEFORE mutating DB — graph stays readable (old state) during
+          // parse. Seed resolution with all known nodes so re-parsed callers'
+          // cross-file calls don't degrade to `external::`.
+          const resolutionNodes = store.getAllInternalNodes();
+          const { edges: newEdges, nodes: newNodes } =
+            await buildGraphSubset(f.rel, f.content, callerFiles, this.rootPath, resolutionNodes);
+
+          // Atomic swap so concurrent MCP reads never see a torn graph.
+          store.transaction(() => {
+            store.deleteEdgesForFile(f.rel);
+            for (const cf of callerFiles.slice(0, CALLER_REPARSE_LIMIT)) {
+              store.deleteOutgoingEdgesForFile(cf);
+            }
+            store.deleteNodesForFile(f.rel);
+            store.insertNodes(newNodes);
+            store.insertEdges(newEdges);
+            store.setFileHash(f.rel, newHash);
+          });
+
+          changedFiles.push({ rel: f.rel, content: f.content });
+          for (const n of newNodes) changedNodes.push(n);
+          if (this.debug) {
+            process.stderr.write(
+              `[mcp-watcher] graph: ${f.rel} (+${newNodes.length} nodes, +${newEdges.length} edges, ${callerFiles.length} callers)\n`
+            );
           }
-          store.deleteNodesForFile(rel);
-          store.insertNodes(newNodes);
-          store.insertEdges(newEdges);
-          store.setFileHash(rel, newHash);
-        });
-
-        process.stderr.write(
-          `[mcp-watcher] updated graph: ${rel} (+${newNodes.length} nodes, +${newEdges.length} edges, ${callerFiles.length} callers re-parsed)\n`
-        );
+        }
       } finally {
         store.close();
       }
+    } else {
+      // No edge store yet — still refresh signatures for every candidate.
+      for (const f of files) changedFiles.push({ rel: f.rel, content: f.content });
     }
 
-    // ── Signature patch ───────────────────────────────────────────────────────
-    const contextPath = join(this.outputPath, ARTIFACT_LLM_CONTEXT);
-    let context: LLMContext;
-    try {
-      const raw = await readFile(contextPath, 'utf-8');
-      context = JSON.parse(raw) as LLMContext;
-    } catch {
-      process.stderr.write(`[mcp-watcher] no context at ${contextPath} — run analyze first\n`);
+    if (changedFiles.length === 0) return; // every event was a no-op autosave
+
+    // 3. Signatures: load context (shared in-memory cache), patch all changed
+    //    files, then ONE persist + read-cache handoff (Step 2). The handoff
+    //    means the next tool call is a cache HIT — no cold 2.1 MB re-parse.
+    const context = await this.loadContext();
+    if (!context) {
+      process.stderr.write(`[mcp-watcher] no context at ${this.contextPath} — run analyze first\n`);
       return;
     }
-
-    const newMap = extractSignatures(rel, content);
     if (!context.signatures) context.signatures = [];
-    const idx = context.signatures.findIndex(m => m.path === rel);
-    if (idx >= 0) {
-      context.signatures[idx] = newMap;
-    } else {
-      context.signatures.push(newMap);
+    for (const f of changedFiles) {
+      const newMap = extractSignatures(f.rel, f.content);
+      const idx = context.signatures.findIndex((m) => m.path === f.rel);
+      if (idx >= 0) context.signatures[idx] = newMap;
+      else context.signatures.push(newMap);
     }
+    await this.persistContext(context);
+
+    // 4. Vector update — decoupled from signature freshness (Step 4).
+    const isBulk = consumedVcsBulk || changedFiles.length >= this.bulkThreshold;
+    if (this.embed && !this.embedDegraded && context.callGraph) {
+      if (opts.syncFlush) {
+        // Direct handleChange path: inline so callers/tests observe it.
+        await this.updateVectors(context, changedFiles, changedNodes);
+      } else {
+        // Watcher path: schedule on the lower-priority embed lane. On a bulk
+        // event this still collapses to a single deferred pass.
+        this.scheduleEmbed(context, changedFiles, changedNodes);
+      }
+    }
+
+    // 5. One summary line per batch (Step 6). Per-file detail is behind debug.
+    const n = changedFiles.length;
+    process.stderr.write(
+      `[mcp-watcher] ${isBulk ? `coalesced ${n} changes` : `updated ${n} file${n === 1 ? '' : 's'}`} (${Date.now() - t0}ms)\n`
+    );
+  }
+
+  // ── llm-context load + persistence + read-cache handoff (Step 2) ─────────────
 
-    await writeFile(contextPath, JSON.stringify(context, null, 2), 'utf-8');
-    process.stderr.write(`[mcp-watcher] re-indexed signatures: ${rel}\n`);
+  /**
+   * True when this watcher writes to the canonical `<root>/.openlore/analysis`
+   * layout that the MCP read handlers cache against. Only then is the shared
+   * in-memory read cache (primeContextCache) the right channel to prime; a custom
+   * `outputPath` (tests / non-standard installs) writes only to disk.
+   */
+  private get usesStandardLayout(): boolean {
+    return this.outputPath === join(this.rootPath, OPENLORE_DIR, OPENLORE_ANALYSIS_SUBDIR);
+  }
 
-    // Incremental vector re-embed — silently skipped if no embedding service available
-    if (context.callGraph) {
-      await this.reEmbed(context, rel, content);
+  /**
+   * Load the context the watcher is about to patch. This ALWAYS reads fresh from
+   * disk — never through the shared read cache — because the cache is a read-path
+   * (tool-call) optimization, and patching a possibly-stale cached object could
+   * silently drop signatures written by a concurrent `analyze` between events.
+   * The writer reads ground truth; persistContext then primes the read cache with
+   * the result so the next tool call is still a hit (Step 2a, G1).
+   */
+  private async loadContext(): Promise<CachedContext | null> {
+    try {
+      const raw = await readFile(this.contextPath, 'utf-8');
+      return JSON.parse(raw) as CachedContext;
+    } catch {
+      return null;
     }
   }
 
-  // ── Embed step ─────────────────────────────────────────────────────────────
+  private async persistContext(context: CachedContext): Promise<void> {
+    // Strip the runtime-only EdgeStore handle before serializing.
+    const { edgeStore: _edgeStore, ...serializable } = context as CachedContext & { edgeStore?: unknown };
+    void _edgeStore;
+    await writeFile(this.contextPath, JSON.stringify(serializable, null, 2), 'utf-8');
+    // Hand the patched object back to the read cache, aligned to the new on-disk
+    // mtime, so the next tool call is a cache hit (no cold re-parse). This is the
+    // fix for root-cause item 2 (mtime bump forcing a full re-read). Only valid
+    // for the canonical layout the read handlers cache against.
+    if (this.usesStandardLayout) await primeContextCache(this.rootPath, context);
+  }
+
+  // ── Embedding lane (Step 4) ──────────────────────────────────────────────────
 
-  private async reEmbed(context: LLMContext, rel: string, content: string): Promise<void> {
+  private scheduleEmbed(context: CachedContext, changedFiles: ChangedFile[], nodes: FunctionNode[]): void {
+    for (const f of changedFiles) this.embedFiles.set(f.rel, f.content);
+    for (const node of nodes) this.embedNodes.set(node.id, node);
+    this.lastEmbedContext = context;
+    if (this.embedTimer) clearTimeout(this.embedTimer);
+    // Slightly behind the signature debounce so structural freshness always lands
+    // first and multiple flushes batch into one embed pass.
+    this.embedTimer = setTimeout(() => void this.runEmbedLane(), this.debounceMs);
+  }
+
+  private async runEmbedLane(): Promise<void> {
+    if (this.embedRunning) {
+      // Re-arm: drain again once the in-flight pass finishes.
+      this.embedTimer = setTimeout(() => void this.runEmbedLane(), this.debounceMs);
+      return;
+    }
+    if (this.embedFiles.size === 0 || !this.lastEmbedContext) return;
+    const changedFiles: ChangedFile[] = Array.from(this.embedFiles, ([rel, content]) => ({ rel, content }));
+    const nodes = Array.from(this.embedNodes.values());
+    const context = this.lastEmbedContext;
+    this.embedFiles.clear();
+    this.embedNodes.clear();
+    this.embedRunning = true;
     try {
-      const { VectorIndex }      = await import('../analyzer/vector-index.js');
+      await this.updateVectors(context, changedFiles, nodes);
+    } catch (err) {
+      process.stderr.write(`[mcp-watcher] embed error: ${(err as Error).message}\n`);
+    } finally {
+      this.embedRunning = false;
+      if (this.embedFiles.size > 0) {
+        this.embedTimer = setTimeout(() => void this.runEmbedLane(), this.debounceMs);
+      }
+    }
+  }
+
+  /**
+   * Row-level vector update for the changed files only (Step 3). Falls back to a
+   * silent no-op when no embedding service and no index are available.
+   */
+  private async updateVectors(context: CachedContext, changedFiles: ChangedFile[], changedNodes: FunctionNode[]): Promise<void> {
+    try {
+      const { VectorIndex } = await import('../analyzer/vector-index.js');
       const { EmbeddingService } = await import('../analyzer/embedding-service.js');
       const { readOpenLoreConfig } = await import('./config-manager.js');
 
@@ -289,37 +534,74 @@ export class McpWatcher {
         const cfg = await readOpenLoreConfig(this.rootPath);
         embedSvc = cfg ? EmbeddingService.fromConfig(cfg) : null;
       }
-      // embedSvc may be null: VectorIndex.build then refreshes the BM25-only
-      // corpus rather than re-embedding. Keeps the keyword index live in watch mode.
-
-      const cg = context.callGraph!;
-      const hubIds    = new Set((cg.hubFunctions ?? []).map(f => f.id));
-      const entryIds  = new Set((cg.entryPoints ?? []).map(f => f.id));
-      const fileContents = new Map([[rel, content]]);
-
-      const { embedded, reused, total, hasEmbeddings } = await VectorIndex.build(
+      // embedSvc may be null: updateFiles then refreshes the BM25-only corpus
+      // rather than re-embedding, keeping the keyword index live in watch mode.
+
+      const cg = context.callGraph;
+      if (!cg) return;
+      const hubIds = new Set((cg.hubFunctions ?? []).map((f) => f.id));
+      const entryIds = new Set((cg.entryPoints ?? []).map((f) => f.id));
+      const changedFilePaths = new Set(changedFiles.map((f) => f.rel));
+      const fileContents = new Map(changedFiles.map((f) => [f.rel, f.content]));
+      // Prefer the freshly-parsed nodes; fall back to the (possibly stale)
+      // call-graph nodes for the changed files when no edge store seeded them.
+      const nodes = changedNodes.length > 0
+        ? changedNodes
+        : (cg.nodes ?? []).filter((n) => changedFilePaths.has(n.filePath));
+
+      const { embedded, reused, total, hasEmbeddings } = await VectorIndex.updateFiles(
         this.outputPath,
-        cg.nodes,
+        nodes,
+        changedFilePaths,
         context.signatures ?? [],
         hubIds,
         entryIds,
         embedSvc,
         fileContents,
-        /* incremental */ true
       );
 
-      process.stderr.write(
-        hasEmbeddings
-          ? `[mcp-watcher] re-embedded ${rel}: ${embedded} new, ${reused} reused\n`
-          : `[mcp-watcher] refreshed BM25 index for ${rel}: ${total} functions\n`
-      );
+      if (this.debug) {
+        process.stderr.write(
+          hasEmbeddings
+            ? `[mcp-watcher] re-embedded ${changedFilePaths.size} file(s): ${embedded} new, ${reused} reused\n`
+            : `[mcp-watcher] refreshed BM25 index for ${changedFilePaths.size} file(s): ${total} functions\n`
+        );
+      }
     } catch (err) {
       process.stderr.write(`[mcp-watcher] embed error: ${(err as Error).message}\n`);
     }
   }
+
+  // ── Helpers ──────────────────────────────────────────────────────────────────
+
+  /** Bounded count of watched source files; stops early once `cap` is exceeded. */
+  private async countSourceFiles(cap: number): Promise<number> {
+    let count = 0;
+    const walk = async (dir: string): Promise<void> => {
+      if (count > cap) return;
+      let entries;
+      try {
+        entries = await readdir(dir, { withFileTypes: true });
+      } catch {
+        return;
+      }
+      for (const entry of entries) {
+        if (count > cap) return;
+        const abs = join(dir, entry.name);
+        const rel = relative(this.rootPath, abs);
+        if (entry.isDirectory()) {
+          if (!isIgnoredRelPath(rel)) await walk(abs);
+        } else if (entry.isFile() && SOURCE_EXTENSIONS.test(entry.name) && !isIgnoredRelPath(rel)) {
+          count++;
+        }
+      }
+    };
+    await walk(this.rootPath);
+    return count;
+  }
 }
 
-// ── Helpers ───────────────────────────────────────────────────────────────────
+// ── Module helpers ──────────────────────────────────────────────────────────────
 
 function isTestFile(relPath: string): boolean {
   return (
@@ -368,7 +650,7 @@ async function buildGraphSubset(
   const result = await builder.build(files, undefined, undefined, resolutionNodes);
 
   // Only return nodes from changedFile — callerFiles nodes are already in DB and unchanged
-  const changedNodes = Array.from(result.nodes.values()).filter(n => n.filePath === changedRel);
+  const changedNodes = Array.from(result.nodes.values()).filter((n) => n.filePath === changedRel);
 
   return { edges: result.edges, nodes: changedNodes };
 }