Skip to content
20 changes: 17 additions & 3 deletions docs/mcp-tools.md
Original file line number Diff line number Diff line change
Expand Up @@ -49,13 +49,27 @@ Add `--watch-auto` to your MCP config args:
}
```

The watcher starts automatically on the first tool call — no hardcoded path needed. It re-extracts signatures for any changed source file and patches `llm-context.json` within ~500 ms of a save. If an embedding server is reachable, it also re-embeds changed functions into the vector index automatically. The call graph is not rebuilt on every change; it stays current via the [post-commit hook](#cicd-integration) (`openlore analyze --force`).
The watcher is **on by default** — it starts automatically on the first tool call
(no hardcoded path needed) and keeps the analysis fresh as you edit. To disable it,
start the server with `openlore mcp --no-watch-auto`.

Freshness is **O(change), not O(repo)** (Spec 13.1): per-file save events are coalesced
into a single batched flush, the patched signatures are handed directly to the MCP read
cache (so the next tool call is a cache hit, not a cold re-parse of `llm-context.json`),
and the vector index is updated with row-level ops rather than a full-table rewrite.
A bulk event (branch switch / rebase / formatter) collapses to a single refresh. On large
repos (> 5000 source files) live embedding auto-degrades to signatures-only (logged once);
embeddings then refresh at commit. Set `OPENLORE_WATCH_DEBUG=1` for per-file stderr detail
(default is one summary line per batch). The call graph is not rebuilt on every change; it
stays current via the [post-commit hook](#cicd-integration) (`openlore analyze --force`).

| Option | Default | Description |
|---|---|---|
| `--watch-auto` | off | Auto-detect project root from first tool call |
| `--watch-auto` | **on** | Auto-detect project root from first tool call |
| `--no-watch-auto` | — | Disable the auto-watcher (one-shot tool calls) |
| `--watch <dir>` | — | Watch a fixed directory (alternative to `--watch-auto`) |
| `--watch-debounce <ms>` | 400 | Delay before re-indexing after a file change |
| `--watch-debounce <ms>` | 400 | Idle delay before a coalesced flush after a change |
| `--watch-no-embed` | off | Signatures-only: skip live re-embedding (refresh at commit) |

### Cline / Roo Code / Kilocode

Expand Down
3 changes: 2 additions & 1 deletion docs/specs/openlore-spec-13-context-substrate.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,8 @@ Branch: `openlore-spec-13-context-substrate`. Direction locked; claims verified;
- [x] Competitive + market reality verified against primary sources (2026-05-30)
- [x] Repo ground-truth established (what actually ships vs. what was claimed)
- [x] Theses adversarially stress-tested; positioning corrected to survive the strongest attack
- [ ] **Spec 14** — Agent Token-Efficiency Benchmark Harness (WITH vs WITHOUT). *Do this first.* → [openlore-spec-14-agent-benchmark-harness.md](openlore-spec-14-agent-benchmark-harness.md)
- [ ] **Spec 13.1** — Make Incremental Freshness Cheap (Watch-Mode Performance). *Urgent regression fix — do this before 14; the watcher that backs this spec's "always-fresh" promise currently taxes every dogfooding session.* → [openlore-spec-13.1-watch-mode-performance.md](openlore-spec-13.1-watch-mode-performance.md)
- [ ] **Spec 14** — Agent Token-Efficiency Benchmark Harness (WITH vs WITHOUT). *Do this next, immediately after 13.1.* → [openlore-spec-14-agent-benchmark-harness.md](openlore-spec-14-agent-benchmark-harness.md)
- [ ] **Spec 15** — Decision & Drift Governance Dogfooding (turn the gate on in our own repo). → [openlore-spec-15-governance-dogfooding.md](openlore-spec-15-governance-dogfooding.md)
- [ ] **Spec 16** — Architectural Decisions as First-Class Graph Nodes (`affects` edges). → [openlore-spec-16-decisions-as-graph-nodes.md](openlore-spec-16-decisions-as-graph-nodes.md)
- [ ] **Spec 17** — Cross-Domain Impact Analysis (Code ↔ Infrastructure). → [openlore-spec-17-cross-domain-impact.md](openlore-spec-17-cross-domain-impact.md)
Expand Down
336 changes: 336 additions & 0 deletions docs/specs/openlore-spec-13.1-watch-mode-performance.md

Large diffs are not rendered by default.

1 change: 1 addition & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
"view": "tsx src/cli/index.ts view",
"bench": "tsx scripts/bench.ts",
"bench:mcp": "tsx scripts/bench-mcp.ts",
"bench:watch": "tsx scripts/bench-watch.ts",
"test": "vitest",
"test:run": "vitest run",
"test:coverage": "vitest run --coverage",
Expand Down
155 changes: 155 additions & 0 deletions scripts/bench-watch.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,155 @@
/**
* bench-watch.ts — watch-mode (MCP incremental re-index) microbenchmark.
*
* Spec 13.1: freshness must be O(change), not O(repo). This measures the
* per-save and bulk-burst cost of the watcher pipeline on a fixture with a
* ~2 MB llm-context.json, and asserts the coalescing/cache guarantees:
*
* G1 — a single save triggers ≤ 1 llm-context persistence and the next read
* is a cache HIT (no cold full-file re-parse).
* G2 — a burst of N saves coalesces to ONE flush.
* G4 — per-save wall-clock stays small relative to the context size
* (reported in the results table only; not asserted).
*
* Run: npm run bench:watch
*
* This is a manual benchmark (not part of CI). It builds its own throwaway
* fixture under the OS temp dir and cleans up afterwards.
*/
import { mkdtemp, mkdir, writeFile, readFile, rm, stat } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import { performance } from 'node:perf_hooks';
import { McpWatcher } from '../src/core/services/mcp-watcher.js';
import { readCachedContext, _resetContextCacheForTesting } from '../src/core/services/mcp-handlers/utils.js';

// Fixture and run sizing for the benchmark.
const FILE_COUNT = 600; // synthetic source files in the context
const ENTRIES_PER_FILE = 20; // signatures per file
// Number of timed single-save iterations; the report shows their median.
const SINGLE_SAVE_RUNS = 20;
// Number of files rewritten and enqueued together in the bulk-burst phase.
const BURST_SIZE = 50;

/**
 * Median of a list of numbers.
 *
 * Works on a sorted copy, so the caller's array is never reordered. Odd-length
 * input yields the middle element; even-length input yields the mean of the
 * two middle elements. An empty list yields NaN.
 *
 * @param xs values to summarise
 * @returns the median value
 */
function median(xs: number[]): number {
  // Numeric comparator: the default sort would compare as strings.
  const sorted = xs.slice().sort((lhs, rhs) => lhs - rhs);
  const mid = Math.floor(sorted.length / 2);
  if (sorted.length % 2 === 1) {
    return sorted[mid];
  }
  return (sorted[mid - 1] + sorted[mid]) / 2;
}

/**
 * Builds the synthetic signature list that is serialised into the fixture's
 * llm-context.json.
 *
 * Produces FILE_COUNT file records, each with ENTRIES_PER_FILE function
 * entries. Paths follow `src/module_<i>/file_<i>.ts`, which is the same layout
 * main() uses when it writes the real source files to disk.
 *
 * @returns one record per synthetic file, in ascending file index order
 */
function synthSignatures(): Array<{ path: string; language: string; entries: Array<{ name: string; signature: string; docstring: string; line: number; kind: string }> }> {
  return Array.from({ length: FILE_COUNT }, (_file, fileIdx) => ({
    path: `src/module_${fileIdx}/file_${fileIdx}.ts`,
    language: 'TypeScript',
    entries: Array.from({ length: ENTRIES_PER_FILE }, (_entry, entryIdx) => ({
      name: `fn_${fileIdx}_${entryIdx}`,
      signature: `export function fn_${fileIdx}_${entryIdx}(arg0: string, arg1: number, opts?: Record<string, unknown>): Promise<void>`,
      docstring: `Function ${entryIdx} in module ${fileIdx}. Handles a representative unit of work for the benchmark fixture.`,
      // Entries are spaced 7 lines apart, starting at line 1.
      line: entryIdx * 7 + 1,
      kind: 'function',
    })),
  }));
}

/**
 * Runs the watch-mode benchmark end to end and prints a Markdown report.
 *
 * Phases:
 *  1. Build a throwaway fixture under the OS temp dir: a synthetic
 *     llm-context.json plus one real source file per synthetic file record.
 *  2. Single-save loop: rewrite one file, time `watcher.handleChange`, then
 *     time the `readCachedContext` call that follows it.
 *  3. Cold-read baseline: clear the context cache and time a single read.
 *  4. Bulk burst: enqueue BURST_SIZE rewritten files and count the stderr
 *     lines matching `[mcp-watcher] updated|coalesced`.
 *  5. Print the report, then enforce G2 (exactly one such line) and G1
 *     (median post-save read faster than the cold read).
 *
 * The fixture directory is removed and `process.stderr.write` is restored in
 * `finally` blocks, so a failed assertion or a throwing phase neither leaks
 * the temp tree nor leaves stderr patched.
 *
 * @throws Error when `readCachedContext` returns a falsy value after a save,
 *   or when the G1 / G2 checks fail.
 */
async function main(): Promise<void> {
  const root = await mkdtemp(join(tmpdir(), 'ol-benchwatch-'));
  try {
    const analysisDir = join(root, '.openlore', 'analysis');
    await mkdir(analysisDir, { recursive: true });
    const contextPath = join(analysisDir, 'llm-context.json');

    // Build a ~2 MB context.
    const signatures = synthSignatures();
    await writeFile(contextPath, JSON.stringify({ signatures, callGraph: null }, null, 2), 'utf-8');
    const ctxBytes = (await stat(contextPath)).size;

    // Write the real source files so the watcher can read them on change.
    for (let i = 0; i < FILE_COUNT; i++) {
      const dir = join(root, 'src', `module_${i}`);
      await mkdir(dir, { recursive: true });
      await writeFile(join(dir, `file_${i}.ts`), `export function fn_${i}_0() { return ${i}; }\n`, 'utf-8');
    }

    // embed:false → measure the signature/freshness pipeline (the per-save hot
    // path the spec flagged: the 2 MB rewrite + the re-parse it used to force).
    const watcher = new McpWatcher({ rootPath: root, embed: false });

    // ── Single-save latency, including the simulated "next tool call" read ──────
    const flushTimes: number[] = [];
    const readTimes: number[] = [];
    for (let r = 0; r < SINGLE_SAVE_RUNS; r++) {
      const i = r % FILE_COUNT;
      const f = join(root, 'src', `module_${i}`, `file_${i}.ts`);
      // Vary the returned literal per run so every iteration writes new content.
      await writeFile(f, `export function fn_${i}_0() { return ${i + r * 1000}; }\n`, 'utf-8');

      const t0 = performance.now();
      await watcher.handleChange(f);
      flushTimes.push(performance.now() - t0);

      // The next "tool call" read — must be a cache HIT (no 2 MB cold re-parse).
      const t1 = performance.now();
      const ctx = await readCachedContext(root);
      readTimes.push(performance.now() - t1);
      if (!ctx) throw new Error('readCachedContext returned null after save');
    }

    // ── Cold read baseline (cache cleared → full 2 MB parse) for contrast ───────
    _resetContextCacheForTesting();
    const coldT0 = performance.now();
    await readCachedContext(root);
    const coldRead = performance.now() - coldT0;

    // ── Bulk burst: BURST_SIZE files in one window must coalesce to ONE flush ───
    // Flushes are counted by intercepting stderr and matching the watcher's
    // summary lines; every chunk is still forwarded to the real stderr.
    let summaries = 0;
    let burstTime = 0;
    const origWrite = process.stderr.write.bind(process.stderr);
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    (process.stderr as any).write = (chunk: any, ...rest: any[]): boolean => {
      if (/\[mcp-watcher\] (updated|coalesced)/.test(String(chunk))) summaries++;
      return origWrite(chunk, ...rest);
    };
    try {
      const burst = new McpWatcher({ rootPath: root, embed: false, debounceMs: 50, maxBatchMs: 2000 });
      const burstFiles: string[] = [];
      for (let i = 0; i < BURST_SIZE; i++) {
        const f = join(root, 'src', `module_${i}`, `file_${i}.ts`);
        await writeFile(f, `export function fn_${i}_0() { return ${i}*2; }\n`, 'utf-8');
        burstFiles.push(f);
      }
      const burstT0 = performance.now();
      // NOTE(review): `enqueue` is reached through a structural cast, presumably
      // because it is not part of McpWatcher's public typing — confirm.
      for (const f of burstFiles) (burst as unknown as { enqueue(p: string): void }).enqueue(f);
      // Wait for the single coalesced flush to complete. This is a fixed 400 ms
      // wait (the burst watcher's debounce is 50 ms), so the wall-clock figure
      // below includes the wait itself.
      await new Promise((res) => setTimeout(res, 400));
      burstTime = performance.now() - burstT0;
    } finally {
      // Always restore stderr, even if the burst phase throws.
      // eslint-disable-next-line @typescript-eslint/no-explicit-any
      (process.stderr as any).write = origWrite;
    }

    const report =
`## Watch-mode benchmark (Spec 13.1)

Fixture: ${FILE_COUNT} files × ${ENTRIES_PER_FILE} signatures, llm-context.json = ${(ctxBytes / 1_048_576).toFixed(2)} MB.

| Metric | Result |
|--------|--------|
| Single-save flush (median of ${SINGLE_SAVE_RUNS}) | ${median(flushTimes).toFixed(1)} ms |
| Next-call read after save (median, cache HIT) | ${median(readTimes).toFixed(2)} ms |
| Cold read (cache cleared, full parse) | ${coldRead.toFixed(1)} ms |
| ${BURST_SIZE}-file burst → flushes | ${summaries} (expected 1) |
| ${BURST_SIZE}-file burst wall-clock | ${burstTime.toFixed(1)} ms |

G1: next-call read is a cache hit — ${median(readTimes).toFixed(2)} ms vs ${coldRead.toFixed(1)} ms cold (${(coldRead / Math.max(median(readTimes), 0.001)).toFixed(0)}× faster).
G2: ${BURST_SIZE} saves coalesced to ${summaries} flush${summaries === 1 ? '' : 'es'}.
`;

    // eslint-disable-next-line no-console
    console.log(report);

    // Assertions (fail loudly in CI-less manual runs).
    if (summaries !== 1) throw new Error(`G2 violated: expected 1 coalesced flush, got ${summaries}`);
    if (median(readTimes) >= coldRead) throw new Error('G1 violated: post-save read is not faster than a cold parse');
  } finally {
    // Remove the fixture on every exit path, including failed assertions.
    await rm(root, { recursive: true, force: true });
  }
}

// Script entry point: run the benchmark; on rejection, print the error and
// exit with a non-zero status so the failure is visible to the caller.
main().then(undefined, (failure) => {
  // eslint-disable-next-line no-console
  console.error(failure);
  process.exit(1);
});
4 changes: 4 additions & 0 deletions src/cli/commands/mcp.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1286,6 +1286,7 @@ interface McpServerOptions {
watch?: string;
watchAuto?: boolean;
watchDebounce?: string;
watchNoEmbed?: boolean;
minimal?: boolean;
}

Expand Down Expand Up @@ -1353,6 +1354,7 @@ async function startMcpServer(options: McpServerOptions = {}): Promise<void> {
autoWatcher = new McpWatcher({
rootPath: resolve(dir),
debounceMs: isNaN(debounceMs) ? 400 : debounceMs,
embed: !options.watchNoEmbed,
});
await autoWatcher.start();
const cleanup = () => autoWatcher!.stop().then(() => process.exit(0));
Expand Down Expand Up @@ -1592,6 +1594,7 @@ async function startMcpServer(options: McpServerOptions = {}): Promise<void> {
const watcher = new McpWatcher({
rootPath: resolve(options.watch),
debounceMs: isNaN(debounceMs) ? 400 : debounceMs,
embed: !options.watchNoEmbed,
});
await watcher.start();
const cleanup = () => watcher.stop().then(() => process.exit(0));
Expand All @@ -1610,5 +1613,6 @@ export const mcpCommand = new Command('mcp')
.option('--watch-auto', 'Auto-detect the project directory from the first tool call and start watching', true)
.option('--no-watch-auto', 'Disable auto-watch (use for one-shot tool calls, e.g. the orient skill wrapper)')
.option('--watch-debounce <ms>', 'Debounce delay in ms before re-indexing after a file change (default: 400)', '400')
.option('--watch-no-embed', 'Watch signatures only — skip live vector re-embedding (embeddings refresh at commit). Large repos auto-degrade to this.')
.option('--minimal', 'Expose only core 5 tools (orient, search_code, record_decision, detect_changes, check_spec_drift). Pair with alwaysLoad: true in Claude Code for always-visible core tools.')
.action((options: McpServerOptions) => startMcpServer(options));
33 changes: 33 additions & 0 deletions src/constants.ts
Original file line number Diff line number Diff line change
Expand Up @@ -549,3 +549,36 @@ export const DECISIONS_CONSOLIDATION_MAX_TOKENS = 2_000;

/** Max output tokens for verification LLM call */
export const DECISIONS_VERIFICATION_MAX_TOKENS = 1_500;

// ============================================================================
// WATCH MODE (MCP incremental re-index) — Spec 13.1
// ============================================================================
// Defaults chosen to keep --watch-auto on by default while making incremental
// freshness O(change), not O(repo). See docs/specs/openlore-spec-13.1-*.

/**
 * Quiet window (ms): how long the watcher waits after the most recent file
 * change before running the coalesced flush.
 */
export const WATCH_DEBOUNCE_MS = 400;

/**
 * Upper bound (ms) on how long a batch may keep accumulating. A flush is
 * forced once this elapses, so a continuous trickle of edits cannot postpone
 * the queue forever.
 */
export const WATCH_MAX_BATCH_MS = 2_000;

/**
 * File count within one coalesced flush at which VCS-flood handling kicks in
 * (e.g. a branch switch, rebase, or formatter run touching many files).
 */
export const WATCH_BULK_THRESHOLD = 25;

/**
 * Watched-source-file ceiling for live embedding. Above it the watcher
 * auto-degrades to signatures-only and embeddings are refreshed at commit
 * (post-commit analyze --embed).
 */
export const WATCH_EMBED_FILE_CEILING = 5_000;

/**
 * Settle time (ms) after a detected VCS bulk operation (.git/HEAD or index
 * churn). The single coalesced refresh runs only after this quiet period, so
 * the whole operation has finished first.
 */
export const WATCH_VCS_SETTLE_MS = 750;
91 changes: 91 additions & 0 deletions src/core/analyzer/vector-index-updatefiles.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
/**
* Spec 13.1 — VectorIndex.updateFiles row-level incremental update.
*
* Proves the watch path replaces ONLY the changed file's rows (delete + add)
* instead of the full-table read+overwrite build() performs: a sibling file's
* rows survive an update untouched, and the changed file's rows are replaced.
* Runs BM25-only (embedSvc = null) so it needs no embedding service.
*/
import { describe, it, expect, beforeEach, afterEach } from 'vitest';
import { mkdtemp, rm } from 'node:fs/promises';
import { tmpdir } from 'node:os';
import { join } from 'node:path';
import {
VectorIndex,
_resetVectorIndexCachesForTesting,
} from './vector-index.js';
import type { FunctionNode } from './call-graph.js';

/**
 * Builds a minimal function-node fixture for the index tests.
 *
 * Only `id`, `name`, `filePath` and `signature` vary with the arguments; every
 * other field is a fixed placeholder. The literal is cast through `unknown`,
 * so it is not checked against the full FunctionNode shape.
 *
 * @param filePath file the fake function belongs to
 * @param name function name; also used to derive the id and signature
 */
function node(filePath: string, name: string): FunctionNode {
  const fixture = {
    id: `${filePath}::${name}`,
    name,
    filePath,
    className: '',
    language: 'TypeScript',
    signature: `function ${name}()`,
    docstring: '',
    fanIn: 0,
    fanOut: 0,
    startIndex: 0,
    endIndex: 0,
  };
  return fixture as unknown as FunctionNode;
}

// Scratch directory holding the index under test; recreated for every test.
let outputDir: string;

// Fresh temp directory and cleared VectorIndex caches before each test.
beforeEach(async () => {
  const scratchPrefix = join(tmpdir(), 'ol-vi-update-');
  outputDir = await mkdtemp(scratchPrefix);
  _resetVectorIndexCachesForTesting();
});

// Clear the caches again, then delete the scratch directory.
afterEach(async () => {
  _resetVectorIndexCachesForTesting();
  await rm(outputDir, { recursive: true, force: true });
});

/**
 * Searches the index in `outputDir` (no embedding service, limit 20) and
 * returns the set of function names found in the results.
 *
 * @param query search text
 */
async function names(query: string): Promise<Set<string>> {
  const hits = await VectorIndex.search(outputDir, query, null, { limit: 20 });
  const found = new Set<string>();
  for (const hit of hits) {
    found.add(hit.record.name);
  }
  return found;
}

// Verifies that VectorIndex.updateFiles swaps out only the rows of the file
// named in the changed-file set: after renaming alpha.ts's single function,
// beta.ts's row must still be searchable, the new name must be found, and the
// old name must no longer be returned.
describe('VectorIndex.updateFiles — Spec 13.1 (BM25-only)', () => {
it('replaces only the changed file rows; sibling file rows survive', async () => {
// Build a BM25-only index with two files.
const initial = [node('alpha.ts', 'alphafn'), node('beta.ts', 'betafn')];
// NOTE(review): the positional arguments after `initial` are all empty / null /
// undefined / false; presumably the `null` is the embedding service — confirm
// against VectorIndex.build's signature.
const built = await VectorIndex.build(
outputDir, initial, [], new Set(), new Set(), null, undefined, false,
);
expect(built.hasEmbeddings).toBe(false);
expect(built.total).toBe(2);
// Caches are reset between steps, presumably so each search below does not
// reuse state cached by the previous call — confirm.
_resetVectorIndexCachesForTesting();

// Sanity: both functions are findable.
expect(await names('alphafn')).toContain('alphafn');
expect(await names('betafn')).toContain('betafn');
_resetVectorIndexCachesForTesting();

// Rename alpha.ts's function → updateFiles should drop the old row and add new.
const result = await VectorIndex.updateFiles(
outputDir,
[node('alpha.ts', 'gammafn')],
new Set(['alpha.ts']),
[],
new Set(),
new Set(),
null,
undefined,
);
expect(result.hasEmbeddings).toBe(false);
_resetVectorIndexCachesForTesting();

// beta.ts is untouched (its row survived the row-level op).
expect(await names('betafn')).toContain('betafn');
_resetVectorIndexCachesForTesting();
// alpha.ts now has gammafn …
expect(await names('gammafn')).toContain('gammafn');
_resetVectorIndexCachesForTesting();
// … and the old alphafn row is gone (the delete predicate actually matched).
expect(await names('alphafn')).not.toContain('alphafn');
});
});
Loading
Loading