From a926f814e97909ab741bfb9356ba14c1fa2af72c Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Mon, 13 Apr 2026 01:39:38 +0000
Subject: [PATCH 1/6] test: add stage-local contract tests for compiler and
 query pipelines

Closes #6

- compiler-contracts.test.ts: 5 describe blocks covering Parser resilience,
  Graph closure, Index round-trip, Validation coverage, and Snapshot determinism
- query-contracts.test.ts: 7 describe blocks covering Normalizer, Seeder,
  Ranker, Frontier expansion, Projection stability, Synthesis isolation,
  and cross-cutting Trace determinism

Each test targets a specific pipeline stage promise so that a failure pinpoints
the broken stage rather than surfacing as a generic 'end-to-end answer is wrong.'
Existing end-to-end tests remain untouched.

Co-Authored-By: Stanislau <nedbailov375426@gmail.com>
---
 tests/compiler-contracts.test.ts | 276 ++++++++++++++++++++++++
 tests/query-contracts.test.ts    | 348 +++++++++++++++++++++++++++++++
 2 files changed, 624 insertions(+)
 create mode 100644 tests/compiler-contracts.test.ts
 create mode 100644 tests/query-contracts.test.ts
diff --git a/tests/compiler-contracts.test.ts b/tests/compiler-contracts.test.ts
new file mode 100644
index 0000000..b520598
--- /dev/null
+++ b/tests/compiler-contracts.test.ts
@@ -0,0 +1,276 @@
+import { createHash } from 'node:crypto';
+import { readFile } from 'node:fs/promises';
+import { resolve } from 'node:path';
+
+import { describe, expect, it } from '@rstest/core';
+
+import { compileFpfSource, type CompilerOutput } from '../src/runtime/compiler.js';
+
+/**
+ * Stage-local contract tests for the compiler pipeline.
+ *
+ * Each test targets a specific compiler stage promise so that a failure
+ * pinpoints the broken stage rather than surfacing as a generic
+ * "end-to-end answer is wrong."
+ */
+
+let cachedOutput: CompilerOutput | undefined;
+
+async function getCompilerOutput(): Promise<CompilerOutput> {
+  if (cachedOutput) {
+    return cachedOutput;
+  }
+  const sourcePath = resolve(process.cwd(), 'FPF-spec.md');
+  const sourceText = await readFile(sourcePath, 'utf8');
+  const sourceHash = createHash('sha256').update(sourceText).digest('hex');
+  cachedOutput = compileFpfSource({
+    sourcePath,
+    sourceHash,
+    builtAt: new Date().toISOString(),
+    sourceText,
+  });
+  return cachedOutput;
+}
+
+// ---------------------------------------------------------------------------
+// Stage 1: Parser resilience
+// ---------------------------------------------------------------------------
+describe('Compiler / Parser stage', () => {
+  it('parses a non-trivial number of sections, patterns, routes, and lexicon entries', async () => {
+    const { snapshot } = await getCompilerOutput();
+    const { validation } = snapshot;
+
+    expect(validation.parsedSections).toBeGreaterThan(100);
+    expect(validation.parsedPatterns).toBeGreaterThan(50);
+    expect(validation.parsedRoutes).toBeGreaterThan(0);
+    expect(validation.parsedLexiconEntries).toBeGreaterThan(5);
+  });
+
+  it('assigns IDs to all compiled nodes and none are empty strings', async () => {
+    const { snapshot } = await getCompilerOutput();
+    const nodeIds = Object.keys(snapshot.compiledNodes);
+
+    expect(nodeIds.length).toBeGreaterThan(50);
+    for (const nodeId of nodeIds) {
+      expect(nodeId.length).toBeGreaterThan(0);
+    }
+  });
+
+  it('preserves pattern metadata fields (title, status, part)', async () => {
+    const { snapshot } = await getCompilerOutput();
+    const pattern = snapshot.patternGraph.nodes['A.1.1'];
+
+    expect(pattern).toBeDefined();
+    expect(pattern!.title.length).toBeGreaterThan(0);
+    expect(pattern!.status.length).toBeGreaterThan(0);
+    expect(pattern!.sectionIds.length).toBeGreaterThan(0);
+  });
+
+  it('produces anchors with valid line ranges', async () => {
+    const { snapshot } = await getCompilerOutput();
+    const anchors = Object.values(snapshot.anchorMap);
+
+    expect(anchors.length).toBeGreaterThan(50);
+    for (const anchor of anchors.slice(0, 20)) {
+      expect(anchor.lineStart).toBeGreaterThanOrEqual(0);
+      expect(anchor.lineEnd).toBeGreaterThan(anchor.lineStart);
+    }
+
+    const nonEmpty = anchors.filter((a) => a.text.length > 0);
+    expect(nonEmpty.length).toBeGreaterThan(anchors.length / 2);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Stage 2: Graph closure
+// ---------------------------------------------------------------------------
+describe('Compiler / Graph closure stage', () => {
+  it('keeps unresolved references bounded and stable', async () => {
+    const { snapshot } = await getCompilerOutput();
+    const { validation } = snapshot;
+
+    // The FPF spec has a small number of forward/external references that
+    // don't resolve to compiled nodes.  The contract is that this set stays
+    // bounded — a regression would show as a sudden spike.
+    expect(validation.unresolvedReferences.length).toBeLessThan(20);
+  });
+
+  it('tracks duplicate IDs produced by catalog + heading overlap', async () => {
+    const { snapshot } = await getCompilerOutput();
+    const { validation } = snapshot;
+
+    // "duplicateIds" lists pattern IDs that appear in both the catalog table
+    // and heading sections — this is expected for the FPF spec.  The contract
+    // is that the count stays proportional to the number of patterns.
+    expect(validation.duplicateIds.length).toBeGreaterThan(0);
+    expect(validation.duplicateIds.length).toBeLessThan(
+      Object.keys(snapshot.patternGraph.nodes).length + 10,
+    );
+  });
+
+  it('has no broken routes', async () => {
+    const { snapshot } = await getCompilerOutput();
+    const { validation } = snapshot;
+
+    expect(validation.brokenRoutes).toEqual([]);
+  });
+
+  it('contains outline relations linking parents to children', async () => {
+    const { snapshot } = await getCompilerOutput();
+    const outlineChildren = snapshot.relationGraph.filter(
+      (edge) => edge.relation === 'outline_child',
+    );
+
+    expect(outlineChildren.length).toBeGreaterThan(10);
+
+    const a15Children = outlineChildren.filter((edge) => edge.from === 'A.15');
+    expect(a15Children.length).toBeGreaterThan(0);
+  });
+
+  it('contains explicit_reference relations extracted from source text', async () => {
+    const { snapshot } = await getCompilerOutput();
+    const explicitRefs = snapshot.relationGraph.filter(
+      (edge) => edge.relation === 'explicit_reference',
+    );
+
+    expect(explicitRefs.length).toBeGreaterThan(0);
+  });
+
+  it('routes reference mostly existing compiled nodes', async () => {
+    const { snapshot } = await getCompilerOutput();
+    const allNodeIds = new Set(Object.keys(snapshot.compiledNodes));
+
+    let total = 0;
+    let resolved = 0;
+    for (const route of Object.values(snapshot.routeGraph.nodes)) {
+      for (const id of [...route.orderedIds, ...route.optionalIds, ...route.landingIds]) {
+        total += 1;
+        if (allNodeIds.has(id)) {
+          resolved += 1;
+        }
+      }
+    }
+
+    // At least 90% of route step IDs should resolve to compiled nodes.
+    expect(resolved / total).toBeGreaterThan(0.9);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Stage 3: Index round-trip
+// ---------------------------------------------------------------------------
+describe('Compiler / Index round-trip stage', () => {
+  it('indexes resolve back to their source patterns', async () => {
+    const { snapshot } = await getCompilerOutput();
+    const indexNodes = Object.values(snapshot.indexMap);
+    const patternIds = new Set(Object.keys(snapshot.patternGraph.nodes));
+
+    const linkedToPattern = indexNodes.filter(
+      (node) => node.metadata.patternId && patternIds.has(node.metadata.patternId),
+    );
+    expect(linkedToPattern.length).toBeGreaterThan(20);
+  });
+
+  it('alias index entries resolve to existing compiled nodes', async () => {
+    const { snapshot } = await getCompilerOutput();
+    const allNodeIds = new Set(Object.keys(snapshot.compiledNodes));
+
+    for (const [_alias, nodeIds] of Object.entries(snapshot.indexes.aliasIndex)) {
+      for (const nodeId of nodeIds) {
+        expect(allNodeIds.has(nodeId)).toBe(true);
+      }
+    }
+  });
+
+  it('lexicon entries have at least one linked node', async () => {
+    const { snapshot } = await getCompilerOutput();
+
+    for (const entry of Object.values(snapshot.lexicon)) {
+      expect(entry.linkedNodeIds.length).toBeGreaterThan(0);
+    }
+  });
+
+  it('status index keys partition compiled nodes without overlap', async () => {
+    const { snapshot } = await getCompilerOutput();
+    const statusIndex = snapshot.indexes.statusIndex;
+
+    expect(Object.keys(statusIndex).length).toBeGreaterThan(0);
+
+    for (const [_status, nodeIds] of Object.entries(statusIndex)) {
+      for (const nodeId of nodeIds) {
+        expect(snapshot.compiledNodes[nodeId]).toBeDefined();
+      }
+    }
+  });
+
+  it('route name index resolves to existing route nodes', async () => {
+    const { snapshot } = await getCompilerOutput();
+
+    for (const [_name, nodeIds] of Object.entries(snapshot.indexes.routeNameIndex)) {
+      for (const nodeId of nodeIds) {
+        expect(snapshot.routeGraph.nodes[nodeId]).toBeDefined();
+      }
+    }
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Stage 4: Validation coverage
+// ---------------------------------------------------------------------------
+describe('Compiler / Validation stage', () => {
+  it('keeps missing required fields bounded', async () => {
+    const { snapshot } = await getCompilerOutput();
+    const { validation } = snapshot;
+
+    // The FPF spec has a small number of patterns with incomplete metadata.
+    // The contract is that this stays bounded — a regression would spike it.
+    expect(validation.missingRequiredFields).toBeLessThan(25);
+  });
+
+  it('counts a plausible number of index map nodes', async () => {
+    const { snapshot } = await getCompilerOutput();
+    const { validation } = snapshot;
+
+    expect(validation.indexMapNodes).toBeGreaterThan(50);
+    expect(validation.indexMapNodes).toBe(Object.keys(snapshot.indexMap).length);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Stage 5: Snapshot determinism
+// ---------------------------------------------------------------------------
+describe('Compiler / Snapshot determinism stage', () => {
+  it('produces byte-identical snapshot when compiled twice with the same input', async () => {
+    const sourcePath = resolve(process.cwd(), 'FPF-spec.md');
+    const sourceText = await readFile(sourcePath, 'utf8');
+    const sourceHash = createHash('sha256').update(sourceText).digest('hex');
+    const builtAt = '2025-01-01T00:00:00.000Z';
+
+    const first = compileFpfSource({ sourcePath, sourceHash, builtAt, sourceText });
+    const second = compileFpfSource({ sourcePath, sourceHash, builtAt, sourceText });
+
+    const firstJson = JSON.stringify(first.snapshot);
+    const secondJson = JSON.stringify(second.snapshot);
+
+    expect(firstJson).toBe(secondJson);
+  });
+
+  it('produces different sourceHash when source text changes', async () => {
+    const sourcePath = resolve(process.cwd(), 'FPF-spec.md');
+    const sourceText = await readFile(sourcePath, 'utf8');
+    const builtAt = '2025-01-01T00:00:00.000Z';
+
+    const hash1 = createHash('sha256').update(sourceText).digest('hex');
+    const hash2 = createHash('sha256').update(`${sourceText}\n<!-- change -->\n`).digest('hex');
+
+    const first = compileFpfSource({ sourcePath, sourceHash: hash1, builtAt, sourceText });
+    const second = compileFpfSource({
+      sourcePath,
+      sourceHash: hash2,
+      builtAt,
+      sourceText: `${sourceText}\n<!-- change -->\n`,
+    });
+
+    expect(first.snapshot.sourceHash).not.toBe(second.snapshot.sourceHash);
+  });
+});
diff --git a/tests/query-contracts.test.ts b/tests/query-contracts.test.ts
new file mode 100644
index 0000000..b867b49
--- /dev/null
+++ b/tests/query-contracts.test.ts
@@ -0,0 +1,348 @@
+import { createHash } from 'node:crypto';
+import { readFile } from 'node:fs/promises';
+import { resolve } from 'node:path';
+
+import { describe, expect, it } from '@rstest/core';
+
+import { compileFpfSource } from '../src/runtime/compiler.js';
+import { QueryEngine } from '../src/runtime/query-engine.js';
+import type { LocalAnswerSynthesizer, Snapshot } from '../src/runtime/types.js';
+
+/**
+ * Stage-local contract tests for the query pipeline.
+ *
+ * Each test targets a specific retrieval stage promise so that a failure
+ * pinpoints the broken stage rather than surfacing as a generic
+ * "end-to-end answer is wrong."
+ */
+
+let cachedSnapshot: Snapshot | undefined;
+
+async function getSnapshot(): Promise<Snapshot> {
+  if (cachedSnapshot) {
+    return cachedSnapshot;
+  }
+  const sourcePath = resolve(process.cwd(), 'FPF-spec.md');
+  const sourceText = await readFile(sourcePath, 'utf8');
+  const sourceHash = createHash('sha256').update(sourceText).digest('hex');
+  const output = compileFpfSource({
+    sourcePath,
+    sourceHash,
+    builtAt: new Date().toISOString(),
+    sourceText,
+  });
+  cachedSnapshot = output.snapshot;
+  return cachedSnapshot;
+}
+
+function engine(snapshot: Snapshot, synthesizer?: LocalAnswerSynthesizer): QueryEngine {
+  return new QueryEngine(snapshot, false, synthesizer);
+}
+
+// ---------------------------------------------------------------------------
+// Stage 1: Normalizer
+// ---------------------------------------------------------------------------
+describe('Query / Normalizer stage', () => {
+  it('detects explicit IDs in the question', async () => {
+    const snapshot = await getSnapshot();
+    const trace = engine(snapshot).trace('What is A.1.1?');
+
+    expect(trace.detected.ids).toContain('A.1.1');
+    expect(trace.normalizedQuestion.length).toBeGreaterThan(0);
+  });
+
+  it('detects route names when mentioned in the question', async () => {
+    const snapshot = await getSnapshot();
+    const routeNames = Object.values(snapshot.routeGraph.nodes).map((r) => r.name);
+    const firstRoute = routeNames[0];
+
+    if (firstRoute) {
+      const trace = engine(snapshot).trace(`Tell me about the ${firstRoute} route`);
+      expect(trace.detected.routeNames).toContain(firstRoute);
+    }
+  });
+
+  it('detects status terms present in the status index', async () => {
+    const snapshot = await getSnapshot();
+    const statusKeys = Object.keys(snapshot.indexes.statusIndex);
+
+    if (statusKeys.length > 0) {
+      const firstStatus = statusKeys[0]!;
+      const trace = engine(snapshot).trace(`Show me ${firstStatus} patterns`);
+      expect(trace.detected.statusTerms).toContain(firstStatus);
+    }
+  });
+
+  it('returns empty signals for a nonsense question', async () => {
+    const snapshot = await getSnapshot();
+    const trace = engine(snapshot).trace('xyzzy plugh');
+
+    expect(trace.detected.ids).toEqual([]);
+    expect(trace.detected.routeNames).toEqual([]);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Stage 2: Candidate seeder
+// ---------------------------------------------------------------------------
+describe('Query / Seed coverage stage', () => {
+  it('seeds exact-match candidates when explicit IDs are in the question', async () => {
+    const snapshot = await getSnapshot();
+    const trace = engine(snapshot).trace('What is A.1.1?');
+
+    const exactCandidate = trace.candidateScores.find((c) => c.nodeId === 'A.1.1');
+    expect(exactCandidate).toBeDefined();
+    expect(exactCandidate!.reasons).toContain('exact-id');
+    expect(exactCandidate!.score).toBeGreaterThanOrEqual(100);
+  });
+
+  it('seeds lexical candidates for keyword-rich queries', async () => {
+    const snapshot = await getSnapshot();
+    const trace = engine(snapshot).trace('How does bounded context relate to role assignment?');
+
+    const lexicalFrontier = trace.frontierCandidates.filter((c) => c.origin === 'lexical');
+    expect(lexicalFrontier.length).toBeGreaterThan(0);
+  });
+
+  it('seeds route expansion candidates for route-bearing queries', async () => {
+    const snapshot = await getSnapshot();
+    const trace = engine(snapshot).trace(
+      'What is the first practical route when vocabulary is overloaded across teams?',
+    );
+
+    const routeCandidates = trace.candidateScores.filter((c) => c.kind === 'route');
+    expect(routeCandidates.length).toBeGreaterThan(0);
+  });
+
+  it('produces few or low-scoring candidates for a completely unrelated question', async () => {
+    const snapshot = await getSnapshot();
+    const trace = engine(snapshot).trace('xyzzy plugh');
+
+    // Index description overlap may still surface some weak candidates.
+    // The contract is that no candidate scores above the exact-match
+    // threshold (100) and total count stays low relative to the full catalog.
+    const highScoring = trace.candidateScores.filter((c) => c.score >= 100);
+    expect(highScoring.length).toBe(0);
+    expect(trace.candidateScores.length).toBeLessThan(
+      Object.keys(snapshot.compiledNodes).length / 2,
+    );
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Stage 3: Candidate ranker
+// ---------------------------------------------------------------------------
+describe('Query / Ranker stage', () => {
+  it('ranks exact-ID matches above lexical matches', async () => {
+    const snapshot = await getSnapshot();
+    const trace = engine(snapshot).trace('What is A.1.1?');
+
+    const scores = trace.candidateScores;
+    expect(scores.length).toBeGreaterThan(0);
+    expect(scores[0]!.nodeId).toBe('A.1.1');
+  });
+
+  it('selects the expected initial node IDs for an explicit ID query', async () => {
+    const snapshot = await getSnapshot();
+    const trace = engine(snapshot).trace('What is A.1.1?');
+
+    expect(trace.selectedNodeIds).toContain('A.1.1');
+  });
+
+  it('selects a route node when route intent is clear', async () => {
+    const snapshot = await getSnapshot();
+    const trace = engine(snapshot).trace(
+      'What is the first practical route when vocabulary is overloaded across teams?',
+    );
+
+    const routeNodes = trace.selectedNodeIds.filter(
+      (id) => snapshot.compiledNodes[id]?.kind === 'route',
+    );
+    expect(routeNodes.length).toBeGreaterThan(0);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Stage 4: Frontier expansion bounds
+// ---------------------------------------------------------------------------
+describe('Query / Frontier expansion stage', () => {
+  it('respects the MAX_HOPS budget (≤6 retrieval hops)', async () => {
+    const snapshot = await getSnapshot();
+    const trace = engine(snapshot).trace(
+      'How do U.RoleAssignment, U.BoundedContext, and U.RoleStateGraph connect in a lawful workflow?',
+    );
+
+    expect(trace.retrievalHops.length).toBeLessThanOrEqual(6);
+  });
+
+  it('respects the MAX_SELECTED_ANCHORS budget (≤12 anchors)', async () => {
+    const snapshot = await getSnapshot();
+    const trace = engine(snapshot).trace('What is A.1.1?');
+
+    expect(trace.selectedAnchorIds.length).toBeLessThanOrEqual(12);
+  });
+
+  it('records hop metadata (iteration, reason, added nodes/anchors)', async () => {
+    const snapshot = await getSnapshot();
+    const trace = engine(snapshot).trace(
+      'How do U.RoleAssignment, U.BoundedContext, and U.RoleStateGraph connect in a lawful workflow?',
+    );
+
+    if (trace.retrievalHops.length > 0) {
+      const firstHop = trace.retrievalHops[0]!;
+      expect(firstHop.iteration).toBe(1);
+      expect(firstHop.reason.length).toBeGreaterThan(0);
+      expect(typeof firstHop.sufficientAfter).toBe('boolean');
+    }
+  });
+
+  it('marks sufficiency correctly — sufficient traces have anchors', async () => {
+    const snapshot = await getSnapshot();
+    const trace = engine(snapshot).trace('What is A.1.1?');
+
+    if (trace.sufficient) {
+      expect(trace.selectedAnchorIds.length).toBeGreaterThan(0);
+    }
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Stage 5: Answer projection stability
+// ---------------------------------------------------------------------------
+describe('Query / Projection stability stage', () => {
+  it('produces stable support set across repeated queries', async () => {
+    const snapshot = await getSnapshot();
+    const eng = engine(snapshot);
+
+    const trace1 = eng.trace('What is A.1.1?');
+    const trace2 = eng.trace('What is A.1.1?');
+
+    expect(trace1.selectedNodeIds).toEqual(trace2.selectedNodeIds);
+    expect(trace1.selectedAnchorIds).toEqual(trace2.selectedAnchorIds);
+    expect(trace1.candidateScores.map((c) => c.nodeId)).toEqual(
+      trace2.candidateScores.map((c) => c.nodeId),
+    );
+  });
+
+  it('projects a non-empty answer with citations for a known pattern query', async () => {
+    const snapshot = await getSnapshot();
+    const result = await engine(snapshot).query('What is A.1.1?', 'verbose');
+
+    expect(result.status).toBe('ok');
+    expect(result.answer.length).toBeGreaterThan(0);
+    expect(result.ids).toContain('A.1.1');
+    expect(result.citations.length).toBeGreaterThan(0);
+  });
+
+  it('projects constraints for verbose mode', async () => {
+    const snapshot = await getSnapshot();
+    const result = await engine(snapshot).query('What is A.1.1?', 'verbose');
+
+    expect(result.constraints.length).toBeGreaterThanOrEqual(1);
+  });
+
+  it('projects a grounding chain in proof mode', async () => {
+    const snapshot = await getSnapshot();
+    const result = await engine(snapshot).query('What is A.1.1?', 'proof');
+
+    expect(result.groundingChain).toBeDefined();
+    expect(result.groundingChain!.length).toBeGreaterThan(0);
+  });
+
+  it('returns low-confidence status for completely unresolvable questions', async () => {
+    const snapshot = await getSnapshot();
+    const result = await engine(snapshot).query('xyzzy plugh nonsense', 'compact');
+
+    // Weak index-description overlap may still produce ambiguous candidates,
+    // so the engine may return 'ambiguous' or 'not_found'.  The contract is
+    // that confidence stays below the high-confidence threshold.
+    expect(['not_found', 'ambiguous']).toContain(result.status);
+    expect(result.confidence).toBeLessThan(0.7);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Stage 6: Synthesis isolation
+// ---------------------------------------------------------------------------
+describe('Query / Synthesis isolation stage', () => {
+  it('returns deterministic answer when synthesizer is unavailable', async () => {
+    const snapshot = await getSnapshot();
+    const unavailable: LocalAnswerSynthesizer = {
+      isAvailable: async () => false,
+      synthesize: async () => {
+        throw new Error('should not be called');
+      },
+    };
+
+    const result = await engine(snapshot, unavailable).query('What is A.1.1?', 'verbose');
+
+    expect(result.status).toBe('ok');
+    expect(result.ids).toContain('A.1.1');
+    expect(result.answer.length).toBeGreaterThan(0);
+  });
+
+  it('falls back to deterministic answer when synthesizer throws', async () => {
+    const snapshot = await getSnapshot();
+    const failing: LocalAnswerSynthesizer = {
+      isAvailable: async () => true,
+      synthesize: async () => {
+        throw new Error('synthesizer crashed');
+      },
+    };
+
+    const result = await engine(snapshot, failing).query('What is A.1.1?', 'verbose');
+
+    expect(result.status).toBe('ok');
+    expect(result.ids).toContain('A.1.1');
+    expect(result.gaps.some((gap) => gap.includes('synthesis skipped') || gap.includes('synthesizer crashed'))).toBe(true);
+  });
+
+  it('does not alter deterministic IDs or citations when synthesis fails', async () => {
+    const snapshot = await getSnapshot();
+    const eng = engine(snapshot);
+    const deterministicResult = await eng.query('What is A.1.1?', 'verbose');
+
+    const failing: LocalAnswerSynthesizer = {
+      isAvailable: async () => true,
+      synthesize: async () => {
+        throw new Error('test failure');
+      },
+    };
+    const failedSynthResult = await engine(snapshot, failing).query('What is A.1.1?', 'verbose');
+
+    expect(failedSynthResult.ids).toEqual(deterministicResult.ids);
+    expect(failedSynthResult.citations).toEqual(deterministicResult.citations);
+    expect(failedSynthResult.relations).toEqual(deterministicResult.relations);
+  });
+
+  it('does not call synthesize when synthesizer reports unavailable', async () => {
+    const snapshot = await getSnapshot();
+    let synthesizeCalled = false;
+    const unavailable: LocalAnswerSynthesizer = {
+      isAvailable: async () => false,
+      synthesize: async () => {
+        synthesizeCalled = true;
+        return {};
+      },
+    };
+
+    await engine(snapshot, unavailable).query('What is A.1.1?', 'compact');
+
+    expect(synthesizeCalled).toBe(false);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Trace determinism (cross-cutting)
+// ---------------------------------------------------------------------------
+describe('Query / Trace determinism', () => {
+  it('same snapshot + same query → identical trace structure', async () => {
+    const snapshot = await getSnapshot();
+    const eng = engine(snapshot);
+
+    const trace1 = eng.trace('How does bounded context relate to role assignment?', 'verbose');
+    const trace2 = eng.trace('How does bounded context relate to role assignment?', 'verbose');
+
+    expect(JSON.stringify(trace1)).toBe(JSON.stringify(trace2));
+  });
+});

From b7c8660d0f99a2d4f6f1b185b9212c07e7f575fa Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Mon, 13 Apr 2026 01:58:20 +0000
Subject: [PATCH 2/6] test: address review feedback on contract tests

- Add total>0 guard before division in route resolution test (Devin Review)
- Use hardcoded normalizer tokens for status term test (Codex)
- Use frontierCandidates for route seeder and nonsense tests (Copilot)
- Rename status index test to match actual assertions (CodeRabbit)
- Fix hop metadata test to handle already-sufficient grounding
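- Use real FPF IDs (A.1.1, A.15, B.3) for hop metadata query
- Pin builtAt to a fixed timestamp in the shared compile helpers so cached
  snapshots are reproducible across runs
- Extract parser-stage thresholds into named MIN_* constants
- Replace silent if-guards with explicit assertions in the route-name,
  status-term, and sufficiency tests so they can no longer pass vacuously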

Co-Authored-By: Stanislau <nedbailov375426@gmail.com>
---
 tests/compiler-contracts.test.ts | 16 +++++++----
 tests/query-contracts.test.ts    | 46 +++++++++++++++++++-------------
 2 files changed, 38 insertions(+), 24 deletions(-)

diff --git a/tests/compiler-contracts.test.ts b/tests/compiler-contracts.test.ts
index b520598..3e7cb28 100644
--- a/tests/compiler-contracts.test.ts
+++ b/tests/compiler-contracts.test.ts
@@ -26,12 +26,17 @@ async function getCompilerOutput(): Promise<CompilerOutput> {
   cachedOutput = compileFpfSource({
     sourcePath,
     sourceHash,
-    builtAt: new Date().toISOString(),
+    builtAt: '2025-01-01T00:00:00.000Z',
     sourceText,
   });
   return cachedOutput;
 }
 
+/** Minimum thresholds — deliberately loose so spec edits don't break tests. */
+const MIN_SECTIONS = 100;
+const MIN_PATTERNS = 50;
+const MIN_LEXICON_ENTRIES = 5;
+
 // ---------------------------------------------------------------------------
 // Stage 1: Parser resilience
 // ---------------------------------------------------------------------------
@@ -40,10 +45,10 @@ describe('Compiler / Parser stage', () => {
     const { snapshot } = await getCompilerOutput();
     const { validation } = snapshot;
 
-    expect(validation.parsedSections).toBeGreaterThan(100);
-    expect(validation.parsedPatterns).toBeGreaterThan(50);
+    expect(validation.parsedSections).toBeGreaterThan(MIN_SECTIONS);
+    expect(validation.parsedPatterns).toBeGreaterThan(MIN_PATTERNS);
     expect(validation.parsedRoutes).toBeGreaterThan(0);
-    expect(validation.parsedLexiconEntries).toBeGreaterThan(5);
+    expect(validation.parsedLexiconEntries).toBeGreaterThan(MIN_LEXICON_ENTRIES);
   });
 
   it('assigns IDs to all compiled nodes and none are empty strings', async () => {
@@ -152,6 +157,7 @@ describe('Compiler / Graph closure stage', () => {
     }
 
     // At least 90% of route step IDs should resolve to compiled nodes.
+    expect(total).toBeGreaterThan(0);
     expect(resolved / total).toBeGreaterThan(0.9);
   });
 });
@@ -190,7 +196,7 @@ describe('Compiler / Index round-trip stage', () => {
     }
   });
 
-  it('status index keys partition compiled nodes without overlap', async () => {
+  it('status index entries resolve to existing compiled nodes', async () => {
     const { snapshot } = await getCompilerOutput();
     const statusIndex = snapshot.indexes.statusIndex;
 
diff --git a/tests/query-contracts.test.ts b/tests/query-contracts.test.ts
index b867b49..8079aa5 100644
--- a/tests/query-contracts.test.ts
+++ b/tests/query-contracts.test.ts
@@ -28,7 +28,7 @@ async function getSnapshot(): Promise<Snapshot> {
   const output = compileFpfSource({
     sourcePath,
     sourceHash,
-    builtAt: new Date().toISOString(),
+    builtAt: '2025-01-01T00:00:00.000Z',
     sourceText,
   });
   cachedSnapshot = output.snapshot;
@@ -54,23 +54,25 @@ describe('Query / Normalizer stage', () => {
   it('detects route names when mentioned in the question', async () => {
     const snapshot = await getSnapshot();
     const routeNames = Object.values(snapshot.routeGraph.nodes).map((r) => r.name);
-    const firstRoute = routeNames[0];
 
-    if (firstRoute) {
-      const trace = engine(snapshot).trace(`Tell me about the ${firstRoute} route`);
-      expect(trace.detected.routeNames).toContain(firstRoute);
-    }
+    expect(routeNames.length).toBeGreaterThan(0);
+    const firstRoute = routeNames[0]!;
+    const trace = engine(snapshot).trace(`Tell me about the ${firstRoute} route`);
+    expect(trace.detected.routeNames).toContain(firstRoute);
   });
 
   it('detects status terms present in the status index', async () => {
     const snapshot = await getSnapshot();
-    const statusKeys = Object.keys(snapshot.indexes.statusIndex);
 
-    if (statusKeys.length > 0) {
-      const firstStatus = statusKeys[0]!;
-      const trace = engine(snapshot).trace(`Show me ${firstStatus} patterns`);
-      expect(trace.detected.statusTerms).toContain(firstStatus);
-    }
+    // The normalizer only detects these fixed tokens.
+    const knownTokens = ['draft', 'stable', 'stub', 'transitional'];
+    const matchedToken = knownTokens.find(
+      (t) => snapshot.indexes.statusIndex[t] !== undefined,
+    );
+
+    expect(matchedToken).toBeDefined();
+    const trace = engine(snapshot).trace(`Show me ${matchedToken} patterns`);
+    expect(trace.detected.statusTerms).toContain(matchedToken);
   });
 
   it('returns empty signals for a nonsense question', async () => {
@@ -110,8 +112,10 @@ describe('Query / Seed coverage stage', () => {
       'What is the first practical route when vocabulary is overloaded across teams?',
     );
 
-    const routeCandidates = trace.candidateScores.filter((c) => c.kind === 'route');
-    expect(routeCandidates.length).toBeGreaterThan(0);
+    const routeFrontier = trace.frontierCandidates.filter(
+      (c) => c.origin === 'route_expansion',
+    );
+    expect(routeFrontier.length).toBeGreaterThan(0);
   });
 
   it('produces few or low-scoring candidates for a completely unrelated question', async () => {
@@ -123,7 +127,7 @@ describe('Query / Seed coverage stage', () => {
     // threshold (100) and total count stays low relative to the full catalog.
     const highScoring = trace.candidateScores.filter((c) => c.score >= 100);
     expect(highScoring.length).toBe(0);
-    expect(trace.candidateScores.length).toBeLessThan(
+    expect(trace.frontierCandidates.length).toBeLessThan(
       Object.keys(snapshot.compiledNodes).length / 2,
     );
   });
@@ -185,14 +189,19 @@ describe('Query / Frontier expansion stage', () => {
   it('records hop metadata (iteration, reason, added nodes/anchors)', async () => {
     const snapshot = await getSnapshot();
     const trace = engine(snapshot).trace(
-      'How do U.RoleAssignment, U.BoundedContext, and U.RoleStateGraph connect in a lawful workflow?',
+      'How do A.1.1, A.15, and B.3 connect in a lawful workflow?',
     );
 
+    // If the engine already considers the grounding sufficient before any
+    // expansion, hops will be empty — that's valid behavior, not a test failure.
     if (trace.retrievalHops.length > 0) {
       const firstHop = trace.retrievalHops[0]!;
       expect(firstHop.iteration).toBe(1);
       expect(firstHop.reason.length).toBeGreaterThan(0);
       expect(typeof firstHop.sufficientAfter).toBe('boolean');
+    } else {
+      // No hops means grounding was already sufficient from initial selection.
+      expect(trace.sufficient).toBe(true);
     }
   });
 
@@ -200,9 +209,8 @@ describe('Query / Frontier expansion stage', () => {
     const snapshot = await getSnapshot();
     const trace = engine(snapshot).trace('What is A.1.1?');
 
-    if (trace.sufficient) {
-      expect(trace.selectedAnchorIds.length).toBeGreaterThan(0);
-    }
+    expect(trace.sufficient).toBe(true);
+    expect(trace.selectedAnchorIds.length).toBeGreaterThan(0);
   });
 });
 

From c4765d65dc104f79776f793db9c7bd04766a576a Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Mon, 13 Apr 2026 02:06:22 +0000
Subject: [PATCH 3/6] test: replace tautological sourceHash check with
 structural assertion

Co-Authored-By: Stanislau <nedbailov375426@gmail.com>
---
 tests/compiler-contracts.test.ts | 21 +++++++++++++++++----
 1 file changed, 17 insertions(+), 4 deletions(-)

diff --git a/tests/compiler-contracts.test.ts b/tests/compiler-contracts.test.ts
index 3e7cb28..73ad7b9 100644
--- a/tests/compiler-contracts.test.ts
+++ b/tests/compiler-contracts.test.ts
@@ -261,22 +261,35 @@ describe('Compiler / Snapshot determinism stage', () => {
     expect(firstJson).toBe(secondJson);
   });
 
-  it('produces different sourceHash when source text changes', async () => {
+  it('produces structurally different output when source text changes', async () => {
     const sourcePath = resolve(process.cwd(), 'FPF-spec.md');
     const sourceText = await readFile(sourcePath, 'utf8');
     const builtAt = '2025-01-01T00:00:00.000Z';
 
     const hash1 = createHash('sha256').update(sourceText).digest('hex');
-    const hash2 = createHash('sha256').update(`${sourceText}\n<!-- change -->\n`).digest('hex');
+    // Append a new heading + body — the compiler must parse it as an
+    // additional section, which changes the structural output (not just
+    // the caller-provided hash).
+    const modifiedText = `${sourceText}\n\n## Z.99 Synthetic Test Section\n\nA synthetic section added to verify the compiler processes changed source text.\n`;
+    const hash2 = createHash('sha256').update(modifiedText).digest('hex');
 
     const first = compileFpfSource({ sourcePath, sourceHash: hash1, builtAt, sourceText });
     const second = compileFpfSource({
       sourcePath,
       sourceHash: hash2,
       builtAt,
-      sourceText: `${sourceText}\n<!-- change -->\n`,
+      sourceText: modifiedText,
     });
 
-    expect(first.snapshot.sourceHash).not.toBe(second.snapshot.sourceHash);
+    // Verify a structural difference — the added heading should produce at
+    // least one more parsed section or index-map node than the original.
+    const firstSections = first.snapshot.validation.parsedSections;
+    const firstIndexNodes = Object.keys(first.snapshot.indexMap).length;
+    const secondSections = second.snapshot.validation.parsedSections;
+    const secondIndexNodes = Object.keys(second.snapshot.indexMap).length;
+
+    const structurallyDifferent =
+      secondSections > firstSections || secondIndexNodes > firstIndexNodes;
+    expect(structurallyDifferent).toBe(true);
   });
 });

From 82dd2dc10c768332011405066be3b95687f38a18 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Tue, 14 Apr 2026 03:53:23 +0000
Subject: [PATCH 4/6] fix: replace nonsense probe string, strengthen Z.99
 assertion, add spec-ID fixture comments

- Replace 'xyzzy plugh' with '__FPFTEST_NONSENSE_999__' to avoid false
  failures if the spec ever mentions those words
- Strengthen Z.99 structural assertion: verify parsedSections grows (not
  just 'something grew'), with comment explaining why Z.99 appears as a
  section not a pattern node
- Add canonical fixture ID comments to both test files explaining that
  A.1.1/A.15/B.3 are stable spec anchors and where to update if renamed

Co-Authored-By: Stanislau <nedbailov375426@gmail.com>
---
 tests/compiler-contracts.test.ts | 11 ++++++++---
 tests/query-contracts.test.ts    | 10 +++++++---
 2 files changed, 15 insertions(+), 6 deletions(-)

diff --git a/tests/compiler-contracts.test.ts b/tests/compiler-contracts.test.ts
index 73ad7b9..5813a4a 100644
--- a/tests/compiler-contracts.test.ts
+++ b/tests/compiler-contracts.test.ts
@@ -12,6 +12,10 @@ import { compileFpfSource, type CompilerOutput } from '../src/runtime/compiler.j
  * Each test targets a specific compiler stage promise so that a failure
  * pinpoints the broken stage rather than surfacing as a generic
  * "end-to-end answer is wrong."
+ *
+ * Canonical fixture IDs: `A.1.1` is used as a stable spec anchor for
+ * metadata assertions. If the FPF spec renames or renumbers this
+ * pattern, update the ID here to match.
  */
 
 let cachedOutput: CompilerOutput | undefined;
@@ -288,8 +292,9 @@ describe('Compiler / Snapshot determinism stage', () => {
     const secondSections = second.snapshot.validation.parsedSections;
     const secondIndexNodes = Object.keys(second.snapshot.indexMap).length;
 
-    const structurallyDifferent =
-      secondSections > firstSections || secondIndexNodes > firstIndexNodes;
-    expect(structurallyDifferent).toBe(true);
+    // The synthetic Z.99 heading is parsed as a section (not a pattern —
+    // the compiler only promotes headings that match spec-catalog entries).
+    // Verify the section count grew, proving the parser handled the new heading.
+    expect(secondSections).toBeGreaterThan(firstSections);
   });
 });
diff --git a/tests/query-contracts.test.ts b/tests/query-contracts.test.ts
index 8079aa5..c00574c 100644
--- a/tests/query-contracts.test.ts
+++ b/tests/query-contracts.test.ts
@@ -14,6 +14,10 @@ import type { LocalAnswerSynthesizer, Snapshot } from '../src/runtime/types.js';
  * Each test targets a specific retrieval stage promise so that a failure
  * pinpoints the broken stage rather than surfacing as a generic
  * "end-to-end answer is wrong."
+ *
+ * Canonical fixture IDs: `A.1.1`, `A.15`, `B.3` are used as stable spec
+ * anchors throughout these tests. If the FPF spec renames or renumbers
+ * these patterns, update the IDs here to match.
  */
 
 let cachedSnapshot: Snapshot | undefined;
@@ -77,7 +81,7 @@ describe('Query / Normalizer stage', () => {
 
   it('returns empty signals for a nonsense question', async () => {
     const snapshot = await getSnapshot();
-    const trace = engine(snapshot).trace('xyzzy plugh');
+    const trace = engine(snapshot).trace('__FPFTEST_NONSENSE_999__');
 
     expect(trace.detected.ids).toEqual([]);
     expect(trace.detected.routeNames).toEqual([]);
@@ -120,7 +124,7 @@ describe('Query / Seed coverage stage', () => {
 
   it('produces few or low-scoring candidates for a completely unrelated question', async () => {
     const snapshot = await getSnapshot();
-    const trace = engine(snapshot).trace('xyzzy plugh');
+    const trace = engine(snapshot).trace('__FPFTEST_NONSENSE_999__');
 
     // Index description overlap may still surface some weak candidates.
     // The contract is that no candidate scores above the exact-match
@@ -259,7 +263,7 @@ describe('Query / Projection stability stage', () => {
 
   it('returns low-confidence status for completely unresolvable questions', async () => {
     const snapshot = await getSnapshot();
-    const result = await engine(snapshot).query('xyzzy plugh nonsense', 'compact');
+    const result = await engine(snapshot).query('__FPFTEST_NONSENSE_999__', 'compact');
 
     // Weak index-description overlap may still produce ambiguous candidates,
     // so the engine may return 'ambiguous' or 'not_found'.  The contract is

From 7a69ba0159c6cfa70a3bf37dbf367a01f387d414 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Tue, 14 Apr 2026 03:56:05 +0000
Subject: [PATCH 5/6] fix: remove unused firstIndexNodes/secondIndexNodes
 variables

Co-Authored-By: Stanislau <nedbailov375426@gmail.com>
---
 tests/compiler-contracts.test.ts | 2 --
 1 file changed, 2 deletions(-)

diff --git a/tests/compiler-contracts.test.ts b/tests/compiler-contracts.test.ts
index 5813a4a..5ecfbb7 100644
--- a/tests/compiler-contracts.test.ts
+++ b/tests/compiler-contracts.test.ts
@@ -288,9 +288,7 @@ describe('Compiler / Snapshot determinism stage', () => {
     // Verify a structural difference — the added heading should produce at
     // least one more parsed section or index-map node than the original.
     const firstSections = first.snapshot.validation.parsedSections;
-    const firstIndexNodes = Object.keys(first.snapshot.indexMap).length;
     const secondSections = second.snapshot.validation.parsedSections;
-    const secondIndexNodes = Object.keys(second.snapshot.indexMap).length;
 
     // The synthetic Z.99 heading is parsed as a section (not a pattern —
     // the compiler only promotes headings that match spec-catalog entries).

From 13f7af7e5481addd25c284d0e7263e698517cd07 Mon Sep 17 00:00:00 2001
From: Devin AI <158243242+devin-ai-integration[bot]@users.noreply.github.com>
Date: Tue, 14 Apr 2026 05:43:04 +0000
Subject: [PATCH 6/6] refactor: call stage modules directly in query contract
 tests

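Address venikman's review: replace QueryEngine.trace()/query() with direct
imports of normalizeQuery, seedCandidates, rankCandidates, isAmbiguous,
expandGrounding, buildPatternAnswer, prepareSynthesisSlices,
synthesizeAnswer, confidenceFromTrace, and gapsFromTrace. A local
assembleTrace() helper chains the stages for the projection and synthesis
tests.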

Each describe block now asserts on the output of its own stage function, so
a failure is reported against that stage rather than against QueryEngine as
a whole. Upstream stages are still executed to produce realistic inputs.

Co-Authored-By: Stanislau <nedbailov375426@gmail.com>
---
 tests/query-contracts.test.ts | 345 +++++++++++++++++++++++++---------
 1 file changed, 258 insertions(+), 87 deletions(-)

diff --git a/tests/query-contracts.test.ts b/tests/query-contracts.test.ts
index c00574c..174c646 100644
--- a/tests/query-contracts.test.ts
+++ b/tests/query-contracts.test.ts
@@ -5,15 +5,26 @@ import { resolve } from 'node:path';
 import { describe, expect, it } from '@rstest/core';
 
 import { compileFpfSource } from '../src/runtime/compiler.js';
-import { QueryEngine } from '../src/runtime/query-engine.js';
-import type { LocalAnswerSynthesizer, Snapshot } from '../src/runtime/types.js';
+import { normalizeQuery } from '../src/runtime/query-normalizer.js';
+import { seedCandidates } from '../src/runtime/candidate-seeder.js';
+import { isAmbiguous, rankCandidates } from '../src/runtime/candidate-ranker.js';
+import { expandGrounding } from '../src/runtime/frontier-expander.js';
+import {
+  buildPatternAnswer,
+  confidenceFromTrace,
+  gapsFromTrace,
+  prepareSynthesisSlices,
+} from '../src/runtime/answer-projector.js';
+import { synthesizeAnswer } from '../src/runtime/synthesis-adapter.js';
+import { MAX_EXCLUDED } from '../src/runtime/constants.js';
+import type { CompiledNode, LocalAnswerSynthesizer, Snapshot, TraceResult } from '../src/runtime/types.js';
 
 /**
  * Stage-local contract tests for the query pipeline.
  *
- * Each test targets a specific retrieval stage promise so that a failure
- * pinpoints the broken stage rather than surfacing as a generic
- * "end-to-end answer is wrong."
+ * Each describe block targets a single stage function imported directly
+ * from its module so that a regression in one stage cannot masquerade
+ * as a failure in another.
  *
  * Canonical fixture IDs: `A.1.1`, `A.15`, `B.3` are used as stable spec
  * anchors throughout these tests. If the FPF spec renames or renumbers
@@ -39,20 +50,80 @@ async function getSnapshot(): Promise<Snapshot> {
   return cachedSnapshot;
 }
 
-function engine(snapshot: Snapshot, synthesizer?: LocalAnswerSynthesizer): QueryEngine {
-  return new QueryEngine(snapshot, false, synthesizer);
+/**
+ * Assemble a TraceResult from stage outputs, mirroring QueryEngine.trace().
+ * Used by projection and synthesis tests so they can feed stage outputs
+ * forward without routing through QueryEngine.
+ */
+function assembleTrace(
+  question: string,
+  mode: 'compact' | 'verbose' | 'proof',
+  snapshot: Snapshot,
+): TraceResult {
+  const normalized = normalizeQuery(question, snapshot);
+  const seeding = seedCandidates(normalized, snapshot);
+  const ranking = rankCandidates(question, seeding.candidateMap, snapshot);
+  const grounding = expandGrounding(
+    question,
+    ranking.candidates,
+    ranking.initialNodeIds,
+    ranking.initialAnchorIds,
+    seeding.frontierCandidates,
+    seeding.frontierKeys,
+    snapshot,
+  );
+
+  const selectedNodeIds = grounding.selectedNodeIds;
+  const excludedNodeIds = ranking.candidates
+    .map((c) => c.nodeId)
+    .filter((nodeId) => !selectedNodeIds.includes(nodeId))
+    .slice(0, MAX_EXCLUDED);
+  const status =
+    selectedNodeIds.length === 0
+      ? 'not_found'
+      : ranking.routeWins
+        ? 'ok'
+        : isAmbiguous(question, ranking.candidates)
+          ? 'ambiguous'
+          : 'ok';
+
+  return {
+    mode,
+    question,
+    normalizedQuestion: normalized.normalizedQuestion,
+    detected: normalized.detected,
+    candidateScores: ranking.candidates.slice(0, 16),
+    frontierCandidates: seeding.frontierCandidates,
+    graphExpansions: grounding.graphExpansions,
+    selectedNodeIds,
+    selectedAnchorIds: grounding.selectedAnchorIds,
+    excludedNodeIds,
+    followedReferences: grounding.followedReferences,
+    retrievalHops: grounding.retrievalHops,
+    sessionApplied: seeding.sessionApplied,
+    sessionReusedNodeIds: [],
+    sessionMateriallyChanged: false,
+    sufficient: grounding.sufficient,
+    routeWins: ranking.routeWins,
+    status,
+    snapshot: {
+      sourceHash: snapshot.sourceHash,
+      builtAt: snapshot.builtAt,
+      rebuilt: false,
+    },
+  };
 }
 
 // ---------------------------------------------------------------------------
-// Stage 1: Normalizer
+// Stage 1: Normalizer  (normalizeQuery)
 // ---------------------------------------------------------------------------
 describe('Query / Normalizer stage', () => {
   it('detects explicit IDs in the question', async () => {
     const snapshot = await getSnapshot();
-    const trace = engine(snapshot).trace('What is A.1.1?');
+    const normalized = normalizeQuery('What is A.1.1?', snapshot);
 
-    expect(trace.detected.ids).toContain('A.1.1');
-    expect(trace.normalizedQuestion.length).toBeGreaterThan(0);
+    expect(normalized.detected.ids).toContain('A.1.1');
+    expect(normalized.normalizedQuestion.length).toBeGreaterThan(0);
   });
 
   it('detects route names when mentioned in the question', async () => {
@@ -61,42 +132,42 @@ describe('Query / Normalizer stage', () => {
 
     expect(routeNames.length).toBeGreaterThan(0);
     const firstRoute = routeNames[0]!;
-    const trace = engine(snapshot).trace(`Tell me about the ${firstRoute} route`);
-    expect(trace.detected.routeNames).toContain(firstRoute);
+    const normalized = normalizeQuery(`Tell me about the ${firstRoute} route`, snapshot);
+    expect(normalized.detected.routeNames).toContain(firstRoute);
   });
 
   it('detects status terms present in the status index', async () => {
     const snapshot = await getSnapshot();
 
-    // The normalizer only detects these fixed tokens.
     const knownTokens = ['draft', 'stable', 'stub', 'transitional'];
     const matchedToken = knownTokens.find(
       (t) => snapshot.indexes.statusIndex[t] !== undefined,
     );
 
     expect(matchedToken).toBeDefined();
-    const trace = engine(snapshot).trace(`Show me ${matchedToken} patterns`);
-    expect(trace.detected.statusTerms).toContain(matchedToken);
+    const normalized = normalizeQuery(`Show me ${matchedToken} patterns`, snapshot);
+    expect(normalized.detected.statusTerms).toContain(matchedToken);
   });
 
   it('returns empty signals for a nonsense question', async () => {
     const snapshot = await getSnapshot();
-    const trace = engine(snapshot).trace('__FPFTEST_NONSENSE_999__');
+    const normalized = normalizeQuery('__FPFTEST_NONSENSE_999__', snapshot);
 
-    expect(trace.detected.ids).toEqual([]);
-    expect(trace.detected.routeNames).toEqual([]);
+    expect(normalized.detected.ids).toEqual([]);
+    expect(normalized.detected.routeNames).toEqual([]);
   });
 });
 
 // ---------------------------------------------------------------------------
-// Stage 2: Candidate seeder
+// Stage 2: Candidate seeder  (seedCandidates)
 // ---------------------------------------------------------------------------
 describe('Query / Seed coverage stage', () => {
   it('seeds exact-match candidates when explicit IDs are in the question', async () => {
     const snapshot = await getSnapshot();
-    const trace = engine(snapshot).trace('What is A.1.1?');
+    const normalized = normalizeQuery('What is A.1.1?', snapshot);
+    const seeding = seedCandidates(normalized, snapshot);
 
-    const exactCandidate = trace.candidateScores.find((c) => c.nodeId === 'A.1.1');
+    const exactCandidate = seeding.candidateMap.get('A.1.1');
     expect(exactCandidate).toBeDefined();
     expect(exactCandidate!.reasons).toContain('exact-id');
     expect(exactCandidate!.score).toBeGreaterThanOrEqual(100);
@@ -104,19 +175,22 @@ describe('Query / Seed coverage stage', () => {
 
   it('seeds lexical candidates for keyword-rich queries', async () => {
     const snapshot = await getSnapshot();
-    const trace = engine(snapshot).trace('How does bounded context relate to role assignment?');
+    const normalized = normalizeQuery('How does bounded context relate to role assignment?', snapshot);
+    const seeding = seedCandidates(normalized, snapshot);
 
-    const lexicalFrontier = trace.frontierCandidates.filter((c) => c.origin === 'lexical');
+    const lexicalFrontier = seeding.frontierCandidates.filter((c) => c.origin === 'lexical');
     expect(lexicalFrontier.length).toBeGreaterThan(0);
   });
 
   it('seeds route expansion candidates for route-bearing queries', async () => {
     const snapshot = await getSnapshot();
-    const trace = engine(snapshot).trace(
+    const normalized = normalizeQuery(
       'What is the first practical route when vocabulary is overloaded across teams?',
+      snapshot,
     );
+    const seeding = seedCandidates(normalized, snapshot);
 
-    const routeFrontier = trace.frontierCandidates.filter(
+    const routeFrontier = seeding.frontierCandidates.filter(
       (c) => c.origin === 'route_expansion',
     );
     expect(routeFrontier.length).toBeGreaterThan(0);
@@ -124,46 +198,49 @@ describe('Query / Seed coverage stage', () => {
 
   it('produces few or low-scoring candidates for a completely unrelated question', async () => {
     const snapshot = await getSnapshot();
-    const trace = engine(snapshot).trace('__FPFTEST_NONSENSE_999__');
+    const normalized = normalizeQuery('__FPFTEST_NONSENSE_999__', snapshot);
+    const seeding = seedCandidates(normalized, snapshot);
 
-    // Index description overlap may still surface some weak candidates.
-    // The contract is that no candidate scores above the exact-match
-    // threshold (100) and total count stays low relative to the full catalog.
-    const highScoring = trace.candidateScores.filter((c) => c.score >= 100);
+    const highScoring = Array.from(seeding.candidateMap.values()).filter((c) => c.score >= 100);
     expect(highScoring.length).toBe(0);
-    expect(trace.frontierCandidates.length).toBeLessThan(
+    expect(seeding.frontierCandidates.length).toBeLessThan(
       Object.keys(snapshot.compiledNodes).length / 2,
     );
   });
 });
 
 // ---------------------------------------------------------------------------
-// Stage 3: Candidate ranker
+// Stage 3: Candidate ranker  (rankCandidates)
 // ---------------------------------------------------------------------------
 describe('Query / Ranker stage', () => {
   it('ranks exact-ID matches above lexical matches', async () => {
     const snapshot = await getSnapshot();
-    const trace = engine(snapshot).trace('What is A.1.1?');
+    const normalized = normalizeQuery('What is A.1.1?', snapshot);
+    const seeding = seedCandidates(normalized, snapshot);
+    const ranking = rankCandidates('What is A.1.1?', seeding.candidateMap, snapshot);
 
-    const scores = trace.candidateScores;
-    expect(scores.length).toBeGreaterThan(0);
-    expect(scores[0]!.nodeId).toBe('A.1.1');
+    expect(ranking.candidates.length).toBeGreaterThan(0);
+    expect(ranking.candidates[0]!.nodeId).toBe('A.1.1');
   });
 
   it('selects the expected initial node IDs for an explicit ID query', async () => {
     const snapshot = await getSnapshot();
-    const trace = engine(snapshot).trace('What is A.1.1?');
+    const normalized = normalizeQuery('What is A.1.1?', snapshot);
+    const seeding = seedCandidates(normalized, snapshot);
+    const ranking = rankCandidates('What is A.1.1?', seeding.candidateMap, snapshot);
 
-    expect(trace.selectedNodeIds).toContain('A.1.1');
+    expect(ranking.initialNodeIds).toContain('A.1.1');
   });
 
   it('selects a route node when route intent is clear', async () => {
     const snapshot = await getSnapshot();
-    const trace = engine(snapshot).trace(
-      'What is the first practical route when vocabulary is overloaded across teams?',
-    );
+    const question = 'What is the first practical route when vocabulary is overloaded across teams?';
+    const normalized = normalizeQuery(question, snapshot);
+    const seeding = seedCandidates(normalized, snapshot);
+    const ranking = rankCandidates(question, seeding.candidateMap, snapshot);
 
-    const routeNodes = trace.selectedNodeIds.filter(
+    expect(ranking.routeWins).toBe(true);
+    const routeNodes = ranking.initialNodeIds.filter(
       (id) => snapshot.compiledNodes[id]?.kind === 'route',
     );
     expect(routeNodes.length).toBeGreaterThan(0);
@@ -171,63 +248,103 @@ describe('Query / Ranker stage', () => {
 });
 
 // ---------------------------------------------------------------------------
-// Stage 4: Frontier expansion bounds
+// Stage 4: Frontier expansion  (expandGrounding)
 // ---------------------------------------------------------------------------
 describe('Query / Frontier expansion stage', () => {
   it('respects the MAX_HOPS budget (≤6 retrieval hops)', async () => {
     const snapshot = await getSnapshot();
-    const trace = engine(snapshot).trace(
-      'How do U.RoleAssignment, U.BoundedContext, and U.RoleStateGraph connect in a lawful workflow?',
+    const question = 'How do U.RoleAssignment, U.BoundedContext, and U.RoleStateGraph connect in a lawful workflow?';
+    const normalized = normalizeQuery(question, snapshot);
+    const seeding = seedCandidates(normalized, snapshot);
+    const ranking = rankCandidates(question, seeding.candidateMap, snapshot);
+    const grounding = expandGrounding(
+      question,
+      ranking.candidates,
+      ranking.initialNodeIds,
+      ranking.initialAnchorIds,
+      seeding.frontierCandidates,
+      seeding.frontierKeys,
+      snapshot,
     );
 
-    expect(trace.retrievalHops.length).toBeLessThanOrEqual(6);
+    expect(grounding.retrievalHops.length).toBeLessThanOrEqual(6);
   });
 
   it('respects the MAX_SELECTED_ANCHORS budget (≤12 anchors)', async () => {
     const snapshot = await getSnapshot();
-    const trace = engine(snapshot).trace('What is A.1.1?');
+    const question = 'What is A.1.1?';
+    const normalized = normalizeQuery(question, snapshot);
+    const seeding = seedCandidates(normalized, snapshot);
+    const ranking = rankCandidates(question, seeding.candidateMap, snapshot);
+    const grounding = expandGrounding(
+      question,
+      ranking.candidates,
+      ranking.initialNodeIds,
+      ranking.initialAnchorIds,
+      seeding.frontierCandidates,
+      seeding.frontierKeys,
+      snapshot,
+    );
 
-    expect(trace.selectedAnchorIds.length).toBeLessThanOrEqual(12);
+    expect(grounding.selectedAnchorIds.length).toBeLessThanOrEqual(12);
   });
 
   it('records hop metadata (iteration, reason, added nodes/anchors)', async () => {
     const snapshot = await getSnapshot();
-    const trace = engine(snapshot).trace(
-      'How do A.1.1, A.15, and B.3 connect in a lawful workflow?',
+    const question = 'How do A.1.1, A.15, and B.3 connect in a lawful workflow?';
+    const normalized = normalizeQuery(question, snapshot);
+    const seeding = seedCandidates(normalized, snapshot);
+    const ranking = rankCandidates(question, seeding.candidateMap, snapshot);
+    const grounding = expandGrounding(
+      question,
+      ranking.candidates,
+      ranking.initialNodeIds,
+      ranking.initialAnchorIds,
+      seeding.frontierCandidates,
+      seeding.frontierKeys,
+      snapshot,
     );
 
-    // If the engine already considers the grounding sufficient before any
-    // expansion, hops will be empty — that's valid behavior, not a test failure.
-    if (trace.retrievalHops.length > 0) {
-      const firstHop = trace.retrievalHops[0]!;
+    if (grounding.retrievalHops.length > 0) {
+      const firstHop = grounding.retrievalHops[0]!;
       expect(firstHop.iteration).toBe(1);
       expect(firstHop.reason.length).toBeGreaterThan(0);
       expect(typeof firstHop.sufficientAfter).toBe('boolean');
     } else {
-      // No hops means grounding was already sufficient from initial selection.
-      expect(trace.sufficient).toBe(true);
+      expect(grounding.sufficient).toBe(true);
     }
   });
 
   it('marks sufficiency correctly — sufficient traces have anchors', async () => {
     const snapshot = await getSnapshot();
-    const trace = engine(snapshot).trace('What is A.1.1?');
+    const question = 'What is A.1.1?';
+    const normalized = normalizeQuery(question, snapshot);
+    const seeding = seedCandidates(normalized, snapshot);
+    const ranking = rankCandidates(question, seeding.candidateMap, snapshot);
+    const grounding = expandGrounding(
+      question,
+      ranking.candidates,
+      ranking.initialNodeIds,
+      ranking.initialAnchorIds,
+      seeding.frontierCandidates,
+      seeding.frontierKeys,
+      snapshot,
+    );
 
-    expect(trace.sufficient).toBe(true);
-    expect(trace.selectedAnchorIds.length).toBeGreaterThan(0);
+    expect(grounding.sufficient).toBe(true);
+    expect(grounding.selectedAnchorIds.length).toBeGreaterThan(0);
   });
 });
 
 // ---------------------------------------------------------------------------
-// Stage 5: Answer projection stability
+// Stage 5: Answer projection  (buildPatternAnswer / confidenceFromTrace / gapsFromTrace)
 // ---------------------------------------------------------------------------
 describe('Query / Projection stability stage', () => {
-  it('produces stable support set across repeated queries', async () => {
+  it('produces stable support set across repeated stage invocations', async () => {
     const snapshot = await getSnapshot();
-    const eng = engine(snapshot);
 
-    const trace1 = eng.trace('What is A.1.1?');
-    const trace2 = eng.trace('What is A.1.1?');
+    const trace1 = assembleTrace('What is A.1.1?', 'compact', snapshot);
+    const trace2 = assembleTrace('What is A.1.1?', 'compact', snapshot);
 
     expect(trace1.selectedNodeIds).toEqual(trace2.selectedNodeIds);
     expect(trace1.selectedAnchorIds).toEqual(trace2.selectedAnchorIds);
@@ -238,7 +355,8 @@ describe('Query / Projection stability stage', () => {
 
   it('projects a non-empty answer with citations for a known pattern query', async () => {
     const snapshot = await getSnapshot();
-    const result = await engine(snapshot).query('What is A.1.1?', 'verbose');
+    const trace = assembleTrace('What is A.1.1?', 'verbose', snapshot);
+    const result = buildPatternAnswer('What is A.1.1?', 'verbose', trace, snapshot, false);
 
     expect(result.status).toBe('ok');
     expect(result.answer.length).toBeGreaterThan(0);
@@ -248,37 +366,61 @@ describe('Query / Projection stability stage', () => {
 
   it('projects constraints for verbose mode', async () => {
     const snapshot = await getSnapshot();
-    const result = await engine(snapshot).query('What is A.1.1?', 'verbose');
+    const trace = assembleTrace('What is A.1.1?', 'verbose', snapshot);
+    const result = buildPatternAnswer('What is A.1.1?', 'verbose', trace, snapshot, false);
 
     expect(result.constraints.length).toBeGreaterThanOrEqual(1);
   });
 
   it('projects a grounding chain in proof mode', async () => {
     const snapshot = await getSnapshot();
-    const result = await engine(snapshot).query('What is A.1.1?', 'proof');
+    const trace = assembleTrace('What is A.1.1?', 'proof', snapshot);
+    const result = buildPatternAnswer('What is A.1.1?', 'proof', trace, snapshot, false);
 
     expect(result.groundingChain).toBeDefined();
     expect(result.groundingChain!.length).toBeGreaterThan(0);
   });
 
-  it('returns low-confidence status for completely unresolvable questions', async () => {
+  it('returns low confidence for completely unresolvable questions', async () => {
+    const snapshot = await getSnapshot();
+    const trace = assembleTrace('__FPFTEST_NONSENSE_999__', 'compact', snapshot);
+
+    expect(['not_found', 'ambiguous']).toContain(trace.status);
+    expect(confidenceFromTrace(trace)).toBeLessThan(0.7);
+  });
+
+  it('computes confidence via confidenceFromTrace without QueryEngine', async () => {
+    const snapshot = await getSnapshot();
+    const trace = assembleTrace('What is A.1.1?', 'verbose', snapshot);
+
+    const confidence = confidenceFromTrace(trace);
+    expect(confidence).toBeGreaterThan(0.5);
+    expect(confidence).toBeLessThanOrEqual(1);
+  });
+
+  it('computes gaps via gapsFromTrace without QueryEngine', async () => {
     const snapshot = await getSnapshot();
-    const result = await engine(snapshot).query('__FPFTEST_NONSENSE_999__', 'compact');
+    const trace = assembleTrace('What is A.1.1?', 'verbose', snapshot);
 
-    // Weak index-description overlap may still produce ambiguous candidates,
-    // so the engine may return 'ambiguous' or 'not_found'.  The contract is
-    // that confidence stays below the high-confidence threshold.
-    expect(['not_found', 'ambiguous']).toContain(result.status);
-    expect(result.confidence).toBeLessThan(0.7);
+    const gaps = gapsFromTrace(trace);
+    expect(Array.isArray(gaps)).toBe(true);
   });
 });
 
 // ---------------------------------------------------------------------------
-// Stage 6: Synthesis isolation
+// Stage 6: Synthesis isolation  (synthesizeAnswer)
 // ---------------------------------------------------------------------------
 describe('Query / Synthesis isolation stage', () => {
   it('returns deterministic answer when synthesizer is unavailable', async () => {
     const snapshot = await getSnapshot();
+    const trace = assembleTrace('What is A.1.1?', 'verbose', snapshot);
+    const deterministicResult = buildPatternAnswer('What is A.1.1?', 'verbose', trace, snapshot, false);
+    const nodes = trace.selectedNodeIds
+      .map((nodeId) => snapshot.compiledNodes[nodeId])
+      .filter((node): node is CompiledNode => Boolean(node))
+      .slice(0, 8);
+    const slices = prepareSynthesisSlices(trace, snapshot);
+
     const unavailable: LocalAnswerSynthesizer = {
       isAvailable: async () => false,
       synthesize: async () => {
@@ -286,7 +428,9 @@ describe('Query / Synthesis isolation stage', () => {
       },
     };
 
-    const result = await engine(snapshot, unavailable).query('What is A.1.1?', 'verbose');
+    const result = await synthesizeAnswer(
+      'What is A.1.1?', 'verbose', trace, nodes, slices, deterministicResult, unavailable,
+    );
 
     expect(result.status).toBe('ok');
     expect(result.ids).toContain('A.1.1');
@@ -295,6 +439,14 @@ describe('Query / Synthesis isolation stage', () => {
 
   it('falls back to deterministic answer when synthesizer throws', async () => {
     const snapshot = await getSnapshot();
+    const trace = assembleTrace('What is A.1.1?', 'verbose', snapshot);
+    const deterministicResult = buildPatternAnswer('What is A.1.1?', 'verbose', trace, snapshot, false);
+    const nodes = trace.selectedNodeIds
+      .map((nodeId) => snapshot.compiledNodes[nodeId])
+      .filter((node): node is CompiledNode => Boolean(node))
+      .slice(0, 8);
+    const slices = prepareSynthesisSlices(trace, snapshot);
+
     const failing: LocalAnswerSynthesizer = {
       isAvailable: async () => true,
       synthesize: async () => {
@@ -302,7 +454,9 @@ describe('Query / Synthesis isolation stage', () => {
       },
     };
 
-    const result = await engine(snapshot, failing).query('What is A.1.1?', 'verbose');
+    const result = await synthesizeAnswer(
+      'What is A.1.1?', 'verbose', trace, nodes, slices, deterministicResult, failing,
+    );
 
     expect(result.status).toBe('ok');
     expect(result.ids).toContain('A.1.1');
@@ -311,8 +465,13 @@ describe('Query / Synthesis isolation stage', () => {
 
   it('does not alter deterministic IDs or citations when synthesis fails', async () => {
     const snapshot = await getSnapshot();
-    const eng = engine(snapshot);
-    const deterministicResult = await eng.query('What is A.1.1?', 'verbose');
+    const trace = assembleTrace('What is A.1.1?', 'verbose', snapshot);
+    const deterministicResult = buildPatternAnswer('What is A.1.1?', 'verbose', trace, snapshot, false);
+    const nodes = trace.selectedNodeIds
+      .map((nodeId) => snapshot.compiledNodes[nodeId])
+      .filter((node): node is CompiledNode => Boolean(node))
+      .slice(0, 8);
+    const slices = prepareSynthesisSlices(trace, snapshot);
 
     const failing: LocalAnswerSynthesizer = {
       isAvailable: async () => true,
@@ -320,7 +479,10 @@ describe('Query / Synthesis isolation stage', () => {
         throw new Error('test failure');
       },
     };
-    const failedSynthResult = await engine(snapshot, failing).query('What is A.1.1?', 'verbose');
+
+    const failedSynthResult = await synthesizeAnswer(
+      'What is A.1.1?', 'verbose', trace, nodes, slices, deterministicResult, failing,
+    );
 
     expect(failedSynthResult.ids).toEqual(deterministicResult.ids);
     expect(failedSynthResult.citations).toEqual(deterministicResult.citations);
@@ -329,6 +491,14 @@ describe('Query / Synthesis isolation stage', () => {
 
   it('does not call synthesize when synthesizer reports unavailable', async () => {
     const snapshot = await getSnapshot();
+    const trace = assembleTrace('What is A.1.1?', 'verbose', snapshot);
+    const deterministicResult = buildPatternAnswer('What is A.1.1?', 'verbose', trace, snapshot, false);
+    const nodes = trace.selectedNodeIds
+      .map((nodeId) => snapshot.compiledNodes[nodeId])
+      .filter((node): node is CompiledNode => Boolean(node))
+      .slice(0, 8);
+    const slices = prepareSynthesisSlices(trace, snapshot);
+
     let synthesizeCalled = false;
     const unavailable: LocalAnswerSynthesizer = {
       isAvailable: async () => false,
@@ -338,22 +508,23 @@ describe('Query / Synthesis isolation stage', () => {
       },
     };
 
-    await engine(snapshot, unavailable).query('What is A.1.1?', 'compact');
+    await synthesizeAnswer(
+      'What is A.1.1?', 'verbose', trace, nodes, slices, deterministicResult, unavailable,
+    ); // mode kept identical to the 'verbose' trace and deterministicResult built above
 
     expect(synthesizeCalled).toBe(false);
   });
 });
 
 // ---------------------------------------------------------------------------
-// Trace determinism (cross-cutting)
+// Trace determinism (cross-cutting — assembled from stages, not QueryEngine)
 // ---------------------------------------------------------------------------
 describe('Query / Trace determinism', () => {
-  it('same snapshot + same query → identical trace structure', async () => {
+  it('same snapshot + same question → identical assembled trace', async () => {
     const snapshot = await getSnapshot();
-    const eng = engine(snapshot);
 
-    const trace1 = eng.trace('How does bounded context relate to role assignment?', 'verbose');
-    const trace2 = eng.trace('How does bounded context relate to role assignment?', 'verbose');
+    const trace1 = assembleTrace('How does bounded context relate to role assignment?', 'verbose', snapshot);
+    const trace2 = assembleTrace('How does bounded context relate to role assignment?', 'verbose', snapshot);
 
     expect(JSON.stringify(trace1)).toBe(JSON.stringify(trace2));
   });