diff --git a/tests/compiler-contracts.test.ts b/tests/compiler-contracts.test.ts
new file mode 100644
index 0000000..5ecfbb7
--- /dev/null
+++ b/tests/compiler-contracts.test.ts
@@ -0,0 +1,306 @@
+import { createHash } from 'node:crypto';
+import { readFile } from 'node:fs/promises';
+import { resolve } from 'node:path';
+
+import { describe, expect, it } from '@rstest/core';
+
+import { compileFpfSource, type CompilerOutput } from '../src/runtime/compiler.js';
+
+/**
+ * Stage-local contract tests for the compiler pipeline.
+ *
+ * Each test targets a specific compiler stage promise so that a failure
+ * pinpoints the broken stage rather than surfacing as a generic
+ * "end-to-end answer is wrong."
+ *
+ * Canonical fixture IDs: `A.1.1` is used as a stable spec anchor for
+ * metadata assertions. If the FPF spec renames or renumbers this
+ * pattern, update the ID here to match.
+ */
+
+/** Memoised compiler output, filled in by the first `getCompilerOutput()` call. */
+let cachedOutput: CompilerOutput | undefined;
+
+/**
+ * Compile `FPF-spec.md` (resolved against the current working directory)
+ * once and hand the same `CompilerOutput` to every later caller.
+ *
+ * `builtAt` is pinned to a fixed timestamp instead of the current time.
+ * The returned promise rejects if the spec file cannot be read.
+ */
+async function getCompilerOutput(): Promise<CompilerOutput> {
+  if (cachedOutput) {
+    return cachedOutput;
+  }
+  const sourcePath = resolve(process.cwd(), 'FPF-spec.md');
+  const sourceText = await readFile(sourcePath, 'utf8');
+  const sourceHash = createHash('sha256').update(sourceText).digest('hex');
+  cachedOutput = compileFpfSource({
+    sourcePath,
+    sourceHash,
+    builtAt: '2025-01-01T00:00:00.000Z',
+    sourceText,
+  });
+  return cachedOutput;
+}
+
+/** Minimum thresholds — deliberately loose so spec edits don't break tests.
*/
+const MIN_SECTIONS = 100;
+const MIN_PATTERNS = 50;
+const MIN_LEXICON_ENTRIES = 5;
+
+// ---------------------------------------------------------------------------
+// Stage 1: Parser resilience
+// ---------------------------------------------------------------------------
+describe('Compiler / Parser stage', () => {
+  it('parses a non-trivial number of sections, patterns, routes, and lexicon entries', async () => {
+    const { snapshot } = await getCompilerOutput();
+    const { validation } = snapshot;
+
+    expect(validation.parsedSections).toBeGreaterThan(MIN_SECTIONS);
+    expect(validation.parsedPatterns).toBeGreaterThan(MIN_PATTERNS);
+    expect(validation.parsedRoutes).toBeGreaterThan(0);
+    expect(validation.parsedLexiconEntries).toBeGreaterThan(MIN_LEXICON_ENTRIES);
+  });
+
+  it('assigns IDs to all compiled nodes and none are empty strings', async () => {
+    const { snapshot } = await getCompilerOutput();
+    const nodeIds = Object.keys(snapshot.compiledNodes);
+
+    expect(nodeIds.length).toBeGreaterThan(50);
+    for (const nodeId of nodeIds) {
+      expect(nodeId.length).toBeGreaterThan(0);
+    }
+  });
+
+  it('preserves pattern metadata fields (title, status, sectionIds)', async () => {
+    const { snapshot } = await getCompilerOutput();
+    const pattern = snapshot.patternGraph.nodes['A.1.1'];
+
+    expect(pattern).toBeDefined();
+    expect(pattern!.title.length).toBeGreaterThan(0);
+    expect(pattern!.status.length).toBeGreaterThan(0);
+    expect(pattern!.sectionIds.length).toBeGreaterThan(0);
+  });
+
+  it('produces anchors with valid line ranges', async () => {
+    const { snapshot } = await getCompilerOutput();
+    const anchors = Object.values(snapshot.anchorMap);
+
+    expect(anchors.length).toBeGreaterThan(50);
+    for (const anchor of anchors.slice(0, 20)) { // NOTE(review): only the first 20 anchors are range-checked
+      expect(anchor.lineStart).toBeGreaterThanOrEqual(0);
+      expect(anchor.lineEnd).toBeGreaterThan(anchor.lineStart);
+    }
+
+    const nonEmpty = anchors.filter((a) => a.text.length > 0);
+    expect(nonEmpty.length).toBeGreaterThan(anchors.length / 2);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Stage 2: Graph closure
+// ---------------------------------------------------------------------------
+describe('Compiler / Graph closure stage', () => {
+  it('keeps unresolved references bounded and stable', async () => {
+    const { snapshot } = await getCompilerOutput();
+    const { validation } = snapshot;
+
+    // The FPF spec has a small number of forward/external references that
+    // don't resolve to compiled nodes. The contract is that this set stays
+    // bounded — a regression would show as a sudden spike.
+    expect(validation.unresolvedReferences.length).toBeLessThan(20);
+  });
+
+  it('tracks duplicate IDs produced by catalog + heading overlap', async () => {
+    const { snapshot } = await getCompilerOutput();
+    const { validation } = snapshot;
+
+    // "duplicateIds" lists pattern IDs that appear in both the catalog table
+    // and heading sections — this is expected for the FPF spec. The contract
+    // is that the count stays proportional to the number of patterns.
+    expect(validation.duplicateIds.length).toBeGreaterThan(0);
+    expect(validation.duplicateIds.length).toBeLessThan(
+      Object.keys(snapshot.patternGraph.nodes).length + 10,
+    );
+  });
+
+  it('has no broken routes', async () => {
+    const { snapshot } = await getCompilerOutput();
+    const { validation } = snapshot;
+
+    expect(validation.brokenRoutes).toEqual([]);
+  });
+
+  it('contains outline relations linking parents to children', async () => {
+    const { snapshot } = await getCompilerOutput();
+    const outlineChildren = snapshot.relationGraph.filter(
+      (edge) => edge.relation === 'outline_child',
+    );
+
+    expect(outlineChildren.length).toBeGreaterThan(10);
+
+    const a15Children = outlineChildren.filter((edge) => edge.from === 'A.15');
+    expect(a15Children.length).toBeGreaterThan(0);
+  });
+
+  it('contains explicit_reference relations extracted from source text', async () => {
+    const { snapshot } = await getCompilerOutput();
+    const explicitRefs = snapshot.relationGraph.filter(
+      (edge) => edge.relation === 'explicit_reference',
+    );
+
+    expect(explicitRefs.length).toBeGreaterThan(0);
+  });
+
+  it('routes reference mostly existing compiled nodes', async () => {
+    const { snapshot } = await getCompilerOutput();
+    const allNodeIds = new Set(Object.keys(snapshot.compiledNodes));
+
+    let total = 0;
+    let resolved = 0;
+    for (const route of Object.values(snapshot.routeGraph.nodes)) {
+      for (const id of [...route.orderedIds, ...route.optionalIds, ...route.landingIds]) {
+        total += 1;
+        if (allNodeIds.has(id)) {
+          resolved += 1;
+        }
+      }
+    }
+
+    // At least 90% of route step IDs should resolve to compiled nodes.
+    expect(total).toBeGreaterThan(0);
+    expect(resolved / total).toBeGreaterThan(0.9);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Stage 3: Index round-trip
+// ---------------------------------------------------------------------------
+describe('Compiler / Index round-trip stage', () => {
+  it('indexes resolve back to their source patterns', async () => {
+    const { snapshot } = await getCompilerOutput();
+    const indexNodes = Object.values(snapshot.indexMap);
+    const patternIds = new Set(Object.keys(snapshot.patternGraph.nodes));
+
+    const linkedToPattern = indexNodes.filter(
+      (node) => node.metadata.patternId && patternIds.has(node.metadata.patternId),
+    );
+    expect(linkedToPattern.length).toBeGreaterThan(20);
+  });
+
+  it('alias index entries resolve to existing compiled nodes', async () => {
+    const { snapshot } = await getCompilerOutput();
+    const allNodeIds = new Set(Object.keys(snapshot.compiledNodes));
+
+    for (const nodeIds of Object.values(snapshot.indexes.aliasIndex)) {
+      for (const nodeId of nodeIds) {
+        expect(allNodeIds.has(nodeId)).toBe(true);
+      }
+    }
+  });
+
+  it('lexicon entries have at least one linked node', async () => {
+    const { snapshot } = await getCompilerOutput();
+
+    for (const entry of Object.values(snapshot.lexicon)) {
+      expect(entry.linkedNodeIds.length).toBeGreaterThan(0);
+    }
+  });
+
+  it('status index entries resolve to existing compiled nodes', async () => {
+    const { snapshot } = await getCompilerOutput();
+    const statusIndex = snapshot.indexes.statusIndex;
+
+    expect(Object.keys(statusIndex).length).toBeGreaterThan(0);
+
+    for (const nodeIds of Object.values(statusIndex)) {
+      for (const nodeId of nodeIds) {
+        expect(snapshot.compiledNodes[nodeId]).toBeDefined();
+      }
+    }
+  });
+
+  it('route name index resolves to existing route nodes', async () => {
+    const { snapshot } = await getCompilerOutput();
+
+    for (const nodeIds of Object.values(snapshot.indexes.routeNameIndex)) {
+      for (const nodeId of nodeIds) {
+        expect(snapshot.routeGraph.nodes[nodeId]).toBeDefined();
+      }
+    }
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Stage 4: Validation coverage
+// ---------------------------------------------------------------------------
+describe('Compiler / Validation stage', () => {
+  it('keeps missing required fields bounded', async () => {
+    const { snapshot } = await getCompilerOutput();
+    const { validation } = snapshot;
+
+    // The FPF spec has a small number of patterns with incomplete metadata.
+    // The contract is that this stays bounded — a regression would spike it.
+    expect(validation.missingRequiredFields).toBeLessThan(25);
+  });
+
+  it('counts a plausible number of index map nodes', async () => {
+    const { snapshot } = await getCompilerOutput();
+    const { validation } = snapshot;
+
+    expect(validation.indexMapNodes).toBeGreaterThan(50);
+    expect(validation.indexMapNodes).toBe(Object.keys(snapshot.indexMap).length);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Stage 5: Snapshot determinism
+// ---------------------------------------------------------------------------
+describe('Compiler / Snapshot determinism stage', () => {
+  it('produces byte-identical snapshot when compiled twice with the same input', async () => {
+    const sourcePath = resolve(process.cwd(), 'FPF-spec.md');
+    const sourceText = await readFile(sourcePath, 'utf8');
+    const sourceHash = createHash('sha256').update(sourceText).digest('hex');
+    const builtAt = '2025-01-01T00:00:00.000Z';
+
+    const first = compileFpfSource({ sourcePath, sourceHash, builtAt, sourceText });
+    const second = compileFpfSource({ sourcePath, sourceHash, builtAt, sourceText });
+
+    const firstJson = JSON.stringify(first.snapshot);
+    const secondJson = JSON.stringify(second.snapshot);
+
+    expect(firstJson).toBe(secondJson);
+  });
+
+  it('produces structurally different output when source text changes', async () => {
+    const sourcePath = resolve(process.cwd(), 'FPF-spec.md');
+    const sourceText = await readFile(sourcePath, 'utf8');
+    const builtAt = '2025-01-01T00:00:00.000Z';
+
+    const hash1 = createHash('sha256').update(sourceText).digest('hex');
+    // Append a new heading + body — the compiler must parse it as an
+    // additional section, which changes the structural output (not just
+    // the caller-provided hash).
+    const modifiedText = `${sourceText}\n\n## Z.99 Synthetic Test Section\n\nA synthetic section added to verify the compiler processes changed source text.\n`;
+    const hash2 = createHash('sha256').update(modifiedText).digest('hex');
+
+    const first = compileFpfSource({ sourcePath, sourceHash: hash1, builtAt, sourceText });
+    const second = compileFpfSource({
+      sourcePath,
+      sourceHash: hash2,
+      builtAt,
+      sourceText: modifiedText,
+    });
+
+    // Verify a structural difference — the added heading should produce at
+    // least one more parsed section than the original.
+    const firstSections = first.snapshot.validation.parsedSections;
+    const secondSections = second.snapshot.validation.parsedSections;
+
+    // The synthetic Z.99 heading is parsed as a section (not a pattern —
+    // the compiler only promotes headings that match spec-catalog entries).
+    // Verify the section count grew, proving the parser handled the new heading.
+    expect(secondSections).toBeGreaterThan(firstSections);
+  });
+});
diff --git a/tests/query-contracts.test.ts b/tests/query-contracts.test.ts
new file mode 100644
index 0000000..174c646
--- /dev/null
+++ b/tests/query-contracts.test.ts
@@ -0,0 +1,531 @@
+import { createHash } from 'node:crypto';
+import { readFile } from 'node:fs/promises';
+import { resolve } from 'node:path';
+
+import { describe, expect, it } from '@rstest/core';
+
+import { compileFpfSource } from '../src/runtime/compiler.js';
+import { normalizeQuery } from '../src/runtime/query-normalizer.js';
+import { seedCandidates } from '../src/runtime/candidate-seeder.js';
+import { isAmbiguous, rankCandidates } from '../src/runtime/candidate-ranker.js';
+import { expandGrounding } from '../src/runtime/frontier-expander.js';
+import {
+  buildPatternAnswer,
+  confidenceFromTrace,
+  gapsFromTrace,
+  prepareSynthesisSlices,
+} from '../src/runtime/answer-projector.js';
+import { synthesizeAnswer } from '../src/runtime/synthesis-adapter.js';
+import { MAX_EXCLUDED } from '../src/runtime/constants.js';
+import type { CompiledNode, LocalAnswerSynthesizer, Snapshot, TraceResult } from '../src/runtime/types.js';
+
+/**
+ * Stage-local contract tests for the query pipeline.
+ *
+ * Each describe block targets a single stage function imported directly
+ * from its module so that a regression in one stage cannot masquerade
+ * as a failure in another.
+ *
+ * Canonical fixture IDs: `A.1.1`, `A.15`, `B.3` are used as stable spec
+ * anchors throughout these tests. If the FPF spec renames or renumbers
+ * these patterns, update the IDs here to match.
+ */
+
+let cachedSnapshot: Snapshot | undefined; // memoised by getSnapshot()
+
+async function getSnapshot(): Promise<Snapshot> {
+  if (cachedSnapshot) { // FPF-spec.md is compiled once; later calls reuse the cached snapshot
+    return cachedSnapshot;
+  }
+  const sourcePath = resolve(process.cwd(), 'FPF-spec.md');
+  const sourceText = await readFile(sourcePath, 'utf8');
+  const sourceHash = createHash('sha256').update(sourceText).digest('hex');
+  const output = compileFpfSource({
+    sourcePath,
+    sourceHash,
+    builtAt: '2025-01-01T00:00:00.000Z',
+    sourceText,
+  });
+  cachedSnapshot = output.snapshot;
+  return cachedSnapshot;
+}
+
+/**
+ * Assemble a TraceResult from stage outputs, mirroring QueryEngine.trace().
+ * Used by projection and synthesis tests so they can feed stage outputs
+ * forward without routing through QueryEngine.
+ */
+function assembleTrace(
+  question: string,
+  mode: 'compact' | 'verbose' | 'proof',
+  snapshot: Snapshot,
+): TraceResult {
+  const normalized = normalizeQuery(question, snapshot);
+  const seeding = seedCandidates(normalized, snapshot);
+  const ranking = rankCandidates(question, seeding.candidateMap, snapshot);
+  const grounding = expandGrounding(
+    question,
+    ranking.candidates,
+    ranking.initialNodeIds,
+    ranking.initialAnchorIds,
+    seeding.frontierCandidates,
+    seeding.frontierKeys,
+    snapshot,
+  );
+
+  const selectedNodeIds = grounding.selectedNodeIds;
+  const excludedNodeIds = ranking.candidates
+    .map((c) => c.nodeId)
+    .filter((nodeId) => !selectedNodeIds.includes(nodeId))
+    .slice(0, MAX_EXCLUDED);
+  const status =
+    selectedNodeIds.length === 0
+      ? 'not_found'
+      : ranking.routeWins
+        ? 'ok'
+        : isAmbiguous(question, ranking.candidates)
+          ? 'ambiguous'
+          : 'ok';
+
+  return {
+    mode,
+    question,
+    normalizedQuestion: normalized.normalizedQuestion,
+    detected: normalized.detected,
+    candidateScores: ranking.candidates.slice(0, 16),
+    frontierCandidates: seeding.frontierCandidates,
+    graphExpansions: grounding.graphExpansions,
+    selectedNodeIds,
+    selectedAnchorIds: grounding.selectedAnchorIds,
+    excludedNodeIds,
+    followedReferences: grounding.followedReferences,
+    retrievalHops: grounding.retrievalHops,
+    sessionApplied: seeding.sessionApplied,
+    sessionReusedNodeIds: [],
+    sessionMateriallyChanged: false,
+    sufficient: grounding.sufficient,
+    routeWins: ranking.routeWins,
+    status,
+    snapshot: {
+      sourceHash: snapshot.sourceHash,
+      builtAt: snapshot.builtAt,
+      rebuilt: false,
+    },
+  };
+}
+
+// ---------------------------------------------------------------------------
+// Stage 1: Normalizer (normalizeQuery)
+// ---------------------------------------------------------------------------
+describe('Query / Normalizer stage', () => {
+  it('detects explicit IDs in the question', async () => {
+    const snapshot = await getSnapshot();
+    const normalized = normalizeQuery('What is A.1.1?', snapshot);
+
+    expect(normalized.detected.ids).toContain('A.1.1');
+    expect(normalized.normalizedQuestion.length).toBeGreaterThan(0);
+  });
+
+  it('detects route names when mentioned in the question', async () => {
+    const snapshot = await getSnapshot();
+    const routeNames = Object.values(snapshot.routeGraph.nodes).map((r) => r.name);
+
+    expect(routeNames.length).toBeGreaterThan(0);
+    const firstRoute = routeNames[0]!;
+    const normalized = normalizeQuery(`Tell me about the ${firstRoute} route`, snapshot);
+    expect(normalized.detected.routeNames).toContain(firstRoute);
+  });
+
+  it('detects status terms present in the status index', async () => {
+    const snapshot = await getSnapshot();
+
+    const knownTokens = ['draft', 'stable', 'stub', 'transitional'];
+    const matchedToken = knownTokens.find(
+      (t) => snapshot.indexes.statusIndex[t] !== undefined,
+    );
+
+    expect(matchedToken).toBeDefined();
+    const normalized = normalizeQuery(`Show me ${matchedToken} patterns`, snapshot);
+    expect(normalized.detected.statusTerms).toContain(matchedToken);
+  });
+
+  it('returns empty signals for a nonsense question', async () => {
+    const snapshot = await getSnapshot();
+    const normalized = normalizeQuery('__FPFTEST_NONSENSE_999__', snapshot);
+
+    expect(normalized.detected.ids).toEqual([]);
+    expect(normalized.detected.routeNames).toEqual([]);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Stage 2: Candidate seeder (seedCandidates)
+// ---------------------------------------------------------------------------
+describe('Query / Seed coverage stage', () => {
+  it('seeds exact-match candidates when explicit IDs are in the question', async () => {
+    const snapshot = await getSnapshot();
+    const normalized = normalizeQuery('What is A.1.1?', snapshot);
+    const seeding = seedCandidates(normalized, snapshot);
+
+    const exactCandidate = seeding.candidateMap.get('A.1.1');
+    expect(exactCandidate).toBeDefined();
+    expect(exactCandidate!.reasons).toContain('exact-id');
+    expect(exactCandidate!.score).toBeGreaterThanOrEqual(100);
+  });
+
+  it('seeds lexical candidates for keyword-rich queries', async () => {
+    const snapshot = await getSnapshot();
+    const normalized = normalizeQuery('How does bounded context relate to role assignment?', snapshot);
+    const seeding = seedCandidates(normalized, snapshot);
+
+    const lexicalFrontier = seeding.frontierCandidates.filter((c) => c.origin === 'lexical');
+    expect(lexicalFrontier.length).toBeGreaterThan(0);
+  });
+
+  it('seeds route expansion candidates for route-bearing queries', async () => {
+    const snapshot = await getSnapshot();
+    const normalized = normalizeQuery(
+      'What is the first practical route when vocabulary is overloaded across teams?',
+      snapshot,
+    );
+    const seeding = seedCandidates(normalized, snapshot);
+
+    const routeFrontier = seeding.frontierCandidates.filter(
+      (c) => c.origin === 'route_expansion',
+    );
+    expect(routeFrontier.length).toBeGreaterThan(0);
+  });
+
+  it('produces few or low-scoring candidates for a completely unrelated question', async () => {
+    const snapshot = await getSnapshot();
+    const normalized = normalizeQuery('__FPFTEST_NONSENSE_999__', snapshot);
+    const seeding = seedCandidates(normalized, snapshot);
+
+    const highScoring = Array.from(seeding.candidateMap.values()).filter((c) => c.score >= 100);
+    expect(highScoring.length).toBe(0);
+    expect(seeding.frontierCandidates.length).toBeLessThan(
+      Object.keys(snapshot.compiledNodes).length / 2,
+    );
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Stage 3: Candidate ranker (rankCandidates)
+// ---------------------------------------------------------------------------
+describe('Query / Ranker stage', () => {
+  it('ranks exact-ID matches above lexical matches', async () => {
+    const snapshot = await getSnapshot();
+    const normalized = normalizeQuery('What is A.1.1?', snapshot);
+    const seeding = seedCandidates(normalized, snapshot);
+    const ranking = rankCandidates('What is A.1.1?', seeding.candidateMap, snapshot);
+
+    expect(ranking.candidates.length).toBeGreaterThan(0);
+    expect(ranking.candidates[0]!.nodeId).toBe('A.1.1');
+  });
+
+  it('selects the expected initial node IDs for an explicit ID query', async () => {
+    const snapshot = await getSnapshot();
+    const normalized = normalizeQuery('What is A.1.1?', snapshot);
+    const seeding = seedCandidates(normalized, snapshot);
+    const ranking = rankCandidates('What is A.1.1?', seeding.candidateMap, snapshot);
+
+    expect(ranking.initialNodeIds).toContain('A.1.1');
+  });
+
+  it('selects a route node when route intent is clear', async () => {
+    const snapshot = await getSnapshot();
+    const question = 'What is the first practical route when vocabulary is overloaded across teams?';
+    const normalized = normalizeQuery(question, snapshot);
+    const seeding = seedCandidates(normalized, snapshot);
+    const ranking = rankCandidates(question, seeding.candidateMap, snapshot);
+
+    expect(ranking.routeWins).toBe(true);
+    const routeNodes = ranking.initialNodeIds.filter(
+      (id) => snapshot.compiledNodes[id]?.kind === 'route',
+    );
+    expect(routeNodes.length).toBeGreaterThan(0);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Stage 4: Frontier expansion (expandGrounding)
+// ---------------------------------------------------------------------------
+describe('Query / Frontier expansion stage', () => {
+  it('respects the MAX_HOPS budget (≤6 retrieval hops)', async () => {
+    const snapshot = await getSnapshot();
+    const question = 'How do U.RoleAssignment, U.BoundedContext, and U.RoleStateGraph connect in a lawful workflow?';
+    const normalized = normalizeQuery(question, snapshot);
+    const seeding = seedCandidates(normalized, snapshot);
+    const ranking = rankCandidates(question, seeding.candidateMap, snapshot);
+    const grounding = expandGrounding(
+      question,
+      ranking.candidates,
+      ranking.initialNodeIds,
+      ranking.initialAnchorIds,
+      seeding.frontierCandidates,
+      seeding.frontierKeys,
+      snapshot,
+    );
+
+    expect(grounding.retrievalHops.length).toBeLessThanOrEqual(6); // NOTE(review): hard-coded; keep in sync with MAX_HOPS
+  });
+
+  it('respects the MAX_SELECTED_ANCHORS budget (≤12 anchors)', async () => {
+    const snapshot = await getSnapshot();
+    const question = 'What is A.1.1?';
+    const normalized = normalizeQuery(question, snapshot);
+    const seeding = seedCandidates(normalized, snapshot);
+    const ranking = rankCandidates(question, seeding.candidateMap, snapshot);
+    const grounding = expandGrounding(
+      question,
+      ranking.candidates,
+      ranking.initialNodeIds,
+      ranking.initialAnchorIds,
+      seeding.frontierCandidates,
+      seeding.frontierKeys,
+      snapshot,
+    );
+
+    expect(grounding.selectedAnchorIds.length).toBeLessThanOrEqual(12); // NOTE(review): hard-coded; keep in sync with MAX_SELECTED_ANCHORS
+  });
+
+  it('records hop metadata (iteration, reason, sufficientAfter)', async () => {
+    const snapshot = await getSnapshot();
+    const question = 'How do A.1.1, A.15, and B.3 connect in a lawful workflow?';
+    const normalized = normalizeQuery(question, snapshot);
+    const seeding = seedCandidates(normalized, snapshot);
+    const ranking = rankCandidates(question, seeding.candidateMap, snapshot);
+    const grounding = expandGrounding(
+      question,
+      ranking.candidates,
+      ranking.initialNodeIds,
+      ranking.initialAnchorIds,
+      seeding.frontierCandidates,
+      seeding.frontierKeys,
+      snapshot,
+    );
+
+    if (grounding.retrievalHops.length > 0) {
+      const firstHop = grounding.retrievalHops[0]!;
+      expect(firstHop.iteration).toBe(1);
+      expect(firstHop.reason.length).toBeGreaterThan(0);
+      expect(typeof firstHop.sufficientAfter).toBe('boolean');
+    } else {
+      expect(grounding.sufficient).toBe(true);
+    }
+  });
+
+  it('marks sufficiency correctly — sufficient traces have anchors', async () => {
+    const snapshot = await getSnapshot();
+    const question = 'What is A.1.1?';
+    const normalized = normalizeQuery(question, snapshot);
+    const seeding = seedCandidates(normalized, snapshot);
+    const ranking = rankCandidates(question, seeding.candidateMap, snapshot);
+    const grounding = expandGrounding(
+      question,
+      ranking.candidates,
+      ranking.initialNodeIds,
+      ranking.initialAnchorIds,
+      seeding.frontierCandidates,
+      seeding.frontierKeys,
+      snapshot,
+    );
+
+    expect(grounding.sufficient).toBe(true);
+    expect(grounding.selectedAnchorIds.length).toBeGreaterThan(0);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Stage 5: Answer projection (buildPatternAnswer / confidenceFromTrace / gapsFromTrace)
+// ---------------------------------------------------------------------------
+describe('Query / Projection stability stage', () => {
+  it('produces stable support set across repeated stage invocations', async () => {
+    const snapshot = await getSnapshot();
+
+    const trace1 = assembleTrace('What is A.1.1?', 'compact', snapshot);
+    const trace2 = assembleTrace('What is A.1.1?', 'compact', snapshot);
+
+    expect(trace1.selectedNodeIds).toEqual(trace2.selectedNodeIds);
+    expect(trace1.selectedAnchorIds).toEqual(trace2.selectedAnchorIds);
+    expect(trace1.candidateScores.map((c) => c.nodeId)).toEqual(
+      trace2.candidateScores.map((c) => c.nodeId),
+    );
+  });
+
+  it('projects a non-empty answer with citations for a known pattern query', async () => {
+    const snapshot = await getSnapshot();
+    const trace = assembleTrace('What is A.1.1?', 'verbose', snapshot);
+    const result = buildPatternAnswer('What is A.1.1?', 'verbose', trace, snapshot, false);
+
+    expect(result.status).toBe('ok');
+    expect(result.answer.length).toBeGreaterThan(0);
+    expect(result.ids).toContain('A.1.1');
+    expect(result.citations.length).toBeGreaterThan(0);
+  });
+
+  it('projects constraints for verbose mode', async () => {
+    const snapshot = await getSnapshot();
+    const trace = assembleTrace('What is A.1.1?', 'verbose', snapshot);
+    const result = buildPatternAnswer('What is A.1.1?', 'verbose', trace, snapshot, false);
+
+    expect(result.constraints.length).toBeGreaterThanOrEqual(1);
+  });
+
+  it('projects a grounding chain in proof mode', async () => {
+    const snapshot = await getSnapshot();
+    const trace = assembleTrace('What is A.1.1?', 'proof', snapshot);
+    const result = buildPatternAnswer('What is A.1.1?', 'proof', trace, snapshot, false);
+
+    expect(result.groundingChain).toBeDefined();
+    expect(result.groundingChain!.length).toBeGreaterThan(0);
+  });
+
+  it('returns low confidence for completely unresolvable questions', async () => {
+    const snapshot = await getSnapshot();
+    const trace = assembleTrace('__FPFTEST_NONSENSE_999__', 'compact', snapshot);
+
+    expect(['not_found', 'ambiguous']).toContain(trace.status);
+    expect(confidenceFromTrace(trace)).toBeLessThan(0.7);
+  });
+
+  it('computes confidence via confidenceFromTrace without QueryEngine', async () => {
+    const snapshot = await getSnapshot();
+    const trace = assembleTrace('What is A.1.1?', 'verbose', snapshot);
+
+    const confidence = confidenceFromTrace(trace);
+    expect(confidence).toBeGreaterThan(0.5);
+    expect(confidence).toBeLessThanOrEqual(1);
+  });
+
+  it('computes gaps via gapsFromTrace without QueryEngine', async () => {
+    const snapshot = await getSnapshot();
+    const trace = assembleTrace('What is A.1.1?', 'verbose', snapshot);
+
+    const gaps = gapsFromTrace(trace);
+    expect(Array.isArray(gaps)).toBe(true);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Stage 6: Synthesis isolation (synthesizeAnswer)
+// ---------------------------------------------------------------------------
+describe('Query / Synthesis isolation stage', () => {
+  it('returns deterministic answer when synthesizer is unavailable', async () => {
+    const snapshot = await getSnapshot();
+    const trace = assembleTrace('What is A.1.1?', 'verbose', snapshot);
+    const deterministicResult = buildPatternAnswer('What is A.1.1?', 'verbose', trace, snapshot, false);
+    const nodes = trace.selectedNodeIds
+      .map((nodeId) => snapshot.compiledNodes[nodeId])
+      .filter((node): node is CompiledNode => Boolean(node))
+      .slice(0, 8);
+    const slices = prepareSynthesisSlices(trace, snapshot);
+
+    const unavailable: LocalAnswerSynthesizer = {
+      isAvailable: async () => false,
+      synthesize: async () => {
+        throw new Error('should not be called');
+      },
+    };
+
+    const result = await synthesizeAnswer(
+      'What is A.1.1?', 'verbose', trace, nodes, slices, deterministicResult, unavailable,
+    );
+
+    expect(result.status).toBe('ok');
+    expect(result.ids).toContain('A.1.1');
+    expect(result.answer.length).toBeGreaterThan(0);
+  });
+
+  it('falls back to deterministic answer when synthesizer throws', async () => {
+    const snapshot = await getSnapshot();
+    const trace = assembleTrace('What is A.1.1?', 'verbose', snapshot);
+    const deterministicResult = buildPatternAnswer('What is A.1.1?', 'verbose', trace, snapshot, false);
+    const nodes = trace.selectedNodeIds
+      .map((nodeId) => snapshot.compiledNodes[nodeId])
+      .filter((node): node is CompiledNode => Boolean(node))
+      .slice(0, 8);
+    const slices = prepareSynthesisSlices(trace, snapshot);
+
+    const failing: LocalAnswerSynthesizer = {
+      isAvailable: async () => true,
+      synthesize: async () => {
+        throw new Error('synthesizer crashed');
+      },
+    };
+
+    const result = await synthesizeAnswer(
+      'What is A.1.1?', 'verbose', trace, nodes, slices, deterministicResult, failing,
+    );
+
+    expect(result.status).toBe('ok');
+    expect(result.ids).toContain('A.1.1');
+    expect(result.gaps.some((gap) => gap.includes('synthesis skipped') || gap.includes('synthesizer crashed'))).toBe(true);
+  });
+
+  it('does not alter deterministic IDs or citations when synthesis fails', async () => {
+    const snapshot = await getSnapshot();
+    const trace = assembleTrace('What is A.1.1?', 'verbose', snapshot);
+    const deterministicResult = buildPatternAnswer('What is A.1.1?', 'verbose', trace, snapshot, false);
+    const nodes = trace.selectedNodeIds
+      .map((nodeId) => snapshot.compiledNodes[nodeId])
+      .filter((node): node is CompiledNode => Boolean(node))
+      .slice(0, 8);
+    const slices = prepareSynthesisSlices(trace, snapshot);
+
+    const failing: LocalAnswerSynthesizer = {
+      isAvailable: async () => true,
+      synthesize: async () => {
+        throw new Error('test failure');
+      },
+    };
+
+    const failedSynthResult = await synthesizeAnswer(
+      'What is A.1.1?', 'verbose', trace, nodes, slices, deterministicResult, failing,
+    );
+
+    expect(failedSynthResult.ids).toEqual(deterministicResult.ids);
+    expect(failedSynthResult.citations).toEqual(deterministicResult.citations);
+    expect(failedSynthResult.relations).toEqual(deterministicResult.relations);
+  });
+
+  it('does not call synthesize when synthesizer reports unavailable', async () => {
+    const snapshot = await getSnapshot();
+    const trace = assembleTrace('What is A.1.1?', 'verbose', snapshot);
+    const deterministicResult = buildPatternAnswer('What is A.1.1?', 'verbose', trace, snapshot, false);
+    const nodes = trace.selectedNodeIds
+      .map((nodeId) => snapshot.compiledNodes[nodeId])
+      .filter((node): node is CompiledNode => Boolean(node))
+      .slice(0, 8);
+    const slices = prepareSynthesisSlices(trace, snapshot);
+
+    let synthesizeCalled = false;
+    const unavailable: LocalAnswerSynthesizer = {
+      isAvailable: async () => false,
+      synthesize: async () => {
+        synthesizeCalled = true;
+        return {};
+      },
+    };
+
+    await synthesizeAnswer(
+      'What is A.1.1?', 'verbose', trace, nodes, slices, deterministicResult, unavailable,
+    );
+
+    expect(synthesizeCalled).toBe(false);
+  });
+});
+
+// ---------------------------------------------------------------------------
+// Trace determinism (cross-cutting — assembled from stages, not QueryEngine)
+// ---------------------------------------------------------------------------
+describe('Query / Trace determinism', () => {
+  it('same snapshot + same question → identical assembled trace', async () => {
+    const snapshot = await getSnapshot();
+
+    const trace1 = assembleTrace('How does bounded context relate to role assignment?', 'verbose', snapshot);
+    const trace2 = assembleTrace('How does bounded context relate to role assignment?', 'verbose', snapshot);
+
+    expect(JSON.stringify(trace1)).toBe(JSON.stringify(trace2));
+  });
+});