Skip to content
298 changes: 298 additions & 0 deletions tests/compiler-contracts.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,298 @@
import { createHash } from 'node:crypto';
import { readFile } from 'node:fs/promises';
import { resolve } from 'node:path';

import { describe, expect, it } from '@rstest/core';

import { compileFpfSource, type CompilerOutput } from '../src/runtime/compiler.js';

/**
* Stage-local contract tests for the compiler pipeline.
*
* Each test targets a specific compiler stage promise so that a failure
* pinpoints the broken stage rather than surfacing as a generic
* "end-to-end answer is wrong."
*
* Canonical fixture IDs: `A.1.1` is used as a stable spec anchor for
* metadata assertions. If the FPF spec renames or renumbers this
* pattern, update the ID here to match.
*/

/**
 * Memoized compilation of the canonical spec.
 *
 * The in-flight promise is cached (rather than the resolved value) so that
 * callers arriving before the first compilation settles share that single run
 * instead of each re-reading and re-compiling the spec.
 */
let cachedOutput: Promise<CompilerOutput> | undefined;

/** Returns true when `error` is an object carrying the Node.js `ENOENT` code. */
function isFileNotFound(error: unknown): boolean {
  return (
    typeof error === 'object' &&
    error !== null &&
    'code' in error &&
    (error as { code?: unknown }).code === 'ENOENT'
  );
}

/**
 * Reads `fileName` from `startDir`, or from the nearest ancestor directory
 * that contains it.
 *
 * @param startDir - Directory where the lookup begins.
 * @param fileName - Bare file name to look for in each directory.
 * @returns The absolute path that was read and the file's UTF-8 contents.
 * @throws Error when the filesystem root is reached without finding the file;
 *   any read error other than `ENOENT` is rethrown unchanged.
 */
async function readFromNearestAncestor(
  startDir: string,
  fileName: string,
): Promise<{ sourcePath: string; sourceText: string }> {
  let dir = startDir;
  for (;;) {
    const candidate = resolve(dir, fileName);
    try {
      return { sourcePath: candidate, sourceText: await readFile(candidate, 'utf8') };
    } catch (error: unknown) {
      if (!isFileNotFound(error)) {
        throw error;
      }
    }
    // `resolve(dir, '..')` returns `dir` itself once the root is reached.
    const parent = resolve(dir, '..');
    if (parent === dir) {
      throw new Error(`${fileName} not found in ${startDir} or any parent directory`);
    }
    dir = parent;
  }
}

/** Reads `FPF-spec.md`, hashes it with SHA-256, and compiles it once. */
async function buildCompilerOutput(): Promise<CompilerOutput> {
  const { sourcePath, sourceText } = await readFromNearestAncestor(
    process.cwd(),
    'FPF-spec.md',
  );
  const sourceHash = createHash('sha256').update(sourceText).digest('hex');
  return compileFpfSource({
    sourcePath,
    sourceHash,
    // Fixed timestamp instead of the current time — presumably so the compiled
    // output does not vary between runs; confirm against the compiler.
    builtAt: '2025-01-01T00:00:00.000Z',
    sourceText,
  });
}

/**
 * Returns the compiled output for `FPF-spec.md`, compiling at most once per
 * test-file run.
 *
 * NOTE(review): the lookup starts at `process.cwd()` and walks upward, so the
 * suite also works when launched from a sub-directory of the repository.
 * Launching it from outside the repository still fails, because the lookup is
 * not anchored to this test file's own location.
 *
 * @returns A promise for the shared compiler output.
 */
function getCompilerOutput(): Promise<CompilerOutput> {
  if (!cachedOutput) {
    cachedOutput = buildCompilerOutput().catch((error: unknown) => {
      // Do not keep a rejected promise around: let the next caller retry.
      cachedOutput = undefined;
      throw error;
    });
  }
  return cachedOutput;
}

/**
 * Minimum thresholds — deliberately loose so spec edits don't break tests.
 *
 * Each constant is compared with `toBeGreaterThan`, so it acts as an exclusive
 * lower bound: the parsed count must be strictly larger than the value.
 */
// Exclusive lower bound for `validation.parsedSections`.
const MIN_SECTIONS = 100;
// Exclusive lower bound for `validation.parsedPatterns`.
const MIN_PATTERNS = 50;
// Exclusive lower bound for `validation.parsedLexiconEntries`.
const MIN_LEXICON_ENTRIES = 5;

// ---------------------------------------------------------------------------
// Stage 1: Parser resilience
// ---------------------------------------------------------------------------
describe('Compiler / Parser stage', () => {
  // Floors on the parser's own counters for the canonical spec.
  it('parses a non-trivial number of sections, patterns, routes, and lexicon entries', async () => {
    const output = await getCompilerOutput();
    const counts = output.snapshot.validation;

    expect(counts.parsedSections).toBeGreaterThan(MIN_SECTIONS);
    expect(counts.parsedPatterns).toBeGreaterThan(MIN_PATTERNS);
    expect(counts.parsedRoutes).toBeGreaterThan(0);
    expect(counts.parsedLexiconEntries).toBeGreaterThan(MIN_LEXICON_ENTRIES);
  });

  // Every key of `compiledNodes` must be a non-empty string.
  it('assigns IDs to all compiled nodes and none are empty strings', async () => {
    const output = await getCompilerOutput();
    const ids = Object.keys(output.snapshot.compiledNodes);

    expect(ids.length).toBeGreaterThan(50);
    ids.forEach((id) => {
      expect(id.length).toBeGreaterThan(0);
    });
  });

  // Uses pattern `A.1.1` as the fixed fixture for metadata checks.
  // NOTE(review): the test title mentions `part`, but the third field actually
  // asserted below is `sectionIds` — confirm which one is intended.
  it('preserves pattern metadata fields (title, status, part)', async () => {
    const output = await getCompilerOutput();
    const anchorPattern = output.snapshot.patternGraph.nodes['A.1.1'];

    expect(anchorPattern).toBeDefined();
    expect(anchorPattern!.title.length).toBeGreaterThan(0);
    expect(anchorPattern!.status.length).toBeGreaterThan(0);
    expect(anchorPattern!.sectionIds.length).toBeGreaterThan(0);
  });

  it('produces anchors with valid line ranges', async () => {
    const output = await getCompilerOutput();
    const allAnchors = Object.values(output.snapshot.anchorMap);

    expect(allAnchors.length).toBeGreaterThan(50);

    // Only the first 20 anchors are range-checked.
    allAnchors.slice(0, 20).forEach((anchor) => {
      expect(anchor.lineStart).toBeGreaterThanOrEqual(0);
      expect(anchor.lineEnd).toBeGreaterThan(anchor.lineStart);
    });

    // More than half of all anchors must carry non-empty text.
    let withText = 0;
    for (const anchor of allAnchors) {
      if (anchor.text.length > 0) {
        withText += 1;
      }
    }
    expect(withText).toBeGreaterThan(allAnchors.length / 2);
  });
});
Comment on lines +47 to +91
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟠 Major

Parser resilience is not actually under test here.

These assertions only inspect the canonical FPF-spec.md snapshot. They never recompile whitespace-only, reordered, or prose-only variants, so a regression on semantically equivalent source edits would still pass this whole block.

Suggested direction
+  it('preserves semantic IR under whitespace-only rewrites', async () => {
+    const sourcePath = resolve(process.cwd(), 'FPF-spec.md');
+    const sourceText = await readFile(sourcePath, 'utf8');
+    const rewritten = sourceText
+      .replace(/[ \t]+$/gm, '')
+      .replace(/\n{3,}/g, '\n\n');
+    const builtAt = '2025-01-01T00:00:00.000Z';
+
+    const base = compileFpfSource({
+      sourcePath,
+      sourceHash: createHash('sha256').update(sourceText).digest('hex'),
+      builtAt,
+      sourceText,
+    });
+    const variant = compileFpfSource({
+      sourcePath,
+      sourceHash: createHash('sha256').update(rewritten).digest('hex'),
+      builtAt,
+      sourceText: rewritten,
+    });
+
+    expect(Object.keys(variant.snapshot.patternGraph.nodes)).toEqual(
+      Object.keys(base.snapshot.patternGraph.nodes),
+    );
+    expect(Object.keys(variant.snapshot.routeGraph.nodes)).toEqual(
+      Object.keys(base.snapshot.routeGraph.nodes),
+    );
+  });
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@tests/compiler-contracts.test.ts` around lines 38 - 82, The tests under
"Compiler / Parser stage" currently only validate the canonical snapshot
returned by getCompilerOutput(); add additional test cases that recompile
semantic-equivalent variants (whitespace-only, reordered sections, and
prose-only edits) and assert the same invariants
(parsedSections/parsedPatterns/parsedRoutes/parsedLexiconEntries counts,
compiledNodes IDs, pattern metadata for 'A.1.1', and valid anchors) hold for
each variant; implement helper functions to produce or load variant inputs, call
the same getCompilerOutput or a variant-taking compile function, and run the
existing assertions against each resulting snapshot to ensure parser resilience.


// ---------------------------------------------------------------------------
// Stage 2: Graph closure
// ---------------------------------------------------------------------------
describe('Compiler / Graph closure stage', () => {
  it('keeps unresolved references bounded and stable', async () => {
    const { snapshot } = await getCompilerOutput();

    // The FPF spec has a small number of forward/external references that
    // don't resolve to compiled nodes. The contract is that this set stays
    // bounded — a regression would show as a sudden spike.
    expect(snapshot.validation.unresolvedReferences.length).toBeLessThan(20);
  });

  it('tracks duplicate IDs produced by catalog + heading overlap', async () => {
    const { snapshot } = await getCompilerOutput();
    const duplicateCount = snapshot.validation.duplicateIds.length;
    const patternCount = Object.keys(snapshot.patternGraph.nodes).length;

    // "duplicateIds" lists pattern IDs that appear in both the catalog table
    // and heading sections — this is expected for the FPF spec. The contract
    // is that there is at least one and fewer than (pattern count + 10).
    expect(duplicateCount).toBeGreaterThan(0);
    expect(duplicateCount).toBeLessThan(patternCount + 10);
  });

  it('has no broken routes', async () => {
    const { snapshot } = await getCompilerOutput();

    expect(snapshot.validation.brokenRoutes).toEqual([]);
  });

  it('contains outline relations linking parents to children', async () => {
    const { snapshot } = await getCompilerOutput();
    const outlineChildren = snapshot.relationGraph.filter(
      (edge) => edge.relation === 'outline_child',
    );

    expect(outlineChildren.length).toBeGreaterThan(10);

    // `A.15` is used as a known parent that must have at least one child edge.
    const a15Children = outlineChildren.filter((edge) => edge.from === 'A.15');
    expect(a15Children.length).toBeGreaterThan(0);
  });

  it('contains explicit_reference relations extracted from source text', async () => {
    const { snapshot } = await getCompilerOutput();
    const explicitRefs = snapshot.relationGraph.filter(
      (edge) => edge.relation === 'explicit_reference',
    );

    expect(explicitRefs.length).toBeGreaterThan(0);
  });

  it('routes reference mostly existing compiled nodes', async () => {
    const { snapshot } = await getCompilerOutput();
    const allNodeIds = new Set(Object.keys(snapshot.compiledNodes));

    // Count every ordered, optional, and landing ID across all routes, and how
    // many of them are keys of `compiledNodes`.
    let total = 0;
    let resolved = 0;
    for (const route of Object.values(snapshot.routeGraph.nodes)) {
      for (const id of [...route.orderedIds, ...route.optionalIds, ...route.landingIds]) {
        total += 1;
        if (allNodeIds.has(id)) {
          resolved += 1;
        }
      }
    }

    // Strictly more than 90% of route step IDs must resolve to compiled nodes.
    // The `total > 0` check runs first so the ratio is never computed as 0 / 0.
    expect(total).toBeGreaterThan(0);
    expect(resolved / total).toBeGreaterThan(0.9);
  });
});

// ---------------------------------------------------------------------------
// Stage 3: Index round-trip
// ---------------------------------------------------------------------------
describe('Compiler / Index round-trip stage', () => {
  it('indexes resolve back to their source patterns', async () => {
    const { snapshot } = await getCompilerOutput();
    const patternIds = new Set(Object.keys(snapshot.patternGraph.nodes));

    // Index-map nodes whose `metadata.patternId` is set and names a known pattern.
    const linkedToPattern = Object.values(snapshot.indexMap).filter((node) => {
      const { patternId } = node.metadata;
      return patternId ? patternIds.has(patternId) : false;
    });
    expect(linkedToPattern.length).toBeGreaterThan(20);
  });

  it('alias index entries resolve to existing compiled nodes', async () => {
    const { snapshot } = await getCompilerOutput();
    const allNodeIds = new Set(Object.keys(snapshot.compiledNodes));

    // Collect every offender so a failure names the alias and the missing ID
    // instead of reporting a bare "expected false to be true".
    const dangling: string[] = [];
    for (const [alias, nodeIds] of Object.entries(snapshot.indexes.aliasIndex)) {
      for (const nodeId of nodeIds) {
        if (!allNodeIds.has(nodeId)) {
          dangling.push(`${alias} -> ${nodeId}`);
        }
      }
    }
    expect(dangling).toEqual([]);
  });

  it('lexicon entries have at least one linked node', async () => {
    const { snapshot } = await getCompilerOutput();

    // Keys of lexicon entries with no linked node; listed in the failure output.
    const unlinked: string[] = [];
    for (const [key, entry] of Object.entries(snapshot.lexicon)) {
      if (!(entry.linkedNodeIds.length > 0)) {
        unlinked.push(key);
      }
    }
    expect(unlinked).toEqual([]);
  });

  it('status index entries resolve to existing compiled nodes', async () => {
    const { snapshot } = await getCompilerOutput();
    const statusIndex = snapshot.indexes.statusIndex;

    expect(Object.keys(statusIndex).length).toBeGreaterThan(0);

    // Same pass condition as a per-item `toBeDefined()`, but the failure lists
    // which status bucket points at which missing node.
    const missing: string[] = [];
    for (const [status, nodeIds] of Object.entries(statusIndex)) {
      for (const nodeId of nodeIds) {
        if (snapshot.compiledNodes[nodeId] === undefined) {
          missing.push(`${status} -> ${nodeId}`);
        }
      }
    }
    expect(missing).toEqual([]);
  });

  it('route name index resolves to existing route nodes', async () => {
    const { snapshot } = await getCompilerOutput();

    // Route names whose indexed ID is not a key of `routeGraph.nodes`.
    const missing: string[] = [];
    for (const [name, nodeIds] of Object.entries(snapshot.indexes.routeNameIndex)) {
      for (const nodeId of nodeIds) {
        if (snapshot.routeGraph.nodes[nodeId] === undefined) {
          missing.push(`${name} -> ${nodeId}`);
        }
      }
    }
    expect(missing).toEqual([]);
  });
});

// ---------------------------------------------------------------------------
// Stage 4: Validation coverage
// ---------------------------------------------------------------------------
describe('Compiler / Validation stage', () => {
  // NOTE(review): both tests below only inspect the canonical FPF-spec.md
  // output, so a validator that stops reporting findings altogether would
  // still pass the upper-bound check. Review feedback on this suite asked for
  // a deliberately malformed inline fixture (e.g. a pattern with missing
  // metadata and a reference to a non-existent ID) with explicit assertions
  // that `missingRequiredFields` and `unresolvedReferences` are non-empty.
  // TODO: add that fixture once the expected findings have been confirmed
  // against the compiler.

  it('keeps missing required fields bounded', async () => {
    const { snapshot } = await getCompilerOutput();

    // The FPF spec has a small number of patterns with incomplete metadata.
    // The contract is that this stays bounded — a regression would spike it.
    expect(snapshot.validation.missingRequiredFields).toBeLessThan(25);
  });

  it('counts a plausible number of index map nodes', async () => {
    const { snapshot } = await getCompilerOutput();
    const reportedCount = snapshot.validation.indexMapNodes;

    expect(reportedCount).toBeGreaterThan(50);
    // The reported counter must agree with the actual number of index-map keys.
    expect(reportedCount).toBe(Object.keys(snapshot.indexMap).length);
  });
});

// ---------------------------------------------------------------------------
// Stage 5: Snapshot determinism
// ---------------------------------------------------------------------------
describe('Compiler / Snapshot determinism stage', () => {
  // Two compilations of identical input must serialize to the same JSON string.
  it('produces byte-identical snapshot when compiled twice with the same input', async () => {
    const specPath = resolve(process.cwd(), 'FPF-spec.md');
    const specText = await readFile(specPath, 'utf8');
    const input = {
      sourcePath: specPath,
      sourceHash: createHash('sha256').update(specText).digest('hex'),
      builtAt: '2025-01-01T00:00:00.000Z',
      sourceText: specText,
    };

    // Each call receives its own copy of the input object.
    const runA = compileFpfSource({ ...input });
    const runB = compileFpfSource({ ...input });

    expect(JSON.stringify(runA.snapshot)).toBe(JSON.stringify(runB.snapshot));
  });

  it('produces structurally different output when source text changes', async () => {
    const specPath = resolve(process.cwd(), 'FPF-spec.md');
    const specText = await readFile(specPath, 'utf8');
    const fixedBuiltAt = '2025-01-01T00:00:00.000Z';

    const sha256 = (text: string): string => createHash('sha256').update(text).digest('hex');

    // Append a new heading + body — the compiler must parse it as an
    // additional section, which changes the structural output (not just
    // the caller-provided hash).
    const extendedText = `${specText}\n\n## Z.99 Synthetic Test Section\n\nA synthetic section added to verify the compiler processes changed source text.\n`;

    const baseline = compileFpfSource({
      sourcePath: specPath,
      sourceHash: sha256(specText),
      builtAt: fixedBuiltAt,
      sourceText: specText,
    });
    const extended = compileFpfSource({
      sourcePath: specPath,
      sourceHash: sha256(extendedText),
      builtAt: fixedBuiltAt,
      sourceText: extendedText,
    });

    // The synthetic Z.99 heading is parsed as a section (not a pattern —
    // the compiler only promotes headings that match spec-catalog entries).
    // Verify the section count grew, proving the parser handled the new heading.
    expect(extended.snapshot.validation.parsedSections).toBeGreaterThan(
      baseline.snapshot.validation.parsedSections,
    );
  });
});
Loading
Loading