diff --git a/.gitignore b/.gitignore index aa8fb7b..c9ff8a7 100644 --- a/.gitignore +++ b/.gitignore @@ -43,3 +43,6 @@ skills-lock.json # Launch-day Hallmark vs Impeccable vs no-skills comparison — one-off artifact, # not referenced from the marketing site or README. Kept locally if useful. site/_launch-comparison/ + +# Eval audit snapshots (throwaway) +evals/.site-cache/ diff --git a/evals/README.md b/evals/README.md new file mode 100644 index 0000000..f4d69fe --- /dev/null +++ b/evals/README.md @@ -0,0 +1,51 @@ +# `evals/` — anti-slop eval harness + +An eval-driven hillclimb that improved Hallmark against two external anchors: + +- **Impeccable's slop standard** — "37 patterns that mark an interface as + AI-generated" across 8 dimensions ([impeccable.style/slop](https://impeccable.style/slop)). +- **"Your Evals Will Break and You Won't See It Coming"** — why static evals + silently miss new failure regimes, and the case for self-evolving evals + ([wanglun1996.github.io](https://wanglun1996.github.io/blog/your-evals-will-break.html)). + +## What's here + +| File | Role | +|---|---| +| `rubric.md` | The scoring rubric: 8 detector dimensions + craft; v2 also folds in the cross-fixture structure order parameter. | +| `briefs.md` | The briefs each fixture is the skill exercised on. | +| `detector.mjs` | Deterministic slop detector — the CLI-checkable subset of the 37 patterns + Hallmark gates. v1 = 37 rules, v2 = 43. | +| `run.mjs` | Merges detector + judge sidecars, computes the cross-fixture **order parameter**, snapshots a cycle, rebuilds `results/history.md`. | +| `config.json` | Which fixtures belong to eval v1 vs v2. | +| `fixtures/*.html` | Self-contained pages (what Hallmark emits). | +| `fixtures/*.judge.json` | Per-fixture craft scores (philosophy, hierarchy, execution, specificity, restraint, variety, honesty). | +| `results/` | One JSON snapshot per cycle + the running `history.md` table. | + +## Run it + +```bash +node evals/check.mjs # non-mutating regression check +cd evals +node check.mjs --eval v2 --min-score 98 # stricter local check, no writes +node detector.mjs fixtures/pulse.html --eval v2 # inspect one page +node run.mjs --cycle 10 --eval v2 --label "..." # score a cycle, update history +``` + +## The hillclimb (10 cycles) + +**Phase 1 (v1, cycles 1–5)** drove the three originals from 74.2 → 98.3 by +closing gaps the detector found — each cycle added a real gate to +`references/slop-test.md` (gates **70–77**) and brought the fixtures into line. + +**The break (cycle 6)** upgraded the eval to **v2**: six new detector rules +for failure modes v1 was blind to (notably hero-float / gate 54, which the +v1-perfect fixtures had been violating the whole time), a cross-fixture +**order parameter** (macrostructure reuse — variety is a property of the +*set*, not the page), and two adversarial fixtures (`pulse`, `vellum`). Score +fell 98.3 → 76.4, exactly as the blog predicts. + +**Phase 2 (v2, cycles 7–10)** climbed back to 98.7, adding gates **78–84** +and resisting `pulse`'s dark/neon/metric-hero brief gravity. + +The skill is the artifact that improved: 15 new gates, motivated by what the +eval could measure. See `results/history.md` for the full score table. diff --git a/evals/audit-site.mjs b/evals/audit-site.mjs new file mode 100644 index 0000000..de3633a --- /dev/null +++ b/evals/audit-site.mjs @@ -0,0 +1,79 @@ +// Audit real, in-repo Hallmark output with the detector. +// +// The detector reads inline CSS only; the shipped pages link external +// stylesheets. This adapter inlines local files into +// a self-contained snapshot, then scores it under both eval versions so we can +// see what the current skill's gates (v2) catch that the initial skill's +// gates (v1) did not — on artifacts the eval author did not write. +// +// Usage: node audit-site.mjs [ ...] + +import fs from 'node:fs'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { analyze } from './detector.mjs'; + +const HERE = path.dirname(fileURLToPath(import.meta.url)); +const ROOT = path.resolve(HERE, '..'); +const CACHE = path.join(HERE, '.site-cache'); +fs.mkdirSync(CACHE, { recursive: true }); + +function readCssWithImports(cssPath, seen = new Set()) { + const abs = path.resolve(cssPath); + if (seen.has(abs) || !fs.existsSync(abs)) return ''; + seen.add(abs); + + const dir = path.dirname(abs); + let css = fs.readFileSync(abs, 'utf8'); + css = css.replace(/@import\s+(?:url\()?['"]([^'")]+)['"]\)?\s*;/gi, (full, href) => { + if (/^https?:|^\/\//i.test(href)) return full; // keep remote imports as-is + const importPath = path.resolve(dir, href.split(/[?#]/)[0]); + return `/* ${href} */\n${readCssWithImports(importPath, seen)}`; + }); + return css; +} + +function inlinePage(htmlPath) { + const abs = path.resolve(ROOT, htmlPath); + const dir = path.dirname(abs); + let html = fs.readFileSync(abs, 'utf8'); + const links = [...html.matchAll(/]*rel=["']stylesheet["'][^>]*>/gi)].map((m) => m[0]); + const blocks = []; + for (const link of links) { + const href = (link.match(/href=["']([^"']+)["']/i) || [])[1]; + if (!href || /^https?:|^\/\//i.test(href)) continue; // skip remote (e.g. Google Fonts) + const cssPath = path.resolve(dir, href.split(/[?#]/)[0]); + if (fs.existsSync(cssPath)) blocks.push(`/* ${href} */\n${readCssWithImports(cssPath)}`); + } + if (blocks.length) { + const styleTag = `\n\n`; + html = html.replace(/<\/head>/i, `${styleTag}`); + } + const out = path.join(CACHE, htmlPath.replace(/[\/]/g, '__')); + fs.writeFileSync(out, html); + return out; +} + +const pages = process.argv.slice(2).filter((a) => !a.startsWith('--')); +const rows = []; +for (const p of pages) { + let snap; + try { snap = inlinePage(p); } catch (e) { console.error(`skip ${p}: ${e.message}`); continue; } + const v1 = analyze(snap, 'v1'); + const v2 = analyze(snap, 'v2'); + const v2fails = Object.values(v2.dims).flatMap((d) => d.rules).filter((r) => !r.pass); + rows.push({ page: p, v1: v1.overall, v2: v2.overall, fails: v2fails, multiTheme: v2.multiTheme, themeCount: v2.themeCount }); +} + +const name = (p) => p.replace(/^site\//, '').replace(/\/index\.html$/, '/').replace(/index\.html$/, ''); +console.log('\nReal Hallmark corpus — detector audit (overall /5)\n'); +console.log(`${'page'.padEnd(34)} ${'v1'.padStart(6)} ${'v2'.padStart(6)} v2 findings`); +console.log('-'.repeat(72)); +for (const r of rows) { + const f = r.fails.length ? r.fails.map((x) => x.id.replace(/^v2-/, '')).join(', ') : '—'; + const tag = r.multiTheme ? ` [multi-theme:${r.themeCount}, low-confidence]` : ''; + console.log(`${name(r.page).padEnd(34)} ${r.v1.toFixed(2).padStart(6)} ${r.v2.toFixed(2).padStart(6)} ${f}${tag}`); +} +const avg = (k) => (rows.reduce((a, r) => a + r[k], 0) / rows.length).toFixed(2); +console.log('-'.repeat(72)); +console.log(`${'CORPUS MEAN'.padEnd(34)} ${avg('v1').padStart(6)} ${avg('v2').padStart(6)}`); diff --git a/evals/briefs.md b/evals/briefs.md new file mode 100644 index 0000000..991f184 --- /dev/null +++ b/evals/briefs.md @@ -0,0 +1,26 @@ +# Eval briefs + +Each fixture is the skill exercised on one brief. Briefs span genres so the +detector isn't fooled by a single safe house style. Fixtures live in +`fixtures/` as self-contained HTML (exactly what Hallmark emits). + +## v1 briefs + +- **ledger** — landing page for *Ledger*, an open-source double-entry + bookkeeping CLI for indie developers. Genre: modern-minimal. + Macrostructure target: stat-led / workbench (no rote hero→3-features→CTA). +- **fernweh** — homepage for *Fernweh*, a small-group slow-travel company + running 8-day walking trips. Genre: atmospheric / editorial. + Macrostructure target: photographic or narrative-workflow. +- **kiln** — studio page for *Kiln & Co.*, a two-person ceramics workshop + selling a seasonal run of stoneware. Genre: editorial / specimen-adjacent + but must NOT default to Specimen. + +## v2 briefs (added when v1 saturates) + +- **synthwave-trap** — adversarial: a brief for *Pulse*, a "developer + analytics dashboard," whose own copy nudges toward dark-mode + neon + + metric-hero slop. The skill must resist the brief's gravity. +- **vellum** — a long-form essay page for *Vellum*, a writing tool. Probes + reading-comfort tells v1 underweights (measure rhythm, widows, heading + cadence, real prose hierarchy). diff --git a/evals/check.mjs b/evals/check.mjs new file mode 100644 index 0000000..68995e9 --- /dev/null +++ b/evals/check.mjs @@ -0,0 +1,80 @@ +// Non-mutating CI/local check for the Hallmark eval harness. +// +// Scores the configured fixtures through the same path as run.mjs, but never +// writes result snapshots, history files, or audit caches. Fails if detector +// rules regress, if the v2 structure order parameter regresses, or if the +// aggregate score falls below the configured threshold. +// +// Usage: +// node evals/check.mjs # check every eval version, min score 95 +// node evals/check.mjs --eval v2 # check one eval version +// node evals/check.mjs --min-score 98 # tighten the score floor +// node evals/check.mjs --json # machine-readable summary + +import fs from 'node:fs'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { detectorFailures, evaluateCycle } from './core.mjs'; + +const HERE = path.dirname(fileURLToPath(import.meta.url)); + +function arg(name, def) { + const i = process.argv.indexOf(`--${name}`); + return i >= 0 ? process.argv[i + 1] : def; +} + +function evalVersions() { + const requested = arg('eval', 'all'); + if (requested !== 'all') return [requested]; + const config = JSON.parse(fs.readFileSync(path.join(HERE, 'config.json'), 'utf8')); + return Object.keys(config.evals); +} + +const minScore = Number(arg('min-score', '95')); +const asJson = process.argv.includes('--json'); +const rows = []; +let failed = false; + +for (const evalVersion of evalVersions()) { + const { snapshot, perFixture, structureScore } = evaluateCycle({ evalVersion, label: 'non-mutating check', timestamp: 'check' }); + const failures = detectorFailures(perFixture); + const structureFailure = evalVersion === 'v2' && structureScore < 5; + const scoreFailure = snapshot.cycleScore < minScore; + + rows.push({ + evalVersion, + score: snapshot.cycleScore, + minScore, + ruleCount: snapshot.ruleCount, + fixtureCount: snapshot.fixtureCount, + structureScore: evalVersion === 'v2' ? structureScore : undefined, + detectorFailureCount: failures.length, + failures, + passed: failures.length === 0 && !structureFailure && !scoreFailure, + }); + + if (failures.length || structureFailure || scoreFailure) failed = true; +} + +if (asJson) { + console.log(JSON.stringify({ passed: !failed, checks: rows }, null, 2)); +} else { + console.log('\nHallmark eval check (non-mutating)\n'); + for (const row of rows) { + const structure = row.structureScore == null ? '' : ` structure ${row.structureScore.toFixed(2)}/5`; + const status = row.passed ? 'PASS' : 'FAIL'; + console.log(`${status} ${row.evalVersion}: ${row.score.toFixed(1)}/100 rules ${row.ruleCount} fixtures ${row.fixtureCount}${structure}`); + + if (row.score < row.minScore) { + console.log(` ✗ score below floor: ${row.score.toFixed(1)} < ${row.minScore}`); + } + if (row.structureScore != null && row.structureScore < 5) { + console.log(` ✗ structure order parameter below 5/5: ${row.structureScore.toFixed(2)}`); + } + for (const f of row.failures) { + console.log(` ✗ ${f.file} [${f.dim}] ${f.id} — ${f.note}`); + } + } +} + +process.exitCode = failed ? 1 : 0; diff --git a/evals/config.json b/evals/config.json new file mode 100644 index 0000000..e075925 --- /dev/null +++ b/evals/config.json @@ -0,0 +1,20 @@ +{ + "evals": { + "v1": { + "fixtures": [ + { "name": "ledger", "file": "fixtures/ledger.html", "judge": "fixtures/ledger.judge.json" }, + { "name": "fernweh", "file": "fixtures/fernweh.html", "judge": "fixtures/fernweh.judge.json" }, + { "name": "kiln", "file": "fixtures/kiln.html", "judge": "fixtures/kiln.judge.json" } + ] + }, + "v2": { + "fixtures": [ + { "name": "ledger", "file": "fixtures/ledger.html", "judge": "fixtures/ledger.judge.json" }, + { "name": "fernweh", "file": "fixtures/fernweh.html", "judge": "fixtures/fernweh.judge.json" }, + { "name": "kiln", "file": "fixtures/kiln.html", "judge": "fixtures/kiln.judge.json" }, + { "name": "pulse", "file": "fixtures/pulse.html", "judge": "fixtures/pulse.judge.json" }, + { "name": "vellum", "file": "fixtures/vellum.html", "judge": "fixtures/vellum.judge.json" } + ] + } + } +} diff --git a/evals/core.mjs b/evals/core.mjs new file mode 100644 index 0000000..f3e7d2d --- /dev/null +++ b/evals/core.mjs @@ -0,0 +1,144 @@ +// Shared scoring primitives for the Hallmark eval harness. +// +// `run.mjs` uses these helpers to write result snapshots. `check.mjs` uses the +// same path to validate the current fixtures without mutating the working tree. + +import fs from 'node:fs'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { analyze } from './detector.mjs'; + +const HERE = path.dirname(fileURLToPath(import.meta.url)); +const RESULTS = path.join(HERE, 'results'); +const CRAFT_AXES = ['philosophy', 'hierarchy', 'execution', 'specificity', 'restraint', 'variety', 'honesty']; +const DIM_ORDER = ['visual', 'typography', 'color', 'layout', 'motion', 'interaction', 'responsive', 'general', 'craft', 'structure']; + +function readJson(file) { + return JSON.parse(fs.readFileSync(file, 'utf8')); +} + +function macrostructureOf(file) { + const src = fs.readFileSync(path.join(HERE, file), 'utf8'); + return (src.match(/macrostructure:\s*([a-z0-9-]+)/i) || [])[1] || 'unstamped'; +} + +function structureScoreFor(fixtures) { + const macros = fixtures.map((fx) => macrostructureOf(fx.file)); + const counts = macros.reduce((m, k) => ((m[k] = (m[k] || 0) + 1), m), {}); + const collisions = Object.values(counts).reduce((a, n) => a + (n - 1), 0); + const unstamped = macros.filter((k) => k === 'unstamped').length; + return +Math.max(0, 5 - 2.5 * collisions - 2.5 * unstamped).toFixed(3); +} + +function loadConfig() { + return readJson(path.join(HERE, 'config.json')); +} + +function evaluateCycle({ cycle = 0, evalVersion = 'v1', label = '', timestamp = new Date().toISOString() } = {}) { + const config = loadConfig(); + const evalConfig = config.evals[evalVersion]; + if (!evalConfig) throw new Error(`Unknown eval version: ${evalVersion}`); + + const fixtures = evalConfig.fixtures; + const structureScore = structureScoreFor(fixtures); + const perFixture = []; + + for (const fx of fixtures) { + const det = analyze(path.join(HERE, fx.file), evalVersion); + const judge = readJson(path.join(HERE, fx.judge)); + const craftVals = CRAFT_AXES.map((a) => judge[a]); + const craft = +(craftVals.reduce((a, b) => a + b, 0) / craftVals.length).toFixed(3); + + const dimScores = { ...Object.fromEntries(Object.entries(det.dims).map(([k, v]) => [k, v.score])), craft }; + // The order parameter is a property of the whole eval set; v2 folds it in. + if (evalVersion === 'v2') dimScores.structure = structureScore; + const overall5 = +(Object.values(dimScores).reduce((a, b) => a + b, 0) / Object.values(dimScores).length).toFixed(3); + + perFixture.push({ + name: fx.name, + file: fx.file, + macrostructure: macrostructureOf(fx.file), + detector: det, + judge, + dimScores, + score100: +(overall5 * 20).toFixed(1), + }); + } + + const allDims = [...new Set(perFixture.flatMap((f) => Object.keys(f.dimScores)))]; + const aggDims = {}; + for (const d of allDims) { + const vals = perFixture.map((f) => f.dimScores[d]).filter((v) => v != null); + aggDims[d] = +(vals.reduce((a, b) => a + b, 0) / vals.length).toFixed(3); + } + const cycleScore = +(perFixture.reduce((a, f) => a + f.score100, 0) / perFixture.length).toFixed(1); + + const snapshot = { + cycle, evalVersion, label, + ruleCount: perFixture[0]?.detector.ruleCount ?? 0, + fixtureCount: perFixture.length, + cycleScore, + aggDims, + fixtures: perFixture.map((f) => ({ name: f.name, score100: f.score100, dimScores: f.dimScores })), + timestamp, + }; + + return { snapshot, perFixture, structureScore }; +} + +function writeSnapshot(snapshot) { + fs.mkdirSync(RESULTS, { recursive: true }); + const tag = `${String(snapshot.cycle).padStart(2, '0')}-${snapshot.evalVersion}`; + fs.writeFileSync(path.join(RESULTS, `cycle-${tag}.json`), JSON.stringify(snapshot, null, 2)); +} + +function readSnapshots() { + if (!fs.existsSync(RESULTS)) return []; + return fs.readdirSync(RESULTS) + .filter((f) => /^cycle-.*\.json$/.test(f)) + .map((f) => readJson(path.join(RESULTS, f))) + .sort((a, b) => (a.cycle - b.cycle) || a.evalVersion.localeCompare(b.evalVersion)); +} + +function buildHistoryMarkdown(snaps) { + let md = '# Eval history — Hallmark anti-slop hillclimb\n\n'; + md += 'Score = mean of detector dimensions plus craft, × 20 (0–100). v2 also\n'; + md += 'folds in the cross-fixture `structure` order parameter. Dimensions 1–8\n'; + md += 'are the deterministic Impeccable detector; `craft` is the LLM-judge mean\n'; + md += "of Hallmark's six axes + honesty.\n\n"; + md += '| Cycle | Eval | Rules | Score | ' + DIM_ORDER.map((d) => d.slice(0, 5)).join(' | ') + ' | Change |\n'; + md += '|---|---|---|---|' + DIM_ORDER.map(() => '---').join('|') + '|---|\n'; + let prev = null; + for (const s of snaps) { + const delta = prev == null ? '—' : (s.cycleScore - prev >= 0 ? `+${(s.cycleScore - prev).toFixed(1)}` : (s.cycleScore - prev).toFixed(1)); + md += `| ${s.cycle} | ${s.evalVersion} | ${s.ruleCount} | **${s.cycleScore.toFixed(1)}** | ` + + DIM_ORDER.map((d) => (s.aggDims[d] != null ? s.aggDims[d].toFixed(2) : '—')).join(' | ') + + ` | ${delta} |\n`; + prev = s.cycleScore; + } + md += '\n## Notes per cycle\n\n'; + for (const s of snaps) md += `- **Cycle ${s.cycle} (${s.evalVersion})** — ${s.label || '—'}\n`; + return md; +} + +function rebuildHistory() { + fs.mkdirSync(RESULTS, { recursive: true }); + fs.writeFileSync(path.join(RESULTS, 'history.md'), buildHistoryMarkdown(readSnapshots())); +} + +function detectorFailures(perFixture) { + return perFixture.flatMap((fixture) => Object.entries(fixture.detector.dims).flatMap(([dim, d]) => ( + d.rules.filter((r) => !r.pass).map((rule) => ({ fixture: fixture.name, file: fixture.file, dim, ...rule })) + ))); +} + +export { + CRAFT_AXES, + DIM_ORDER, + evaluateCycle, + writeSnapshot, + readSnapshots, + buildHistoryMarkdown, + rebuildHistory, + detectorFailures, +}; diff --git a/evals/detector.mjs b/evals/detector.mjs new file mode 100644 index 0000000..42f1baf --- /dev/null +++ b/evals/detector.mjs @@ -0,0 +1,680 @@ +// Hallmark slop detector — deterministic anti-slop checks for self-contained HTML. +// +// Grounds the eval in two external standards: +// 1. Impeccable's "37 patterns that mark an interface as AI-generated" +// across 8 dimensions (impeccable.style/slop). +// 2. Hallmark's own slop-test gates (references/slop-test.md). +// +// Only the deterministic (CLI-checkable) subset lives here. Taste dimensions +// (philosophy, hierarchy, specificity, restraint, variety, honesty) are scored +// by an LLM judge and merged by run.mjs. +// +// Usage: node detector.mjs [--json] + +import fs from 'node:fs'; + +const FONT_OVERUSED = [ + 'inter', 'roboto', 'open sans', 'poppins', 'lato', 'montserrat', + 'plus jakarta sans', 'space grotesk', 'geist', 'nunito', 'raleway', +]; +const GENERIC_FAMILIES = new Set([ + 'sans-serif', 'serif', 'monospace', 'system-ui', 'ui-monospace', + 'ui-serif', 'ui-sans-serif', 'cursive', 'fantasy', 'emoji', 'math', + '-apple-system', 'blinkmacsystemfont', 'segoe ui', 'inherit', 'initial', +]); + +// ---------------------------------------------------------------- doc loading +function loadDoc(path) { + const html = fs.readFileSync(path, 'utf8'); + const styleCss = [...html.matchAll(/]*>([\s\S]*?)<\/style>/gi)] + .map((m) => m[1]).join('\n'); + const inlineCss = [...html.matchAll(/\sstyle="([^"]*)"/gi)] + .map((m) => `__inline__{${m[1]}}`).join('\n'); + const css = `${styleCss}\n${inlineCss}`; + const stamp = (css.match(/\/\*\s*Hallmark[\s\S]*?\*\//) || [''])[0]; + const genre = + (stamp.match(/genre:\s*([a-z-]+)/i) || [])[1] || + (html.match(/data-genre="([^"]+)"/) || [])[1] || ''; + return { path, html, css, styleCss, stamp, genre }; +} + +// crude flat-rule splitter; @media wrappers drop out but inner rules survive. +function cssRules(css) { + const out = []; + const re = /([^{}]+)\{([^{}]*)\}/g; + let m; + while ((m = re.exec(css))) { + out.push({ sel: m[1].trim().toLowerCase(), body: m[2].trim() }); + } + return out; +} + +function tokenMap(css, activeTheme) { + const map = {}; + for (const r of cssRules(css)) { + const isRoot = /:root/.test(r.sel); + const themeM = r.sel.match(/\[data-theme(?:[~^$|*]?=)?["']?([a-z0-9-]+)?["']?\]/i); + if (!isRoot && !themeM) continue; + // when the page declares an active theme, ignore other themes' token blocks + // so a 22-theme design-system stylesheet isn't scored as one page + if (themeM && themeM[1] && activeTheme && themeM[1].toLowerCase() !== activeTheme.toLowerCase()) continue; + for (const m of r.body.matchAll(/(--[a-z0-9-]+)\s*:\s*([^;]+)/gi)) { + map[m[1].trim()] = m[2].trim(); + } + } + return map; +} + +function resolveVar(value, map, depth = 0) { + if (depth > 8 || !value) return value; + return value.replace(/var\(\s*(--[a-z0-9-]+)\s*(?:,([^)]*))?\)/gi, (_, name, fb) => { + const v = map[name.trim()]; + if (v != null) return resolveVar(v, map, depth + 1); + return fb != null ? resolveVar(fb.trim(), map, depth + 1) : ''; + }); +} + +// oklch lightness 0..1 (handles "oklch(.3 ...)" and "oklch(32% ...)") +function oklchL(value) { + const m = String(value).match(/oklch\(\s*([0-9.]+%?)/i); + if (!m) return null; + const raw = m[1]; + return raw.endsWith('%') ? parseFloat(raw) / 100 : parseFloat(raw); +} +function oklchC(value) { + const m = String(value).match(/oklch\(\s*[0-9.]+%?\s+([0-9.]+)/i); + return m ? parseFloat(m[1]) : null; +} +function oklchH(value) { + const m = String(value).match(/oklch\(\s*[0-9.]+%?\s+[0-9.]+\s+([0-9.]+)/i); + return m ? parseFloat(m[1]) : null; +} + +const COLOR_LITERAL = /#[0-9a-fA-F]{3,8}\b|\brgba?\([^)]*\)|\bhsla?\([^)]*\)|\boklch\([^)]*\)|\blab\([^)]*\)/gi; + +// Count families that are actually *applied*. Per gate 39, a monospace face +// counts toward the family budget only when used outside code contexts — +// counting an unused --font-mono token, or mono inside
/, is the
+// false positive that lit up dev-tool pages.
+function fontFamilies(rules, map) {
+  const fams = new Set();
+  for (const r of rules) {
+    if (/:root|\[data-theme/.test(r.sel)) continue;
+    const m = r.body.match(/font-family\s*:\s*([^;}]+)/i);
+    if (!m) continue;
+    const resolved = resolveVar(m[1], map);
+    const first = resolved.split(',')[0].trim().replace(/['"]/g, '').toLowerCase();
+    if (!first || GENERIC_FAMILIES.has(first) || first.startsWith('var(')) continue;
+    const mono = /mono/.test(first) || /\bmonospace\b/.test(resolved.toLowerCase());
+    const codeSel = /\b(pre|code|kbd|samp)\b/.test(r.sel);
+    if (mono && codeSel) continue;
+    fams.add(first);
+  }
+  return [...fams];
+}
+
+function headingLevels(html) {
+  return [...html.matchAll(/]/gi)].map((m) => +m[1]);
+}
+
+// Balanced extraction of @media (...max-width...) block bodies. Regex alone
+// trips over nested rule braces and indented closers, so count braces.
+function maxWidthMediaBodies(css) {
+  const bodies = [];
+  const re = /@media[^{]*max-width[^{]*\{/gi;
+  let m;
+  while ((m = re.exec(css))) {
+    let depth = 1;
+    let i = m.index + m[0].length;
+    const start = i;
+    for (; i < css.length && depth > 0; i++) {
+      if (css[i] === '{') depth++;
+      else if (css[i] === '}') depth--;
+    }
+    bodies.push(css.slice(start, i - 1));
+  }
+  return bodies;
+}
+
+// ---------------------------------------------------------------- rule set v1
+// Each rule: { id, dim, label, fn(ctx) -> {pass:boolean, note:string} }
+const RULES = [
+  // ---- TYPOGRAPHY -------------------------------------------------------
+  {
+    id: 'type-overused-font', dim: 'typography',
+    label: 'Display/body face is an overused AI default (Inter, Roboto, Geist…)',
+    fn: ({ fams }) => {
+      const hit = fams.filter((f) => FONT_OVERUSED.includes(f));
+      return { pass: hit.length === 0, note: hit.length ? `uses ${hit.join(', ')}` : 'distinctive faces' };
+    },
+  },
+  {
+    id: 'type-single-font', dim: 'typography',
+    label: 'Single font family across the whole page',
+    fn: ({ fams }) => ({ pass: fams.length !== 1, note: `${fams.length} distinct families` }),
+  },
+  {
+    id: 'type-too-many-fonts', dim: 'typography',
+    label: 'More than three distinct font families (gate 39)',
+    fn: ({ fams }) => ({ pass: fams.length <= 3, note: `${fams.length} families: ${fams.join(', ') || 'none'}` }),
+  },
+  {
+    id: 'type-allcaps-body', dim: 'typography',
+    label: 'All-caps applied to body/paragraph text',
+    fn: ({ rules }) => {
+      const bad = rules.find((r) => /(^|[\s,])(body|p|li|article)\b/.test(r.sel) && /text-transform\s*:\s*uppercase/.test(r.body));
+      return { pass: !bad, note: bad ? `on ${bad.sel}` : 'body is mixed-case' };
+    },
+  },
+  {
+    id: 'type-tight-leading', dim: 'typography',
+    label: 'Body line-height below 1.3',
+    fn: ({ rules, map }) => {
+      for (const r of rules) {
+        if (!/(^|[\s,])(body|p|li|article|html)\b/.test(r.sel)) continue;
+        const m = r.body.match(/line-height\s*:\s*([0-9.]+)\b/);
+        if (m && parseFloat(m[1]) < 1.3 && parseFloat(m[1]) > 0) return { pass: false, note: `line-height ${m[1]} on ${r.sel}` };
+      }
+      return { pass: true, note: 'comfortable leading' };
+    },
+  },
+  {
+    id: 'type-wide-tracking-body', dim: 'typography',
+    label: 'Letter-spacing above 0.05em on body text',
+    fn: ({ rules }) => {
+      for (const r of rules) {
+        if (!/(^|[\s,])(body|p|li)\b/.test(r.sel)) continue;
+        const m = r.body.match(/letter-spacing\s*:\s*([0-9.]+)em/);
+        if (m && parseFloat(m[1]) > 0.05) return { pass: false, note: `${m[1]}em on ${r.sel}` };
+      }
+      return { pass: true, note: 'tracking in range' };
+    },
+  },
+  {
+    id: 'type-tiny-body', dim: 'typography',
+    label: 'Body text below 12px',
+    fn: ({ rules }) => {
+      for (const r of rules) {
+        if (!/(^|[\s,])(body|p|li)\b/.test(r.sel)) continue;
+        const m = r.body.match(/font-size\s*:\s*([0-9.]+)px/);
+        if (m && parseFloat(m[1]) < 12) return { pass: false, note: `${m[1]}px on ${r.sel}` };
+      }
+      return { pass: true, note: 'legible body size' };
+    },
+  },
+
+  // ---- COLOR & CONTRAST -------------------------------------------------
+  {
+    id: 'color-gradient-text', dim: 'color',
+    label: 'Gradient clipped to text (background-clip: text)',
+    fn: ({ css }) => {
+      const bad = /background-clip\s*:\s*text|-webkit-background-clip\s*:\s*text/i.test(css) && /gradient/i.test(css);
+      return { pass: !bad, note: bad ? 'gradient text headline' : 'solid headline fill' };
+    },
+  },
+  {
+    id: 'color-ai-palette', dim: 'color',
+    label: 'AI purple/violet→cyan gradient',
+    fn: ({ css }) => {
+      // The tell is the violet/purple -> cyan/blue *ramp*, not a single
+      // deliberate brand hue. Require both ends to be present in one gradient.
+      const grads = [...css.matchAll(/(linear|radial|conic)-gradient\([^;}]*\)/gi)].map((m) => m[0]);
+      for (const g of grads) {
+        const violetKw = /purple|violet|indigo|fuchsia|magenta|#8b5cf6|#6366f1|#7c3aed|#a855f7|#b06cff/i.test(g);
+        const cyanKw = /\bcyan\b|\bteal\b|\baqua\b|#06b6d4|#22d3ee|#38d6ff/i.test(g);
+        const hues = [...g.matchAll(/oklch\([^)]*\)/gi)].map((x) => oklchH(x[0])).filter((h) => h != null);
+        const hasViolet = hues.some((h) => h >= 270 && h <= 330);
+        const hasCyanBlue = hues.some((h) => h >= 190 && h <= 265);
+        const ramp = (violetKw && cyanKw) || (hasViolet && hasCyanBlue) || (violetKw && hasCyanBlue) || (hasViolet && cyanKw);
+        if (ramp) return { pass: false, note: `violet→cyan ramp in ${g.slice(0, 40)}…` };
+      }
+      return { pass: true, note: 'no violet→cyan ramp' };
+    },
+  },
+  {
+    id: 'color-pure-black-bg', dim: 'color',
+    label: 'Pure #000 / oklch(0) used as a base background',
+    fn: ({ rules, map }) => {
+      for (const r of rules) {
+        const m = r.body.match(/background(?:-color)?\s*:\s*([^;]+)/i);
+        if (!m) continue;
+        const v = resolveVar(m[1], map).toLowerCase();
+        if (/#000(\b|000\b)|\boklch\(\s*0\s+0\b|\brgb\(\s*0\s*,\s*0\s*,\s*0\s*\)|\bblack\b/.test(v)) return { pass: false, note: `pure black bg on ${r.sel}` };
+      }
+      return { pass: true, note: 'no pure-black base' };
+    },
+  },
+  {
+    id: 'color-zero-chroma', dim: 'color',
+    label: 'Zero-chroma flat-grey neutrals (gate 24)',
+    fn: ({ map, genre }) => {
+      if (genre === 'modern-minimal') return { pass: true, note: 'modern-minimal allows zero-chroma' };
+      for (const [k, v] of Object.entries(map)) {
+        if (!/--color|--paper|--ink|--surface|--muted|--neutral|--bg/.test(k)) continue;
+        const c = oklchC(resolveVar(v, map));
+        if (c === 0) return { pass: false, note: `${k} has 0 chroma` };
+      }
+      return { pass: true, note: 'neutrals tinted toward anchor' };
+    },
+  },
+  {
+    id: 'color-token-discipline', dim: 'color',
+    label: 'Colour literal outside the token block (gate 58)',
+    fn: ({ rules }) => {
+      const offenders = [];
+      for (const r of rules) {
+        if (/:root|\[data-theme/.test(r.sel)) continue;
+        const lits = (r.body.match(COLOR_LITERAL) || []).filter((c) => !/transparent|currentcolor|inherit|none/i.test(c));
+        if (lits.length) offenders.push(`${r.sel}: ${lits[0]}`);
+      }
+      return { pass: offenders.length === 0, note: offenders.length ? `${offenders.length} literal(s), e.g. ${offenders[0]}` : 'all colours via tokens' };
+    },
+  },
+  {
+    id: 'color-ink-on-ink', dim: 'color',
+    label: 'Text lightness too close to its background (ink-on-ink, gates 46–50)',
+    fn: ({ rules, map }) => {
+      for (const r of rules) {
+        if (/:root|\[data-theme/.test(r.sel)) continue;
+        const cM = r.body.match(/(? {
+      for (const r of rules) {
+        // a left rule on a blockquote/figure is a typographic convention, not the card tell
+        if (/\b(blockquote|figure|aside|q|cite)\b/.test(r.sel)) continue;
+        const m = r.body.match(/border-(left|right)\s*:\s*([0-9.]+)px\s+\w+\s+([^;]+)/i);
+        if (!m) continue;
+        const w = parseFloat(m[2]);
+        const col = resolveVar(m[3], map).toLowerCase();
+        if (w >= 4 && !/transparent/.test(col)) return { pass: false, note: `${m[2]}px ${m[1]} stripe on ${r.sel}` };
+      }
+      return { pass: true, note: 'no side-tab stripe' };
+    },
+  },
+  {
+    id: 'visual-glassmorphism', dim: 'visual',
+    label: 'Glassmorphism (backdrop blur on translucent panels)',
+    fn: ({ css }) => {
+      const bad = /backdrop-filter\s*:\s*[^;]*blur/i.test(css) && /rgba?\([^)]*0?\.\d+\s*\)|\/\s*0?\.\d+\s*\)/.test(css);
+      return { pass: !bad, note: bad ? 'translucent blur panel' : 'no glass panels' };
+    },
+  },
+  {
+    id: 'visual-sparkline-decoration', dim: 'visual',
+    label: 'Sparkline / chart used as pure decoration',
+    fn: ({ html }) => {
+      const bad = /class="[^"]*\b(sparkline|spark-line|decor[a-z-]*chart|fake-chart)\b/i.test(html);
+      return { pass: !bad, note: bad ? 'decorative sparkline present' : 'no decorative charts' };
+    },
+  },
+
+  // ---- LAYOUT & SPACE ---------------------------------------------------
+  {
+    id: 'layout-center-everything', dim: 'layout',
+    label: 'Everything centre-aligned (≥4 text-align:center)',
+    fn: ({ css }) => {
+      const n = (css.match(/text-align\s*:\s*center/gi) || []).length;
+      return { pass: n < 4, note: `${n} centred blocks` };
+    },
+  },
+  {
+    id: 'layout-justified', dim: 'layout',
+    label: 'Justified body text (word-spacing rivers)',
+    fn: ({ css }) => {
+      const bad = /text-align\s*:\s*justify/i.test(css);
+      return { pass: !bad, note: bad ? 'justified text present' : 'ragged-right text' };
+    },
+  },
+  {
+    id: 'layout-three-col-cards', dim: 'layout',
+    label: 'Three equal-column card grid (icon-tile template)',
+    fn: ({ css }) => {
+      const bad = /grid-template-columns\s*:\s*repeat\(\s*3\s*,\s*(?:minmax\(0,\s*)?1fr/i.test(css) || /grid-template-columns\s*:\s*1fr\s+1fr\s+1fr\b/i.test(css);
+      return { pass: !bad, note: bad ? 'repeat(3, 1fr) grid' : 'no rote 3-col grid' };
+    },
+  },
+  {
+    id: 'layout-long-measure', dim: 'layout',
+    label: 'Prose measure beyond 75ch (gate 27)',
+    fn: ({ css }) => {
+      for (const m of css.matchAll(/max-width\s*:\s*([0-9.]+)ch/gi)) {
+        if (parseFloat(m[1]) > 75) return { pass: false, note: `${m[1]}ch measure` };
+      }
+      return { pass: true, note: 'measure ≤ 75ch' };
+    },
+  },
+  {
+    id: 'layout-arbitrary-spacing', dim: 'layout',
+    label: 'Spacing off the 4px scale (gate 26)',
+    fn: ({ rules, map }) => {
+      for (const r of rules) {
+        if (/:root|\[data-theme/.test(r.sel)) continue;
+        for (const m of r.body.matchAll(/\b(?:padding|margin|gap|row-gap|column-gap)(?:-\w+)?\s*:\s*([^;]+)/gi)) {
+          const resolved = resolveVar(m[1], map);
+          for (const px of resolved.matchAll(/(-?[0-9.]+)px/g)) {
+            const v = Math.abs(parseFloat(px[1]));
+            if (v > 0 && v % 4 !== 0) return { pass: false, note: `${px[1]}px on ${r.sel}` };
+          }
+        }
+      }
+      return { pass: true, note: 'spacing on 4px scale' };
+    },
+  },
+  {
+    id: 'layout-skipped-heading', dim: 'layout',
+    label: 'Skipped heading level (h1→h3 with no h2)',
+    fn: ({ html }) => {
+      const lv = headingLevels(html);
+      for (let i = 1; i < lv.length; i++) {
+        if (lv[i] - lv[i - 1] > 1) return { pass: false, note: `h${lv[i - 1]}→h${lv[i]}` };
+      }
+      return { pass: true, note: 'heading levels contiguous' };
+    },
+  },
+
+  // ---- MOTION -----------------------------------------------------------
+  {
+    id: 'motion-transition-all', dim: 'motion',
+    label: 'transition: all (gate 11)',
+    fn: ({ css }) => {
+      const bad = /transition\s*:\s*all\b/i.test(css);
+      return { pass: !bad, note: bad ? 'transition: all present' : 'transitions are scoped' };
+    },
+  },
+  {
+    id: 'motion-hover-scale', dim: 'motion',
+    label: 'Uniform hover-scale (gate 12)',
+    fn: ({ css }) => {
+      const bad = /:hover[^{}]*\{[^{}]*transform\s*:\s*scale\(\s*1\.0[1-9]/i.test(css) || /hover:scale-10[0-9]/i.test(css);
+      return { pass: !bad, note: bad ? 'hover scale present' : 'no rote hover-scale' };
+    },
+  },
+  {
+    id: 'motion-bouncy-easing', dim: 'motion',
+    label: 'Bouncy/overshoot easing on UI state (gate 13)',
+    fn: ({ css }) => {
+      for (const m of css.matchAll(/cubic-bezier\(\s*([0-9.-]+)\s*,\s*([0-9.-]+)\s*,\s*([0-9.-]+)\s*,\s*([0-9.-]+)\s*\)/gi)) {
+        const y1 = parseFloat(m[2]); const y2 = parseFloat(m[4]);
+        if (y1 > 1 || y2 > 1 || y1 < 0 || y2 < 0) return { pass: false, note: `overshoot ${m[0]}` };
+      }
+      return { pass: true, note: 'no overshoot easing' };
+    },
+  },
+  {
+    id: 'motion-layout-animation', dim: 'motion',
+    label: 'Animating layout properties (gate 15)',
+    fn: ({ css }) => {
+      const bad = /transition\s*:[^;}]*\b(width|height|top|left|right|bottom|margin|padding)\b/i.test(css);
+      return { pass: !bad, note: bad ? 'layout prop in transition' : 'animates transform/opacity only' };
+    },
+  },
+  {
+    id: 'motion-no-reduced-motion', dim: 'motion',
+    label: 'Animation without prefers-reduced-motion fallback (gate 29)',
+    fn: ({ css }) => {
+      const hasMotion = /@keyframes|animation\s*:|transition\s*:/i.test(css);
+      const hasGuard = /prefers-reduced-motion/i.test(css);
+      return { pass: !hasMotion || hasGuard, note: hasMotion ? (hasGuard ? 'guarded' : 'no reduced-motion guard') : 'no motion' };
+    },
+  },
+
+  // ---- INTERACTION ------------------------------------------------------
+  {
+    id: 'interaction-emoji-icon', dim: 'interaction',
+    label: 'Emoji used as a feature/step icon (gate 60)',
+    fn: ({ html }) => {
+      const body = html.replace(//gi, '').replace(//gi, '');
+      const bad = /[\u{1F300}-\u{1FAFF}\u{2600}-\u{27BF}\u{2B00}-\u{2BFF}\u{FE0F}]/u.test(body);
+      return { pass: !bad, note: bad ? 'emoji glyph in markup' : 'no emoji icons' };
+    },
+  },
+  {
+    id: 'interaction-all-primary', dim: 'interaction',
+    label: 'Every button styled as primary (no secondary register)',
+    fn: ({ html, css }) => {
+      const btns = (html.match(/<(?:button|a)[^>]*class="[^"]*\b(?:btn|button|cta)\b/gi) || []).length;
+      const hasVariant = /\b(btn|button)[-_]{1,2}(secondary|ghost|outline|tertiary|quiet|text)\b|data-variant|\bbtn--/i.test(html + css);
+      return { pass: btns < 3 || hasVariant, note: btns >= 3 && !hasVariant ? `${btns} buttons, one register` : 'button hierarchy present' };
+    },
+  },
+  {
+    id: 'interaction-placeholder-names', dim: 'interaction',
+    label: 'Placeholder names / startup clichés (gate 20)',
+    fn: ({ html }) => {
+      // Only flag actual placeholder *names* — not ordinary words ("seamless",
+      // "unleash") that legitimately appear in marketing prose.
+      const bad = /jane doe|john smith|john doe|lorem ipsum|\bacme\b|\bwidget(?:co|inc)\b|example\.com/i.test(html);
+      return { pass: !bad, note: bad ? 'placeholder/cliché name' : 'specific copy' };
+    },
+  },
+  {
+    id: 'interaction-modal-reflex', dim: 'interaction',
+    label: 'Reaching for a modal/dialog reflexively',
+    fn: ({ html }) => {
+      const bad = / {
+      const bad = !/(html|body)[^{}]*\{[^{}]*overflow-x\s*:\s*clip/i.test(css) && !/(html|body)\s*,\s*(html|body)[^{}]*\{[^{}]*overflow-x\s*:\s*clip/i.test(css);
+      return { pass: !bad, note: bad ? 'no overflow-x: clip' : 'overflow-x clipped' };
+    },
+  },
+  {
+    id: 'responsive-img-grid-minmax', dim: 'responsive',
+    label: 'Image-bearing 1fr grid track without minmax(0,1fr) (gate 61)',
+    fn: ({ css, html }) => {
+      const hasImg = /