diff --git a/.gitignore b/.gitignore index aa8fb7b..c9ff8a7 100644 --- a/.gitignore +++ b/.gitignore @@ -43,3 +43,6 @@ skills-lock.json # Launch-day Hallmark vs Impeccable vs no-skills comparison — one-off artifact, # not referenced from the marketing site or README. Kept locally if useful. site/_launch-comparison/ + +# Eval audit snapshots (throwaway) +evals/.site-cache/ diff --git a/evals/README.md b/evals/README.md new file mode 100644 index 0000000..729f137 --- /dev/null +++ b/evals/README.md @@ -0,0 +1,49 @@ +# `evals/` — anti-slop eval harness + +An eval-driven hillclimb that improved Hallmark against two external anchors: + +- **Impeccable's slop standard** — "37 patterns that mark an interface as + AI-generated" across 8 dimensions ([impeccable.style/slop](https://impeccable.style/slop)). +- **"Your Evals Will Break and You Won't See It Coming"** — why static evals + silently miss new failure regimes, and the case for self-evolving evals + ([wanglun1996.github.io](https://wanglun1996.github.io/blog/your-evals-will-break.html)). + +## What's here + +| File | Role | +|---|---| +| `rubric.md` | The scoring rubric: 8 detector dimensions + 1 craft (judge) dimension. | +| `briefs.md` | The briefs each fixture is the skill exercised on. | +| `detector.mjs` | Deterministic slop detector — the CLI-checkable subset of the 37 patterns + Hallmark gates. v1 = 37 rules, v2 = 43. | +| `run.mjs` | Merges detector + judge sidecars, computes the cross-fixture **order parameter**, snapshots a cycle, rebuilds `results/history.md`. | +| `config.json` | Which fixtures belong to eval v1 vs v2. | +| `fixtures/*.html` | Self-contained pages (what Hallmark emits). | +| `fixtures/*.judge.json` | Per-fixture craft scores (philosophy, hierarchy, execution, specificity, restraint, variety, honesty). | +| `results/` | One JSON snapshot per cycle + the running `history.md` table. 
| + +## Run it + +```bash +cd evals +node detector.mjs fixtures/pulse.html --eval v2 # inspect one page +node run.mjs --cycle 10 --eval v2 --label "..." # score a cycle, update history +``` + +## The hillclimb (10 cycles) + +**Phase 1 (v1, cycles 1–5)** drove the three originals from 74.2 → 98.3 by +closing gaps the detector found — each cycle added a real gate to +`references/slop-test.md` (gates **70–77**) and brought the fixtures into line. + +**The break (cycle 6)** upgraded the eval to **v2**: six new detector rules +for failure modes v1 was blind to (notably hero-float / gate 54, which the +v1-perfect fixtures had been violating the whole time), a cross-fixture +**order parameter** (macrostructure reuse — variety is a property of the +*set*, not the page), and two adversarial fixtures (`pulse`, `vellum`). Score +fell 98.3 → 76.4, exactly as the blog predicts. + +**Phase 2 (v2, cycles 7–10)** climbed back to 98.7, adding gates **78–84** +and resisting `pulse`'s dark/neon/metric-hero brief gravity. + +The skill is the artifact that improved: 15 new gates, motivated by what the +eval could measure. See `results/history.md` for the full score table. diff --git a/evals/audit-site.mjs b/evals/audit-site.mjs new file mode 100644 index 0000000..430d1d2 --- /dev/null +++ b/evals/audit-site.mjs @@ -0,0 +1,64 @@ +// Audit real, in-repo Hallmark output with the detector. +// +// The detector reads inline CSS only; the shipped pages link external +// stylesheets. This adapter inlines local files into +// a self-contained snapshot, then scores it under both eval versions so we can +// see what the current skill's gates (v2) catch that the initial skill's +// gates (v1) did not — on artifacts the eval author did not write. +// +// Usage: node audit-site.mjs [ ...] 
+ +import fs from 'node:fs'; +import path from 'node:path'; +import { fileURLToPath } from 'node:url'; +import { analyze } from './detector.mjs'; + +const HERE = path.dirname(fileURLToPath(import.meta.url)); +const ROOT = path.resolve(HERE, '..'); +const CACHE = path.join(HERE, '.site-cache'); +fs.mkdirSync(CACHE, { recursive: true }); + +function inlinePage(htmlPath) { + const abs = path.resolve(ROOT, htmlPath); + const dir = path.dirname(abs); + let html = fs.readFileSync(abs, 'utf8'); + const links = [...html.matchAll(/]*rel=["']stylesheet["'][^>]*>/gi)].map((m) => m[0]); + const blocks = []; + for (const link of links) { + const href = (link.match(/href=["']([^"']+)["']/i) || [])[1]; + if (!href || /^https?:|^\/\//i.test(href)) continue; // skip remote (e.g. Google Fonts) + const cssPath = path.resolve(dir, href.split(/[?#]/)[0]); + if (fs.existsSync(cssPath)) blocks.push(`/* ${href} */\n${fs.readFileSync(cssPath, 'utf8')}`); + } + if (blocks.length) { + const styleTag = `\n\n`; + html = html.replace(/<\/head>/i, `${styleTag}`); + } + const out = path.join(CACHE, htmlPath.replace(/[\/]/g, '__')); + fs.writeFileSync(out, html); + return out; +} + +const pages = process.argv.slice(2).filter((a) => !a.startsWith('--')); +const rows = []; +for (const p of pages) { + let snap; + try { snap = inlinePage(p); } catch (e) { console.error(`skip ${p}: ${e.message}`); continue; } + const v1 = analyze(snap, 'v1'); + const v2 = analyze(snap, 'v2'); + const v2fails = Object.values(v2.dims).flatMap((d) => d.rules).filter((r) => !r.pass); + rows.push({ page: p, v1: v1.overall, v2: v2.overall, fails: v2fails, multiTheme: v2.multiTheme, themeCount: v2.themeCount }); +} + +const name = (p) => p.replace(/^site\//, '').replace(/\/index\.html$/, '/').replace(/index\.html$/, ''); +console.log('\nReal Hallmark corpus — detector audit (overall /5)\n'); +console.log(`${'page'.padEnd(34)} ${'v1'.padStart(6)} ${'v2'.padStart(6)} v2 findings`); +console.log('-'.repeat(72)); +for (const r of 
rows) { + const f = r.fails.length ? r.fails.map((x) => x.id.replace(/^v2-/, '')).join(', ') : '—'; + const tag = r.multiTheme ? ` [multi-theme:${r.themeCount}, low-confidence]` : ''; + console.log(`${name(r.page).padEnd(34)} ${r.v1.toFixed(2).padStart(6)} ${r.v2.toFixed(2).padStart(6)} ${f}${tag}`); +} +const avg = (k) => (rows.reduce((a, r) => a + r[k], 0) / rows.length).toFixed(2); +console.log('-'.repeat(72)); +console.log(`${'CORPUS MEAN'.padEnd(34)} ${avg('v1').padStart(6)} ${avg('v2').padStart(6)}`); diff --git a/evals/briefs.md b/evals/briefs.md new file mode 100644 index 0000000..991f184 --- /dev/null +++ b/evals/briefs.md @@ -0,0 +1,26 @@ +# Eval briefs + +Each fixture is the skill exercised on one brief. Briefs span genres so the +detector isn't fooled by a single safe house style. Fixtures live in +`fixtures/` as self-contained HTML (exactly what Hallmark emits). + +## v1 briefs + +- **ledger** — landing page for *Ledger*, an open-source double-entry + bookkeeping CLI for indie developers. Genre: modern-minimal. + Macrostructure target: stat-led / workbench (no rote hero→3-features→CTA). +- **fernweh** — homepage for *Fernweh*, a small-group slow-travel company + running 8-day walking trips. Genre: atmospheric / editorial. + Macrostructure target: photographic or narrative-workflow. +- **kiln** — studio page for *Kiln & Co.*, a two-person ceramics workshop + selling a seasonal run of stoneware. Genre: editorial / specimen-adjacent + but must NOT default to Specimen. + +## v2 briefs (added when v1 saturates) + +- **synthwave-trap** — adversarial: a brief for *Pulse*, a "developer + analytics dashboard," whose own copy nudges toward dark-mode + neon + + metric-hero slop. The skill must resist the brief's gravity. +- **vellum** — a long-form essay page for *Vellum*, a writing tool. Probes + reading-comfort tells v1 underweights (measure rhythm, widows, heading + cadence, real prose hierarchy). 
diff --git a/evals/config.json b/evals/config.json new file mode 100644 index 0000000..e075925 --- /dev/null +++ b/evals/config.json @@ -0,0 +1,20 @@ +{ + "evals": { + "v1": { + "fixtures": [ + { "name": "ledger", "file": "fixtures/ledger.html", "judge": "fixtures/ledger.judge.json" }, + { "name": "fernweh", "file": "fixtures/fernweh.html", "judge": "fixtures/fernweh.judge.json" }, + { "name": "kiln", "file": "fixtures/kiln.html", "judge": "fixtures/kiln.judge.json" } + ] + }, + "v2": { + "fixtures": [ + { "name": "ledger", "file": "fixtures/ledger.html", "judge": "fixtures/ledger.judge.json" }, + { "name": "fernweh", "file": "fixtures/fernweh.html", "judge": "fixtures/fernweh.judge.json" }, + { "name": "kiln", "file": "fixtures/kiln.html", "judge": "fixtures/kiln.judge.json" }, + { "name": "pulse", "file": "fixtures/pulse.html", "judge": "fixtures/pulse.judge.json" }, + { "name": "vellum", "file": "fixtures/vellum.html", "judge": "fixtures/vellum.judge.json" } + ] + } + } +} diff --git a/evals/detector.mjs b/evals/detector.mjs new file mode 100644 index 0000000..42f1baf --- /dev/null +++ b/evals/detector.mjs @@ -0,0 +1,680 @@ +// Hallmark slop detector — deterministic anti-slop checks for self-contained HTML. +// +// Grounds the eval in two external standards: +// 1. Impeccable's "37 patterns that mark an interface as AI-generated" +// across 8 dimensions (impeccable.style/slop). +// 2. Hallmark's own slop-test gates (references/slop-test.md). +// +// Only the deterministic (CLI-checkable) subset lives here. Taste dimensions +// (philosophy, hierarchy, specificity, restraint, variety, honesty) are scored +// by an LLM judge and merged by run.mjs. 
+//
+// Usage: node detector.mjs <file.html> [--json]
+
+import fs from 'node:fs';
+
+const FONT_OVERUSED = [
+  'inter', 'roboto', 'open sans', 'poppins', 'lato', 'montserrat',
+  'plus jakarta sans', 'space grotesk', 'geist', 'nunito', 'raleway',
+];
+const GENERIC_FAMILIES = new Set([
+  'sans-serif', 'serif', 'monospace', 'system-ui', 'ui-monospace',
+  'ui-serif', 'ui-sans-serif', 'cursive', 'fantasy', 'emoji', 'math',
+  '-apple-system', 'blinkmacsystemfont', 'segoe ui', 'inherit', 'initial',
+]);
+
+// ---------------------------------------------------------------- doc loading
+/**
+ * Read one HTML file and gather the CSS the detector rules inspect.
+ *
+ * `styleCss` is the concatenated contents of every <style> element. Each
+ * double-quoted `style="…"` attribute is wrapped as a synthetic
+ * `__inline__{…}` rule so the flat rule splitter can see it too; `css` is
+ * both parts joined. `stamp` is the first CSS comment that opens with
+ * "Hallmark" (or '' when there is none). `genre` comes from a `genre:` field
+ * in that stamp, falling back to a `data-genre="…"` attribute, then ''.
+ *
+ * @param {string} path - file to read (UTF-8).
+ * @returns {{path: string, html: string, css: string, styleCss: string, stamp: string, genre: string}}
+ * @throws whatever fs.readFileSync throws when the file cannot be read.
+ */
+function loadDoc(path) {
+  const html = fs.readFileSync(path, 'utf8');
+  // The opening tag must be matched explicitly: without `<style` the pattern
+  // would start capturing at the first `>` anywhere in the document.
+  const styleCss = [...html.matchAll(/<style\b[^>]*>([\s\S]*?)<\/style>/gi)]
+    .map((m) => m[1]).join('\n');
+  const inlineCss = [...html.matchAll(/\sstyle="([^"]*)"/gi)]
+    .map((m) => `__inline__{${m[1]}}`).join('\n');
+  const css = `${styleCss}\n${inlineCss}`;
+  const stamp = (css.match(/\/\*\s*Hallmark[\s\S]*?\*\//) || [''])[0];
+  const genre =
+    (stamp.match(/genre:\s*([a-z-]+)/i) || [])[1] ||
+    (html.match(/data-genre="([^"]+)"/) || [])[1] || '';
+  return { path, html, css, styleCss, stamp, genre };
+}
+
+// crude flat-rule splitter; @media wrappers drop out but inner rules survive.
+/**
+ * Split CSS into flat `{ sel, body }` pairs. Only brace pairs with no nested
+ * braces match, so an @media wrapper itself is dropped while the rules inside
+ * it are still returned. Selectors are trimmed and lower-cased.
+ */
+function cssRules(css) {
+  const flatRule = /([^{}]+)\{([^{}]*)\}/g;
+  return Array.from(String(css).matchAll(flatRule), ([, selector, declarations]) => ({
+    sel: selector.trim().toLowerCase(),
+    body: declarations.trim(),
+  }));
+}
+
+/**
+ * Collect custom-property declarations from `:root` and `[data-theme…]` rules
+ * into a name -> value object; later declarations overwrite earlier ones.
+ */
+function tokenMap(css, activeTheme) {
+  const tokens = {};
+  const themeSelector = /\[data-theme(?:[~^$|*]?=)?["']?([a-z0-9-]+)?["']?\]/i;
+  cssRules(css).forEach(({ sel, body }) => {
+    const themed = sel.match(themeSelector);
+    if (!themed && !/:root/.test(sel)) return;
+    // A page that declares an active theme only gets that theme's tokens;
+    // otherwise a 22-theme design-system stylesheet would be scored as if it
+    // were a single page.
+    const themeName = themed ? themed[1] : undefined;
+    if (themeName && activeTheme && themeName.toLowerCase() !== activeTheme.toLowerCase()) return;
+    for (const [, prop, val] of body.matchAll(/(--[a-z0-9-]+)\s*:\s*([^;]+)/gi)) {
+      tokens[prop.trim()] = val.trim();
+    }
+  });
+  return tokens;
+}
+
+/**
+ * Expand `var(--name[, fallback])` references using `map`. A known name is
+ * expanded recursively; an unknown one becomes its (expanded) fallback, or ''
+ * when it has none. Past 8 levels of nesting the value is returned unexpanded.
+ */
+function resolveVar(value, map, depth = 0) {
+  if (!value || depth > 8) return value;
+  const expand = (_whole, name, fallback) => {
+    const known = map[name.trim()];
+    if (known != null) return resolveVar(known, map, depth + 1);
+    if (fallback == null) return '';
+    return resolveVar(fallback.trim(), map, depth + 1);
+  };
+  return value.replace(/var\(\s*(--[a-z0-9-]+)\s*(?:,([^)]*))?\)/gi, expand);
+}
+
+// oklch lightness as 0..1; accepts both "oklch(.3 ...)" and "oklch(32% ...)".
+function oklchL(value) {
+  const found = /oklch\(\s*([0-9.]+%?)/i.exec(String(value));
+  if (found === null) return null;
+  const [, lightness] = found;
+  const number = parseFloat(lightness);
+  return lightness.endsWith('%') ? number / 100 : number;
+}
+// oklch chroma (second component), or null when the value is not oklch().
+function oklchC(value) {
+  const found = /oklch\(\s*[0-9.]+%?\s+([0-9.]+)/i.exec(String(value));
+  if (found === null) return null;
+  return parseFloat(found[1]);
+}
+// oklch hue (third component), or null when the value is not oklch().
+function oklchH(value) {
+  const found = /oklch\(\s*[0-9.]+%?\s+[0-9.]+\s+([0-9.]+)/i.exec(String(value));
+  if (found === null) return null;
+  return parseFloat(found[1]);
+}
+
+const COLOR_LITERAL = /#[0-9a-fA-F]{3,8}\b|\brgba?\([^)]*\)|\bhsla?\([^)]*\)|\boklch\([^)]*\)|\blab\([^)]*\)/gi;
+
+// Count families that are actually *applied*.
+// Per gate 39, a monospace face
+// counts toward the family budget only when used outside code contexts —
+// counting an unused --font-mono token, or mono inside <pre>/<code>, is the
+// false positive that lit up dev-tool pages.
+/**
+ * List the font families a page actually applies.
+ *
+ * Token blocks (`:root`, `[data-theme…]`) are skipped, so a face that is only
+ * declared as a token is never counted. For every other rule the first
+ * `font-family` declaration is resolved through `map` and only the first
+ * family of the stack is considered. Generic/system families are ignored, and
+ * a monospace face is ignored when the selector names a code element
+ * (pre, code, kbd, samp).
+ *
+ * @param {{sel: string, body: string}[]} rules - flat rules, as produced by cssRules().
+ * @param {Object<string, string>} map - custom-property values used to resolve var().
+ * @returns {string[]} distinct lower-cased family names, in first-seen order.
+ */
+function fontFamilies(rules, map) {
+  const applied = new Set();
+  rules.forEach(({ sel, body }) => {
+    if (/:root|\[data-theme/.test(sel)) return;
+    const declaration = /font-family\s*:\s*([^;}]+)/i.exec(body);
+    if (declaration === null) return;
+    const stack = resolveVar(declaration[1], map);
+    const [head] = stack.split(',');
+    const face = head.trim().replace(/['"]/g, '').toLowerCase();
+    if (!face || GENERIC_FAMILIES.has(face) || face.startsWith('var(')) return;
+    const isMono = /mono/.test(face) || /\bmonospace\b/.test(stack.toLowerCase());
+    const inCodeContext = /\b(pre|code|kbd|samp)\b/.test(sel);
+    if (isMono && inCodeContext) return;
+    applied.add(face);
+  });
+  return Array.from(applied);
+}
+
+/**
+ * Heading levels in document order, e.g. [1, 2, 2, 3].
+ *
+ * Matches opening <h1>…<h6> tags only: the level digit must be followed by
+ * whitespace or `>`, so <header>, <hr> and <html> are not counted, and closing
+ * tags never match because of the `/` after `<`.
+ *
+ * @param {string} html - page markup.
+ * @returns {number[]} one entry (1–6) per opening heading tag.
+ */
+function headingLevels(html) {
+  // The capture group is required: callers compare numeric levels, and
+  // without it every entry would be NaN.
+  return [...html.matchAll(/<h([1-6])[\s>]/gi)].map((m) => Number(m[1]));
+}
+
+// Balanced extraction of @media (...max-width...) block bodies. Regex alone
+// trips over nested rule braces and indented closers, so count braces.
+/**
+ * @param {string} css - stylesheet text.
+ * @returns {string[]} the text between the braces of every @media block whose
+ *   prelude mentions max-width, in source order. A block that is never closed
+ *   yields everything up to the end of the input.
+ */
+function maxWidthMediaBodies(css) {
+  const bodies = [];
+  const re = /@media[^{]*max-width[^{]*\{/gi;
+  let m;
+  while ((m = re.exec(css))) {
+    const start = m.index + m[0].length;
+    let depth = 1;
+    // `end` stops ON the closing brace, or at css.length when the block is
+    // unterminated, so the slice never drops a real character.
+    let end = start;
+    while (end < css.length) {
+      const ch = css[end];
+      if (ch === '{') {
+        depth++;
+      } else if (ch === '}') {
+        depth--;
+        if (depth === 0) break;
+      }
+      end++;
+    }
+    bodies.push(css.slice(start, end));
+  }
+  return bodies;
+}
+
+// ---------------------------------------------------------------- rule set v1
+// Each rule: { id, dim, label, fn(ctx) -> {pass:boolean, note:string} }
+const RULES = [
+  // ---- TYPOGRAPHY -------------------------------------------------------
+  {
+    id: 'type-overused-font', dim: 'typography',
+    label: 'Display/body face is an overused AI default (Inter, Roboto, Geist…)',
+    fn: ({ fams }) => {
+      const hit = fams.filter((f) => FONT_OVERUSED.includes(f));
+      return { pass: hit.length === 0, note: hit.length ? `uses ${hit.join(', ')}` : 'distinctive faces' };
+    },
+  },
+  {
+    id: 'type-single-font', dim: 'typography',
+    label: 'Single font family across the whole page',
+    fn: ({ fams }) => ({ pass: fams.length !== 1, note: `${fams.length} distinct families` }),
+  },
+  {
+    id: 'type-too-many-fonts', dim: 'typography',
+    label: 'More than three distinct font families (gate 39)',
+    fn: ({ fams }) => ({ pass: fams.length <= 3, note: `${fams.length} families: ${fams.join(', ') || 'none'}` }),
+  },
+  {
+    id: 'type-allcaps-body', dim: 'typography',
+    label: 'All-caps applied to body/paragraph text',
+    fn: ({ rules }) => {
+      const bad = rules.find((r) => /(^|[\s,])(body|p|li|article)\b/.test(r.sel) && /text-transform\s*:\s*uppercase/.test(r.body));
+      return { pass: !bad, note: bad ? `on ${bad.sel}` : 'body is mixed-case' };
+    },
+  },
+  {
+    id: 'type-tight-leading', dim: 'typography',
+    label: 'Body line-height below 1.3',
+    fn: ({ rules, map }) => {
+      for (const r of rules) {
+        if (!/(^|[\s,])(body|p|li|article|html)\b/.test(r.sel)) continue;
+        const m = r.body.match(/line-height\s*:\s*([0-9.]+)\b/);
+        if (m && parseFloat(m[1]) < 1.3 && parseFloat(m[1]) > 0) return { pass: false, note: `line-height ${m[1]} on ${r.sel}` };
+      }
+      return { pass: true, note: 'comfortable leading' };
+    },
+  },
+  {
+    id: 'type-wide-tracking-body', dim: 'typography',
+    label: 'Letter-spacing above 0.05em on body text',
+    fn: ({ rules }) => {
+      for (const r of rules) {
+        if (!/(^|[\s,])(body|p|li)\b/.test(r.sel)) continue;
+        const m = r.body.match(/letter-spacing\s*:\s*([0-9.]+)em/);
+        if (m && parseFloat(m[1]) > 0.05) return { pass: false, note: `${m[1]}em on ${r.sel}` };
+      }
+      return { pass: true, note: 'tracking in range' };
+    },
+  },
+  {
+    id: 'type-tiny-body', dim: 'typography',
+    label: 'Body text below 12px',
+    fn: ({ rules }) => {
+      for (const r of rules) {
+        if (!/(^|[\s,])(body|p|li)\b/.test(r.sel)) continue;
+        const m = r.body.match(/font-size\s*:\s*([0-9.]+)px/);
+        if (m && parseFloat(m[1]) < 12) return { pass: false, note: `${m[1]}px on ${r.sel}` };
+      }
+      return { pass: true, note: 'legible body size' };
+    },
+  },
+
+  // ---- COLOR & CONTRAST -------------------------------------------------
+  {
+    id: 'color-gradient-text', dim: 'color',
+    label: 'Gradient clipped to text (background-clip: text)',
+    fn: ({ css }) => {
+      const bad = /background-clip\s*:\s*text|-webkit-background-clip\s*:\s*text/i.test(css) && /gradient/i.test(css);
+      return { pass: !bad, note: bad ? 'gradient text headline' : 'solid headline fill' };
+    },
+  },
+  {
+    id: 'color-ai-palette', dim: 'color',
+    label: 'AI purple/violet→cyan gradient',
+    fn: ({ css }) => {
+      // The tell is the violet/purple -> cyan/blue *ramp*, not a single
+      // deliberate brand hue. Require both ends to be present in one gradient.
+      const grads = [...css.matchAll(/(linear|radial|conic)-gradient\([^;}]*\)/gi)].map((m) => m[0]);
+      for (const g of grads) {
+        const violetKw = /purple|violet|indigo|fuchsia|magenta|#8b5cf6|#6366f1|#7c3aed|#a855f7|#b06cff/i.test(g);
+        const cyanKw = /\bcyan\b|\bteal\b|\baqua\b|#06b6d4|#22d3ee|#38d6ff/i.test(g);
+        const hues = [...g.matchAll(/oklch\([^)]*\)/gi)].map((x) => oklchH(x[0])).filter((h) => h != null);
+        const hasViolet = hues.some((h) => h >= 270 && h <= 330);
+        const hasCyanBlue = hues.some((h) => h >= 190 && h <= 265);
+        const ramp = (violetKw && cyanKw) || (hasViolet && hasCyanBlue) || (violetKw && hasCyanBlue) || (hasViolet && cyanKw);
+        if (ramp) return { pass: false, note: `violet→cyan ramp in ${g.slice(0, 40)}…` };
+      }
+      return { pass: true, note: 'no violet→cyan ramp' };
+    },
+  },
+  {
+    id: 'color-pure-black-bg', dim: 'color',
+    label: 'Pure #000 / oklch(0) used as a base background',
+    fn: ({ rules, map }) => {
+      for (const r of rules) {
+        const m = r.body.match(/background(?:-color)?\s*:\s*([^;]+)/i);
+        if (!m) continue;
+        const v = resolveVar(m[1], map).toLowerCase();
+        if (/#000(\b|000\b)|\boklch\(\s*0\s+0\b|\brgb\(\s*0\s*,\s*0\s*,\s*0\s*\)|\bblack\b/.test(v)) return { pass: false, note: `pure black bg on ${r.sel}` };
+      }
+      return { pass: true, note: 'no pure-black base' };
+    },
+  },
+  {
+    id: 'color-zero-chroma', dim: 'color',
+    label: 'Zero-chroma flat-grey neutrals (gate 24)',
+    fn: ({ map, genre }) => {
+      if (genre === 'modern-minimal') return { pass: true, note: 'modern-minimal allows zero-chroma' };
+      for (const [k, v] of Object.entries(map)) {
+        if (!/--color|--paper|--ink|--surface|--muted|--neutral|--bg/.test(k)) continue;
+        const c = oklchC(resolveVar(v, map));
+        if (c === 0) return { pass: false, note: `${k} has 0 chroma` };
+      }
+      return { pass: true, note: 'neutrals tinted toward anchor' };
+    },
+  },
+  {
+    id: 'color-token-discipline', dim: 'color',
+    label: 'Colour literal outside the token block (gate 58)',
+    fn: ({ rules }) => {
+      const offenders = [];
+      for (const r of rules) {
+        if (/:root|\[data-theme/.test(r.sel)) continue;
+        const lits = (r.body.match(COLOR_LITERAL) || []).filter((c) => !/transparent|currentcolor|inherit|none/i.test(c));
+        if (lits.length) offenders.push(`${r.sel}: ${lits[0]}`);
+      }
+      return { pass: offenders.length === 0, note: offenders.length ? `${offenders.length} literal(s), e.g. ${offenders[0]}` : 'all colours via tokens' };
+    },
+  },
+  {
+    id: 'color-ink-on-ink', dim: 'color',
+    label: 'Text lightness too close to its background (ink-on-ink, gates 46–50)',
+    fn: ({ rules, map }) => {
+      for (const r of rules) {
+        if (/:root|\[data-theme/.test(r.sel)) continue;
+        const cM = r.body.match(/(? {
+      for (const r of rules) {
+        // a left rule on a blockquote/figure is a typographic convention, not the card tell
+        if (/\b(blockquote|figure|aside|q|cite)\b/.test(r.sel)) continue;
+        const m = r.body.match(/border-(left|right)\s*:\s*([0-9.]+)px\s+\w+\s+([^;]+)/i);
+        if (!m) continue;
+        const w = parseFloat(m[2]);
+        const col = resolveVar(m[3], map).toLowerCase();
+        if (w >= 4 && !/transparent/.test(col)) return { pass: false, note: `${m[2]}px ${m[1]} stripe on ${r.sel}` };
+      }
+      return { pass: true, note: 'no side-tab stripe' };
+    },
+  },
+  {
+    id: 'visual-glassmorphism', dim: 'visual',
+    label: 'Glassmorphism (backdrop blur on translucent panels)',
+    fn: ({ css }) => {
+      const bad = /backdrop-filter\s*:\s*[^;]*blur/i.test(css) && /rgba?\([^)]*0?\.\d+\s*\)|\/\s*0?\.\d+\s*\)/.test(css);
+      return { pass: !bad, note: bad ? 'translucent blur panel' : 'no glass panels' };
+    },
+  },
+  {
+    id: 'visual-sparkline-decoration', dim: 'visual',
+    label: 'Sparkline / chart used as pure decoration',
+    fn: ({ html }) => {
+      const bad = /class="[^"]*\b(sparkline|spark-line|decor[a-z-]*chart|fake-chart)\b/i.test(html);
+      return { pass: !bad, note: bad ? 'decorative sparkline present' : 'no decorative charts' };
+    },
+  },
+
+  // ---- LAYOUT & SPACE ---------------------------------------------------
+  {
+    id: 'layout-center-everything', dim: 'layout',
+    label: 'Everything centre-aligned (≥4 text-align:center)',
+    fn: ({ css }) => {
+      const n = (css.match(/text-align\s*:\s*center/gi) || []).length;
+      return { pass: n < 4, note: `${n} centred blocks` };
+    },
+  },
+  {
+    id: 'layout-justified', dim: 'layout',
+    label: 'Justified body text (word-spacing rivers)',
+    fn: ({ css }) => {
+      const bad = /text-align\s*:\s*justify/i.test(css);
+      return { pass: !bad, note: bad ? 'justified text present' : 'ragged-right text' };
+    },
+  },
+  {
+    id: 'layout-three-col-cards', dim: 'layout',
+    label: 'Three equal-column card grid (icon-tile template)',
+    fn: ({ css }) => {
+      const bad = /grid-template-columns\s*:\s*repeat\(\s*3\s*,\s*(?:minmax\(0,\s*)?1fr/i.test(css) || /grid-template-columns\s*:\s*1fr\s+1fr\s+1fr\b/i.test(css);
+      return { pass: !bad, note: bad ? 'repeat(3, 1fr) grid' : 'no rote 3-col grid' };
+    },
+  },
+  {
+    id: 'layout-long-measure', dim: 'layout',
+    label: 'Prose measure beyond 75ch (gate 27)',
+    fn: ({ css }) => {
+      for (const m of css.matchAll(/max-width\s*:\s*([0-9.]+)ch/gi)) {
+        if (parseFloat(m[1]) > 75) return { pass: false, note: `${m[1]}ch measure` };
+      }
+      return { pass: true, note: 'measure ≤ 75ch' };
+    },
+  },
+  {
+    id: 'layout-arbitrary-spacing', dim: 'layout',
+    label: 'Spacing off the 4px scale (gate 26)',
+    fn: ({ rules, map }) => {
+      for (const r of rules) {
+        if (/:root|\[data-theme/.test(r.sel)) continue;
+        for (const m of r.body.matchAll(/\b(?:padding|margin|gap|row-gap|column-gap)(?:-\w+)?\s*:\s*([^;]+)/gi)) {
+          const resolved = resolveVar(m[1], map);
+          for (const px of resolved.matchAll(/(-?[0-9.]+)px/g)) {
+            const v = Math.abs(parseFloat(px[1]));
+            if (v > 0 && v % 4 !== 0) return { pass: false, note: `${px[1]}px on ${r.sel}` };
+          }
+        }
+      }
+      return { pass: true, note: 'spacing on 4px scale' };
+    },
+  },
+  {
+    id: 'layout-skipped-heading', dim: 'layout',
+    label: 'Skipped heading level (h1→h3 with no h2)',
+    fn: ({ html }) => {
+      const lv = headingLevels(html);
+      for (let i = 1; i < lv.length; i++) {
+        if (lv[i] - lv[i - 1] > 1) return { pass: false, note: `h${lv[i - 1]}→h${lv[i]}` };
+      }
+      return { pass: true, note: 'heading levels contiguous' };
+    },
+  },
+
+  // ---- MOTION -----------------------------------------------------------
+  {
+    id: 'motion-transition-all', dim: 'motion',
+    label: 'transition: all (gate 11)',
+    fn: ({ css }) => {
+      const bad = /transition\s*:\s*all\b/i.test(css);
+      return { pass: !bad, note: bad ? 'transition: all present' : 'transitions are scoped' };
+    },
+  },
+  {
+    id: 'motion-hover-scale', dim: 'motion',
+    label: 'Uniform hover-scale (gate 12)',
+    fn: ({ css }) => {
+      const bad = /:hover[^{}]*\{[^{}]*transform\s*:\s*scale\(\s*1\.0[1-9]/i.test(css) || /hover:scale-10[0-9]/i.test(css);
+      return { pass: !bad, note: bad ? 'hover scale present' : 'no rote hover-scale' };
+    },
+  },
+  {
+    id: 'motion-bouncy-easing', dim: 'motion',
+    label: 'Bouncy/overshoot easing on UI state (gate 13)',
+    fn: ({ css }) => {
+      for (const m of css.matchAll(/cubic-bezier\(\s*([0-9.-]+)\s*,\s*([0-9.-]+)\s*,\s*([0-9.-]+)\s*,\s*([0-9.-]+)\s*\)/gi)) {
+        const y1 = parseFloat(m[2]); const y2 = parseFloat(m[4]);
+        if (y1 > 1 || y2 > 1 || y1 < 0 || y2 < 0) return { pass: false, note: `overshoot ${m[0]}` };
+      }
+      return { pass: true, note: 'no overshoot easing' };
+    },
+  },
+  {
+    id: 'motion-layout-animation', dim: 'motion',
+    label: 'Animating layout properties (gate 15)',
+    fn: ({ css }) => {
+      const bad = /transition\s*:[^;}]*\b(width|height|top|left|right|bottom|margin|padding)\b/i.test(css);
+      return { pass: !bad, note: bad ? 'layout prop in transition' : 'animates transform/opacity only' };
+    },
+  },
+  {
+    id: 'motion-no-reduced-motion', dim: 'motion',
+    label: 'Animation without prefers-reduced-motion fallback (gate 29)',
+    fn: ({ css }) => {
+      const hasMotion = /@keyframes|animation\s*:|transition\s*:/i.test(css);
+      const hasGuard = /prefers-reduced-motion/i.test(css);
+      return { pass: !hasMotion || hasGuard, note: hasMotion ? (hasGuard ? 'guarded' : 'no reduced-motion guard') : 'no motion' };
+    },
+  },
+
+  // ---- INTERACTION ------------------------------------------------------
+  {
+    id: 'interaction-emoji-icon', dim: 'interaction',
+    label: 'Emoji used as a feature/step icon (gate 60)',
+    fn: ({ html }) => {
+      const body = html.replace(//gi, '').replace(//gi, '');
+      const bad = /[\u{1F300}-\u{1FAFF}\u{2600}-\u{27BF}\u{2B00}-\u{2BFF}\u{FE0F}]/u.test(body);
+      return { pass: !bad, note: bad ? 'emoji glyph in markup' : 'no emoji icons' };
+    },
+  },
+  {
+    id: 'interaction-all-primary', dim: 'interaction',
+    label: 'Every button styled as primary (no secondary register)',
+    fn: ({ html, css }) => {
+      const btns = (html.match(/<(?:button|a)[^>]*class="[^"]*\b(?:btn|button|cta)\b/gi) || []).length;
+      const hasVariant = /\b(btn|button)[-_]{1,2}(secondary|ghost|outline|tertiary|quiet|text)\b|data-variant|\bbtn--/i.test(html + css);
+      return { pass: btns < 3 || hasVariant, note: btns >= 3 && !hasVariant ? `${btns} buttons, one register` : 'button hierarchy present' };
+    },
+  },
+  {
+    id: 'interaction-placeholder-names', dim: 'interaction',
+    label: 'Placeholder names / startup clichés (gate 20)',
+    fn: ({ html }) => {
+      // Only flag actual placeholder *names* — not ordinary words ("seamless",
+      // "unleash") that legitimately appear in marketing prose.
+      const bad = /jane doe|john smith|john doe|lorem ipsum|\bacme\b|\bwidget(?:co|inc)\b|example\.com/i.test(html);
+      return { pass: !bad, note: bad ? 'placeholder/cliché name' : 'specific copy' };
+    },
+  },
+  {
+    id: 'interaction-modal-reflex', dim: 'interaction',
+    label: 'Reaching for a modal/dialog reflexively',
+    fn: ({ html }) => {
+      const bad = / {
+      const bad = !/(html|body)[^{}]*\{[^{}]*overflow-x\s*:\s*clip/i.test(css) && !/(html|body)\s*,\s*(html|body)[^{}]*\{[^{}]*overflow-x\s*:\s*clip/i.test(css);
+      return { pass: !bad, note: bad ? 'no overflow-x: clip' : 'overflow-x clipped' };
+    },
+  },
+  {
+    id: 'responsive-img-grid-minmax', dim: 'responsive',
+    label: 'Image-bearing 1fr grid track without minmax(0,1fr) (gate 61)',
+    fn: ({ css, html }) => {
+      const hasImg = /