diff --git a/apps/cli/src/commands/results/index.ts b/apps/cli/src/commands/results/index.ts
index c09d51d3e..5dc90626a 100644
--- a/apps/cli/src/commands/results/index.ts
+++ b/apps/cli/src/commands/results/index.ts
@@ -2,6 +2,7 @@ import { subcommands } from 'cmd-ts';
 
 import { resultsExportCommand } from './export.js';
 import { resultsFailuresCommand } from './failures.js';
+import { resultsReportCommand } from './report.js';
 import { resultsShowCommand } from './show.js';
 import { resultsSummaryCommand } from './summary.js';
 import { resultsValidateCommand } from './validate.js';
@@ -11,6 +12,7 @@ export const resultsCommand = subcommands({
   description: 'Inspect, export, and manage evaluation results',
   cmds: {
     export: resultsExportCommand,
+    report: resultsReportCommand,
     summary: resultsSummaryCommand,
     failures: resultsFailuresCommand,
     show: resultsShowCommand,
diff --git a/apps/cli/src/commands/results/report-template.ts b/apps/cli/src/commands/results/report-template.ts
new file mode 100644
index 000000000..983728d61
--- /dev/null
+++ b/apps/cli/src/commands/results/report-template.ts
@@ -0,0 +1,2 @@
+export const RESULTS_REPORT_TEMPLATE =
+  "<!DOCTYPE html>\n<html lang=\"en\">\n<head>\n  <meta charset=\"utf-8\">\n  <meta name=\"viewport\" content=\"width=device-width, initial-scale=1\">\n  <title>AgentV Evaluation Report</title>\n  <style>\n    * { box-sizing: border-box; }\n    html, body { margin: 0; padding: 0; }\n    :root {\n      color-scheme: dark;\n      --canvas: #030712;\n      --surface: #111827;\n      --surface-muted: rgba(17, 24, 39, 0.5);\n      --surface-hover: rgba(17, 24, 39, 0.3);\n      --border: #1f2937;\n      --text: #d1d5db;\n      --text-muted: #9ca3af;\n      --text-subtle: #6b7280;\n      --heading: #ffffff;\n      --accent: #22d3ee;\n      --accent-weak: rgba(8, 145, 178, 0.2);\n      --pass: #34d399;\n      --warn: #facc15;\n      --fail: #f87171;\n      --track: #1f2937;\n      --pill-gradient: linear-gradient(90deg, #60a5fa 0%, #2563eb 100%);\n      --font: ui-sans-serif, system-ui, -apple-system, BlinkMacSystemFont, \"Segoe UI\", Roboto, \"Helvetica Neue\", Arial, sans-serif;\n      --radius-md: 6px;\n      --radius-lg: 8px;\n    }\n\n    :root.light {\n      color-scheme: light;\n      --canvas: #f8fafc;\n      --surface: #ffffff;\n      --surface-muted: rgba(241, 245, 249, 0.8);\n      --surface-hover: rgba(241, 245, 249, 0.5);\n      --border: #e2e8f0;\n      --text: #1e293b;\n      --text-muted: #64748b;\n      --text-subtle: #94a3b8;\n      --heading: #0f172a;\n      --accent: #0891b2;\n      --accent-weak: rgba(8, 145, 178, 0.1);\n      --pass: #16a34a;\n      --warn: #ca8a04;\n      --fail: #dc2626;\n      --track: #e2e8f0;\n      --pill-gradient: linear-gradient(90deg, #3b82f6 0%, #2563eb 100%);\n    }\n\n    :root.light .chip {\n      border-color: rgba(8, 145, 178, 0.3);\n      background: rgba(8, 145, 178, 0.08);\n      color: #0891b2;\n    }\n\n    :root.light .assertion-badge {\n      border-color: rgba(8, 145, 178, 0.3);\n      background: rgba(8, 145, 178, 0.08);\n      color: #0891b2;\n    }\n\n    :root.light .pass-rate-label {\n      color: 
#ffffff;\n    }\n\n    :root.light .group-header {\n      background: rgba(241, 245, 249, 0.8);\n    }\n\n    :root.light .status-pill {\n      background: rgba(241, 245, 249, 0.8);\n    }\n\n    :root.light .assertion-item {\n      background: rgba(241, 245, 249, 0.5);\n      border-color: var(--border);\n    }\n\n    :root.light .detail-row td {\n      background: rgba(248, 250, 252, 0.8);\n    }\n\n    body {\n      background: var(--canvas);\n      color: var(--text);\n      font-family: var(--font);\n      font-size: 14px;\n      line-height: 1.5;\n    }\n\n    button, input, select {\n      font: inherit;\n    }\n\n    .tabular,\n    .num,\n    .pass-rate-label,\n    .metric-value,\n    .count-label,\n    .table-num {\n      font-variant-numeric: tabular-nums;\n    }\n\n    .shell {\n      min-height: 100vh;\n    }\n\n    .header {\n      border-bottom: 1px solid var(--border);\n      background: var(--surface);\n      padding: 16px 24px;\n      display: flex;\n      align-items: center;\n      justify-content: space-between;\n      gap: 16px;\n    }\n\n    .header-title-wrap {\n      display: flex;\n      flex-direction: column;\n      gap: 4px;\n    }\n\n    .eyebrow {\n      color: var(--text-muted);\n      font-size: 12px;\n      letter-spacing: 0.08em;\n      text-transform: uppercase;\n    }\n\n    .header-title {\n      color: var(--heading);\n      font-size: 24px;\n      font-weight: 600;\n      margin: 0;\n    }\n\n    .header-subtitle {\n      color: var(--text-muted);\n      margin: 0;\n    }\n\n    .header-meta {\n      display: flex;\n      flex-wrap: wrap;\n      justify-content: flex-end;\n      align-items: center;\n      gap: 8px;\n    }\n\n    .chip {\n      border: 1px solid rgba(8, 145, 178, 0.45);\n      background: rgba(8, 145, 178, 0.16);\n      color: #67e8f9;\n      border-radius: var(--radius-md);\n      padding: 4px 8px;\n      font-size: 12px;\n      font-weight: 500;\n    }\n\n    .tabs {\n      display: flex;\n      gap: 4px;\n  
    padding: 0 24px;\n      border-bottom: 1px solid var(--border);\n      background: var(--canvas);\n    }\n\n    .tab {\n      background: transparent;\n      border: none;\n      border-bottom: 2px solid transparent;\n      color: var(--text-muted);\n      cursor: pointer;\n      padding: 12px 8px 10px;\n      transition: color 120ms ease, border-color 120ms ease;\n    }\n\n    .tab:hover {\n      color: var(--text);\n    }\n\n    .tab.active {\n      color: var(--accent);\n      border-bottom-color: var(--accent);\n    }\n\n    .app {\n      padding: 24px;\n      display: flex;\n      flex-direction: column;\n      gap: 24px;\n    }\n\n    .empty-state {\n      border: 1px solid var(--border);\n      border-radius: var(--radius-lg);\n      background: var(--surface);\n      padding: 32px;\n      text-align: center;\n    }\n\n    .empty-state h2 {\n      margin: 0 0 8px;\n      color: var(--heading);\n      font-size: 18px;\n      font-weight: 500;\n    }\n\n    .empty-state p {\n      margin: 0;\n      color: var(--text-muted);\n    }\n\n    .stats-grid {\n      display: grid;\n      grid-template-columns: repeat(auto-fit, minmax(160px, 1fr));\n      gap: 12px;\n    }\n\n    .stat-card {\n      border: 1px solid var(--border);\n      border-radius: var(--radius-lg);\n      background: var(--surface);\n      padding: 16px;\n      display: flex;\n      flex-direction: column;\n      gap: 6px;\n      min-height: 96px;\n    }\n\n    .stat-label {\n      color: var(--text-muted);\n      font-size: 12px;\n      letter-spacing: 0.04em;\n      text-transform: uppercase;\n    }\n\n    .stat-value {\n      color: var(--heading);\n      font-size: 28px;\n      font-weight: 600;\n      line-height: 1.1;\n    }\n\n    .tone-pass .stat-value,\n    .text-pass { color: var(--pass); }\n    .tone-warn .stat-value,\n    .text-warn { color: var(--warn); }\n    .tone-fail .stat-value,\n    .text-fail { color: var(--fail); }\n\n    .section {\n      display: flex;\n      
flex-direction: column;\n      gap: 12px;\n    }\n\n    .section-heading {\n      display: flex;\n      align-items: baseline;\n      justify-content: space-between;\n      gap: 12px;\n      flex-wrap: wrap;\n    }\n\n    .section-heading h2 {\n      margin: 0;\n      color: var(--heading);\n      font-size: 20px;\n      font-weight: 600;\n    }\n\n    .section-heading p {\n      margin: 0;\n      color: var(--text-muted);\n    }\n\n    .table-wrap {\n      overflow-x: auto;\n      border: 1px solid var(--border);\n      border-radius: var(--radius-lg);\n      background: var(--surface);\n    }\n\n    table {\n      width: 100%;\n      border-collapse: collapse;\n      font-size: 14px;\n    }\n\n    thead {\n      border-bottom: 1px solid var(--border);\n      background: var(--surface-muted);\n    }\n\n    th {\n      padding: 12px 16px;\n      text-align: left;\n      font-weight: 500;\n      color: var(--text-muted);\n      white-space: nowrap;\n    }\n\n    td {\n      padding: 12px 16px;\n      border-top: 1px solid rgba(31, 41, 55, 0.5);\n      vertical-align: top;\n    }\n\n    tbody tr {\n      transition: background-color 120ms ease;\n    }\n\n    tbody tr:hover {\n      background: var(--surface-hover);\n    }\n\n    .sortable {\n      cursor: pointer;\n      user-select: none;\n    }\n\n    .sortable:hover {\n      color: var(--text);\n    }\n\n    .table-num {\n      text-align: right;\n      color: var(--text);\n      white-space: nowrap;\n    }\n\n    .table-muted {\n      color: var(--text-muted);\n    }\n\n    .id-cell {\n      font-weight: 500;\n      color: var(--heading);\n    }\n\n    .status-pill {\n      display: inline-flex;\n      align-items: center;\n      gap: 6px;\n      padding: 2px 8px;\n      border-radius: 999px;\n      border: 1px solid var(--border);\n      background: rgba(17, 24, 39, 0.6);\n      color: var(--text);\n      font-size: 12px;\n      white-space: nowrap;\n    }\n\n    .status-dot {\n      width: 8px;\n      height: 
8px;\n      border-radius: 999px;\n      background: var(--text-subtle);\n    }\n\n    .status-pass .status-dot { background: var(--pass); }\n    .status-fail .status-dot { background: var(--fail); }\n    .status-error .status-dot { background: var(--warn); }\n\n    .pass-rate-track {\n      width: 80px;\n      height: 20px;\n      overflow: hidden;\n      border-radius: 999px;\n      background: var(--track);\n      position: relative;\n      border: 1px solid rgba(31, 41, 55, 0.8);\n    }\n\n    .pass-rate-fill {\n      position: absolute;\n      inset: 0 auto 0 0;\n      background: var(--pill-gradient);\n      border-radius: 999px;\n    }\n\n    .pass-rate-label {\n      position: absolute;\n      inset: 0;\n      display: flex;\n      align-items: center;\n      justify-content: center;\n      font-size: 12px;\n      font-weight: 600;\n      color: #ffffff;\n      white-space: nowrap;\n      z-index: 1;\n    }\n\n    .histogram {\n      border: 1px solid var(--border);\n      border-radius: var(--radius-lg);\n      background: var(--surface);\n      padding: 16px;\n      display: flex;\n      flex-direction: column;\n      gap: 10px;\n    }\n\n    .hist-row {\n      display: grid;\n      grid-template-columns: 72px 1fr 40px;\n      gap: 12px;\n      align-items: center;\n    }\n\n    .hist-label,\n    .hist-count {\n      color: var(--text-muted);\n      font-size: 12px;\n    }\n\n    .hist-bar-track {\n      height: 16px;\n      border-radius: 999px;\n      background: var(--track);\n      overflow: hidden;\n    }\n\n    .hist-bar-fill {\n      height: 100%;\n      border-radius: 999px;\n      background: var(--pill-gradient);\n    }\n\n    .filter-bar {\n      display: flex;\n      flex-wrap: wrap;\n      gap: 8px;\n      align-items: center;\n    }\n\n    .filter-input,\n    .filter-select {\n      border: 1px solid #374151;\n      border-radius: var(--radius-md);\n      background: var(--canvas);\n      color: var(--text);\n      padding: 8px 10px;\n    
}\n\n    .filter-input::placeholder {\n      color: var(--text-subtle);\n    }\n\n    .filter-input:focus,\n    .filter-select:focus {\n      outline: none;\n      border-color: var(--accent);\n      box-shadow: 0 0 0 1px var(--accent);\n    }\n\n    .filter-input {\n      min-width: 260px;\n      flex: 1 1 260px;\n    }\n\n    .filter-count {\n      margin-left: auto;\n      color: var(--text-muted);\n      font-size: 12px;\n    }\n\n    .group-list {\n      display: flex;\n      flex-direction: column;\n      gap: 16px;\n    }\n\n    .group-card {\n      border: 1px solid var(--border);\n      border-radius: var(--radius-lg);\n      overflow: hidden;\n      background: var(--surface);\n    }\n\n    .group-header {\n      padding: 16px;\n      border-bottom: 1px solid var(--border);\n      display: flex;\n      justify-content: space-between;\n      align-items: center;\n      gap: 12px;\n      flex-wrap: wrap;\n      background: rgba(17, 24, 39, 0.65);\n    }\n\n    .group-title-wrap {\n      display: flex;\n      flex-direction: column;\n      gap: 4px;\n    }\n\n    .group-title {\n      margin: 0;\n      color: var(--heading);\n      font-size: 18px;\n      font-weight: 600;\n    }\n\n    .group-subtitle {\n      margin: 0;\n      color: var(--text-muted);\n      font-size: 13px;\n    }\n\n    .group-metrics {\n      display: flex;\n      flex-wrap: wrap;\n      align-items: center;\n      gap: 10px 14px;\n    }\n\n    .metric {\n      display: flex;\n      flex-direction: column;\n      gap: 2px;\n    }\n\n    .metric-label {\n      color: var(--text-muted);\n      font-size: 12px;\n    }\n\n    .metric-value {\n      color: var(--heading);\n      font-weight: 500;\n    }\n\n    .test-row {\n      cursor: pointer;\n    }\n\n    .expand-cell {\n      width: 40px;\n      color: var(--text-muted);\n      text-align: center;\n    }\n\n    .detail-row td {\n      padding: 0;\n      background: rgba(3, 7, 18, 0.5);\n    }\n\n    .detail-panel {\n      padding: 
16px;\n      display: flex;\n      flex-direction: column;\n      gap: 16px;\n    }\n\n    .detail-grid {\n      display: grid;\n      grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));\n      gap: 16px;\n    }\n\n    .detail-card,\n    .assertion-card,\n    .error-card {\n      border: 1px solid var(--border);\n      border-radius: var(--radius-lg);\n      background: var(--surface);\n      padding: 16px;\n    }\n\n    .detail-card h4,\n    .assertion-card h4,\n    .error-card h4 {\n      margin: 0 0 10px;\n      color: var(--heading);\n      font-size: 14px;\n      font-weight: 500;\n    }\n\n    pre {\n      margin: 0;\n      white-space: pre-wrap;\n      word-break: break-word;\n      font-family: ui-monospace, SFMono-Regular, Menlo, Consolas, monospace;\n      font-size: 12px;\n      color: var(--text);\n      max-height: 320px;\n      overflow: auto;\n    }\n\n    .evaluator-table-wrap {\n      border: 1px solid var(--border);\n      border-radius: var(--radius-lg);\n      overflow-x: auto;\n      background: var(--surface);\n    }\n\n    .evaluator-table-wrap th,\n    .evaluator-table-wrap td {\n      padding: 14px 20px;\n    }\n\n    .assertion-grid {\n      display: grid;\n      grid-template-columns: repeat(auto-fit, minmax(280px, 1fr));\n      gap: 16px;\n    }\n\n    .assertion-list {\n      list-style: none;\n      margin: 0;\n      padding: 0;\n      display: flex;\n      flex-direction: column;\n      gap: 10px;\n    }\n\n    .assertion-item {\n      border: 1px solid rgba(31, 41, 55, 0.8);\n      border-radius: var(--radius-md);\n      background: rgba(3, 7, 18, 0.45);\n      padding: 10px 12px;\n      display: flex;\n      flex-direction: column;\n      gap: 8px;\n    }\n\n    .assertion-topline {\n      display: flex;\n      align-items: center;\n      flex-wrap: wrap;\n      gap: 8px;\n    }\n\n    .assertion-badge {\n      border: 1px solid rgba(8, 145, 178, 0.45);\n      background: rgba(8, 145, 178, 0.16);\n      color: #67e8f9;\n    
  border-radius: var(--radius-md);\n      padding: 2px 8px;\n      font-size: 12px;\n      font-weight: 500;\n      align-self: flex-start;\n    }\n\n    .assertion-status {\n      font-size: 12px;\n      font-weight: 500;\n    }\n\n    .assertion-text {\n      color: var(--text);\n    }\n\n    .assertion-evidence {\n      color: var(--text-muted);\n      font-size: 12px;\n      white-space: pre-wrap;\n      word-break: break-word;\n    }\n\n    .meta-row {\n      display: flex;\n      flex-wrap: wrap;\n      gap: 8px 16px;\n      color: var(--text-muted);\n      font-size: 12px;\n      border-top: 1px solid var(--border);\n      padding-top: 12px;\n    }\n\n    .error-card {\n      border-color: rgba(248, 113, 113, 0.45);\n      background: rgba(127, 29, 29, 0.18);\n    }\n\n    .error-card h4,\n    .error-card pre {\n      color: #fecaca;\n    }\n\n    .muted {\n      color: var(--text-muted);\n    }\n\n    .hidden {\n      display: none;\n    }\n\n    .theme-toggle {\n      background: var(--surface);\n      border: 1px solid var(--border);\n      border-radius: var(--radius-md);\n      color: var(--text-muted);\n      cursor: pointer;\n      padding: 6px 10px;\n      font-size: 13px;\n      transition: color 120ms ease, border-color 120ms ease;\n      white-space: nowrap;\n    }\n\n    .theme-toggle:hover {\n      color: var(--text);\n      border-color: var(--text-muted);\n    }\n\n    .criteria-cell {\n      color: var(--text-muted);\n      font-size: 13px;\n      max-width: 320px;\n    }\n\n    .io-toggle {\n      cursor: pointer;\n      user-select: none;\n      color: var(--text-muted);\n      font-size: 13px;\n      border: 1px solid var(--border);\n      border-radius: var(--radius-md);\n      background: var(--surface);\n      padding: 8px 12px;\n      display: inline-flex;\n      align-items: center;\n      gap: 6px;\n    }\n\n    .io-toggle:hover {\n      color: var(--text);\n    }\n\n    .io-toggle[open] > summary {\n      margin-bottom: 12px;\n    
}\n\n    .io-toggle > summary {\n      list-style: none;\n    }\n\n    .io-toggle > summary::-webkit-details-marker {\n      display: none;\n    }\n\n    @media (max-width: 900px) {\n      .header,\n      .group-header,\n      .section-heading {\n        align-items: flex-start;\n      }\n\n      .filter-count {\n        width: 100%;\n        margin-left: 0;\n      }\n    }\n\n    @media (prefers-reduced-motion: reduce) {\n      *, *::before, *::after {\n        transition: none !important;\n        scroll-behavior: auto !important;\n      }\n    }\n  </style>\n</head>\n<body>\n  <div class=\"shell\">\n    <header class=\"header\">\n      <div class=\"header-title-wrap\">\n        <div class=\"eyebrow\">AgentV static export</div>\n        <h1 class=\"header-title\">Evaluation Report</h1>\n        <p class=\"header-subtitle\">Studio-themed HTML generated from an existing AgentV results workspace.</p>\n      </div>\n      <div class=\"header-meta\" id=\"header-meta\"></div>\n    </header>\n\n    <nav class=\"tabs\" aria-label=\"Report sections\" id=\"tab-nav\">\n      <button class=\"tab active\" type=\"button\" data-tab=\"overview\">Overview</button>\n      <button class=\"tab\" type=\"button\" data-tab=\"tests\">Test Cases</button>\n    </nav>\n\n    <main class=\"app\" id=\"app\"></main>\n  </div>\n\n  <script>\n    const RAW_DATA = __DATA_PLACEHOLDER__;\n\n    (function () {\n      function pick(obj, keys, fallback) {\n        for (let i = 0; i < keys.length; i += 1) {\n          const key = keys[i];\n          if (obj && Object.prototype.hasOwnProperty.call(obj, key) && obj[key] !== undefined && obj[key] !== null) {\n            return obj[key];\n          }\n        }\n        return fallback;\n      }\n\n      function asString(value, fallback) {\n        return typeof value === 'string' ? value : fallback;\n      }\n\n      function asNumber(value, fallback) {\n        return typeof value === 'number' && Number.isFinite(value) ? 
value : fallback;\n      }\n\n      function asArray(value) {\n        return Array.isArray(value) ? value : [];\n      }\n\n      function normalizeAssertion(value, assertionType) {\n        if (!value || typeof value !== 'object' || Array.isArray(value)) return null;\n        return {\n          text: asString(value.text, 'Untitled assertion'),\n          passed: Boolean(value.passed),\n          evidence: asString(value.evidence, ''),\n          assertion_type: asString(value.assertion_type, '') || assertionType,\n        };\n      }\n\n      function normalizeScores(value) {\n        return asArray(value)\n          .map((entry) => {\n            if (!entry || typeof entry !== 'object' || Array.isArray(entry)) return null;\n            const scoreName = asString(pick(entry, ['name', 'type'], 'assertion'), 'assertion');\n            const scoreType = asString(pick(entry, ['type', 'name'], 'assertion'), 'assertion');\n            const assertions = asArray(entry.assertions)\n              .map((assertion) => normalizeAssertion(assertion, scoreName))\n              .filter(Boolean);\n            return {\n              name: scoreName,\n              type: scoreType,\n              score: asNumber(entry.score, 0),\n              assertions,\n            };\n          })\n          .filter(Boolean);\n      }\n\n      function normalizeTokenUsage(value) {\n        if (!value || typeof value !== 'object' || Array.isArray(value)) {\n          return { input: 0, output: 0, reasoning: 0 };\n        }\n        return {\n          input: asNumber(pick(value, ['input', 'input_tokens', 'inputTokens'], 0), 0),\n          output: asNumber(pick(value, ['output', 'output_tokens', 'outputTokens'], 0), 0),\n          reasoning: asNumber(pick(value, ['reasoning', 'reasoning_tokens', 'reasoningTokens'], 0), 0),\n        };\n      }\n\n      function normalizeResult(raw, index) {\n        const scores = normalizeScores(pick(raw, ['scores'], []));\n        const fallbackAssertions = 
asArray(pick(raw, ['assertions'], []))\n          .map((assertion) => normalizeAssertion(assertion, 'assertion'))\n          .filter(Boolean);\n        const flattenedAssertions = scores.flatMap((score) => score.assertions);\n        const displayAssertions = flattenedAssertions.length > 0 ? flattenedAssertions : fallbackAssertions;\n        const testId = asString(pick(raw, ['test_id', 'testId'], 'unknown'), 'unknown');\n        const target = asString(pick(raw, ['target'], 'unknown'), 'unknown');\n        const evalFile = asString(pick(raw, ['eval_file', 'evalFile'], 'ungrouped'), 'ungrouped');\n        const executionStatus = asString(pick(raw, ['execution_status', 'executionStatus'], ''), '');\n        return {\n          _key: `${evalFile}:${target}:${testId}:${index}`,\n          raw,\n          eval_file: evalFile,\n          test_id: testId,\n          target,\n          score: asNumber(pick(raw, ['score'], 0), 0),\n          duration_ms: asNumber(pick(raw, ['duration_ms', 'durationMs'], 0), 0),\n          cost_usd: asNumber(pick(raw, ['cost_usd', 'costUsd'], 0), 0),\n          timestamp: asString(pick(raw, ['timestamp'], ''), ''),\n          execution_status: executionStatus,\n          error: asString(pick(raw, ['error'], ''), ''),\n          input: pick(raw, ['input'], null),\n          output: pick(raw, ['output'], null),\n          token_usage: normalizeTokenUsage(pick(raw, ['token_usage', 'tokenUsage'], {})),\n          total_tokens: 0,\n          scores,\n          assertions: displayAssertions,\n        };\n      }\n\n      function escapeHtml(value) {\n        return String(value)\n          .replace(/&/g, '&amp;')\n          .replace(/</g, '&lt;')\n          .replace(/>/g, '&gt;')\n          .replace(/\"/g, '&quot;');\n      }\n\n      function statusOf(result) {\n        if (result.execution_status === 'execution_error') return 'error';\n        if (result.execution_status === 'quality_failure') return 'fail';\n        if (result.execution_status 
=== 'ok') return 'pass';\n        if (result.error) return 'error';\n        return result.score >= 0.5 ? 'pass' : 'fail';\n      }\n\n      function statusLabel(status) {\n        if (status === 'pass') return 'Passed';\n        if (status === 'fail') return 'Failed';\n        return 'Error';\n      }\n\n      function toneForScore(value) {\n        if (value >= 0.8) return 'text-pass';\n        if (value >= 0.5) return 'text-warn';\n        return 'text-fail';\n      }\n\n      function toneClassForRate(value) {\n        if (value >= 0.8) return 'tone-pass';\n        if (value >= 0.5) return 'tone-warn';\n        return 'tone-fail';\n      }\n\n      function formatPercent(value) {\n        return `${(value * 100).toFixed(1)}%`;\n      }\n\n      function formatDuration(ms) {\n        if (!ms) return '-';\n        if (ms < 1000) return `${Math.round(ms)}ms`;\n        if (ms < 60000) return `${(ms / 1000).toFixed(1)}s`;\n        const minutes = Math.floor(ms / 60000);\n        const seconds = Math.round((ms % 60000) / 1000);\n        return `${minutes}m ${seconds}s`;\n      }\n\n      function formatTokens(value) {\n        if (!value) return '-';\n        if (value >= 1000000) return `${(value / 1000000).toFixed(1)}M`;\n        if (value >= 1000) return `${(value / 1000).toFixed(1)}K`;\n        return String(value);\n      }\n\n      function formatCost(value) {\n        if (!value) return '-';\n        if (value < 0.01) return '<$0.01';\n        return `$${value.toFixed(2)}`;\n      }\n\n      function totalTokens(result) {\n        return (result.token_usage.input || 0) + (result.token_usage.output || 0) + (result.token_usage.reasoning || 0);\n      }\n\n      function computeStats(rows) {\n        const stats = {\n          total: rows.length,\n          passed: 0,\n          failed: 0,\n          errors: 0,\n          total_duration_ms: 0,\n          total_tokens: 0,\n          total_cost_usd: 0,\n          scores: [],\n        };\n        rows.forEach((row) 
=> {\n          const status = statusOf(row);\n          if (status === 'pass') stats.passed += 1;\n          else if (status === 'fail') stats.failed += 1;\n          else stats.errors += 1;\n          stats.total_duration_ms += row.duration_ms || 0;\n          stats.total_tokens += totalTokens(row);\n          stats.total_cost_usd += row.cost_usd || 0;\n          if (status !== 'error') stats.scores.push(row.score);\n        });\n        const graded = stats.passed + stats.failed;\n        stats.pass_rate = graded > 0 ? stats.passed / graded : 0;\n        return stats;\n      }\n\n      function groupBy(rows, key) {\n        const map = new Map();\n        rows.forEach((row) => {\n          const value = row[key];\n          if (!map.has(value)) map.set(value, []);\n          map.get(value).push(row);\n        });\n        return Array.from(map.entries()).map(([name, entries]) => ({\n          name,\n          rows: entries,\n          stats: computeStats(entries),\n        }));\n      }\n\n      function uniqueScoreNames(rows) {\n        const seen = new Set();\n        rows.forEach((row) => {\n          row.scores.forEach((score) => seen.add(score.name));\n        });\n        return Array.from(seen.values()).sort();\n      }\n\n      function scoreForName(row, scoreName) {\n        const match = row.scores.find((score) => score.name === scoreName);\n        return match ? match.score : null;\n      }\n\n      function sortRows(rows, sort) {\n        const direction = sort.dir === 'asc' ? 
1 : -1;\n        const statusRank = { pass: 0, fail: 1, error: 2 };\n        return rows.slice().sort((left, right) => {\n          let a;\n          let b;\n          if (sort.col === 'status') {\n            a = statusRank[statusOf(left)];\n            b = statusRank[statusOf(right)];\n          } else if (sort.col.startsWith('score:')) {\n            const scoreName = sort.col.slice('score:'.length);\n            a = scoreForName(left, scoreName);\n            b = scoreForName(right, scoreName);\n          } else {\n            a = left[sort.col];\n            b = right[sort.col];\n          }\n          if (a == null && b == null) return 0;\n          if (a == null) return 1;\n          if (b == null) return -1;\n          if (typeof a === 'string' || typeof b === 'string') {\n            return String(a).localeCompare(String(b)) * direction;\n          }\n          return (a - b) * direction;\n        });\n      }\n\n      function matchesSearch(row, search) {\n        if (!search) return true;\n        const haystack = [\n          row.test_id,\n          row.target,\n          row.eval_file,\n          row.error,\n          extractCriteria(row),\n          row.assertions.map((assertion) => `${assertion.assertion_type} ${assertion.text} ${assertion.evidence}`).join(' '),\n        ].join(' ').toLowerCase();\n        return haystack.includes(search.toLowerCase());\n      }\n\n      function filterRows(rows, filter) {\n        return rows.filter((row) => {\n          const status = statusOf(row);\n          if (filter.status !== 'all' && status !== filter.status) return false;\n          if (filter.target !== 'all' && row.target !== filter.target) return false;\n          if (!matchesSearch(row, filter.search)) return false;\n          return true;\n        });\n      }\n\n      function renderPassRate(rate) {\n        return `\n          <span class=\"pass-rate-track\">\n            <span class=\"pass-rate-fill\" style=\"width:${Math.max(0, Math.min(100, rate * 
100))}%\"></span>\n            <span class=\"pass-rate-label\">${formatPercent(rate)}</span>\n          </span>\n        `;\n      }\n\n      function formatJson(value) {\n        if (value == null) return '';\n        if (typeof value === 'string') return value;\n        try {\n          return JSON.stringify(value, null, 2);\n        } catch (error) {\n          return String(value);\n        }\n      }\n\n      function renderStatusPill(status) {\n        return `\n          <span class=\"status-pill status-${status}\">\n            <span class=\"status-dot\"></span>\n            <span>${statusLabel(status)}</span>\n          </span>\n        `;\n      }\n\n      function renderAssertions(assertions) {\n        if (assertions.length === 0) {\n          return '<p class=\"muted\">No assertions.</p>';\n        }\n        return `\n          <ul class=\"assertion-list\">\n            ${assertions.map((assertion) => `\n              <li class=\"assertion-item\">\n                <div class=\"assertion-topline\">\n                  <span class=\"assertion-status ${assertion.passed ? 'text-pass' : 'text-fail'}\">${assertion.passed ? '\\u2713' : '\\u2717'}</span>\n                  ${assertion.assertion_type ? `<span class=\"assertion-badge\">${escapeHtml(assertion.assertion_type)}</span>` : ''}\n                  <span class=\"assertion-text\">${escapeHtml(assertion.text)}</span>\n                </div>\n                ${assertion.evidence ? 
`<div class=\"assertion-evidence\">${escapeHtml(assertion.evidence)}</div>` : ''}\n              </li>\n            `).join('')}\n          </ul>\n        `;\n      }\n\n      function extractCriteria(row) {\n        if (row.input && typeof row.input === 'object' && row.input.prompt) {\n          return String(row.input.prompt);\n        }\n        if (typeof row.input === 'string' && row.input.length > 0 && row.input.length <= 200) {\n          return row.input;\n        }\n        if (row.assertions && row.assertions.length > 0) {\n          const types = Array.from(new Set(row.assertions.map(a => a.assertion_type).filter(Boolean)));\n          const firstText = row.assertions[0].text || '';\n          const prefix = types.length > 0 ? types.join(', ') + ': ' : '';\n          const full = prefix + firstText;\n          return full.length > 120 ? full.slice(0, 117) + '...' : full;\n        }\n        return '';\n      }\n\n      function renderDetail(row, evaluatorNames, hasAnyCost) {\n        const meta = [];\n        if (row.timestamp) meta.push(escapeHtml(row.timestamp));\n        if (row.target) meta.push(escapeHtml(row.target));\n        if (row.duration_ms) meta.push(escapeHtml(formatDuration(row.duration_ms)));\n        if (totalTokens(row)) meta.push(escapeHtml(`${formatTokens(totalTokens(row))} tokens`));\n        if (hasAnyCost && row.cost_usd) meta.push(escapeHtml(formatCost(row.cost_usd)));\n\n        const hasIo = row.input != null || row.output != null;\n\n        return `\n          <div class=\"detail-panel\">\n            <div class=\"assertion-card\">\n              <h4>Assertions (${row.assertions.length})</h4>\n              ${renderAssertions(row.assertions)}\n            </div>\n\n            ${row.error ? `\n              <div class=\"error-card\">\n                <h4>Error</h4>\n                <pre>${escapeHtml(row.error)}</pre>\n              </div>\n            ` : ''}\n\n            ${hasIo ? 
`\n              <details class=\"io-toggle\">\n                <summary>\\u25B8 Input / Output</summary>\n                <div class=\"detail-grid\">\n                  <div class=\"detail-card\">\n                    <h4>Input</h4>\n                    <pre>${escapeHtml(formatJson(row.input))}</pre>\n                  </div>\n                  <div class=\"detail-card\">\n                    <h4>Output</h4>\n                    <pre>${escapeHtml(formatJson(row.output))}</pre>\n                  </div>\n                </div>\n              </details>\n            ` : ''}\n\n            <div class=\"meta-row\">${meta.join('<span>&middot;</span>')}</div>\n          </div>\n        `;\n      }\n\n      const DATA = asArray(RAW_DATA).map(normalizeResult);\n      DATA.forEach((row) => {\n        row.total_tokens = totalTokens(row);\n      });\n      const STATE = {\n        tab: 'overview',\n        filter: { status: 'all', target: 'all', search: '' },\n        sort: { col: 'test_id', dir: 'asc' },\n        expanded: {},\n      };\n\n      const APP = document.getElementById('app');\n      const HEADER_META = document.getElementById('header-meta');\n      const TAB_BUTTONS = Array.from(document.querySelectorAll('.tab'));\n      const TARGETS = Array.from(new Set(DATA.map((row) => row.target))).sort();\n      const EVALUATOR_NAMES = uniqueScoreNames(DATA);\n      const HAS_ANY_COST = DATA.some((row) => row.cost_usd > 0);\n      const OVERALL_STATS = computeStats(DATA);\n      const EVAL_GROUPS = groupBy(DATA, 'eval_file').sort((a, b) => a.name.localeCompare(b.name));\n      const TARGET_GROUPS = groupBy(DATA, 'target').sort((a, b) => a.name.localeCompare(b.name));\n\n      function syncHeaderMeta() {\n        const themeLabel = document.documentElement.classList.contains('light') ? 
'\\u263E Dark' : '\\u2600 Light';\n        HEADER_META.innerHTML = `\n          <span class=\"chip\">${escapeHtml(String(DATA.length))} tests</span>\n          <span class=\"chip\">${escapeHtml(String(EVAL_GROUPS.length))} eval files</span>\n          <span class=\"chip\">${escapeHtml(String(TARGETS.length))} targets</span>\n          <button class=\"theme-toggle\" id=\"theme-btn\" type=\"button\">${themeLabel}</button>\n        `;\n        document.getElementById('theme-btn').addEventListener('click', toggleTheme);\n      }\n\n      function toggleTheme() {\n        document.documentElement.classList.toggle('light');\n        syncHeaderMeta();\n      }\n\n      function setTab(tab) {\n        STATE.tab = tab;\n        TAB_BUTTONS.forEach((button) => {\n          button.classList.toggle('active', button.getAttribute('data-tab') === tab);\n        });\n        render();\n      }\n\n      TAB_BUTTONS.forEach((button) => {\n        button.addEventListener('click', () => setTab(button.getAttribute('data-tab')));\n      });\n\n      function renderStatCard(label, value, tone) {\n        return `\n          <div class=\"stat-card ${tone || ''}\">\n            <div class=\"stat-label\">${escapeHtml(label)}</div>\n            <div class=\"stat-value\">${escapeHtml(value)}</div>\n          </div>\n        `;\n      }\n\n      function renderOverview() {\n        if (DATA.length === 0) {\n          APP.innerHTML = `\n            <div class=\"empty-state\">\n              <h2>No results loaded</h2>\n              <p>Add one or more parsed JSONL result rows to the template placeholder.</p>\n            </div>\n          `;\n          return;\n        }\n\n        const overviewSections = [];\n        overviewSections.push(`\n          <section class=\"section\">\n            <div class=\"stats-grid\">\n              ${renderStatCard('Total tests', String(OVERALL_STATS.total))}\n              ${renderStatCard('Eval files', String(EVAL_GROUPS.length))}\n              
${renderStatCard('Targets', String(TARGETS.length))}\n              ${renderStatCard('Passed', String(OVERALL_STATS.passed), 'tone-pass')}\n              ${renderStatCard('Failed', String(OVERALL_STATS.failed), 'tone-fail')}\n              ${renderStatCard('Errors', String(OVERALL_STATS.errors), OVERALL_STATS.errors > 0 ? 'tone-warn' : '')}\n              ${renderStatCard('Pass rate', formatPercent(OVERALL_STATS.pass_rate), toneClassForRate(OVERALL_STATS.pass_rate))}\n              ${renderStatCard('Duration', formatDuration(OVERALL_STATS.total_duration_ms))}\n              ${renderStatCard('Tokens', formatTokens(OVERALL_STATS.total_tokens))}\n              ${HAS_ANY_COST ? renderStatCard('Cost', formatCost(OVERALL_STATS.total_cost_usd)) : ''}\n            </div>\n          </section>\n        `);\n\n        overviewSections.push(`\n          <section class=\"section\">\n            <div class=\"section-heading\">\n              <div>\n                <h2>Eval Files</h2>\n                <p>Grouped exactly as the exported template will render them in the Test Cases tab.</p>\n              </div>\n            </div>\n            <div class=\"table-wrap\">\n              <table>\n                <thead>\n                  <tr>\n                    <th>Eval file</th>\n                    <th>Pass rate</th>\n                    <th class=\"table-num\">Passed</th>\n                    <th class=\"table-num\">Failed</th>\n                    <th class=\"table-num\">Errors</th>\n                    <th class=\"table-num\">Tests</th>\n                    <th class=\"table-num\">Duration</th>\n                    <th class=\"table-num\">Tokens</th>\n                    ${HAS_ANY_COST ? 
'<th class=\"table-num\">Cost</th>' : ''}\n                  </tr>\n                </thead>\n                <tbody>\n                  ${EVAL_GROUPS.map((group) => `\n                    <tr>\n                      <td class=\"id-cell\">${escapeHtml(group.name)}</td>\n                      <td>${renderPassRate(group.stats.pass_rate)}</td>\n                      <td class=\"table-num text-pass\">${escapeHtml(String(group.stats.passed))}</td>\n                      <td class=\"table-num text-fail\">${escapeHtml(String(group.stats.failed))}</td>\n                      <td class=\"table-num text-warn\">${escapeHtml(String(group.stats.errors))}</td>\n                      <td class=\"table-num\">${escapeHtml(String(group.stats.total))}</td>\n                      <td class=\"table-num\">${escapeHtml(formatDuration(group.stats.total_duration_ms))}</td>\n                      <td class=\"table-num\">${escapeHtml(formatTokens(group.stats.total_tokens))}</td>\n                      ${HAS_ANY_COST ? 
`<td class=\"table-num\">${escapeHtml(formatCost(group.stats.total_cost_usd))}</td>` : ''}\n                    </tr>\n                  `).join('')}\n                </tbody>\n              </table>\n            </div>\n          </section>\n        `);\n\n        if (TARGET_GROUPS.length > 1) {\n          overviewSections.push(`\n            <section class=\"section\">\n              <div class=\"section-heading\">\n                <div>\n                  <h2>Targets</h2>\n                  <p>Cross-target summary for the loaded result set.</p>\n                </div>\n              </div>\n              <div class=\"table-wrap\">\n                <table>\n                  <thead>\n                    <tr>\n                      <th>Target</th>\n                    <th>Pass rate</th>\n                      <th class=\"table-num\">Passed</th>\n                      <th class=\"table-num\">Failed</th>\n                      <th class=\"table-num\">Errors</th>\n                      <th class=\"table-num\">Avg score</th>\n                      <th class=\"table-num\">Duration</th>\n                      <th class=\"table-num\">Tokens</th>\n                      ${HAS_ANY_COST ? '<th class=\"table-num\">Cost</th>' : ''}\n                    </tr>\n                  </thead>\n                  <tbody>\n                    ${TARGET_GROUPS.map((group) => {\n                      const gradedRows = group.rows.filter((row) => statusOf(row) !== 'error');\n                      const avgScore = gradedRows.length > 0\n                        ? 
gradedRows.reduce((sum, row) => sum + row.score, 0) / gradedRows.length\n                        : 0;\n                      return `\n                        <tr>\n                          <td class=\"id-cell\">${escapeHtml(group.name)}</td>\n                      <td>${renderPassRate(group.stats.pass_rate)}</td>\n                          <td class=\"table-num text-pass\">${escapeHtml(String(group.stats.passed))}</td>\n                          <td class=\"table-num text-fail\">${escapeHtml(String(group.stats.failed))}</td>\n                          <td class=\"table-num text-warn\">${escapeHtml(String(group.stats.errors))}</td>\n                          <td class=\"table-num ${toneForScore(avgScore)}\">${escapeHtml(formatPercent(avgScore))}</td>\n                          <td class=\"table-num\">${escapeHtml(formatDuration(group.stats.total_duration_ms))}</td>\n                          <td class=\"table-num\">${escapeHtml(formatTokens(group.stats.total_tokens))}</td>\n                          ${HAS_ANY_COST ? 
`<td class=\"table-num\">${escapeHtml(formatCost(group.stats.total_cost_usd))}</td>` : ''}\n                        </tr>\n                      `;\n                    }).join('')}\n                  </tbody>\n                </table>\n              </div>\n            </section>\n          `);\n        }\n\n        if (OVERALL_STATS.scores.length > 0) {\n          const buckets = [0, 0, 0, 0, 0];\n          OVERALL_STATS.scores.forEach((score) => {\n            const index = Math.min(Math.floor(score * 5), 4);\n            buckets[index] += 1;\n          });\n          const maxBucket = Math.max.apply(null, buckets);\n          const labels = ['0-20%', '20-40%', '40-60%', '60-80%', '80-100%'];\n          overviewSections.push(`\n            <section class=\"section\">\n              <div class=\"section-heading\">\n                <div>\n                  <h2>Score Distribution</h2>\n                  <p>Histogram across all non-error result rows.</p>\n                </div>\n              </div>\n              <div class=\"histogram\">\n                ${buckets.map((count, index) => `\n                  <div class=\"hist-row\">\n                    <div class=\"hist-label\">${labels[index]}</div>\n                    <div class=\"hist-bar-track\">\n                      <div class=\"hist-bar-fill\" style=\"width:${maxBucket > 0 ? (count / maxBucket) * 100 : 0}%\"></div>\n                    </div>\n                    <div class=\"hist-count tabular\">${count}</div>\n                  </div>\n                `).join('')}\n              </div>\n            </section>\n          `);\n        }\n\n        APP.innerHTML = overviewSections.join('');\n      }\n\n      function renderSortableHeader(label, col) {\n        const arrow = STATE.sort.col === col ? (STATE.sort.dir === 'asc' ? 
' ↑' : ' ↓') : '';\n        return `<th class=\"sortable\" data-sort=\"${escapeHtml(col)}\">${escapeHtml(label)}${arrow}</th>`;\n      }\n\n      function renderTestGroups() {\n        const filteredRows = filterRows(DATA, STATE.filter);\n        const filteredGroups = groupBy(filteredRows, 'eval_file').sort((a, b) => a.name.localeCompare(b.name));\n        const hasMultipleTargets = TARGETS.length > 1;\n        const hasCriteria = DATA.some((row) => extractCriteria(row).length > 0);\n        if (filteredRows.length === 0) {\n          return '<div class=\"empty-state\"><h2>No matching tests</h2><p>Adjust the filters or search text.</p></div>';\n        }\n\n        const colCount = 4 + (hasCriteria ? 1 : 0) + (hasMultipleTargets ? 1 : 0) + (HAS_ANY_COST ? 1 : 0);\n\n        return `\n          <div class=\"group-list\">\n            ${filteredGroups.map((group) => {\n              const sortedRows = sortRows(group.rows, STATE.sort);\n              const groupSummary = [];\n              groupSummary.push(`${group.stats.passed}/${group.stats.total} passed`);\n              if (group.stats.failed > 0) groupSummary.push(`${group.stats.failed} failed`);\n              if (group.stats.errors > 0) groupSummary.push(`${group.stats.errors} errors`);\n              return `\n                <section class=\"group-card\">\n                  <div class=\"group-header\">\n                    <div class=\"group-title-wrap\">\n                      <h2 class=\"group-title\">${escapeHtml(group.name)}</h2>\n                      <p class=\"group-subtitle\">${escapeHtml(groupSummary.join(' | '))}</p>\n                    </div>\n                    <div class=\"group-metrics\">\n                      <div class=\"metric\">\n                        <span class=\"metric-label\">Pass rate</span>\n                        <span class=\"metric-value\">${escapeHtml(formatPercent(group.stats.pass_rate))}</span>\n                      </div>\n                      <div class=\"metric\">\n  
                      <span class=\"metric-label\">Tests</span>\n                        <span class=\"metric-value\">${escapeHtml(String(group.stats.total))}</span>\n                      </div>\n                      <div class=\"metric\">\n                        <span class=\"metric-label\">Duration</span>\n                        <span class=\"metric-value\">${escapeHtml(formatDuration(group.stats.total_duration_ms))}</span>\n                      </div>\n                    </div>\n                  </div>\n                  <div class=\"table-wrap\">\n                    <table>\n                      <thead>\n                        <tr>\n                          <th class=\"expand-cell\"></th>\n                          ${renderSortableHeader('Test ID', 'test_id')}\n                          ${hasCriteria ? '<th>Criteria</th>' : ''}\n                          ${hasMultipleTargets ? renderSortableHeader('Target', 'target') : ''}\n                          ${renderSortableHeader('Score', 'score')}\n                          ${renderSortableHeader('Result', 'status')}\n                          ${HAS_ANY_COST ? renderSortableHeader('Cost', 'cost_usd') : ''}\n                        </tr>\n                      </thead>\n                      <tbody>\n                        ${sortedRows.map((row) => {\n                          const expanded = Boolean(STATE.expanded[row._key]);\n                          const criteria = extractCriteria(row);\n                          return `\n                            <tr class=\"test-row\" data-expand=\"${escapeHtml(row._key)}\">\n                              <td class=\"expand-cell\">${expanded ? '\\u25BE' : '\\u25B8'}</td>\n                              <td class=\"id-cell\">${escapeHtml(row.test_id)}</td>\n                              ${hasCriteria ? `<td class=\"criteria-cell\">${escapeHtml(criteria)}</td>` : ''}\n                              ${hasMultipleTargets ? 
`<td class=\"table-muted\">${escapeHtml(row.target)}</td>` : ''}\n                              <td class=\"table-num ${toneForScore(row.score)}\">${escapeHtml(formatPercent(row.score))}</td>\n                              <td>${renderStatusPill(statusOf(row))}</td>\n                              ${HAS_ANY_COST ? `<td class=\"table-num\">${escapeHtml(formatCost(row.cost_usd))}</td>` : ''}\n                            </tr>\n                            ${expanded ? `<tr class=\"detail-row\"><td colspan=\"${colCount}\">${renderDetail(row, EVALUATOR_NAMES, HAS_ANY_COST)}</td></tr>` : ''}\n                          `;\n                        }).join('')}\n                      </tbody>\n                    </table>\n                  </div>\n                </section>\n              `;\n            }).join('')}\n          </div>\n        `;\n      }\n\n      function bindTestEvents() {\n        const statusInput = document.getElementById('filter-status');\n        const targetInput = document.getElementById('filter-target');\n        const searchInput = document.getElementById('filter-search');\n        const sortableHeaders = Array.from(document.querySelectorAll('[data-sort]'));\n        const expandableRows = Array.from(document.querySelectorAll('[data-expand]'));\n\n        if (statusInput) {\n          statusInput.value = STATE.filter.status;\n          statusInput.addEventListener('change', (event) => {\n            STATE.filter.status = event.target.value;\n            render();\n          });\n        }\n\n        if (targetInput) {\n          targetInput.value = STATE.filter.target;\n          targetInput.addEventListener('change', (event) => {\n            STATE.filter.target = event.target.value;\n            render();\n          });\n        }\n\n        if (searchInput) {\n          searchInput.value = STATE.filter.search;\n          searchInput.addEventListener('input', (event) => {\n            STATE.filter.search = event.target.value;\n            
render();\n          });\n        }\n\n        sortableHeaders.forEach((header) => {\n          header.addEventListener('click', () => {\n            const col = header.getAttribute('data-sort');\n            if (STATE.sort.col === col) {\n              STATE.sort.dir = STATE.sort.dir === 'asc' ? 'desc' : 'asc';\n            } else {\n              STATE.sort.col = col;\n              STATE.sort.dir = 'asc';\n            }\n            render();\n          });\n        });\n\n        expandableRows.forEach((row) => {\n          row.addEventListener('click', () => {\n            const key = row.getAttribute('data-expand');\n            STATE.expanded[key] = !STATE.expanded[key];\n            render();\n          });\n        });\n      }\n\n      function renderTests() {\n        const filteredRows = filterRows(DATA, STATE.filter);\n        APP.innerHTML = `\n          <section class=\"section\">\n            <div class=\"section-heading\">\n              <div>\n                <h2>Test Cases</h2>\n                <p>Grouped by source eval file with assertion type badges carried down from parent score entries.</p>\n              </div>\n            </div>\n            <div class=\"filter-bar\">\n              <select class=\"filter-select\" id=\"filter-status\" aria-label=\"Filter by status\">\n                <option value=\"all\">All status</option>\n                <option value=\"pass\">Passed</option>\n                <option value=\"fail\">Failed</option>\n                <option value=\"error\">Errors</option>\n              </select>\n              ${TARGETS.length > 1 ? 
`\n                <select class=\"filter-select\" id=\"filter-target\" aria-label=\"Filter by target\">\n                  <option value=\"all\">All targets</option>\n                  ${TARGETS.map((target) => `<option value=\"${escapeHtml(target)}\">${escapeHtml(target)}</option>`).join('')}\n                </select>\n              ` : ''}\n              <input class=\"filter-input\" id=\"filter-search\" type=\"search\" placeholder=\"Search test IDs, targets, eval files, or assertion text\">\n              <span class=\"filter-count\">${escapeHtml(String(filteredRows.length))} of ${escapeHtml(String(DATA.length))} tests</span>\n            </div>\n            ${renderTestGroups()}\n          </section>\n        `;\n        bindTestEvents();\n      }\n\n      const IS_SMALL = DATA.length <= 20;\n      const TAB_NAV = document.getElementById('tab-nav');\n\n      function render() {\n        if (IS_SMALL) {\n          TAB_NAV.classList.add('hidden');\n          renderSinglePage();\n        } else {\n          TAB_NAV.classList.remove('hidden');\n          if (STATE.tab === 'overview') renderOverview();\n          else renderTests();\n        }\n      }\n\n      function renderSinglePage() {\n        if (DATA.length === 0) {\n          APP.innerHTML = `\n            <div class=\"empty-state\">\n              <h2>No results loaded</h2>\n              <p>Add one or more parsed JSONL result rows to the template placeholder.</p>\n            </div>\n          `;\n          return;\n        }\n\n        const sections = [];\n        sections.push(`\n          <section class=\"section\">\n            <div class=\"stats-grid\">\n              ${renderStatCard('Total tests', String(OVERALL_STATS.total))}\n              ${renderStatCard('Passed', String(OVERALL_STATS.passed), 'tone-pass')}\n              ${renderStatCard('Failed', String(OVERALL_STATS.failed), OVERALL_STATS.failed > 0 ? 
'tone-fail' : '')}\n              ${renderStatCard('Pass rate', formatPercent(OVERALL_STATS.pass_rate), toneClassForRate(OVERALL_STATS.pass_rate))}\n              ${renderStatCard('Duration', formatDuration(OVERALL_STATS.total_duration_ms))}\n              ${HAS_ANY_COST ? renderStatCard('Cost', formatCost(OVERALL_STATS.total_cost_usd)) : ''}\n            </div>\n          </section>\n        `);\n\n        sections.push(`\n          <section class=\"section\">\n            <div class=\"filter-bar\">\n              <select class=\"filter-select\" id=\"filter-status\" aria-label=\"Filter by status\">\n                <option value=\"all\">All status</option>\n                <option value=\"pass\">Passed</option>\n                <option value=\"fail\">Failed</option>\n                <option value=\"error\">Errors</option>\n              </select>\n              <input class=\"filter-input\" id=\"filter-search\" type=\"search\" placeholder=\"Search test IDs, targets, or assertion text\">\n            </div>\n            ${renderTestGroups()}\n          </section>\n        `);\n\n        APP.innerHTML = sections.join('');\n        bindTestEvents();\n      }\n\n      syncHeaderMeta();\n      render();\n    })();\n  </script>\n</body>\n</html>\n";
diff --git a/apps/cli/src/commands/results/report.ts b/apps/cli/src/commands/results/report.ts
new file mode 100644
index 000000000..5158cabfc
--- /dev/null
+++ b/apps/cli/src/commands/results/report.ts
@@ -0,0 +1,184 @@
+import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
+import path from 'node:path';
+
+import { command, option, optional, string } from 'cmd-ts';
+
+import type { EvaluationResult } from '@agentv/core';
+
+import { loadManifestResults, parseResultManifest, resolveResultSourcePath } from './manifest.js';
+import { RESULTS_REPORT_TEMPLATE } from './report-template.js';
+import { resolveSourceFile, sourceArg } from './shared.js';
+
+interface ReportManifestRecord {
+  readonly eval_file?: string; // optional eval file label carried by a parsed manifest row
+}
+
+interface BenchmarkMetadata {
+  readonly metadata?: {
+    readonly eval_file?: string; // optional eval file path/label read from a sibling benchmark.json
+  };
+}
+
+/** Reduces a file path to a bare label (basename minus known suffixes); undefined when nothing usable remains. */
+function normalizeEvalFileLabel(value: string | undefined): string | undefined {
+  const trimmed = value?.trim();
+  if (!trimmed) return undefined;
+
+  const label = path
+    .basename(trimmed)
+    .replace(/\.results\.jsonl$/i, '')
+    .replace(/\.eval\.ya?ml$/i, '')
+    .replace(/\.ya?ml$/i, '')
+    .replace(/\.jsonl$/i, '');
+  return label || undefined; // ".yaml" or "/" strip down to ""; undefined lets `??` fallbacks apply
+}
+
+/** Best-effort read of metadata.eval_file from the benchmark.json beside sourceFile; undefined if absent or unreadable. */
+function readBenchmarkEvalFile(sourceFile: string): string | undefined {
+  const benchmarkPath = path.join(path.dirname(sourceFile), 'benchmark.json');
+  if (!existsSync(benchmarkPath)) return undefined;
+  try {
+    const raw = readFileSync(benchmarkPath, 'utf8');
+    const parsed = JSON.parse(raw) as BenchmarkMetadata;
+    return normalizeEvalFileLabel(parsed.metadata?.eval_file);
+  } catch {
+    // Unreadable or malformed benchmark metadata is treated the same as a missing file.
+    return undefined;
+  }
+}
+
+export function deriveReportPath(sourceFile: string): string {
+  return path.join(path.dirname(sourceFile), 'report.html'); // report.html in the same directory as sourceFile
+}
+
+/** Maps a result onto the snake_case report row; eval_file comes from manifest row, benchmark, suite, then run dir name. */
+function serializeReportResult(
+  result: EvaluationResult,
+  sourceFile: string,
+  manifestRecord?: ReportManifestRecord,
+  benchmarkEvalFile?: string,
+): Record<string, unknown> {
+  const manifestLabel = normalizeEvalFileLabel(manifestRecord?.eval_file);
+  const runDirName = path.basename(path.dirname(sourceFile));
+  const evalFileLabel =
+    manifestLabel ?? benchmarkEvalFile ?? normalizeEvalFileLabel(result.suite) ?? runDirName;
+
+  return {
+    timestamp: result.timestamp,
+    test_id: result.testId,
+    suite: result.suite,
+    category: result.category,
+    target: result.target,
+    score: result.score,
+    scores: result.scores,
+    execution_status: result.executionStatus,
+    error: result.error,
+    duration_ms: result.durationMs,
+    token_usage: result.tokenUsage,
+    cost_usd: result.costUsd,
+    input: result.input,
+    output: result.output,
+    assertions: result.assertions,
+    eval_file: evalFileLabel,
+  };
+}
+
+/** Resolves `source` to a manifest path and loads its results, parsed records and benchmark label; throws if no results. */
+export async function loadReportSource(
+  source: string | undefined,
+  cwd: string,
+): Promise<{
+  sourceFile: string;
+  results: EvaluationResult[];
+  records: readonly ReportManifestRecord[];
+  benchmarkEvalFile?: string;
+}> {
+  const resolved = await resolveSourceFile(source, cwd);
+  const manifestPath = resolveResultSourcePath(resolved.sourceFile, cwd);
+  const manifestText = readFileSync(manifestPath, 'utf8');
+  const records = parseResultManifest(manifestText) as ReportManifestRecord[];
+  const results = loadManifestResults(manifestPath);
+  if (results.length === 0) {
+    throw new Error(`No results found in ${manifestPath}`);
+  }
+
+  return {
+    sourceFile: manifestPath,
+    results,
+    records,
+    benchmarkEvalFile: readBenchmarkEvalFile(manifestPath),
+  };
+}
+
+/** Inlines the serialized rows (records[i] is paired with results[i]) into the template; throws if the placeholder is missing. */
+export function renderResultsReport(
+  results: readonly EvaluationResult[],
+  sourceFile: string,
+  records: readonly ReportManifestRecord[],
+  benchmarkEvalFile?: string,
+): string {
+  if (!RESULTS_REPORT_TEMPLATE.includes('__DATA_PLACEHOLDER__')) {
+    throw new Error('Report template is missing __DATA_PLACEHOLDER__');
+  }
+  const rows = results.map((result, index) =>
+    serializeReportResult(result, sourceFile, records[index], benchmarkEvalFile),
+  );
+  const dataJson = JSON.stringify(rows).replace(/</g, '\\u003c'); // no "</script", "<!--" or "<script" in the inline script
+  return RESULTS_REPORT_TEMPLATE.replace('__DATA_PLACEHOLDER__', () => dataJson); // replacer fn: "$&", "$'" in data stay literal
+}
+
+/** Loads `source`, renders the report and writes it to `outputPath` (relative to cwd) or report.html beside the source. */
+export async function writeResultsReport(
+  source: string | undefined,
+  outputPath: string | undefined,
+  cwd: string,
+): Promise<{ sourceFile: string; outputPath: string; html: string }> {
+  const { sourceFile, results, records, benchmarkEvalFile } = await loadReportSource(source, cwd);
+  const html = renderResultsReport(results, sourceFile, records, benchmarkEvalFile);
+
+  let target = deriveReportPath(sourceFile);
+  if (outputPath) {
+    target = path.isAbsolute(outputPath) ? outputPath : path.resolve(cwd, outputPath);
+  }
+  mkdirSync(path.dirname(target), { recursive: true });
+  writeFileSync(target, html, 'utf8');
+
+  // Read back what was written; NOTE this also trips when result data itself contains the token.
+  const written = readFileSync(target, 'utf8');
+  if (written.includes('__DATA_PLACEHOLDER__')) {
+    throw new Error('Report placeholder substitution failed');
+  }
+  return { sourceFile, outputPath: target, html: written };
+}
+
+/** CLI `results report`: writes the HTML report, prints the output and source paths, exits with code 1 on failure. */
+export const resultsReportCommand = command({
+  name: 'report',
+  description: 'Generate a static HTML report from a run workspace or index.jsonl manifest',
+  args: {
+    source: sourceArg,
+    out: option({
+      type: optional(string),
+      long: 'out',
+      short: 'o',
+      description: 'Output HTML file (defaults to <run-dir>/report.html)',
+    }),
+    dir: option({
+      type: optional(string),
+      long: 'dir',
+      short: 'd',
+      description: 'Working directory (default: current directory)',
+    }),
+  },
+  handler: async ({ source, out, dir }) => {
+    const cwd = dir ?? process.cwd();
+    try {
+      const { sourceFile, outputPath } = await writeResultsReport(source, out, cwd);
+      console.log(`Report written to ${outputPath}`);
+      console.log(`Source: ${sourceFile}`);
+    } catch (error) { // a thrown value need not be an Error, so narrow instead of asserting
+      console.error(`Error: ${error instanceof Error ? error.message : String(error)}`);
+      process.exit(1);
+    }
+  },
+});
diff --git a/apps/cli/test/commands/results/report.test.ts b/apps/cli/test/commands/results/report.test.ts
new file mode 100644
index 000000000..e33b5de87
--- /dev/null
+++ b/apps/cli/test/commands/results/report.test.ts
@@ -0,0 +1,174 @@
+import { afterEach, beforeEach, describe, expect, it } from 'bun:test';
+import { mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import path from 'node:path';
+import vm from 'node:vm';
+
+import type { EvaluationResult, EvaluatorResult } from '@agentv/core';
+
+import { writeArtifactsFromResults } from '../../../src/commands/eval/artifact-writer.js';
+import {
+  deriveReportPath,
+  loadReportSource,
+  writeResultsReport,
+} from '../../../src/commands/results/report.js';
+
+/**
+ * Fixture builder for a single EvaluatorResult.
+ * The verdict follows the score: 'pass' at 0.5 or above, otherwise 'fail'.
+ */
+function makeScore(
+  name: string,
+  type: string,
+  score: number,
+  assertions: EvaluatorResult['assertions'],
+): EvaluatorResult {
+  const verdict = score >= 0.5 ? 'pass' : 'fail';
+
+  return { name, type, score, assertions, verdict };
+}
+
+function makeResult(overrides: Partial<EvaluationResult> = {}): EvaluationResult {
+  const defaults: EvaluationResult = {
+    timestamp: '2026-04-15T01:00:00.000Z',
+    testId: 'test-1',
+    suite: 'default',
+    score: 1,
+    assertions: [{ text: 'fallback assertion', passed: true, evidence: 'ok' }],
+    output: [{ role: 'assistant', content: 'answer' }],
+    input: [{ role: 'user', content: 'question' }],
+    target: 'default',
+    executionStatus: 'ok',
+    tokenUsage: { input: 100, output: 50 },
+    durationMs: 1200,
+  };
+  return { ...defaults, ...overrides }; // baseline passing fixture; fields in overrides win
+}
+
+describe('results report', () => {
+  let tempDir: string;
+  // Each test works in its own temp directory, which afterEach removes again.
+  beforeEach(() => {
+    tempDir = mkdtempSync(path.join(tmpdir(), 'agentv-report-test-'));
+  });
+
+  afterEach(() => {
+    rmSync(tempDir, { recursive: true, force: true });
+  });
+
+  it('derives default report path from the run workspace', () => {
+    const sourceFile = path.join(tempDir, 'run', 'index.jsonl');
+    expect(deriveReportPath(sourceFile)).toBe(path.join(tempDir, 'run', 'report.html'));
+  });
+
+  it('loads benchmark eval file metadata from a run workspace', async () => {
+    const runDir = path.join(tempDir, 'run');
+    await writeArtifactsFromResults([makeResult()], runDir, { evalFile: 'evals/demo.eval.yaml' });
+
+    const loaded = await loadReportSource(runDir, tempDir);
+
+    expect(loaded.results).toHaveLength(1);
+    expect(loaded.benchmarkEvalFile).toBe('demo');
+  });
+
+  it('writes a static HTML report with grouped eval files and assertion type badges', async () => {
+    const runDir = path.join(tempDir, 'run');
+    await writeArtifactsFromResults(
+      [
+        makeResult({
+          testId: 'registry-pass',
+          target: 'claude-sonnet',
+          scores: [
+            makeScore('contains', 'contains', 1, [
+              { text: 'mentions registry', passed: true, evidence: 'registry present' },
+            ]),
+          ],
+        }),
+        makeResult({
+          testId: 'billing-fail',
+          target: 'gpt-5.4',
+          score: 0.2,
+          executionStatus: 'quality_failure',
+          scores: [
+            makeScore('regex', 'regex', 0.2, [
+              { text: 'matches invoice pattern', passed: false, evidence: 'no invoice id' },
+            ]),
+          ],
+        }),
+      ],
+      runDir,
+      { evalFile: 'evals/demo.eval.yaml' },
+    );
+    // Rewrite index.jsonl so each manifest row carries its own eval_file label.
+    const indexPath = path.join(runDir, 'index.jsonl');
+    const lines = readFileSync(indexPath, 'utf8')
+      .trim()
+      .split('\n')
+      .map((line) => JSON.parse(line) as Record<string, unknown>);
+    lines[0].eval_file = 'cw-freight-boolean-registry';
+    lines[1].eval_file = 'cw-freight-billing';
+    writeFileSync(indexPath, `${lines.map((line) => JSON.stringify(line)).join('\n')}\n`, 'utf8');
+
+    const { outputPath } = await writeResultsReport(runDir, undefined, tempDir);
+    const html = readFileSync(outputPath, 'utf8');
+    // Expectations containing ${...} match the inline script's source text, not rendered markup.
+    expect(outputPath).toBe(path.join(runDir, 'report.html'));
+    expect(html).not.toContain('__DATA_PLACEHOLDER__');
+    expect(html).toContain('#030712');
+    expect(html).toContain('cw-freight-boolean-registry');
+    expect(html).toContain('cw-freight-billing');
+    expect(html).toContain('contains');
+    expect(html).toContain('regex');
+    expect(html).toContain('AgentV Evaluation Report');
+    expect(html).not.toContain('<th>Progress</th>');
+    expect(html).not.toContain('metric-stack');
+    expect(html).toContain('<span class="pass-rate-track">');
+    expect(html).toContain('<span class="pass-rate-label">${formatPercent(rate)}</span>');
+    expect(html).toContain(
+      '<span class="metric-value">${escapeHtml(formatPercent(group.stats.pass_rate))}</span>',
+    );
+    expect(html).toContain('Assertions');
+    expect(html).toContain('assertion-badge');
+    expect(html).not.toContain('Grader Results');
+    expect(html).not.toContain('Evaluator Results');
+  });
+
+  it('emits an inline report script that parses successfully', async () => {
+    const runDir = path.join(tempDir, 'run');
+    await writeArtifactsFromResults([makeResult()], runDir, { evalFile: 'evals/demo.eval.yaml' });
+
+    const { outputPath } = await writeResultsReport(runDir, undefined, tempDir);
+    const html = readFileSync(outputPath, 'utf8');
+    const script = html.match(/<script>([\s\S]*)<\/script>/)?.[1];
+
+    expect(script).toBeString();
+    // Minimal DOM stand-ins so the extracted script can be executed in a bare vm context.
+    const app = { innerHTML: '' };
+    const headerMeta = { innerHTML: '' };
+    const tabNav = { classList: { add: () => undefined, remove: () => undefined } };
+    const tabButton = {
+      getAttribute: () => 'overview',
+      classList: { toggle: () => undefined },
+      addEventListener: () => undefined,
+    };
+
+    expect(() =>
+      vm.runInNewContext(script as string, {
+        console,
+        document: {
+          documentElement: { classList: { contains: () => false, toggle: () => undefined } },
+          getElementById(id: string) {
+            if (id === 'app') return app;
+            if (id === 'header-meta') return headerMeta;
+            if (id === 'tab-nav') return tabNav;
+            if (id === 'theme-btn') return { addEventListener: () => undefined };
+            return null;
+          },
+          querySelectorAll(selector: string) {
+            return selector === '.tab' ? [tabButton] : [];
+          },
+        },
+      }),
+    ).not.toThrow();
+  });
+});
diff --git a/apps/web/src/assets/screenshots/results-report-details.png b/apps/web/src/assets/screenshots/results-report-details.png
new file mode 100644
index 000000000..ed01df98d
Binary files /dev/null and b/apps/web/src/assets/screenshots/results-report-details.png differ
diff --git a/apps/web/src/assets/screenshots/results-report-overview.png b/apps/web/src/assets/screenshots/results-report-overview.png
new file mode 100644
index 000000000..50ad72318
Binary files /dev/null and b/apps/web/src/assets/screenshots/results-report-overview.png differ
diff --git a/apps/web/src/content/docs/docs/guides/human-review.mdx b/apps/web/src/content/docs/docs/guides/human-review.mdx
index 018aca2a0..e48f851e0 100644
--- a/apps/web/src/content/docs/docs/guides/human-review.mdx
+++ b/apps/web/src/content/docs/docs/guides/human-review.mdx
@@ -34,16 +34,21 @@ Skip the review step for routine CI gate runs where you only need pass/fail.
 
 ### Inspect results
 
-For workspace evaluations (EVAL.yaml), use the trace viewer:
+For workspace evaluations (EVAL.yaml), inspect the run manifest and generate the HTML report from the existing workspace:
 
 ```bash
 # View traces from a specific run
 agentv inspect show results/2026-03-14T10-32-00_claude/index.jsonl
 
-# View the HTML report (if generated via #562)
+# Generate the HTML report from the run workspace
+agentv results report results/2026-03-14T10-32-00_claude
+
+# Open the generated HTML report
 open results/2026-03-14T10-32-00_claude/report.html
 ```
 
+The report itself is documented under [Results](/docs/tools/results/). Use that page for the command surface and visual walkthrough; use this page for the review loop that happens after you open it.
+
 For simple skill evaluations (evals.json), scan the results JSONL:
 
 ```bash
@@ -51,7 +56,7 @@ For simple skill evaluations (evals.json), scan the results JSONL:
 cat results/output.jsonl | jq 'select(.score < 0.8)'
 
 # Show all scores
-cat results/output.jsonl | jq '{id: .testId, score: .score, verdict: .verdict}'
+cat results/output.jsonl | jq '{id: .test_id, score: .score, verdict: .verdict}'
 ```
 
 ### Write feedback
diff --git a/apps/web/src/content/docs/docs/tools/results.mdx b/apps/web/src/content/docs/docs/tools/results.mdx
new file mode 100644
index 000000000..9c798dd32
--- /dev/null
+++ b/apps/web/src/content/docs/docs/tools/results.mdx
@@ -0,0 +1,81 @@
+---
+title: Results
+description: Inspect, export, and share AgentV result workspaces from the CLI.
+sidebar:
+  order: 6
+---
+
+import { Image } from 'astro:assets';
+import resultsReportOverview from '../../../../assets/screenshots/results-report-overview.png';
+import resultsReportDetails from '../../../../assets/screenshots/results-report-details.png';
+
+The `results` command family works on existing AgentV run workspaces and `index.jsonl` manifests. Use it after an eval run to inspect failures, validate manifests, export artifact layouts, or generate a shareable HTML report.
+
+## Subcommands
+
+| Subcommand | Purpose |
+|-----------|---------|
+| `results report` | Generate a self-contained static HTML report from an existing run workspace |
+| `results export` | Materialize or normalize the artifact workspace structure for a manifest |
+| `results summary` | Print aggregate metrics for a run |
+| `results failures` | Show only failing cases |
+| `results show` | Display case-level rows from a run workspace |
+| `results validate` | Validate that a workspace or manifest resolves correctly |
+
+## `results report`
+
+The `results report` command turns an existing run workspace or `index.jsonl` manifest into a self-contained HTML report for sharing, inspection, and human review.
+
+<Image src={resultsReportOverview} alt="AgentV results report overview showing 11 tests across 2 eval files with pass, fail, pass rate, duration, and cost summary cards" />
+
+```bash
+agentv results report <run-workspace-or-index.jsonl>
+```
+
+Examples:
+
+```bash
+# Generate report.html next to the run manifest
+agentv results report .agentv/results/runs/2026-03-14T10-32-00_claude
+
+# Use an explicit output path
+agentv results report .agentv/results/runs/2026-03-14T10-32-00_claude/index.jsonl \
+  --out ./reports/human-review.html
+```
+
+What it shows:
+
+- **Summary stats** — total tests, passed, failed, pass rate, duration, and cost
+- **Eval file groups** — test cases grouped by eval file with pass rate, test count, and duration
+- **Expandable details** — unified assertions with pass/fail indicators and type badges, plus collapsible input/output sections
+- **Criteria column** — the test prompt or description, shown inline for quick scanning
+
+<Image src={resultsReportDetails} alt="AgentV results report showing an expanded failing test case with unified assertions, deterministic type badges, pass/fail indicators, evidence text, and collapsible input/output" />
+
+| Option | Description |
+|--------|-------------|
+| `--out`, `-o` | Output HTML file (defaults to `<run-dir>/report.html`) |
+| `--dir`, `-d` | Working directory used to resolve the source path |
+
+## `results export`
+
+Use `results export` when you need the artifact workspace layout itself rather than a rendered report.
+
+```bash
+agentv results export <run-workspace-or-index.jsonl> [--out <dir>]
+```
+
+This is useful when a manifest needs to be materialized into a predictable artifact tree for other tooling, review, or archiving.
+
+## Inspection helpers
+
+For lightweight terminal workflows:
+
+```bash
+agentv results summary .agentv/results/runs/<timestamp>
+agentv results failures .agentv/results/runs/<timestamp>
+agentv results show .agentv/results/runs/<timestamp> --test-id my-case
+agentv results validate .agentv/results/runs/<timestamp>
+```
+
+For a review-centric workflow built around these artifacts, see [Human Review Checkpoint](/docs/guides/human-review/).