diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml
index 510e966d..5aeb6e97 100644
--- a/.github/workflows/benchmarks.yml
+++ b/.github/workflows/benchmarks.yml
@@ -9,10 +9,22 @@ on:
       - "docs/**"
       - "assets/**"
       - "LICENSE"
+  pull_request:
+    paths-ignore:
+      - "**/*.md"
+      - "docs/**"
+      - "assets/**"
+      - "LICENSE"
   workflow_dispatch:
 
+permissions:
+  contents: read
+  issues: write
+  pull-requests: write
+
 env:
   SKIP_INSTALL_SIMPLE_GIT_HOOKS: "1"
+  HUNK_BENCHMARK_SAMPLES: ${{ github.event_name == 'pull_request' && '1' || '3' }}
 
 concurrency:
   group: benchmarks-${{ github.workflow }}-${{ github.ref }}
@@ -25,6 +37,8 @@ jobs:
     steps:
       - name: Check out repository
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          fetch-depth: 0
 
       - name: Set up Bun
         uses: oven-sh/setup-bun@0c5077e51419868618aeaa5fe8019c62421857d6 # v2.2.0
@@ -34,36 +48,63 @@ jobs:
       - name: Install dependencies
         run: bun install --frozen-lockfile
 
-      - name: Run bootstrap benchmark
+      - name: Run head benchmarks
         run: |
+          # Steps without an explicit `shell:` run as `bash -e` (no pipefail), so `tee` would mask a failed benchmark run.
+          set -o pipefail
           mkdir -p benchmark-results
-          bun run bench:bootstrap-load | tee benchmark-results/bootstrap-load.txt
+          bun run bench -- --samples "$HUNK_BENCHMARK_SAMPLES" --out benchmark-results/head.json \
+            | tee benchmark-results/head.txt
 
-      - name: Run highlight prefetch benchmark
+      - name: Run base benchmarks
+        if: github.event_name == 'pull_request'
         run: |
-          bun run bench:highlight-prefetch | tee benchmark-results/highlight-prefetch.txt
+          git fetch origin main
+          git worktree add ../hunk-benchmark-base origin/main
+          rm -rf ../hunk-benchmark-base/benchmarks
+          cp -R benchmarks ../hunk-benchmark-base/benchmarks
+          cd ../hunk-benchmark-base
+          bun install --frozen-lockfile
+          bun run benchmarks/run.ts --samples "$HUNK_BENCHMARK_SAMPLES" --out "$GITHUB_WORKSPACE/benchmark-results/base.json" \
+            | tee "$GITHUB_WORKSPACE/benchmark-results/base.txt"
 
-      - name: Run large stream benchmark
+      - name: Compare benchmark results
+        id: compare
+        if: github.event_name == 'pull_request'
+        continue-on-error: true
         run: |
-          bun run bench:large-stream | tee benchmark-results/large-stream.txt
+          bun run bench:compare -- \
+            --base benchmark-results/base.json \
+            --head benchmark-results/head.json \
+            --out benchmark-results/comparison.json \
+            --markdown benchmark-results/summary.md
 
       - name: Publish benchmark summary
+        if: always()
         run: |
-          {
-            echo '## Benchmark results'
-            echo
-            for file in benchmark-results/*.txt; do
-              echo "### $(basename "$file")"
+          if [ -f benchmark-results/summary.md ]; then
+            cat benchmark-results/summary.md >> "$GITHUB_STEP_SUMMARY"
+          else
+            {
+              echo '## Benchmark results'
+              echo
               echo '```text'
-              cat "$file"
+              cat benchmark-results/head.txt
               echo '```'
-              echo
-            done
-          } >> "$GITHUB_STEP_SUMMARY"
+            } >> "$GITHUB_STEP_SUMMARY"
+          fi
+
+      - name: Comment benchmark summary on PR
+        if: always() && github.event_name == 'pull_request' && hashFiles('benchmark-results/summary.md') != ''
+        continue-on-error: true
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: bun run bench:comment-pr -- --body benchmark-results/summary.md
 
       - name: Upload benchmark artifacts
+        if: always()
         uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
         with:
           name: benchmark-results
-          path: benchmark-results/*.txt
+          path: benchmark-results/*
           if-no-files-found: error
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 7f62d9f5..723c75af 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,8 @@ All notable user-visible changes to Hunk are documented in this file.
 
 ### Added
 
+- Added CI performance benchmarks with PR comparison comments to guard Hunk startup, loading, rendering, highlighting, navigation, and memory costs.
+
 ### Changed
 
 ### Fixed
diff --git a/benchmarks/README.md b/benchmarks/README.md
index 1bb5e78d..7027d596 100644
--- a/benchmarks/README.md
+++ b/benchmarks/README.md
@@ -1,28 +1,106 @@
 # Benchmarks
 
-Benchmark scripts, shared fixtures, and local result artifacts live here.
+Benchmark scripts, shared fixtures, and local result artifacts live here. These benchmarks protect Hunk's core promise: fast loading, fast first render, fast navigation, and predictable memory use on large diffs.
 
-## Scripts
+## Running locally
 
-- `bootstrap-load.ts` — measures bootstrap and git-loader cost on a synthetic large repo
-- `highlight-prefetch.ts` — measures selected-file highlight startup and adjacent prefetch readiness
-- `large-stream.ts` — measures large split-stream first-frame and scroll cost, including note-enabled cases
-- `large-stream-profile.ts` — profiles the main pure planning stages behind the large split-stream benchmark
-- `large-stream-fixture.ts` — shared synthetic diff fixture used by the large-stream benchmarks
+Run the full benchmark suite with one JSON result file:
 
-## Running
+```bash
+bun run bench -- --samples 3 --out benchmarks/results/head.json
+```
 
-From the project root:
+Run focused scripts while iterating:
 
 ```bash
 bun run bench:bootstrap-load
+bun run bench:working-tree-load
+bun run bench:changeset-parse
+bun run bench:render-layout
 bun run bench:highlight-prefetch
 bun run bench:large-stream
 bun run bench:large-stream-profile
+bun run bench:memory
+bun run bench:competitors
+```
+
+Compare two JSON result files:
+
+```bash
+bun run bench:compare -- \
+  --base benchmarks/results/base.json \
+  --head benchmarks/results/head.json \
+  --markdown benchmarks/results/summary.md
+```
+
+## Scripts
+
+- `bootstrap-load.ts` — measures bootstrap and git-loader cost on a synthetic large repo, including file-pair bootstrap.
+- `working-tree-load.ts` — measures git working-tree loads across small, medium, large, many-untracked, and few-large-untracked repos.
+- `changeset-parse.ts` — measures patch normalization, Pierre parsing, patch chunking, and normalized `DiffFile` construction for many-small-files, balanced, and large-single-file patches.
+- `render-layout.ts` — measures pure split/stack row building, section geometry, and review-plan construction for many-small-files, balanced, and large-single-file streams.
+- `highlight-prefetch.ts` — measures selected-file highlight startup and adjacent prefetch readiness.
+- `large-stream.ts` — measures large split-stream first-frame and scroll cost.
+- `large-stream-profile.ts` — optional local profiler for the main pure planning stages behind the large split-stream benchmark.
+- `memory.ts` — optional local RSS/heap profiler after fixture loading, planning, first frame, and next-hunk navigation.
+- `competitors.ts` — optional local informational comparisons against `git diff --no-ext-diff`, `delta`, `difftastic`, and `diff-so-fancy` when installed.
+- `large-stream-fixture.ts` and `lib/fixtures.ts` — shared deterministic synthetic fixtures.
+
+## Output format
+
+Each script prints `METRIC name=value` lines. `benchmarks/run.ts` repeats scripts, aggregates samples, and writes JSON:
+
+```json
+{
+  "version": 1,
+  "samplesPerBenchmark": 3,
+  "results": [
+    {
+      "name": "large-stream/cold_first_frame_ms",
+      "unit": "ms",
+      "samples": [61.2, 60.8, 62.1],
+      "median": 61.2,
+      "p75": 62.1,
+      "p95": 62.1,
+      "threshold": {
+        "maxRegressionRatio": 1.15,
+        "minAbsoluteRegression": 5
+      },
+      "comparable": true
+    }
+  ]
+}
 ```
 
-## Results
+## CI policy
+
+`.github/workflows/benchmarks.yml` runs the suite on `main`, pull requests, and manual dispatch. On pull requests it:
+
+1. Runs benchmarks on the PR revision.
+2. Checks out `origin/main` in a sibling worktree.
+3. Copies the PR benchmark harness into that base worktree so new benchmarks can compare base code during the PR that introduces them.
+4. Runs the same benchmarks on base.
+5. Compares medians and marks regressions in the PR summary without blocking the PR.
+6. Uploads raw JSON/text artifacts.
+7. Posts or updates one PR comment with a curated key-benchmark table, always including regressions and hiding noisy supporting metrics.
+
+The default CI suite intentionally excludes optional memory profiling, pure-planning profiling, and competitor comparisons to keep PR feedback fast. Pull requests use one sample per benchmark and are informational/non-blocking; `main` runs keep three samples for a more stable history. Run `bun run bench -- --include-competitors` or focused scripts locally when deeper diagnostics are needed.
+
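+Initial thresholds (a "fail" marks the metric as regressed in the comparison output; the pull-request compare step itself is non-blocking):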
+
+- Time metrics (`*_ms`): fail when PR median is more than 15% slower **and** at least 5ms slower.
+- Memory metrics (`rss`/`heap`): fail when PR median is more than 20% higher **and** at least 8MiB higher.
+- Counts, fixture sizes, availability flags, and optional competitor metrics are informational.
+
+Competitor comparisons are intentionally non-failing because installed tool versions and feature parity vary by environment.
+
+## Updating thresholds
+
+Prefer fixing regressions first. If a maintainer accepts an intentional tradeoff, update the threshold in `benchmarks/lib/benchmark-result.ts` and mention why in the PR. Keep thresholds broad enough for CI variability but tight enough to catch visible slowdowns.
 
-Use `benchmarks/results/` for local benchmark output, notes, or captured runs.
+## Noise troubleshooting
 
-The folder stays in the repo so the convention is discoverable, but local result files inside it are ignored by default.
+- Re-run the benchmark job before investigating tiny deltas; thresholds include absolute tolerances so that sub-5ms noise is not flagged as a regression.
+- PTY/renderer-adjacent metrics are noisier than pure parsing/planning metrics.
+- Use `--samples 5` locally when validating borderline changes.
+- Inspect uploaded raw samples before changing thresholds.
diff --git a/benchmarks/changeset-parse.ts b/benchmarks/changeset-parse.ts
new file mode 100644
index 00000000..5925623d
--- /dev/null
+++ b/benchmarks/changeset-parse.ts
@@ -0,0 +1,59 @@
+// Benchmark raw patch parsing and normalized DiffFile construction for several diff shapes.
+import { performance } from "perf_hooks";
+import { parsePatchFiles } from "@pierre/diffs";
+import { buildDiffFile } from "../src/core/diffFile";
+import { findPatchChunk, splitPatchIntoFileChunks } from "../src/core/patch/chunks";
+import { normalizePatchText } from "../src/core/patch/normalize";
+import { createSyntheticPatch } from "./lib/fixtures";
+
+interface Scenario {
+  name: string;
+  patch: string;
+}
+
+const scenarios: Scenario[] = [
+  {
+    name: "many_small_files",
+    patch: createSyntheticPatch({ fileCount: 240, lines: 48, changedLines: 8 }),
+  },
+  {
+    name: "balanced_changeset",
+    patch: createSyntheticPatch({ fileCount: 96, lines: 220, changedLines: 48 }),
+  },
+  {
+    name: "large_single_file",
+    patch: createSyntheticPatch({ fileCount: 1, lines: 18_000, changedLines: 2_000 }),
+  },
+];
+
+function measureScenario({ name, patch }: Scenario) {
+  const normalizeStart = performance.now();
+  const normalized = normalizePatchText(patch);
+  const normalizeMs = performance.now() - normalizeStart;
+
+  const parseStart = performance.now();
+  const parsed = parsePatchFiles(normalized, "patch", true);
+  const parseMs = performance.now() - parseStart;
+
+  const splitStart = performance.now();
+  const chunks = splitPatchIntoFileChunks(normalized);
+  const splitMs = performance.now() - splitStart;
+
+  const files = parsed.flatMap((entry) => entry.files);
+  const buildStart = performance.now();
+  const diffFiles = files.map((metadata, index) =>
+    buildDiffFile(metadata, findPatchChunk(metadata, chunks, index), index, name, null),
+  );
+  const buildMs = performance.now() - buildStart;
+
+  console.log(`METRIC ${name}_normalize_patch_ms=${normalizeMs.toFixed(2)}`);
+  console.log(`METRIC ${name}_parse_patch_ms=${parseMs.toFixed(2)}`);
+  console.log(`METRIC ${name}_split_chunks_ms=${splitMs.toFixed(2)}`);
+  console.log(`METRIC ${name}_build_diff_files_ms=${buildMs.toFixed(2)}`);
+  console.log(`METRIC ${name}_files=${diffFiles.length}`);
+  console.log(`METRIC ${name}_patch_bytes=${Buffer.byteLength(normalized)}`);
+}
+
+for (const scenario of scenarios) {
+  measureScenario(scenario);
+}
diff --git a/benchmarks/comment-pr.ts b/benchmarks/comment-pr.ts
new file mode 100644
index 00000000..387df039
--- /dev/null
+++ b/benchmarks/comment-pr.ts
@@ -0,0 +1,97 @@
+#!/usr/bin/env bun
+import { readFileSync } from "node:fs";
+
+const marker = "<!-- hunk-benchmark-comment -->";
+
+function requireEnv(name: string) {
+  const value = process.env[name];
+  if (!value) {
+    throw new Error(`Missing ${name}`);
+  }
+  return value;
+}
+
+function parseArgs(args: string[]) {
+  for (let index = 0; index < args.length; index += 1) {
+    if (args[index] === "--body") {
+      const value = args[index + 1];
+      if (!value) {
+        throw new Error("Missing value for --body");
+      }
+      return { bodyPath: value };
+    }
+  }
+
+  throw new Error("Usage: bun run benchmarks/comment-pr.ts --body benchmark-results/summary.md");
+}
+
+async function githubRequest(path: string, init: RequestInit = {}) {
+  const token = requireEnv("GITHUB_TOKEN");
+  const response = await fetch(`https://api.github.com${path}`, {
+    ...init,
+    headers: {
+      Accept: "application/vnd.github+json",
+      Authorization: `Bearer ${token}`,
+      "X-GitHub-Api-Version": "2022-11-28",
+      ...init.headers,
+    },
+  });
+
+  if (!response.ok) {
+    const text = await response.text();
+    throw new Error(
+      `GitHub API ${init.method ?? "GET"} ${path} failed: ${response.status} ${text}`,
+    );
+  }
+
+  return response.status === 204 ? null : response.json();
+}
+
+/** Fetch every issue comment page so the marker lookup can update old bot comments. */
+async function fetchAllComments(repository: string, pullRequestNumber: number) {
+  const comments: Array<{ id: number; body?: string }> = [];
+
+  for (let page = 1; ; page += 1) {
+    const batch = (await githubRequest(
+      `/repos/${repository}/issues/${pullRequestNumber}/comments?per_page=100&page=${page}`,
+    )) as Array<{ id: number; body?: string }>;
+
+    comments.push(...batch);
+
+    if (batch.length < 100) {
+      return comments;
+    }
+  }
+}
+
+const { bodyPath } = parseArgs(Bun.argv.slice(2));
+const repository = requireEnv("GITHUB_REPOSITORY");
+const eventPath = requireEnv("GITHUB_EVENT_PATH");
+const event = JSON.parse(readFileSync(eventPath, "utf8")) as { pull_request?: { number: number } };
+const pullRequestNumber = event.pull_request?.number;
+
+if (!pullRequestNumber) {
+  console.log("No pull request in event payload; skipping benchmark comment.");
+  process.exit(0);
+}
+
+const body = readFileSync(bodyPath, "utf8");
+const comments = await fetchAllComments(repository, pullRequestNumber);
+const existing = comments.find((comment) => comment.body?.includes(marker));
+
+if (existing) {
+  await githubRequest(`/repos/${repository}/issues/comments/${existing.id}`, {
+    method: "PATCH",
+    body: JSON.stringify({ body }),
+  });
+  console.log(`Updated benchmark comment ${existing.id}.`);
+} else {
+  const created = (await githubRequest(
+    `/repos/${repository}/issues/${pullRequestNumber}/comments`,
+    {
+      method: "POST",
+      body: JSON.stringify({ body }),
+    },
+  )) as { id: number };
+  console.log(`Created benchmark comment ${created.id}.`);
+}
diff --git a/benchmarks/compare.ts b/benchmarks/compare.ts
new file mode 100644
index 00000000..f66fd45d
--- /dev/null
+++ b/benchmarks/compare.ts
@@ -0,0 +1,293 @@
+#!/usr/bin/env bun
+import { mkdirSync, readFileSync, writeFileSync } from "node:fs";
+import { dirname, resolve } from "node:path";
+import type {
+  BenchmarkComparisonResult,
+  BenchmarkComparisonRow,
+  BenchmarkMetricResult,
+  BenchmarkRunResult,
+} from "./lib/benchmark-result";
+
+interface CompareOptions {
+  base: string;
+  head: string;
+  out?: string;
+  markdown?: string;
+}
+
+function readArgValue(args: string[], index: number) {
+  const value = args[index + 1];
+  if (!value) {
+    throw new Error(`Missing value for ${args[index]}`);
+  }
+  return value;
+}
+
+function parseArgs(args: string[]): CompareOptions {
+  const options: Partial<CompareOptions> = {};
+
+  for (let index = 0; index < args.length; index += 1) {
+    const arg = args[index]!;
+
+    if (arg === "--base") {
+      options.base = readArgValue(args, index);
+      index += 1;
+      continue;
+    }
+
+    if (arg === "--head") {
+      options.head = readArgValue(args, index);
+      index += 1;
+      continue;
+    }
+
+    if (arg === "--out") {
+      options.out = readArgValue(args, index);
+      index += 1;
+      continue;
+    }
+
+    if (arg === "--markdown") {
+      options.markdown = readArgValue(args, index);
+      index += 1;
+      continue;
+    }
+
+    throw new Error(`Unknown benchmark compare argument: ${arg}`);
+  }
+
+  if (!options.base || !options.head) {
+    throw new Error(
+      "Usage: bun run benchmarks/compare.ts --base base.json --head head.json [--out compare.json] [--markdown summary.md]",
+    );
+  }
+
+  return options as CompareOptions;
+}
+
+function readRun(path: string): BenchmarkRunResult {
+  return JSON.parse(readFileSync(path, "utf8")) as BenchmarkRunResult;
+}
+
+function compareMetric(
+  base: BenchmarkMetricResult | undefined,
+  head: BenchmarkMetricResult | undefined,
+) {
+  if (!base && !head) {
+    throw new Error("Cannot compare two missing metrics");
+  }
+
+  const metric = head ?? base!;
+  const baseMedian = base?.median ?? 0;
+  const headMedian = head?.median ?? 0;
+  const absoluteDelta = headMedian - baseMedian;
+  const relativeDelta = baseMedian === 0 ? 0 : absoluteDelta / baseMedian;
+
+  let status: BenchmarkComparisonRow["status"] = "pass";
+  if (!base) {
+    status = "missing-base";
+  } else if (!head) {
+    status = "missing-head";
+  } else if (!metric.comparable || metric.name.includes("competitor_")) {
+    status = "informational";
+  } else if (
+    metric.threshold &&
+    headMedian > baseMedian * metric.threshold.maxRegressionRatio &&
+    absoluteDelta > metric.threshold.minAbsoluteRegression
+  ) {
+    status = "fail";
+  }
+
+  return {
+    name: metric.name,
+    unit: metric.unit,
+    baseMedian,
+    headMedian,
+    absoluteDelta,
+    relativeDelta,
+    threshold: metric.threshold,
+    status,
+    source: metric.source,
+  } satisfies BenchmarkComparisonRow;
+}
+
+function formatNumber(value: number, unit: BenchmarkComparisonRow["unit"]) {
+  if (unit === "bytes") {
+    const mib = value / (1024 * 1024);
+    return `${mib.toFixed(1)} MiB`;
+  }
+
+  if (unit === "ms") {
+    return `${value.toFixed(value >= 100 ? 1 : 2)} ms`;
+  }
+
+  if (unit === "boolean") {
+    return value ? "yes" : "no";
+  }
+
+  return value.toFixed(Number.isInteger(value) ? 0 : 2);
+}
+
+function formatDelta(row: BenchmarkComparisonRow) {
+  const sign = row.absoluteDelta >= 0 ? "+" : "";
+  const relative = row.baseMedian === 0 ? "n/a" : `${sign}${(row.relativeDelta * 100).toFixed(1)}%`;
+  return `${sign}${formatNumber(row.absoluteDelta, row.unit)} (${relative})`;
+}
+
+function formatThreshold(row: BenchmarkComparisonRow) {
+  if (!row.threshold) {
+    return "—";
+  }
+
+  return `+${((row.threshold.maxRegressionRatio - 1) * 100).toFixed(0)}% and +${formatNumber(row.threshold.minAbsoluteRegression, row.unit)}`;
+}
+
+function statusIcon(status: BenchmarkComparisonRow["status"]) {
+  switch (status) {
+    case "pass":
+      return "✅";
+    case "fail":
+      return "❌";
+    case "informational":
+      return "ℹ️";
+    case "missing-base":
+    case "missing-head":
+      return "⚠️";
+  }
+}
+
+const keyBenchmarkNames = new Set([
+  "bootstrap-load/git_bootstrap_ms",
+  "bootstrap-load/file_pair_bootstrap_ms",
+  "working-tree-load/small_worktree_load_ms",
+  "working-tree-load/medium_worktree_load_ms",
+  "working-tree-load/large_worktree_load_ms",
+  "working-tree-load/untracked_many_small_load_ms",
+  "working-tree-load/untracked_few_large_load_ms",
+  "changeset-parse/many_small_files_parse_patch_ms",
+  "changeset-parse/balanced_changeset_parse_patch_ms",
+  "changeset-parse/large_single_file_parse_patch_ms",
+  "render-layout/many_small_files_review_plan_ms",
+  "render-layout/balanced_stream_review_plan_ms",
+  "render-layout/large_single_file_review_plan_ms",
+  "large-stream/cold_first_frame_ms",
+  "large-stream/warm_first_frame_ms",
+  "large-stream/windowed_scroll_ticks_ms",
+  "large-stream-profile/section_geometry_ms",
+  "large-stream-profile/review_plan_ms",
+  "highlight-prefetch/selected_startup_ms",
+  "highlight-prefetch/next_file_ready_ms",
+  "memory/first_frame_ms",
+  "memory/next_hunk_navigation_ms",
+  "memory/after_first_frame_rss_bytes",
+  "memory/after_navigation_rss_bytes",
+]);
+
+/** Keep PR comments readable while all metrics remain enforced and available as artifacts. */
+function selectDisplayedComparableRows(rows: BenchmarkComparisonRow[]) {
+  const displayed = new Map<string, BenchmarkComparisonRow>();
+
+  for (const row of rows) {
+    if (row.status === "fail" || row.status === "missing-head" || keyBenchmarkNames.has(row.name)) {
+      displayed.set(row.name, row);
+    }
+  }
+
+  return [...displayed.values()].sort((left, right) => left.name.localeCompare(right.name));
+}
+
+function competitorTimingRows(rows: BenchmarkComparisonRow[]) {
+  return rows.filter(
+    (row) =>
+      row.status === "informational" &&
+      row.name.includes("/competitor_") &&
+      row.name.endsWith("_ms"),
+  );
+}
+
+function buildMarkdown(comparison: BenchmarkComparisonResult) {
+  const comparableRows = comparison.rows.filter((row) => row.status !== "informational");
+  const displayedComparableRows = selectDisplayedComparableRows(comparableRows);
+  const hiddenComparableCount = comparableRows.length - displayedComparableRows.length;
+  const displayedCompetitorRows = competitorTimingRows(comparison.rows);
+  const lines = [
+    "<!-- hunk-benchmark-comment -->",
+    "## Hunk benchmark results",
+    "",
+    comparison.failed
+      ? "❌ One or more benchmarks regressed beyond the configured threshold."
+      : "✅ Benchmarks are within the configured thresholds.",
+    "",
+    `Base: \`${comparison.baseSha?.slice(0, 12) ?? "unknown"}\` · Head: \`${comparison.headSha?.slice(0, 12) ?? "unknown"}\``,
+    "",
+    "### Key Hunk benchmarks",
+    "",
+    "| Benchmark | Base median | PR median | Delta | Threshold | Status |",
+    "|---|---:|---:|---:|---:|:---:|",
+  ];
+
+  for (const row of displayedComparableRows) {
+    lines.push(
+      `| ${row.name} | ${formatNumber(row.baseMedian, row.unit)} | ${formatNumber(row.headMedian, row.unit)} | ${formatDelta(row)} | ${formatThreshold(row)} | ${statusIcon(row.status)} |`,
+    );
+  }
+
+  if (hiddenComparableCount > 0) {
+    lines.push(
+      "",
+      `${hiddenComparableCount} additional comparable Hunk metrics were checked but hidden to keep this comment readable. See the workflow artifacts for full JSON and text output.`,
+    );
+  }
+
+  if (displayedCompetitorRows.length > 0) {
+    lines.push("", "### Informational competitor comparison", "");
+    lines.push("| Benchmark | Base median | PR median | Delta | Status |");
+    lines.push("|---|---:|---:|---:|:---:|");
+    for (const row of displayedCompetitorRows) {
+      lines.push(
+        `| ${row.name} | ${formatNumber(row.baseMedian, row.unit)} | ${formatNumber(row.headMedian, row.unit)} | ${formatDelta(row)} | ${statusIcon(row.status)} |`,
+      );
+    }
+  }
+
+  lines.push("", "Raw JSON and text logs are available in the benchmark workflow artifacts.", "");
+  return lines.join("\n");
+}
+
+const options = parseArgs(Bun.argv.slice(2));
+const base = readRun(options.base);
+const head = readRun(options.head);
+const baseByName = new Map(base.results.map((result) => [result.name, result]));
+const headByName = new Map(head.results.map((result) => [result.name, result]));
+const names = new Set([...baseByName.keys(), ...headByName.keys()]);
+const rows = [...names]
+  .map((name) => compareMetric(baseByName.get(name), headByName.get(name)))
+  .sort((left, right) => left.name.localeCompare(right.name));
+
+const comparison: BenchmarkComparisonResult = {
+  version: 1,
+  generatedAt: new Date().toISOString(),
+  baseSha: base.gitSha,
+  headSha: head.gitSha,
+  failed: rows.some((row) => row.status === "fail" || row.status === "missing-head"),
+  rows,
+};
+const markdown = buildMarkdown(comparison);
+
+console.log(markdown);
+
+if (options.out) {
+  const outPath = resolve(options.out);
+  mkdirSync(dirname(outPath), { recursive: true });
+  writeFileSync(outPath, `${JSON.stringify(comparison, null, 2)}\n`);
+}
+
+if (options.markdown) {
+  const markdownPath = resolve(options.markdown);
+  mkdirSync(dirname(markdownPath), { recursive: true });
+  writeFileSync(markdownPath, markdown);
+}
+
+if (comparison.failed) {
+  process.exitCode = 1;
+}
diff --git a/benchmarks/competitors.ts b/benchmarks/competitors.ts
new file mode 100644
index 00000000..63ab61c9
--- /dev/null
+++ b/benchmarks/competitors.ts
@@ -0,0 +1,112 @@
+// Optional informational comparisons against diff-oriented CLI tools when installed.
+import { writeFileSync } from "node:fs";
+import { join } from "node:path";
+import { performance } from "perf_hooks";
+import {
+  createChangedRepo,
+  createSyntheticPatch,
+  createSyntheticSource,
+  createTemporaryDirectory,
+  git,
+} from "./lib/fixtures";
+
+interface ToolScenario {
+  metric: string;
+  command: string[];
+  stdin?: string;
+  cwd?: string;
+}
+
+// Report whether `command` resolves via the shell's `command -v` builtin.
+// The name travels as the positional `$1` so it is never parsed as shell syntax.
+// stdout/stderr are discarded; only the exit status matters.
+function commandExists(command: string) {
+  const probe = ["sh", "-c", 'command -v "$1" >/dev/null 2>&1', "sh", command];
+  return Bun.spawnSync(probe, { stdout: "ignore", stderr: "ignore" }).exitCode === 0;
+}
+
+function measureTool({ metric, command, stdin, cwd }: ToolScenario) {
+  const start = performance.now();
+  const proc = Bun.spawnSync(command, {
+    cwd,
+    stdin: stdin === undefined ? "ignore" : Buffer.from(stdin),
+    stdout: "ignore",
+    stderr: "pipe",
+    env: { ...process.env, NO_COLOR: "1", TERM: "xterm-256color" },
+  });
+  const duration = performance.now() - start;
+
+  if (proc.exitCode !== 0) {
+    const stderr = Buffer.from(proc.stderr).toString("utf8").trim();
+    console.log(`METRIC ${metric}_available=0`);
+    if (stderr) {
+      console.warn(`${command.join(" ")} failed: ${stderr}`);
+    }
+    return;
+  }
+
+  console.log(`METRIC ${metric}_ms=${duration.toFixed(2)}`);
+  console.log(`METRIC ${metric}_available=1`);
+}
+
+const patch = createSyntheticPatch({ fileCount: 96, lines: 180, changedLines: 36 });
+const patchFixture = createTemporaryDirectory("hunk-competitor-patch-");
+const repoFixture = createChangedRepo({ fileCount: 96, lines: 180, changedLines: 36 });
+
+try {
+  const patchPath = join(patchFixture.path, "large.patch");
+  const beforePath = join(patchFixture.path, "before.ts");
+  const afterPath = join(patchFixture.path, "after.ts");
+  writeFileSync(patchPath, patch);
+  writeFileSync(
+    beforePath,
+    createSyntheticSource(1, false, { lines: 12_000, changedLines: 2_000 }),
+  );
+  writeFileSync(afterPath, createSyntheticSource(1, true, { lines: 12_000, changedLines: 2_000 }));
+
+  measureTool({
+    metric: "competitor_git_diff_no_ext_diff",
+    command: ["git", "diff", "--no-ext-diff", "--no-color"],
+    cwd: repoFixture.path,
+  });
+
+  // Unmeasured follow-up git command against the same fixture repo; it runs after the timed diff, so it cannot affect the metric above.
+  git(repoFixture.path, "status", "--short");
+
+  if (commandExists("delta")) {
+    measureTool({
+      metric: "competitor_delta_patch_stdin",
+      command: ["delta", "--no-gitconfig", "--paging=never"],
+      stdin: patch,
+    });
+  } else {
+    console.log("METRIC competitor_delta_patch_stdin_available=0");
+  }
+
+  if (commandExists("difft")) {
+    measureTool({
+      metric: "competitor_difftastic_file_pair",
+      command: ["difft", "--color=never", beforePath, afterPath],
+    });
+  } else if (commandExists("difftastic")) {
+    measureTool({
+      metric: "competitor_difftastic_file_pair",
+      command: ["difftastic", "--color=never", beforePath, afterPath],
+    });
+  } else {
+    console.log("METRIC competitor_difftastic_file_pair_available=0");
+  }
+
+  if (commandExists("diff-so-fancy")) {
+    measureTool({
+      metric: "competitor_diff_so_fancy_patch_stdin",
+      command: ["diff-so-fancy"],
+      stdin: patch,
+    });
+  } else {
+    console.log("METRIC competitor_diff_so_fancy_patch_stdin_available=0");
+  }
+} finally {
+  patchFixture.cleanup();
+  repoFixture.cleanup();
+}
diff --git a/benchmarks/large-stream-fixture.ts b/benchmarks/large-stream-fixture.ts
index 0a95c184..386a3e31 100644
--- a/benchmarks/large-stream-fixture.ts
+++ b/benchmarks/large-stream-fixture.ts
@@ -3,37 +3,19 @@ import type { AppBootstrap, DiffFile } from "../src/core/types";
 
 export const DEFAULT_FILE_COUNT = 180;
 export const DEFAULT_LINES_PER_FILE = 120;
-export const DEFAULT_NOTES_PER_FILE = 2;
-
 interface LargeSplitStreamFixtureOptions {
   fileCount?: number;
   linesPerFile?: number;
-  notesPerFile?: number;
-}
-
-function createAgentAnnotations(index: number, notesPerFile: number) {
-  if (notesPerFile <= 0) {
-    return [];
-  }
-
-  return Array.from({ length: notesPerFile }, (_, noteIndex) => {
-    const startLine = 40 + noteIndex * 12;
-    const endLine = startLine + 5;
-    return {
-      id: `note:${index}:${noteIndex}`,
-      newRange: [startLine, endLine] as [number, number],
-      summary: `Explain the split-mode refactor in file ${index}, hunk note ${noteIndex + 1}.`,
-      rationale:
-        "Synthetic benchmark note to exercise inline note placement, guide rows, and note-enabled full-stream rendering.",
-    };
-  });
+  changedStartLine?: number;
+  changedEndLine?: number;
 }
 
 export function createLargeSplitDiffFile(
   index: number,
   {
     linesPerFile = DEFAULT_LINES_PER_FILE,
-    notesPerFile = 0,
+    changedStartLine = 37,
+    changedEndLine = 84,
   }: Omit<LargeSplitStreamFixtureOptions, "fileCount"> = {},
 ): DiffFile {
   const path = `src/stream${index}.ts`;
@@ -44,7 +26,7 @@ export function createLargeSplitDiffFile(
 
   const after = Array.from({ length: linesPerFile }, (_, lineIndex) => {
     const line = lineIndex + 1;
-    if (lineIndex >= 36 && lineIndex < 84) {
+    if (line >= changedStartLine && line <= changedEndLine) {
       return `export function stream${index}_${line}(value: number) { return value * ${line} + ${index}; }\n`;
     }
 
@@ -66,40 +48,40 @@ export function createLargeSplitDiffFile(
     true,
   );
 
-  const annotations = createAgentAnnotations(index, notesPerFile);
-
   return {
     id: `stream:${index}`,
     path,
     patch: "",
     language: "typescript",
-    stats: { additions: 48, deletions: 48 },
+    stats: { // count only changed lines that exist: the range is clamped to 1..linesPerFile
+      additions: Math.max(0, Math.min(changedEndLine, linesPerFile) - Math.max(changedStartLine, 1) + 1),
+      deletions: Math.max(0, Math.min(changedEndLine, linesPerFile) - Math.max(changedStartLine, 1) + 1),
+    },
     metadata,
-    agent:
-      annotations.length > 0
-        ? {
-            path,
-            summary: `Synthetic note-heavy benchmark context for ${path}`,
-            annotations,
-          }
-        : null,
+    agent: null,
   };
 }
 
 export function createLargeSplitStreamFiles({
   fileCount = DEFAULT_FILE_COUNT,
   linesPerFile = DEFAULT_LINES_PER_FILE,
-  notesPerFile = 0,
+  changedStartLine,
+  changedEndLine,
 }: LargeSplitStreamFixtureOptions = {}) {
   return Array.from({ length: fileCount }, (_, index) =>
-    createLargeSplitDiffFile(index + 1, { linesPerFile, notesPerFile }),
+    createLargeSplitDiffFile(index + 1, {
+      linesPerFile,
+      changedStartLine,
+      changedEndLine,
+    }),
   );
 }
 
 export function createLargeSplitStreamBootstrap({
   fileCount = DEFAULT_FILE_COUNT,
   linesPerFile = DEFAULT_LINES_PER_FILE,
-  notesPerFile = 0,
+  changedStartLine,
+  changedEndLine,
 }: LargeSplitStreamFixtureOptions = {}): AppBootstrap {
   return {
     input: {
@@ -110,13 +92,18 @@ export function createLargeSplitStreamBootstrap({
       },
     },
     changeset: {
-      id: `changeset:large-split-stream:${fileCount}:${linesPerFile}:${notesPerFile}`,
+      id: `changeset:large-split-stream:${fileCount}:${linesPerFile}`,
       sourceLabel: "repo",
       title: "repo working tree",
-      files: createLargeSplitStreamFiles({ fileCount, linesPerFile, notesPerFile }),
+      files: createLargeSplitStreamFiles({
+        fileCount,
+        linesPerFile,
+        changedStartLine,
+        changedEndLine,
+      }),
     },
     initialMode: "split",
     initialTheme: "midnight",
-    initialShowAgentNotes: notesPerFile > 0,
+    initialShowAgentNotes: false,
   };
 }
diff --git a/benchmarks/large-stream-profile.ts b/benchmarks/large-stream-profile.ts
index aa8ee4b7..fa77419a 100644
--- a/benchmarks/large-stream-profile.ts
+++ b/benchmarks/large-stream-profile.ts
@@ -9,20 +9,10 @@ import {
   createLargeSplitStreamFiles,
   DEFAULT_FILE_COUNT,
   DEFAULT_LINES_PER_FILE,
-  DEFAULT_NOTES_PER_FILE,
 } from "./large-stream-fixture";
 
 const theme = resolveTheme("midnight", null);
-const windowedFiles = createLargeSplitStreamFiles({ notesPerFile: 0 });
-const noteFiles = createLargeSplitStreamFiles({ notesPerFile: DEFAULT_NOTES_PER_FILE });
-
-function visibleAgentNotesForFile(file: (typeof noteFiles)[number]) {
-  const annotations = file.agent?.annotations ?? [];
-  return annotations.map((annotation, index) => ({
-    id: `annotation:${file.id}:${annotation.id ?? index}`,
-    annotation,
-  }));
-}
+const windowedFiles = createLargeSplitStreamFiles();
 
 function measureMs(run: () => void) {
   const start = performance.now();
@@ -43,24 +33,23 @@ const splitRowsMs = measureMs(() => {
   });
 });
 
-let notePlannedRows = 0;
-const noteReviewPlanMs = measureMs(() => {
-  noteFiles.forEach((file) => {
+let plannedRows = 0;
+const reviewPlanMs = measureMs(() => {
+  windowedFiles.forEach((file) => {
     const rows = buildSplitRows(file, null, theme);
-    notePlannedRows += buildReviewRenderPlan({
+    plannedRows += buildReviewRenderPlan({
       fileId: file.id,
       rows,
       showHunkHeaders: true,
-      visibleAgentNotes: visibleAgentNotesForFile(file),
+      visibleAgentNotes: [],
     }).length;
   });
 });
 
 console.log(`METRIC section_geometry_ms=${sectionGeometryMs.toFixed(2)}`);
 console.log(`METRIC split_rows_ms=${splitRowsMs.toFixed(2)}`);
-console.log(`METRIC note_review_plan_ms=${noteReviewPlanMs.toFixed(2)}`);
+console.log(`METRIC review_plan_ms=${reviewPlanMs.toFixed(2)}`);
 console.log(`METRIC split_rows=${windowedRows}`);
-console.log(`METRIC note_planned_rows=${notePlannedRows}`);
+console.log(`METRIC planned_rows=${plannedRows}`);
 console.log(`METRIC files=${DEFAULT_FILE_COUNT}`);
 console.log(`METRIC lines_per_file=${DEFAULT_LINES_PER_FILE}`);
-console.log(`METRIC notes_per_file=${DEFAULT_NOTES_PER_FILE}`);
diff --git a/benchmarks/large-stream.ts b/benchmarks/large-stream.ts
index 21c739c3..3777ad30 100644
--- a/benchmarks/large-stream.ts
+++ b/benchmarks/large-stream.ts
@@ -1,5 +1,4 @@
-// Benchmark split-mode startup and scroll behaviour on very large review streams,
-// including note-enabled cases that disable the placeholder windowing path.
+// Benchmark split-mode startup and scroll behaviour on very large review streams.
 import { performance } from "perf_hooks";
 import React from "react";
 import { testRender } from "@opentui/react/test-utils";
@@ -9,14 +8,13 @@ import {
   createLargeSplitStreamBootstrap,
   DEFAULT_FILE_COUNT,
   DEFAULT_LINES_PER_FILE,
-  DEFAULT_NOTES_PER_FILE,
 } from "./large-stream-fixture";
 
 const VIEWPORT = {
   width: 240,
   height: 28,
 } as const;
-const SCROLL_TICKS = 18;
+const SCROLL_TICKS = 4;
 const SCROLL_TARGET = {
   x: 170,
   y: 12,
@@ -67,10 +65,10 @@ async function destroyRenderer(setup: BenchmarkRenderer) {
   });
 }
 
-async function measureFirstFrameMs(notesPerFile: number) {
+async function measureFirstFrameMs() {
   const setup = await testRender(
     React.createElement(AppHost, {
-      bootstrap: createLargeSplitStreamBootstrap({ notesPerFile }),
+      bootstrap: createLargeSplitStreamBootstrap(),
     }),
     VIEWPORT,
   );
@@ -85,10 +83,10 @@ async function measureFirstFrameMs(notesPerFile: number) {
   }
 }
 
-async function measureScrollTicksMs(notesPerFile: number) {
+async function measureScrollTicksMs() {
   const setup = await testRender(
     React.createElement(AppHost, {
-      bootstrap: createLargeSplitStreamBootstrap({ notesPerFile }),
+      bootstrap: createLargeSplitStreamBootstrap(),
     }),
     VIEWPORT,
   );
@@ -112,18 +110,13 @@ async function measureScrollTicksMs(notesPerFile: number) {
   }
 }
 
-const coldFirstFrameMs = await measureFirstFrameMs(0);
-const warmFirstFrameMs = await measureFirstFrameMs(0);
-const noteFirstFrameMs = await measureFirstFrameMs(DEFAULT_NOTES_PER_FILE);
-const windowedScrollMs = await measureScrollTicksMs(0);
-const noteScrollMs = await measureScrollTicksMs(DEFAULT_NOTES_PER_FILE);
+const coldFirstFrameMs = await measureFirstFrameMs();
+const warmFirstFrameMs = await measureFirstFrameMs();
+const windowedScrollMs = await measureScrollTicksMs();
 
 console.log(`METRIC cold_first_frame_ms=${coldFirstFrameMs.toFixed(2)}`);
 console.log(`METRIC warm_first_frame_ms=${warmFirstFrameMs.toFixed(2)}`);
-console.log(`METRIC note_first_frame_ms=${noteFirstFrameMs.toFixed(2)}`);
 console.log(`METRIC windowed_scroll_ticks_ms=${windowedScrollMs.toFixed(2)}`);
-console.log(`METRIC note_scroll_ticks_ms=${noteScrollMs.toFixed(2)}`);
 console.log(`METRIC scroll_ticks=${SCROLL_TICKS}`);
 console.log(`METRIC files=${DEFAULT_FILE_COUNT}`);
 console.log(`METRIC lines_per_file=${DEFAULT_LINES_PER_FILE}`);
-console.log(`METRIC notes_per_file=${DEFAULT_NOTES_PER_FILE}`);
diff --git a/benchmarks/lib/benchmark-result.ts b/benchmarks/lib/benchmark-result.ts
new file mode 100644
index 00000000..dc0e8a26
--- /dev/null
+++ b/benchmarks/lib/benchmark-result.ts
@@ -0,0 +1,122 @@
+export interface BenchmarkThreshold { // regression limits that classifyMetric attaches to comparable metrics
+  maxRegressionRatio: number; // NOTE(review): presumably a head/base ratio limit (1.15 and 1.2 are used) — confirm in the compare step
+  minAbsoluteRegression: number; // NOTE(review): presumably in the metric's own unit (5 for ms, 8 MiB for bytes) — confirm
+}
+
+export interface BenchmarkMetricResult {
+  name: string; // "<source>/<metric>", as assembled by aggregateMetric
+  unit: "ms" | "bytes" | "count" | "ratio" | "boolean"; // inferred from the metric name by classifyMetric
+  samples: number[]; // raw sample values, in the order they were passed in
+  median: number; // nearest-rank 50th percentile of `samples`
+  p75: number; // nearest-rank 75th percentile
+  p95: number; // nearest-rank 95th percentile
+  min: number; // smallest sample; 0 when there are no samples
+  max: number; // largest sample; 0 when there are no samples
+  threshold?: BenchmarkThreshold; // classifyMetric sets this only on comparable metrics
+  comparable: boolean; // NOTE(review): presumably whether the base/head comparison gates on this metric — confirm
+  source: string; // benchmark script the metric came from (run.ts passes the name without `.ts`)
+}
+
+export interface BenchmarkRunResult {
+  version: 1; // schema version literal
+  generatedAt: string; // ISO-8601 timestamp (run.ts uses Date#toISOString)
+  gitSha?: string; // `git rev-parse HEAD` output; absent when that command fails
+  samplesPerBenchmark: number; // sample count requested for every script
+  results: BenchmarkMetricResult[]; // run.ts sorts these by `name`
+}
+
+export interface BenchmarkComparisonRow { // NOTE(review): rows are produced outside this chunk; field notes below are unverified
+  name: string;
+  unit: BenchmarkMetricResult["unit"]; // same unit union as the per-run metric
+  baseMedian: number;
+  headMedian: number;
+  absoluteDelta: number; // NOTE(review): presumably headMedian - baseMedian — confirm
+  relativeDelta: number; // NOTE(review): presumably relative to baseMedian — confirm
+  threshold?: BenchmarkThreshold;
+  status: "pass" | "fail" | "missing-base" | "missing-head" | "informational";
+  source: string;
+}
+
+export interface BenchmarkComparisonResult { // NOTE(review): built outside this chunk; semantics below are unverified
+  version: 1; // schema version literal
+  generatedAt: string;
+  baseSha?: string;
+  headSha?: string;
+  failed: boolean; // NOTE(review): presumably true when any row has status "fail" — confirm
+  rows: BenchmarkComparisonRow[];
+}
+
+/** Nearest-rank percentile of `samples`; an empty input yields 0. */
+export function percentile(samples: number[], percentileValue: number) {
+  const count = samples.length;
+  if (count === 0) {
+    return 0;
+  }
+
+  // Sort a copy numerically so the caller's array is left untouched.
+  const ascending = samples.slice().sort((a, b) => a - b);
+  // Nearest-rank is 1-based; shift to 0-based and clamp into the array bounds.
+  const rank = Math.ceil((percentileValue / 100) * count) - 1;
+  return ascending[Math.min(count - 1, Math.max(0, rank))]!;
+}
+
+/** Infer display and comparison metadata from the metric name emitted by a script. */
+export function classifyMetric(
+  name: string,
+): Pick<BenchmarkMetricResult, "unit" | "comparable" | "threshold"> {
+  // Rules run in priority order and the first match wins, so a name such as
+  // `competitor_x_ms` is classified by its prefix before its `_ms` suffix.
+  const hasPrefix = (prefix: string) => name.startsWith(prefix);
+  const hasSuffix = (suffix: string) => name.endsWith(suffix);
+
+  if (hasPrefix("competitor_")) {
+    return { unit: "ms", comparable: false };
+  }
+
+  if (hasSuffix("_ms")) {
+    const threshold = { maxRegressionRatio: 1.15, minAbsoluteRegression: 5 };
+    return { unit: "ms", comparable: true, threshold };
+  }
+
+  const flagSuffixes = ["_ready_before_move", "_available"];
+  if (hasPrefix("is_") || flagSuffixes.some(hasSuffix)) {
+    return { unit: "boolean", comparable: false };
+  }
+
+  // Memory metrics are matched by substring, ahead of the generic `_bytes` rule.
+  const memoryMarkers = ["rss", "heap"];
+  if (memoryMarkers.some((marker) => name.includes(marker))) {
+    // minAbsoluteRegression is 8 MiB expressed in bytes.
+    const threshold = { maxRegressionRatio: 1.2, minAbsoluteRegression: 8 * 1024 * 1024 };
+    return { unit: "bytes", comparable: true, threshold };
+  }
+
+  if (hasSuffix("_bytes")) {
+    return { unit: "bytes", comparable: false };
+  }
+
+  // Anything unrecognised is reported as a plain, non-comparable count.
+  return { unit: "count", comparable: false };
+}
+
+/** Summarise raw samples for `source`/`name` into one aggregated metric record. */
+export function aggregateMetric(
+  source: string,
+  name: string,
+  samples: number[],
+): BenchmarkMetricResult {
+  // Order a copy ascending; the caller's `samples` array is stored untouched.
+  const ascending = samples.slice().sort((a, b) => a - b);
+  const at = (rank: number) => percentile(ascending, rank);
+  return {
+    name: `${source}/${name}`,
+    source,
+    samples,
+    median: at(50),
+    p75: at(75),
+    p95: at(95),
+    min: ascending[0] ?? 0,
+    max: ascending.at(-1) ?? 0,
+    ...classifyMetric(name),
+  };
+}
diff --git a/benchmarks/lib/fixtures.ts b/benchmarks/lib/fixtures.ts
new file mode 100644
index 00000000..f7258e4e
--- /dev/null
+++ b/benchmarks/lib/fixtures.ts
@@ -0,0 +1,136 @@
+import { mkdtempSync, rmSync, writeFileSync, mkdirSync } from "node:fs";
+import { tmpdir } from "node:os";
+import { dirname, join } from "node:path";
+import { createTwoFilesPatch } from "diff";
+
+export interface SyntheticFileOptions {
+  lines: number; // total lines generated per file
+  changedStart?: number; // zero-based index of the first changed line; default floor(lines / 3)
+  changedLines?: number; // length of the changed run; default max(4, floor(lines / 6))
+  extension?: string; // file extension without the dot; the functions that read it default to "ts"
+}
+
+export interface SyntheticPatchOptions extends SyntheticFileOptions {
+  fileCount: number; // number of files to generate, numbered from 1
+  prefix?: string; // path text placed before the file number; in this file only createSyntheticPatch reads it
+}
+
+export interface TemporaryDirectory {
+  path: string; // directory path returned by mkdtempSync
+  cleanup: () => void; // recursively removes `path`; uses `force`, so a missing directory is not an error
+}
+
+/** Make a unique directory under the OS temp dir and pair it with a remover. */
+export function createTemporaryDirectory(prefix: string): TemporaryDirectory {
+  // mkdtempSync appends random characters to the given prefix path.
+  const directory = mkdtempSync(join(tmpdir(), prefix));
+  // `force` makes cleanup a no-op if the directory is already gone.
+  const cleanup = () => rmSync(directory, { recursive: true, force: true });
+  return { path: directory, cleanup };
+}
+
+/** Execute `git <cmd>` inside `cwd`; return its stdout, or throw when it exits non-zero. */
+export function git(cwd: string, ...cmd: string[]) {
+  // spawnSync hands back raw bytes for piped streams, so decode them as UTF-8 text.
+  const decode = (bytes: Uint8Array) => Buffer.from(bytes).toString("utf8");
+
+  const argv = ["git", ...cmd];
+  const result = Bun.spawnSync(argv, { cwd, stdout: "pipe", stderr: "pipe", stdin: "ignore" });
+
+  if (result.exitCode === 0) {
+    return decode(result.stdout);
+  }
+
+  // Prefer git's own diagnostics; name the command only when stderr is empty.
+  const diagnostics = decode(result.stderr).trim();
+  throw new Error(diagnostics || `git ${cmd.join(" ")} failed`);
+}
+
+/** Emit `options.lines` one-line functions; a window of them differs when `changed`. */
+export function createSyntheticSource(
+  fileIndex: number,
+  changed: boolean,
+  options: SyntheticFileOptions,
+) {
+  const { lines, changedStart, changedLines } = options;
+  // The changed window is the half-open range [first, end) of zero-based line indexes.
+  const first = changedStart ?? Math.floor(lines / 3);
+  const end = first + (changedLines ?? Math.max(4, Math.floor(lines / 6)));
+  const inWindow = (lineIndex: number) => changed && lineIndex >= first && lineIndex < end;
+  const rendered = Array.from({ length: lines }, (_, lineIndex) => {
+    const line = lineIndex + 1;
+    return inWindow(lineIndex)
+      ? `export function bench${fileIndex}_${line}(value: number) { return value * ${line} + ${fileIndex}; }\n`
+      : `export function bench${fileIndex}_${line}(value: number) { return value + ${line}; }\n`;
+  });
+  return rendered.join("");
+}
+
+/** Concatenate per-file unified diffs for `fileCount` synthetic files into one patch. */
+export function createSyntheticPatch({
+  fileCount,
+  lines,
+  changedStart,
+  changedLines,
+  extension = "ts",
+  prefix = "src/bench",
+}: SyntheticPatchOptions) {
+  const shape = { lines, changedStart, changedLines };
+  // Strip the diff package's "Index:" banner; Pierre's parser expects unified/git hunks.
+  const indexBanner = /^Index: .*\n=+\n/;
+  return Array.from({ length: fileCount }, (_, index) => {
+    const path = `${prefix}${index + 1}.${extension}`;
+    const before = createSyntheticSource(index + 1, false, shape);
+    const after = createSyntheticSource(index + 1, true, shape);
+    const unified = createTwoFilesPatch(path, path, before, after, "", "", { context: 3 });
+    return unified.replace(indexBanner, "").trimEnd();
+  }).join("\n");
+}
+
+/** Create a git repo with committed files plus uncommitted edits; the temp dir is removed if setup fails. */
+export function createChangedRepo({
+  fileCount,
+  lines,
+  changedStart,
+  changedLines,
+  extension = "ts",
+}: SyntheticPatchOptions) {
+  const fixture = createTemporaryDirectory("hunk-benchmark-repo-");
+  const shape = { lines, changedStart, changedLines };
+  const sourceDir = join(fixture.path, "src");
+  // Write every tracked file in either its committed or its modified form.
+  const writeAll = (changed: boolean) => {
+    for (let index = 1; index <= fileCount; index += 1) {
+      const absolutePath = join(sourceDir, `bench${index}.${extension}`);
+      writeFileSync(absolutePath, createSyntheticSource(index, changed, shape));
+    }
+  };
+
+  try {
+    git(fixture.path, "init");
+    git(fixture.path, "config", "user.name", "Benchmark User");
+    git(fixture.path, "config", "user.email", "benchmark@example.com");
+    mkdirSync(sourceDir, { recursive: true });
+    writeAll(false);
+    git(fixture.path, "add", ".");
+    git(fixture.path, "commit", "-m", "initial benchmark fixture");
+
+    // Rewrite the files after the commit so the working tree is left dirty.
+    writeAll(true);
+  } catch (error) {
+    // Callers receive `cleanup` only on success, so remove the directory here.
+    fixture.cleanup();
+    throw error;
+  }
+  return fixture;
+}
+
+/** Write `fileCount` new files under `<repoDir>/untracked` without staging them. */
+export function addUntrackedFiles(repoDir: string, fileCount: number, lines: number) {
+  for (let index = 1; index <= fileCount; index += 1) {
+    const target = join(repoDir, "untracked", `new${index}.ts`);
+    // Recursive mkdir tolerates the directory already existing on later passes.
+    mkdirSync(dirname(target), { recursive: true });
+    writeFileSync(target, createSyntheticSource(index, true, { lines }));
+  }
+}
diff --git a/benchmarks/memory.ts b/benchmarks/memory.ts
new file mode 100644
index 00000000..7bbb58d0
--- /dev/null
+++ b/benchmarks/memory.ts
@@ -0,0 +1,72 @@
+// Track heap/RSS pressure for loading, planning, rendering, and navigating a large diff.
+import { performance } from "perf_hooks";
+import React from "react";
+import { testRender } from "@opentui/react/test-utils";
+import { act } from "react";
+import { buildSplitRows } from "../src/ui/diff/pierre";
+import { buildReviewRenderPlan } from "../src/ui/diff/reviewRenderPlan";
+import { resolveTheme } from "../src/ui/themes";
+import { AppHost } from "../src/ui/AppHost";
+import { createLargeSplitStreamBootstrap } from "./large-stream-fixture";
+
+const viewport = { width: 240, height: 28 } as const;
+
+function printMemory(prefix: string) {
+  const { rss, heapUsed } = process.memoryUsage(); // one snapshot feeds both METRIC lines
+  console.log(`METRIC ${prefix}_rss_bytes=${rss}`);
+  console.log(`METRIC ${prefix}_heap_used_bytes=${heapUsed}`);
+}
+
+async function renderOnce(setup: Awaited<ReturnType<typeof testRender>>) { // render one frame inside React's act()
+  await act(async () => {
+    await setup.renderOnce();
+    await Bun.sleep(0); // zero-delay sleep; presumably yields so queued work can settle — confirm
+  });
+}
+
+const bootstrapStart = performance.now(); // phase 1: build the synthetic changeset
+const bootstrap = createLargeSplitStreamBootstrap({
+  fileCount: 120,
+  linesPerFile: 120,
+});
+console.log(`METRIC bootstrap_fixture_ms=${(performance.now() - bootstrapStart).toFixed(2)}`);
+printMemory("after_bootstrap");
+
+const theme = resolveTheme("midnight", null);
+let plannedRows = 0;
+const planningStart = performance.now(); // phase 2: split rows + review plan for every file, with no agent notes
+for (const file of bootstrap.changeset.files) {
+  const rows = buildSplitRows(file, null, theme);
+  plannedRows += buildReviewRenderPlan({
+    fileId: file.id,
+    rows,
+    showHunkHeaders: true,
+    visibleAgentNotes: [],
+  }).length;
+}
+console.log(`METRIC planning_ms=${(performance.now() - planningStart).toFixed(2)}`);
+console.log(`METRIC planned_rows=${plannedRows}`);
+printMemory("after_planning");
+
+const setup = await testRender(React.createElement(AppHost, { bootstrap }), viewport); // phase 3: mounting happens before the first_frame_ms timer starts
+try {
+  const firstFrameStart = performance.now();
+  await renderOnce(setup);
+  console.log(`METRIC first_frame_ms=${(performance.now() - firstFrameStart).toFixed(2)}`);
+  printMemory("after_first_frame");
+
+  const navigationStart = performance.now(); // phase 4: six key presses, each followed by a render
+  for (let index = 0; index < 6; index += 1) {
+    await act(async () => {
+      await setup.mockInput.typeText("]"); // NOTE(review): "]" is presumably the next-hunk key, per the metric name — confirm
+      await setup.renderOnce();
+      await Bun.sleep(0);
+    });
+  }
+  console.log(`METRIC next_hunk_navigation_ms=${(performance.now() - navigationStart).toFixed(2)}`);
+  printMemory("after_navigation");
+} finally {
+  await act(async () => { // destroy the renderer even when a phase above throws
+    setup.renderer.destroy();
+  });
+}
diff --git a/benchmarks/render-layout.ts b/benchmarks/render-layout.ts
new file mode 100644
index 00000000..0b805b06
--- /dev/null
+++ b/benchmarks/render-layout.ts
@@ -0,0 +1,78 @@
+// Benchmark pure diff row/layout planning across split, stack, and size-shape cases.
+import { performance } from "perf_hooks";
+import { buildSplitRows, buildStackRows } from "../src/ui/diff/pierre";
+import { buildReviewRenderPlan } from "../src/ui/diff/reviewRenderPlan";
+import { measureDiffSectionGeometry } from "../src/ui/diff/diffSectionGeometry";
+import { resolveTheme } from "../src/ui/themes";
+import { createLargeSplitStreamFiles } from "./large-stream-fixture";
+
+const theme = resolveTheme("midnight", null);
+
+function measureMs(run: () => void) { // elapsed milliseconds (performance.now delta) for one synchronous call to `run`
+  const start = performance.now();
+  run(); // not awaited: async work started by `run` is not included
+  return performance.now() - start;
+}
+
+/** Time row building, geometry, and review planning for `files`; print METRIC lines. */
+function measureScenario(name: string, files: ReturnType<typeof createLargeSplitStreamFiles>) {
+  // Time one full pass over `files`, handing each file to `visit` in order.
+  const timePass = (visit: (file: (typeof files)[number]) => void) =>
+    measureMs(() => files.forEach((file) => visit(file)));
+
+  let splitRows = 0;
+  const splitRowsMs = timePass((file) => {
+    splitRows += buildSplitRows(file, null, theme).length;
+  });
+
+  let stackRows = 0;
+  const stackRowsMs = timePass((file) => {
+    stackRows += buildStackRows(file, null, theme).length;
+  });
+
+  // Geometry results are discarded; only the elapsed time is reported.
+  const geometryMs = timePass((file) => {
+    measureDiffSectionGeometry(file, "split", true, theme);
+  });
+
+  // The review-plan timing also covers rebuilding the split rows for each file.
+  let plannedRows = 0;
+  const reviewPlanMs = timePass((file) => {
+    const rows = buildSplitRows(file, null, theme);
+    const plan = buildReviewRenderPlan({
+      fileId: file.id,
+      rows,
+      showHunkHeaders: true,
+      visibleAgentNotes: [],
+    });
+    plannedRows += plan.length;
+  });
+
+  // Timings come first, followed by the counts that describe the workload.
+  console.log(`METRIC ${name}_split_rows_ms=${splitRowsMs.toFixed(2)}`);
+  console.log(`METRIC ${name}_stack_rows_ms=${stackRowsMs.toFixed(2)}`);
+  console.log(`METRIC ${name}_geometry_ms=${geometryMs.toFixed(2)}`);
+  console.log(`METRIC ${name}_review_plan_ms=${reviewPlanMs.toFixed(2)}`);
+  console.log(`METRIC ${name}_files=${files.length}`);
+  console.log(`METRIC ${name}_split_rows=${splitRows}`);
+  console.log(`METRIC ${name}_stack_rows=${stackRows}`);
+  console.log(`METRIC ${name}_planned_rows=${plannedRows}`);
+}
+
+measureScenario(
+  "many_small_files", // 48-line files: the fixture's default changed range (37-84) is cut off at line 48
+  createLargeSplitStreamFiles({ fileCount: 360, linesPerFile: 48 }),
+);
+measureScenario(
+  "balanced_stream", // same shape as the fixture defaults (180 files x 120 lines)
+  createLargeSplitStreamFiles({ fileCount: 180, linesPerFile: 120 }),
+);
+measureScenario(
+  "large_single_file", // one 18,000-line file with lines 1,000-17,000 changed
+  createLargeSplitStreamFiles({
+    fileCount: 1,
+    linesPerFile: 18_000,
+    changedStartLine: 1_000,
+    changedEndLine: 17_000,
+  }),
+);
diff --git a/benchmarks/run.ts b/benchmarks/run.ts
new file mode 100644
index 00000000..d8560fd9
--- /dev/null
+++ b/benchmarks/run.ts
@@ -0,0 +1,186 @@
+#!/usr/bin/env bun
+import { mkdirSync, writeFileSync } from "node:fs";
+import { dirname, resolve } from "node:path";
+import { aggregateMetric, type BenchmarkRunResult } from "./lib/benchmark-result";
+
+const defaultScripts = [ // run in this order when no --script flag is given
+  "bootstrap-load.ts",
+  "working-tree-load.ts",
+  "changeset-parse.ts",
+  "render-layout.ts",
+  "highlight-prefetch.ts",
+  "large-stream.ts",
+];
+
+interface RunOptions {
+  samples: number; // how many times each script is executed
+  out?: string; // optional path for the aggregated JSON result
+  includeCompetitors: boolean; // when true, competitors.ts is appended to the script list
+  scripts: string[]; // explicit --script values; empty means "use defaultScripts"
+}
+
+function readArgValue(args: string[], index: number) {
+  // The token right after the flag is its value; an absent or empty token is an error.
+  const candidate = args[index + 1];
+  if (candidate) return candidate;
+
+  throw new Error(`Missing value for ${args[index]}`);
+}
+
+function parseArgs(args: string[]): RunOptions {
+  // Parse runner flags into RunOptions; throws on unknown flags or a bad sample count.
+  const options: RunOptions = {
+    // `||` rather than `??` so an empty HUNK_BENCHMARK_SAMPLES also falls back to 3.
+    samples: Number(process.env.HUNK_BENCHMARK_SAMPLES || 3),
+    includeCompetitors: false,
+    scripts: [],
+  };
+
+  for (let index = 0; index < args.length; index += 1) {
+    const arg = args[index]!;
+    // Flags that take a value consume the following token as well.
+    if (arg === "--samples") {
+      options.samples = Number(readArgValue(args, index));
+      index += 1;
+      continue;
+    }
+    if (arg === "--out") {
+      options.out = readArgValue(args, index);
+      index += 1;
+      continue;
+    }
+    if (arg === "--include-competitors") {
+      options.includeCompetitors = true;
+      continue;
+    }
+    if (arg === "--script") {
+      options.scripts.push(readArgValue(args, index));
+      index += 1;
+      continue;
+    }
+
+    throw new Error(`Unknown benchmark runner argument: ${arg}`);
+  }
+
+  // The sample loop counts whole runs, so fractional or non-finite values are rejected.
+  if (!Number.isInteger(options.samples) || options.samples < 1) {
+    throw new Error("--samples must be a positive integer");
+  }
+
+  return options;
+}
+
+function gitSha() {
+  // Resolve HEAD in the current working directory; stderr is discarded so a
+  // failing git call stays quiet and simply yields no SHA.
+  const revParse = Bun.spawnSync(["git", "rev-parse", "HEAD"], {
+    stdout: "pipe",
+    stderr: "ignore",
+    stdin: "ignore",
+  });
+  const succeeded = revParse.exitCode === 0;
+
+  // An undefined SHA is omitted when the run result is serialised to JSON.
+  return succeeded ? Buffer.from(revParse.stdout).toString("utf8").trim() : undefined;
+}
+
+function parseMetrics(output: string) {
+  // Recognises `METRIC <name>=<decimal>` lines; everything else is ignored.
+  const metricPattern = /^METRIC\s+([A-Za-z0-9_.:-]+)=(-?\d+(?:\.\d+)?)$/;
+  const parsed = new Map<string, number>();
+  output.split(/\r?\n/).forEach((rawLine) => {
+    const captures = metricPattern.exec(rawLine.trim());
+    if (captures) {
+      // A name repeated within one output keeps only its last value.
+      const [, metricName, metricValue] = captures;
+      parsed.set(metricName!, Number(metricValue!));
+    }
+  });
+
+  return parsed;
+}
+
+async function runScript(script: string) {
+  // Run one benchmark script in a child bun process and return its parsed METRIC values.
+  const proc = Bun.spawn(["bun", "run", `benchmarks/${script}`], {
+    stdout: "pipe",
+    stderr: "pipe",
+    stdin: "ignore",
+    env: { ...process.env, CI: process.env.CI ?? "1" },
+  });
+
+  const [stdout, stderr, exitCode] = await Promise.all([
+    new Response(proc.stdout).text(),
+    new Response(proc.stderr).text(),
+    proc.exited,
+  ]);
+
+  // Echo output before the exit check so a failing script's stdout is not dropped.
+  process.stdout.write(stdout);
+  if (stderr.trim()) {
+    console.warn(stderr.trim());
+  }
+  if (exitCode !== 0) {
+    throw new Error(`${script} failed with exit code ${exitCode}\n${stderr}`);
+  }
+  return parseMetrics(stdout);
+}
+
+function formatValue(value: number) {
+  // Magnitudes of 100 or more get one decimal place; smaller values get two.
+  const isLarge = Math.abs(value) >= 100;
+  const fractionDigits = isLarge ? 1 : 2;
+  return value.toFixed(fractionDigits);
+}
+
+const options = parseArgs(Bun.argv.slice(2)); // NOTE(review): assumes the first two argv entries are the runtime and script paths
+const scripts = options.scripts.length > 0 ? options.scripts : [...defaultScripts]; // explicit --script flags replace the default set
+if (options.includeCompetitors) {
+  scripts.push("competitors.ts"); // appended after either list, so it runs last
+}
+
+const samplesByMetric = new Map<string, { source: string; metric: string; samples: number[] }>(); // keyed by "<source>/<metric>"
+
+for (const script of scripts) {
+  const source = script.replace(/\.ts$/, ""); // script name without its .ts extension
+  console.log(`\n## ${source}`);
+
+  for (let sample = 1; sample <= options.samples; sample += 1) {
+    console.log(`\n# sample ${sample}/${options.samples}`);
+    const metrics = await runScript(script); // each sample is a fresh child process; a failure aborts the whole run
+
+    for (const [metric, value] of metrics) {
+      const key = `${source}/${metric}`;
+      const entry = samplesByMetric.get(key) ?? { source, metric, samples: [] };
+      entry.samples.push(value); // a metric missing from some samples simply ends up with fewer values
+      samplesByMetric.set(key, entry);
+    }
+  }
+}
+
+const results = [...samplesByMetric.values()]
+  .map(({ source, metric, samples }) => aggregateMetric(source, metric, samples))
+  .sort((left, right) => left.name.localeCompare(right.name)); // name ordering, independent of script order
+
+const runResult: BenchmarkRunResult = {
+  version: 1,
+  generatedAt: new Date().toISOString(),
+  gitSha: gitSha(),
+  samplesPerBenchmark: options.samples,
+  results,
+};
+
+console.log("\n## Aggregated benchmark medians");
+for (const result of results) {
+  const suffix = result.unit === "ms" ? "ms" : result.unit === "bytes" ? " bytes" : ""; // other units print bare numbers
+  console.log(
+    `${result.name}: median=${formatValue(result.median)}${suffix} p95=${formatValue(result.p95)}${suffix}`,
+  );
+}
+
+if (options.out) {
+  const outPath = resolve(options.out); // relative paths resolve against the current working directory
+  mkdirSync(dirname(outPath), { recursive: true });
+  writeFileSync(outPath, `${JSON.stringify(runResult, null, 2)}\n`);
+  console.log(`\nWrote ${outPath}`);
+}
diff --git a/benchmarks/working-tree-load.ts b/benchmarks/working-tree-load.ts
new file mode 100644
index 00000000..8476113e
--- /dev/null
+++ b/benchmarks/working-tree-load.ts
@@ -0,0 +1,68 @@
+// Benchmark git-backed working-tree loading, including untracked file handling.
+import { performance } from "perf_hooks";
+import { loadAppBootstrap } from "../src/core/loaders";
+import { addUntrackedFiles, createChangedRepo } from "./lib/fixtures";
+
+interface Scenario {
+  name: string; // prefix of every METRIC emitted for this scenario
+  fileCount: number; // tracked files that are committed and then modified
+  lines: number; // lines per tracked file
+  untrackedFiles?: number; // extra untracked files; none are added when omitted or 0
+  untrackedLines?: number; // lines per untracked file; measureScenario falls back to 40
+}
+
+const scenarios: Scenario[] = [ // measured one after another, in this order
+  { name: "small_worktree", fileCount: 16, lines: 80 },
+  { name: "medium_worktree", fileCount: 96, lines: 180 },
+  { name: "large_worktree", fileCount: 240, lines: 220 },
+  {
+    name: "untracked_many_small", // many short untracked files beside a small tracked set
+    fileCount: 16,
+    lines: 80,
+    untrackedFiles: 120,
+    untrackedLines: 36,
+  },
+  {
+    name: "untracked_few_large", // a handful of 5,000-line untracked files
+    fileCount: 8,
+    lines: 80,
+    untrackedFiles: 6,
+    untrackedLines: 5_000,
+  },
+];
+
+async function measureScenario(scenario: Scenario) {
+  // Time one working-tree load against a throwaway repo shaped by `scenario`.
+  const { fileCount, lines, untrackedFiles, untrackedLines = 40 } = scenario;
+  const fixture = createChangedRepo({ fileCount, lines });
+
+  try {
+    // Untracked files are created before the timer starts, so setup is not measured.
+    if (untrackedFiles) {
+      addUntrackedFiles(fixture.path, untrackedFiles, untrackedLines);
+    }
+    const startedAt = performance.now();
+    const bootstrap = await loadAppBootstrap(
+      { kind: "vcs", staged: false, options: { mode: "auto" } },
+      { cwd: fixture.path },
+    );
+    const loadMs = performance.now() - startedAt;
+    let additions = 0;
+    let deletions = 0;
+    for (const { stats } of bootstrap.changeset.files) {
+      additions += stats.additions;
+      deletions += stats.deletions;
+    }
+
+    console.log(`METRIC ${scenario.name}_load_ms=${loadMs.toFixed(2)}`);
+    console.log(`METRIC ${scenario.name}_files=${bootstrap.changeset.files.length}`);
+    console.log(`METRIC ${scenario.name}_additions=${additions}`);
+    console.log(`METRIC ${scenario.name}_deletions=${deletions}`);
+  } finally {
+    fixture.cleanup();
+  }
+}
+
+for (const scenario of scenarios) {
+  await measureScenario(scenario); // awaited one at a time, so scenarios never overlap
+}
diff --git a/package.json b/package.json
index c4366970..9ea41a6f 100644
--- a/package.json
+++ b/package.json
@@ -68,10 +68,18 @@
     "publish:prebuilt:npm": "bun run ./scripts/publish-prebuilt-npm.ts",
     "update:homebrew-formula": "bun run ./scripts/update-homebrew-formula.ts",
     "prepack": "bun run build:npm",
+    "bench": "bun run benchmarks/run.ts",
+    "bench:compare": "bun run benchmarks/compare.ts",
+    "bench:comment-pr": "bun run benchmarks/comment-pr.ts",
     "bench:bootstrap-load": "bun run benchmarks/bootstrap-load.ts",
+    "bench:working-tree-load": "bun run benchmarks/working-tree-load.ts",
+    "bench:changeset-parse": "bun run benchmarks/changeset-parse.ts",
+    "bench:render-layout": "bun run benchmarks/render-layout.ts",
     "bench:highlight-prefetch": "bun run benchmarks/highlight-prefetch.ts",
     "bench:large-stream": "bun run benchmarks/large-stream.ts",
     "bench:large-stream-profile": "bun run benchmarks/large-stream-profile.ts",
+    "bench:memory": "bun run benchmarks/memory.ts",
+    "bench:competitors": "bun run benchmarks/competitors.ts",
     "nix:update-lock": "nix run .#update-bun-lock"
   },
   "dependencies": {