modem-dev · benvinegar · May 29, 2026 · May 29, 2026 · May 29, 2026 · May 30, 2026
diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml
@@ -9,10 +9,22 @@ on:
       - "docs/**"
       - "assets/**"
       - "LICENSE"
+  pull_request:
+    paths-ignore:
+      - "**/*.md"
+      - "docs/**"
+      - "assets/**"
+      - "LICENSE"
   workflow_dispatch:
 
+permissions:
+  contents: read
+  # PR comments go through the issues API but only need pull-requests: write (least privilege).
+  pull-requests: write
+
 env:
   SKIP_INSTALL_SIMPLE_GIT_HOOKS: "1"
+  HUNK_BENCHMARK_SAMPLES: ${{ github.event_name == 'pull_request' && '1' || '3' }}
 
 concurrency:
   group: benchmarks-${{ github.workflow }}-${{ github.ref }}
@@ -25,6 +37,8 @@ jobs:
     steps:
       - name: Check out repository
         uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
+        with:
+          fetch-depth: 0
 
       - name: Set up Bun
         uses: oven-sh/setup-bun@0c5077e51419868618aeaa5fe8019c62421857d6 # v2.2.0
@@ -34,36 +48,61 @@ jobs:
       - name: Install dependencies
         run: bun install --frozen-lockfile
 
-      - name: Run bootstrap benchmark
+      - name: Run head benchmarks
         run: |
           mkdir -p benchmark-results
-          bun run bench:bootstrap-load | tee benchmark-results/bootstrap-load.txt
+          set -o pipefail  # without this, tee's exit status hides a failed benchmark run
+          bun run bench -- --samples "$HUNK_BENCHMARK_SAMPLES" --out benchmark-results/head.json | tee benchmark-results/head.txt
 
-      - name: Run highlight prefetch benchmark
+      - name: Run base benchmarks
+        if: github.event_name == 'pull_request'
         run: |
-          bun run bench:highlight-prefetch | tee benchmark-results/highlight-prefetch.txt
+          set -o pipefail  # surface a failed base run instead of tee's exit status
+          git fetch origin main
+          git worktree add ../hunk-benchmark-base origin/main
+          rm -rf ../hunk-benchmark-base/benchmarks; cp -R benchmarks ../hunk-benchmark-base/benchmarks
+          cd ../hunk-benchmark-base
+          bun install --frozen-lockfile
+          bun run benchmarks/run.ts --samples "$HUNK_BENCHMARK_SAMPLES" --out "$GITHUB_WORKSPACE/benchmark-results/base.json" \
+            | tee "$GITHUB_WORKSPACE/benchmark-results/base.txt"
 
-      - name: Run large stream benchmark
+      - name: Compare benchmark results
+        id: compare
+        if: github.event_name == 'pull_request'
+        continue-on-error: true
         run: |
-          bun run bench:large-stream | tee benchmark-results/large-stream.txt
+          bun run bench:compare -- \
+            --base benchmark-results/base.json \
+            --head benchmark-results/head.json \
+            --out benchmark-results/comparison.json \
+            --markdown benchmark-results/summary.md
 
       - name: Publish benchmark summary
+        if: always()
         run: |
-          {
-            echo '## Benchmark results'
-            echo
-            for file in benchmark-results/*.txt; do
-              echo "### $(basename "$file")"
+          if [ -f benchmark-results/summary.md ]; then
+            cat benchmark-results/summary.md >> "$GITHUB_STEP_SUMMARY"
+          else
+            {
+              echo '## Benchmark results'
+              echo
               echo '```text'
-              cat "$file"
+              cat benchmark-results/head.txt
               echo '```'
-              echo
-            done
-          } >> "$GITHUB_STEP_SUMMARY"
+            } >> "$GITHUB_STEP_SUMMARY"
+          fi
+
+      - name: Comment benchmark summary on PR
+        if: always() && github.event_name == 'pull_request' && hashFiles('benchmark-results/summary.md') != ''
+        continue-on-error: true
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: bun run bench:comment-pr -- --body benchmark-results/summary.md
 
       - name: Upload benchmark artifacts
+        if: always()
         uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
         with:
           name: benchmark-results
-          path: benchmark-results/*.txt
+          path: benchmark-results/*
           if-no-files-found: error
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -6,6 +6,8 @@ All notable user-visible changes to Hunk are documented in this file.
 
 ### Added
 
+- Added CI performance benchmarks with PR comparison comments to guard Hunk startup, loading, rendering, highlighting, navigation, and memory costs.
+
 ### Changed
 
 ### Fixed

diff --git a/benchmarks/README.md b/benchmarks/README.md
@@ -1,28 +1,106 @@
 # Benchmarks
 
-Benchmark scripts, shared fixtures, and local result artifacts live here.
+Benchmark scripts, shared fixtures, and local result artifacts live here. These benchmarks protect Hunk's core promise: fast loading, fast first render, fast navigation, and predictable memory use on large diffs.
 
-## Scripts
+## Running locally
 
-- `bootstrap-load.ts` — measures bootstrap and git-loader cost on a synthetic large repo
-- `highlight-prefetch.ts` — measures selected-file highlight startup and adjacent prefetch readiness
-- `large-stream.ts` — measures large split-stream first-frame and scroll cost, including note-enabled cases
-- `large-stream-profile.ts` — profiles the main pure planning stages behind the large split-stream benchmark
-- `large-stream-fixture.ts` — shared synthetic diff fixture used by the large-stream benchmarks
+Run the full benchmark suite with one JSON result file:
 
-## Running
+```bash
+bun run bench -- --samples 3 --out benchmarks/results/head.json
+```
 
-From the project root:
+Run focused scripts while iterating:
 
 ```bash
 bun run bench:bootstrap-load
+bun run bench:working-tree-load
+bun run bench:changeset-parse
+bun run bench:render-layout
 bun run bench:highlight-prefetch
 bun run bench:large-stream
 bun run bench:large-stream-profile
+bun run bench:memory
+bun run bench:competitors
+```
+
+Compare two JSON result files:
+
+```bash
+bun run bench:compare -- \
+  --base benchmarks/results/base.json \
+  --head benchmarks/results/head.json \
+  --markdown benchmarks/results/summary.md
+```
+
+## Scripts
+
+- `bootstrap-load.ts` — measures bootstrap and git-loader cost on a synthetic large repo, including file-pair bootstrap.
+- `working-tree-load.ts` — measures git working-tree loads across small, medium, large, many-untracked, and few-large-untracked repos.
+- `changeset-parse.ts` — measures patch normalization, Pierre parsing, patch chunking, and normalized `DiffFile` construction for many-small-files, balanced, and large-single-file patches.
+- `render-layout.ts` — measures pure split/stack row building, section geometry, and review-plan construction for many-small-files, balanced, and large-single-file streams.
+- `highlight-prefetch.ts` — measures selected-file highlight startup and adjacent prefetch readiness.
+- `large-stream.ts` — measures large split-stream first-frame and scroll cost.
+- `large-stream-profile.ts` — optional local profiler for the main pure planning stages behind the large split-stream benchmark.
+- `memory.ts` — optional local RSS/heap profiler after fixture loading, planning, first frame, and next-hunk navigation.
+- `competitors.ts` — optional local informational comparisons against `git diff --no-ext-diff`, `delta`, `difftastic`, and `diff-so-fancy` when installed.
+- `large-stream-fixture.ts` and `lib/fixtures.ts` — shared deterministic synthetic fixtures.
+
+## Output format
+
+Each script prints `METRIC name=value` lines. `benchmarks/run.ts` repeats scripts, aggregates samples, and writes JSON:
+
+```json
+{
+  "version": 1,
+  "samplesPerBenchmark": 3,
+  "results": [
+    {
+      "name": "large-stream/cold_first_frame_ms",
+      "unit": "ms",
+      "samples": [61.2, 60.8, 62.1],
+      "median": 61.2,
+      "p75": 62.1,
+      "p95": 62.1,
+      "threshold": {
+        "maxRegressionRatio": 1.15,
+        "minAbsoluteRegression": 5
+      },
+      "comparable": true
+    }
+  ]
+}
 ```
 
-## Results
+## CI policy
+
+`.github/workflows/benchmarks.yml` runs the suite on `main`, pull requests, and manual dispatch. On pull requests it:
+
+1. Runs benchmarks on the PR revision.
+2. Checks out `origin/main` in a sibling worktree.
+3. Copies the PR benchmark harness into that base worktree so new benchmarks can compare base code during the PR that introduces them.
+4. Runs the same benchmarks on base.
+5. Compares medians and marks regressions in the PR summary without blocking the PR.
+6. Uploads raw JSON/text artifacts.
+7. Posts or updates one PR comment with a curated key-benchmark table, always including regressions and hiding noisy supporting metrics.
+
+The default CI suite intentionally excludes optional memory profiling, pure-planning profiling, and competitor comparisons to keep PR feedback fast. Pull requests use one sample per benchmark and are informational/non-blocking; `main` runs keep three samples for a more stable history. Run `bun run bench -- --include-competitors` or focused scripts locally when deeper diagnostics are needed.
+
+Initial thresholds:
+
+- Time metrics (`*_ms`): flagged as a regression when the PR median is more than 15% slower **and** at least 5ms slower.
+- Memory metrics (`rss`/`heap`): flagged as a regression when the PR median is more than 20% higher **and** at least 8MiB higher.
+- Counts, fixture sizes, availability flags, and optional competitor metrics are informational.
+
+Competitor comparisons are intentionally non-failing because installed tool versions and feature parity vary by environment.
+
+## Updating thresholds
+
+Prefer fixing regressions first. If a maintainer accepts an intentional tradeoff, update the threshold in `benchmarks/lib/benchmark-result.ts` and mention why in the PR. Keep thresholds broad enough for CI variability but tight enough to catch visible slowdowns.
 
-Use `benchmarks/results/` for local benchmark output, notes, or captured runs.
+## Noise troubleshooting
 
-The folder stays in the repo so the convention is discoverable, but local result files inside it are ignored by default.
+- Re-run the benchmark job before investigating tiny deltas; thresholds include absolute tolerances so that sub-5ms noise is not flagged as a regression.
+- PTY/renderer-adjacent metrics are noisier than pure parsing/planning metrics.
+- Use `--samples 5` locally when validating borderline changes.
+- Inspect uploaded raw samples before changing thresholds.
diff --git a/benchmarks/changeset-parse.ts b/benchmarks/changeset-parse.ts
@@ -0,0 +1,59 @@
+// Benchmark raw patch parsing and normalized DiffFile construction for several diff shapes.
+import { performance } from "perf_hooks";
+import { parsePatchFiles } from "@pierre/diffs";
+import { buildDiffFile } from "../src/core/diffFile";
+import { findPatchChunk, splitPatchIntoFileChunks } from "../src/core/patch/chunks";
+import { normalizePatchText } from "../src/core/patch/normalize";
+import { createSyntheticPatch } from "./lib/fixtures";
+
+interface Scenario {
+  name: string;
+  patch: string;
+}
+
+const scenarios: Scenario[] = [
+  {
+    name: "many_small_files",
+    patch: createSyntheticPatch({ fileCount: 240, lines: 48, changedLines: 8 }),
+  },
+  {
+    name: "balanced_changeset",
+    patch: createSyntheticPatch({ fileCount: 96, lines: 220, changedLines: 48 }),
+  },
+  {
+    name: "large_single_file",
+    patch: createSyntheticPatch({ fileCount: 1, lines: 18_000, changedLines: 2_000 }),
+  },
+];
+/** Time each stage (normalize, parse, split, build) for one scenario and print METRIC lines. */
+function measureScenario({ name, patch }: Scenario) {
+  const normalizeStart = performance.now();
+  const normalized = normalizePatchText(patch);
+  const normalizeMs = performance.now() - normalizeStart;
+
+  const parseStart = performance.now();
+  const parsed = parsePatchFiles(normalized, "patch", true);
+  const parseMs = performance.now() - parseStart;
+
+  const splitStart = performance.now();
+  const chunks = splitPatchIntoFileChunks(normalized);
+  const splitMs = performance.now() - splitStart;
+
+  const files = parsed.flatMap((entry) => entry.files); // flattened before the build timer starts
+  const buildStart = performance.now();
+  const diffFiles = files.map((metadata, index) =>
+    buildDiffFile(metadata, findPatchChunk(metadata, chunks, index), index, name, null),
+  );
+  const buildMs = performance.now() - buildStart;
+
+  console.log(`METRIC ${name}_normalize_patch_ms=${normalizeMs.toFixed(2)}`);
+  console.log(`METRIC ${name}_parse_patch_ms=${parseMs.toFixed(2)}`);
+  console.log(`METRIC ${name}_split_chunks_ms=${splitMs.toFixed(2)}`);
+  console.log(`METRIC ${name}_build_diff_files_ms=${buildMs.toFixed(2)}`);
+  console.log(`METRIC ${name}_files=${diffFiles.length}`);
+  console.log(`METRIC ${name}_patch_bytes=${Buffer.byteLength(normalized)}`);
+}
+
+for (const scenario of scenarios) {
+  measureScenario(scenario);
+}
diff --git a/benchmarks/comment-pr.ts b/benchmarks/comment-pr.ts
@@ -0,0 +1,97 @@
+#!/usr/bin/env bun
+import { readFileSync } from "node:fs";
+
+const marker = "<!-- hunk-benchmark-comment -->";
+
+/** Read a required environment variable; an unset or empty value throws `Missing <name>`. */
+function requireEnv(name: string) {
+  const found = process.env[name];
+  if (found) return found;
+
+  throw new Error(`Missing ${name}`);
+}
+
+/** Extract the path that follows the first `--body` flag; any other arguments are ignored. */
+function parseArgs(args: string[]) {
+  const flagIndex = args.indexOf("--body");
+  if (flagIndex === -1) {
+    throw new Error("Usage: bun run benchmarks/comment-pr.ts --body benchmark-results/summary.md");
+  }
+
+  const bodyPath = args[flagIndex + 1];
+  if (!bodyPath) {
+    throw new Error("Missing value for --body");
+  }
+  return { bodyPath };
+}
+
+/** Authenticated GitHub REST call: throws on a non-OK status, yields null for 204, else JSON. */
+async function githubRequest(path: string, init: RequestInit = {}) {
+  const token = requireEnv("GITHUB_TOKEN");
+  const url = `https://api.github.com${path}`;
+  const response = await fetch(url, {
+    ...init,
+    headers: {
+      Accept: "application/vnd.github+json",
+      Authorization: `Bearer ${token}`,
+      "X-GitHub-Api-Version": "2022-11-28",
+      ...init.headers,
+    },
+  });
+
+  if (response.ok) {
+    return response.status === 204 ? null : response.json();
+  }
+
+  const text = await response.text();
+  throw new Error(`GitHub API ${init.method ?? "GET"} ${path} failed: ${response.status} ${text}`);
+}
+
+/** Gather all issue comments on the pull request, paging 100 at a time until a short page. */
+async function fetchAllComments(repository: string, pullRequestNumber: number) {
+  type IssueComment = { id: number; body?: string };
+  const collected: IssueComment[] = [];
+  let page = 0;
+  let batch: IssueComment[];
+
+  do {
+    page += 1;
+    batch = (await githubRequest(
+      `/repos/${repository}/issues/${pullRequestNumber}/comments?per_page=100&page=${page}`,
+    )) as IssueComment[];
+    collected.push(...batch);
+  } while (batch.length >= 100);
+  return collected;
+}
+
+// Entry point: post the benchmark summary as one marker-tagged comment on the triggering PR.
+const { bodyPath } = parseArgs(Bun.argv.slice(2));
+const repository = requireEnv("GITHUB_REPOSITORY");
+const eventPath = requireEnv("GITHUB_EVENT_PATH");
+const event = JSON.parse(readFileSync(eventPath, "utf8")) as { pull_request?: { number: number } };
+const pullRequestNumber = event.pull_request?.number;
+
+if (!pullRequestNumber) {
+  console.log("No pull request in event payload; skipping benchmark comment.");
+  process.exit(0);
+}
+
+// Always embed the marker so later runs find and update this comment instead of adding a new one.
+const summary = readFileSync(bodyPath, "utf8");
+const body = summary.includes(marker) ? summary : `${marker}\n${summary}`;
+const comments = await fetchAllComments(repository, pullRequestNumber);
+const existing = comments.find((comment) => comment.body?.includes(marker));
+
+if (existing) {
+  // Edit the earlier benchmark comment in place to keep the PR thread to a single entry.
+  await githubRequest(`/repos/${repository}/issues/comments/${existing.id}`, {
+    method: "PATCH",
+    body: JSON.stringify({ body }),
+  });
+  console.log(`Updated benchmark comment ${existing.id}.`);
+} else {
+  const commentsPath = `/repos/${repository}/issues/${pullRequestNumber}/comments`;
+  const request = { method: "POST", body: JSON.stringify({ body }) };
+  const created = (await githubRequest(commentsPath, request)) as { id: number };
+  console.log(`Created benchmark comment ${created.id}.`);
+}