Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 55 additions & 16 deletions .github/workflows/benchmarks.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,10 +9,22 @@ on:
- "docs/**"
- "assets/**"
- "LICENSE"
pull_request:
paths-ignore:
- "**/*.md"
- "docs/**"
- "assets/**"
- "LICENSE"
workflow_dispatch:

permissions:
contents: read
issues: write
pull-requests: write

env:
SKIP_INSTALL_SIMPLE_GIT_HOOKS: "1"
HUNK_BENCHMARK_SAMPLES: ${{ github.event_name == 'pull_request' && '1' || '3' }}

concurrency:
group: benchmarks-${{ github.workflow }}-${{ github.ref }}
Expand All @@ -25,6 +37,8 @@ jobs:
steps:
- name: Check out repository
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
with:
fetch-depth: 0

- name: Set up Bun
uses: oven-sh/setup-bun@0c5077e51419868618aeaa5fe8019c62421857d6 # v2.2.0
Expand All @@ -34,36 +48,61 @@ jobs:
- name: Install dependencies
run: bun install --frozen-lockfile

- name: Run bootstrap benchmark
- name: Run head benchmarks
run: |
mkdir -p benchmark-results
bun run bench:bootstrap-load | tee benchmark-results/bootstrap-load.txt
bun run bench -- --samples "$HUNK_BENCHMARK_SAMPLES" --out benchmark-results/head.json \
| tee benchmark-results/head.txt

- name: Run highlight prefetch benchmark
- name: Run base benchmarks
if: github.event_name == 'pull_request'
run: |
bun run bench:highlight-prefetch | tee benchmark-results/highlight-prefetch.txt
git fetch origin main
git worktree add ../hunk-benchmark-base origin/main
rm -rf ../hunk-benchmark-base/benchmarks
cp -R benchmarks ../hunk-benchmark-base/benchmarks
cd ../hunk-benchmark-base
bun install --frozen-lockfile
bun run benchmarks/run.ts --samples "$HUNK_BENCHMARK_SAMPLES" --out "$GITHUB_WORKSPACE/benchmark-results/base.json" \
| tee "$GITHUB_WORKSPACE/benchmark-results/base.txt"

- name: Run large stream benchmark
- name: Compare benchmark results
id: compare
if: github.event_name == 'pull_request'
continue-on-error: true
run: |
bun run bench:large-stream | tee benchmark-results/large-stream.txt
bun run bench:compare -- \
--base benchmark-results/base.json \
--head benchmark-results/head.json \
--out benchmark-results/comparison.json \
--markdown benchmark-results/summary.md

- name: Publish benchmark summary
if: always()
run: |
{
echo '## Benchmark results'
echo
for file in benchmark-results/*.txt; do
echo "### $(basename "$file")"
if [ -f benchmark-results/summary.md ]; then
cat benchmark-results/summary.md >> "$GITHUB_STEP_SUMMARY"
else
{
echo '## Benchmark results'
echo
echo '```text'
cat "$file"
cat benchmark-results/head.txt
echo '```'
echo
done
} >> "$GITHUB_STEP_SUMMARY"
} >> "$GITHUB_STEP_SUMMARY"
fi

- name: Comment benchmark summary on PR
if: always() && github.event_name == 'pull_request' && hashFiles('benchmark-results/summary.md') != ''
continue-on-error: true
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: bun run bench:comment-pr -- --body benchmark-results/summary.md

- name: Upload benchmark artifacts
if: always()
uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
with:
name: benchmark-results
path: benchmark-results/*.txt
path: benchmark-results/*
if-no-files-found: error
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ All notable user-visible changes to Hunk are documented in this file.

### Added

- Added CI performance benchmarks with PR comparison comments to guard Hunk startup, loading, rendering, highlighting, navigation, and memory costs.

### Changed

### Fixed
Expand Down
102 changes: 90 additions & 12 deletions benchmarks/README.md
Original file line number Diff line number Diff line change
@@ -1,28 +1,106 @@
# Benchmarks

Benchmark scripts, shared fixtures, and local result artifacts live here.
Benchmark scripts, shared fixtures, and local result artifacts live here. These benchmarks protect Hunk's core promise: fast loading, fast first render, fast navigation, and predictable memory use on large diffs.

## Scripts
## Running locally

- `bootstrap-load.ts` — measures bootstrap and git-loader cost on a synthetic large repo
- `highlight-prefetch.ts` — measures selected-file highlight startup and adjacent prefetch readiness
- `large-stream.ts` — measures large split-stream first-frame and scroll cost, including note-enabled cases
- `large-stream-profile.ts` — profiles the main pure planning stages behind the large split-stream benchmark
- `large-stream-fixture.ts` — shared synthetic diff fixture used by the large-stream benchmarks
Run the full benchmark suite with one JSON result file:

## Running
```bash
bun run bench -- --samples 3 --out benchmarks/results/head.json
```

From the project root:
Run focused scripts while iterating:

```bash
bun run bench:bootstrap-load
bun run bench:working-tree-load
bun run bench:changeset-parse
bun run bench:render-layout
bun run bench:highlight-prefetch
bun run bench:large-stream
bun run bench:large-stream-profile
bun run bench:memory
bun run bench:competitors
```

Compare two JSON result files:

```bash
bun run bench:compare -- \
--base benchmarks/results/base.json \
--head benchmarks/results/head.json \
--markdown benchmarks/results/summary.md
```

## Scripts

- `bootstrap-load.ts` — measures bootstrap and git-loader cost on a synthetic large repo, including file-pair bootstrap.
- `working-tree-load.ts` — measures git working-tree loads across small, medium, large, many-untracked, and few-large-untracked repos.
- `changeset-parse.ts` — measures patch normalization, Pierre parsing, patch chunking, and normalized `DiffFile` construction for many-small-files, balanced, and large-single-file patches.
- `render-layout.ts` — measures pure split/stack row building, section geometry, and review-plan construction for many-small-files, balanced, and large-single-file streams.
- `highlight-prefetch.ts` — measures selected-file highlight startup and adjacent prefetch readiness.
- `large-stream.ts` — measures large split-stream first-frame and scroll cost.
- `large-stream-profile.ts` — optional local profiler for the main pure planning stages behind the large split-stream benchmark.
- `memory.ts` — optional local RSS/heap profiler after fixture loading, planning, first frame, and next-hunk navigation.
- `competitors.ts` — optional local informational comparisons against `git diff --no-ext-diff`, `delta`, `difftastic`, and `diff-so-fancy` when installed.
- `large-stream-fixture.ts` and `lib/fixtures.ts` — shared deterministic synthetic fixtures.

## Output format

Each script prints `METRIC name=value` lines. `benchmarks/run.ts` repeats scripts, aggregates samples, and writes JSON:

```json
{
"version": 1,
"samplesPerBenchmark": 3,
"results": [
{
"name": "large-stream/cold_first_frame_ms",
"unit": "ms",
"samples": [61.2, 60.8, 62.1],
"median": 61.2,
"p75": 62.1,
"p95": 62.1,
"threshold": {
"maxRegressionRatio": 1.15,
"minAbsoluteRegression": 5
},
"comparable": true
}
]
}
```

## Results
## CI policy

`.github/workflows/benchmarks.yml` runs the suite on `main`, pull requests, and manual dispatch. On pull requests it:

1. Runs benchmarks on the PR revision.
2. Checks out `origin/main` in a sibling worktree.
3. Copies the PR benchmark harness into that base worktree so new benchmarks can compare base code during the PR that introduces them.
4. Runs the same benchmarks on base.
5. Compares medians and marks regressions in the PR summary without blocking the PR.
6. Uploads raw JSON/text artifacts.
7. Posts or updates one PR comment with a curated key-benchmark table, always including regressions and hiding noisy supporting metrics.

The default CI suite intentionally excludes optional memory profiling, pure-planning profiling, and competitor comparisons to keep PR feedback fast. Pull requests use one sample per benchmark and are informational/non-blocking; `main` runs keep three samples for a more stable history. Run `bun run bench -- --include-competitors` or focused scripts locally when deeper diagnostics are needed.

Initial thresholds:

- Time metrics (`*_ms`): flagged as a regression when the PR median is more than 15% slower **and** at least 5ms slower.
- Memory metrics (`rss`/`heap`): flagged as a regression when the PR median is more than 20% higher **and** at least 8MiB higher.
- Counts, fixture sizes, availability flags, and optional competitor metrics are informational.

Competitor comparisons are intentionally non-failing because installed tool versions and feature parity vary by environment.

## Updating thresholds

Prefer fixing regressions first. If a maintainer accepts an intentional tradeoff, update the threshold in `benchmarks/lib/benchmark-result.ts` and mention why in the PR. Keep thresholds broad enough for CI variability but tight enough to catch visible slowdowns.

Use `benchmarks/results/` for local benchmark output, notes, or captured runs.
## Noise troubleshooting

The folder stays in the repo so the convention is discoverable, but local result files inside it are ignored by default.
- Re-run failed jobs before investigating tiny deltas; thresholds include absolute tolerances to avoid failing on sub-5ms noise.
- PTY/renderer-adjacent metrics are noisier than pure parsing/planning metrics.
- Use `--samples 5` locally when validating borderline changes.
- Inspect uploaded raw samples before changing thresholds.
59 changes: 59 additions & 0 deletions benchmarks/changeset-parse.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// Benchmark raw patch parsing and normalized DiffFile construction for several diff shapes.
import { performance } from "perf_hooks";
import { parsePatchFiles } from "@pierre/diffs";
import { buildDiffFile } from "../src/core/diffFile";
import { findPatchChunk, splitPatchIntoFileChunks } from "../src/core/patch/chunks";
import { normalizePatchText } from "../src/core/patch/normalize";
import { createSyntheticPatch } from "./lib/fixtures";

/** One benchmark case: a label paired with the patch text that gets measured. */
interface Scenario {
/** Label used as the prefix of every `METRIC` line printed for this scenario. */
name: string;
/** Patch text that `measureScenario` hands to `normalizePatchText` before parsing. */
patch: string;
}

// Synthetic patches covering three diff shapes: many files with few lines each,
// a mid-sized mix, and one very large file. The option values are forwarded
// verbatim to `createSyntheticPatch`.
// NOTE(review): `lines` / `changedLines` are presumably per-file counts — confirm in ./lib/fixtures.
const scenarios: Scenario[] = [
{
name: "many_small_files",
patch: createSyntheticPatch({ fileCount: 240, lines: 48, changedLines: 8 }),
},
{
name: "balanced_changeset",
patch: createSyntheticPatch({ fileCount: 96, lines: 220, changedLines: 48 }),
},
{
name: "large_single_file",
patch: createSyntheticPatch({ fileCount: 1, lines: 18_000, changedLines: 2_000 }),
},
];

/**
 * Time each stage of turning one scenario's patch text into diff files and
 * print the results as `METRIC name=value` lines on stdout.
 *
 * Four stages are timed separately with `performance.now()`: patch
 * normalization, `parsePatchFiles`, `splitPatchIntoFileChunks`, and the
 * per-file `buildDiffFile` pass. The resulting file count and the byte length
 * of the normalized patch are printed as well.
 *
 * @param scenario - Label and patch text to measure.
 */
function measureScenario(scenario: Scenario) {
  const { name, patch } = scenario;

  // Stage 1: normalize the raw patch text.
  let startedAt = performance.now();
  const normalizedPatch = normalizePatchText(patch);
  const normalizeElapsed = performance.now() - startedAt;

  // Stage 2: parse the normalized patch.
  startedAt = performance.now();
  const parsedEntries = parsePatchFiles(normalizedPatch, "patch", true);
  const parseElapsed = performance.now() - startedAt;

  // Stage 3: split the normalized patch into per-file chunks.
  startedAt = performance.now();
  const patchChunks = splitPatchIntoFileChunks(normalizedPatch);
  const splitElapsed = performance.now() - startedAt;

  // Flattening the parsed entries is deliberately kept outside every timed region.
  const fileMetadata = parsedEntries.flatMap((entry) => entry.files);

  // Stage 4: build one diff file per parsed file, pairing each with its patch chunk.
  startedAt = performance.now();
  const builtFiles = fileMetadata.map((metadata, position) => {
    const chunk = findPatchChunk(metadata, patchChunks, position);
    return buildDiffFile(metadata, chunk, position, name, null);
  });
  const buildElapsed = performance.now() - startedAt;

  const metricLines = [
    `METRIC ${name}_normalize_patch_ms=${normalizeElapsed.toFixed(2)}`,
    `METRIC ${name}_parse_patch_ms=${parseElapsed.toFixed(2)}`,
    `METRIC ${name}_split_chunks_ms=${splitElapsed.toFixed(2)}`,
    `METRIC ${name}_build_diff_files_ms=${buildElapsed.toFixed(2)}`,
    `METRIC ${name}_files=${builtFiles.length}`,
    `METRIC ${name}_patch_bytes=${Buffer.byteLength(normalizedPatch)}`,
  ];

  for (const line of metricLines) {
    console.log(line);
  }
}

// Measure every scenario in declaration order.
scenarios.forEach((scenario) => {
  measureScenario(scenario);
});
97 changes: 97 additions & 0 deletions benchmarks/comment-pr.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
#!/usr/bin/env bun
import { readFileSync } from "node:fs";

// HTML comment used to recognise an existing benchmark comment: the script
// looks for a PR comment whose body contains this exact string.
const marker = "<!-- hunk-benchmark-comment -->";

/**
 * Read an environment variable that must be present.
 *
 * @param name - Name of the environment variable to look up.
 * @returns The variable's value.
 * @throws Error with the message `Missing <name>` when the variable is unset or empty.
 */
function requireEnv(name: string) {
  const resolved = process.env[name];
  if (resolved) {
    return resolved;
  }

  throw new Error(`Missing ${name}`);
}

/**
 * Extract the `--body <path>` option from the command-line arguments.
 *
 * Only the first `--body` occurrence is considered.
 *
 * @param args - Command-line arguments, without the runtime and script entries.
 * @returns An object whose `bodyPath` is the value following `--body`.
 * @throws Error with a usage message when `--body` is absent, or with
 *   `Missing value for --body` when it has no (or an empty) value.
 */
function parseArgs(args: string[]) {
  const flagPosition = args.indexOf("--body");
  if (flagPosition === -1) {
    throw new Error("Usage: bun run benchmarks/comment-pr.ts --body benchmark-results/summary.md");
  }

  const bodyPath = args[flagPosition + 1];
  if (!bodyPath) {
    throw new Error("Missing value for --body");
  }

  return { bodyPath };
}

/**
 * Send an authenticated request to the GitHub REST API.
 *
 * The bearer token is read from the `GITHUB_TOKEN` environment variable on
 * every call. Headers supplied in `init.headers` are spread after the defaults,
 * so they override them.
 *
 * @param path - API path appended to `https://api.github.com` (e.g. `/repos/...`).
 * @param init - Optional fetch options such as `method` and `body`.
 * @returns `null` for a 204 response, otherwise the parsed JSON response body.
 * @throws Error when the token is missing, or when the response is not OK; the
 *   latter message includes the method, path, status code, and response text.
 */
async function githubRequest(path: string, init: RequestInit = {}) {
  const token = requireEnv("GITHUB_TOKEN");
  const url = `https://api.github.com${path}`;

  const response = await fetch(url, {
    ...init,
    headers: {
      Accept: "application/vnd.github+json",
      Authorization: `Bearer ${token}`,
      "X-GitHub-Api-Version": "2022-11-28",
      ...init.headers,
    },
  });

  if (response.ok) {
    // 204 No Content carries no body, so there is nothing to parse.
    return response.status === 204 ? null : response.json();
  }

  const text = await response.text();
  throw new Error(
    `GitHub API ${init.method ?? "GET"} ${path} failed: ${response.status} ${text}`,
  );
}

/**
 * Collect all issue comments on a pull request, following pagination.
 *
 * Pages of up to 100 comments are requested one after another, starting at
 * page 1. Paging stops after the first page that returns fewer than 100
 * entries, so the caller can search the complete comment list.
 *
 * @param repository - Repository in `owner/name` form, interpolated into the API path.
 * @param pullRequestNumber - Pull request (issue) number whose comments are listed.
 * @returns Every comment fetched, in the order the pages returned them.
 */
async function fetchAllComments(repository: string, pullRequestNumber: number) {
  const collected: Array<{ id: number; body?: string }> = [];
  let page = 1;
  let lastBatchSize = 0;

  do {
    const batch = (await githubRequest(
      `/repos/${repository}/issues/${pullRequestNumber}/comments?per_page=100&page=${page}`,
    )) as Array<{ id: number; body?: string }>;

    collected.push(...batch);
    lastBatchSize = batch.length;
    page += 1;
  } while (lastBatchSize >= 100); // a full page means another page may follow

  return collected;
}

// Script entry point: publish the benchmark summary as a pull-request comment,
// updating a previous comment that contains the marker instead of adding a new one.
const { bodyPath } = parseArgs(Bun.argv.slice(2));
const repository = requireEnv("GITHUB_REPOSITORY");
const eventPath = requireEnv("GITHUB_EVENT_PATH");
const rawEvent = readFileSync(eventPath, "utf8");
const event = JSON.parse(rawEvent) as { pull_request?: { number: number } };
const pullRequestNumber = event.pull_request?.number;

// Event payloads without a pull request have nothing to comment on.
if (!pullRequestNumber) {
  console.log("No pull request in event payload; skipping benchmark comment.");
  process.exit(0);
}

const body = readFileSync(bodyPath, "utf8");
const payload = JSON.stringify({ body });
const comments = await fetchAllComments(repository, pullRequestNumber);
const existing = comments.find((comment) => comment.body?.includes(marker));

if (!existing) {
  // No earlier comment carries the marker: create a fresh one.
  const created = (await githubRequest(
    `/repos/${repository}/issues/${pullRequestNumber}/comments`,
    { method: "POST", body: payload },
  )) as { id: number };
  console.log(`Created benchmark comment ${created.id}.`);
} else {
  // Reuse the earlier comment so the PR keeps a single benchmark comment.
  await githubRequest(`/repos/${repository}/issues/comments/${existing.id}`, {
    method: "PATCH",
    body: payload,
  });
  console.log(`Updated benchmark comment ${existing.id}.`);
}
Loading
Loading