Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
64 changes: 64 additions & 0 deletions .github/workflows/release-prebuilt-npm.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,16 @@ on:
required: true
default: latest
type: string
allow_benchmark_regression:
description: Allow a manual release despite a material benchmark regression
required: true
default: false
type: boolean
benchmark_regression_reason:
description: Required reason when allowing a benchmark regression
required: false
default: ""
type: string
push:
tags:
- "v*"
Expand All @@ -25,8 +35,62 @@ concurrency:
cancel-in-progress: false

jobs:
# Gate job: compares the committed release benchmark snapshot and fails on a
# material regression. build-binaries declares `needs` on this job, so a
# failure here keeps the build jobs from starting.
release-benchmark-gate:
  name: Release benchmark gate
  runs-on: ubuntu-latest
  steps:
    - name: Check out repository
      uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

    - name: Set up Bun
      uses: oven-sh/setup-bun@0c5077e51419868618aeaa5fe8019c62421857d6 # v2.2.0
      with:
        bun-version: 1.3.10

    - name: Install dependencies
      run: bun install --frozen-lockfile

    - name: Verify tag matches package version
      if: github.event_name == 'push'
      env:
        # A tag name is free-form text chosen by whoever pushes it. Hand it to
        # the shell as data through the environment instead of expanding it
        # into the script source, where `$(...)` or quotes would be executed.
        RELEASE_TAG: ${{ github.ref_name }}
      run: bun run ./scripts/check-release-version.ts "$RELEASE_TAG"

    - name: Compare release benchmark snapshot
      id: benchmark-gate
      # Only a manual run that opted in may continue past a failed comparison;
      # tag pushes always stop here on failure.
      continue-on-error: ${{ github.event_name == 'workflow_dispatch' && inputs.allow_benchmark_regression }}
      run: |
        mkdir -p dist/release
        bun run bench:release:compare -- \
          --out dist/release/benchmark-comparison.json \
          --summary "$GITHUB_STEP_SUMMARY"

    # Runs only when the comparison failed and the manual override was used:
    # the override must come with a reason, which is recorded in the summary.
    - name: Require benchmark override reason
      if: ${{ steps.benchmark-gate.outcome == 'failure' && github.event_name == 'workflow_dispatch' && inputs.allow_benchmark_regression }}
      env:
        BENCHMARK_REGRESSION_REASON: ${{ inputs.benchmark_regression_reason }}
      run: |
        if [ -z "$BENCHMARK_REGRESSION_REASON" ]; then
          echo "benchmark_regression_reason is required when allow_benchmark_regression is true." >&2
          exit 1
        fi
        {
          echo
          echo "## Benchmark regression override"
          echo
          echo "Manual override reason: $BENCHMARK_REGRESSION_REASON"
        } >> "$GITHUB_STEP_SUMMARY"

    # Uploaded even when an earlier step failed; a missing file is ignored.
    - name: Upload benchmark comparison
      if: always()
      uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
      with:
        name: release-benchmark-comparison
        path: dist/release/benchmark-comparison.json
        if-no-files-found: ignore

build-binaries:
name: Build ${{ matrix.package_name }}
needs:
- release-benchmark-gate
runs-on: ${{ matrix.runner }}
strategy:
fail-fast: false
Expand Down
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,8 @@ All notable user-visible changes to Hunk are documented in this file.

### Added

- Added release benchmark snapshots and a release workflow gate that blocks publishing when committed benchmark results show material performance regressions.

### Changed

### Fixed
Expand Down
7 changes: 7 additions & 0 deletions benchmarks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,13 @@ Include the opt-in huge fixture tier (~1k files / 300k+ diff lines plus one ~50k
bun run bench -- --samples 1 --include-huge --out benchmarks/results/local-with-huge.json
```

Generate the release benchmark snapshot during release prep, then compare it against the previous release snapshot:

```bash
bun run bench:release
bun run bench:release:compare
```

Run focused scripts while iterating:

```bash
Expand Down
8 changes: 8 additions & 0 deletions benchmarks/lib/benchmark-result.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,10 +17,18 @@ export interface BenchmarkMetricResult {
source: string;
}

/**
 * Runtime environment a benchmark run was recorded on.
 *
 * `benchmarks/run.ts` fills these fields from `Bun.version`, `os.platform()`
 * and `os.arch()`.
 */
export interface BenchmarkRuntimeInfo {
/** Bun version string; optional. */
bunVersion?: string;
/** Operating-system platform identifier. */
platform: string;
/** CPU architecture identifier. */
arch: string;
}

/**
 * Top-level shape of one benchmark run as assembled by `benchmarks/run.ts`.
 */
export interface BenchmarkRunResult {
/** Always the literal `1`; presumably the result-format version (confirm before bumping). */
version: 1;
/** Timestamp of the run; `benchmarks/run.ts` writes `new Date().toISOString()`. */
generatedAt: string;
/** Git commit the run was taken from; optional. */
gitSha?: string;
/** `version` read from `package.json`; absent when it could not be read. */
packageVersion?: string;
/** Bun version, platform and architecture the run executed on; optional. */
runtime?: BenchmarkRuntimeInfo;
/** Number of samples per benchmark; `benchmarks/run.ts` sets it from its `samples` option. */
samplesPerBenchmark: number;
/** One aggregated entry per recorded metric. */
results: BenchmarkMetricResult[];
}
Expand Down
Empty file added benchmarks/release/.gitkeep
Empty file.
38 changes: 38 additions & 0 deletions benchmarks/release/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Release benchmark snapshots

Committed files in this directory are the performance baselines used by the release workflow. They are intentionally versioned so a release can be audited after publishing.

## Release prep

Before pushing a release tag, run the benchmark suite for the version in `package.json`:

```bash
bun run bench:release
```

This writes:

```text
benchmarks/release/bench-x.y.z.json
```

Then compare it against the snapshot of the most recent stable release with a lower version number:

```bash
bun run bench:release:compare
```

Commit the new `bench-x.y.z.json` file with the release-prep change. The tag release workflow validates that this file exists and fails before publishing npm packages if the comparison finds a material regression.

## Regression policy

The gate compares benchmark medians and only fails on regressions that exceed both the relative and absolute thresholds embedded in the benchmark result metadata:

- timing metrics: default `+15%` and at least `+5ms`
- memory metrics: default `+20%` and at least `+8MiB`

New metrics are informational until a later release has a baseline. A previously comparable metric that is missing from the new snapshot fails the gate, because the gate can no longer protect that measurement.

## Backfilling

When adding this gate or restoring a missing baseline, check out the release tag and generate the snapshot with the same Bun version and runner class used for current release prep. Commit backfilled snapshots before relying on the release gate.
16 changes: 16 additions & 0 deletions benchmarks/run.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#!/usr/bin/env bun
import os from "node:os";
import { mkdirSync, writeFileSync } from "node:fs";
import { dirname, resolve } from "node:path";
import { aggregateMetric, type BenchmarkRunResult } from "./lib/benchmark-result";
Expand Down Expand Up @@ -94,6 +95,15 @@ function gitSha() {
return Buffer.from(proc.stdout).toString("utf8").trim();
}

/**
 * Reads the `version` field from `package.json`.
 *
 * Best-effort by design: the version is optional metadata on the benchmark
 * result, so a missing or unreadable manifest must not abort the run.
 *
 * @returns The version string, or `undefined` when the file cannot be read,
 *   is not valid JSON, or does not carry a string `version` field.
 */
async function packageVersion(): Promise<string | undefined> {
  try {
    // Treat the parsed manifest as untrusted until it has been narrowed.
    const manifest: unknown = JSON.parse(await Bun.file("package.json").text());
    if (typeof manifest !== "object" || manifest === null) {
      return undefined;
    }

    const version = (manifest as { version?: unknown }).version;
    // Reject non-string values so the result always matches the declared
    // `packageVersion?: string` field.
    return typeof version === "string" ? version : undefined;
  } catch {
    return undefined;
  }
}

function parseMetrics(output: string) {
const metrics = new Map<string, number>();
const metricPattern = /^METRIC\s+([A-Za-z0-9_.:-]+)=(-?\d+(?:\.\d+)?)$/;
Expand Down Expand Up @@ -179,6 +189,12 @@ const runResult: BenchmarkRunResult = {
version: 1,
generatedAt: new Date().toISOString(),
gitSha: gitSha(),
packageVersion: await packageVersion(),
runtime: {
bunVersion: Bun.version,
platform: os.platform(),
arch: os.arch(),
},
samplesPerBenchmark: options.samples,
results,
};
Expand Down
2 changes: 2 additions & 0 deletions package.json
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,8 @@
"update:homebrew-formula": "bun run ./scripts/update-homebrew-formula.ts",
"prepack": "bun run build:npm",
"bench": "bun run benchmarks/run.ts",
"bench:release": "bun run ./scripts/run-release-benchmark.ts",
"bench:release:compare": "bun run ./scripts/compare-release-benchmarks.ts",
"bench:bootstrap-load": "bun run benchmarks/bootstrap-load.ts",
"bench:working-tree-load": "bun run benchmarks/working-tree-load.ts",
"bench:changeset-parse": "bun run benchmarks/changeset-parse.ts",
Expand Down
163 changes: 163 additions & 0 deletions scripts/compare-release-benchmarks.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from "node:fs";
import os from "node:os";
import path from "node:path";
import { afterEach, describe, expect, test } from "bun:test";
import type { BenchmarkMetricResult, BenchmarkRunResult } from "../benchmarks/lib/benchmark-result";
import {
compareBenchmarkRuns,
findPreviousReleaseBenchmark,
formatComparisonMarkdown,
isMaterialRegression,
} from "./compare-release-benchmarks";
import { parseRunReleaseBenchmarkArgs } from "./run-release-benchmark";

/** Root of the temp tree made by the current test; the afterEach hook removes it. */
let tempRoot: string | undefined;

/**
 * Creates a fresh temp directory that contains `benchmarks/release` and
 * remembers its root in `tempRoot` for cleanup.
 *
 * @returns Absolute path of the created `benchmarks/release` directory.
 */
function createTempReleaseDir(): string {
  const prefix = path.join(os.tmpdir(), "hunk-release-benchmarks-");
  const root = mkdtempSync(prefix);
  tempRoot = root;

  const target = path.join(root, "benchmarks", "release");
  mkdirSync(target, { recursive: true });
  return target;
}

/**
 * Builds a benchmark metric fixture: a comparable `ms` timing metric with a
 * median of 100, with any field replaceable through `overrides`.
 *
 * @param overrides Fields that replace the fixture defaults.
 * @returns A new metric object; nothing is shared between calls.
 */
function metric(overrides: Partial<BenchmarkMetricResult>): BenchmarkMetricResult {
  const baseline: BenchmarkMetricResult = {
    name: "large-stream/cold_first_frame_ms",
    source: "large-stream",
    unit: "ms",
    samples: [100, 101, 99],
    median: 100,
    p75: 101,
    p95: 101,
    min: 99,
    max: 101,
    comparable: true,
    threshold: { maxRegressionRatio: 1.15, minAbsoluteRegression: 5 },
  };

  // Overrides are applied last so they win over every default.
  return { ...baseline, ...overrides };
}

/**
 * Wraps metric fixtures in a benchmark run fixture with fixed metadata.
 *
 * @param results Metrics to place in the run; the array is used as-is.
 * @returns A run result with constant timestamp, git SHA and sample count.
 */
function runResult(results: BenchmarkMetricResult[]): BenchmarkRunResult {
  const fixture: BenchmarkRunResult = {
    version: 1,
    generatedAt: "2026-06-13T00:00:00.000Z",
    gitSha: "abc1234",
    samplesPerBenchmark: 3,
    results,
  };

  return fixture;
}

// Remove the temp tree left by the test that just ran, if it created one.
afterEach(() => {
  if (!tempRoot) {
    return;
  }

  rmSync(tempRoot, { recursive: true, force: true });
  tempRoot = undefined;
});

describe("findPreviousReleaseBenchmark", () => {
  test("selects the latest lower stable release benchmark", () => {
    const releaseDir = createTempReleaseDir();

    // Mix of older stable releases, a prerelease and the expected match.
    const snapshotVersions = ["0.14.1", "0.15.0", "0.15.3-beta.1", "0.15.3"];
    snapshotVersions.forEach((version) => {
      writeFileSync(path.join(releaseDir, `bench-${version}.json`), "{}\n");
    });

    const previous = findPreviousReleaseBenchmark("0.15.4", releaseDir);

    expect(previous).toMatchObject({
      version: "0.15.3",
    });
  });
});

describe("isMaterialRegression", () => {
  test("requires both relative and absolute timing thresholds", () => {
    const threshold = { maxRegressionRatio: 1.15, minAbsoluteRegression: 5 };

    // [base, head, expected verdict]
    const cases: Array<[number, number, boolean]> = [
      [100, 116, true], // over both the ratio and the absolute floor
      [100, 104, false], // under the ratio
      [10, 12, false], // over the ratio but under the absolute floor
      [100, 90, false], // an improvement
    ];

    for (const [base, head, expected] of cases) {
      expect(isMaterialRegression(base, head, threshold)).toBe(expected);
    }
  });
});

describe("parseRunReleaseBenchmarkArgs", () => {
  test("keeps an explicit output path when --version appears later", async () => {
    const outPath = path.join(os.tmpdir(), "custom-release-benchmark.json");

    // --out deliberately comes before --version.
    const parsed = parseRunReleaseBenchmarkArgs(["--out", outPath, "--version", "0.16.0"]);

    await expect(parsed).resolves.toMatchObject({
      version: "0.16.0",
      out: outPath,
    });
  });
});

describe("compareBenchmarkRuns", () => {
  test("fails material comparable regressions", () => {
    const base = runResult([metric({ median: 100 })]);
    const head = runResult([metric({ median: 120 })]);

    const { failed, rows } = compareBenchmarkRuns(base, head);

    expect(failed).toBe(true);
    expect(rows[0]?.status).toBe("fail");
  });

  test("passes comparable changes inside the material threshold", () => {
    const base = runResult([metric({ median: 100 })]);
    const head = runResult([metric({ median: 110 })]);

    const { failed, rows } = compareBenchmarkRuns(base, head);

    expect(failed).toBe(false);
    expect(rows[0]?.status).toBe("pass");
  });

  test("treats new comparable metrics as informational until a baseline exists", () => {
    // The base run has no metrics, so the head metric has nothing to compare to.
    const base = runResult([]);
    const head = runResult([metric({ median: 100 })]);

    const { failed, rows } = compareBenchmarkRuns(base, head);

    expect(failed).toBe(false);
    expect(rows[0]?.status).toBe("missing-base");
  });

  test("fails when a previously comparable metric disappears", () => {
    // The head run dropped the only metric the base run had.
    const base = runResult([metric({ median: 100 })]);
    const head = runResult([]);

    const { failed, rows } = compareBenchmarkRuns(base, head);

    expect(failed).toBe(true);
    expect(rows[0]?.status).toBe("missing-head");
  });
});

describe("formatComparisonMarkdown", () => {
  test("shows absolute threshold units", () => {
    const MIB = 1024 * 1024;

    // Memory metric fixture whose median is given in MiB.
    const rssMetric = (mebibytes: number) =>
      metric({
        name: "memory/rss_bytes",
        source: "memory",
        unit: "bytes",
        median: mebibytes * MIB,
        threshold: { maxRegressionRatio: 1.2, minAbsoluteRegression: 8 * MIB },
      });

    const base = runResult([metric({ median: 100 }), rssMetric(100)]);
    const head = runResult([metric({ median: 110 }), rssMetric(105)]);
    const comparison = compareBenchmarkRuns(base, head);

    const markdown = formatComparisonMarkdown(comparison, {
      baseLabel: "0.15.1",
      headLabel: "0.15.2",
    });

    expect(markdown).toContain("+15% and +5.00 ms");
    expect(markdown).toContain("+20% and +8.00 MiB");
  });
});
Loading
Loading