jobs:
  # Blocks the release when the committed benchmark snapshot for this version
  # shows a material regression against the previous stable snapshot.
  release-benchmark-gate:
    name: Release benchmark gate
    runs-on: ubuntu-latest
    steps:
      - name: Check out repository
        uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2

      - name: Set up Bun
        uses: oven-sh/setup-bun@0c5077e51419868618aeaa5fe8019c62421857d6 # v2.2.0
        with:
          bun-version: 1.3.10

      - name: Install dependencies
        run: bun install --frozen-lockfile

      - name: Verify tag matches package version
        if: github.event_name == 'push'
        env:
          # Pass the tag through the environment instead of interpolating it
          # into the script text: ref names may contain shell metacharacters.
          RELEASE_TAG: ${{ github.ref_name }}
        run: bun run ./scripts/check-release-version.ts "$RELEASE_TAG"

      - name: Compare release benchmark snapshot
        id: benchmark-gate
        # Only a manual dispatch with the explicit override may continue past
        # a failed comparison; tag pushes always fail hard here.
        continue-on-error: ${{ github.event_name == 'workflow_dispatch' && inputs.allow_benchmark_regression }}
        run: |
          mkdir -p dist/release
          bun run bench:release:compare -- \
            --out dist/release/benchmark-comparison.json \
            --summary "$GITHUB_STEP_SUMMARY"

      - name: Require benchmark override reason
        if: ${{ steps.benchmark-gate.outcome == 'failure' && github.event_name == 'workflow_dispatch' && inputs.allow_benchmark_regression }}
        env:
          BENCHMARK_REGRESSION_REASON: ${{ inputs.benchmark_regression_reason }}
        run: |
          if [ -z "$BENCHMARK_REGRESSION_REASON" ]; then
            echo "benchmark_regression_reason is required when allow_benchmark_regression is true." >&2
            exit 1
          fi
          {
            echo
            echo "## Benchmark regression override"
            echo
            echo "Manual override reason: $BENCHMARK_REGRESSION_REASON"
          } >> "$GITHUB_STEP_SUMMARY"

      - name: Upload benchmark comparison
        # Keep the comparison report even when the gate fails.
        if: always()
        uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1
        with:
          name: release-benchmark-comparison
          path: dist/release/benchmark-comparison.json
          if-no-files-found: ignore
runtime?: BenchmarkRuntimeInfo; samplesPerBenchmark: number; results: BenchmarkMetricResult[]; } diff --git a/benchmarks/release/.gitkeep b/benchmarks/release/.gitkeep new file mode 100644 index 00000000..e69de29b diff --git a/benchmarks/release/README.md b/benchmarks/release/README.md new file mode 100644 index 00000000..4e256cd3 --- /dev/null +++ b/benchmarks/release/README.md @@ -0,0 +1,38 @@ +# Release benchmark snapshots + +Committed files in this directory are the performance baselines used by the release workflow. They are intentionally versioned so a release can be audited after publishing. + +## Release prep + +Before pushing a release tag, run the benchmark suite for the version in `package.json`: + +```bash +bun run bench:release +``` + +This writes: + +```text +benchmarks/release/bench-x.y.z.json +``` + +Then compare it against the latest lower stable release snapshot: + +```bash +bun run bench:release:compare +``` + +Commit the new `bench-x.y.z.json` file with the release-prep change. The tag release workflow validates that this file exists and fails before publishing npm packages if the comparison finds a material regression. + +## Regression policy + +The gate compares benchmark medians and only fails on regressions that exceed both the relative and absolute thresholds embedded in the benchmark result metadata: + +- timing metrics: default `+15%` and at least `+5ms` +- memory metrics: default `+20%` and at least `+8MiB` + +New metrics are informational until a later release has a baseline. Missing previously comparable metrics fail, because that means the gate can no longer protect that measurement. + +## Backfilling + +When adding this gate or restoring a missing baseline, check out the release tag and generate the snapshot with the same Bun version and runner class used for current release prep. Commit backfilled snapshots before relying on the release gate. 
/**
 * Read the `version` field of `package.json`, resolved relative to the current working directory.
 *
 * Best-effort metadata lookup: resolves to `undefined` when the file cannot be read or parsed, or
 * when it does not carry a string `version`, so a benchmark run is never aborted by it.
 */
async function packageVersion(): Promise<string | undefined> {
  try {
    const manifest: unknown = JSON.parse(await Bun.file("package.json").text());
    if (typeof manifest !== "object" || manifest === null) {
      return undefined;
    }

    // Narrow at runtime instead of asserting the shape: the result is written
    // into a field typed as `string | undefined`.
    const version = (manifest as { version?: unknown }).version;
    return typeof version === "string" ? version : undefined;
  } catch {
    return undefined;
  }
}
/**
 * Build a benchmark metric fixture for the comparison tests.
 *
 * Starts from a comparable timing metric with a median of 100 ms and the default timing threshold,
 * then lets every field present in `overrides` replace the corresponding default.
 */
function metric(overrides: Partial<BenchmarkMetricResult>): BenchmarkMetricResult {
  const defaults: BenchmarkMetricResult = {
    name: "large-stream/cold_first_frame_ms",
    source: "large-stream",
    unit: "ms",
    samples: [100, 101, 99],
    median: 100,
    p75: 101,
    p95: 101,
    min: 99,
    max: 101,
    comparable: true,
    threshold: { maxRegressionRatio: 1.15, minAbsoluteRegression: 5 },
  };

  // A fresh defaults object is created per call, so assigning into it is safe.
  return Object.assign(defaults, overrides);
}
test("requires both relative and absolute timing thresholds", () => { + const threshold = { maxRegressionRatio: 1.15, minAbsoluteRegression: 5 }; + + expect(isMaterialRegression(100, 116, threshold)).toBe(true); + expect(isMaterialRegression(100, 104, threshold)).toBe(false); + expect(isMaterialRegression(10, 12, threshold)).toBe(false); + expect(isMaterialRegression(100, 90, threshold)).toBe(false); + }); +}); + +describe("parseRunReleaseBenchmarkArgs", () => { + test("keeps an explicit output path when --version appears later", async () => { + const outPath = path.join(os.tmpdir(), "custom-release-benchmark.json"); + + await expect( + parseRunReleaseBenchmarkArgs(["--out", outPath, "--version", "0.16.0"]), + ).resolves.toMatchObject({ + version: "0.16.0", + out: outPath, + }); + }); +}); + +describe("compareBenchmarkRuns", () => { + test("fails material comparable regressions", () => { + const comparison = compareBenchmarkRuns( + runResult([metric({ median: 100 })]), + runResult([metric({ median: 120 })]), + ); + + expect(comparison.failed).toBe(true); + expect(comparison.rows[0]?.status).toBe("fail"); + }); + + test("passes comparable changes inside the material threshold", () => { + const comparison = compareBenchmarkRuns( + runResult([metric({ median: 100 })]), + runResult([metric({ median: 110 })]), + ); + + expect(comparison.failed).toBe(false); + expect(comparison.rows[0]?.status).toBe("pass"); + }); + + test("treats new comparable metrics as informational until a baseline exists", () => { + const comparison = compareBenchmarkRuns(runResult([]), runResult([metric({ median: 100 })])); + + expect(comparison.failed).toBe(false); + expect(comparison.rows[0]?.status).toBe("missing-base"); + }); + + test("fails when a previously comparable metric disappears", () => { + const comparison = compareBenchmarkRuns(runResult([metric({ median: 100 })]), runResult([])); + + expect(comparison.failed).toBe(true); + expect(comparison.rows[0]?.status).toBe("missing-head"); + 
}); +}); + +describe("formatComparisonMarkdown", () => { + test("shows absolute threshold units", () => { + const comparison = compareBenchmarkRuns( + runResult([ + metric({ median: 100 }), + metric({ + name: "memory/rss_bytes", + source: "memory", + unit: "bytes", + median: 100 * 1024 * 1024, + threshold: { maxRegressionRatio: 1.2, minAbsoluteRegression: 8 * 1024 * 1024 }, + }), + ]), + runResult([ + metric({ median: 110 }), + metric({ + name: "memory/rss_bytes", + source: "memory", + unit: "bytes", + median: 105 * 1024 * 1024, + threshold: { maxRegressionRatio: 1.2, minAbsoluteRegression: 8 * 1024 * 1024 }, + }), + ]), + ); + + const markdown = formatComparisonMarkdown(comparison, { + baseLabel: "0.15.1", + headLabel: "0.15.2", + }); + + expect(markdown).toContain("+15% and +5.00 ms"); + expect(markdown).toContain("+20% and +8.00 MiB"); + }); +}); diff --git a/scripts/compare-release-benchmarks.ts b/scripts/compare-release-benchmarks.ts new file mode 100644 index 00000000..f112d0ed --- /dev/null +++ b/scripts/compare-release-benchmarks.ts @@ -0,0 +1,458 @@ +#!/usr/bin/env bun + +import { appendFileSync, existsSync, mkdirSync, readdirSync, writeFileSync } from "node:fs"; +import os from "node:os"; +import path from "node:path"; +import type { + BenchmarkComparisonResult, + BenchmarkComparisonRow, + BenchmarkMetricResult, + BenchmarkRunResult, + BenchmarkThreshold, +} from "../benchmarks/lib/benchmark-result"; + +interface ParsedVersion { + raw: string; + major: number; + minor: number; + patch: number; + prerelease?: string; +} + +interface CompareOptions { + releaseDir: string; + version: string; + head?: string; + base?: string; + out?: string; + summary?: string; +} + +const BENCHMARK_FILE_PATTERN = /^bench-(\d+\.\d+\.\d+(?:-[0-9A-Za-z.-]+)?)\.json$/; +const repoRoot = path.resolve(import.meta.dir, ".."); + +/** Resolve the directory that stores committed release benchmark snapshots. 
/**
 * Parse the semver subset used by Hunk release tags and benchmark files.
 *
 * Accepts `MAJOR.MINOR.PATCH` with an optional `-prerelease` suffix. Anything else (a leading `v`,
 * build metadata, missing components) is rejected.
 *
 * @throws Error when `version` does not match that shape.
 */
export function parseReleaseVersion(version: string): ParsedVersion {
  const match = /^(\d+)\.(\d+)\.(\d+)(?:-([0-9A-Za-z.-]+))?$/.exec(version);
  if (!match) {
    throw new Error(`Invalid release benchmark version: ${version}`);
  }

  return {
    raw: version,
    major: Number(match[1]),
    minor: Number(match[2]),
    patch: Number(match[3]),
    prerelease: match[4],
  };
}

/**
 * Order two dot-separated prerelease strings by SemVer 2.0.0 precedence:
 * numeric identifiers compare numerically and sort below alphanumeric ones, alphanumeric
 * identifiers compare in ASCII order, and a longer identifier list wins when all shared
 * identifiers are equal.
 */
function comparePrereleaseIdentifiers(left: string, right: string): number {
  const leftParts = left.split(".");
  const rightParts = right.split(".");
  const shared = Math.min(leftParts.length, rightParts.length);

  for (let index = 0; index < shared; index += 1) {
    const leftPart = leftParts[index]!;
    const rightPart = rightParts[index]!;
    if (leftPart === rightPart) {
      continue;
    }

    const leftIsNumeric = /^\d+$/.test(leftPart);
    const rightIsNumeric = /^\d+$/.test(rightPart);

    if (leftIsNumeric && rightIsNumeric) {
      const delta = Number(leftPart) - Number(rightPart);
      if (delta !== 0) {
        return delta;
      }
      continue;
    }

    if (leftIsNumeric !== rightIsNumeric) {
      return leftIsNumeric ? -1 : 1;
    }

    // Plain code-unit comparison: deterministic on every machine, unlike
    // locale-aware collation.
    return leftPart < rightPart ? -1 : 1;
  }

  return leftParts.length - rightParts.length;
}

/**
 * Compare two release versions with stable releases ordered after their prereleases.
 *
 * @returns A negative number when `left` is lower than `right`, a positive number when it is
 * higher, and `0` when both have the same precedence. Only the sign is meaningful.
 * @throws Error when either version is not a valid release version.
 */
export function compareReleaseVersions(left: string, right: string): number {
  const parsedLeft = parseReleaseVersion(left);
  const parsedRight = parseReleaseVersion(right);

  for (const key of ["major", "minor", "patch"] as const) {
    const delta = parsedLeft[key] - parsedRight[key];
    if (delta !== 0) {
      return delta;
    }
  }

  if (!parsedLeft.prerelease && !parsedRight.prerelease) {
    return 0;
  }

  // A stable release outranks any prerelease of the same MAJOR.MINOR.PATCH.
  if (!parsedLeft.prerelease) {
    return 1;
  }

  if (!parsedRight.prerelease) {
    return -1;
  }

  return comparePrereleaseIdentifiers(parsedLeft.prerelease, parsedRight.prerelease);
}
/**
 * Determine whether a comparable metric exceeded its material-regression threshold.
 *
 * A regression is material only when the head median grew by at least
 * `threshold.minAbsoluteRegression` AND the head/base ratio is at least
 * `threshold.maxRegressionRatio`. Improvements and unchanged values never count.
 *
 * Medians that are not finite numbers (for example `NaN`, or `null` read from a damaged snapshot)
 * are reported as a regression: every numeric comparison against them is false, so they would
 * otherwise slip through the gate unnoticed.
 *
 * @param baseMedian Median of the baseline snapshot.
 * @param headMedian Median of the snapshot under test, in the same unit as `baseMedian`.
 * @param threshold Relative and absolute limits for this metric.
 * @returns `true` when the change must fail the gate.
 */
export function isMaterialRegression(
  baseMedian: number,
  headMedian: number,
  threshold: BenchmarkThreshold,
): boolean {
  if (!Number.isFinite(baseMedian) || !Number.isFinite(headMedian)) {
    return true;
  }

  const absoluteDelta = headMedian - baseMedian;
  if (absoluteDelta <= 0) {
    return false;
  }

  if (absoluteDelta < threshold.minAbsoluteRegression) {
    return false;
  }

  // A zero baseline has no meaningful ratio; any growth that already cleared
  // the absolute threshold counts as material.
  if (baseMedian === 0) {
    return true;
  }

  return headMedian / baseMedian >= threshold.maxRegressionRatio;
}
/**
 * Compare two benchmark snapshots metric by metric.
 *
 * Every metric name found in either snapshot yields one row, sorted by name:
 * - only in `head`: `missing-base` when comparable, otherwise `informational`;
 * - only in `base`: `missing-head` when comparable, otherwise `informational`;
 * - in both: `fail` or `pass` per the material-regression check when the head metric is
 *   comparable and a threshold is available, otherwise `informational`.
 *
 * The comparison as a whole is marked failed when any row is `fail` or `missing-head`.
 */
export function compareBenchmarkRuns(
  base: BenchmarkRunResult,
  head: BenchmarkRunResult,
): BenchmarkComparisonResult {
  const indexByName = (run: BenchmarkRunResult) => {
    const byName = new Map<string, BenchmarkMetricResult>();
    for (const entry of run.results) {
      byName.set(entry.name, entry);
    }
    return byName;
  };

  const baseline = indexByName(base);
  const candidate = indexByName(head);
  const metricNames = Array.from(new Set([...baseline.keys(), ...candidate.keys()]));
  metricNames.sort();

  const rows: BenchmarkComparisonRow[] = [];
  for (const name of metricNames) {
    const before = baseline.get(name);
    const after = candidate.get(name);
    const threshold = comparableThreshold(before, after);

    if (!before && after) {
      // New metric: there is nothing to compare against yet.
      rows.push({
        name,
        unit: after.unit,
        baseMedian: 0,
        headMedian: after.median,
        absoluteDelta: after.median,
        relativeDelta: Number.POSITIVE_INFINITY,
        threshold,
        status: after.comparable ? "missing-base" : "informational",
        source: after.source,
      });
    } else if (before && !after) {
      // Metric disappeared from the head snapshot.
      rows.push({
        name,
        unit: before.unit,
        baseMedian: before.median,
        headMedian: 0,
        absoluteDelta: -before.median,
        relativeDelta: -1,
        threshold,
        status: before.comparable ? "missing-head" : "informational",
        source: before.source,
      });
    } else {
      const checkedBase = before!;
      const checkedHead = after!;

      let status: BenchmarkComparisonRow["status"] = "informational";
      if (checkedHead.comparable && threshold) {
        const regressed = isMaterialRegression(checkedBase.median, checkedHead.median, threshold);
        status = regressed ? "fail" : "pass";
      }

      // Key order is kept stable because rows are serialized to the JSON report.
      rows.push({
        name,
        unit: checkedHead.unit,
        baseMedian: checkedBase.median,
        headMedian: checkedHead.median,
        absoluteDelta: checkedHead.median - checkedBase.median,
        relativeDelta: relativeDelta(checkedBase.median, checkedHead.median),
        threshold,
        status,
        source: checkedHead.source,
      });
    }
  }

  const failed = rows.some(({ status }) => status === "fail" || status === "missing-head");

  return {
    version: 1,
    generatedAt: new Date().toISOString(),
    baseSha: base.gitSha,
    headSha: head.gitSha,
    failed,
    rows,
  };
}
"B" : unit; +} + +function formatThresholdValue(value: number, unit: BenchmarkMetricResult["unit"]) { + if (unit === "bytes") { + return `${formatNumber(value / (1024 * 1024))} MiB`; + } + + if (unit === "ms") { + return `${formatNumber(value)} ms`; + } + + return `${formatNumber(value)} ${formatUnit(unit)}`; +} + +function formatThreshold( + threshold: BenchmarkThreshold | undefined, + unit: BenchmarkMetricResult["unit"], +) { + if (!threshold) { + return "—"; + } + + return `+${((threshold.maxRegressionRatio - 1) * 100).toFixed(0)}% and +${formatThresholdValue( + threshold.minAbsoluteRegression, + unit, + )}`; +} + +/** Render a compact Markdown report suitable for GitHub Actions summaries. */ +export function formatComparisonMarkdown( + comparison: BenchmarkComparisonResult, + options: { baseLabel: string; headLabel: string }, +) { + const failedRows = comparison.rows.filter( + (row) => row.status === "fail" || row.status === "missing-head", + ); + const lines = [ + "## Release benchmark gate", + "", + comparison.failed + ? `❌ ${failedRows.length} material benchmark regression${failedRows.length === 1 ? "" : "s"} found.` + : "✅ No material benchmark regressions found.", + "", + `Base: \`${options.baseLabel}\` `, + `Head: \`${options.headLabel}\``, + "", + "| Status | Metric | Base median | Head median | Δ | Threshold |", + "| --- | --- | ---: | ---: | ---: | --- |", + ]; + + for (const row of comparison.rows) { + const unit = formatUnit(row.unit); + const status = row.status === "fail" || row.status === "missing-head" ? 
/**
 * Return the value that follows the flag at `index` in `args`.
 *
 * @throws Error naming the flag when the next argument is absent or empty.
 */
function readArgValue(args: string[], index: number): string {
  const flag = args[index];
  const candidate = args[index + 1];

  if (candidate !== undefined && candidate.length > 0) {
    return candidate;
  }

  throw new Error(`Missing value for ${flag}`);
}
Run bun run bench:release before tagging this release.`, + ); + } + + const baseCandidate = options.base + ? { version: path.basename(options.base), path: options.base } + : findPreviousReleaseBenchmark(options.version, options.releaseDir); + if (!baseCandidate) { + throw new Error( + `Missing previous release benchmark in ${options.releaseDir}. Backfill at least one lower stable release benchmark before releasing.`, + ); + } + + const [base, head] = await Promise.all([ + loadBenchmarkRun(baseCandidate.path), + loadBenchmarkRun(headPath), + ]); + const comparison = compareBenchmarkRuns(base, head); + + if (options.out) { + mkdirSync(path.dirname(options.out), { recursive: true }); + writeFileSync(options.out, `${JSON.stringify(comparison, null, 2)}\n`); + } + + const markdown = formatComparisonMarkdown(comparison, { + baseLabel: options.base ?? baseCandidate.version, + headLabel: path.basename(headPath), + }); + process.stdout.write(markdown); + + if (options.summary) { + appendFileSync(options.summary, `\n${markdown}`); + } + + if (comparison.failed) { + throw new Error( + "Release benchmark gate failed. 
/**
 * Parse release benchmark CLI options while preserving explicit output paths.
 *
 * Defaults come from `package.json` (version), the `HUNK_RELEASE_BENCHMARK_SAMPLES` environment
 * variable (sample count, falling back to 5 when unset or empty) and the versioned snapshot path
 * for the resolved version. `--version` re-derives the output path unless `--out` was given
 * earlier on the command line.
 *
 * @param args CLI arguments without the runtime and script entries.
 * @returns The resolved version, sample count and absolute output path.
 * @throws Error for unknown flags, flags without a value, an invalid `--version`, or a sample
 * count that is not a positive integer.
 */
export async function parseRunReleaseBenchmarkArgs(
  args: string[],
): Promise<RunReleaseBenchmarkOptions> {
  const version = await readPackageVersion(repoRoot);
  // `||` instead of `??`: an empty variable (common for unset CI inputs)
  // should behave like an unset one rather than parse to 0.
  const defaultSamples = process.env.HUNK_RELEASE_BENCHMARK_SAMPLES?.trim() || "5";
  const options: RunReleaseBenchmarkOptions = {
    version,
    samples: Number(defaultSamples),
    out: releaseBenchmarkPath(version, releaseBenchmarkDir(repoRoot)),
  };
  let outExplicitlySet = false;

  for (let index = 0; index < args.length; index += 1) {
    const arg = args[index]!;

    if (arg === "--version") {
      options.version = readArgValue(args, index);
      // releaseBenchmarkPath rejects malformed versions. Call it even when
      // --out was already given so validation does not depend on flag order.
      const versionedOut = releaseBenchmarkPath(options.version, releaseBenchmarkDir(repoRoot));
      if (!outExplicitlySet) {
        options.out = versionedOut;
      }
      index += 1;
      continue;
    }

    if (arg === "--samples") {
      options.samples = Number(readArgValue(args, index));
      index += 1;
      continue;
    }

    if (arg === "--out") {
      options.out = path.resolve(readArgValue(args, index));
      outExplicitlySet = true;
      index += 1;
      continue;
    }

    throw new Error(`Unknown release benchmark argument: ${arg}`);
  }

  // A sample count is a repetition count, so fractional values make no sense.
  if (!Number.isInteger(options.samples) || options.samples < 1) {
    throw new Error("--samples must be a positive integer");
  }

  return options;
}