diff --git a/.github/workflows/benchmarks.yml b/.github/workflows/benchmarks.yml index 510e966d..5aeb6e97 100644 --- a/.github/workflows/benchmarks.yml +++ b/.github/workflows/benchmarks.yml @@ -9,10 +9,22 @@ on: - "docs/**" - "assets/**" - "LICENSE" + pull_request: + paths-ignore: + - "**/*.md" + - "docs/**" + - "assets/**" + - "LICENSE" workflow_dispatch: +permissions: + contents: read + issues: write + pull-requests: write + env: SKIP_INSTALL_SIMPLE_GIT_HOOKS: "1" + HUNK_BENCHMARK_SAMPLES: ${{ github.event_name == 'pull_request' && '1' || '3' }} concurrency: group: benchmarks-${{ github.workflow }}-${{ github.ref }} @@ -25,6 +37,8 @@ jobs: steps: - name: Check out repository uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + fetch-depth: 0 - name: Set up Bun uses: oven-sh/setup-bun@0c5077e51419868618aeaa5fe8019c62421857d6 # v2.2.0 @@ -34,36 +48,61 @@ jobs: - name: Install dependencies run: bun install --frozen-lockfile - - name: Run bootstrap benchmark + - name: Run head benchmarks run: | mkdir -p benchmark-results - bun run bench:bootstrap-load | tee benchmark-results/bootstrap-load.txt + bun run bench -- --samples "$HUNK_BENCHMARK_SAMPLES" --out benchmark-results/head.json \ + | tee benchmark-results/head.txt - - name: Run highlight prefetch benchmark + - name: Run base benchmarks + if: github.event_name == 'pull_request' run: | - bun run bench:highlight-prefetch | tee benchmark-results/highlight-prefetch.txt + git fetch origin main + git worktree add ../hunk-benchmark-base origin/main + rm -rf ../hunk-benchmark-base/benchmarks + cp -R benchmarks ../hunk-benchmark-base/benchmarks + cd ../hunk-benchmark-base + bun install --frozen-lockfile + bun run benchmarks/run.ts --samples "$HUNK_BENCHMARK_SAMPLES" --out "$GITHUB_WORKSPACE/benchmark-results/base.json" \ + | tee "$GITHUB_WORKSPACE/benchmark-results/base.txt" - - name: Run large stream benchmark + - name: Compare benchmark results + id: compare + if: github.event_name == 
'pull_request' + continue-on-error: true run: | - bun run bench:large-stream | tee benchmark-results/large-stream.txt + bun run bench:compare -- \ + --base benchmark-results/base.json \ + --head benchmark-results/head.json \ + --out benchmark-results/comparison.json \ + --markdown benchmark-results/summary.md - name: Publish benchmark summary + if: always() run: | - { - echo '## Benchmark results' - echo - for file in benchmark-results/*.txt; do - echo "### $(basename "$file")" + if [ -f benchmark-results/summary.md ]; then + cat benchmark-results/summary.md >> "$GITHUB_STEP_SUMMARY" + else + { + echo '## Benchmark results' + echo echo '```text' - cat "$file" + cat benchmark-results/head.txt echo '```' - echo - done - } >> "$GITHUB_STEP_SUMMARY" + } >> "$GITHUB_STEP_SUMMARY" + fi + + - name: Comment benchmark summary on PR + if: always() && github.event_name == 'pull_request' && hashFiles('benchmark-results/summary.md') != '' + continue-on-error: true + env: + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + run: bun run bench:comment-pr -- --body benchmark-results/summary.md - name: Upload benchmark artifacts + if: always() uses: actions/upload-artifact@043fb46d1a93c77aae656e7c1c64a875d1fc6a0a # v7.0.1 with: name: benchmark-results - path: benchmark-results/*.txt + path: benchmark-results/* if-no-files-found: error diff --git a/CHANGELOG.md b/CHANGELOG.md index 7f62d9f5..723c75af 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,8 @@ All notable user-visible changes to Hunk are documented in this file. ### Added +- Added CI performance benchmarks with PR comparison comments to guard Hunk startup, loading, rendering, highlighting, navigation, and memory costs. + ### Changed ### Fixed diff --git a/benchmarks/README.md b/benchmarks/README.md index 1bb5e78d..7027d596 100644 --- a/benchmarks/README.md +++ b/benchmarks/README.md @@ -1,28 +1,106 @@ # Benchmarks -Benchmark scripts, shared fixtures, and local result artifacts live here. 
+Benchmark scripts, shared fixtures, and local result artifacts live here. These benchmarks protect Hunk's core promise: fast loading, fast first render, fast navigation, and predictable memory use on large diffs. -## Scripts +## Running locally -- `bootstrap-load.ts` — measures bootstrap and git-loader cost on a synthetic large repo -- `highlight-prefetch.ts` — measures selected-file highlight startup and adjacent prefetch readiness -- `large-stream.ts` — measures large split-stream first-frame and scroll cost, including note-enabled cases -- `large-stream-profile.ts` — profiles the main pure planning stages behind the large split-stream benchmark -- `large-stream-fixture.ts` — shared synthetic diff fixture used by the large-stream benchmarks +Run the full benchmark suite with one JSON result file: -## Running +```bash +bun run bench -- --samples 3 --out benchmarks/results/head.json +``` -From the project root: +Run focused scripts while iterating: ```bash bun run bench:bootstrap-load +bun run bench:working-tree-load +bun run bench:changeset-parse +bun run bench:render-layout bun run bench:highlight-prefetch bun run bench:large-stream bun run bench:large-stream-profile +bun run bench:memory +bun run bench:competitors +``` + +Compare two JSON result files: + +```bash +bun run bench:compare -- \ + --base benchmarks/results/base.json \ + --head benchmarks/results/head.json \ + --markdown benchmarks/results/summary.md +``` + +## Scripts + +- `bootstrap-load.ts` — measures bootstrap and git-loader cost on a synthetic large repo, including file-pair bootstrap. +- `working-tree-load.ts` — measures git working-tree loads across small, medium, large, many-untracked, and few-large-untracked repos. +- `changeset-parse.ts` — measures patch normalization, Pierre parsing, patch chunking, and normalized `DiffFile` construction for many-small-files, balanced, and large-single-file patches. 
+- `render-layout.ts` — measures pure split/stack row building, section geometry, and review-plan construction for many-small-files, balanced, and large-single-file streams. +- `highlight-prefetch.ts` — measures selected-file highlight startup and adjacent prefetch readiness. +- `large-stream.ts` — measures large split-stream first-frame and scroll cost. +- `large-stream-profile.ts` — optional local profiler for the main pure planning stages behind the large split-stream benchmark. +- `memory.ts` — optional local RSS/heap profiler after fixture loading, planning, first frame, and next-hunk navigation. +- `competitors.ts` — optional local informational comparisons against `git diff --no-ext-diff`, `delta`, `difftastic`, and `diff-so-fancy` when installed. +- `large-stream-fixture.ts` and `lib/fixtures.ts` — shared deterministic synthetic fixtures. + +## Output format + +Each script prints `METRIC name=value` lines. `benchmarks/run.ts` repeats scripts, aggregates samples, and writes JSON: + +```json +{ + "version": 1, + "samplesPerBenchmark": 3, + "results": [ + { + "name": "large-stream/cold_first_frame_ms", + "unit": "ms", + "samples": [61.2, 60.8, 62.1], + "median": 61.2, + "p75": 62.1, + "p95": 62.1, + "threshold": { + "maxRegressionRatio": 1.15, + "minAbsoluteRegression": 5 + }, + "comparable": true + } + ] +} ``` -## Results +## CI policy + +`.github/workflows/benchmarks.yml` runs the suite on `main`, pull requests, and manual dispatch. On pull requests it: + +1. Runs benchmarks on the PR revision. +2. Checks out `origin/main` in a sibling worktree. +3. Copies the PR benchmark harness into that base worktree so new benchmarks can compare base code during the PR that introduces them. +4. Runs the same benchmarks on base. +5. Compares medians and marks regressions in the PR summary without blocking the PR. +6. Uploads raw JSON/text artifacts. +7. 
Posts or updates one PR comment with a curated key-benchmark table, always including regressions and hiding noisy supporting metrics. + +The default CI suite intentionally excludes optional memory profiling, pure-planning profiling, and competitor comparisons to keep PR feedback fast. Pull requests use one sample per benchmark and are informational/non-blocking; `main` runs keep three samples for a more stable history. Run `bun run bench -- --include-competitors` or focused scripts locally when deeper diagnostics are needed. + +Initial thresholds: + +- Time metrics (`*_ms`): fail when PR median is more than 15% slower **and** more than 5ms slower. +- Memory metrics (`rss`/`heap`): fail when PR median is more than 20% higher **and** more than 8MiB higher. +- Counts, fixture sizes, availability flags, and optional competitor metrics are informational. + +Competitor comparisons are intentionally non-failing because installed tool versions and feature parity vary by environment. + +## Updating thresholds + +Prefer fixing regressions first. If a maintainer accepts an intentional tradeoff, update the threshold in `benchmarks/lib/benchmark-result.ts` and mention why in the PR. Keep thresholds broad enough for CI variability but tight enough to catch visible slowdowns. -Use `benchmarks/results/` for local benchmark output, notes, or captured runs. +## Noise troubleshooting -The folder stays in the repo so the convention is discoverable, but local result files inside it are ignored by default. +- Re-run failed jobs before investigating tiny deltas; thresholds include absolute tolerances to avoid failing on sub-5ms noise. +- PTY/renderer-adjacent metrics are noisier than pure parsing/planning metrics. +- Use `--samples 5` locally when validating borderline changes. +- Inspect uploaded raw samples before changing thresholds.
diff --git a/benchmarks/changeset-parse.ts b/benchmarks/changeset-parse.ts new file mode 100644 index 00000000..5925623d --- /dev/null +++ b/benchmarks/changeset-parse.ts @@ -0,0 +1,59 @@ +// Benchmark raw patch parsing and normalized DiffFile construction for several diff shapes. +import { performance } from "perf_hooks"; +import { parsePatchFiles } from "@pierre/diffs"; +import { buildDiffFile } from "../src/core/diffFile"; +import { findPatchChunk, splitPatchIntoFileChunks } from "../src/core/patch/chunks"; +import { normalizePatchText } from "../src/core/patch/normalize"; +import { createSyntheticPatch } from "./lib/fixtures"; + +interface Scenario { + name: string; + patch: string; +} + +const scenarios: Scenario[] = [ + { + name: "many_small_files", + patch: createSyntheticPatch({ fileCount: 240, lines: 48, changedLines: 8 }), + }, + { + name: "balanced_changeset", + patch: createSyntheticPatch({ fileCount: 96, lines: 220, changedLines: 48 }), + }, + { + name: "large_single_file", + patch: createSyntheticPatch({ fileCount: 1, lines: 18_000, changedLines: 2_000 }), + }, +]; + +function measureScenario({ name, patch }: Scenario) { + const normalizeStart = performance.now(); + const normalized = normalizePatchText(patch); + const normalizeMs = performance.now() - normalizeStart; + + const parseStart = performance.now(); + const parsed = parsePatchFiles(normalized, "patch", true); + const parseMs = performance.now() - parseStart; + + const splitStart = performance.now(); + const chunks = splitPatchIntoFileChunks(normalized); + const splitMs = performance.now() - splitStart; + + const files = parsed.flatMap((entry) => entry.files); + const buildStart = performance.now(); + const diffFiles = files.map((metadata, index) => + buildDiffFile(metadata, findPatchChunk(metadata, chunks, index), index, name, null), + ); + const buildMs = performance.now() - buildStart; + + console.log(`METRIC ${name}_normalize_patch_ms=${normalizeMs.toFixed(2)}`); + console.log(`METRIC 
${name}_parse_patch_ms=${parseMs.toFixed(2)}`); + console.log(`METRIC ${name}_split_chunks_ms=${splitMs.toFixed(2)}`); + console.log(`METRIC ${name}_build_diff_files_ms=${buildMs.toFixed(2)}`); + console.log(`METRIC ${name}_files=${diffFiles.length}`); + console.log(`METRIC ${name}_patch_bytes=${Buffer.byteLength(normalized)}`); +} + +for (const scenario of scenarios) { + measureScenario(scenario); +} diff --git a/benchmarks/comment-pr.ts b/benchmarks/comment-pr.ts new file mode 100644 index 00000000..387df039 --- /dev/null +++ b/benchmarks/comment-pr.ts @@ -0,0 +1,98 @@ +#!/usr/bin/env bun +import { readFileSync } from "node:fs"; + +/** Heading benchmarks/compare.ts writes into every summary; an empty marker would match any comment. */ +const marker = "## Hunk benchmark results"; + +function requireEnv(name: string) { + const value = process.env[name]; + if (!value) { + throw new Error(`Missing ${name}`); + } + return value; +} + +function parseArgs(args: string[]) { + for (let index = 0; index < args.length; index += 1) { + if (args[index] === "--body") { + const value = args[index + 1]; + if (!value) { + throw new Error("Missing value for --body"); + } + return { bodyPath: value }; + } + } + + throw new Error("Usage: bun run benchmarks/comment-pr.ts --body benchmark-results/summary.md"); +} + +async function githubRequest(path: string, init: RequestInit = {}) { + const token = requireEnv("GITHUB_TOKEN"); + const response = await fetch(`https://api.github.com${path}`, { + ...init, + headers: { + Accept: "application/vnd.github+json", + Authorization: `Bearer ${token}`, + "X-GitHub-Api-Version": "2022-11-28", + ...init.headers, + }, + }); + + if (!response.ok) { + const text = await response.text(); + throw new Error( + `GitHub API ${init.method ?? "GET"} ${path} failed: ${response.status} ${text}`, + ); + } + + return response.status === 204 ? null : response.json(); +} + +/** Fetch every issue comment page so the marker lookup can update old bot comments.
*/ +async function fetchAllComments(repository: string, pullRequestNumber: number) { + const comments: Array<{ id: number; body?: string }> = []; + + for (let page = 1; ; page += 1) { + const batch = (await githubRequest( + `/repos/${repository}/issues/${pullRequestNumber}/comments?per_page=100&page=${page}`, + )) as Array<{ id: number; body?: string }>; + + comments.push(...batch); + + if (batch.length < 100) { + return comments; + } + } +} + +const { bodyPath } = parseArgs(Bun.argv.slice(2)); +const repository = requireEnv("GITHUB_REPOSITORY"); +const eventPath = requireEnv("GITHUB_EVENT_PATH"); +const event = JSON.parse(readFileSync(eventPath, "utf8")) as { pull_request?: { number: number } }; +const pullRequestNumber = event.pull_request?.number; + +if (!pullRequestNumber) { + console.log("No pull request in event payload; skipping benchmark comment."); + process.exit(0); +} + +const body = readFileSync(bodyPath, "utf8"); +const comments = await fetchAllComments(repository, pullRequestNumber); +const existing = comments.find((comment) => comment.body?.includes(marker)); + +if (existing) { + await githubRequest(`/repos/${repository}/issues/comments/${existing.id}`, { + method: "PATCH", + body: JSON.stringify({ body }), + }); + console.log(`Updated benchmark comment ${existing.id}.`); +} else { + const created = (await githubRequest( + `/repos/${repository}/issues/${pullRequestNumber}/comments`, + { + method: "POST", + body: JSON.stringify({ body }), + }, + )) as { id: number }; + console.log(`Created benchmark comment ${created.id}.`); +} diff --git a/benchmarks/compare.ts b/benchmarks/compare.ts new file mode 100644 index 00000000..f66fd45d --- /dev/null +++ b/benchmarks/compare.ts @@ -0,0 +1,296 @@ +#!/usr/bin/env bun +import { mkdirSync, readFileSync, writeFileSync } from "node:fs"; +import { dirname, resolve } from "node:path"; +import type { + BenchmarkComparisonResult, + BenchmarkComparisonRow, + BenchmarkMetricResult, + BenchmarkRunResult, +} from
"./lib/benchmark-result"; + +interface CompareOptions { + base: string; + head: string; + out?: string; + markdown?: string; +} + +/** Return the argument that follows the flag at `index`, throwing when it is missing or empty. */ +function readArgValue(args: string[], index: number) { + const value = args[index + 1]; + if (!value) { + throw new Error(`Missing value for ${args[index]}`); + } + return value; +} + +/** Parse compare CLI flags: `--base` and `--head` are required, `--out` and `--markdown` optional. */ +function parseArgs(args: string[]): CompareOptions { + const options: Partial<CompareOptions> = {}; + + for (let index = 0; index < args.length; index += 1) { + const arg = args[index]!; + + if (arg === "--base") { + options.base = readArgValue(args, index); + index += 1; + continue; + } + + if (arg === "--head") { + options.head = readArgValue(args, index); + index += 1; + continue; + } + + if (arg === "--out") { + options.out = readArgValue(args, index); + index += 1; + continue; + } + + if (arg === "--markdown") { + options.markdown = readArgValue(args, index); + index += 1; + continue; + } + + throw new Error(`Unknown benchmark compare argument: ${arg}`); + } + + if (!options.base || !options.head) { + throw new Error( + "Usage: bun run benchmarks/compare.ts --base base.json --head head.json [--out compare.json] [--markdown summary.md]", + ); + } + + return options as CompareOptions; +} + +/** Read and JSON-parse a benchmark run file; the result shape is asserted, not validated. */ +function readRun(path: string): BenchmarkRunResult { + return JSON.parse(readFileSync(path, "utf8")) as BenchmarkRunResult; +} + +function compareMetric( + base: BenchmarkMetricResult | undefined, + head: BenchmarkMetricResult | undefined, +) { + if (!base && !head) { + throw new Error("Cannot compare two missing metrics"); + } + + const metric = head ?? base!; + const baseMedian = base?.median ?? 0; + const headMedian = head?.median ?? 0; + const absoluteDelta = headMedian - baseMedian; + const relativeDelta = baseMedian === 0 ?
0 : absoluteDelta / baseMedian; + + let status: BenchmarkComparisonRow["status"] = "pass"; + if (!base) { + status = "missing-base"; + } else if (!head) { + status = "missing-head"; + } else if (!metric.comparable || metric.name.includes("competitor_")) { + status = "informational"; + } else if ( + metric.threshold && + headMedian > baseMedian * metric.threshold.maxRegressionRatio && + absoluteDelta > metric.threshold.minAbsoluteRegression + ) { + status = "fail"; + } + + return { + name: metric.name, + unit: metric.unit, + baseMedian, + headMedian, + absoluteDelta, + relativeDelta, + threshold: metric.threshold, + status, + source: metric.source, + } satisfies BenchmarkComparisonRow; +} + +function formatNumber(value: number, unit: BenchmarkComparisonRow["unit"]) { + if (unit === "bytes") { + const mib = value / (1024 * 1024); + return `${mib.toFixed(1)} MiB`; + } + + if (unit === "ms") { + return `${value.toFixed(value >= 100 ? 1 : 2)} ms`; + } + + if (unit === "boolean") { + return value ? "yes" : "no"; + } + + return value.toFixed(Number.isInteger(value) ? 0 : 2); +} + +function formatDelta(row: BenchmarkComparisonRow) { + const sign = row.absoluteDelta >= 0 ? "+" : ""; + const relative = row.baseMedian === 0 ? 
"n/a" : `${sign}${(row.relativeDelta * 100).toFixed(1)}%`; + return `${sign}${formatNumber(row.absoluteDelta, row.unit)} (${relative})`; +} + +function formatThreshold(row: BenchmarkComparisonRow) { + if (!row.threshold) { + return "—"; + } + + return `+${((row.threshold.maxRegressionRatio - 1) * 100).toFixed(0)}% and +${formatNumber(row.threshold.minAbsoluteRegression, row.unit)}`; +} + +function statusIcon(status: BenchmarkComparisonRow["status"]) { + switch (status) { + case "pass": + return "✅"; + case "fail": + return "❌"; + case "informational": + return "ℹ️"; + case "missing-base": + case "missing-head": + return "⚠️"; + } +} + +const keyBenchmarkNames = new Set([ + "bootstrap-load/git_bootstrap_ms", + "bootstrap-load/file_pair_bootstrap_ms", + "working-tree-load/small_worktree_load_ms", + "working-tree-load/medium_worktree_load_ms", + "working-tree-load/large_worktree_load_ms", + "working-tree-load/untracked_many_small_load_ms", + "working-tree-load/untracked_few_large_load_ms", + "changeset-parse/many_small_files_parse_patch_ms", + "changeset-parse/balanced_changeset_parse_patch_ms", + "changeset-parse/large_single_file_parse_patch_ms", + "render-layout/many_small_files_review_plan_ms", + "render-layout/balanced_stream_review_plan_ms", + "render-layout/large_single_file_review_plan_ms", + "large-stream/cold_first_frame_ms", + "large-stream/warm_first_frame_ms", + "large-stream/windowed_scroll_ticks_ms", + "large-stream-profile/section_geometry_ms", + "large-stream-profile/review_plan_ms", + "highlight-prefetch/selected_startup_ms", + "highlight-prefetch/next_file_ready_ms", + "memory/first_frame_ms", + "memory/next_hunk_navigation_ms", + "memory/after_first_frame_rss_bytes", + "memory/after_navigation_rss_bytes", +]); + +/** Keep PR comments readable while all metrics remain enforced and available as artifacts. 
*/ +function selectDisplayedComparableRows(rows: BenchmarkComparisonRow[]) { + const displayed = new Map<string, BenchmarkComparisonRow>(); + + for (const row of rows) { + if (row.status === "fail" || row.status === "missing-head" || keyBenchmarkNames.has(row.name)) { + displayed.set(row.name, row); + } + } + + return [...displayed.values()].sort((left, right) => left.name.localeCompare(right.name)); +} + +function competitorTimingRows(rows: BenchmarkComparisonRow[]) { + return rows.filter( + (row) => + row.status === "informational" && + row.name.includes("/competitor_") && + row.name.endsWith("_ms"), + ); +} + +function buildMarkdown(comparison: BenchmarkComparisonResult) { + const comparableRows = comparison.rows.filter((row) => row.status !== "informational"); + const displayedComparableRows = selectDisplayedComparableRows(comparableRows); + const hiddenComparableCount = comparableRows.length - displayedComparableRows.length; + const displayedCompetitorRows = competitorTimingRows(comparison.rows); + const lines = [ + "", + "## Hunk benchmark results", + "", + comparison.failed + ? "❌ One or more benchmarks regressed beyond the configured threshold." + : "✅ Benchmarks are within the configured thresholds.", + "", + `Base: \`${comparison.baseSha?.slice(0, 12) ?? "unknown"}\` · Head: \`${comparison.headSha?.slice(0, 12) ?? "unknown"}\``, + "", + "### Key Hunk benchmarks", + "", + "| Benchmark | Base median | PR median | Delta | Threshold | Status |", + "|---|---:|---:|---:|---:|:---:|", + ]; + + for (const row of displayedComparableRows) { + lines.push( + `| ${row.name} | ${formatNumber(row.baseMedian, row.unit)} | ${formatNumber(row.headMedian, row.unit)} | ${formatDelta(row)} | ${formatThreshold(row)} | ${statusIcon(row.status)} |`, + ); + } + + if (hiddenComparableCount > 0) { + lines.push( + "", + `${hiddenComparableCount} additional comparable Hunk metrics were checked but hidden to keep this comment readable.
See the workflow artifacts for full JSON and text output.`, + ); + } + + if (displayedCompetitorRows.length > 0) { + lines.push("", "### Informational competitor comparison", ""); + lines.push("| Benchmark | Base median | PR median | Delta | Status |"); + lines.push("|---|---:|---:|---:|:---:|"); + for (const row of displayedCompetitorRows) { + lines.push( + `| ${row.name} | ${formatNumber(row.baseMedian, row.unit)} | ${formatNumber(row.headMedian, row.unit)} | ${formatDelta(row)} | ${statusIcon(row.status)} |`, + ); + } + } + + lines.push("", "Raw JSON and text logs are available in the benchmark workflow artifacts.", ""); + return lines.join("\n"); +} + +const options = parseArgs(Bun.argv.slice(2)); +const base = readRun(options.base); +const head = readRun(options.head); +const baseByName = new Map(base.results.map((result) => [result.name, result])); +const headByName = new Map(head.results.map((result) => [result.name, result])); +const names = new Set([...baseByName.keys(), ...headByName.keys()]); +const rows = [...names] + .map((name) => compareMetric(baseByName.get(name), headByName.get(name))) + .sort((left, right) => left.name.localeCompare(right.name)); + +const comparison: BenchmarkComparisonResult = { + version: 1, + generatedAt: new Date().toISOString(), + baseSha: base.gitSha, + headSha: head.gitSha, + failed: rows.some((row) => row.status === "fail" || row.status === "missing-head"), + rows, +}; +const markdown = buildMarkdown(comparison); + +console.log(markdown); + +if (options.out) { + const outPath = resolve(options.out); + mkdirSync(dirname(outPath), { recursive: true }); + writeFileSync(outPath, `${JSON.stringify(comparison, null, 2)}\n`); +} + +if (options.markdown) { + const markdownPath = resolve(options.markdown); + mkdirSync(dirname(markdownPath), { recursive: true }); + writeFileSync(markdownPath, markdown); +} + +if (comparison.failed) { + process.exitCode = 1; +} diff --git a/benchmarks/competitors.ts b/benchmarks/competitors.ts new 
file mode 100644 index 00000000..63ab61c9 --- /dev/null +++ b/benchmarks/competitors.ts @@ -0,0 +1,112 @@ +// Optional informational comparisons against diff-oriented CLI tools when installed. +import { writeFileSync } from "node:fs"; +import { join } from "node:path"; +import { performance } from "perf_hooks"; +import { + createChangedRepo, + createSyntheticPatch, + createSyntheticSource, + createTemporaryDirectory, + git, +} from "./lib/fixtures"; + +interface ToolScenario { + metric: string; + command: string[]; + stdin?: string; + cwd?: string; +} + +function commandExists(command: string) { + const proc = Bun.spawnSync(["sh", "-c", `command -v ${command} >/dev/null 2>&1`], { + stdout: "ignore", + stderr: "ignore", + }); + return proc.exitCode === 0; +} + +function measureTool({ metric, command, stdin, cwd }: ToolScenario) { + const start = performance.now(); + const proc = Bun.spawnSync(command, { + cwd, + stdin: stdin === undefined ? "ignore" : Buffer.from(stdin), + stdout: "ignore", + stderr: "pipe", + env: { ...process.env, NO_COLOR: "1", TERM: "xterm-256color" }, + }); + const duration = performance.now() - start; + + if (proc.exitCode !== 0) { + const stderr = Buffer.from(proc.stderr).toString("utf8").trim(); + console.log(`METRIC ${metric}_available=0`); + if (stderr) { + console.warn(`${command.join(" ")} failed: ${stderr}`); + } + return; + } + + console.log(`METRIC ${metric}_ms=${duration.toFixed(2)}`); + console.log(`METRIC ${metric}_available=1`); +} + +const patch = createSyntheticPatch({ fileCount: 96, lines: 180, changedLines: 36 }); +const patchFixture = createTemporaryDirectory("hunk-competitor-patch-"); +const repoFixture = createChangedRepo({ fileCount: 96, lines: 180, changedLines: 36 }); + +try { + const patchPath = join(patchFixture.path, "large.patch"); + const beforePath = join(patchFixture.path, "before.ts"); + const afterPath = join(patchFixture.path, "after.ts"); + writeFileSync(patchPath, patch); + writeFileSync( + beforePath, + 
createSyntheticSource(1, false, { lines: 12_000, changedLines: 2_000 }), + ); + writeFileSync(afterPath, createSyntheticSource(1, true, { lines: 12_000, changedLines: 2_000 })); + + measureTool({ + metric: "competitor_git_diff_no_ext_diff", + command: ["git", "diff", "--no-ext-diff", "--no-color"], + cwd: repoFixture.path, + }); + + // Warm git's object lookup so the metric above still validates the fixture even if not compared. + git(repoFixture.path, "status", "--short"); + + if (commandExists("delta")) { + measureTool({ + metric: "competitor_delta_patch_stdin", + command: ["delta", "--no-gitconfig", "--paging=never"], + stdin: patch, + }); + } else { + console.log("METRIC competitor_delta_patch_stdin_available=0"); + } + + if (commandExists("difft")) { + measureTool({ + metric: "competitor_difftastic_file_pair", + command: ["difft", "--color=never", beforePath, afterPath], + }); + } else if (commandExists("difftastic")) { + measureTool({ + metric: "competitor_difftastic_file_pair", + command: ["difftastic", "--color=never", beforePath, afterPath], + }); + } else { + console.log("METRIC competitor_difftastic_file_pair_available=0"); + } + + if (commandExists("diff-so-fancy")) { + measureTool({ + metric: "competitor_diff_so_fancy_patch_stdin", + command: ["diff-so-fancy"], + stdin: patch, + }); + } else { + console.log("METRIC competitor_diff_so_fancy_patch_stdin_available=0"); + } +} finally { + patchFixture.cleanup(); + repoFixture.cleanup(); +} diff --git a/benchmarks/large-stream-fixture.ts b/benchmarks/large-stream-fixture.ts index 0a95c184..386a3e31 100644 --- a/benchmarks/large-stream-fixture.ts +++ b/benchmarks/large-stream-fixture.ts @@ -3,37 +3,19 @@ import type { AppBootstrap, DiffFile } from "../src/core/types"; export const DEFAULT_FILE_COUNT = 180; export const DEFAULT_LINES_PER_FILE = 120; -export const DEFAULT_NOTES_PER_FILE = 2; - interface LargeSplitStreamFixtureOptions { fileCount?: number; linesPerFile?: number; - notesPerFile?: number; -} - 
-function createAgentAnnotations(index: number, notesPerFile: number) { - if (notesPerFile <= 0) { - return []; - } - - return Array.from({ length: notesPerFile }, (_, noteIndex) => { - const startLine = 40 + noteIndex * 12; - const endLine = startLine + 5; - return { - id: `note:${index}:${noteIndex}`, - newRange: [startLine, endLine] as [number, number], - summary: `Explain the split-mode refactor in file ${index}, hunk note ${noteIndex + 1}.`, - rationale: - "Synthetic benchmark note to exercise inline note placement, guide rows, and note-enabled full-stream rendering.", - }; - }); + changedStartLine?: number; + changedEndLine?: number; } export function createLargeSplitDiffFile( index: number, { linesPerFile = DEFAULT_LINES_PER_FILE, - notesPerFile = 0, + changedStartLine = 37, + changedEndLine = 84, }: Omit<LargeSplitStreamFixtureOptions, "fileCount"> = {}, ): DiffFile { const path = `src/stream${index}.ts`; @@ -44,7 +26,7 @@ export function createLargeSplitDiffFile( const after = Array.from({ length: linesPerFile }, (_, lineIndex) => { const line = lineIndex + 1; - if (lineIndex >= 36 && lineIndex < 84) { + if (line >= changedStartLine && line <= changedEndLine) { return `export function stream${index}_${line}(value: number) { return value * ${line} + ${index}; }\n`; } @@ -66,40 +48,40 @@ export function createLargeSplitDiffFile( true, ); - const annotations = createAgentAnnotations(index, notesPerFile); - return { id: `stream:${index}`, path, patch: "", language: "typescript", - stats: { additions: 48, deletions: 48 }, + stats: { + additions: Math.max(0, changedEndLine - changedStartLine + 1), + deletions: Math.max(0, changedEndLine - changedStartLine + 1), + }, metadata, - agent: - annotations.length > 0 - ?
{ - path, - summary: `Synthetic note-heavy benchmark context for ${path}`, - annotations, - } - : null, + agent: null, }; } export function createLargeSplitStreamFiles({ fileCount = DEFAULT_FILE_COUNT, linesPerFile = DEFAULT_LINES_PER_FILE, - notesPerFile = 0, + changedStartLine, + changedEndLine, }: LargeSplitStreamFixtureOptions = {}) { return Array.from({ length: fileCount }, (_, index) => - createLargeSplitDiffFile(index + 1, { linesPerFile, notesPerFile }), + createLargeSplitDiffFile(index + 1, { + linesPerFile, + changedStartLine, + changedEndLine, + }), ); } export function createLargeSplitStreamBootstrap({ fileCount = DEFAULT_FILE_COUNT, linesPerFile = DEFAULT_LINES_PER_FILE, - notesPerFile = 0, + changedStartLine, + changedEndLine, }: LargeSplitStreamFixtureOptions = {}): AppBootstrap { return { input: { @@ -110,13 +92,18 @@ export function createLargeSplitStreamBootstrap({ }, }, changeset: { - id: `changeset:large-split-stream:${fileCount}:${linesPerFile}:${notesPerFile}`, + id: `changeset:large-split-stream:${fileCount}:${linesPerFile}`, sourceLabel: "repo", title: "repo working tree", - files: createLargeSplitStreamFiles({ fileCount, linesPerFile, notesPerFile }), + files: createLargeSplitStreamFiles({ + fileCount, + linesPerFile, + changedStartLine, + changedEndLine, + }), }, initialMode: "split", initialTheme: "midnight", - initialShowAgentNotes: notesPerFile > 0, + initialShowAgentNotes: false, }; } diff --git a/benchmarks/large-stream-profile.ts b/benchmarks/large-stream-profile.ts index aa8ee4b7..fa77419a 100644 --- a/benchmarks/large-stream-profile.ts +++ b/benchmarks/large-stream-profile.ts @@ -9,20 +9,10 @@ import { createLargeSplitStreamFiles, DEFAULT_FILE_COUNT, DEFAULT_LINES_PER_FILE, - DEFAULT_NOTES_PER_FILE, } from "./large-stream-fixture"; const theme = resolveTheme("midnight", null); -const windowedFiles = createLargeSplitStreamFiles({ notesPerFile: 0 }); -const noteFiles = createLargeSplitStreamFiles({ notesPerFile: 
DEFAULT_NOTES_PER_FILE }); - -function visibleAgentNotesForFile(file: (typeof noteFiles)[number]) { - const annotations = file.agent?.annotations ?? []; - return annotations.map((annotation, index) => ({ - id: `annotation:${file.id}:${annotation.id ?? index}`, - annotation, - })); -} +const windowedFiles = createLargeSplitStreamFiles(); function measureMs(run: () => void) { const start = performance.now(); @@ -43,24 +33,23 @@ const splitRowsMs = measureMs(() => { }); }); -let notePlannedRows = 0; -const noteReviewPlanMs = measureMs(() => { - noteFiles.forEach((file) => { +let plannedRows = 0; +const reviewPlanMs = measureMs(() => { + windowedFiles.forEach((file) => { const rows = buildSplitRows(file, null, theme); - notePlannedRows += buildReviewRenderPlan({ + plannedRows += buildReviewRenderPlan({ fileId: file.id, rows, showHunkHeaders: true, - visibleAgentNotes: visibleAgentNotesForFile(file), + visibleAgentNotes: [], }).length; }); }); console.log(`METRIC section_geometry_ms=${sectionGeometryMs.toFixed(2)}`); console.log(`METRIC split_rows_ms=${splitRowsMs.toFixed(2)}`); -console.log(`METRIC note_review_plan_ms=${noteReviewPlanMs.toFixed(2)}`); +console.log(`METRIC review_plan_ms=${reviewPlanMs.toFixed(2)}`); console.log(`METRIC split_rows=${windowedRows}`); -console.log(`METRIC note_planned_rows=${notePlannedRows}`); +console.log(`METRIC planned_rows=${plannedRows}`); console.log(`METRIC files=${DEFAULT_FILE_COUNT}`); console.log(`METRIC lines_per_file=${DEFAULT_LINES_PER_FILE}`); -console.log(`METRIC notes_per_file=${DEFAULT_NOTES_PER_FILE}`); diff --git a/benchmarks/large-stream.ts b/benchmarks/large-stream.ts index 21c739c3..3777ad30 100644 --- a/benchmarks/large-stream.ts +++ b/benchmarks/large-stream.ts @@ -1,5 +1,4 @@ -// Benchmark split-mode startup and scroll behaviour on very large review streams, -// including note-enabled cases that disable the placeholder windowing path. 
+// Benchmark split-mode startup and scroll behaviour on very large review streams. import { performance } from "perf_hooks"; import React from "react"; import { testRender } from "@opentui/react/test-utils"; @@ -9,14 +8,13 @@ import { createLargeSplitStreamBootstrap, DEFAULT_FILE_COUNT, DEFAULT_LINES_PER_FILE, - DEFAULT_NOTES_PER_FILE, } from "./large-stream-fixture"; const VIEWPORT = { width: 240, height: 28, } as const; -const SCROLL_TICKS = 18; +const SCROLL_TICKS = 4; const SCROLL_TARGET = { x: 170, y: 12, @@ -67,10 +65,10 @@ async function destroyRenderer(setup: BenchmarkRenderer) { }); } -async function measureFirstFrameMs(notesPerFile: number) { +async function measureFirstFrameMs() { const setup = await testRender( React.createElement(AppHost, { - bootstrap: createLargeSplitStreamBootstrap({ notesPerFile }), + bootstrap: createLargeSplitStreamBootstrap(), }), VIEWPORT, ); @@ -85,10 +83,10 @@ async function measureFirstFrameMs(notesPerFile: number) { } } -async function measureScrollTicksMs(notesPerFile: number) { +async function measureScrollTicksMs() { const setup = await testRender( React.createElement(AppHost, { - bootstrap: createLargeSplitStreamBootstrap({ notesPerFile }), + bootstrap: createLargeSplitStreamBootstrap(), }), VIEWPORT, ); @@ -112,18 +110,13 @@ async function measureScrollTicksMs(notesPerFile: number) { } } -const coldFirstFrameMs = await measureFirstFrameMs(0); -const warmFirstFrameMs = await measureFirstFrameMs(0); -const noteFirstFrameMs = await measureFirstFrameMs(DEFAULT_NOTES_PER_FILE); -const windowedScrollMs = await measureScrollTicksMs(0); -const noteScrollMs = await measureScrollTicksMs(DEFAULT_NOTES_PER_FILE); +const coldFirstFrameMs = await measureFirstFrameMs(); +const warmFirstFrameMs = await measureFirstFrameMs(); +const windowedScrollMs = await measureScrollTicksMs(); console.log(`METRIC cold_first_frame_ms=${coldFirstFrameMs.toFixed(2)}`); console.log(`METRIC warm_first_frame_ms=${warmFirstFrameMs.toFixed(2)}`); 
-console.log(`METRIC note_first_frame_ms=${noteFirstFrameMs.toFixed(2)}`); console.log(`METRIC windowed_scroll_ticks_ms=${windowedScrollMs.toFixed(2)}`); -console.log(`METRIC note_scroll_ticks_ms=${noteScrollMs.toFixed(2)}`); console.log(`METRIC scroll_ticks=${SCROLL_TICKS}`); console.log(`METRIC files=${DEFAULT_FILE_COUNT}`); console.log(`METRIC lines_per_file=${DEFAULT_LINES_PER_FILE}`); -console.log(`METRIC notes_per_file=${DEFAULT_NOTES_PER_FILE}`); diff --git a/benchmarks/lib/benchmark-result.ts b/benchmarks/lib/benchmark-result.ts new file mode 100644 index 00000000..dc0e8a26 --- /dev/null +++ b/benchmarks/lib/benchmark-result.ts @@ -0,0 +1,122 @@ +export interface BenchmarkThreshold { + maxRegressionRatio: number; + minAbsoluteRegression: number; +} + +export interface BenchmarkMetricResult { + name: string; + unit: "ms" | "bytes" | "count" | "ratio" | "boolean"; + samples: number[]; + median: number; + p75: number; + p95: number; + min: number; + max: number; + threshold?: BenchmarkThreshold; + comparable: boolean; + source: string; +} + +export interface BenchmarkRunResult { + version: 1; + generatedAt: string; + gitSha?: string; + samplesPerBenchmark: number; + results: BenchmarkMetricResult[]; +} + +export interface BenchmarkComparisonRow { + name: string; + unit: BenchmarkMetricResult["unit"]; + baseMedian: number; + headMedian: number; + absoluteDelta: number; + relativeDelta: number; + threshold?: BenchmarkThreshold; + status: "pass" | "fail" | "missing-base" | "missing-head" | "informational"; + source: string; +} + +export interface BenchmarkComparisonResult { + version: 1; + generatedAt: string; + baseSha?: string; + headSha?: string; + failed: boolean; + rows: BenchmarkComparisonRow[]; +} + +/** Return percentile values using nearest-rank indexing over sorted samples. 
*/ +export function percentile(samples: number[], percentileValue: number) { + if (samples.length === 0) { + return 0; + } + + const sorted = [...samples].sort((left, right) => left - right); + const index = Math.min( + sorted.length - 1, + Math.max(0, Math.ceil((percentileValue / 100) * sorted.length) - 1), + ); + return sorted[index]!; +} + +/** Infer display and comparison metadata from the metric name emitted by a script. */ +export function classifyMetric( + name: string, +): Pick { + if (name.startsWith("competitor_")) { + return { unit: "ms", comparable: false }; + } + + if (name.endsWith("_ms")) { + return { + unit: "ms", + comparable: true, + threshold: { maxRegressionRatio: 1.15, minAbsoluteRegression: 5 }, + }; + } + + if ( + name.startsWith("is_") || + name.endsWith("_ready_before_move") || + name.endsWith("_available") + ) { + return { unit: "boolean", comparable: false }; + } + + if (name.includes("rss") || name.includes("heap")) { + return { + unit: "bytes", + comparable: true, + threshold: { maxRegressionRatio: 1.2, minAbsoluteRegression: 8 * 1024 * 1024 }, + }; + } + + if (name.endsWith("_bytes")) { + return { unit: "bytes", comparable: false }; + } + + return { unit: "count", comparable: false }; +} + +/** Build an aggregated result from raw numeric samples. */ +export function aggregateMetric( + source: string, + name: string, + samples: number[], +): BenchmarkMetricResult { + const classification = classifyMetric(name); + const sorted = [...samples].sort((left, right) => left - right); + + return { + name: `${source}/${name}`, + source, + samples, + median: percentile(sorted, 50), + p75: percentile(sorted, 75), + p95: percentile(sorted, 95), + min: sorted[0] ?? 0, + max: sorted.at(-1) ?? 
0, + ...classification, + }; +} diff --git a/benchmarks/lib/fixtures.ts b/benchmarks/lib/fixtures.ts new file mode 100644 index 00000000..f7258e4e --- /dev/null +++ b/benchmarks/lib/fixtures.ts @@ -0,0 +1,136 @@ +import { mkdtempSync, rmSync, writeFileSync, mkdirSync } from "node:fs"; +import { tmpdir } from "node:os"; +import { dirname, join } from "node:path"; +import { createTwoFilesPatch } from "diff"; + +export interface SyntheticFileOptions { + lines: number; + changedStart?: number; + changedLines?: number; + extension?: string; +} + +export interface SyntheticPatchOptions extends SyntheticFileOptions { + fileCount: number; + prefix?: string; +} + +export interface TemporaryDirectory { + path: string; + cleanup: () => void; +} + +/** Create a temporary directory with a cleanup helper for benchmark fixtures. */ +export function createTemporaryDirectory(prefix: string): TemporaryDirectory { + const path = mkdtempSync(join(tmpdir(), prefix)); + return { + path, + cleanup: () => rmSync(path, { recursive: true, force: true }), + }; +} + +/** Run git in a benchmark fixture and throw with stderr on failure. */ +export function git(cwd: string, ...cmd: string[]) { + const proc = Bun.spawnSync(["git", ...cmd], { + cwd, + stdout: "pipe", + stderr: "pipe", + stdin: "ignore", + }); + + if (proc.exitCode !== 0) { + const stderr = Buffer.from(proc.stderr).toString("utf8"); + throw new Error(stderr.trim() || `git ${cmd.join(" ")} failed`); + } + + return Buffer.from(proc.stdout).toString("utf8"); +} + +/** Generate deterministic TypeScript-like contents with a controlled changed region. */ +export function createSyntheticSource( + fileIndex: number, + changed: boolean, + options: SyntheticFileOptions, +) { + const changedStart = options.changedStart ?? Math.floor(options.lines / 3); + const changedEnd = + changedStart + (options.changedLines ?? 
Math.max(4, Math.floor(options.lines / 6))); + + return Array.from({ length: options.lines }, (_, lineIndex) => { + const line = lineIndex + 1; + if (changed && lineIndex >= changedStart && lineIndex < changedEnd) { + return `export function bench${fileIndex}_${line}(value: number) { return value * ${line} + ${fileIndex}; }\n`; + } + + return `export function bench${fileIndex}_${line}(value: number) { return value + ${line}; }\n`; + }).join(""); +} + +/** Build one deterministic multi-file unified patch. */ +export function createSyntheticPatch({ + fileCount, + lines, + changedStart, + changedLines, + extension = "ts", + prefix = "src/bench", +}: SyntheticPatchOptions) { + return Array.from({ length: fileCount }, (_, index) => { + const fileIndex = index + 1; + const path = `${prefix}${fileIndex}.${extension}`; + const before = createSyntheticSource(fileIndex, false, { lines, changedStart, changedLines }); + const after = createSyntheticSource(fileIndex, true, { lines, changedStart, changedLines }); + + const patch = createTwoFilesPatch(path, path, before, after, "", "", { context: 3 }); + // Pierre's patch parser expects unified/git hunks; remove diff-package index banners. + return patch.replace(/^Index: .*\n=+\n/, "").trimEnd(); + }).join("\n"); +} + +/** Create a git repo with committed files and modified tracked contents. 
*/ +export function createChangedRepo({ + fileCount, + lines, + changedStart, + changedLines, + extension = "ts", +}: SyntheticPatchOptions) { + const fixture = createTemporaryDirectory("hunk-benchmark-repo-"); + + git(fixture.path, "init"); + git(fixture.path, "config", "user.name", "Benchmark User"); + git(fixture.path, "config", "user.email", "benchmark@example.com"); + + for (let index = 1; index <= fileCount; index += 1) { + const relativePath = join("src", `bench${index}.${extension}`); + const absolutePath = join(fixture.path, relativePath); + mkdirSync(dirname(absolutePath), { recursive: true }); + writeFileSync( + absolutePath, + createSyntheticSource(index, false, { lines, changedStart, changedLines }), + ); + } + + git(fixture.path, "add", "."); + git(fixture.path, "commit", "-m", "initial benchmark fixture"); + + for (let index = 1; index <= fileCount; index += 1) { + const relativePath = join("src", `bench${index}.${extension}`); + writeFileSync( + join(fixture.path, relativePath), + createSyntheticSource(index, true, { lines, changedStart, changedLines }), + ); + } + + return fixture; +} + +/** Add deterministic untracked files to an existing benchmark repository. */ +export function addUntrackedFiles(repoDir: string, fileCount: number, lines: number) { + for (let index = 1; index <= fileCount; index += 1) { + const relativePath = join("untracked", `new${index}.ts`); + const absolutePath = join(repoDir, relativePath); + mkdirSync(dirname(absolutePath), { recursive: true }); + writeFileSync(absolutePath, createSyntheticSource(index, true, { lines })); + } +} diff --git a/benchmarks/memory.ts b/benchmarks/memory.ts new file mode 100644 index 00000000..7bbb58d0 --- /dev/null +++ b/benchmarks/memory.ts @@ -0,0 +1,72 @@ +// Track heap/RSS pressure for loading, planning, rendering, and navigating a large diff. 
+import { performance } from "perf_hooks"; +import React from "react"; +import { testRender } from "@opentui/react/test-utils"; +import { act } from "react"; +import { buildSplitRows } from "../src/ui/diff/pierre"; +import { buildReviewRenderPlan } from "../src/ui/diff/reviewRenderPlan"; +import { resolveTheme } from "../src/ui/themes"; +import { AppHost } from "../src/ui/AppHost"; +import { createLargeSplitStreamBootstrap } from "./large-stream-fixture"; + +const viewport = { width: 240, height: 28 } as const; + +function printMemory(prefix: string) { + const usage = process.memoryUsage(); + console.log(`METRIC ${prefix}_rss_bytes=${usage.rss}`); + console.log(`METRIC ${prefix}_heap_used_bytes=${usage.heapUsed}`); +} + +async function renderOnce(setup: Awaited>) { + await act(async () => { + await setup.renderOnce(); + await Bun.sleep(0); + }); +} + +const bootstrapStart = performance.now(); +const bootstrap = createLargeSplitStreamBootstrap({ + fileCount: 120, + linesPerFile: 120, +}); +console.log(`METRIC bootstrap_fixture_ms=${(performance.now() - bootstrapStart).toFixed(2)}`); +printMemory("after_bootstrap"); + +const theme = resolveTheme("midnight", null); +let plannedRows = 0; +const planningStart = performance.now(); +for (const file of bootstrap.changeset.files) { + const rows = buildSplitRows(file, null, theme); + plannedRows += buildReviewRenderPlan({ + fileId: file.id, + rows, + showHunkHeaders: true, + visibleAgentNotes: [], + }).length; +} +console.log(`METRIC planning_ms=${(performance.now() - planningStart).toFixed(2)}`); +console.log(`METRIC planned_rows=${plannedRows}`); +printMemory("after_planning"); + +const setup = await testRender(React.createElement(AppHost, { bootstrap }), viewport); +try { + const firstFrameStart = performance.now(); + await renderOnce(setup); + console.log(`METRIC first_frame_ms=${(performance.now() - firstFrameStart).toFixed(2)}`); + printMemory("after_first_frame"); + + const navigationStart = performance.now(); + for 
(let index = 0; index < 6; index += 1) { + await act(async () => { + await setup.mockInput.typeText("]"); + await setup.renderOnce(); + await Bun.sleep(0); + }); + } + console.log(`METRIC next_hunk_navigation_ms=${(performance.now() - navigationStart).toFixed(2)}`); + printMemory("after_navigation"); +} finally { + await act(async () => { + setup.renderer.destroy(); + }); +} diff --git a/benchmarks/render-layout.ts b/benchmarks/render-layout.ts new file mode 100644 index 00000000..0b805b06 --- /dev/null +++ b/benchmarks/render-layout.ts @@ -0,0 +1,78 @@ +// Benchmark pure diff row/layout planning across split, stack, and size-shape cases. +import { performance } from "perf_hooks"; +import { buildSplitRows, buildStackRows } from "../src/ui/diff/pierre"; +import { buildReviewRenderPlan } from "../src/ui/diff/reviewRenderPlan"; +import { measureDiffSectionGeometry } from "../src/ui/diff/diffSectionGeometry"; +import { resolveTheme } from "../src/ui/themes"; +import { createLargeSplitStreamFiles } from "./large-stream-fixture"; + +const theme = resolveTheme("midnight", null); + +function measureMs(run: () => void) { + const start = performance.now(); + run(); + return performance.now() - start; +} + +function measureScenario(name: string, files: ReturnType) { + let splitRows = 0; + let stackRows = 0; + let plannedRows = 0; + + const splitRowsMs = measureMs(() => { + for (const file of files) { + splitRows += buildSplitRows(file, null, theme).length; + } + }); + + const stackRowsMs = measureMs(() => { + for (const file of files) { + stackRows += buildStackRows(file, null, theme).length; + } + }); + + const geometryMs = measureMs(() => { + for (const file of files) { + measureDiffSectionGeometry(file, "split", true, theme); + } + }); + + const reviewPlanMs = measureMs(() => { + for (const file of files) { + const rows = buildSplitRows(file, null, theme); + plannedRows += buildReviewRenderPlan({ + fileId: file.id, + rows, + showHunkHeaders: true, + visibleAgentNotes: [], 
+ }).length; + } + }); + + console.log(`METRIC ${name}_split_rows_ms=${splitRowsMs.toFixed(2)}`); + console.log(`METRIC ${name}_stack_rows_ms=${stackRowsMs.toFixed(2)}`); + console.log(`METRIC ${name}_geometry_ms=${geometryMs.toFixed(2)}`); + console.log(`METRIC ${name}_review_plan_ms=${reviewPlanMs.toFixed(2)}`); + console.log(`METRIC ${name}_files=${files.length}`); + console.log(`METRIC ${name}_split_rows=${splitRows}`); + console.log(`METRIC ${name}_stack_rows=${stackRows}`); + console.log(`METRIC ${name}_planned_rows=${plannedRows}`); +} + +measureScenario( + "many_small_files", + createLargeSplitStreamFiles({ fileCount: 360, linesPerFile: 48 }), +); +measureScenario( + "balanced_stream", + createLargeSplitStreamFiles({ fileCount: 180, linesPerFile: 120 }), +); +measureScenario( + "large_single_file", + createLargeSplitStreamFiles({ + fileCount: 1, + linesPerFile: 18_000, + changedStartLine: 1_000, + changedEndLine: 17_000, + }), +); diff --git a/benchmarks/run.ts b/benchmarks/run.ts new file mode 100644 index 00000000..d8560fd9 --- /dev/null +++ b/benchmarks/run.ts @@ -0,0 +1,186 @@ +#!/usr/bin/env bun +import { mkdirSync, writeFileSync } from "node:fs"; +import { dirname, resolve } from "node:path"; +import { aggregateMetric, type BenchmarkRunResult } from "./lib/benchmark-result"; + +const defaultScripts = [ + "bootstrap-load.ts", + "working-tree-load.ts", + "changeset-parse.ts", + "render-layout.ts", + "highlight-prefetch.ts", + "large-stream.ts", +]; + +interface RunOptions { + samples: number; + out?: string; + includeCompetitors: boolean; + scripts: string[]; +} + +function readArgValue(args: string[], index: number) { + const value = args[index + 1]; + if (!value) { + throw new Error(`Missing value for ${args[index]}`); + } + return value; +} + +function parseArgs(args: string[]): RunOptions { + const options: RunOptions = { + samples: Number(process.env.HUNK_BENCHMARK_SAMPLES ?? 
3), + includeCompetitors: false, + scripts: [], + }; + + for (let index = 0; index < args.length; index += 1) { + const arg = args[index]!; + + if (arg === "--samples") { + options.samples = Number(readArgValue(args, index)); + index += 1; + continue; + } + + if (arg === "--out") { + options.out = readArgValue(args, index); + index += 1; + continue; + } + + if (arg === "--include-competitors") { + options.includeCompetitors = true; + continue; + } + + if (arg === "--script") { + options.scripts.push(readArgValue(args, index)); + index += 1; + continue; + } + + throw new Error(`Unknown benchmark runner argument: ${arg}`); + } + + if (!Number.isFinite(options.samples) || options.samples < 1) { + throw new Error("--samples must be a positive number"); + } + + return options; +} + +function gitSha() { + const proc = Bun.spawnSync(["git", "rev-parse", "HEAD"], { + stdout: "pipe", + stderr: "ignore", + stdin: "ignore", + }); + + if (proc.exitCode !== 0) { + return undefined; + } + + return Buffer.from(proc.stdout).toString("utf8").trim(); +} + +function parseMetrics(output: string) { + const metrics = new Map(); + const metricPattern = /^METRIC\s+([A-Za-z0-9_.:-]+)=(-?\d+(?:\.\d+)?)$/; + + for (const line of output.split(/\r?\n/)) { + const match = metricPattern.exec(line.trim()); + if (!match) { + continue; + } + + metrics.set(match[1]!, Number(match[2]!)); + } + + return metrics; +} + +async function runScript(script: string) { + const proc = Bun.spawn(["bun", "run", `benchmarks/${script}`], { + stdout: "pipe", + stderr: "pipe", + stdin: "ignore", + env: { ...process.env, CI: process.env.CI ?? 
"1" }, + }); + + const [stdout, stderr, exitCode] = await Promise.all([ + new Response(proc.stdout).text(), + new Response(proc.stderr).text(), + proc.exited, + ]); + + if (stderr.trim()) { + console.warn(stderr.trim()); + } + + if (exitCode !== 0) { + throw new Error(`${script} failed with exit code ${exitCode}\n${stderr}`); + } + + process.stdout.write(stdout); + return parseMetrics(stdout); +} + +function formatValue(value: number) { + if (Math.abs(value) >= 100) { + return value.toFixed(1); + } + return value.toFixed(2); +} + +const options = parseArgs(Bun.argv.slice(2)); +const scripts = options.scripts.length > 0 ? options.scripts : [...defaultScripts]; +if (options.includeCompetitors) { + scripts.push("competitors.ts"); +} + +const samplesByMetric = new Map(); + +for (const script of scripts) { + const source = script.replace(/\.ts$/, ""); + console.log(`\n## ${source}`); + + for (let sample = 1; sample <= options.samples; sample += 1) { + console.log(`\n# sample ${sample}/${options.samples}`); + const metrics = await runScript(script); + + for (const [metric, value] of metrics) { + const key = `${source}/${metric}`; + const entry = samplesByMetric.get(key) ?? { source, metric, samples: [] }; + entry.samples.push(value); + samplesByMetric.set(key, entry); + } + } +} + +const results = [...samplesByMetric.values()] + .map(({ source, metric, samples }) => aggregateMetric(source, metric, samples)) + .sort((left, right) => left.name.localeCompare(right.name)); + +const runResult: BenchmarkRunResult = { + version: 1, + generatedAt: new Date().toISOString(), + gitSha: gitSha(), + samplesPerBenchmark: options.samples, + results, +}; + +console.log("\n## Aggregated benchmark medians"); +for (const result of results) { + const suffix = result.unit === "ms" ? "ms" : result.unit === "bytes" ? 
" bytes" : ""; + console.log( + `${result.name}: median=${formatValue(result.median)}${suffix} p95=${formatValue(result.p95)}${suffix}`, + ); +} + +if (options.out) { + const outPath = resolve(options.out); + mkdirSync(dirname(outPath), { recursive: true }); + writeFileSync(outPath, `${JSON.stringify(runResult, null, 2)}\n`); + console.log(`\nWrote ${outPath}`); +} diff --git a/benchmarks/working-tree-load.ts b/benchmarks/working-tree-load.ts new file mode 100644 index 00000000..8476113e --- /dev/null +++ b/benchmarks/working-tree-load.ts @@ -0,0 +1,68 @@ +// Benchmark git-backed working-tree loading, including untracked file handling. +import { performance } from "perf_hooks"; +import { loadAppBootstrap } from "../src/core/loaders"; +import { addUntrackedFiles, createChangedRepo } from "./lib/fixtures"; + +interface Scenario { + name: string; + fileCount: number; + lines: number; + untrackedFiles?: number; + untrackedLines?: number; +} + +const scenarios: Scenario[] = [ + { name: "small_worktree", fileCount: 16, lines: 80 }, + { name: "medium_worktree", fileCount: 96, lines: 180 }, + { name: "large_worktree", fileCount: 240, lines: 220 }, + { + name: "untracked_many_small", + fileCount: 16, + lines: 80, + untrackedFiles: 120, + untrackedLines: 36, + }, + { + name: "untracked_few_large", + fileCount: 8, + lines: 80, + untrackedFiles: 6, + untrackedLines: 5_000, + }, +]; + +async function measureScenario(scenario: Scenario) { + const fixture = createChangedRepo({ fileCount: scenario.fileCount, lines: scenario.lines }); + + try { + if (scenario.untrackedFiles) { + addUntrackedFiles(fixture.path, scenario.untrackedFiles, scenario.untrackedLines ?? 
40); + } + + const start = performance.now(); + const bootstrap = await loadAppBootstrap( + { kind: "vcs", staged: false, options: { mode: "auto" } }, + { cwd: fixture.path }, + ); + const loadMs = performance.now() - start; + const additions = bootstrap.changeset.files.reduce( + (sum, file) => sum + file.stats.additions, + 0, + ); + const deletions = bootstrap.changeset.files.reduce( + (sum, file) => sum + file.stats.deletions, + 0, + ); + + console.log(`METRIC ${scenario.name}_load_ms=${loadMs.toFixed(2)}`); + console.log(`METRIC ${scenario.name}_files=${bootstrap.changeset.files.length}`); + console.log(`METRIC ${scenario.name}_additions=${additions}`); + console.log(`METRIC ${scenario.name}_deletions=${deletions}`); + } finally { + fixture.cleanup(); + } +} + +for (const scenario of scenarios) { + await measureScenario(scenario); +} diff --git a/package.json b/package.json index c4366970..9ea41a6f 100644 --- a/package.json +++ b/package.json @@ -68,10 +68,18 @@ "publish:prebuilt:npm": "bun run ./scripts/publish-prebuilt-npm.ts", "update:homebrew-formula": "bun run ./scripts/update-homebrew-formula.ts", "prepack": "bun run build:npm", + "bench": "bun run benchmarks/run.ts", + "bench:compare": "bun run benchmarks/compare.ts", + "bench:comment-pr": "bun run benchmarks/comment-pr.ts", "bench:bootstrap-load": "bun run benchmarks/bootstrap-load.ts", + "bench:working-tree-load": "bun run benchmarks/working-tree-load.ts", + "bench:changeset-parse": "bun run benchmarks/changeset-parse.ts", + "bench:render-layout": "bun run benchmarks/render-layout.ts", "bench:highlight-prefetch": "bun run benchmarks/highlight-prefetch.ts", "bench:large-stream": "bun run benchmarks/large-stream.ts", "bench:large-stream-profile": "bun run benchmarks/large-stream-profile.ts", + "bench:memory": "bun run benchmarks/memory.ts", + "bench:competitors": "bun run benchmarks/competitors.ts", "nix:update-lock": "nix run .#update-bun-lock" }, "dependencies": {