From cc28e3bb217eb99ac67b129176bf56b8cb9c4a66 Mon Sep 17 00:00:00 2001
From: Griffen Fargo <3642037+gfargo@users.noreply.github.com>
Date: Tue, 5 May 2026 11:09:25 -0400
Subject: [PATCH] feat(bench): measurement infrastructure for the diff-condensing pipeline (#845)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

First chunk of the #845 perf overhaul: a reproducible benchmark harness so every later PR can show concrete before/after numbers instead of hand-waving about "should be faster". Three pieces:

1. Telemetry persistence in `observability.ts`. When COCO_BENCH=1 is set (or any non-`0` value), every LLM call accumulates into a narrow `LlmBenchCall` buffer; `flushLlmBenchRun` writes the record to `<cwd>/.coco-bench.json` (overridable via COCO_BENCH_FILE). Best-effort: write failures are silent and the buffer self-clears after each successful flush.

2. Synthetic diff fixtures at `src/lib/parsers/default/__fixtures__/`. Three sizes:

   - tiny   ( 5 files,  ~790 tokens) — early-exit path
   - medium (25 files, ~36k tokens) — typical commit
   - large  (50 files, ~83k tokens) — initial-commit shape

   Content comes from a seeded LCG so before/after runs compare the same input. Each fixture exports a fully-populated DiffNode tree so `summarizeDiffs` runs without a real git repo.

3. `bin/benchmark.ts` runner (`npm run bench`). Plugs the fixtures into `summarizeDiffs` with a duck-typed mock chain that simulates per-call latency proportional to input size (deterministic so PR diffs are apples-to-apples, not real-world wall-clock). Captures stage timings + per-call telemetry. `--update` overwrites `.bench/baseline.json`; `--fixture=<name>` narrows the run to a single fixture for tighter feedback loops.

Baseline numbers committed at `.bench/baseline.json` against current `main`:

| fixture | wall-clock | llm calls | llm total ms | prompt tokens |
|---------|------------|-----------|--------------|---------------|
| tiny    | 2 ms       | 0         | 0 ms         | 0             |
| medium  | 30,213 ms  | 20        | 102,723 ms   | 91,766        |
| large   | 70,048 ms  | 41        | 236,818 ms   | 220,199       |

The 3.4× spread between the large fixture's wall-clock and total LLM time (236 s of model work in 70 s of wall) reflects the existing `maxConcurrent=6` parallelism. Subsequent PRs in the #845 sprint will move these numbers, and the deltas will land directly in PR descriptions.
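For orientation, the telemetry lifecycle a command is expected to follow looks roughly like the sketch below. This is a sketch only, not code in this patch: the wrapper function, the `work` callback, and the `command` value are illustrative, and the real wiring into the CLI commands lands with the later #845 PRs.

```ts
import {
  resetLlmTelemetry,
  flushLlmBenchRun,
} from '../src/lib/langchain/utils/observability' // illustrative import path

// Hypothetical wrapper; only the two observability calls are real exports from this patch.
async function runWithBenchTelemetry(work: () => Promise<void>): Promise<void> {
  resetLlmTelemetry() // drop calls buffered by a previous run
  const startedAt = Date.now()

  // LLM-backed work funnels through logLlmCall(), which buffers an
  // LlmBenchCall whenever COCO_BENCH is set to a non-`0` value.
  await work()

  // Appends one run record to <cwd>/.coco-bench.json (or COCO_BENCH_FILE).
  // Best-effort: a failed write comes back as { ok: false, error }, never a throw.
  flushLlmBenchRun({ command: 'commit', totalElapsedMs: Date.now() - startedAt })
}
```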
--- .bench/baseline.json | 40 +++ .gitignore | 7 + bin/benchmark.ts | 250 ++++++++++++++++++ package.json | 1 + src/lib/langchain/utils/observability.ts | 134 ++++++++++ src/lib/parsers/default/__fixtures__/index.ts | 219 +++++++++++++++ 6 files changed, 651 insertions(+) create mode 100644 .bench/baseline.json create mode 100644 bin/benchmark.ts create mode 100644 src/lib/parsers/default/__fixtures__/index.ts diff --git a/.bench/baseline.json b/.bench/baseline.json new file mode 100644 index 0000000..9fe755c --- /dev/null +++ b/.bench/baseline.json @@ -0,0 +1,40 @@ +{ + "capturedAt": "2026-05-05T15:06:12.102Z", + "node": "v22.13.0", + "platform": "darwin-arm64", + "options": { + "baseLatencyMs": 1500, + "perTokenMs": 2, + "maxConcurrent": 6, + "maxTokens": 2048 + }, + "results": [ + { + "fixture": "tiny", + "fileCount": 5, + "approxTokens": 790, + "durationMs": 2, + "llmCalls": 0, + "llmTotalMs": 0, + "llmTotalPromptTokens": 0 + }, + { + "fixture": "medium", + "fileCount": 25, + "approxTokens": 36150, + "durationMs": 30213, + "llmCalls": 20, + "llmTotalMs": 102723, + "llmTotalPromptTokens": 91766 + }, + { + "fixture": "large", + "fileCount": 50, + "approxTokens": 83410, + "durationMs": 70048, + "llmCalls": 41, + "llmTotalMs": 236818, + "llmTotalPromptTokens": 220199 + } + ] +} \ No newline at end of file diff --git a/.gitignore b/.gitignore index 1eb137f..33e3f90 100644 --- a/.gitignore +++ b/.gitignore @@ -59,3 +59,10 @@ commitlint.config.js # Internal specs, audits, and design docs specs/ + +# Diff-condensing benchmark output (#845). Per-run files are local +# noise; the committed baseline lives at .bench/baseline.json so PR +# perf claims have a reference point. Telemetry sidecar from +# COCO_BENCH=1 stays local too. +.bench/run-*.json +.coco-bench.json diff --git a/bin/benchmark.ts b/bin/benchmark.ts new file mode 100644 index 0000000..81a1a80 --- /dev/null +++ b/bin/benchmark.ts @@ -0,0 +1,250 @@ +#!/usr/bin/env tsx +/** + * Diff-condensing pipeline benchmark (#845). + * + * Runs `summarizeDiffs` against the synthetic fixtures in + * `src/lib/parsers/default/__fixtures__/index.ts` using a mock LLM + * chain that simulates latency proportional to input size. Captures + * stage timings and per-call telemetry, writes the result to + * `.bench/run-<stamp>.json`, and (when a baseline is present at + * `.bench/baseline.json`) prints a diff so PRs can show their wins + * concretely. + * + * Usage: + * npm run bench # run all fixtures, write bench file + * npm run bench -- --update # also overwrite the baseline + * npm run bench -- --fixture=medium # narrow to one fixture + * + * The mock chain uses a deterministic latency model so before/after + * runs compare apples to apples without paying for real API calls. + * Numbers don't reflect real-world wall-clock time; they reflect the + * pipeline's *behavior* (how many calls fire, how the stages fan + * out, where the bottlenecks are).
+ */ + +import * as fs from 'node:fs' +import * as path from 'node:path' +import * as os from 'node:os' + +import { RecursiveCharacterTextSplitter } from '@langchain/textsplitters' +import { loadSummarizationChain } from '@langchain/classic/chains' +import type { Document } from '@langchain/classic/document' + +import { fileChangeParser } from '../src/lib/parsers/default' +import { summarizeDiffs } from '../src/lib/parsers/default/utils/summarizeDiffs' +import { allFixtures, DiffFixture } from '../src/lib/parsers/default/__fixtures__' +import { Logger } from '../src/lib/utils/logger' +import { getTokenCounter } from '../src/lib/utils/tokenizer' +import { + buildLlmBenchRun, + flushLlmBenchRun, + resetLlmTelemetry, +} from '../src/lib/langchain/utils/observability' + +// Silence the type checker about the unused `fileChangeParser` and +// `loadSummarizationChain` imports being present for future bench +// scenarios; the active runner uses `summarizeDiffs` directly so it +// can pass a pre-built DiffNode. +void fileChangeParser +void loadSummarizationChain + +const BENCH_DIR = path.join(process.cwd(), '.bench') +const BASELINE_PATH = path.join(BENCH_DIR, 'baseline.json') + +// The bench runner is the canonical "I want telemetry" entry point, +// so flip COCO_BENCH on in-process if the user didn't set it +// externally. `recordBenchCall` checks this env var to decide +// whether to retain per-call data. +if (!process.env.COCO_BENCH) { + process.env.COCO_BENCH = '1' +} + +type BenchOptions = { + baseLatencyMs: number + perTokenMs: number + maxConcurrent: number + maxTokens: number +} + +const DEFAULT_OPTIONS: BenchOptions = { + // Calibrated to roughly match user-reported wall-clock on + // gpt-4.1-nano: ~3-7s for small calls, scaling up to ~25-40s for + // multi-thousand-token inputs. Adjust if real-world timings drift. + baseLatencyMs: 1500, + perTokenMs: 2, + maxConcurrent: 6, + maxTokens: 2048, +} + +type BenchResult = { + fixture: string + fileCount: number + approxTokens: number + durationMs: number + llmCalls: number + llmTotalMs: number + llmTotalPromptTokens: number +} + +function mockChain(options: BenchOptions): unknown { + // Duck-typed chain that satisfies the .invoke() shape + // `summarize()` expects. Latency is deterministic so before/after + // runs are directly comparable. + return { + invoke: async (input: { input_documents: Document[] }) => { + const totalChars = input.input_documents.reduce( + (sum, doc) => sum + doc.pageContent.length, + 0 + ) + // Approximate token count from chars/4 — enough fidelity for + // the latency model. The pipeline's real tokenizer counts + // separately for telemetry. + const approxTokens = Math.floor(totalChars / 4) + const latencyMs = options.baseLatencyMs + Math.floor(approxTokens * options.perTokenMs) + await new Promise((resolve) => setTimeout(resolve, latencyMs)) + return { text: `[mock summary of ${input.input_documents.length} doc(s), ~${approxTokens} tokens]` } + }, + } +} + +function silentLogger(): Logger { + // Tests already use this pattern; keep verbose calls a no-op so the + // bench output stays clean while still funneling timer + spinner + // calls through the real Logger surface.
+ const logger = new Logger({ verbose: false } as never) + return logger +} + +async function runFixture( + fixture: DiffFixture, + options: BenchOptions +): Promise<BenchResult> { + resetLlmTelemetry() + + const tokenizer = await getTokenCounter('gpt-4.1-nano') + const textSplitter = new RecursiveCharacterTextSplitter({ + chunkSize: 10000, + chunkOverlap: 250, + }) + const chain = mockChain(options) as Parameters<typeof summarizeDiffs>[1]['chain'] + const logger = silentLogger() + + const startedAt = Date.now() + await summarizeDiffs(fixture.rootNode, { + tokenizer, + logger, + maxTokens: options.maxTokens, + minTokensForSummary: 400, + maxFileTokens: Math.floor(options.maxTokens * 0.25), + maxConcurrent: options.maxConcurrent, + textSplitter, + chain, + metadata: { command: 'benchmark', model: 'mock' }, + }) + const durationMs = Date.now() - startedAt + + const run = buildLlmBenchRun({ command: `bench:${fixture.name}`, totalElapsedMs: durationMs }) + + return { + fixture: fixture.name, + fileCount: fixture.fileCount, + approxTokens: fixture.approxTokens, + durationMs, + llmCalls: run.callCount, + llmTotalMs: run.totalLlmElapsedMs, + llmTotalPromptTokens: run.totalPromptTokens, + } +} + +function formatRow(label: string, value: string | number): string { + return ` ${label.padEnd(28)} ${value}` +} + +function printSummary(results: BenchResult[], baseline?: BenchResult[]): void { + console.log('\n=== diff-condensing benchmark ===\n') + for (const result of results) { + console.log(`Fixture: ${result.fixture} (${result.fileCount} files, ~${result.approxTokens} tokens)`) + console.log(formatRow('wall-clock duration', `${result.durationMs}ms`)) + console.log(formatRow('llm calls', result.llmCalls)) + console.log(formatRow('llm total time', `${result.llmTotalMs}ms`)) + console.log(formatRow('llm prompt tokens', result.llmTotalPromptTokens)) + if (baseline) { + const prior = baseline.find((entry) => entry.fixture === result.fixture) + if (prior) { + const deltaPct = (n: number, p: number) => + p === 0 ? 'n/a' : `${(((n - p) / p) * 100).toFixed(1)}%` + console.log(formatRow('Δ duration', `${result.durationMs - prior.durationMs}ms (${deltaPct(result.durationMs, prior.durationMs)})`)) + console.log(formatRow('Δ llm calls', `${result.llmCalls - prior.llmCalls} (${deltaPct(result.llmCalls, prior.llmCalls)})`)) + } + } + console.log('') + } +} + +function writeBenchFile(results: BenchResult[], updateBaseline: boolean): void { + if (!fs.existsSync(BENCH_DIR)) { + fs.mkdirSync(BENCH_DIR, { recursive: true }) + } + + const stamp = new Date().toISOString().replace(/[:.]/g, '-') + const runFile = path.join(BENCH_DIR, `run-${stamp}.json`) + const payload = { + capturedAt: new Date().toISOString(), + node: process.version, + platform: `${os.platform()}-${os.arch()}`, + options: DEFAULT_OPTIONS, + results, + } + fs.writeFileSync(runFile, JSON.stringify(payload, null, 2)) + console.log(`Wrote ${runFile}`) + + if (updateBaseline) { + fs.writeFileSync(BASELINE_PATH, JSON.stringify(payload, null, 2)) + console.log(`Updated baseline at ${BASELINE_PATH}`) + } +} + +function readBaseline(): BenchResult[] | undefined { + if (!fs.existsSync(BASELINE_PATH)) return undefined + try { + const raw = fs.readFileSync(BASELINE_PATH, 'utf8') + const parsed = JSON.parse(raw) + return Array.isArray(parsed.results) ?
parsed.results : undefined + } catch { + return undefined + } +} + +async function main(): Promise<void> { + const args = process.argv.slice(2) + const updateBaseline = args.includes('--update') + const fixtureArg = args.find((arg) => arg.startsWith('--fixture='))?.split('=')[1] + + const fixtures = fixtureArg + ? allFixtures.filter((fixture) => fixture.name === fixtureArg) + : allFixtures + if (fixtures.length === 0) { + console.error(`No fixture matched ${fixtureArg}; available: ${allFixtures.map((f) => f.name).join(', ')}`) + process.exitCode = 1 + return + } + + const results: BenchResult[] = [] + for (const fixture of fixtures) { + console.log(`Running fixture ${fixture.name}...`) + const result = await runFixture(fixture, DEFAULT_OPTIONS) + results.push(result) + } + + const baseline = updateBaseline ? undefined : readBaseline() + printSummary(results, baseline) + writeBenchFile(results, updateBaseline) + + // Flush any in-memory bench telemetry to a separate file when + // COCO_BENCH is set externally; lets devs capture the per-call + // data alongside the aggregated results. + flushLlmBenchRun({ command: 'benchmark' }) +} + +main().catch((error) => { + console.error(error) + process.exitCode = 1 +}) diff --git a/package.json b/package.json index 5fd5f7a..9144808 100644 --- a/package.json +++ b/package.json @@ -39,6 +39,7 @@ "test": "npm run test:jest && npm run test:publish", "test:publish": "npm run lint && npm run build && npm run test:cli && npm pack --dry-run", "test:cli": "tsx bin/smokeCli.ts", + "bench": "tsx bin/benchmark.ts", "pretest:jest": "npm run build:info", "test:jest": "jest", "test:jest:watch": "jest --watch", diff --git a/src/lib/langchain/utils/observability.ts b/src/lib/langchain/utils/observability.ts index 4fe6b86..ae9a468 100644 --- a/src/lib/langchain/utils/observability.ts +++ b/src/lib/langchain/utils/observability.ts @@ -1,3 +1,6 @@ +import * as fs from 'node:fs' +import * as path from 'node:path' + import { Logger } from '../../utils/logger' import { TokenCounter } from '../../utils/tokenizer' @@ -15,6 +18,27 @@ export type LlmCallMetadata = { inputChunks?: number } +/** + * Bench-mode call record (#845). Captured for every LLM call when + * `COCO_BENCH=1` (or any non-`0` value) is set, then flushed to disk + * by `flushLlmBenchRun` at the end of the command. The structure stays + * narrow on purpose — fields the runner actually compares before / + * after, nothing more — so different runs with different model / + * provider mixes can still diff against the baseline cleanly.
+ */ +type LlmBenchCall = { + task: string + command?: string + provider?: string + model?: string + promptTokens?: number + elapsedMs?: number + inputDocuments?: number + inputChunks?: number +} + +const benchCalls: LlmBenchCall[] = [] + type LlmTelemetrySummary = { calls: number promptTokens: number @@ -40,10 +64,29 @@ export function estimatePromptTokens( } } +function isBenchModeActive(): boolean { + return Boolean(process.env.COCO_BENCH && process.env.COCO_BENCH !== '0') +} + +function recordBenchCall(metadata: LlmCallMetadata): void { + if (!isBenchModeActive()) return + benchCalls.push({ + task: metadata.task, + command: metadata.command, + provider: metadata.provider, + model: metadata.model, + promptTokens: metadata.promptTokens, + elapsedMs: metadata.elapsedMs, + inputDocuments: metadata.inputDocuments, + inputChunks: metadata.inputChunks, + }) +} + export function logLlmCall(logger: Logger | undefined, metadata: LlmCallMetadata): void { if (!logger) return recordLlmTelemetry(metadata) + recordBenchCall(metadata) const fields = [ `task=${metadata.task}`, @@ -113,4 +156,95 @@ export function logLlmTelemetrySummary(logger: Logger | undefined, command: stri export function resetLlmTelemetry(): void { telemetryByCommand.clear() + benchCalls.length = 0 +} + +export type LlmBenchRunStage = { + name: string + elapsedMs: number +} + +export type LlmBenchRunRecord = { + command?: string + totalElapsedMs?: number + stages?: LlmBenchRunStage[] + callCount: number + totalLlmElapsedMs: number + totalPromptTokens: number + calls: LlmBenchCall[] +} + +/** + * Build the in-memory bench run record from accumulated calls. + * Pure (no I/O) so callers can inspect or assert the contents without + * touching disk — useful in tests + the in-process benchmark runner. + */ +export function buildLlmBenchRun( + options: { + command?: string + totalElapsedMs?: number + stages?: LlmBenchRunStage[] + } = {} +): LlmBenchRunRecord { + const calls = benchCalls.slice() + return { + command: options.command, + totalElapsedMs: options.totalElapsedMs, + stages: options.stages, + callCount: calls.length, + totalLlmElapsedMs: calls.reduce((sum, call) => sum + (call.elapsedMs || 0), 0), + totalPromptTokens: calls.reduce((sum, call) => sum + (call.promptTokens || 0), 0), + calls, + } +} + +/** + * Persist the current bench run to a JSON file. No-op when bench + * mode is inactive (so production runs don't pay for disk I/O). + * + * The file path comes from `COCO_BENCH_FILE` if set, otherwise + * defaults to `<cwd>/.coco-bench.json`. Each call appends to the + * `runs` array of the file (creates the file if missing) so a single + * benchmark session that triggers multiple commands ends up with one + * file containing the full sequence. + * + * Best-effort: write failures never throw; the failure mode is + * reported back to the caller via the return value.
+ */ +export function flushLlmBenchRun( + options: { + command?: string + totalElapsedMs?: number + stages?: LlmBenchRunStage[] + } = {} +): { ok: boolean; filePath?: string; error?: string } { + if (!isBenchModeActive()) { + return { ok: false, error: 'COCO_BENCH not set' } + } + + const record = buildLlmBenchRun(options) + const filePath = path.resolve(process.env.COCO_BENCH_FILE || path.join(process.cwd(), '.coco-bench.json')) + + try { + let existing: { runs: LlmBenchRunRecord[] } = { runs: [] } + if (fs.existsSync(filePath)) { + try { + const raw = fs.readFileSync(filePath, 'utf8') + const parsed = JSON.parse(raw) + if (parsed && Array.isArray(parsed.runs)) { + existing = parsed + } + } catch { + // Corrupt or pre-existing non-bench file: overwrite with a + // fresh structure. Bench mode is opt-in; collisions here are + // a developer-only concern. + } + } + existing.runs.push(record) + fs.writeFileSync(filePath, JSON.stringify(existing, null, 2)) + benchCalls.length = 0 + return { ok: true, filePath } + } catch (error) { + return { ok: false, error: (error as Error).message } + } } diff --git a/src/lib/parsers/default/__fixtures__/index.ts b/src/lib/parsers/default/__fixtures__/index.ts new file mode 100644 index 0000000..0a883ae --- /dev/null +++ b/src/lib/parsers/default/__fixtures__/index.ts @@ -0,0 +1,219 @@ +/** + * Synthetic diff fixtures for benchmarking the diff-condensing + * pipeline (#845). Each fixture is a fully-populated `DiffNode` tree + * so callers can invoke `summarizeDiffs` directly without standing + * up a git repo. + * + * Numbers are picked to mirror the user-reported 4-minute repro + * shape: + * - tiny: early-exit path (already under budget) + * - medium: typical real commit (~25 files, ~40k tokens) + * - large: initial-commit shape (~50 files, ~100k tokens) + * + * Determinism matters more than realism: the synthetic content is + * generated from a stable seed so before/after benchmark runs + * compare the same input. + */ + +import { DiffNode, FileDiff } from '../../../types' + +/** + * Tiny pseudo-LCG — keeps the synthetic content stable across runs + * without pulling in a seedable PRNG dep. The output is a repeating + * character pattern, not statistically random; that's fine for a + * bench fixture. + */ +function seededTextBlob(lengthChars: number, seed: number): string { + const corpus = 'abcdefghijklmnopqrstuvwxyz0123456789 \n' + let state = seed >>> 0 + let out = '' + for (let i = 0; i < lengthChars; i++) { + state = (state * 1664525 + 1013904223) >>> 0 + out += corpus[state % corpus.length] + } + return out } + +/** + * Build a synthetic file diff at approximately the requested token + * count. Token estimate uses chars/4, which is rough but consistent + * with how tiktoken behaves for prose-like content; the fixture's + * `tokenCount` carries this estimate, while the runner still hands + * the real token counter to `summarizeDiffs`, so telemetry token + * counts come from the real tokenizer.
+ */ +function buildFileDiff(file: string, approxTokens: number, seed: number): FileDiff { + const chars = approxTokens * 4 + const header = `diff --git a/${file} b/${file}\n--- a/${file}\n+++ b/${file}\n@@ -1,1 +1,${Math.max(1, Math.floor(approxTokens / 4))} @@\n` + const body = seededTextBlob(chars, seed) + .split('\n') + .map((line) => `+${line}`) + .join('\n') + return { + file, + diff: header + body, + summary: '', + tokenCount: approxTokens, + } +} + +type FixtureSpec = { + name: string + files: Array<{ path: string; tokens: number }> +} + +const TINY_SPEC: FixtureSpec = { + name: 'tiny', + files: [ + { path: 'src/index.ts', tokens: 200 }, + { path: 'src/util.ts', tokens: 150 }, + { path: 'README.md', tokens: 300 }, + { path: 'package.json', tokens: 80 }, + { path: 'tsconfig.json', tokens: 60 }, + ], +} + +const MEDIUM_SPEC: FixtureSpec = { + name: 'medium', + files: [ + { path: 'src/api.ts', tokens: 3500 }, + { path: 'src/auth.ts', tokens: 2400 }, + { path: 'src/cli.ts', tokens: 4800 }, + { path: 'src/parser.ts', tokens: 2900 }, + { path: 'src/utils/http.ts', tokens: 1200 }, + { path: 'src/utils/format.ts', tokens: 800 }, + { path: 'src/utils/logger.ts', tokens: 600 }, + { path: 'tests/api.test.ts', tokens: 1800 }, + { path: 'tests/auth.test.ts', tokens: 1400 }, + { path: 'tests/parser.test.ts', tokens: 1600 }, + { path: 'tests/utils/http.test.ts', tokens: 700 }, + { path: 'tests/fixtures/sample.json', tokens: 500 }, + { path: 'docs/ARCHITECTURE.md', tokens: 2300 }, + { path: 'docs/API.md', tokens: 1900 }, + { path: 'docs/CONTRIBUTING.md', tokens: 1100 }, + { path: 'README.md', tokens: 3000 }, + { path: 'CHANGELOG.md', tokens: 1800 }, + { path: '.github/workflows/ci.yml', tokens: 600 }, + { path: '.github/workflows/release.yml', tokens: 900 }, + { path: '.github/ISSUE_TEMPLATE/bug.md', tokens: 400 }, + { path: 'package.json', tokens: 700 }, + { path: 'tsconfig.json', tokens: 200 }, + { path: '.gitignore', tokens: 150 }, + { path: 'LICENSE', tokens: 300 }, + { path: 'pyproject.toml', tokens: 600 }, + ], +} + +const LARGE_SPEC: FixtureSpec = { + name: 'large', + files: [ + // Mirror of the user's 43-file initial commit shape, scaled up + // a bit (50 files / ~100k tokens) so we have headroom for both + // pre-process and consolidation phases to fire heavily. 
+ { path: 'humble_bundle_keys/api.py', tokens: 4400 }, + { path: 'humble_bundle_keys/auth.py', tokens: 2100 }, + { path: 'humble_bundle_keys/cli.py', tokens: 7600 }, + { path: 'humble_bundle_keys/diagnose.py', tokens: 6100 }, + { path: 'humble_bundle_keys/scraper.py', tokens: 5200 }, + { path: 'humble_bundle_keys/choice.py', tokens: 4500 }, + { path: 'humble_bundle_keys/browser_choice.py', tokens: 5500 }, + { path: 'humble_bundle_keys/exporter.py', tokens: 1300 }, + { path: 'humble_bundle_keys/models.py', tokens: 700 }, + { path: 'humble_bundle_keys/_browser_fetch.py', tokens: 1000 }, + { path: 'humble_bundle_keys/_orders_cache.py', tokens: 1200 }, + { path: 'humble_bundle_keys/__init__.py', tokens: 110 }, + { path: 'humble_bundle_keys/__main__.py', tokens: 110 }, + { path: 'tests/RUNBOOK.md', tokens: 1900 }, + { path: 'tests/test_api_parser.py', tokens: 1400 }, + { path: 'tests/test_browser_choice.py', tokens: 1200 }, + { path: 'tests/test_browser_fetch.py', tokens: 1100 }, + { path: 'tests/test_choice.py', tokens: 3000 }, + { path: 'tests/test_diagnose_sanitiser.py', tokens: 2300 }, + { path: 'tests/test_exporter.py', tokens: 1700 }, + { path: 'tests/test_parsers.py', tokens: 600 }, + { path: 'tests/__init__.py', tokens: 40 }, + { path: 'tests/fixtures/choice_claim/README.md', tokens: 400 }, + { path: 'tests/fixtures/choice_claim/analytics_get_game.json', tokens: 500 }, + { path: 'tests/fixtures/choice_claim/analytics_tile_click.json', tokens: 500 }, + { path: 'tests/fixtures/choice_claim/choosecontent.json', tokens: 600 }, + { path: 'tests/fixtures/choice_claim/redeemkey.json', tokens: 600 }, + { path: 'docs/ARCHITECTURE.md', tokens: 2300 }, + { path: 'docs/CHOICE_CLAIM_SPEC.md', tokens: 3900 }, + { path: 'docs/WHATS_CLAIMABLE.md', tokens: 1300 }, + { path: 'README.md', tokens: 3900 }, + { path: 'CHANGELOG.md', tokens: 3800 }, + { path: 'CONTRIBUTING.md', tokens: 1200 }, + { path: 'SECURITY.md', tokens: 1000 }, + { path: 'LICENSE', tokens: 300 }, + { path: 'pyproject.toml', tokens: 600 }, + { path: '.gitignore', tokens: 700 }, + { path: '.github/ISSUE_TEMPLATE/bug_report.md', tokens: 400 }, + { path: '.github/ISSUE_TEMPLATE/feature_request.md', tokens: 250 }, + { path: '.github/ISSUE_TEMPLATE/selector_broken.md', tokens: 500 }, + { path: '.github/ISSUE_TEMPLATE/config.yml', tokens: 200 }, + { path: '.github/workflows/ci.yml', tokens: 600 }, + { path: '.github/workflows/release.yml', tokens: 900 }, + { path: 'src/feature/a.ts', tokens: 1400 }, + { path: 'src/feature/b.ts', tokens: 1100 }, + { path: 'src/feature/c.ts', tokens: 900 }, + { path: 'src/feature/d.ts', tokens: 800 }, + { path: 'src/feature/e.ts', tokens: 700 }, + { path: 'src/feature/utils.ts', tokens: 600 }, + { path: 'src/feature/types.ts', tokens: 400 }, + ], +} + +/** + * Convert a flat fixture spec into a nested DiffNode tree, grouping + * by directory path. Mirrors `createDiffTree`'s behavior on real + * file lists. + */ +function buildDiffNode(spec: FixtureSpec): DiffNode { + const root: DiffNode = { path: '/', diffs: [], children: [] } + const dirIndex = new Map([['/', root]]) + + spec.files.forEach((file, index) => { + const segments = file.path.split('/') + const fileName = segments.pop() as string + const dirSegments = segments + + let node = root + let pathSoFar = '' + for (const segment of dirSegments) { + pathSoFar = pathSoFar ? 
`${pathSoFar}/${segment}` : segment + const cached = dirIndex.get(pathSoFar) + if (cached) { + node = cached + continue + } + const child: DiffNode = { path: segment, diffs: [], children: [] } + node.children.push(child) + dirIndex.set(pathSoFar, child) + node = child + } + + node.diffs.push(buildFileDiff(`${dirSegments.join('/')}${dirSegments.length ? '/' : ''}${fileName}`, file.tokens, index + 1)) + }) + + return root +} + +export type DiffFixture = { + name: string + fileCount: number + approxTokens: number + rootNode: DiffNode +} + +function asFixture(spec: FixtureSpec): DiffFixture { + return { + name: spec.name, + fileCount: spec.files.length, + approxTokens: spec.files.reduce((sum, file) => sum + file.tokens, 0), + rootNode: buildDiffNode(spec), + } +} + +export const tinyFixture: DiffFixture = asFixture(TINY_SPEC) +export const mediumFixture: DiffFixture = asFixture(MEDIUM_SPEC) +export const largeFixture: DiffFixture = asFixture(LARGE_SPEC) + +export const allFixtures: DiffFixture[] = [tinyFixture, mediumFixture, largeFixture]
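As a usage sketch (not part of the patch above): because `buildLlmBenchRun` is pure and `logLlmCall` already routes every call through `recordBenchCall`, the telemetry buffer can be exercised in an ordinary Jest test without touching disk. The test file path and the metadata values below are hypothetical, and the sketch assumes that `task` plus the optional numeric fields are enough to satisfy `LlmCallMetadata`:

```ts
// Hypothetical test, e.g. src/lib/langchain/utils/__tests__/observability.bench.test.ts
import {
  resetLlmTelemetry,
  buildLlmBenchRun,
  logLlmCall,
} from '../observability'
import { Logger } from '../../../utils/logger'

describe('bench telemetry buffer', () => {
  it('aggregates calls recorded while COCO_BENCH is active', () => {
    process.env.COCO_BENCH = '1'
    resetLlmTelemetry()

    // Same silent-logger pattern the bench runner uses.
    const logger = new Logger({ verbose: false } as never)

    // Illustrative metadata; task names and numbers are made up.
    logLlmCall(logger, { task: 'summarize', promptTokens: 1200, elapsedMs: 800 })
    logLlmCall(logger, { task: 'consolidate', promptTokens: 300, elapsedMs: 200 })

    const run = buildLlmBenchRun({ command: 'test' })
    expect(run.callCount).toBe(2)
    expect(run.totalPromptTokens).toBe(1500)
    expect(run.totalLlmElapsedMs).toBe(1000)
  })
})
```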