Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
147 changes: 123 additions & 24 deletions .bench/baseline.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"capturedAt": "2026-05-06T04:51:15.941Z",
"capturedAt": "2026-05-06T05:12:57.441Z",
"node": "v22.13.0",
"platform": "darwin-arm64",
"options": {
Expand All @@ -16,61 +16,148 @@
"durationMs": 2,
"llmCalls": 0,
"llmTotalMs": 0,
"llmTotalPromptTokens": 0
"llmTotalPromptTokens": 0,
"pass": "cold"
},
{
"fixture": "tiny",
"fileCount": 5,
"approxTokens": 790,
"durationMs": 1,
"llmCalls": 0,
"llmTotalMs": 0,
"llmTotalPromptTokens": 0,
"pass": "warm"
},
{
"fixture": "medium",
"fileCount": 25,
"approxTokens": 36150,
"durationMs": 8502,
"durationMs": 8505,
"llmCalls": 6,
"llmTotalMs": 36155,
"llmTotalPromptTokens": 8525
"llmTotalMs": 36162,
"llmTotalPromptTokens": 8525,
"pass": "cold"
},
{
"fixture": "medium",
"fileCount": 25,
"approxTokens": 36150,
"durationMs": 4,
"llmCalls": 0,
"llmTotalMs": 0,
"llmTotalPromptTokens": 0,
"pass": "warm"
},
{
"fixture": "large",
"fileCount": 50,
"approxTokens": 83410,
"durationMs": 14413,
"durationMs": 14415,
"llmCalls": 7,
"llmTotalMs": 55470,
"llmTotalPromptTokens": 17461
"llmTotalMs": 55466,
"llmTotalPromptTokens": 17461,
"pass": "cold"
},
{
"fixture": "large",
"fileCount": 50,
"approxTokens": 83410,
"durationMs": 6,
"llmCalls": 0,
"llmTotalMs": 0,
"llmTotalPromptTokens": 0,
"pass": "warm"
},
{
"fixture": "feature-add",
"fileCount": 14,
"approxTokens": 17600,
"durationMs": 13062,
"durationMs": 13074,
"llmCalls": 4,
"llmTotalMs": 27557,
"llmTotalPromptTokens": 6117
"llmTotalMs": 27556,
"llmTotalPromptTokens": 6117,
"pass": "cold"
},
{
"fixture": "feature-add",
"fileCount": 14,
"approxTokens": 17600,
"durationMs": 4,
"llmCalls": 0,
"llmTotalMs": 0,
"llmTotalPromptTokens": 0,
"pass": "warm"
},
{
"fixture": "refactor",
"fileCount": 30,
"approxTokens": 32650,
"durationMs": 51116,
"durationMs": 51115,
"llmCalls": 20,
"llmTotalMs": 187612,
"llmTotalPromptTokens": 53548
"llmTotalMs": 187600,
"llmTotalPromptTokens": 53548,
"pass": "cold"
},
{
"fixture": "refactor",
"fileCount": 30,
"approxTokens": 32650,
"durationMs": 5,
"llmCalls": 0,
"llmTotalMs": 0,
"llmTotalPromptTokens": 0,
"pass": "warm"
},
{
"fixture": "initial-commit",
"fileCount": 50,
"approxTokens": 83410,
"durationMs": 16965,
"durationMs": 16962,
"llmCalls": 7,
"llmTotalMs": 57202,
"llmTotalPromptTokens": 17107
"llmTotalMs": 57196,
"llmTotalPromptTokens": 17107,
"pass": "cold"
},
{
"fixture": "initial-commit",
"fileCount": 50,
"approxTokens": 83410,
"durationMs": 5,
"llmCalls": 0,
"llmTotalMs": 0,
"llmTotalPromptTokens": 0,
"pass": "warm"
},
{
"fixture": "docs-update",
"fileCount": 9,
"approxTokens": 15050,
"durationMs": 24384,
"durationMs": 24385,
"llmCalls": 7,
"llmTotalMs": 68468,
"llmTotalPromptTokens": 13139
"llmTotalMs": 68473,
"llmTotalPromptTokens": 13139,
"pass": "cold"
},
{
"fixture": "docs-update",
"fileCount": 9,
"approxTokens": 15050,
"durationMs": 3,
"llmCalls": 0,
"llmTotalMs": 0,
"llmTotalPromptTokens": 0,
"pass": "warm"
},
{
"fixture": "dep-bump",
"fileCount": 2,
"approxTokens": 450,
"durationMs": 0,
"llmCalls": 0,
"llmTotalMs": 0,
"llmTotalPromptTokens": 0,
"pass": "cold"
},
{
"fixture": "dep-bump",
Expand All @@ -79,16 +166,28 @@
"durationMs": 0,
"llmCalls": 0,
"llmTotalMs": 0,
"llmTotalPromptTokens": 0
"llmTotalPromptTokens": 0,
"pass": "warm"
},
{
"fixture": "monorepo",
"fileCount": 80,
"approxTokens": 159320,
"durationMs": 88957,
"durationMs": 88956,
"llmCalls": 80,
"llmTotalMs": 921786,
"llmTotalPromptTokens": 249565
"llmTotalMs": 921782,
"llmTotalPromptTokens": 249565,
"pass": "cold"
},
{
"fixture": "monorepo",
"fileCount": 80,
"approxTokens": 159320,
"durationMs": 29,
"llmCalls": 0,
"llmTotalMs": 0,
"llmTotalPromptTokens": 0,
"pass": "warm"
}
]
}
42 changes: 37 additions & 5 deletions bin/benchmark.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ import type { Document } from '@langchain/classic/document'

import { fileChangeParser } from '../src/lib/parsers/default'
import { summarizeDiffs } from '../src/lib/parsers/default/utils/summarizeDiffs'
import { clearDiffSummaryCache } from '../src/lib/parsers/default/utils/diffSummaryCache'
import { allFixtures, DiffFixture } from '../src/lib/parsers/default/__fixtures__'
import { Logger } from '../src/lib/utils/logger'
import { getTokenCounter } from '../src/lib/utils/tokenizer'
Expand Down Expand Up @@ -88,6 +89,8 @@ type BenchResult = {
llmCalls: number
llmTotalMs: number
llmTotalPromptTokens: number
/** Which pass of a `--repeat` run produced this row: `cold` (cache cleared first) or `warm` (cache populated by the cold pass). Unset for single-pass runs. */
pass?: 'cold' | 'warm'
}

/**
Expand Down Expand Up @@ -190,13 +193,19 @@ function formatRow(label: string, value: string | number): string {
function printSummary(results: BenchResult[], baseline?: BenchResult[]): void {
console.log('\n=== diff-condensing benchmark ===\n')
for (const result of results) {
console.log(`Fixture: ${result.fixture} (${result.fileCount} files, ~${result.approxTokens} tokens)`)
const passLabel = result.pass ? ` (${result.pass})` : ''
console.log(`Fixture: ${result.fixture}${passLabel} (${result.fileCount} files, ~${result.approxTokens} tokens)`)
console.log(formatRow('wall-clock duration', `${result.durationMs}ms`))
console.log(formatRow('llm calls', result.llmCalls))
console.log(formatRow('llm total time', `${result.llmTotalMs}ms`))
console.log(formatRow('llm prompt tokens', result.llmTotalPromptTokens))
if (baseline) {
const prior = baseline.find((entry) => entry.fixture === result.fixture)
// Compare like with like: pick the baseline row with the same fixture AND
// the same pass (cold, warm, or unlabelled), so warm-cache numbers are
// never diffed against a cold baseline and vice versa.
const matchPass = result.pass ?? undefined
const prior = baseline.find(
(entry) => entry.fixture === result.fixture && (entry.pass ?? undefined) === matchPass
)
if (prior) {
const deltaPct = (n: number, p: number) =>
p === 0 ? 'n/a' : `${(((n - p) / p) * 100).toFixed(1)}%`
Expand Down Expand Up @@ -246,6 +255,16 @@ async function main(): Promise<void> {
const args = process.argv.slice(2)
const updateBaseline = args.includes('--update')
const fixtureArg = args.find((arg) => arg.startsWith('--fixture='))?.split('=')[1]
// --repeat runs each fixture twice: a cold pass (cache cleared
// beforehand) and a warm pass (cache populated by the cold pass).
// Demonstrates the cache hit rate added in #845 PR 5 — same fixture,
// unchanged inputs, second run should be essentially free.
const repeat = args.includes('--repeat')
// --no-cache disables the diff-summary cache for the run. Useful
// for reproducing pre-PR-5 numbers against the same harness.
if (args.includes('--no-cache')) {
process.env.COCO_NO_CACHE = '1'
}

const fixtures = fixtureArg
? allFixtures.filter((fixture) => fixture.name === fixtureArg)
Expand All @@ -258,9 +277,22 @@ async function main(): Promise<void> {

const results: BenchResult[] = []
for (const fixture of fixtures) {
console.log(`Running fixture ${fixture.name}...`)
const result = await runFixture(fixture, DEFAULT_OPTIONS)
results.push(result)
if (repeat) {
// Cold pass: clear the cache for this repo first so the run
// can't piggyback on a prior bench.
clearDiffSummaryCache(process.cwd())
console.log(`Running fixture ${fixture.name} (cold)...`)
const cold = await runFixture(fixture, DEFAULT_OPTIONS)
results.push({ ...cold, pass: 'cold' })

console.log(`Running fixture ${fixture.name} (warm)...`)
const warm = await runFixture(fixture, DEFAULT_OPTIONS)
results.push({ ...warm, pass: 'warm' })
} else {
console.log(`Running fixture ${fixture.name}...`)
const result = await runFixture(fixture, DEFAULT_OPTIONS)
results.push(result)
}
}

const baseline = updateBaseline ? undefined : readBaseline()
Expand Down
19 changes: 19 additions & 0 deletions src/commands/cache/config.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
import { Arguments, Argv } from 'yargs'
import { getCommandUsageHeader } from '../../lib/ui/helpers'
import { BaseCommandOptions } from '../types'

/**
 * Options accepted by the `cache` command. Currently adds nothing on top of
 * the shared `BaseCommandOptions`.
 *
 * NOTE(review): the `subcommand` positional registered by `builder` is not
 * declared here — confirm how the handler reads it.
 */
export interface CacheOptions extends BaseCommandOptions {}

/** yargs-parsed argument object for the `cache` command. */
export type CacheArgv = Arguments<CacheOptions>

/** yargs command signature: `cache` followed by one required positional `subcommand`. */
export const command = 'cache <subcommand>'

/**
 * yargs builder for the `cache` command.
 *
 * Declares the positional `subcommand` (a string restricted to `clear` or
 * `info`) and then attaches the usage header generated for this command.
 */
export const builder = (yargs: Argv) => {
  const withSubcommand = yargs.positional('subcommand', {
    describe: 'Cache action to run (clear, info)',
    type: 'string',
    choices: ['clear', 'info'] as const,
  })

  return withSubcommand.usage(getCommandUsageHeader(command))
}
73 changes: 73 additions & 0 deletions src/commands/cache/handler.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
import * as fs from 'node:fs'
import * as os from 'node:os'
import * as path from 'node:path'

import {
diffSummaryKey,
getDiffSummaryCachePath,
writeDiffSummary,
} from '../../lib/parsers/default/utils/diffSummaryCache'
import { handler } from './handler'

/**
 * Exercises the `coco cache` handler's `clear` and `info` subcommands plus
 * the unknown-subcommand path, using a stub logger to capture output.
 */
describe('coco cache <subcommand>', () => {
  let tmpRoot: string
  let originalXdgCacheHome: string | undefined
  let logger: { log: jest.Mock }

  // Each test gets a fresh temp directory as XDG_CACHE_HOME (presumably the
  // root the diff-summary cache path is resolved under — confirm in
  // diffSummaryCache) and a fresh logger stub.
  beforeEach(() => {
    tmpRoot = fs.mkdtempSync(path.join(os.tmpdir(), 'coco-cache-cmd-'))
    originalXdgCacheHome = process.env.XDG_CACHE_HOME
    process.env.XDG_CACHE_HOME = tmpRoot
    logger = { log: jest.fn() }
  })

  // Put the environment back exactly as it was (including "unset") and
  // remove the temp directory.
  afterEach(() => {
    if (originalXdgCacheHome === undefined) {
      delete process.env.XDG_CACHE_HOME
    } else {
      process.env.XDG_CACHE_HOME = originalXdgCacheHome
    }
    fs.rmSync(tmpRoot, { recursive: true, force: true })
  })

  it('clear: removes the cache file when present', async () => {
    const key = diffSummaryKey('diff', 'gpt', 'p')
    writeDiffSummary(process.cwd(), key, { summary: 's', model: 'gpt', tokens: 5 })
    expect(fs.existsSync(getDiffSummaryCachePath(process.cwd()))).toBe(true)

    await handler({ subcommand: 'clear' } as never, logger as never)

    expect(fs.existsSync(getDiffSummaryCachePath(process.cwd()))).toBe(false)
    expect(logger.log).toHaveBeenCalledWith(expect.stringContaining('Cleared'))
  })

  it('clear: reports no-op when the cache is cold', async () => {
    await handler({ subcommand: 'clear' } as never, logger as never)
    expect(logger.log).toHaveBeenCalledWith(expect.stringContaining('No diff-summary cache'))
  })

  it('info: reports entry count + on-disk size when warm', async () => {
    const key = diffSummaryKey('diff', 'gpt', 'p')
    writeDiffSummary(process.cwd(), key, { summary: 'summary text', model: 'gpt', tokens: 9 })

    await handler({ subcommand: 'info' } as never, logger as never)

    // Join every logged first argument so the assertions are independent of
    // how the handler splits its output across log calls.
    const lines = logger.log.mock.calls.map((args) => args[0]).join('\n')
    expect(lines).toContain('entries')
    expect(lines).toContain('1')
    expect(lines).toContain('summary tokens')
  })

  it('info: notes a missing cache instead of erroring', async () => {
    await handler({ subcommand: 'info' } as never, logger as never)
    expect(logger.log).toHaveBeenCalledWith(expect.stringContaining('No diff-summary cache'))
  })

  it('rejects unknown subcommands and sets exit code', async () => {
    const previousExit = process.exitCode
    try {
      await handler({ subcommand: 'panic' } as never, logger as never)
      expect(logger.log).toHaveBeenCalledWith(expect.stringContaining('Unknown cache subcommand'))
      expect(process.exitCode).toBe(1)
    } finally {
      // Restore even when an assertion above throws, so the failure exit
      // code set by the handler cannot leak into the rest of the test run.
      process.exitCode = previousExit
    }
  })
})
Loading
Loading