Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 22 additions & 22 deletions .bench/baseline.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"capturedAt": "2026-05-06T00:58:34.994Z",
"capturedAt": "2026-05-06T01:12:11.470Z",
"node": "v22.13.0",
"platform": "darwin-arm64",
"options": {
Expand All @@ -13,7 +13,7 @@
"fixture": "tiny",
"fileCount": 5,
"approxTokens": 790,
"durationMs": 2,
"durationMs": 1,
"llmCalls": 0,
"llmTotalMs": 0,
"llmTotalPromptTokens": 0
Expand All @@ -22,54 +22,54 @@
"fixture": "medium",
"fileCount": 25,
"approxTokens": 36150,
"durationMs": 29267,
"llmCalls": 19,
"llmTotalMs": 109679,
"llmTotalPromptTokens": 36895
"durationMs": 6906,
"llmCalls": 6,
"llmTotalMs": 25221,
"llmTotalPromptTokens": 8525
},
{
"fixture": "large",
"fileCount": 50,
"approxTokens": 83410,
"durationMs": 59992,
"llmCalls": 30,
"llmTotalMs": 228089,
"llmTotalPromptTokens": 74609
"durationMs": 9749,
"llmCalls": 6,
"llmTotalMs": 42401,
"llmTotalPromptTokens": 16602
},
{
"fixture": "feature-add",
"fileCount": 14,
"approxTokens": 17600,
"durationMs": 19591,
"llmCalls": 11,
"llmTotalMs": 59354,
"llmTotalPromptTokens": 20707
"durationMs": 5640,
"llmCalls": 4,
"llmTotalMs": 18854,
"llmTotalPromptTokens": 6117
},
{
"fixture": "refactor",
"fileCount": 30,
"approxTokens": 32650,
"durationMs": 41340,
"durationMs": 41347,
"llmCalls": 20,
"llmTotalMs": 143983,
"llmTotalMs": 143990,
"llmTotalPromptTokens": 53548
},
{
"fixture": "initial-commit",
"fileCount": 50,
"approxTokens": 83410,
"durationMs": 60034,
"llmCalls": 30,
"llmTotalMs": 229291,
"llmTotalPromptTokens": 74948
"durationMs": 9818,
"llmCalls": 6,
"llmTotalMs": 42557,
"llmTotalPromptTokens": 16306
},
{
"fixture": "docs-update",
"fileCount": 9,
"approxTokens": 15050,
"durationMs": 18563,
"durationMs": 18564,
"llmCalls": 7,
"llmTotalMs": 52225,
"llmTotalMs": 52222,
"llmTotalPromptTokens": 13139
},
{
Expand Down
4 changes: 4 additions & 0 deletions schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -507,6 +507,10 @@
"AnthropicModel": {
"type": "string",
"enum": [
"claude-sonnet-4-6",
"claude-haiku-4-5-20251001",
"claude-haiku-4-5",
"claude-opus-4-7",
"claude-sonnet-4-0",
"claude-3-7-sonnet-latest",
"claude-3-5-haiku-latest",
Expand Down
7 changes: 7 additions & 0 deletions src/lib/langchain/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,14 @@ export type OpenAIModel =
| 'gpt-4.1-nano'

export type AnthropicModel =
// Current generation (recommended for new users)
| 'claude-sonnet-4-6'
| 'claude-haiku-4-5-20251001'
| 'claude-haiku-4-5'
| 'claude-opus-4-7'
// Earlier 4.x line
| 'claude-sonnet-4-0'
// Pre-4.x (kept for users with existing service config pinned to these)
| 'claude-3-7-sonnet-latest'
| 'claude-3-5-haiku-latest'
| 'claude-3-5-sonnet-latest'
Expand Down
15 changes: 13 additions & 2 deletions src/lib/langchain/utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -103,7 +103,12 @@ export function getDefaultServiceApiKey(config: Config): string {

export const DEFAULT_OPENAI_LLM_SERVICE: OpenAILLMService = {
provider: 'openai',
model: 'gpt-4o-mini',
// Bumped from `gpt-4o-mini` to `gpt-4.1-nano` (#854). Diff
// condensing is bounded summarization — the cheaper / faster
// tier is the right default for it; quality is on par for this
// class of task. Users who want the older 4o-mini can still
// override via service config.
model: 'gpt-4.1-nano',
tokenLimit: 4096,
temperature: 0.32,
maxConcurrent: 12,
Expand All @@ -119,7 +124,13 @@ export const DEFAULT_OPENAI_LLM_SERVICE: OpenAILLMService = {

export const DEFAULT_ANTHROPIC_LLM_SERVICE: AnthropicLLMService = {
provider: 'anthropic',
model: 'claude-3-5-sonnet-20240620',
// Bumped from `claude-3-5-sonnet-20240620` to
// `claude-haiku-4-5-20251001` (#854). The Sonnet 3.5 default
// was nearly two model generations stale; Haiku 4.5 is the
// current fast tier and the right fit for diff summarization.
// Users who want Sonnet for quality-sensitive runs can still
// override via service config (recommended: `claude-sonnet-4-6`).
model: 'claude-haiku-4-5-20251001',
temperature: 0.32,
tokenLimit: 4096,
maxConcurrent: 12,
Expand Down
22 changes: 22 additions & 0 deletions src/lib/parsers/default/utils/summarizeLargeFiles.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { FileDiff, DiffNode } from '../../../types'
import { SummarizeContext, summarize } from '../../../langchain/chains/summarize'
import { TokenCounter } from '../../../utils/tokenizer'
import { Logger } from '../../../utils/logger'
import { summarizeTrivialDiff } from './trivialDiff'

export type SummarizeLargeFilesOptions = {
/**
Expand All @@ -22,6 +23,14 @@ export type SummarizeLargeFilesOptions = {

/**
* Summarize a single file diff that exceeds the token threshold.
*
* Trivial-shape short-circuit (#845, PR 2): pure additions, deletions,
* renames, and binary changes carry no information content beyond the
* diff's shape, so we summarize them with a fixed template instead of
* paying for an LLM call. On initial-commit fixtures (lots of pure adds)
* this collapses the per-file summary phase entirely; the resulting tiny
* synthetic summaries usually drop the directory token totals under
* budget, so wave consolidation is skipped too.
*/
async function summarizeFileDiff(
fileDiff: FileDiff,
Expand All @@ -33,6 +42,19 @@ async function summarizeFileDiff(
metadata,
}: Pick<SummarizeLargeFilesOptions, 'chain' | 'textSplitter' | 'tokenizer' | 'logger' | 'metadata'>
): Promise<FileDiff> {
const trivialSummary = summarizeTrivialDiff(fileDiff)
if (trivialSummary !== undefined) {
logger.verbose(
` - ${fileDiff.file}: trivial-shape skip (no LLM call)`,
{ color: 'gray' }
)
return {
...fileDiff,
diff: trivialSummary,
tokenCount: tokenizer(trivialSummary),
}
}

try {
const fileSummary = await summarize(
[
Expand Down
145 changes: 145 additions & 0 deletions src/lib/parsers/default/utils/trivialDiff.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
import { FileDiff } from '../../../types'
import {
detectTrivialDiffShape,
summarizeTrivialDiff,
} from './trivialDiff'

// Fixture: a brand-new file. Carries the `new file mode` header, a
// `/dev/null` old side, and a single hunk made only of `+` lines (3 of them).
const additionDiff = `diff --git a/foo.ts b/foo.ts
new file mode 100644
index 0000000..1234567
--- /dev/null
+++ b/foo.ts
@@ -0,0 +1,3 @@
+export const foo = 1
+export const bar = 2
+export const baz = 3
`

// Fixture: a removed file. Mirror image of `additionDiff`: `deleted file mode`
// header, a `/dev/null` new side, and a hunk made only of `-` lines (3 of them).
const deletionDiff = `diff --git a/legacy.ts b/legacy.ts
deleted file mode 100644
index 1234567..0000000
--- a/legacy.ts
+++ /dev/null
@@ -1,3 +0,0 @@
-export const legacy = 1
-export const old = 2
-export const stale = 3
`

// Fixture: a rename with 100% similarity, so there is no hunk body at all,
// only the `rename from` / `rename to` header pair.
const renameDiff = `diff --git a/old/path.ts b/new/path.ts
similarity index 100%
rename from old/path.ts
rename to new/path.ts
`

// Fixture: a binary change. Git emits a single `Binary files ... differ`
// line instead of hunks.
const binaryDiff = `diff --git a/assets/logo.png b/assets/logo.png
Binary files a/assets/logo.png and b/assets/logo.png differ
`

// Fixture: an ordinary in-place edit mixing `+` and `-` lines. The tests
// below expect this one NOT to be classified as trivial.
const modificationDiff = `diff --git a/src/foo.ts b/src/foo.ts
index 1234567..89abcde 100644
--- a/src/foo.ts
+++ b/src/foo.ts
@@ -1,5 +1,7 @@
const foo = 1
-const bar = 2
+const bar = 22
+const baz = 3
const quux = 4
`

// Fixture: a rename (87% similarity) that also carries a hunk with edits.
// The tests below expect this one NOT to be classified as trivial either.
const renameWithEditDiff = `diff --git a/old/path.ts b/new/path.ts
similarity index 87%
rename from old/path.ts
rename to new/path.ts
@@ -1,3 +1,4 @@
const foo = 1
-const bar = 2
+const bar = 22
+const baz = 3
`

// Shape classification: every trivial fixture must map to its shape tag,
// while diffs with real edits (or no content at all) stay unclassified.
describe('detectTrivialDiffShape', () => {
  // [test title, diff text, expected shape tag]
  const classified: ReadonlyArray<readonly [string, string, string]> = [
    ['detects pure additions', additionDiff, 'addition'],
    ['detects pure deletions', deletionDiff, 'deletion'],
    ['detects pure renames (no body)', renameDiff, 'rename'],
    ['detects binary file changes', binaryDiff, 'binary'],
  ]
  for (const [title, diffText, shape] of classified) {
    it(title, () => {
      expect(detectTrivialDiffShape(diffText)).toBe(shape)
    })
  }

  // [test title, diff text]: inputs that must not be given a shape.
  const unclassified: ReadonlyArray<readonly [string, string]> = [
    ['returns undefined for modifications (mixed +/-)', modificationDiff],
    [
      'returns undefined for renames that also include edits (rename + body)',
      renameWithEditDiff,
    ],
    ['returns undefined for empty input', ''],
  ]
  for (const [title, diffText] of unclassified) {
    it(title, () => {
      expect(detectTrivialDiffShape(diffText)).toBeUndefined()
    })
  }

  it('ignores +++ / --- header markers when classifying', () => {
    // `+++ b/file` and `--- a/file` are file headers, not content lines,
    // so they must not be counted as an added or removed line.
    const shapeOfAddition = detectTrivialDiffShape(additionDiff)
    expect(shapeOfAddition).toBe('addition')

    const shapeOfDeletion = detectTrivialDiffShape(deletionDiff)
    expect(shapeOfDeletion).toBe('deletion')
  })
})

// Templated summaries: each trivial shape has a fixed sentence; anything
// else yields `undefined`.
describe('summarizeTrivialDiff', () => {
  // Wraps raw diff text in a FileDiff with an empty summary and a fixed
  // token count of 100.
  const makeDiff = (file: string, diff: string): FileDiff => ({
    file,
    diff,
    summary: '',
    tokenCount: 100,
  })

  it('templated summary for pure addition includes line count', () => {
    const summary = summarizeTrivialDiff(makeDiff('foo.ts', additionDiff))
    expect(summary).toBe('Added `foo.ts` (3 lines).')
  })

  it('templated summary for pure deletion includes line count', () => {
    const summary = summarizeTrivialDiff(makeDiff('legacy.ts', deletionDiff))
    expect(summary).toBe('Removed `legacy.ts` (3 lines).')
  })

  it('singular line wording when count is 1', () => {
    // A one-line addition, to pin "1 line" rather than "1 lines".
    const oneLine = `diff --git a/foo b/foo
new file mode 100644
--- /dev/null
+++ b/foo
@@ -0,0 +1,1 @@
+only one line
`
    const summary = summarizeTrivialDiff(makeDiff('foo', oneLine))
    expect(summary).toBe('Added `foo` (1 line).')
  })

  it('rename summary names both old and new path', () => {
    const summary = summarizeTrivialDiff(makeDiff('new/path.ts', renameDiff))
    expect(summary).toBe('Renamed `old/path.ts` → `new/path.ts`.')
  })

  it('binary summary is shape-only (no line count)', () => {
    const summary = summarizeTrivialDiff(makeDiff('assets/logo.png', binaryDiff))
    expect(summary).toBe('Updated binary file `assets/logo.png`.')
  })

  it('returns undefined for modifications so the LLM path stays in charge', () => {
    const summary = summarizeTrivialDiff(makeDiff('src/foo.ts', modificationDiff))
    expect(summary).toBeUndefined()
  })

  it('returns undefined for renames-with-edit', () => {
    const summary = summarizeTrivialDiff(makeDiff('new/path.ts', renameWithEditDiff))
    expect(summary).toBeUndefined()
  })
})
Loading
Loading