diff --git a/.bench/baseline.json b/.bench/baseline.json index 74fcbbe..fc2cc46 100644 --- a/.bench/baseline.json +++ b/.bench/baseline.json @@ -1,19 +1,19 @@ { - "capturedAt": "2026-05-06T00:45:56.229Z", + "capturedAt": "2026-05-06T00:58:34.994Z", "node": "v22.13.0", "platform": "darwin-arm64", "options": { "baseLatencyMs": 1500, "perTokenMs": 2, "maxConcurrent": 6, - "maxTokens": 2048 + "maxTokens": 4096 }, "results": [ { "fixture": "tiny", "fileCount": 5, "approxTokens": 790, - "durationMs": 1, + "durationMs": 2, "llmCalls": 0, "llmTotalMs": 0, "llmTotalPromptTokens": 0 @@ -22,55 +22,55 @@ "fixture": "medium", "fileCount": 25, "approxTokens": 36150, - "durationMs": 31137, - "llmCalls": 20, - "llmTotalMs": 106348, - "llmTotalPromptTokens": 34237 + "durationMs": 29267, + "llmCalls": 19, + "llmTotalMs": 109679, + "llmTotalPromptTokens": 36895 }, { "fixture": "large", "fileCount": 50, "approxTokens": 83410, - "durationMs": 72093, - "llmCalls": 41, - "llmTotalMs": 244101, - "llmTotalPromptTokens": 74197 + "durationMs": 59992, + "llmCalls": 30, + "llmTotalMs": 228089, + "llmTotalPromptTokens": 74609 }, { "fixture": "feature-add", "fileCount": 14, "approxTokens": 17600, - "durationMs": 15967, + "durationMs": 19591, "llmCalls": 11, - "llmTotalMs": 54727, - "llmTotalPromptTokens": 18937 + "llmTotalMs": 59354, + "llmTotalPromptTokens": 20707 }, { "fixture": "refactor", "fileCount": 30, "approxTokens": 32650, - "durationMs": 33999, - "llmCalls": 28, - "llmTotalMs": 153888, - "llmTotalPromptTokens": 52430 + "durationMs": 41340, + "llmCalls": 20, + "llmTotalMs": 143983, + "llmTotalPromptTokens": 53548 }, { "fixture": "initial-commit", "fileCount": 50, "approxTokens": 83410, - "durationMs": 72285, - "llmCalls": 41, - "llmTotalMs": 245148, - "llmTotalPromptTokens": 74546 + "durationMs": 60034, + "llmCalls": 30, + "llmTotalMs": 229291, + "llmTotalPromptTokens": 74948 }, { "fixture": "docs-update", "fileCount": 9, "approxTokens": 15050, - "durationMs": 18570, - "llmCalls": 8, - "llmTotalMs": 56293, - "llmTotalPromptTokens": 13908 + "durationMs": 18563, + "llmCalls": 7, + "llmTotalMs": 52225, + "llmTotalPromptTokens": 13139 }, { "fixture": "dep-bump", diff --git a/bin/benchmark.ts b/bin/benchmark.ts index 81a1a80..7bdbb57 100644 --- a/bin/benchmark.ts +++ b/bin/benchmark.ts @@ -71,7 +71,11 @@ const DEFAULT_OPTIONS: BenchOptions = { baseLatencyMs: 1500, perTokenMs: 2, maxConcurrent: 6, - maxTokens: 2048, + // Match the canonical service tokenLimit from `langchain/utils.ts` + // (raised from 2048 to 4096 in PR 1 of #845). The bench mirrors + // the most-common production budget so per-PR diffs reflect what + // real users will see. + maxTokens: 4096, } type BenchResult = { diff --git a/src/lib/parsers/default/index.ts b/src/lib/parsers/default/index.ts index 667d8e8..025bfbb 100644 --- a/src/lib/parsers/default/index.ts +++ b/src/lib/parsers/default/index.ts @@ -49,10 +49,17 @@ export async function fileChangeParser({ // 1. Pre-process large files to prevent bias // 2. Group by directory and assess token count // 3. Wave-based parallel summarization until under budget + // + // The 4096 fallback (#845) matches the default service configs + // for openai / anthropic / ollama (`langchain/utils.ts`). It's a + // safety net for users with custom service definitions that omit + // `tokenLimit` — without it those users hit a degenerate 2048 + // budget that triggers needless pre-summarization on diffs the + // model could absorb whole. logger.startTimer() const summary = await summarizeDiffs(diffs, { tokenizer, - maxTokens: maxTokens || 2048, + maxTokens: maxTokens || 4096, minTokensForSummary, maxFileTokens, maxConcurrent, diff --git a/src/lib/parsers/default/utils/summarizeDiffs.ts b/src/lib/parsers/default/utils/summarizeDiffs.ts index 3375e73..dddc604 100644 --- a/src/lib/parsers/default/utils/summarizeDiffs.ts +++ b/src/lib/parsers/default/utils/summarizeDiffs.ts @@ -247,7 +247,15 @@ export async function summarizeDiffs( { tokenizer, logger, - maxTokens = 2048, + // Default raised to 4096 (#845) so the budget matches the + // canonical service configs in `langchain/utils.ts`. The + // previous 2048 default came from an earlier era when 4k + // context was a stretch for fast models; today every shipped + // service overrides it to 4096 anyway. Keeping this in sync + // with the service defaults means a caller that omits + // `maxTokens` doesn't accidentally fall into a tighter budget + // than the rest of the system assumes. + maxTokens = 4096, minTokensForSummary = 400, maxFileTokens, maxConcurrent = 6,