From b35995538a447fe4a763aab28bdbb2bdf308c003 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 11 Apr 2026 16:30:09 -0700 Subject: [PATCH 01/18] Read files: if beyond max, read first 100k chars --- sdk/src/__tests__/read-files.test.ts | 12 +++++++----- sdk/src/tools/read-files.ts | 9 ++++----- 2 files changed, 11 insertions(+), 10 deletions(-) diff --git a/sdk/src/__tests__/read-files.test.ts b/sdk/src/__tests__/read-files.test.ts index 547bbfaa4..965662286 100644 --- a/sdk/src/__tests__/read-files.test.ts +++ b/sdk/src/__tests__/read-files.test.ts @@ -186,8 +186,8 @@ describe('getFiles', () => { }) describe('file too large', () => { - test('should truncate files over 100k chars to 1k chars with message', async () => { - const largeContent = 'x'.repeat(101_000) // 101k chars - over limit + test('should truncate files over 100k chars to first 100k chars with message', async () => { + const largeContent = 'x'.repeat(100_001) + 'y'.repeat(1000) // over limit const mockFs = createMockFs({ files: { '/project/large.bin': { @@ -203,11 +203,13 @@ describe('getFiles', () => { fs: mockFs, }) - // Should contain first 1k chars - expect(result['large.bin']).toContain('x'.repeat(1000)) + // Should contain first 100k chars + expect(result['large.bin']).toContain('x'.repeat(100_000)) + // Should NOT contain content beyond the limit + expect(result['large.bin']).not.toContain('y') // Should contain truncation message expect(result['large.bin']).toContain('FILE_TOO_LARGE') - expect(result['large.bin']).toContain('101,000 chars') + expect(result['large.bin']).toContain('101,001 chars') }) test('should read files at exactly 100k chars', async () => { diff --git a/sdk/src/tools/read-files.ts b/sdk/src/tools/read-files.ts index 351eddfb5..c3c85cc68 100644 --- a/sdk/src/tools/read-files.ts +++ b/sdk/src/tools/read-files.ts @@ -30,7 +30,6 @@ export async function getFiles(params: { const result: Record = {} const MAX_FILE_BYTES = 10 * 1024 * 1024 // 10MB - skip reading entirely 
const MAX_CHARS = 100_000 // 100k characters threshold - const TRUNCATE_TO_CHARS = 1_000 // Show first 1k chars when over limit const numFmt = new Intl.NumberFormat('en-US') const fmtNum = (n: number) => numFmt.format(n) @@ -84,14 +83,14 @@ export async function getFiles(params: { const content = await fs.readFile(fullPath, 'utf8') if (content.length > MAX_CHARS) { - const truncated = content.slice(0, TRUNCATE_TO_CHARS) + const truncated = content.slice(0, MAX_CHARS) result[relativePath] = truncated + '\n\n[FILE_TOO_LARGE: This file is ' + fmtNum(content.length) + - ' chars, exceeding the 100k char limit. Only the first ' + - fmtNum(TRUNCATE_TO_CHARS) + - ' chars are shown. Use other tools to read sections of the file.]' + ' chars, exceeding the ' + + fmtNum(MAX_CHARS) + + ' char limit. The content above has been truncated. Use other tools to read other sections of the file.]' } else { // Prepend TEMPLATE marker for example files result[relativePath] = isExampleFile From c655f3851bff14955e6f98435c18a333c3e9c72a Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 11 Apr 2026 16:33:34 -0700 Subject: [PATCH 02/18] tweak description --- agents/basher.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/agents/basher.ts b/agents/basher.ts index dc9dc689d..de7657d54 100644 --- a/agents/basher.ts +++ b/agents/basher.ts @@ -11,7 +11,7 @@ const basher: AgentDefinition = { model: 'google/gemini-3.1-flash-lite-preview', displayName: 'Basher', spawnerPrompt: - 'Runs a single terminal command and describes its output using an LLM. A lightweight shell command executor. Requires both a shell command and a prompt.', + 'Runs a single terminal command and describes its output using an LLM. A lightweight shell command executor. 
You must specifiy also the command to run within the params object.', inputSchema: { prompt: { @@ -24,7 +24,7 @@ const basher: AgentDefinition = { properties: { command: { type: 'string', - description: 'Terminal command to run in bash shell', + description: 'The terminal command to run in bash shell. Don\'t forget this field!', }, timeout_seconds: { type: 'number', From 80e4991e8c3e60b6b1dcecb120a2fc13b82e73cc Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 11 Apr 2026 17:27:58 -0700 Subject: [PATCH 03/18] Include original tool call in malformed spawn/set_output tool call --- .../src/tools/handlers/tool/set-output.ts | 6 +++++- .../src/tools/handlers/tool/spawn-agent-utils.ts | 12 ++++++++++-- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/packages/agent-runtime/src/tools/handlers/tool/set-output.ts b/packages/agent-runtime/src/tools/handlers/tool/set-output.ts index 8dec29711..009755c5e 100644 --- a/packages/agent-runtime/src/tools/handlers/tool/set-output.ts +++ b/packages/agent-runtime/src/tools/handlers/tool/set-output.ts @@ -61,7 +61,11 @@ export const handleSetOutput = (async (params: { const prefix = usedData ? 'Output validation error: Your output was found inside the `data` field but still failed validation. Please fix the issues and try again without wrapping in `data`. Issues: ' : 'Output validation error: Output failed to match the output schema and was ignored. You might want to try again! Issues: ' - const errorMessage = `${prefix}${bestError}` + const outputStr = JSON.stringify(output, null, 2) + const truncatedOutput = outputStr.length > 500 + ? 
outputStr.slice(0, 500) + '...(truncated)' + : outputStr + const errorMessage = `${prefix}${bestError}\n\nOriginal output value:\n${truncatedOutput}` logger.error( { output, diff --git a/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-utils.ts b/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-utils.ts index 77dac6b36..d0144a4df 100644 --- a/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-utils.ts +++ b/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-utils.ts @@ -221,8 +221,12 @@ export function validateAgentInput( if (inputSchema.prompt) { const result = inputSchema.prompt.safeParse(prompt ?? '') if (!result.success) { + const promptStr = JSON.stringify(prompt ?? '', null, 2) + const truncatedPrompt = promptStr.length > 500 + ? promptStr.slice(0, 500) + '...(truncated)' + : promptStr throw new Error( - `Invalid prompt for agent ${agentType}: ${JSON.stringify(result.error.issues, null, 2)}`, + `Invalid prompt for agent ${agentType}: ${JSON.stringify(result.error.issues, null, 2)}\n\nOriginal prompt value:\n${truncatedPrompt}`, ) } } @@ -231,8 +235,12 @@ export function validateAgentInput( if (inputSchema.params) { const result = inputSchema.params.safeParse(params ?? {}) if (!result.success) { + const paramsStr = JSON.stringify(params ?? {}, null, 2) + const truncatedParams = paramsStr.length > 500 + ? 
paramsStr.slice(0, 500) + '...(truncated)' + : paramsStr throw new Error( - `Invalid params for agent ${agentType}: ${JSON.stringify(result.error.issues, null, 2)}`, + `Invalid params for agent ${agentType}: ${JSON.stringify(result.error.issues, null, 2)}\n\nOriginal params value:\n${truncatedParams}`, ) } } From c03b027fc5d4c67208341b89ab1a61fdb1cec22a Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 11 Apr 2026 17:32:21 -0700 Subject: [PATCH 04/18] Update basher prompt --- agents/basher.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agents/basher.ts b/agents/basher.ts index de7657d54..8d88073b5 100644 --- a/agents/basher.ts +++ b/agents/basher.ts @@ -11,7 +11,7 @@ const basher: AgentDefinition = { model: 'google/gemini-3.1-flash-lite-preview', displayName: 'Basher', spawnerPrompt: - 'Runs a single terminal command and describes its output using an LLM. A lightweight shell command executor. You must specifiy also the command to run within the params object.', + 'Runs a single terminal command and describes its output using an LLM. A lightweight shell command executor. Every basher spawn MUST include params: { command: "" }. NEVER spawn basher with only a prompt — it will fail validation. The prompt field describes what to extract from the output, not the command itself.', inputSchema: { prompt: { From debcce044fb9c0921594c09b07f652d1516039e4 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 11 Apr 2026 17:41:53 -0700 Subject: [PATCH 05/18] Some base2 prompt tweaks inspired by misses in eval task --- agents/base2/base2.ts | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index 3d504edfb..a3a715b7f 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -131,8 +131,8 @@ export function createBase2( - Don't forget to add any imports that might be needed - Remove unused variables, functions, and files as a result of your changes. 
- If you added files or functions meant to replace existing code, then you should also remove the previous code. -- **Minimal new code comments:** Do not add many new comments while writing code, unless they were preexisting comments (keep those!) or unless the user asks you to add comments! -- **Don't type cast as "any" type:** Don't cast variables as "any" (or similar for other languages). This is a bad practice as it leads to bugs. The code is more robust when every expression is typed. +- **Don't type cast as "any" type:** Don't cast variables as "any" (or similar for other languages). This is a bad practice as it leads to bugs. Exception: when the value can truly be any type. +- **Prefer str_replace to write_file:** str_replace is more efficient for targeted changes and gives more feedback. Only use write_file for new files or when necessary to rewrite the entire file. # Spawning agents guidelines @@ -217,7 +217,7 @@ ${isDefault ${isDefault ? `[ You spawn a code-reviewer, a basher to typecheck the changes, and another basher to run tests, all in parallel ]` : isFree - ? `[ You spawn a code-reviewer-lite to review the changes, and a basher to typecheck the changes, and another basher to run tests, all in parallel ]` + ? `[ You spawn a code-reviewer-lite to review the changes, a basher to typecheck the local changes, a basher to typecheck the whole project, and another basher to run tests, all in parallel ]` : isMax ? `[ You spawn a basher to typecheck the changes, and another basher to run tests, in parallel. Then, you spawn a code-reviewer-multi-prompt to review the changes. 
]` : '[ You spawn a basher to typecheck the changes and another basher to run tests, all in parallel ]' From 3e78731d7dfe2996b81a4a98cb7a9cec5eaf819c Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 11 Apr 2026 17:43:20 -0700 Subject: [PATCH 06/18] Enable write_todos for free mode --- agents/base2/base2.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index a3a715b7f..7586e72db 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -58,7 +58,7 @@ export function createBase2( 'spawn_agents', 'read_files', 'read_subtree', - !isFast && !isFree && 'write_todos', + !isFast && 'write_todos', !isFast && !noAskUser && 'suggest_followups', 'str_replace', 'write_file', @@ -331,7 +331,7 @@ ${buildArray( `- Important: Read as many files as could possibly be relevant to the task over several steps to improve your understanding of the user's request and produce the best possible code changes. Find more examples within the codebase similar to the user's request, dependencies that help with understanding how things work, tests, etc. This is frequently 12-20 files, depending on the task.`, !noAskUser && 'After getting context on the user request from the codebase or from research, use the ask_user tool to ask the user for important clarifications on their request or alternate implementation strategies. You should skip this step if the choice is obvious -- only ask the user if you need their help making the best choice.', - (isDefault || isMax) && + (isDefault || isMax || isFree) && `- For any task requiring 3+ steps, use the write_todos tool to write out your step-by-step implementation plan. Include ALL of the applicable tasks in the list.${isFast ? '' : ' You should include a step to review the changes after you have implemented the changes.'}:${hasNoValidation ? 
'' : ' You should include at least one step to validate/test your changes: be specific about whether to typecheck, run tests, run lints, etc.'} You may be able to do reviewing and validation in parallel in the same step. Skip write_todos for simple tasks like quick edits or answering questions.`, isFree && `- For most requests, spawn the thinker-with-files-gemini agent to think through and plan the best response. This agent is extremely useful as it is very smart. You must take advantage of it and spawn it about once per user request. Gather all the necessary context *before* spawning it, and pass the relevant filePaths since it does not have access to the conversation history.`, From 8658b8f4708e0989f991e9c32c306c9f43a8fb24 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 11 Apr 2026 18:13:40 -0700 Subject: [PATCH 07/18] Refactor helper for surfacing original tool call in error message --- .../src/tools/handlers/tool/set-output.ts | 7 ++----- .../src/tools/handlers/tool/spawn-agent-utils.ts | 13 +++---------- packages/agent-runtime/src/tools/tool-executor.ts | 15 +++++---------- packages/agent-runtime/src/util/format-value.ts | 10 ++++++++++ 4 files changed, 20 insertions(+), 25 deletions(-) create mode 100644 packages/agent-runtime/src/util/format-value.ts diff --git a/packages/agent-runtime/src/tools/handlers/tool/set-output.ts b/packages/agent-runtime/src/tools/handlers/tool/set-output.ts index 009755c5e..97c613b86 100644 --- a/packages/agent-runtime/src/tools/handlers/tool/set-output.ts +++ b/packages/agent-runtime/src/tools/handlers/tool/set-output.ts @@ -1,6 +1,7 @@ import { jsonToolResult } from '@codebuff/common/util/messages' import { getAgentTemplate } from '../../../templates/agent-registry' +import { formatValueForError } from '../../../util/format-value' import type { CodebuffToolHandlerFunction } from '../handler-function-type' import type { @@ -61,11 +62,7 @@ export const handleSetOutput = (async (params: { const prefix = usedData ? 
'Output validation error: Your output was found inside the `data` field but still failed validation. Please fix the issues and try again without wrapping in `data`. Issues: ' : 'Output validation error: Output failed to match the output schema and was ignored. You might want to try again! Issues: ' - const outputStr = JSON.stringify(output, null, 2) - const truncatedOutput = outputStr.length > 500 - ? outputStr.slice(0, 500) + '...(truncated)' - : outputStr - const errorMessage = `${prefix}${bestError}\n\nOriginal output value:\n${truncatedOutput}` + const errorMessage = `${prefix}${bestError}\n\nOriginal output value:\n${formatValueForError(output)}` logger.error( { output, diff --git a/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-utils.ts b/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-utils.ts index d0144a4df..0f6c3884b 100644 --- a/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-utils.ts +++ b/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-utils.ts @@ -5,6 +5,7 @@ import { generateCompactId } from '@codebuff/common/util/string' import { loopAgentSteps } from '../../../run-agent-step' import { getAgentTemplate } from '../../../templates/agent-registry' +import { formatValueForError } from '../../../util/format-value' import { filterUnfinishedToolCalls, withSystemTags, @@ -221,12 +222,8 @@ export function validateAgentInput( if (inputSchema.prompt) { const result = inputSchema.prompt.safeParse(prompt ?? '') if (!result.success) { - const promptStr = JSON.stringify(prompt ?? '', null, 2) - const truncatedPrompt = promptStr.length > 500 - ? promptStr.slice(0, 500) + '...(truncated)' - : promptStr throw new Error( - `Invalid prompt for agent ${agentType}: ${JSON.stringify(result.error.issues, null, 2)}\n\nOriginal prompt value:\n${truncatedPrompt}`, + `Invalid prompt for agent ${agentType}: ${JSON.stringify(result.error.issues, null, 2)}\n\nOriginal prompt value:\n${formatValueForError(prompt ?? 
'')}`, ) } } @@ -235,12 +232,8 @@ export function validateAgentInput( if (inputSchema.params) { const result = inputSchema.params.safeParse(params ?? {}) if (!result.success) { - const paramsStr = JSON.stringify(params ?? {}, null, 2) - const truncatedParams = paramsStr.length > 500 - ? paramsStr.slice(0, 500) + '...(truncated)' - : paramsStr throw new Error( - `Invalid params for agent ${agentType}: ${JSON.stringify(result.error.issues, null, 2)}\n\nOriginal params value:\n${truncatedParams}`, + `Invalid params for agent ${agentType}: ${JSON.stringify(result.error.issues, null, 2)}\n\nOriginal params value:\n${formatValueForError(params ?? {})}`, ) } } diff --git a/packages/agent-runtime/src/tools/tool-executor.ts b/packages/agent-runtime/src/tools/tool-executor.ts index 81782c29d..da0cfbd3b 100644 --- a/packages/agent-runtime/src/tools/tool-executor.ts +++ b/packages/agent-runtime/src/tools/tool-executor.ts @@ -6,6 +6,7 @@ import { cloneDeep } from 'lodash' import { getMCPToolData } from '../mcp' import { MCP_TOOL_SEPARATOR } from '../mcp-constants' import { getAgentShortName } from '../templates/prompts' +import { formatValueForError } from '../util/format-value' import { codebuffToolHandlers } from './handlers/list' import { getMatchingSpawn, @@ -180,13 +181,10 @@ export async function executeToolCall( } if ('error' in toolCall) { - const inputStr = JSON.stringify(input, null, 2) - const truncatedInput = inputStr.length > 500 - ? 
inputStr.slice(0, 500) + '...(truncated)' - : inputStr + const formattedInput = formatValueForError(input) onResponseChunk({ type: 'error', - message: `${toolCall.error}\n\nOriginal tool call input:\n${truncatedInput}`, + message: `${toolCall.error}\n\nOriginal tool call input:\n${formattedInput}`, }) logger.debug( { toolCall, error: toolCall.error }, @@ -491,13 +489,10 @@ export async function executeCustomToolCall( } if ('error' in toolCall) { - const inputStr = JSON.stringify(input, null, 2) - const truncatedInput = inputStr.length > 500 - ? inputStr.slice(0, 500) + '...(truncated)' - : inputStr + const formattedInput = formatValueForError(input) onResponseChunk({ type: 'error', - message: `${toolCall.error}\n\nOriginal tool call input:\n${truncatedInput}`, + message: `${toolCall.error}\n\nOriginal tool call input:\n${formattedInput}`, }) logger.debug( { toolCall, error: toolCall.error }, diff --git a/packages/agent-runtime/src/util/format-value.ts b/packages/agent-runtime/src/util/format-value.ts new file mode 100644 index 000000000..c4bbdccaa --- /dev/null +++ b/packages/agent-runtime/src/util/format-value.ts @@ -0,0 +1,10 @@ +export function formatValueForError(value: unknown, maxLength = 500): string { + const jsonStr = JSON.stringify(value, null, 2) ?? 'undefined' + const truncated = jsonStr.length > maxLength + ? jsonStr.slice(0, maxLength) + '...(truncated)' + : jsonStr + if (value === null || value === undefined || typeof value !== 'object') { + return `${truncated} (type: ${value === null ? 'null' : typeof value})` + } + return truncated +} From d80a3afc45bd1a707e7dbccb615e4a429ed9184b Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 11 Apr 2026 18:19:05 -0700 Subject: [PATCH 08/18] Spawn gemini thinker for complex problems only. Bump up to medium effort. 
--- agents/base2/base2.ts | 8 ++++---- agents/thinker/thinker-with-files-gemini.ts | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index 7586e72db..096f7d2e5 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -143,7 +143,7 @@ Use the spawn_agents tool to spawn specialized agents to help you complete the u ${buildArray( '- Spawn context-gathering agents (file pickers and web/docs researchers) before making edits. Use the code_search, list_directory, and glob tools directly for searching and exploring the codebase.', isFree && 'Do not spawn the thinker-gpt agent, unless the user asks. Not everyone has connected their ChatGPT subscription to Codebuff to allow for it.', - isFree && 'You must spawn the thinker-with-files-gemini agent to think through and plan the reponse to most requests, unless the request is trivial. This agent is extremely useful as it is very smart! You must pass the relevant filePaths when spawning it, since it does not have access to the conversation history.', + isFree && `Spawn the thinker-with-files-gemini agent for complex problems — it's very smart. Skip it for routine edits and clearly-scoped changes. Pass the relevant filePaths since it has no conversation history.`, isDefault && '- Spawn the editor agent to implement the changes after you have gathered all the context you need.', (isDefault || isMax) && @@ -206,7 +206,7 @@ ${buildArray( [ You read a few other relevant files using the read_files tool ]${!noAskUser ? `\n\n[ You ask the user for important clarifications on their request or alternate implementation strategies using the ask_user tool ]` : '' - }${isFree ? `\n\n[ You spawn the thinker-with-files-gemini agent with the relevant filePaths to plan the best response ]` : ''} + } ${isDefault ? 
`[ You implement the changes using the editor agent ]` : isFast || isFree @@ -334,7 +334,7 @@ ${buildArray( (isDefault || isMax || isFree) && `- For any task requiring 3+ steps, use the write_todos tool to write out your step-by-step implementation plan. Include ALL of the applicable tasks in the list.${isFast ? '' : ' You should include a step to review the changes after you have implemented the changes.'}:${hasNoValidation ? '' : ' You should include at least one step to validate/test your changes: be specific about whether to typecheck, run tests, run lints, etc.'} You may be able to do reviewing and validation in parallel in the same step. Skip write_todos for simple tasks like quick edits or answering questions.`, isFree && - `- For most requests, spawn the thinker-with-files-gemini agent to think through and plan the best response. This agent is extremely useful as it is very smart. You must take advantage of it and spawn it about once per user request. Gather all the necessary context *before* spawning it, and pass the relevant filePaths since it does not have access to the conversation history.`, + `- For complex problems, spawn the thinker-with-files-gemini agent after gathering context. Skip it for routine edits and clearly-scoped changes. Pass the relevant filePaths.`, (isDefault || isMax) && `- For quick problems, briefly explain your reasoning to the user. If you need to think longer, write your thoughts within the tags. Finally, for complex problems, spawn the thinker agent to help find the best solution. (gpt-5-agent is a last resort for complex problems)`, isDefault && @@ -380,7 +380,7 @@ function buildImplementationStepPrompt({ `Keep working until the user's request is completely satisfied${!hasNoValidation ? 
' and validated' : ''}, or until you require more information from the user.`, 'You must use the skill tool to load any potentially relevant skills.', isFree && - `You must spawn the thinker-with-files-gemini agent once per user request to plan the best response. Pass the relevant filePaths since it does not have access to the conversation history.`, + `Spawn the thinker-with-files-gemini agent for complex problems, not routine edits. Pass the relevant filePaths.`, isMax && `You must spawn the 'editor-multi-prompt' agent to implement code changes rather than using the str_replace or write_file tools, since it will generate the best code changes.`, (isDefault || isMax) && diff --git a/agents/thinker/thinker-with-files-gemini.ts b/agents/thinker/thinker-with-files-gemini.ts index 0f9ec5ad3..364dcca96 100644 --- a/agents/thinker/thinker-with-files-gemini.ts +++ b/agents/thinker/thinker-with-files-gemini.ts @@ -8,7 +8,7 @@ const definition: SecretAgentDefinition = { model: 'google/gemini-3.1-pro-preview', displayName: 'Theo the Theorizer with Files (Gemini)', reasoningOptions: { - effort: 'low', + effort: 'medium', }, spawnerPrompt: 'Does deep thinking given the prompt and provided files using Gemini. Use this to help you solve a specific problem. This agent has no context on the conversation history so it cannot see files you have read or previous discussion. 
Instead, you must provide all the relevant context via the prompt or filePaths for this agent to work well.', From dbc24dd9eeeeba8537a927ad05b5cce9c2657c94 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 11 Apr 2026 18:39:45 -0700 Subject: [PATCH 09/18] Much clearer basher agent input schema --- agents/basher.ts | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/agents/basher.ts b/agents/basher.ts index 8d88073b5..259d8fcbf 100644 --- a/agents/basher.ts +++ b/agents/basher.ts @@ -11,14 +11,9 @@ const basher: AgentDefinition = { model: 'google/gemini-3.1-flash-lite-preview', displayName: 'Basher', spawnerPrompt: - 'Runs a single terminal command and describes its output using an LLM. A lightweight shell command executor. Every basher spawn MUST include params: { command: "" }. NEVER spawn basher with only a prompt — it will fail validation. The prompt field describes what to extract from the output, not the command itself.', + 'Runs a single terminal command and (recommended) describes its output using an LLM using the what_to_summarize field. A lightweight shell command executor. Every basher spawn MUST include params: { command: "" }.', inputSchema: { - prompt: { - type: 'string', - description: - 'What information from the command output is desired. Be specific about what to look for or extract.', - }, params: { type: 'object', properties: { @@ -26,15 +21,15 @@ const basher: AgentDefinition = { type: 'string', description: 'The terminal command to run in bash shell. Don\'t forget this field!', }, + what_to_summarize: { + type: 'string', + description: + 'What information from the command output is desired. Be specific about what to look for or extract. This is optional, and if not provided, the basher will return the full command output without summarization.', + }, timeout_seconds: { type: 'number', description: 'Set to -1 for no timeout. 
Default 30', }, - rawOutput: { - type: 'boolean', - description: - 'If true, returns the full command output without summarization. Defaults to false.', - }, }, required: ['command'], }, @@ -73,7 +68,7 @@ Do not use any tools! Only analyze the output of the command.`, } const timeout_seconds = params?.timeout_seconds as number | undefined - const rawOutput = params?.rawOutput as boolean | undefined + const what_to_summarize = params?.what_to_summarize as string | undefined // Run the command const { toolResult } = yield { @@ -84,7 +79,7 @@ Do not use any tools! Only analyze the output of the command.`, }, } - if (rawOutput) { + if (!what_to_summarize) { // Return the raw command output without summarization const result = toolResult?.[0] // Only return object values (command output objects), not plain strings From c36a16e64f9769fe332ce2dbb949cab903298e5f Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 11 Apr 2026 19:27:29 -0700 Subject: [PATCH 10/18] Switch to code-searcher instead of code_search tool --- agents/base2/base2.ts | 12 ++++++------ agents/file-explorer/code-searcher.ts | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index 096f7d2e5..d2ff6c757 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -67,13 +67,13 @@ export function createBase2( !noAskUser && 'ask_user', 'skill', 'set_output', - 'code_search', 'list_directory', 'glob', ), spawnableAgents: buildArray( !isMax && 'file-picker', isMax && 'file-picker-max', + 'code-searcher', 'researcher-web', 'researcher-docs', 'basher', @@ -124,7 +124,7 @@ export function createBase2( - Add thoughtful details like hover states, transitions, and micro-interactions - Apply design principles: hierarchy, contrast, balance, and movement - Create an impressive demonstration showcasing web development capabilities -- **Refactoring Awareness:** Whenever you modify an exported symbol like a function or class or variable, you should 
find and update all the references to it appropriately using the code_search tool. +- **Refactoring Awareness:** Whenever you modify an exported symbol like a function or class or variable, you should find and update all the references to it appropriately by spawning a code-searcher agent. - **Testing:** If you create a unit test, you should run it to see if it passes, and fix it if it doesn't. - **Package Management:** When adding new packages, use the basher agent to install the package rather than editing the package.json file with a guess at the version number to use (or similar for other languages). This way, you will be sure to have the latest version of the package. Do not install packages globally unless asked by the user (e.g. Don't run \`npm install -g \`). Always try to use the package manager associated with the project (e.g. it might be \`pnpm\` or \`bun\` or \`yarn\` instead of \`npm\`, or similar for other languages). - **Code Hygiene:** Make sure to leave things in a good state: @@ -141,7 +141,7 @@ Use the spawn_agents tool to spawn specialized agents to help you complete the u - **Spawn multiple agents in parallel:** This increases the speed of your response **and** allows you to be more comprehensive by spawning more total agents to synthesize the best response. - **Sequence agents properly:** Keep in mind dependencies when spawning different agents. Don't spawn agents in parallel that depend on each other. ${buildArray( - '- Spawn context-gathering agents (file pickers and web/docs researchers) before making edits. Use the code_search, list_directory, and glob tools directly for searching and exploring the codebase.', + '- Spawn context-gathering agents (file pickers, code searchers, and web/docs researchers) before making edits. Use the list_directory and glob tools directly for searching and exploring the codebase.', isFree && 'Do not spawn the thinker-gpt agent, unless the user asks. 
Not everyone has connected their ChatGPT subscription to Codebuff to allow for it.', isFree && `Spawn the thinker-with-files-gemini agent for complex problems — it's very smart. Skip it for routine edits and clearly-scoped changes. Pass the relevant filePaths since it has no conversation history.`, isDefault && @@ -197,11 +197,11 @@ ${buildArray( please implement [a complex new feature] -[ You spawn 3 file-pickers and a docs researcher in parallel to find relevant files and do research online. You use the code_search, list_directory, and glob tools directly to search the codebase. ] +[ You spawn 3 file-pickers, 2 code-searchers, and a docs researcher in parallel to find relevant files and do research online. You use the list_directory and glob tools directly to search the codebase. ] [ You read a few of the relevant files using the read_files tool in two separate tool calls ] -[ You use code_search and glob tools, and spawn another file-picker to find more relevant files ] +[ You spawn another file-picker and code-searcher to find more relevant files, and use glob tools ] [ You read a few other relevant files using the read_files tool ]${!noAskUser ? `\n\n[ You ask the user for important clarifications on their request or alternate implementation strategies using the ask_user tool ]` @@ -300,7 +300,7 @@ ${PLACEHOLDER.GIT_CHANGES_PROMPT} } } -const EXPLORE_PROMPT = `- Iteratively spawn file pickers, bashers, and web/docs researchers to gather context as needed. Use the code_search, list_directory, and glob tools directly for searching and exploring the codebase. The file-picker agent in particular is very useful to find relevant files -- try spawning multiple in parallel (say, 2-5) to explore different parts of the codebase. Use read_subtree if you need to grok a particular part of the codebase. 
Read all the relevant files using the read_files tool.` +const EXPLORE_PROMPT = `- Iteratively spawn file pickers, code searchers, bashers, and web/docs researchers to gather context as needed. Use the list_directory and glob tools directly for searching and exploring the codebase. The file-picker and code-searcher agents are very useful to find relevant files -- try spawning multiple in parallel (say, 2-5 file-pickers and 1-3 code-searchers) to explore different parts of the codebase. Use read_subtree if you need to grok a particular part of the codebase. Read all the relevant files using the read_files tool.` function buildImplementationInstructionsPrompt({ isSonnet, diff --git a/agents/file-explorer/code-searcher.ts b/agents/file-explorer/code-searcher.ts index 5204ebde3..43fee7795 100644 --- a/agents/file-explorer/code-searcher.ts +++ b/agents/file-explorer/code-searcher.ts @@ -49,7 +49,7 @@ const codeSearcher: SecretAgentDefinition = { id: 'code-searcher', displayName: 'Code Searcher', spawnerPrompt: - 'Mechanically runs multiple code search queries (using ripgrep line-oriented search) and returns up to 250 results across all source files, showing each line that matches the search pattern. Excludes git-ignored files.', + `Mechanically runs multiple code search queries (using ripgrep line-oriented search) and returns up to 250 results across all source files, showing each line that matches the search pattern. Excludes git-ignored files. You MUST pass searchQueries in params. 
Example input: { "params": { "searchQueries": [{ "pattern": "createUser", "flags": "-g *.ts" }, { "pattern": "deleteUser", "flags": "-g *.ts" }, { "pattern": "UserSchema", "maxResults": 5 }] } }`, model: 'anthropic/claude-sonnet-4.5', publisher, includeMessageHistory: false, From 98d9a7aa820bf0c58d38d55d2a52849906f90c4e Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 11 Apr 2026 20:46:58 -0700 Subject: [PATCH 11/18] Add optional schema for subagents' params to help models with tool calls --- common/src/tools/params/tool/spawn-agents.ts | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/common/src/tools/params/tool/spawn-agents.ts b/common/src/tools/params/tool/spawn-agents.ts index c91e2e3e9..fe88beaa0 100644 --- a/common/src/tools/params/tool/spawn-agents.ts +++ b/common/src/tools/params/tool/spawn-agents.ts @@ -23,9 +23,25 @@ const inputSchema = z agent_type: z.string().describe('Agent to spawn'), prompt: z.string().optional().describe('Prompt to send to the agent'), params: z - .record(z.string(), z.any()) + .object({ + // Common agent fields (all optional hints — each agent validates its own required fields) + command: z.string().optional().describe('Terminal command to run (basher, tmux-cli)'), + what_to_summarize: z.string().optional().describe('What information from the command output is desired (basher)'), + timeout_seconds: z.number().optional().describe('Timeout for command. Set to -1 for no timeout. Default 30 (basher)'), + searchQueries: z.array(z.object({ + pattern: z.string().describe('The pattern to search for'), + flags: z.string().optional().describe('Optional ripgrep flags (e.g., "-i", "-g *.ts")'), + cwd: z.string().optional().describe('Optional working directory relative to project root'), + maxResults: z.number().optional().describe('Max results per file. 
Default 15'), + })).optional().describe('Array of code search queries (code-searcher)'), + filePaths: z.array(z.string()).optional().describe('Relevant file paths to read (opus-agent, gpt-5-agent, thinker-with-files-gemini)'), + directories: z.array(z.string()).optional().describe('Directories to search within (file-picker)'), + url: z.string().optional().describe('Starting URL to navigate to (browser-use)'), + prompts: z.array(z.string()).optional().describe('Array of strategy prompts (editor-multi-prompt, code-reviewer-multi-prompt)'), + }) + .catchall(z.any()) .optional() - .describe('Parameters object for the agent (if any)'), + .describe('Parameters object for the agent'), }) .array(), ), From 5e690b7d098e57dee266ef55a2f9fbe8e5843a75 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sun, 12 Apr 2026 03:52:51 +0000 Subject: [PATCH 12/18] Bump version to 1.0.640 --- cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cli/release/package.json b/cli/release/package.json index 22c99696d..5ccbe9c04 100644 --- a/cli/release/package.json +++ b/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "codebuff", - "version": "1.0.639", + "version": "1.0.640", "description": "AI coding agent", "license": "MIT", "bin": { From ae0f568b0bdb9812387d6461aa33a0a909e410a5 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sun, 12 Apr 2026 03:55:33 +0000 Subject: [PATCH 13/18] Bump Freebuff version to 0.0.31 --- freebuff/cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freebuff/cli/release/package.json b/freebuff/cli/release/package.json index 28f0c0416..71efc9a4f 100644 --- a/freebuff/cli/release/package.json +++ b/freebuff/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "freebuff", - "version": "0.0.30", + "version": "0.0.31", "description": "The world's strongest free coding agent", "license": "MIT", "bin": { From caf29efc42d7b4ef2ab4496047db3982ab70682d Mon Sep 17 00:00:00 2001 
From: James Grugett Date: Sat, 11 Apr 2026 16:15:40 -0700 Subject: [PATCH 14/18] buffbench single eval --- evals/buffbench/main-single-eval.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/evals/buffbench/main-single-eval.ts b/evals/buffbench/main-single-eval.ts index bae330cdc..6eceac7a5 100644 --- a/evals/buffbench/main-single-eval.ts +++ b/evals/buffbench/main-single-eval.ts @@ -7,8 +7,8 @@ async function main() { await runBuffBench({ evalDataPaths: [path.join(__dirname, 'eval-codebuff.json')], - agents: ['base2'], - taskIds: ['filter-system-history'], + agents: ['base2-free-evals'], + taskIds: ['server-agent-validation'], saveTraces, }) From ca63e1723ac1820ade1a917c55cb8108840d7e97 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 11 Apr 2026 22:17:27 -0700 Subject: [PATCH 15/18] Free Glm 5.1!!! (#499) --- agents/__tests__/editor.test.ts | 20 ++--- agents/base2/base2.ts | 2 +- agents/editor/editor-lite.ts | 2 +- agents/editor/editor.ts | 8 +- agents/reviewer/code-reviewer-lite.ts | 2 +- agents/types/agent-definition.ts | 1 + cli/src/components/choice-ad-banner.tsx | 4 +- cli/src/utils/create-run-config.ts | 4 +- common/src/constants/agents.ts | 2 +- common/src/constants/free-agents.ts | 6 +- .../types/agent-definition.ts | 1 + scripts/test-fireworks-long.ts | 75 ++++++++++++++++--- web/src/llm-api/fireworks.ts | 42 ++++++++--- 13 files changed, 123 insertions(+), 46 deletions(-) diff --git a/agents/__tests__/editor.test.ts b/agents/__tests__/editor.test.ts index 8a6b65760..9e14909f8 100644 --- a/agents/__tests__/editor.test.ts +++ b/agents/__tests__/editor.test.ts @@ -62,9 +62,9 @@ describe('editor agent', () => { expect(gpt5Editor.model).toBe('openai/gpt-5.1') }) - test('creates minimax editor', () => { - const minimaxEditor = createCodeEditor({ model: 'minimax' }) - expect(minimaxEditor.model).toBe('minimax/minimax-m2.5') + test('creates glm editor', () => { + const glmEditor = createCodeEditor({ model: 'glm' }) + 
expect(glmEditor.model).toBe('z-ai/glm-5.1') }) test('gpt-5 editor does not include think tags in instructions', () => { @@ -74,9 +74,9 @@ describe('editor agent', () => { }) test('glm editor does not include think tags in instructions', () => { - const minimaxEditor = createCodeEditor({ model: 'minimax' }) - expect(minimaxEditor.instructionsPrompt).not.toContain('') - expect(minimaxEditor.instructionsPrompt).not.toContain('') + const glmEditor = createCodeEditor({ model: 'glm' }) + expect(glmEditor.instructionsPrompt).not.toContain('') + expect(glmEditor.instructionsPrompt).not.toContain('') }) test('opus editor includes think tags in instructions', () => { @@ -88,17 +88,17 @@ describe('editor agent', () => { test('all variants have same base properties', () => { const opusEditor = createCodeEditor({ model: 'opus' }) const gpt5Editor = createCodeEditor({ model: 'gpt-5' }) - const minimaxEditor = createCodeEditor({ model: 'minimax' }) + const glmEditor = createCodeEditor({ model: 'glm' }) // All should have same basic structure expect(opusEditor.displayName).toBe(gpt5Editor.displayName) - expect(gpt5Editor.displayName).toBe(minimaxEditor.displayName) + expect(gpt5Editor.displayName).toBe(glmEditor.displayName) expect(opusEditor.outputMode).toBe(gpt5Editor.outputMode) - expect(gpt5Editor.outputMode).toBe(minimaxEditor.outputMode) + expect(gpt5Editor.outputMode).toBe(glmEditor.outputMode) expect(opusEditor.toolNames).toEqual(gpt5Editor.toolNames) - expect(gpt5Editor.toolNames).toEqual(minimaxEditor.toolNames) + expect(gpt5Editor.toolNames).toEqual(glmEditor.toolNames) }) }) diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index d2ff6c757..22a58d82a 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -28,7 +28,7 @@ export function createBase2( return { publisher, - model: isFree ? 'minimax/minimax-m2.5' : 'anthropic/claude-opus-4.6', + model: isFree ? 'z-ai/glm-5.1' : 'anthropic/claude-opus-4.6', providerOptions: isFree ? 
{ data_collection: 'deny', } : { diff --git a/agents/editor/editor-lite.ts b/agents/editor/editor-lite.ts index 9cb5675b5..29225f0c2 100644 --- a/agents/editor/editor-lite.ts +++ b/agents/editor/editor-lite.ts @@ -3,7 +3,7 @@ import { createCodeEditor } from './editor' import type { AgentDefinition } from '../types/agent-definition' const definition: AgentDefinition = { - ...createCodeEditor({ model: 'minimax' }), + ...createCodeEditor({ model: 'glm' }), id: 'editor-lite', } export default definition diff --git a/agents/editor/editor.ts b/agents/editor/editor.ts index 6beb22d22..e191609ad 100644 --- a/agents/editor/editor.ts +++ b/agents/editor/editor.ts @@ -4,7 +4,7 @@ import { publisher } from '../constants' import type { AgentDefinition } from '../types/agent-definition' export const createCodeEditor = (options: { - model: 'gpt-5' | 'opus' | 'minimax' + model: 'gpt-5' | 'opus' | 'glm' }): Omit => { const { model } = options return { @@ -12,8 +12,8 @@ export const createCodeEditor = (options: { model: options.model === 'gpt-5' ? 'openai/gpt-5.1' - : options.model === 'minimax' - ? 'minimax/minimax-m2.5' + : options.model === 'glm' + ? 'z-ai/glm-5.1' : 'anthropic/claude-opus-4.6', ...(options.model === 'opus' && { providerOptions: { @@ -65,7 +65,7 @@ OR for new files or major rewrites: } -${model === 'gpt-5' || model === 'minimax' +${model === 'gpt-5' || model === 'glm' ? '' : `Before you start writing your implementation, you should use tags to think about the best way to implement the changes. 
diff --git a/agents/reviewer/code-reviewer-lite.ts b/agents/reviewer/code-reviewer-lite.ts index f1baa7dff..feafb87c4 100644 --- a/agents/reviewer/code-reviewer-lite.ts +++ b/agents/reviewer/code-reviewer-lite.ts @@ -5,7 +5,7 @@ import { createReviewer } from './code-reviewer' const definition: SecretAgentDefinition = { id: 'code-reviewer-lite', publisher, - ...createReviewer('minimax/minimax-m2.5'), + ...createReviewer('z-ai/glm-5.1'), } export default definition diff --git a/agents/types/agent-definition.ts b/agents/types/agent-definition.ts index abbcbc0cd..522994ac2 100644 --- a/agents/types/agent-definition.ts +++ b/agents/types/agent-definition.ts @@ -424,6 +424,7 @@ export type ModelName = | 'moonshotai/kimi-k2.5' | 'moonshotai/kimi-k2.5:nitro' | 'z-ai/glm-5' + | 'z-ai/glm-5.1' | 'z-ai/glm-4.6' | 'z-ai/glm-4.6:nitro' | 'z-ai/glm-4.7' diff --git a/cli/src/components/choice-ad-banner.tsx b/cli/src/components/choice-ad-banner.tsx index 5a72e89ab..7ca3f1d4a 100644 --- a/cli/src/components/choice-ad-banner.tsx +++ b/cli/src/components/choice-ad-banner.tsx @@ -70,7 +70,7 @@ export const ChoiceAdBanner: React.FC = ({ ads, onImpressio } }, [visibleAds, onImpression]) - const hoverBorderColor = theme.link + const hoverBorderColor = theme.primary return ( = ({ ads, onImpressio diff --git a/cli/src/utils/create-run-config.ts b/cli/src/utils/create-run-config.ts index 3055f4e2c..c68535d78 100644 --- a/cli/src/utils/create-run-config.ts +++ b/cli/src/utils/create-run-config.ts @@ -1,5 +1,7 @@ import path from 'path' +import { MAX_AGENT_STEPS_DEFAULT } from '@codebuff/common/constants/agents' + import { createEventHandler, createStreamChunkHandler, @@ -109,7 +111,7 @@ export const createRunConfig = (params: CreateRunConfigParams) => { content, previousRun: previousRunState ?? 
undefined, agentDefinitions, - maxAgentSteps: 100, + maxAgentSteps: MAX_AGENT_STEPS_DEFAULT, handleStreamChunk: createStreamChunkHandler(eventHandlerState), handleEvent: createEventHandler(eventHandlerState), signal: params.signal, diff --git a/common/src/constants/agents.ts b/common/src/constants/agents.ts index 01b92e37d..5737b7761 100644 --- a/common/src/constants/agents.ts +++ b/common/src/constants/agents.ts @@ -92,4 +92,4 @@ export const AGENT_NAME_TO_TYPES = Object.entries(AGENT_NAMES).reduce( {} as Record, ) -export const MAX_AGENT_STEPS_DEFAULT = 100 +export const MAX_AGENT_STEPS_DEFAULT = 200 diff --git a/common/src/constants/free-agents.ts b/common/src/constants/free-agents.ts index e56e3fb58..3a9f5c916 100644 --- a/common/src/constants/free-agents.ts +++ b/common/src/constants/free-agents.ts @@ -18,7 +18,7 @@ export const FREE_COST_MODE = 'free' as const */ export const FREE_MODE_AGENT_MODELS: Record> = { // Root orchestrator - 'base2-free': new Set(['minimax/minimax-m2.5']), + 'base2-free': new Set(['minimax/minimax-m2.5', 'z-ai/glm-5.1']), // File exploration agents 'file-picker': new Set(['google/gemini-2.5-flash-lite']), @@ -33,10 +33,10 @@ export const FREE_MODE_AGENT_MODELS: Record> = { 'basher': new Set(['google/gemini-3.1-flash-lite-preview']), // Editor for free mode - 'editor-lite': new Set(['minimax/minimax-m2.5']), + 'editor-lite': new Set(['minimax/minimax-m2.5', 'z-ai/glm-5.1']), // Code reviewer for free mode - 'code-reviewer-lite': new Set(['minimax/minimax-m2.5']), + 'code-reviewer-lite': new Set(['minimax/minimax-m2.5', 'z-ai/glm-5.1']), // Thinker for free mode 'thinker-with-files-gemini': new Set(['google/gemini-3.1-pro-preview']), diff --git a/common/src/templates/initial-agents-dir/types/agent-definition.ts b/common/src/templates/initial-agents-dir/types/agent-definition.ts index abbcbc0cd..522994ac2 100644 --- a/common/src/templates/initial-agents-dir/types/agent-definition.ts +++ 
b/common/src/templates/initial-agents-dir/types/agent-definition.ts @@ -424,6 +424,7 @@ export type ModelName = | 'moonshotai/kimi-k2.5' | 'moonshotai/kimi-k2.5:nitro' | 'z-ai/glm-5' + | 'z-ai/glm-5.1' | 'z-ai/glm-4.6' | 'z-ai/glm-4.6:nitro' | 'z-ai/glm-4.7' diff --git a/scripts/test-fireworks-long.ts b/scripts/test-fireworks-long.ts index 58a4cb099..ad01abac6 100644 --- a/scripts/test-fireworks-long.ts +++ b/scripts/test-fireworks-long.ts @@ -7,19 +7,70 @@ * to measure how well Fireworks caches the shared prefix across turns. * * Usage: - * bun scripts/test-fireworks-long.ts + * bun scripts/test-fireworks-long.ts [model] [--deployment] + * + * Models: + * glm-5.1 (default) — z-ai/glm-5.1 + * minimax — minimax/minimax-m2.5 + * + * Flags: + * --deployment Use custom deployment instead of serverless (standard API) + * Serverless is the default */ export { } const FIREWORKS_BASE_URL = 'https://api.fireworks.ai/inference/v1' -const FIREWORKS_MODEL = 'accounts/james-65d217/deployments/lnfid5h9' -// const FIREWORKS_MODEL = 'accounts/fireworks/models/minimax-m2p5' -// Pricing constants — https://fireworks.ai/pricing -const INPUT_COST_PER_TOKEN = 0.30 / 1_000_000 -const CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000 -const OUTPUT_COST_PER_TOKEN = 1.20 / 1_000_000 +type ModelConfig = { + id: string // OpenRouter-style ID (for display) + standardModel: string // Fireworks standard API model ID + deploymentModel: string // Fireworks custom deployment model ID + inputCostPerToken: number + cachedInputCostPerToken: number + outputCostPerToken: number +} + +const MODEL_CONFIGS: Record = { + 'glm-5.1': { + id: 'z-ai/glm-5.1', + standardModel: 'accounts/fireworks/models/glm-5p1', + deploymentModel: 'accounts/james-65d217/deployments/mjb4i7ea', + inputCostPerToken: 1.40 / 1_000_000, + cachedInputCostPerToken: 0.26 / 1_000_000, + outputCostPerToken: 4.40 / 1_000_000, + }, + minimax: { + id: 'minimax/minimax-m2.5', + standardModel: 'accounts/fireworks/models/minimax-m2p5', + 
deploymentModel: 'accounts/james-65d217/deployments/lnfid5h9', + inputCostPerToken: 0.30 / 1_000_000, + cachedInputCostPerToken: 0.03 / 1_000_000, + outputCostPerToken: 1.20 / 1_000_000, + }, +} + +const DEFAULT_MODEL = 'glm-5.1' + +function getModelConfig(modelArg?: string): ModelConfig { + const key = modelArg ?? DEFAULT_MODEL + const config = MODEL_CONFIGS[key] + if (!config) { + console.error(`❌ Unknown model: "${key}". Available models: ${Object.keys(MODEL_CONFIGS).join(', ')}`) + process.exit(1) + } + return config +} + +const USE_DEPLOYMENT = process.argv.includes('--deployment') +const modelArg = process.argv.find((a, i) => i > 1 && !a.startsWith('-') && a !== 'long') +const MODEL = getModelConfig(modelArg) + +// Default to serverless (standard API); use --deployment for custom deployment +const FIREWORKS_MODEL = USE_DEPLOYMENT ? MODEL.deploymentModel : MODEL.standardModel +const INPUT_COST_PER_TOKEN = MODEL.inputCostPerToken +const CACHED_INPUT_COST_PER_TOKEN = MODEL.cachedInputCostPerToken +const OUTPUT_COST_PER_TOKEN = MODEL.outputCostPerToken const MAX_TOKENS = 100 @@ -39,9 +90,9 @@ function computeCost(usage: Record): { cost: number; breakdown: const totalCost = inputCost + cachedCost + outputCost const breakdown = [ - `${nonCachedInput} non-cached input × $0.30/M = $${inputCost.toFixed(8)}`, - `${cachedTokens} cached input × $0.03/M = $${cachedCost.toFixed(8)}`, - `${outputTokens} output × $1.20/M = $${outputCost.toFixed(8)}`, + `${nonCachedInput} non-cached input × $${(INPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M = $${inputCost.toFixed(8)}`, + `${cachedTokens} cached input × $${(CACHED_INPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M = $${cachedCost.toFixed(8)}`, + `${outputTokens} output × $${(OUTPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M = $${outputCost.toFixed(8)}`, `Total: $${totalCost.toFixed(8)}`, ].join('\n ') @@ -270,11 +321,11 @@ async function main() { console.log('🧪 Fireworks 10-Turn Conversation Caching Test') 
console.log('='.repeat(60)) - console.log(`Model: ${FIREWORKS_MODEL}`) + console.log(`Model: ${MODEL.id} (${FIREWORKS_MODEL}) [${USE_DEPLOYMENT ? 'deployment' : 'serverless'}]`) console.log(`Base URL: ${FIREWORKS_BASE_URL}`) console.log(`Max tokens: ${MAX_TOKENS} (low output per turn)`) console.log(`Turns: ${TURN_PROMPTS.length}`) - console.log(`Pricing: $0.30/M input, $0.03/M cached, $1.20/M output`) + console.log(`Pricing: $${(INPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M input, $${(CACHED_INPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M cached, $${(OUTPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M output`) console.log(`Session ID: ${SESSION_ID} (x-session-affinity header)`) console.log('='.repeat(60)) console.log() diff --git a/web/src/llm-api/fireworks.ts b/web/src/llm-api/fireworks.ts index 10f4bb22d..c377caaf5 100644 --- a/web/src/llm-api/fireworks.ts +++ b/web/src/llm-api/fireworks.ts @@ -29,6 +29,7 @@ const fireworksAgent = new Agent({ /** Map from OpenRouter model IDs to Fireworks standard API model IDs */ const FIREWORKS_MODEL_MAP: Record = { 'minimax/minimax-m2.5': 'accounts/fireworks/models/minimax-m2p5', + 'z-ai/glm-5.1': 'accounts/fireworks/models/glm-5p1', } /** Flag to enable custom Fireworks deployments (set to false to use global API only) */ @@ -37,6 +38,7 @@ const FIREWORKS_USE_CUSTOM_DEPLOYMENT = true /** Custom deployment IDs for models with dedicated Fireworks deployments */ const FIREWORKS_DEPLOYMENT_MAP: Record = { 'minimax/minimax-m2.5': 'accounts/james-65d217/deployments/lnfid5h9', + 'z-ai/glm-5.1': 'accounts/james-65d217/deployments/mjb4i7ea', } /** Check if current time is within deployment hours (10am–8pm ET) */ @@ -137,12 +139,31 @@ function createFireworksRequest(params: { }) } -// Fireworks per-token pricing (dollars per token) -const FIREWORKS_INPUT_COST_PER_TOKEN = 0.30 / 1_000_000 -const FIREWORKS_CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000 -const FIREWORKS_OUTPUT_COST_PER_TOKEN = 1.20 / 1_000_000 +// Fireworks per-token 
pricing (dollars per token), keyed by OpenRouter model ID +interface FireworksPricing { + inputCostPerToken: number + cachedInputCostPerToken: number + outputCostPerToken: number +} + +const FIREWORKS_PRICING_MAP: Record = { + 'minimax/minimax-m2.5': { + inputCostPerToken: 0.30 / 1_000_000, + cachedInputCostPerToken: 0.03 / 1_000_000, + outputCostPerToken: 1.20 / 1_000_000, + }, + 'z-ai/glm-5.1': { + inputCostPerToken: 1.40 / 1_000_000, + cachedInputCostPerToken: 0.26 / 1_000_000, + outputCostPerToken: 4.40 / 1_000_000, + }, +} + +function getFireworksPricing(model: string): FireworksPricing { + return FIREWORKS_PRICING_MAP[model] ?? FIREWORKS_PRICING_MAP['z-ai/glm-5.1'] /* models without a pricing entry fall back to the GLM 5.1 rates */ +} -function extractUsageAndCost(usage: Record | undefined | null): UsageData { +function extractUsageAndCost(usage: Record | undefined | null, model: string): UsageData { if (!usage) return { inputTokens: 0, outputTokens: 0, cacheReadInputTokens: 0, reasoningTokens: 0, cost: 0 } const promptDetails = usage.prompt_tokens_details as Record | undefined | null const completionDetails = usage.completion_tokens_details as Record | undefined | null @@ -153,11 +174,12 @@ function extractUsageAndCost(usage: Record | undefined | null): const reasoningTokens = typeof completionDetails?.reasoning_tokens === 'number' ? 
completionDetails.reasoning_tokens : 0 // Fireworks doesn't return cost — compute from token counts and known pricing + const pricing = getFireworksPricing(model) const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadInputTokens) const cost = - nonCachedInputTokens * FIREWORKS_INPUT_COST_PER_TOKEN + - cacheReadInputTokens * FIREWORKS_CACHED_INPUT_COST_PER_TOKEN + - outputTokens * FIREWORKS_OUTPUT_COST_PER_TOKEN + nonCachedInputTokens * pricing.inputCostPerToken + + cacheReadInputTokens * pricing.cachedInputCostPerToken + + outputTokens * pricing.outputCostPerToken return { inputTokens, outputTokens, cacheReadInputTokens, reasoningTokens, cost } } @@ -192,7 +214,7 @@ export async function handleFireworksNonStream({ const data = await response.json() const content = data.choices?.[0]?.message?.content ?? '' const reasoningText = data.choices?.[0]?.message?.reasoning_content ?? data.choices?.[0]?.message?.reasoning ?? '' - const usageData = extractUsageAndCost(data.usage) + const usageData = extractUsageAndCost(data.usage, originalModel) insertMessageToBigQuery({ messageId: data.id, @@ -493,7 +515,7 @@ async function handleResponse({ return { state } } - const usageData = extractUsageAndCost(data.usage as Record) + const usageData = extractUsageAndCost(data.usage as Record, originalModel) const messageId = typeof data.id === 'string' ? 
data.id : 'unknown' insertMessageToBigQuery({ From 0379424b86b0deb68317ddd17b844737c6bb2ef3 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" Date: Sun, 12 Apr 2026 05:39:25 +0000 Subject: [PATCH 16/18] Bump Freebuff version to 0.0.32 --- freebuff/cli/release/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/freebuff/cli/release/package.json b/freebuff/cli/release/package.json index 71efc9a4f..f4eed9d22 100644 --- a/freebuff/cli/release/package.json +++ b/freebuff/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "freebuff", - "version": "0.0.31", + "version": "0.0.32", "description": "The world's strongest free coding agent", "license": "MIT", "bin": { From 2c6978e1486f48f57d2a89330e0a3c00cab08529 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 11 Apr 2026 22:59:13 -0700 Subject: [PATCH 17/18] Disable minimax deployment --- web/src/llm-api/fireworks.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/web/src/llm-api/fireworks.ts b/web/src/llm-api/fireworks.ts index c377caaf5..d9825930c 100644 --- a/web/src/llm-api/fireworks.ts +++ b/web/src/llm-api/fireworks.ts @@ -37,7 +37,7 @@ const FIREWORKS_USE_CUSTOM_DEPLOYMENT = true /** Custom deployment IDs for models with dedicated Fireworks deployments */ const FIREWORKS_DEPLOYMENT_MAP: Record = { - 'minimax/minimax-m2.5': 'accounts/james-65d217/deployments/lnfid5h9', + // 'minimax/minimax-m2.5': 'accounts/james-65d217/deployments/lnfid5h9', 'z-ai/glm-5.1': 'accounts/james-65d217/deployments/mjb4i7ea', } From ff4deec22848c2ad99968d91d97626f8752824b9 Mon Sep 17 00:00:00 2001 From: James Grugett Date: Sat, 11 Apr 2026 23:07:21 -0700 Subject: [PATCH 18/18] Fix: Escape on ask_user form now interrupts assistant stream Previously, pressing Escape to skip an ask_user question only dismissed the form but the assistant kept running. Now Escape both skips the question and aborts the assistant stream, returning the user to the input box. 
--- cli/src/chat.tsx | 1 + cli/src/components/chat-input-bar.tsx | 3 +++ 2 files changed, 4 insertions(+) diff --git a/cli/src/chat.tsx b/cli/src/chat.tsx index 22422e191..1f65a51e4 100644 --- a/cli/src/chat.tsx +++ b/cli/src/chat.tsx @@ -1525,6 +1525,7 @@ export const Chat = ({ }, cwd: getProjectRoot() ?? process.cwd(), })} + onInterruptStream={chatKeyboardHandlers.onInterruptStream} /> )} diff --git a/cli/src/components/chat-input-bar.tsx b/cli/src/components/chat-input-bar.tsx index aa08b4bfc..5241d558f 100644 --- a/cli/src/components/chat-input-bar.tsx +++ b/cli/src/components/chat-input-bar.tsx @@ -71,6 +71,7 @@ interface ChatInputBarProps { // Handlers handleSubmit: () => Promise onPaste: (fallbackText?: string) => void + onInterruptStream: () => void } export const ChatInputBar = ({ @@ -108,6 +109,7 @@ export const ChatInputBar = ({ handlePublish, handleSubmit, onPaste, + onInterruptStream, }: ChatInputBarProps) => { const inputMode = useChatStore((state) => state.inputMode) const setInputMode = useChatStore((state) => state.setInputMode) @@ -290,6 +292,7 @@ export const ChatInputBar = ({ const handleFormSkip = () => { if (!askUserState) return skip() + onInterruptStream() } const effectivePlaceholder =