diff --git a/agents/__tests__/editor.test.ts b/agents/__tests__/editor.test.ts index 8a6b65760d..9e14909f89 100644 --- a/agents/__tests__/editor.test.ts +++ b/agents/__tests__/editor.test.ts @@ -62,9 +62,9 @@ describe('editor agent', () => { expect(gpt5Editor.model).toBe('openai/gpt-5.1') }) - test('creates minimax editor', () => { - const minimaxEditor = createCodeEditor({ model: 'minimax' }) - expect(minimaxEditor.model).toBe('minimax/minimax-m2.5') + test('creates glm editor', () => { + const glmEditor = createCodeEditor({ model: 'glm' }) + expect(glmEditor.model).toBe('z-ai/glm-5.1') }) test('gpt-5 editor does not include think tags in instructions', () => { @@ -74,9 +74,9 @@ describe('editor agent', () => { }) test('glm editor does not include think tags in instructions', () => { - const minimaxEditor = createCodeEditor({ model: 'minimax' }) - expect(minimaxEditor.instructionsPrompt).not.toContain('') - expect(minimaxEditor.instructionsPrompt).not.toContain('') + const glmEditor = createCodeEditor({ model: 'glm' }) + expect(glmEditor.instructionsPrompt).not.toContain('') + expect(glmEditor.instructionsPrompt).not.toContain('') }) test('opus editor includes think tags in instructions', () => { @@ -88,17 +88,17 @@ describe('editor agent', () => { test('all variants have same base properties', () => { const opusEditor = createCodeEditor({ model: 'opus' }) const gpt5Editor = createCodeEditor({ model: 'gpt-5' }) - const minimaxEditor = createCodeEditor({ model: 'minimax' }) + const glmEditor = createCodeEditor({ model: 'glm' }) // All should have same basic structure expect(opusEditor.displayName).toBe(gpt5Editor.displayName) - expect(gpt5Editor.displayName).toBe(minimaxEditor.displayName) + expect(gpt5Editor.displayName).toBe(glmEditor.displayName) expect(opusEditor.outputMode).toBe(gpt5Editor.outputMode) - expect(gpt5Editor.outputMode).toBe(minimaxEditor.outputMode) + expect(gpt5Editor.outputMode).toBe(glmEditor.outputMode) 
expect(opusEditor.toolNames).toEqual(gpt5Editor.toolNames) - expect(gpt5Editor.toolNames).toEqual(minimaxEditor.toolNames) + expect(gpt5Editor.toolNames).toEqual(glmEditor.toolNames) }) }) diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts index 3d504edfb2..22a58d82a9 100644 --- a/agents/base2/base2.ts +++ b/agents/base2/base2.ts @@ -28,7 +28,7 @@ export function createBase2( return { publisher, - model: isFree ? 'minimax/minimax-m2.5' : 'anthropic/claude-opus-4.6', + model: isFree ? 'z-ai/glm-5.1' : 'anthropic/claude-opus-4.6', providerOptions: isFree ? { data_collection: 'deny', } : { @@ -58,7 +58,7 @@ export function createBase2( 'spawn_agents', 'read_files', 'read_subtree', - !isFast && !isFree && 'write_todos', + !isFast && 'write_todos', !isFast && !noAskUser && 'suggest_followups', 'str_replace', 'write_file', @@ -67,13 +67,13 @@ export function createBase2( !noAskUser && 'ask_user', 'skill', 'set_output', - 'code_search', 'list_directory', 'glob', ), spawnableAgents: buildArray( !isMax && 'file-picker', isMax && 'file-picker-max', + 'code-searcher', 'researcher-web', 'researcher-docs', 'basher', @@ -124,15 +124,15 @@ export function createBase2( - Add thoughtful details like hover states, transitions, and micro-interactions - Apply design principles: hierarchy, contrast, balance, and movement - Create an impressive demonstration showcasing web development capabilities -- **Refactoring Awareness:** Whenever you modify an exported symbol like a function or class or variable, you should find and update all the references to it appropriately using the code_search tool. +- **Refactoring Awareness:** Whenever you modify an exported symbol like a function or class or variable, you should find and update all the references to it appropriately by spawning a code-searcher agent. - **Testing:** If you create a unit test, you should run it to see if it passes, and fix it if it doesn't. 
- **Package Management:** When adding new packages, use the basher agent to install the package rather than editing the package.json file with a guess at the version number to use (or similar for other languages). This way, you will be sure to have the latest version of the package. Do not install packages globally unless asked by the user (e.g. Don't run \`npm install -g \`). Always try to use the package manager associated with the project (e.g. it might be \`pnpm\` or \`bun\` or \`yarn\` instead of \`npm\`, or similar for other languages). - **Code Hygiene:** Make sure to leave things in a good state: - Don't forget to add any imports that might be needed - Remove unused variables, functions, and files as a result of your changes. - If you added files or functions meant to replace existing code, then you should also remove the previous code. -- **Minimal new code comments:** Do not add many new comments while writing code, unless they were preexisting comments (keep those!) or unless the user asks you to add comments! -- **Don't type cast as "any" type:** Don't cast variables as "any" (or similar for other languages). This is a bad practice as it leads to bugs. The code is more robust when every expression is typed. +- **Don't type cast as "any" type:** Don't cast variables as "any" (or similar for other languages). This is a bad practice as it leads to bugs. Exception: when the value can truly be any type. +- **Prefer str_replace to write_file:** str_replace is more efficient for targeted changes and gives more feedback. Only use write_file for new files or when necessary to rewrite the entire file. # Spawning agents guidelines @@ -141,9 +141,9 @@ Use the spawn_agents tool to spawn specialized agents to help you complete the u - **Spawn multiple agents in parallel:** This increases the speed of your response **and** allows you to be more comprehensive by spawning more total agents to synthesize the best response. 
- **Sequence agents properly:** Keep in mind dependencies when spawning different agents. Don't spawn agents in parallel that depend on each other. ${buildArray( - '- Spawn context-gathering agents (file pickers and web/docs researchers) before making edits. Use the code_search, list_directory, and glob tools directly for searching and exploring the codebase.', + '- Spawn context-gathering agents (file pickers, code searchers, and web/docs researchers) before making edits. Use the list_directory and glob tools directly for searching and exploring the codebase.', isFree && 'Do not spawn the thinker-gpt agent, unless the user asks. Not everyone has connected their ChatGPT subscription to Codebuff to allow for it.', - isFree && 'You must spawn the thinker-with-files-gemini agent to think through and plan the reponse to most requests, unless the request is trivial. This agent is extremely useful as it is very smart! You must pass the relevant filePaths when spawning it, since it does not have access to the conversation history.', + isFree && `Spawn the thinker-with-files-gemini agent for complex problems — it's very smart. Skip it for routine edits and clearly-scoped changes. Pass the relevant filePaths since it has no conversation history.`, isDefault && '- Spawn the editor agent to implement the changes after you have gathered all the context you need.', (isDefault || isMax) && @@ -197,16 +197,16 @@ ${buildArray( please implement [a complex new feature] -[ You spawn 3 file-pickers and a docs researcher in parallel to find relevant files and do research online. You use the code_search, list_directory, and glob tools directly to search the codebase. ] +[ You spawn 3 file-pickers, 2 code-searchers, and a docs researcher in parallel to find relevant files and do research online. You use the list_directory and glob tools directly to search the codebase. 
] [ You read a few of the relevant files using the read_files tool in two separate tool calls ] -[ You use code_search and glob tools, and spawn another file-picker to find more relevant files ] +[ You spawn another file-picker and code-searcher to find more relevant files, and use glob tools ] [ You read a few other relevant files using the read_files tool ]${!noAskUser ? `\n\n[ You ask the user for important clarifications on their request or alternate implementation strategies using the ask_user tool ]` : '' - }${isFree ? `\n\n[ You spawn the thinker-with-files-gemini agent with the relevant filePaths to plan the best response ]` : ''} + } ${isDefault ? `[ You implement the changes using the editor agent ]` : isFast || isFree @@ -217,7 +217,7 @@ ${isDefault ${isDefault ? `[ You spawn a code-reviewer, a basher to typecheck the changes, and another basher to run tests, all in parallel ]` : isFree - ? `[ You spawn a code-reviewer-lite to review the changes, and a basher to typecheck the changes, and another basher to run tests, all in parallel ]` + ? `[ You spawn a code-reviewer-lite to review the changes, a basher to typecheck the local changes, a basher to typecheck the whole project, and another basher to run tests, all in parallel ]` : isMax ? `[ You spawn a basher to typecheck the changes, and another basher to run tests, in parallel. Then, you spawn a code-reviewer-multi-prompt to review the changes. ]` : '[ You spawn a basher to typecheck the changes and another basher to run tests, all in parallel ]' @@ -300,7 +300,7 @@ ${PLACEHOLDER.GIT_CHANGES_PROMPT} } } -const EXPLORE_PROMPT = `- Iteratively spawn file pickers, bashers, and web/docs researchers to gather context as needed. Use the code_search, list_directory, and glob tools directly for searching and exploring the codebase. The file-picker agent in particular is very useful to find relevant files -- try spawning multiple in parallel (say, 2-5) to explore different parts of the codebase. 
Use read_subtree if you need to grok a particular part of the codebase. Read all the relevant files using the read_files tool.` +const EXPLORE_PROMPT = `- Iteratively spawn file pickers, code searchers, bashers, and web/docs researchers to gather context as needed. Use the list_directory and glob tools directly for searching and exploring the codebase. The file-picker and code-searcher agents are very useful to find relevant files -- try spawning multiple in parallel (say, 2-5 file-pickers and 1-3 code-searchers) to explore different parts of the codebase. Use read_subtree if you need to grok a particular part of the codebase. Read all the relevant files using the read_files tool.` function buildImplementationInstructionsPrompt({ isSonnet, @@ -331,10 +331,10 @@ ${buildArray( `- Important: Read as many files as could possibly be relevant to the task over several steps to improve your understanding of the user's request and produce the best possible code changes. Find more examples within the codebase similar to the user's request, dependencies that help with understanding how things work, tests, etc. This is frequently 12-20 files, depending on the task.`, !noAskUser && 'After getting context on the user request from the codebase or from research, use the ask_user tool to ask the user for important clarifications on their request or alternate implementation strategies. You should skip this step if the choice is obvious -- only ask the user if you need their help making the best choice.', - (isDefault || isMax) && + (isDefault || isMax || isFree) && `- For any task requiring 3+ steps, use the write_todos tool to write out your step-by-step implementation plan. Include ALL of the applicable tasks in the list.${isFast ? '' : ' You should include a step to review the changes after you have implemented the changes.'}:${hasNoValidation ? 
'' : ' You should include at least one step to validate/test your changes: be specific about whether to typecheck, run tests, run lints, etc.'} You may be able to do reviewing and validation in parallel in the same step. Skip write_todos for simple tasks like quick edits or answering questions.`, isFree && - `- For most requests, spawn the thinker-with-files-gemini agent to think through and plan the best response. This agent is extremely useful as it is very smart. You must take advantage of it and spawn it about once per user request. Gather all the necessary context *before* spawning it, and pass the relevant filePaths since it does not have access to the conversation history.`, + `- For complex problems, spawn the thinker-with-files-gemini agent after gathering context. Skip it for routine edits and clearly-scoped changes. Pass the relevant filePaths.`, (isDefault || isMax) && `- For quick problems, briefly explain your reasoning to the user. If you need to think longer, write your thoughts within the tags. Finally, for complex problems, spawn the thinker agent to help find the best solution. (gpt-5-agent is a last resort for complex problems)`, isDefault && @@ -380,7 +380,7 @@ function buildImplementationStepPrompt({ `Keep working until the user's request is completely satisfied${!hasNoValidation ? ' and validated' : ''}, or until you require more information from the user.`, 'You must use the skill tool to load any potentially relevant skills.', isFree && - `You must spawn the thinker-with-files-gemini agent once per user request to plan the best response. Pass the relevant filePaths since it does not have access to the conversation history.`, + `Spawn the thinker-with-files-gemini agent for complex problems, not routine edits. 
Pass the relevant filePaths.`, isMax && `You must spawn the 'editor-multi-prompt' agent to implement code changes rather than using the str_replace or write_file tools, since it will generate the best code changes.`, (isDefault || isMax) && diff --git a/agents/basher.ts b/agents/basher.ts index dc9dc689d1..259d8fcbf0 100644 --- a/agents/basher.ts +++ b/agents/basher.ts @@ -11,30 +11,25 @@ const basher: AgentDefinition = { model: 'google/gemini-3.1-flash-lite-preview', displayName: 'Basher', spawnerPrompt: - 'Runs a single terminal command and describes its output using an LLM. A lightweight shell command executor. Requires both a shell command and a prompt.', + 'Runs a single terminal command and (recommended) describes its output using an LLM using the what_to_summarize field. A lightweight shell command executor. Every basher spawn MUST include params: { command: "" }.', inputSchema: { - prompt: { - type: 'string', - description: - 'What information from the command output is desired. Be specific about what to look for or extract.', - }, params: { type: 'object', properties: { command: { type: 'string', - description: 'Terminal command to run in bash shell', + description: 'The terminal command to run in bash shell. Don\'t forget this field!', + }, + what_to_summarize: { + type: 'string', + description: + 'What information from the command output is desired. Be specific about what to look for or extract. This is optional, and if not provided, the basher will return the full command output without summarization.', }, timeout_seconds: { type: 'number', description: 'Set to -1 for no timeout. Default 30', }, - rawOutput: { - type: 'boolean', - description: - 'If true, returns the full command output without summarization. Defaults to false.', - }, }, required: ['command'], }, @@ -73,7 +68,7 @@ Do not use any tools! 
Only analyze the output of the command.`, } const timeout_seconds = params?.timeout_seconds as number | undefined - const rawOutput = params?.rawOutput as boolean | undefined + const what_to_summarize = params?.what_to_summarize as string | undefined // Run the command const { toolResult } = yield { @@ -84,7 +79,7 @@ Do not use any tools! Only analyze the output of the command.`, }, } - if (rawOutput) { + if (!what_to_summarize) { // Return the raw command output without summarization const result = toolResult?.[0] // Only return object values (command output objects), not plain strings diff --git a/agents/editor/editor-lite.ts b/agents/editor/editor-lite.ts index 9cb5675b5e..29225f0c29 100644 --- a/agents/editor/editor-lite.ts +++ b/agents/editor/editor-lite.ts @@ -3,7 +3,7 @@ import { createCodeEditor } from './editor' import type { AgentDefinition } from '../types/agent-definition' const definition: AgentDefinition = { - ...createCodeEditor({ model: 'minimax' }), + ...createCodeEditor({ model: 'glm' }), id: 'editor-lite', } export default definition diff --git a/agents/editor/editor.ts b/agents/editor/editor.ts index 6beb22d221..e191609ad2 100644 --- a/agents/editor/editor.ts +++ b/agents/editor/editor.ts @@ -4,7 +4,7 @@ import { publisher } from '../constants' import type { AgentDefinition } from '../types/agent-definition' export const createCodeEditor = (options: { - model: 'gpt-5' | 'opus' | 'minimax' + model: 'gpt-5' | 'opus' | 'glm' }): Omit => { const { model } = options return { @@ -12,8 +12,8 @@ export const createCodeEditor = (options: { model: options.model === 'gpt-5' ? 'openai/gpt-5.1' - : options.model === 'minimax' - ? 'minimax/minimax-m2.5' + : options.model === 'glm' + ? 'z-ai/glm-5.1' : 'anthropic/claude-opus-4.6', ...(options.model === 'opus' && { providerOptions: { @@ -65,7 +65,7 @@ OR for new files or major rewrites: } -${model === 'gpt-5' || model === 'minimax' +${model === 'gpt-5' || model === 'glm' ? 
'' : `Before you start writing your implementation, you should use tags to think about the best way to implement the changes. diff --git a/agents/file-explorer/code-searcher.ts b/agents/file-explorer/code-searcher.ts index 5204ebde3b..43fee77956 100644 --- a/agents/file-explorer/code-searcher.ts +++ b/agents/file-explorer/code-searcher.ts @@ -49,7 +49,7 @@ const codeSearcher: SecretAgentDefinition = { id: 'code-searcher', displayName: 'Code Searcher', spawnerPrompt: - 'Mechanically runs multiple code search queries (using ripgrep line-oriented search) and returns up to 250 results across all source files, showing each line that matches the search pattern. Excludes git-ignored files.', + `Mechanically runs multiple code search queries (using ripgrep line-oriented search) and returns up to 250 results across all source files, showing each line that matches the search pattern. Excludes git-ignored files. You MUST pass searchQueries in params. Example input: { "params": { "searchQueries": [{ "pattern": "createUser", "flags": "-g *.ts" }, { "pattern": "deleteUser", "flags": "-g *.ts" }, { "pattern": "UserSchema", "maxResults": 5 }] } }`, model: 'anthropic/claude-sonnet-4.5', publisher, includeMessageHistory: false, diff --git a/agents/reviewer/code-reviewer-lite.ts b/agents/reviewer/code-reviewer-lite.ts index f1baa7dffc..feafb87c45 100644 --- a/agents/reviewer/code-reviewer-lite.ts +++ b/agents/reviewer/code-reviewer-lite.ts @@ -5,7 +5,7 @@ import { createReviewer } from './code-reviewer' const definition: SecretAgentDefinition = { id: 'code-reviewer-lite', publisher, - ...createReviewer('minimax/minimax-m2.5'), + ...createReviewer('z-ai/glm-5.1'), } export default definition diff --git a/agents/thinker/thinker-with-files-gemini.ts b/agents/thinker/thinker-with-files-gemini.ts index 0f9ec5ad33..364dcca96c 100644 --- a/agents/thinker/thinker-with-files-gemini.ts +++ b/agents/thinker/thinker-with-files-gemini.ts @@ -8,7 +8,7 @@ const definition: SecretAgentDefinition = { 
model: 'google/gemini-3.1-pro-preview', displayName: 'Theo the Theorizer with Files (Gemini)', reasoningOptions: { - effort: 'low', + effort: 'medium', }, spawnerPrompt: 'Does deep thinking given the prompt and provided files using Gemini. Use this to help you solve a specific problem. This agent has no context on the conversation history so it cannot see files you have read or previous discussion. Instead, you must provide all the relevant context via the prompt or filePaths for this agent to work well.', diff --git a/agents/types/agent-definition.ts b/agents/types/agent-definition.ts index abbcbc0cda..522994ac27 100644 --- a/agents/types/agent-definition.ts +++ b/agents/types/agent-definition.ts @@ -424,6 +424,7 @@ export type ModelName = | 'moonshotai/kimi-k2.5' | 'moonshotai/kimi-k2.5:nitro' | 'z-ai/glm-5' + | 'z-ai/glm-5.1' | 'z-ai/glm-4.6' | 'z-ai/glm-4.6:nitro' | 'z-ai/glm-4.7' diff --git a/cli/release/package.json b/cli/release/package.json index 22c99696d8..5ccbe9c048 100644 --- a/cli/release/package.json +++ b/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "codebuff", - "version": "1.0.639", + "version": "1.0.640", "description": "AI coding agent", "license": "MIT", "bin": { diff --git a/cli/src/chat.tsx b/cli/src/chat.tsx index 22422e1918..1f65a51e4e 100644 --- a/cli/src/chat.tsx +++ b/cli/src/chat.tsx @@ -1525,6 +1525,7 @@ export const Chat = ({ }, cwd: getProjectRoot() ?? 
process.cwd(), })} + onInterruptStream={chatKeyboardHandlers.onInterruptStream} /> )} diff --git a/cli/src/components/chat-input-bar.tsx b/cli/src/components/chat-input-bar.tsx index aa08b4bfc8..5241d558f2 100644 --- a/cli/src/components/chat-input-bar.tsx +++ b/cli/src/components/chat-input-bar.tsx @@ -71,6 +71,7 @@ interface ChatInputBarProps { // Handlers handleSubmit: () => Promise onPaste: (fallbackText?: string) => void + onInterruptStream: () => void } export const ChatInputBar = ({ @@ -108,6 +109,7 @@ export const ChatInputBar = ({ handlePublish, handleSubmit, onPaste, + onInterruptStream, }: ChatInputBarProps) => { const inputMode = useChatStore((state) => state.inputMode) const setInputMode = useChatStore((state) => state.setInputMode) @@ -290,6 +292,7 @@ export const ChatInputBar = ({ const handleFormSkip = () => { if (!askUserState) return skip() + onInterruptStream() } const effectivePlaceholder = diff --git a/cli/src/components/choice-ad-banner.tsx b/cli/src/components/choice-ad-banner.tsx index 5a72e89ab5..7ca3f1d4ac 100644 --- a/cli/src/components/choice-ad-banner.tsx +++ b/cli/src/components/choice-ad-banner.tsx @@ -70,7 +70,7 @@ export const ChoiceAdBanner: React.FC = ({ ads, onImpressio } }, [visibleAds, onImpression]) - const hoverBorderColor = theme.link + const hoverBorderColor = theme.primary return ( = ({ ads, onImpressio diff --git a/cli/src/utils/create-run-config.ts b/cli/src/utils/create-run-config.ts index 3055f4e2c2..c68535d78d 100644 --- a/cli/src/utils/create-run-config.ts +++ b/cli/src/utils/create-run-config.ts @@ -1,5 +1,7 @@ import path from 'path' +import { MAX_AGENT_STEPS_DEFAULT } from '@codebuff/common/constants/agents' + import { createEventHandler, createStreamChunkHandler, @@ -109,7 +111,7 @@ export const createRunConfig = (params: CreateRunConfigParams) => { content, previousRun: previousRunState ?? 
undefined, agentDefinitions, - maxAgentSteps: 100, + maxAgentSteps: MAX_AGENT_STEPS_DEFAULT, handleStreamChunk: createStreamChunkHandler(eventHandlerState), handleEvent: createEventHandler(eventHandlerState), signal: params.signal, diff --git a/common/src/constants/agents.ts b/common/src/constants/agents.ts index 01b92e37d4..5737b77614 100644 --- a/common/src/constants/agents.ts +++ b/common/src/constants/agents.ts @@ -92,4 +92,4 @@ export const AGENT_NAME_TO_TYPES = Object.entries(AGENT_NAMES).reduce( {} as Record, ) -export const MAX_AGENT_STEPS_DEFAULT = 100 +export const MAX_AGENT_STEPS_DEFAULT = 200 diff --git a/common/src/constants/free-agents.ts b/common/src/constants/free-agents.ts index e56e3fb58a..3a9f5c9166 100644 --- a/common/src/constants/free-agents.ts +++ b/common/src/constants/free-agents.ts @@ -18,7 +18,7 @@ export const FREE_COST_MODE = 'free' as const */ export const FREE_MODE_AGENT_MODELS: Record> = { // Root orchestrator - 'base2-free': new Set(['minimax/minimax-m2.5']), + 'base2-free': new Set(['minimax/minimax-m2.5', 'z-ai/glm-5.1']), // File exploration agents 'file-picker': new Set(['google/gemini-2.5-flash-lite']), @@ -33,10 +33,10 @@ export const FREE_MODE_AGENT_MODELS: Record> = { 'basher': new Set(['google/gemini-3.1-flash-lite-preview']), // Editor for free mode - 'editor-lite': new Set(['minimax/minimax-m2.5']), + 'editor-lite': new Set(['minimax/minimax-m2.5', 'z-ai/glm-5.1']), // Code reviewer for free mode - 'code-reviewer-lite': new Set(['minimax/minimax-m2.5']), + 'code-reviewer-lite': new Set(['minimax/minimax-m2.5', 'z-ai/glm-5.1']), // Thinker for free mode 'thinker-with-files-gemini': new Set(['google/gemini-3.1-pro-preview']), diff --git a/common/src/templates/initial-agents-dir/types/agent-definition.ts b/common/src/templates/initial-agents-dir/types/agent-definition.ts index abbcbc0cda..522994ac27 100644 --- a/common/src/templates/initial-agents-dir/types/agent-definition.ts +++ 
b/common/src/templates/initial-agents-dir/types/agent-definition.ts @@ -424,6 +424,7 @@ export type ModelName = | 'moonshotai/kimi-k2.5' | 'moonshotai/kimi-k2.5:nitro' | 'z-ai/glm-5' + | 'z-ai/glm-5.1' | 'z-ai/glm-4.6' | 'z-ai/glm-4.6:nitro' | 'z-ai/glm-4.7' diff --git a/common/src/tools/params/tool/spawn-agents.ts b/common/src/tools/params/tool/spawn-agents.ts index c91e2e3e9d..fe88beaa07 100644 --- a/common/src/tools/params/tool/spawn-agents.ts +++ b/common/src/tools/params/tool/spawn-agents.ts @@ -23,9 +23,25 @@ const inputSchema = z agent_type: z.string().describe('Agent to spawn'), prompt: z.string().optional().describe('Prompt to send to the agent'), params: z - .record(z.string(), z.any()) + .object({ + // Common agent fields (all optional hints — each agent validates its own required fields) + command: z.string().optional().describe('Terminal command to run (basher, tmux-cli)'), + what_to_summarize: z.string().optional().describe('What information from the command output is desired (basher)'), + timeout_seconds: z.number().optional().describe('Timeout for command. Set to -1 for no timeout. Default 30 (basher)'), + searchQueries: z.array(z.object({ + pattern: z.string().describe('The pattern to search for'), + flags: z.string().optional().describe('Optional ripgrep flags (e.g., "-i", "-g *.ts")'), + cwd: z.string().optional().describe('Optional working directory relative to project root'), + maxResults: z.number().optional().describe('Max results per file. 
Default 15'), + })).optional().describe('Array of code search queries (code-searcher)'), + filePaths: z.array(z.string()).optional().describe('Relevant file paths to read (opus-agent, gpt-5-agent, thinker-with-files-gemini)'), + directories: z.array(z.string()).optional().describe('Directories to search within (file-picker)'), + url: z.string().optional().describe('Starting URL to navigate to (browser-use)'), + prompts: z.array(z.string()).optional().describe('Array of strategy prompts (editor-multi-prompt, code-reviewer-multi-prompt)'), + }) + .catchall(z.any()) .optional() - .describe('Parameters object for the agent (if any)'), + .describe('Parameters object for the agent'), }) .array(), ), diff --git a/evals/buffbench/main-single-eval.ts b/evals/buffbench/main-single-eval.ts index bae330cdcf..6eceac7a5c 100644 --- a/evals/buffbench/main-single-eval.ts +++ b/evals/buffbench/main-single-eval.ts @@ -7,8 +7,8 @@ async function main() { await runBuffBench({ evalDataPaths: [path.join(__dirname, 'eval-codebuff.json')], - agents: ['base2'], - taskIds: ['filter-system-history'], + agents: ['base2-free-evals'], + taskIds: ['server-agent-validation'], saveTraces, }) diff --git a/freebuff/cli/release/package.json b/freebuff/cli/release/package.json index 28f0c04169..f4eed9d22d 100644 --- a/freebuff/cli/release/package.json +++ b/freebuff/cli/release/package.json @@ -1,6 +1,6 @@ { "name": "freebuff", - "version": "0.0.30", + "version": "0.0.32", "description": "The world's strongest free coding agent", "license": "MIT", "bin": { diff --git a/packages/agent-runtime/src/tools/handlers/tool/set-output.ts b/packages/agent-runtime/src/tools/handlers/tool/set-output.ts index 8dec297118..97c613b86a 100644 --- a/packages/agent-runtime/src/tools/handlers/tool/set-output.ts +++ b/packages/agent-runtime/src/tools/handlers/tool/set-output.ts @@ -1,6 +1,7 @@ import { jsonToolResult } from '@codebuff/common/util/messages' import { getAgentTemplate } from 
'../../../templates/agent-registry' +import { formatValueForError } from '../../../util/format-value' import type { CodebuffToolHandlerFunction } from '../handler-function-type' import type { @@ -61,7 +62,7 @@ export const handleSetOutput = (async (params: { const prefix = usedData ? 'Output validation error: Your output was found inside the `data` field but still failed validation. Please fix the issues and try again without wrapping in `data`. Issues: ' : 'Output validation error: Output failed to match the output schema and was ignored. You might want to try again! Issues: ' - const errorMessage = `${prefix}${bestError}` + const errorMessage = `${prefix}${bestError}\n\nOriginal output value:\n${formatValueForError(output)}` logger.error( { output, diff --git a/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-utils.ts b/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-utils.ts index 77dac6b366..0f6c3884b6 100644 --- a/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-utils.ts +++ b/packages/agent-runtime/src/tools/handlers/tool/spawn-agent-utils.ts @@ -5,6 +5,7 @@ import { generateCompactId } from '@codebuff/common/util/string' import { loopAgentSteps } from '../../../run-agent-step' import { getAgentTemplate } from '../../../templates/agent-registry' +import { formatValueForError } from '../../../util/format-value' import { filterUnfinishedToolCalls, withSystemTags, @@ -222,7 +223,7 @@ export function validateAgentInput( const result = inputSchema.prompt.safeParse(prompt ?? '') if (!result.success) { throw new Error( - `Invalid prompt for agent ${agentType}: ${JSON.stringify(result.error.issues, null, 2)}`, + `Invalid prompt for agent ${agentType}: ${JSON.stringify(result.error.issues, null, 2)}\n\nOriginal prompt value:\n${formatValueForError(prompt ?? '')}`, ) } } @@ -232,7 +233,7 @@ export function validateAgentInput( const result = inputSchema.params.safeParse(params ?? 
{}) if (!result.success) { throw new Error( - `Invalid params for agent ${agentType}: ${JSON.stringify(result.error.issues, null, 2)}`, + `Invalid params for agent ${agentType}: ${JSON.stringify(result.error.issues, null, 2)}\n\nOriginal params value:\n${formatValueForError(params ?? {})}`, ) } } diff --git a/packages/agent-runtime/src/tools/tool-executor.ts b/packages/agent-runtime/src/tools/tool-executor.ts index 81782c29d5..da0cfbd3b2 100644 --- a/packages/agent-runtime/src/tools/tool-executor.ts +++ b/packages/agent-runtime/src/tools/tool-executor.ts @@ -6,6 +6,7 @@ import { cloneDeep } from 'lodash' import { getMCPToolData } from '../mcp' import { MCP_TOOL_SEPARATOR } from '../mcp-constants' import { getAgentShortName } from '../templates/prompts' +import { formatValueForError } from '../util/format-value' import { codebuffToolHandlers } from './handlers/list' import { getMatchingSpawn, @@ -180,13 +181,10 @@ export async function executeToolCall( } if ('error' in toolCall) { - const inputStr = JSON.stringify(input, null, 2) - const truncatedInput = inputStr.length > 500 - ? inputStr.slice(0, 500) + '...(truncated)' - : inputStr + const formattedInput = formatValueForError(input) onResponseChunk({ type: 'error', - message: `${toolCall.error}\n\nOriginal tool call input:\n${truncatedInput}`, + message: `${toolCall.error}\n\nOriginal tool call input:\n${formattedInput}`, }) logger.debug( { toolCall, error: toolCall.error }, @@ -491,13 +489,10 @@ export async function executeCustomToolCall( } if ('error' in toolCall) { - const inputStr = JSON.stringify(input, null, 2) - const truncatedInput = inputStr.length > 500 - ? 
inputStr.slice(0, 500) + '...(truncated)' - : inputStr + const formattedInput = formatValueForError(input) onResponseChunk({ type: 'error', - message: `${toolCall.error}\n\nOriginal tool call input:\n${truncatedInput}`, + message: `${toolCall.error}\n\nOriginal tool call input:\n${formattedInput}`, }) logger.debug( { toolCall, error: toolCall.error }, diff --git a/packages/agent-runtime/src/util/format-value.ts b/packages/agent-runtime/src/util/format-value.ts new file mode 100644 index 0000000000..c4bbdccaa8 --- /dev/null +++ b/packages/agent-runtime/src/util/format-value.ts @@ -0,0 +1,10 @@ +export function formatValueForError(value: unknown, maxLength = 500): string { + const jsonStr = JSON.stringify(value, null, 2) ?? 'undefined' + const truncated = jsonStr.length > maxLength + ? jsonStr.slice(0, maxLength) + '...(truncated)' + : jsonStr + if (value === null || value === undefined || typeof value !== 'object') { + return `${truncated} (type: ${value === null ? 'null' : typeof value})` + } + return truncated +} diff --git a/scripts/test-fireworks-long.ts b/scripts/test-fireworks-long.ts index 58a4cb099f..ad01abac66 100644 --- a/scripts/test-fireworks-long.ts +++ b/scripts/test-fireworks-long.ts @@ -7,19 +7,70 @@ * to measure how well Fireworks caches the shared prefix across turns. 
* * Usage: - * bun scripts/test-fireworks-long.ts + * bun scripts/test-fireworks-long.ts [model] [--deployment] + * + * Models: + * glm-5.1 (default) — z-ai/glm-5.1 + * minimax — minimax/minimax-m2.5 + * + * Flags: + * --deployment Use custom deployment instead of serverless (standard API) + * Serverless is the default */ export { } const FIREWORKS_BASE_URL = 'https://api.fireworks.ai/inference/v1' -const FIREWORKS_MODEL = 'accounts/james-65d217/deployments/lnfid5h9' -// const FIREWORKS_MODEL = 'accounts/fireworks/models/minimax-m2p5' -// Pricing constants — https://fireworks.ai/pricing -const INPUT_COST_PER_TOKEN = 0.30 / 1_000_000 -const CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000 -const OUTPUT_COST_PER_TOKEN = 1.20 / 1_000_000 +type ModelConfig = { + id: string // OpenRouter-style ID (for display) + standardModel: string // Fireworks standard API model ID + deploymentModel: string // Fireworks custom deployment model ID + inputCostPerToken: number + cachedInputCostPerToken: number + outputCostPerToken: number +} + +const MODEL_CONFIGS: Record = { + 'glm-5.1': { + id: 'z-ai/glm-5.1', + standardModel: 'accounts/fireworks/models/glm-5p1', + deploymentModel: 'accounts/james-65d217/deployments/mjb4i7ea', + inputCostPerToken: 1.40 / 1_000_000, + cachedInputCostPerToken: 0.26 / 1_000_000, + outputCostPerToken: 4.40 / 1_000_000, + }, + minimax: { + id: 'minimax/minimax-m2.5', + standardModel: 'accounts/fireworks/models/minimax-m2p5', + deploymentModel: 'accounts/james-65d217/deployments/lnfid5h9', + inputCostPerToken: 0.30 / 1_000_000, + cachedInputCostPerToken: 0.03 / 1_000_000, + outputCostPerToken: 1.20 / 1_000_000, + }, +} + +const DEFAULT_MODEL = 'glm-5.1' + +function getModelConfig(modelArg?: string): ModelConfig { + const key = modelArg ?? DEFAULT_MODEL + const config = MODEL_CONFIGS[key] + if (!config) { + console.error(`❌ Unknown model: "${key}". 
Available models: ${Object.keys(MODEL_CONFIGS).join(', ')}`) + process.exit(1) + } + return config +} + +const USE_DEPLOYMENT = process.argv.includes('--deployment') +const modelArg = process.argv.find((a, i) => i > 1 && !a.startsWith('-') && a !== 'long') +const MODEL = getModelConfig(modelArg) + +// Default to serverless (standard API); use --deployment for custom deployment +const FIREWORKS_MODEL = USE_DEPLOYMENT ? MODEL.deploymentModel : MODEL.standardModel +const INPUT_COST_PER_TOKEN = MODEL.inputCostPerToken +const CACHED_INPUT_COST_PER_TOKEN = MODEL.cachedInputCostPerToken +const OUTPUT_COST_PER_TOKEN = MODEL.outputCostPerToken const MAX_TOKENS = 100 @@ -39,9 +90,9 @@ function computeCost(usage: Record): { cost: number; breakdown: const totalCost = inputCost + cachedCost + outputCost const breakdown = [ - `${nonCachedInput} non-cached input × $0.30/M = $${inputCost.toFixed(8)}`, - `${cachedTokens} cached input × $0.03/M = $${cachedCost.toFixed(8)}`, - `${outputTokens} output × $1.20/M = $${outputCost.toFixed(8)}`, + `${nonCachedInput} non-cached input × $${(INPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M = $${inputCost.toFixed(8)}`, + `${cachedTokens} cached input × $${(CACHED_INPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M = $${cachedCost.toFixed(8)}`, + `${outputTokens} output × $${(OUTPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M = $${outputCost.toFixed(8)}`, `Total: $${totalCost.toFixed(8)}`, ].join('\n ') @@ -270,11 +321,11 @@ async function main() { console.log('🧪 Fireworks 10-Turn Conversation Caching Test') console.log('='.repeat(60)) - console.log(`Model: ${FIREWORKS_MODEL}`) + console.log(`Model: ${MODEL.id} (${FIREWORKS_MODEL}) [${USE_DEPLOYMENT ? 
'deployment' : 'serverless'}]`) console.log(`Base URL: ${FIREWORKS_BASE_URL}`) console.log(`Max tokens: ${MAX_TOKENS} (low output per turn)`) console.log(`Turns: ${TURN_PROMPTS.length}`) - console.log(`Pricing: $0.30/M input, $0.03/M cached, $1.20/M output`) + console.log(`Pricing: $${(INPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M input, $${(CACHED_INPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M cached, $${(OUTPUT_COST_PER_TOKEN * 1_000_000).toFixed(2)}/M output`) console.log(`Session ID: ${SESSION_ID} (x-session-affinity header)`) console.log('='.repeat(60)) console.log() diff --git a/sdk/src/__tests__/read-files.test.ts b/sdk/src/__tests__/read-files.test.ts index 547bbfaa45..9656622865 100644 --- a/sdk/src/__tests__/read-files.test.ts +++ b/sdk/src/__tests__/read-files.test.ts @@ -186,8 +186,8 @@ describe('getFiles', () => { }) describe('file too large', () => { - test('should truncate files over 100k chars to 1k chars with message', async () => { - const largeContent = 'x'.repeat(101_000) // 101k chars - over limit + test('should truncate files over 100k chars to first 100k chars with message', async () => { + const largeContent = 'x'.repeat(100_001) + 'y'.repeat(1000) // over limit const mockFs = createMockFs({ files: { '/project/large.bin': { @@ -203,11 +203,13 @@ describe('getFiles', () => { fs: mockFs, }) - // Should contain first 1k chars - expect(result['large.bin']).toContain('x'.repeat(1000)) + // Should contain first 100k chars + expect(result['large.bin']).toContain('x'.repeat(100_000)) + // Should NOT contain content beyond the limit + expect(result['large.bin']).not.toContain('y') // Should contain truncation message expect(result['large.bin']).toContain('FILE_TOO_LARGE') - expect(result['large.bin']).toContain('101,000 chars') + expect(result['large.bin']).toContain('101,001 chars') }) test('should read files at exactly 100k chars', async () => { diff --git a/sdk/src/tools/read-files.ts b/sdk/src/tools/read-files.ts index 351eddfb54..c3c85cc68e 
100644 --- a/sdk/src/tools/read-files.ts +++ b/sdk/src/tools/read-files.ts @@ -30,7 +30,6 @@ export async function getFiles(params: { const result: Record = {} const MAX_FILE_BYTES = 10 * 1024 * 1024 // 10MB - skip reading entirely const MAX_CHARS = 100_000 // 100k characters threshold - const TRUNCATE_TO_CHARS = 1_000 // Show first 1k chars when over limit const numFmt = new Intl.NumberFormat('en-US') const fmtNum = (n: number) => numFmt.format(n) @@ -84,14 +83,14 @@ export async function getFiles(params: { const content = await fs.readFile(fullPath, 'utf8') if (content.length > MAX_CHARS) { - const truncated = content.slice(0, TRUNCATE_TO_CHARS) + const truncated = content.slice(0, MAX_CHARS) result[relativePath] = truncated + '\n\n[FILE_TOO_LARGE: This file is ' + fmtNum(content.length) + - ' chars, exceeding the 100k char limit. Only the first ' + - fmtNum(TRUNCATE_TO_CHARS) + - ' chars are shown. Use other tools to read sections of the file.]' + ' chars, exceeding the ' + + fmtNum(MAX_CHARS) + + ' char limit. The content above has been truncated. 
Use other tools to read other sections of the file.]' } else { // Prepend TEMPLATE marker for example files result[relativePath] = isExampleFile diff --git a/web/src/llm-api/fireworks.ts b/web/src/llm-api/fireworks.ts index 10f4bb22d8..d9825930c0 100644 --- a/web/src/llm-api/fireworks.ts +++ b/web/src/llm-api/fireworks.ts @@ -29,6 +29,7 @@ const fireworksAgent = new Agent({ /** Map from OpenRouter model IDs to Fireworks standard API model IDs */ const FIREWORKS_MODEL_MAP: Record = { 'minimax/minimax-m2.5': 'accounts/fireworks/models/minimax-m2p5', + 'z-ai/glm-5.1': 'accounts/fireworks/models/glm-5p1', } /** Flag to enable custom Fireworks deployments (set to false to use global API only) */ @@ -36,7 +37,8 @@ const FIREWORKS_USE_CUSTOM_DEPLOYMENT = true /** Custom deployment IDs for models with dedicated Fireworks deployments */ const FIREWORKS_DEPLOYMENT_MAP: Record = { - 'minimax/minimax-m2.5': 'accounts/james-65d217/deployments/lnfid5h9', + // 'minimax/minimax-m2.5': 'accounts/james-65d217/deployments/lnfid5h9', + 'z-ai/glm-5.1': 'accounts/james-65d217/deployments/mjb4i7ea', } /** Check if current time is within deployment hours (10am–8pm ET) */ @@ -137,12 +139,31 @@ function createFireworksRequest(params: { }) } -// Fireworks per-token pricing (dollars per token) -const FIREWORKS_INPUT_COST_PER_TOKEN = 0.30 / 1_000_000 -const FIREWORKS_CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000 -const FIREWORKS_OUTPUT_COST_PER_TOKEN = 1.20 / 1_000_000 +// Fireworks per-token pricing (dollars per token), keyed by OpenRouter model ID +interface FireworksPricing { + inputCostPerToken: number + cachedInputCostPerToken: number + outputCostPerToken: number +} + +const FIREWORKS_PRICING_MAP: Record = { + 'minimax/minimax-m2.5': { + inputCostPerToken: 0.30 / 1_000_000, + cachedInputCostPerToken: 0.03 / 1_000_000, + outputCostPerToken: 1.20 / 1_000_000, + }, + 'z-ai/glm-5.1': { + inputCostPerToken: 1.40 / 1_000_000, + cachedInputCostPerToken: 0.26 / 1_000_000, + outputCostPerToken: 
4.40 / 1_000_000, + }, +} + +function getFireworksPricing(model: string): FireworksPricing { + return FIREWORKS_PRICING_MAP[model] ?? FIREWORKS_PRICING_MAP['z-ai/glm-5.1'] +} -function extractUsageAndCost(usage: Record | undefined | null): UsageData { +function extractUsageAndCost(usage: Record | undefined | null, model: string): UsageData { if (!usage) return { inputTokens: 0, outputTokens: 0, cacheReadInputTokens: 0, reasoningTokens: 0, cost: 0 } const promptDetails = usage.prompt_tokens_details as Record | undefined | null const completionDetails = usage.completion_tokens_details as Record | undefined | null @@ -153,11 +174,12 @@ function extractUsageAndCost(usage: Record | undefined | null): const reasoningTokens = typeof completionDetails?.reasoning_tokens === 'number' ? completionDetails.reasoning_tokens : 0 // Fireworks doesn't return cost — compute from token counts and known pricing + const pricing = getFireworksPricing(model) const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadInputTokens) const cost = - nonCachedInputTokens * FIREWORKS_INPUT_COST_PER_TOKEN + - cacheReadInputTokens * FIREWORKS_CACHED_INPUT_COST_PER_TOKEN + - outputTokens * FIREWORKS_OUTPUT_COST_PER_TOKEN + nonCachedInputTokens * pricing.inputCostPerToken + + cacheReadInputTokens * pricing.cachedInputCostPerToken + + outputTokens * pricing.outputCostPerToken return { inputTokens, outputTokens, cacheReadInputTokens, reasoningTokens, cost } } @@ -192,7 +214,7 @@ export async function handleFireworksNonStream({ const data = await response.json() const content = data.choices?.[0]?.message?.content ?? '' const reasoningText = data.choices?.[0]?.message?.reasoning_content ?? data.choices?.[0]?.message?.reasoning ?? 
'' - const usageData = extractUsageAndCost(data.usage) + const usageData = extractUsageAndCost(data.usage, originalModel) insertMessageToBigQuery({ messageId: data.id, @@ -493,7 +515,7 @@ async function handleResponse({ return { state } } - const usageData = extractUsageAndCost(data.usage as Record) + const usageData = extractUsageAndCost(data.usage as Record, originalModel) const messageId = typeof data.id === 'string' ? data.id : 'unknown' insertMessageToBigQuery({