axistore80-coder · pull · Apr 11, 2026 · Apr 10, 2026 · Apr 10, 2026 · Apr 10, 2026
diff --git a/agents/base2/base2.ts b/agents/base2/base2.ts
@@ -87,7 +87,7 @@ export function createBase2(
       isFree && 'code-reviewer-lite',
       isDefault && 'code-reviewer',
       isMax && 'code-reviewer-multi-prompt',
-      isFree && 'thinker-gemini',
+      isFree && 'thinker-with-files-gemini',
       'thinker-gpt',
       'context-pruner',
     ),
@@ -143,7 +143,7 @@ Use the spawn_agents tool to spawn specialized agents to help you complete the u
   ${buildArray(
         '- Spawn context-gathering agents (file pickers and web/docs researchers) before making edits. Use the code_search, list_directory, and glob tools directly for searching and exploring the codebase.',
         isFree && 'Do not spawn the thinker-gpt agent, unless the user asks. Not everyone has connected their ChatGPT subscription to Codebuff to allow for it.',
-        isFree && 'You should spawn the thinker-gemini agent whenever you encounter a complex problem or the user asks you to think about a problem. This agent is extremely useful as it is very smart. You must take advantage of it and spawn it often!',
+        isFree && 'You must spawn the thinker-with-files-gemini agent to think through and plan the response to most requests, unless the request is trivial. This agent is extremely useful as it is very smart! You must pass the relevant filePaths when spawning it, since it does not have access to the conversation history.',
         isDefault &&
         '- Spawn the editor agent to implement the changes after you have gathered all the context you need.',
         (isDefault || isMax) &&
@@ -206,7 +206,7 @@ ${buildArray(
 [ You read a few other relevant files using the read_files tool ]${!noAskUser
         ? `\n\n[ You ask the user for important clarifications on their request or alternate implementation strategies using the ask_user tool ]`
         : ''
-      }
+      }${isFree ? `\n\n[ You spawn the thinker-with-files-gemini agent with the relevant filePaths to plan the best response ]` : ''}
 ${isDefault
         ? `[ You implement the changes using the editor agent ]`
         : isFast || isFree
@@ -334,7 +334,7 @@ ${buildArray(
     (isDefault || isMax) &&
     `- For any task requiring 3+ steps, use the write_todos tool to write out your step-by-step implementation plan. Include ALL of the applicable tasks in the list.${isFast ? '' : ' You should include a step to review the changes after you have implemented the changes.'}:${hasNoValidation ? '' : ' You should include at least one step to validate/test your changes: be specific about whether to typecheck, run tests, run lints, etc.'} You may be able to do reviewing and validation in parallel in the same step. Skip write_todos for simple tasks like quick edits or answering questions.`,
     isFree &&
-    `- For complex problems, spawn the thinker-gemini agent to help find the best solution. This agent is extremely useful as it is very smart. You must take advantage of it and spawn it often!`,
+    `- For most requests, spawn the thinker-with-files-gemini agent to think through and plan the best response. This agent is extremely useful as it is very smart. You must take advantage of it and spawn it about once per user request. Gather all the necessary context *before* spawning it, and pass the relevant filePaths since it does not have access to the conversation history.`,
     (isDefault || isMax) &&
     `- For quick problems, briefly explain your reasoning to the user. If you need to think longer, write your thoughts within the <think> tags. Finally, for complex problems, spawn the thinker agent to help find the best solution. (gpt-5-agent is a last resort for complex problems)`,
     isDefault &&
@@ -379,6 +379,8 @@ function buildImplementationStepPrompt({
     isMax &&
     `Keep working until the user's request is completely satisfied${!hasNoValidation ? ' and validated' : ''}, or until you require more information from the user.`,
     'You must use the skill tool to load any potentially relevant skills.',
+    isFree &&
+    `You must spawn the thinker-with-files-gemini agent once per user request to plan the best response. Pass the relevant filePaths since it does not have access to the conversation history.`,
     isMax &&
     `You must spawn the 'editor-multi-prompt' agent to implement code changes rather than using the str_replace or write_file tools, since it will generate the best code changes.`,
     (isDefault || isMax) &&

diff --git a/agents/basher.ts b/agents/basher.ts
@@ -11,7 +11,7 @@ const basher: AgentDefinition = {
   model: 'google/gemini-3.1-flash-lite-preview',
   displayName: 'Basher',
   spawnerPrompt:
-    'Runs a single terminal command and describes its output using an LLM. A lightweight shell command executor.',
+    'Runs a single terminal command and describes its output using an LLM. A lightweight shell command executor. Requires both a shell command and a prompt.',
 
   inputSchema: {
     prompt: {
@@ -24,7 +24,7 @@ const basher: AgentDefinition = {
       properties: {
         command: {
           type: 'string',
-          description: 'Terminal command to run',
+          description: 'Terminal command to run in bash shell',
         },
         timeout_seconds: {
           type: 'number',

diff --git a/agents/thinker/thinker-gemini.ts b/agents/thinker/thinker-gemini.ts
@@ -7,10 +7,13 @@ const definition: SecretAgentDefinition = {
   id: 'thinker-gemini',
   model: 'google/gemini-3.1-pro-preview',
   providerOptions: undefined,
+  reasoningOptions: {
+    effort: 'low',
+  },
   outputSchema: undefined,
   outputMode: 'last_message',
   inheritParentSystemPrompt: false,
-  instructionsPrompt: `You are the thinker-gemini agent. Think deeply about the user request and when satisfied, write out your response.
+  instructionsPrompt: `You are the thinker-gemini agent. Think about the user request and when satisfied, write out a very concise response that captures the most important points. DO NOT be verbose -- say the absolute minimum needed to answer the user's question correctly.
 
 The parent agent will see your response. DO NOT call any tools. No need to spawn the thinker agent, because you are already the thinker agent. Just do the thinking work now.`,
   handleSteps: function* () {

diff --git a/agents/thinker/thinker-with-files-gemini.ts b/agents/thinker/thinker-with-files-gemini.ts
@@ -0,0 +1,61 @@
+import { publisher } from '../constants'
+
+import type { SecretAgentDefinition } from '../types/secret-agent-definition'
+
+const definition: SecretAgentDefinition = { // Gemini thinker variant that is given file paths to read before it answers
+  id: 'thinker-with-files-gemini',
+  publisher,
+  model: 'google/gemini-3.1-pro-preview',
+  displayName: 'Theo the Theorizer with Files (Gemini)',
+  reasoningOptions: {
+    effort: 'low',
+  },
+  spawnerPrompt:
+    'Does deep thinking given the prompt and provided files using Gemini. Use this to help you solve a specific problem. This agent has no context on the conversation history so it cannot see files you have read or previous discussion. Instead, you must provide all the relevant context via the prompt or filePaths for this agent to work well.',
+  inputSchema: {
+    prompt: {
+      type: 'string',
+      description: 'The problem you are trying to solve',
+    },
+    params: {
+      type: 'object',
+      properties: {
+        filePaths: {
+          type: 'array',
+          items: {
+            type: 'string',
+            description: 'The path to a file',
+          },
+          description:
+            'A list of relevant file paths to read before thinking. Try to provide ALL the files that could be relevant to your request.',
+        },
+      },
+      required: ['filePaths'], // filePaths must be present, but may be empty; handleSteps guards that case
+    },
+  },
+  outputMode: 'last_message',
+  outputSchema: undefined,
+  includeMessageHistory: false, // matches spawnerPrompt: the agent gets no conversation history, only prompt + files
+  inheritParentSystemPrompt: false,
+  spawnableAgents: [],
+  toolNames: [], // no tools are listed for the model; the file read is issued from handleSteps instead
+
+  instructionsPrompt: `You are the thinker-with-files-gemini agent. Think about the user request and when satisfied, write out a very concise response that captures the most important points. DO NOT be verbose -- say the absolute minimum needed to answer the user's question correctly.
+
+The parent agent will see your response. DO NOT call any tools. No need to spawn the thinker agent, because you are already the thinker agent. Just do the thinking work now.`,
+
+  handleSteps: function* ({ params }) { // generator: optionally yields a read_files call, then yields 'STEP'
+    const filePaths = params?.filePaths as string[] | undefined // unchecked cast; params is not validated here
+
+    if (filePaths && filePaths.length > 0) { // skip the read entirely when no paths were supplied
+      yield {
+        toolName: 'read_files',
+        input: { paths: filePaths },
+      }
+    }
+
+    yield 'STEP' // NOTE(review): presumably lets the model run one step to produce the answer; confirm against the handleSteps contract
+  },
+}
+
+export default definition
diff --git a/common/src/constants/free-agents.ts b/common/src/constants/free-agents.ts
@@ -37,6 +37,9 @@ export const FREE_MODE_AGENT_MODELS: Record<string, Set<string>> = {
 
   // Code reviewer for free mode
   'code-reviewer-lite': new Set(['minimax/minimax-m2.5']),
+
+  // Thinker for free mode
+  'thinker-with-files-gemini': new Set(['google/gemini-3.1-pro-preview']),
 }
 
 /**

diff --git a/common/src/tools/params/tool/skill.ts b/common/src/tools/params/tool/skill.ts
@@ -34,9 +34,11 @@ export const AVAILABLE_SKILLS_PLACEHOLDER = '{{AVAILABLE_SKILLS}}'
 // Base description - the full description with available skills is generated dynamically
 const baseDescription = `Load a skill by name to get its full instructions. Skills provide reusable behaviors and domain-specific knowledge that you can use to complete tasks.
 
-The following are the only skills that are currently available (do not try to use any other skills):
+The following are the pre-loaded skills available at session start:
 ${AVAILABLE_SKILLS_PLACEHOLDER}
 
+Note: You can also load any skill that was created during this session by specifying its name. The skill will be loaded dynamically from disk.
+
 Example:
 ${$getNativeToolCallExampleString({
   toolName,

diff --git a/evals/buffbench/eval-codebuff.json b/evals/buffbench/eval-codebuff.json
@@ -28,6 +28,7 @@
     "STRIPE_SECRET_KEY": "test-stripe-key",
     "STRIPE_WEBHOOK_SECRET_KEY": "test-stripe-webhook",
     "STRIPE_TEAM_FEE_PRICE_ID": "test-team-price-id",
+    "STRIPE_USAGE_PRICE_ID": "test-usage-price-id",
     "LOOPS_API_KEY": "test-loops",
     "DISCORD_PUBLIC_KEY": "test-discord-public",
     "DISCORD_BOT_TOKEN": "test-discord-bot",

diff --git a/evals/buffbench/main-hard-tasks.ts b/evals/buffbench/main-hard-tasks.ts
@@ -13,6 +13,8 @@ function loadTaskIds(evalPath: string): string[] {
 }
 
 async function main() {
+  const saveTraces = process.argv.includes('--save-traces')
+
   const evalPaths = [
     path.join(__dirname, 'eval-codebuff2.json'),
     path.join(__dirname, 'eval-manifold2.json'),
@@ -33,6 +35,7 @@ async function main() {
     agents: ['base2', 'external:claude'],
     taskIds: allTaskIds,
     taskConcurrency: 4,
+    saveTraces,
   })
 
   process.exit(0)

diff --git a/evals/buffbench/main-nightly.ts b/evals/buffbench/main-nightly.ts
@@ -8,6 +8,8 @@ import type { MetaAnalysisResult } from './meta-analyzer'
 import type { AgentEvalResults } from './types'
 
 async function main() {
+  const saveTraces = process.argv.includes('--save-traces')
+
   console.log('Starting nightly buffbench evaluation...')
   console.log('Eval set: codebuff')
   console.log()
@@ -16,6 +18,7 @@ async function main() {
     evalDataPaths: [ path.join(__dirname, 'eval-codebuff.json')],
     agents: ['base2-free'],
     taskConcurrency: 3,
+    saveTraces,
   })
 
   console.log('\nNightly buffbench evaluation completed successfully!')

diff --git a/evals/buffbench/main-single-eval.ts b/evals/buffbench/main-single-eval.ts
@@ -3,10 +3,13 @@ import path from 'path'
 import { runBuffBench } from './run-buffbench'
 
 async function main() {
+  const saveTraces = process.argv.includes('--save-traces')
+
   await runBuffBench({
     evalDataPaths: [path.join(__dirname, 'eval-codebuff.json')],
     agents: ['base2'],
     taskIds: ['filter-system-history'],
+    saveTraces,
   })
 
   process.exit(0)

diff --git a/evals/buffbench/main.ts b/evals/buffbench/main.ts
@@ -3,13 +3,16 @@ import path from 'path'
 import { runBuffBench } from './run-buffbench'
 
 async function main() {
+  const saveTraces = process.argv.includes('--save-traces')
+
   // Compare Codebuff agents against external CLI agents
   // Use 'external:claude' for Claude Code CLI
   // Use 'external:codex' for OpenAI Codex CLI
   await runBuffBench({
     evalDataPaths: [path.join(__dirname, 'eval-codebuff.json')],
-    agents: ['base2-free'],
+    agents: ['base2-free-evals'],
     taskConcurrency: 5,
+    saveTraces,
   })
 
   process.exit(0)

diff --git a/evals/buffbench/run-buffbench.ts b/evals/buffbench/run-buffbench.ts
@@ -57,6 +57,7 @@ async function runTask(options: {
   printEvents: boolean
   finalCheckCommands?: string[]
   disableAnalysis?: boolean
+  saveTraces?: boolean
 }) {
   const {
     client,
@@ -74,6 +75,7 @@ async function runTask(options: {
     printEvents,
     finalCheckCommands,
     disableAnalysis,
+    saveTraces = false,
   } = options
 
   console.log(
@@ -173,6 +175,21 @@ async function runTask(options: {
       finalCheckOutputs: agentResult.finalCheckOutputs,
     })
 
+    // Save the agent trace to its own file if saveTraces is enabled
+    if (saveTraces) {
+      const tracesDir = path.join(logsDir, 'traces')
+      if (!fs.existsSync(tracesDir)) {
+        fs.mkdirSync(tracesDir, { recursive: true })
+      }
+
+      // Save agent trace only (not judge traces)
+      const agentTracePath = path.join(
+        tracesDir,
+        `${index + 1}-${safeTaskId}-${safeAgentId}-${safeCommitShort}-agent.json`,
+      )
+      fs.writeFileSync(agentTracePath, JSON.stringify(agentResult.trace, null, 2))
+    }
+
     fs.writeFileSync(
       tracePath,
       JSON.stringify(commitTraces[commitTraces.length - 1], null, 2),
@@ -300,6 +317,7 @@ export async function runBuffBench(options: {
   taskIds?: string[]
   extractLessons?: boolean
   disableAnalysis?: boolean
+  saveTraces?: boolean
 }) {
   const {
     evalDataPaths,
@@ -308,6 +326,7 @@ export async function runBuffBench(options: {
     taskIds,
     extractLessons = false,
     disableAnalysis = false,
+    saveTraces = false,
   } = options
 
   if (evalDataPaths.length === 0) {
@@ -453,6 +472,7 @@ export async function runBuffBench(options: {
         printEvents: agents.length === 1 && taskConcurrency === 1,
         finalCheckCommands: evalData.finalCheckCommands,
         disableAnalysis,
+        saveTraces,
       }),
     )
   })