Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion agents/context-pruner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ const definition: AgentDefinition = {
id: 'context-pruner',
publisher,
displayName: 'Context Pruner',
model: 'openai/gpt-5-mini',
model: 'anthropic/claude-sonnet-4.6',

spawnerPrompt: `Spawn this agent between steps to prune context, summarizing the conversation into a condensed format when context exceeds the limit.`,

Expand Down
2 changes: 1 addition & 1 deletion cli/release/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "codebuff",
"version": "1.0.640",
"version": "1.0.641",
"description": "AI coding agent",
"license": "MIT",
"bin": {
Expand Down
2 changes: 1 addition & 1 deletion freebuff/cli/release/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "freebuff",
"version": "0.0.32",
"version": "0.0.33",
"description": "The world's strongest free coding agent",
"license": "MIT",
"bin": {
Expand Down
4 changes: 3 additions & 1 deletion packages/agent-runtime/src/llm-api/codebuff-web-api.ts
Original file line number Diff line number Diff line change
Expand Up @@ -230,13 +230,14 @@ export async function callTokenCountAPI(params: {
messages: unknown[]
system?: string
model?: string
tools?: Array<{ name: string; description?: string; input_schema?: unknown }>
fetch: typeof globalThis.fetch
logger: Logger
env: CodebuffWebApiEnv
baseUrl?: string
apiKey?: string
}): Promise<{ inputTokens?: number; error?: string }> {
const { messages, system, model, fetch, logger, env } = params
const { messages, system, model, tools, fetch, logger, env } = params
const baseUrl = params.baseUrl ?? env.clientEnv.NEXT_PUBLIC_CODEBUFF_APP_URL
const apiKey = params.apiKey ?? env.ciEnv.CODEBUFF_API_KEY

Expand All @@ -248,6 +249,7 @@ export async function callTokenCountAPI(params: {
const payload: Record<string, unknown> = { messages }
if (system) payload.system = system
if (model) payload.model = model
if (tools) payload.tools = tools

try {
const res = await withTimeout(
Expand Down
13 changes: 13 additions & 0 deletions packages/agent-runtime/src/run-agent-step.ts
Original file line number Diff line number Diff line change
Expand Up @@ -806,6 +806,18 @@ export async function loopAgentSteps(
systemPrompt: system,
toolDefinitions,
}

// Convert tool definitions to Anthropic format for accurate token counting
// Tool definitions are stored as { [name]: { description, inputSchema } }
// Anthropic count_tokens API expects [{ name, description, input_schema }]
const toolsForTokenCount = Object.entries(toolDefinitions).map(
([name, def]) => ({
name,
...(def.description && { description: def.description }),
...(def.inputSchema && { input_schema: def.inputSchema }),
}),
)

let shouldEndTurn = false
let hasRetriedOutputSchema = false
let currentPrompt = prompt
Expand Down Expand Up @@ -845,6 +857,7 @@ export async function loopAgentSteps(
messages: messagesWithStepPrompt,
system,
model: agentTemplate.model,
tools: toolsForTokenCount,
fetch,
logger,
env: { clientEnv, ciEnv },
Expand Down
35 changes: 25 additions & 10 deletions web/src/app/api/v1/token-count/_post.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ import {
isClaudeModel,
toAnthropicModelId,
} from '@codebuff/common/constants/claude-oauth'
import { isOpenAIProviderModel } from '@codebuff/common/constants/chatgpt-oauth'
import { getErrorObject } from '@codebuff/common/util/error'
import { env } from '@codebuff/internal/env'
import { NextResponse } from 'next/server'
Expand All @@ -22,6 +23,11 @@ const tokenCountRequestSchema = z.object({
messages: z.array(z.any()),
system: z.string().optional(),
model: z.string().optional(),
tools: z.array(z.object({
name: z.string(),
description: z.string().optional(),
input_schema: z.any().optional(),
})).optional(),
})

type TokenCountRequest = z.infer<typeof tokenCountRequestSchema>
Expand Down Expand Up @@ -74,24 +80,27 @@ export async function postTokenCount(params: {
return bodyResult.response
}

const { messages, system, model } = bodyResult.data
const { messages, system, model, tools } = bodyResult.data

try {
const useOpenAI = model != null && false // isOpenAIProviderModel(model)
const inputTokens = useOpenAI
? await countTokensViaOpenAI({ messages, system, model, fetch, logger })
: await countTokensViaAnthropic({
messages,
system,
model,
fetch,
logger,
})
messages,
system,
model,
tools,
fetch,
logger,
})

logger.info({
userId,
messageCount: messages.length,
hasSystem: !!system,
hasTools: !!tools,
toolCount: tools?.length,
model: model ?? DEFAULT_ANTHROPIC_MODEL,
tokenCount: inputTokens,
provider: useOpenAI ? 'openai' : 'anthropic',
Expand Down Expand Up @@ -285,10 +294,11 @@ async function countTokensViaAnthropic(params: {
messages: TokenCountRequest['messages']
system: string | undefined
model: string | undefined
tools: TokenCountRequest['tools']
fetch: typeof globalThis.fetch
logger: Logger
}): Promise<number> {
const { messages, system, model, fetch, logger } = params
const { messages, system, model, tools, fetch, logger } = params

// Convert messages to Anthropic format
const anthropicMessages = convertToAnthropicMessages(messages)
Expand All @@ -315,6 +325,7 @@ async function countTokensViaAnthropic(params: {
model: anthropicModelId,
messages: anthropicMessages,
...(system && { system }),
...(tools && { tools }),
}),
},
)
Expand All @@ -337,8 +348,12 @@ async function countTokensViaAnthropic(params: {
const data = await response.json()
const baseTokens = data.input_tokens

// Add 30% buffer for non-Anthropic models since tokenizers differ
if (isNonAnthropicModel) {
// Add 30% buffer for OpenAI and Gemini models since their tokenizers differ from Anthropic's
// Other non-Anthropic models (x-ai, qwen, deepseek, etc.) are routed through providers that
// use similar tokenization, so the buffer is not needed and was causing premature context pruning.
const isOpenAIModel = model ? isOpenAIProviderModel(model) : false
const isGeminiModel = model?.startsWith('google/') ?? false
if (isOpenAIModel || isGeminiModel) {
return Math.ceil(baseTokens * (1 + NON_ANTHROPIC_TOKEN_BUFFER))
}

Expand Down
22 changes: 11 additions & 11 deletions web/src/llm-api/__tests__/fireworks-deployment.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@ import {

import type { Logger } from '@codebuff/common/types/contracts/logger'

const STANDARD_MODEL_ID = 'accounts/fireworks/models/minimax-m2p5'
const DEPLOYMENT_MODEL_ID = 'accounts/james-65d217/deployments/lnfid5h9'
const STANDARD_MODEL_ID = 'accounts/fireworks/models/glm-5p1'
const DEPLOYMENT_MODEL_ID = 'accounts/james-65d217/deployments/mjb4i7ea'

function createMockLogger(): Logger {
return {
Expand Down Expand Up @@ -78,7 +78,7 @@ describe('Fireworks deployment routing', () => {
})

const minimalBody = {
model: 'minimax/minimax-m2.5',
model: 'z-ai/glm-5.1',
messages: [{ role: 'user' as const, content: 'test' }],
}

Expand Down Expand Up @@ -115,7 +115,7 @@ describe('Fireworks deployment routing', () => {

const response = await createFireworksRequestWithFallback({
body: minimalBody as never,
originalModel: 'minimax/minimax-m2.5',
originalModel: 'z-ai/glm-5.1',
fetch: mockFetch,
logger,
useCustomDeployment: false,
Expand All @@ -140,7 +140,7 @@ describe('Fireworks deployment routing', () => {
try {
const response = await createFireworksRequestWithFallback({
body: minimalBody as never,
originalModel: 'minimax/minimax-m2.5',
originalModel: 'z-ai/glm-5.1',
fetch: mockFetch,
logger,
useCustomDeployment: true,
Expand Down Expand Up @@ -184,7 +184,7 @@ describe('Fireworks deployment routing', () => {
try {
const response = await createFireworksRequestWithFallback({
body: minimalBody as never,
originalModel: 'minimax/minimax-m2.5',
originalModel: 'z-ai/glm-5.1',
fetch: mockFetch,
logger,
useCustomDeployment: true,
Expand Down Expand Up @@ -231,7 +231,7 @@ describe('Fireworks deployment routing', () => {
try {
const response = await createFireworksRequestWithFallback({
body: minimalBody as never,
originalModel: 'minimax/minimax-m2.5',
originalModel: 'z-ai/glm-5.1',
fetch: mockFetch,
logger,
useCustomDeployment: true,
Expand Down Expand Up @@ -272,7 +272,7 @@ describe('Fireworks deployment routing', () => {
try {
const response = await createFireworksRequestWithFallback({
body: minimalBody as never,
originalModel: 'minimax/minimax-m2.5',
originalModel: 'z-ai/glm-5.1',
fetch: mockFetch,
logger,
useCustomDeployment: true,
Expand Down Expand Up @@ -303,7 +303,7 @@ describe('Fireworks deployment routing', () => {
try {
const response = await createFireworksRequestWithFallback({
body: minimalBody as never,
originalModel: 'minimax/minimax-m2.5',
originalModel: 'z-ai/glm-5.1',
fetch: mockFetch,
logger,
useCustomDeployment: true,
Expand Down Expand Up @@ -363,7 +363,7 @@ describe('Fireworks deployment routing', () => {
try {
const response = await createFireworksRequestWithFallback({
body: minimalBody as never,
originalModel: 'minimax/minimax-m2.5',
originalModel: 'z-ai/glm-5.1',
fetch: mockFetch,
logger,
useCustomDeployment: true,
Expand Down Expand Up @@ -403,7 +403,7 @@ describe('Fireworks deployment routing', () => {
try {
await createFireworksRequestWithFallback({
body: minimalBody as never,
originalModel: 'minimax/minimax-m2.5',
originalModel: 'z-ai/glm-5.1',
fetch: mockFetch,
logger,
useCustomDeployment: true,
Expand Down
Loading