diff --git a/packages/session/package.json b/packages/session/package.json index b6588e9..5066a0f 100644 --- a/packages/session/package.json +++ b/packages/session/package.json @@ -1,6 +1,6 @@ { "name": "@stello-ai/session", - "version": "0.7.1", + "version": "0.7.2", "description": "Session layer for Stello — conversation topology engine", "license": "Apache-2.0", "author": "Stello Contributors", diff --git a/packages/session/src/__tests__/anthropic.test.ts b/packages/session/src/__tests__/anthropic.test.ts index a0f224c..c00b81a 100644 --- a/packages/session/src/__tests__/anthropic.test.ts +++ b/packages/session/src/__tests__/anthropic.test.ts @@ -162,3 +162,80 @@ describe('createAnthropicAdapter stream()', () => { expect(chunks.map((c) => c.delta).join('')).toBe('hello') }) }) + +describe('createAnthropicAdapter complete() max_tokens', () => { + beforeEach(() => { + messagesStream.mockReset() + messagesCreate.mockReset() + messagesCreate.mockResolvedValue({ + content: [{ type: 'text', text: 'ok' }], + usage: { input_tokens: 1, output_tokens: 1 }, + }) + }) + + it('未配置时回落到内建默认值 4096', async () => { + const adapter = createAnthropicAdapter({ + apiKey: 'k', + model: 'm', + maxContextTokens: 200_000, + }) + await adapter.complete([{ role: 'user', content: 'hi' }]) + expect(messagesCreate).toHaveBeenCalledWith( + expect.objectContaining({ max_tokens: 4096 }), + undefined, + ) + }) + + it('options.maxOutputTokens 覆盖内建默认值', async () => { + const adapter = createAnthropicAdapter({ + apiKey: 'k', + model: 'm', + maxContextTokens: 200_000, + maxOutputTokens: 8192, + }) + await adapter.complete([{ role: 'user', content: 'hi' }]) + expect(messagesCreate).toHaveBeenCalledWith( + expect.objectContaining({ max_tokens: 8192 }), + undefined, + ) + }) + + it('调用方 maxTokens 优先级最高,盖过 options.maxOutputTokens', async () => { + const adapter = createAnthropicAdapter({ + apiKey: 'k', + model: 'm', + maxContextTokens: 200_000, + maxOutputTokens: 8192, + }) + await adapter.complete([{ 
role: 'user', content: 'hi' }], { maxTokens: 2048 }) + expect(messagesCreate).toHaveBeenCalledWith( + expect.objectContaining({ max_tokens: 2048 }), + undefined, + ) + }) +}) + +describe('createAnthropicAdapter stream() max_tokens', () => { + beforeEach(() => { + messagesStream.mockReset() + messagesCreate.mockReset() + messagesStream.mockReturnValue(asyncIterableFrom([])) + }) + + it('options.maxOutputTokens 用于 stream() 请求', async () => { + const adapter = createAnthropicAdapter({ + apiKey: 'k', + model: 'm', + maxContextTokens: 200_000, + maxOutputTokens: 8192, + }) + if (!adapter.stream) throw new Error('adapter.stream is required') + for await (const _ of adapter.stream([{ role: 'user', content: 'hi' }])) { + void _ + } + expect(messagesStream).toHaveBeenCalledWith( + expect.objectContaining({ max_tokens: 8192 }), + undefined, + ) + }) +}) diff --git a/packages/session/src/__tests__/openai-compatible.test.ts b/packages/session/src/__tests__/openai-compatible.test.ts index fd87b30..dfb29c5 100644 --- a/packages/session/src/__tests__/openai-compatible.test.ts +++ b/packages/session/src/__tests__/openai-compatible.test.ts @@ -72,6 +72,40 @@ describe('createOpenAICompatibleAdapter', () => { ) }) + it('options.maxOutputTokens 覆盖内建默认值', async () => { + const adapter = createOpenAICompatibleAdapter({ + apiKey: 'test-key', + baseURL: 'https://api.example.com/v1', + model: 'test-model', + maxContextTokens: 128_000, + maxOutputTokens: 8192, + }) + + await adapter.complete([{ role: 'user', content: 'hello' }]) + + expect(createCompletion).toHaveBeenCalledWith( + expect.objectContaining({ max_tokens: 8192, stream: false }), + undefined, + ) + }) + + it('调用方 maxTokens 优先级最高,盖过 options.maxOutputTokens', async () => { + const adapter = createOpenAICompatibleAdapter({ + apiKey: 'test-key', + baseURL: 'https://api.example.com/v1', + model: 'test-model', + maxContextTokens: 128_000, + maxOutputTokens: 8192, + }) + + await adapter.complete([{ role: 'user', content: 'hello' }], { 
maxTokens: 2048 }) + + expect(createCompletion).toHaveBeenCalledWith( + expect.objectContaining({ max_tokens: 2048, stream: false }), + undefined, + ) + }) + it('signal 透传到 SDK request options', async () => { const adapter = createOpenAICompatibleAdapter({ apiKey: 'test-key', diff --git a/packages/session/src/adapters/anthropic.ts b/packages/session/src/adapters/anthropic.ts index 3741b16..edabc74 100644 --- a/packages/session/src/adapters/anthropic.ts +++ b/packages/session/src/adapters/anthropic.ts @@ -13,10 +13,17 @@ import type { LLMAdapter, LLMResult, LLMChunk, Message, ToolCall, LLMCompleteOpt export interface AnthropicAdapterOptions { apiKey: string model: string - /** 模型上下文窗口大小(token 数) */ + /** 模型上下文窗口大小(token 数),用于自动压缩判断 */ maxContextTokens: number /** 自定义 API 端点,兼容 MiniMax 等 Anthropic 协议服务 */ baseURL?: string + /** + * 单次请求的输出 token 上限。被写入 Anthropic API 的 `max_tokens`。 + * 优先级:`completeOptions.maxTokens` > `options.maxOutputTokens` > 4096。 + * 设过低会让长输出(多个子话题的 tool call args、长 synthesis 等) + * 在中途被截断,引发上层 JSON 解析失败。建议按模型上限设置。 + */ + maxOutputTokens?: number } /** 将 Stello 内部 Message 转换为 Anthropic MessageParam 格式 */ @@ -138,7 +145,7 @@ export function createAnthropicAdapter(options: AnthropicAdapterOptions): LLMAda const response = await client.messages.create( { model: options.model, - max_tokens: completeOptions?.maxTokens ?? 4096, + max_tokens: completeOptions?.maxTokens ?? options.maxOutputTokens ?? 4096, ...(completeOptions?.temperature !== undefined && { temperature: completeOptions.temperature }), ...(system && { system }), ...(completeOptions?.tools && completeOptions.tools.length > 0 @@ -172,7 +179,7 @@ export function createAnthropicAdapter(options: AnthropicAdapterOptions): LLMAda const stream = client.messages.stream( { model: options.model, - max_tokens: completeOptions?.maxTokens ?? 4096, + max_tokens: completeOptions?.maxTokens ?? options.maxOutputTokens ?? 
4096, ...(completeOptions?.temperature !== undefined && { temperature: completeOptions.temperature }), ...(system && { system }), ...(completeOptions?.tools && completeOptions.tools.length > 0 diff --git a/packages/session/src/adapters/openai-compatible.ts b/packages/session/src/adapters/openai-compatible.ts index 5a51f73..42d4f27 100644 --- a/packages/session/src/adapters/openai-compatible.ts +++ b/packages/session/src/adapters/openai-compatible.ts @@ -11,11 +11,18 @@ type ChatToolCallDelta = NonNullable< export interface OpenAICompatibleOptions { apiKey: string model: string - /** 模型上下文窗口大小(token 数) */ + /** 模型上下文窗口大小(token 数),用于自动压缩判断 */ maxContextTokens: number baseURL: string /** 额外的请求参数(如 MiniMax 的 reasoning_split 等) */ extraBody?: Record<string, unknown> + /** + * 单次请求的输出 token 上限。被写入请求的 `max_tokens`。 + * 优先级:`completeOptions.maxTokens` > `options.maxOutputTokens` > 4096。 + * 设过低会让长输出(多个子话题的 tool call args、长 synthesis 等) + * 在中途被截断,引发上层 JSON 解析失败。 + */ + maxOutputTokens?: number } /** 合并连续的 system 消息,兼容只接受单条 system 的提供方。 */ @@ -46,7 +53,7 @@ export function createOpenAICompatibleAdapter(options: OpenAICompatibleOptions): const normalizedMessages = mergeConsecutiveSystemMessages(messages) return { model: options.model, - max_tokens: completeOptions?.maxTokens ?? 4096, + max_tokens: completeOptions?.maxTokens ?? options.maxOutputTokens ?? 4096, ...(completeOptions?.temperature !== undefined && { temperature: completeOptions.temperature }), ...(completeOptions?.tools ? {