Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion packages/session/package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "@stello-ai/session",
"version": "0.7.1",
"version": "0.7.2",
"description": "Session layer for Stello — conversation topology engine",
"license": "Apache-2.0",
"author": "Stello Contributors",
Expand Down
77 changes: 77 additions & 0 deletions packages/session/src/__tests__/anthropic.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -162,3 +162,80 @@ describe('createAnthropicAdapter stream()', () => {
expect(chunks.map((c) => c.delta).join('')).toBe('hello')
})
})

/**
 * Checks which value complete() sends as `max_tokens` to the mocked
 * Anthropic `messages.create` call: the per-call `maxTokens` wins over the
 * adapter-level `maxOutputTokens`, which in turn wins over the built-in 4096.
 */
describe('createAnthropicAdapter complete() max_tokens', () => {
  // Options shared by every case; individual tests add maxOutputTokens on top.
  const baseOptions = {
    apiKey: 'k',
    model: 'm',
    maxContextTokens: 200_000,
  }

  /** Asserts the request body carried the given `max_tokens` and no request options. */
  const expectMaxTokens = (expected: number): void => {
    const requestBody = expect.objectContaining({ max_tokens: expected })
    expect(messagesCreate).toHaveBeenCalledWith(requestBody, undefined)
  }

  beforeEach(() => {
    messagesCreate.mockReset()
    messagesStream.mockReset()
    // Minimal successful response so complete() can resolve.
    messagesCreate.mockResolvedValue({
      content: [{ type: 'text', text: 'ok' }],
      usage: { input_tokens: 1, output_tokens: 1 },
    })
  })

  // Nothing configured anywhere: the built-in default applies.
  it('未配置时回落到内建默认值 4096', async () => {
    const adapter = createAnthropicAdapter({ ...baseOptions })

    await adapter.complete([{ role: 'user', content: 'hi' }])

    expectMaxTokens(4096)
  })

  // Adapter-level setting replaces the built-in default.
  it('options.maxOutputTokens 覆盖内建默认值', async () => {
    const adapter = createAnthropicAdapter({ ...baseOptions, maxOutputTokens: 8192 })

    await adapter.complete([{ role: 'user', content: 'hi' }])

    expectMaxTokens(8192)
  })

  // Per-call maxTokens takes precedence over the adapter-level setting.
  it('调用方 maxTokens 优先级最高,盖过 options.maxOutputTokens', async () => {
    const adapter = createAnthropicAdapter({ ...baseOptions, maxOutputTokens: 8192 })

    await adapter.complete([{ role: 'user', content: 'hi' }], { maxTokens: 2048 })

    expectMaxTokens(2048)
  })
})

/**
 * Checks that stream() forwards the adapter-level `maxOutputTokens` as
 * `max_tokens` to the mocked Anthropic `messages.stream` call.
 */
describe('createAnthropicAdapter stream() max_tokens', () => {
  beforeEach(() => {
    messagesCreate.mockReset()
    messagesStream.mockReset()
    // An empty event stream is enough: only the request arguments are inspected.
    messagesStream.mockReturnValue(asyncIterableFrom([]))
  })

  it('options.maxOutputTokens 用于 stream() 请求', async () => {
    const adapterOptions = {
      apiKey: 'k',
      model: 'm',
      maxContextTokens: 200_000,
      maxOutputTokens: 8192,
    }
    const adapter = createAnthropicAdapter(adapterOptions)
    if (!adapter.stream) throw new Error('adapter.stream is required')

    // Drain the stream so the underlying SDK call is actually issued.
    const chunks = adapter.stream([{ role: 'user', content: 'hi' }])
    for await (const _chunk of chunks) {
      void _chunk
    }

    const requestBody = expect.objectContaining({ max_tokens: 8192 })
    expect(messagesStream).toHaveBeenCalledWith(requestBody, undefined)
  })
})
34 changes: 34 additions & 0 deletions packages/session/src/__tests__/openai-compatible.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,40 @@ describe('createOpenAICompatibleAdapter', () => {
)
})

// With no per-call limit, the adapter-level maxOutputTokens must be sent as
// `max_tokens` in place of the built-in default.
it('options.maxOutputTokens 覆盖内建默认值', async () => {
  const adapterOptions = {
    apiKey: 'test-key',
    baseURL: 'https://api.example.com/v1',
    model: 'test-model',
    maxContextTokens: 128_000,
    maxOutputTokens: 8192,
  }
  const adapter = createOpenAICompatibleAdapter(adapterOptions)
  const expectedBody = expect.objectContaining({ max_tokens: 8192, stream: false })

  await adapter.complete([{ role: 'user', content: 'hello' }])

  expect(createCompletion).toHaveBeenCalledWith(expectedBody, undefined)
})

// A per-call maxTokens must win over the adapter-level maxOutputTokens.
it('调用方 maxTokens 优先级最高,盖过 options.maxOutputTokens', async () => {
  const adapterOptions = {
    apiKey: 'test-key',
    baseURL: 'https://api.example.com/v1',
    model: 'test-model',
    maxContextTokens: 128_000,
    maxOutputTokens: 8192,
  }
  const adapter = createOpenAICompatibleAdapter(adapterOptions)
  const expectedBody = expect.objectContaining({ max_tokens: 2048, stream: false })

  await adapter.complete([{ role: 'user', content: 'hello' }], { maxTokens: 2048 })

  expect(createCompletion).toHaveBeenCalledWith(expectedBody, undefined)
})

it('signal 透传到 SDK request options', async () => {
const adapter = createOpenAICompatibleAdapter({
apiKey: 'test-key',
Expand Down
13 changes: 10 additions & 3 deletions packages/session/src/adapters/anthropic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -13,10 +13,17 @@ import type { LLMAdapter, LLMResult, LLMChunk, Message, ToolCall, LLMCompleteOpt
/** Configuration accepted by `createAnthropicAdapter`. */
export interface AnthropicAdapterOptions {
apiKey: string
model: string
/** Size of the model's context window (in tokens); used to decide when to auto-compact. */
maxContextTokens: number
/** Custom API endpoint, for services that speak the Anthropic protocol (e.g. MiniMax). */
baseURL?: string
/**
 * Upper bound on output tokens for a single request. Sent as the Anthropic
 * API's `max_tokens`.
 * Precedence: `completeOptions.maxTokens` > `options.maxOutputTokens` > 4096.
 * Setting it too low can truncate long outputs midway (tool call args that
 * span several sub-topics, long synthesis, etc.), which makes JSON parsing
 * fail upstream. Recommended: set it according to the model's limit.
 */
maxOutputTokens?: number
}

/** 将 Stello 内部 Message 转换为 Anthropic MessageParam 格式 */
Expand Down Expand Up @@ -138,7 +145,7 @@ export function createAnthropicAdapter(options: AnthropicAdapterOptions): LLMAda
const response = await client.messages.create(
{
model: options.model,
max_tokens: completeOptions?.maxTokens ?? 4096,
max_tokens: completeOptions?.maxTokens ?? options.maxOutputTokens ?? 4096,
...(completeOptions?.temperature !== undefined && { temperature: completeOptions.temperature }),
...(system && { system }),
...(completeOptions?.tools && completeOptions.tools.length > 0
Expand Down Expand Up @@ -172,7 +179,7 @@ export function createAnthropicAdapter(options: AnthropicAdapterOptions): LLMAda
const stream = client.messages.stream(
{
model: options.model,
max_tokens: completeOptions?.maxTokens ?? 4096,
max_tokens: completeOptions?.maxTokens ?? options.maxOutputTokens ?? 4096,
...(completeOptions?.temperature !== undefined && { temperature: completeOptions.temperature }),
...(system && { system }),
...(completeOptions?.tools && completeOptions.tools.length > 0
Expand Down
11 changes: 9 additions & 2 deletions packages/session/src/adapters/openai-compatible.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,11 +11,18 @@ type ChatToolCallDelta = NonNullable<
/** Configuration accepted by `createOpenAICompatibleAdapter`. */
export interface OpenAICompatibleOptions {
apiKey: string
model: string
/** Size of the model's context window (in tokens); used to decide when to auto-compact. */
maxContextTokens: number
/** Base URL of the API endpoint; required for this adapter. */
baseURL: string
/** Extra request parameters (e.g. MiniMax's `reasoning_split`). */
extraBody?: Record<string, unknown>
/**
 * Upper bound on output tokens for a single request. Sent as the request's
 * `max_tokens`.
 * Precedence: `completeOptions.maxTokens` > `options.maxOutputTokens` > 4096.
 * Setting it too low can truncate long outputs midway (tool call args that
 * span several sub-topics, long synthesis, etc.), which makes JSON parsing
 * fail upstream.
 */
maxOutputTokens?: number
}

/** 合并连续的 system 消息,兼容只接受单条 system 的提供方。 */
Expand Down Expand Up @@ -46,7 +53,7 @@ export function createOpenAICompatibleAdapter(options: OpenAICompatibleOptions):
const normalizedMessages = mergeConsecutiveSystemMessages(messages)
return {
model: options.model,
max_tokens: completeOptions?.maxTokens ?? 4096,
max_tokens: completeOptions?.maxTokens ?? options.maxOutputTokens ?? 4096,
...(completeOptions?.temperature !== undefined && { temperature: completeOptions.temperature }),
...(completeOptions?.tools
? {
Expand Down