From b31c5bcf52ad56871789e61361369d3b9864e538 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=9F=A9=E7=BF=94=E5=AE=87?=
Date: Sat, 28 Feb 2026 12:24:22 +0800
Subject: [PATCH 1/3] fix(adapters): handle separate usage chunk in OpenAI
 streaming responses

Some OpenAI-compatible providers send token usage in a separate chunk
(with choices=[]) after the finish_reason chunk, rather than bundling
them together. Previously, this usage-only chunk was silently skipped
because data.choices[0] was undefined, causing token counts to always
be recorded as -1 for streaming requests. This broke TPM rate limiting
and usage tracking.

Now both patterns are supported:
- Usage bundled with finish_reason in the same chunk
- Usage in a separate subsequent chunk (choices=[])

Co-Authored-By: Claude Opus 4.6
---
 backend/src/adapters/upstream/openai.ts | 33 +++++++++++++++++--------
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/backend/src/adapters/upstream/openai.ts b/backend/src/adapters/upstream/openai.ts
index 817652a..d144d95 100644
--- a/backend/src/adapters/upstream/openai.ts
+++ b/backend/src/adapters/upstream/openai.ts
@@ -11,8 +11,7 @@ import type {
   InternalResponse,
   InternalStreamChunk,
   InternalToolDefinition,
-  InternalUsage,
-  ProviderConfig,
+  ProviderConfig,
   StopReason,
   TextContentBlock,
   ThinkingContentBlock,
@@ -387,7 +386,7 @@ export const openaiUpstreamAdapter: UpstreamAdapter = {
 
   buildRequest(
     request: InternalRequest,
-    provider: ProviderConfig,
+    provider: ProviderConfig,
   ): { url: string; init: RequestInit } {
     // Build messages array with system prompt
     const messages: OpenAIMessage[] = [];
@@ -510,6 +509,18 @@ export const openaiUpstreamAdapter: UpstreamAdapter = {
 
     const choice = data.choices[0];
     if (!choice) {
+      // Usage-only chunk (choices=[]) — some providers send usage separately
+      // after the finish_reason chunk when stream_options.include_usage=true
+      if (data.usage) {
+        yield {
+          type: "message_delta",
+          messageDelta: { stopReason: null },
+          usage: {
+            inputTokens: data.usage.prompt_tokens,
+            outputTokens: data.usage.completion_tokens,
+          },
+        };
+      }
       continue;
     }
 
@@ -581,18 +592,20 @@ export const openaiUpstreamAdapter: UpstreamAdapter = {
         // Handle finish reason
         if (choice.finish_reason) {
           yield { type: "content_block_stop", index: blockIndex };
-          const usage: InternalUsage = data.usage
-            ? {
-                inputTokens: data.usage.prompt_tokens,
-                outputTokens: data.usage.completion_tokens,
-              }
-            : { inputTokens: -1, outputTokens: -1 };
+          // Include usage only if upstream provided it in this chunk.
+          // Some providers bundle usage with finish_reason; others send a
+          // separate usage-only chunk (choices=[]) immediately after.
           yield {
             type: "message_delta",
             messageDelta: {
              stopReason: convertFinishReason(choice.finish_reason),
             },
-            usage,
+            ...(data.usage && {
+              usage: {
+                inputTokens: data.usage.prompt_tokens,
+                outputTokens: data.usage.completion_tokens,
+              },
+            }),
           };
         }
       }

From 36e6b0484dbeb1c3649ad7f197055e53a6b5a4ae Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=9F=A9=E7=BF=94=E5=AE=87?=
Date: Sat, 28 Feb 2026 12:24:39 +0800
Subject: [PATCH 2/3] fix(db): filter soft-deleted providers in
 listUniqueSystemNames

listUniqueSystemNames() only filtered NOT models.deleted but did not
join the providers table or check NOT providers.deleted. When a provider
was soft-deleted, its models (still deleted=false) would appear in the
global model registry, but getModelsWithProviderBySystemName() correctly
filtered them out, causing the UI to show models with no providers.

Add innerJoin on ProvidersTable and NOT providers.deleted filter to
match the behavior of getModelsWithProviderBySystemName().

Co-Authored-By: Claude Opus 4.6
---
 backend/src/db/index.ts | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/backend/src/db/index.ts b/backend/src/db/index.ts
index 1db6744..c433623 100644
--- a/backend/src/db/index.ts
+++ b/backend/src/db/index.ts
@@ -955,9 +955,14 @@ export async function listUniqueSystemNames(
   const r = await db
     .selectDistinct({ systemName: schema.ModelsTable.systemName })
     .from(schema.ModelsTable)
+    .innerJoin(
+      schema.ProvidersTable,
+      eq(schema.ModelsTable.providerId, schema.ProvidersTable.id),
+    )
     .where(
       and(
         not(schema.ModelsTable.deleted),
+        not(schema.ProvidersTable.deleted),
         modelType ? eq(schema.ModelsTable.modelType, modelType) : undefined,
       ),
     )

From ffb6c2cb8fb57619d362437615f5e617cab906d0 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?=E9=9F=A9=E7=BF=94=E5=AE=87?=
Date: Sat, 28 Feb 2026 12:35:05 +0800
Subject: [PATCH 3/3] fix(adapters): omit messageDelta in usage-only chunk to
 avoid overwriting stopReason

Remove explicit stopReason: null from the usage-only message_delta yield
to prevent overwriting a previously set stopReason. This aligns with the
pattern used in openai-responses.ts for usage-only events.

Co-Authored-By: Claude Opus 4.6
---
 backend/src/adapters/upstream/openai.ts | 1 -
 1 file changed, 1 deletion(-)

diff --git a/backend/src/adapters/upstream/openai.ts b/backend/src/adapters/upstream/openai.ts
index d144d95..64c935c 100644
--- a/backend/src/adapters/upstream/openai.ts
+++ b/backend/src/adapters/upstream/openai.ts
@@ -514,7 +514,6 @@ export const openaiUpstreamAdapter: UpstreamAdapter = {
       if (data.usage) {
         yield {
           type: "message_delta",
-          messageDelta: { stopReason: null },
           usage: {
             inputTokens: data.usage.prompt_tokens,
             outputTokens: data.usage.completion_tokens,