From b90b3be68c3f3e2eb2eb8516e0c9f15bd2e3522c Mon Sep 17 00:00:00 2001 From: Harrison Conlin Date: Tue, 19 May 2026 18:51:24 +1000 Subject: [PATCH 1/4] fix(extractUsageCostDetails): add upstream_inference_cost fallback for total cost MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The previous implementation used a single safeCost() call wrapping a ?? chain: safeCost(details.total_cost ?? usage?.cost ?? usage?.estimated_cost). This had two problems: - upstream_inference_cost was not considered at all - The entire ?? chain was validated as one, so safeCost() could not distinguish which source provided the value Restructure into an explicit three-step fallback, each validated independently: 1. cost_details.total_cost (LLM gateway, most detailed) 2. usage.cost, falling back to usage.estimated_cost when usage.cost is absent (direct cost for OpenRouter-style providers) 3. cost_details.upstream_inference_cost (upstream OpenRouter-style cost) Steps 2→3 use || (falsy coalescing) to handle the OpenRouter quirk where usage.cost and/or upstream_inference_cost may be populated depending on the upstream provider and key type used (e.g. BYOK keys report 0 for usage.cost, but upstream_inference_cost carries the actual provider cost). --- packages/backend/src/utils/usage-normalizer.ts | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) diff --git a/packages/backend/src/utils/usage-normalizer.ts b/packages/backend/src/utils/usage-normalizer.ts index 714bcde8..0e040937 100644 --- a/packages/backend/src/utils/usage-normalizer.ts +++ b/packages/backend/src/utils/usage-normalizer.ts @@ -53,8 +53,21 @@ export function extractUsageCostDetails(usage: any): ProviderCostDetails | null const details = usage?.cost_details; if (!details || typeof details !== 'object') return null; - // Validate that at least one cost field is a valid number - const totalCost = safeCost(details.total_cost ?? usage?.cost ?? usage?.estimated_cost); + // Determine total cost: + // 1. cost_details.total_cost + // 2. 
usage.cost or usage.estimated_cost (standard path) + // 3. cost_details.upstream_inference_cost (OpenRouter quirk) + let totalCost = safeCost(details.total_cost); + + const costFromUsage = safeCost(usage?.cost ?? usage?.estimated_cost); + const upstreamInferenceCost = safeCost(details.upstream_inference_cost); + + if (totalCost === null) { + // || not ?? — BYOK keys report usage.cost=0 (Plexus charges nothing), so a + // falsy 0 should fall through to upstreamInferenceCost which carries the + // actual provider cost. + totalCost = costFromUsage || upstreamInferenceCost; + } if (totalCost === null) return null; return { From d65d340f00c5b07581232727e68063df4184a3ca Mon Sep 17 00:00:00 2001 From: Harrison Conlin Date: Tue, 19 May 2026 18:52:25 +1000 Subject: [PATCH 2/4] refactor(extractUsageCostDetails): keep upstream cost fields separate from standard fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously upstream_inference_prompt_cost was aliased directly into input_cost. However, these fields have different semantics: upstream_inference_prompt_cost = input_cost + cached_input_cost (i.e., the combined prompt cost including cached tokens) Aliasing it into input_cost caused applyUsageCostDetails to zero out costCached, silently merging the cached portion into costInput. 
Changes: - Stop aliasing upstream_inference_prompt_cost → input_cost and upstream_inference_completions_cost → output_cost - Add same-tier aliasing: upstream_inference_input_cost → upstream_inference_prompt_cost and upstream_inference_output_cost → upstream_inference_completions_cost (same semantics, different provider naming conventions) - Update extraction tests to assert upstream fields stay separate - Add tests for BYOK fallback, Responses API input/output variants, and LLM Gateway field priority --- .../src/utils/__tests__/provider-cost.test.ts | 140 ++++++++++++++---- .../backend/src/utils/usage-normalizer.ts | 15 +- 2 files changed, 124 insertions(+), 31 deletions(-) diff --git a/packages/backend/src/utils/__tests__/provider-cost.test.ts b/packages/backend/src/utils/__tests__/provider-cost.test.ts index f314d77b..1a8adbfd 100644 --- a/packages/backend/src/utils/__tests__/provider-cost.test.ts +++ b/packages/backend/src/utils/__tests__/provider-cost.test.ts @@ -124,42 +124,43 @@ describe('applyProviderReportedCost', () => { describe('extractUsageCostDetails', () => { test('extracts cost_details from the new usage format', () => { + // Real response: glm-5.1 via LLM Gateway (has both gateway and upstream fields) const usage = { - prompt_tokens: 23, - total_tokens: 66, - completion_tokens: 43, - estimated_cost: 0.00017465, + prompt_tokens: 90122, + completion_tokens: 104, + total_tokens: 90226, + cost: 0.022101624, prompt_tokens_details: { - cached_tokens: 0, + cached_tokens: 89536, cache_write_tokens: 0, + audio_tokens: 0, + video_tokens: 0, + image_tokens: 0, }, - cost: 0.00017465, cost_details: { - upstream_inference_cost: 0.00017465, - upstream_inference_prompt_cost: 0.00002415, - upstream_inference_completions_cost: 0.0001505, - total_cost: 0.00017465, - input_cost: 0.00002415, - output_cost: 0.0001505, - cached_input_cost: 0, + upstream_inference_cost: 0.022101624, + upstream_inference_prompt_cost: 0.021689784, + upstream_inference_completions_cost: 
0.00041184, + total_cost: 0.022101624, + input_cost: 0.00073836, + output_cost: 0.00041184, + cached_input_cost: 0.020951424, cache_write_input_cost: 0, request_cost: 0, web_search_cost: 0, image_input_cost: null, image_output_cost: null, audio_input_cost: null, - data_storage_cost: 0.00000106, }, }; const result = extractUsageCostDetails(usage); expect(result).not.toBeNull(); - expect(result!.total_cost).toBe(0.00017465); - expect(result!.input_cost).toBe(0.00002415); - expect(result!.output_cost).toBe(0.0001505); - expect(result!.cached_input_cost).toBe(0); + expect(result!.total_cost).toBe(0.022101624); + expect(result!.input_cost).toBe(0.00073836); + expect(result!.output_cost).toBe(0.00041184); + expect(result!.cached_input_cost).toBe(0.020951424); expect(result!.cache_write_input_cost).toBe(0); - expect(result!.data_storage_cost).toBe(0.00000106); }); test('falls back to usage.cost when cost_details.total_cost is missing', () => { @@ -223,25 +224,40 @@ describe('extractUsageCostDetails', () => { expect(extractUsageCostDetails(undefined)).toBeNull(); }); - test('maps upstream_inference_prompt_cost as fallback for input_cost', () => { + test('keeps upstream prompt/completions fields separate from input_cost/output_cost', () => { + // Real response: normal-tier (no gateway input_cost/output_cost fields) const usage = { - cost: 0.01, + completion_tokens: 2177, + cost: 0.00435825, cost_details: { - upstream_inference_prompt_cost: 0.003, - upstream_inference_completions_cost: 0.007, + upstream_inference_completions_cost: 0.004354, + upstream_inference_cost: null, + upstream_inference_prompt_cost: 4.25e-6, }, + is_byok: false, + prompt_tokens: 17, + prompt_tokens_details: { cached_tokens: 0 }, }; const result = extractUsageCostDetails(usage); - expect(result!.input_cost).toBe(0.003); - expect(result!.output_cost).toBe(0.007); + expect(result).not.toBeNull(); + expect(result!.total_cost).toBe(0.00435825); + expect(result!.input_cost).toBeNull(); + 
expect(result!.output_cost).toBeNull(); + expect(result!.upstream_inference_prompt_cost).toBe(4.25e-6); + expect(result!.upstream_inference_completions_cost).toBe(0.004354); }); test('preserves null values for optional cost fields', () => { + // Real response: LLM Gateway — image/audio costs null for text-only models const usage = { - cost: 0.01, + cost: 0.022101624, cost_details: { - total_cost: 0.01, + total_cost: 0.022101624, + input_cost: 0.00073836, + output_cost: 0.00041184, + cached_input_cost: 0.020951424, + cache_write_input_cost: 0, image_input_cost: null, image_output_cost: null, audio_input_cost: null, @@ -254,6 +270,76 @@ describe('extractUsageCostDetails', () => { expect(result!.audio_input_cost).toBeNull(); }); + test('uses upstream_inference_cost as total when usage.cost is 0 (BYOK)', () => { + // Real response: BYOK — Plexus charges $0, actual cost reported in upstream_inference_cost + const usage = { + completion_tokens: 91, + cost: 0, + cost_details: { + upstream_inference_completions_cost: 0.0002275, + upstream_inference_cost: 0.0003253, + upstream_inference_prompt_cost: 9.78e-5, + }, + is_byok: true, + prompt_tokens: 326, + prompt_tokens_details: { cached_tokens: 0 }, + }; + + const result = extractUsageCostDetails(usage); + expect(result).not.toBeNull(); + expect(result!.total_cost).toBe(0.0003253); + expect(result!.input_cost).toBeNull(); + expect(result!.output_cost).toBeNull(); + expect(result!.upstream_inference_prompt_cost).toBe(9.78e-5); + expect(result!.upstream_inference_completions_cost).toBe(0.0002275); + }); + + test('aliases upstream_inference_input/output_cost to prompt/completions (Responses API)', () => { + // Real response: OpenAI Responses API uses _input/_output suffix rather than _prompt/_completions + const usage = { + input_tokens: 78, + input_tokens_details: { cached_tokens: 0 }, + output_tokens: 37, + total_tokens: 115, + cost: 0.0000113, + is_byok: false, + cost_details: { + upstream_inference_cost: null, + 
upstream_inference_input_cost: 0.0000039, + upstream_inference_output_cost: 0.0000074, + }, + }; + + const result = extractUsageCostDetails(usage); + expect(result).not.toBeNull(); + expect(result!.total_cost).toBe(0.0000113); + expect(result!.input_cost).toBeNull(); + expect(result!.output_cost).toBeNull(); + expect(result!.upstream_inference_prompt_cost).toBe(0.0000039); + expect(result!.upstream_inference_completions_cost).toBe(0.0000074); + }); + + test('uses input_cost/output_cost directly when present alongside upstream fields', () => { + // Real response: LLM Gateway includes both gateway fields (input_cost/output_cost/cached_input_cost) + // and upstream fields (upstream_inference_prompt/completions_cost); gateway fields take priority + const usage = { + cost: 0.022101624, + cost_details: { + total_cost: 0.022101624, + input_cost: 0.00073836, + output_cost: 0.00041184, + cached_input_cost: 0.020951424, + upstream_inference_prompt_cost: 0.021689784, + upstream_inference_completions_cost: 0.00041184, + }, + }; + + const result = extractUsageCostDetails(usage); + expect(result!.input_cost).toBe(0.00073836); + expect(result!.output_cost).toBe(0.00041184); + expect(result!.cached_input_cost).toBe(0.020951424); + }); + test('returns null for negative total_cost', () => { const usage = { cost_details: { diff --git a/packages/backend/src/utils/usage-normalizer.ts b/packages/backend/src/utils/usage-normalizer.ts index 0e040937..a5302f04 100644 --- a/packages/backend/src/utils/usage-normalizer.ts +++ b/packages/backend/src/utils/usage-normalizer.ts @@ -72,13 +72,20 @@ export function extractUsageCostDetails(usage: any): ProviderCostDetails | null return { total_cost: totalCost, - input_cost: safeCost(details.input_cost ?? details.upstream_inference_prompt_cost), - output_cost: safeCost(details.output_cost ?? 
details.upstream_inference_completions_cost), + // upstream_inference_prompt_cost includes cached tokens (input_cost + cached_input_cost), + // so it can't be mapped directly to input_cost. The upstream fields are preserved + // here and dispatched separately in applyUsageCostDetails(). + input_cost: safeCost(details.input_cost), + output_cost: safeCost(details.output_cost), cached_input_cost: safeCost(details.cached_input_cost), cache_write_input_cost: safeCost(details.cache_write_input_cost), upstream_inference_cost: safeCost(details.upstream_inference_cost), - upstream_inference_prompt_cost: safeCost(details.upstream_inference_prompt_cost), - upstream_inference_completions_cost: safeCost(details.upstream_inference_completions_cost), + upstream_inference_prompt_cost: safeCost( + details.upstream_inference_prompt_cost ?? details.upstream_inference_input_cost + ), + upstream_inference_completions_cost: safeCost( + details.upstream_inference_completions_cost ?? details.upstream_inference_output_cost + ), request_cost: safeCost(details.request_cost), web_search_cost: safeCost(details.web_search_cost), image_input_cost: safeCost(details.image_input_cost), From 6c63c2aec94813ed3a2f6a0a43b099996f1f9793 Mon Sep 17 00:00:00 2001 From: Harrison Conlin Date: Tue, 19 May 2026 18:54:46 +1000 Subject: [PATCH 3/4] feat(applyUsageCostDetails): add three-tier cost dispatch MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously applyUsageCostDetails only had two branches: superset (per-bucket breakdown) and minimal (proportional distribution). Now that extractUsageCostDetails no longer aliases normal-tier upstream_inference_prompt_cost into input_cost, the normal-tier case needs explicit handling. Three tiers: 1. Superset: input_cost/cached_input_cost/cache_write_input_cost present → use per-bucket breakdown directly 2. 
Normal: upstream_inference_prompt_cost/completions_cost present but no input-side superset fields → use upstream prompt/completions split, then distribute the prompt portion by Plexus's own cache ratio 3. Minimal: no breakdown at all → proportional distribution from previously calculated costs --- .../src/utils/__tests__/provider-cost.test.ts | 128 +++++++++++++----- packages/backend/src/utils/provider-cost.ts | 43 +++++- 2 files changed, 130 insertions(+), 41 deletions(-) diff --git a/packages/backend/src/utils/__tests__/provider-cost.test.ts b/packages/backend/src/utils/__tests__/provider-cost.test.ts index 1a8adbfd..8e48c9f8 100644 --- a/packages/backend/src/utils/__tests__/provider-cost.test.ts +++ b/packages/backend/src/utils/__tests__/provider-cost.test.ts @@ -232,7 +232,7 @@ describe('extractUsageCostDetails', () => { cost_details: { upstream_inference_completions_cost: 0.004354, upstream_inference_cost: null, - upstream_inference_prompt_cost: 4.25e-6, + upstream_inference_prompt_cost: 4.25e-06, }, is_byok: false, prompt_tokens: 17, @@ -244,7 +244,7 @@ describe('extractUsageCostDetails', () => { expect(result!.total_cost).toBe(0.00435825); expect(result!.input_cost).toBeNull(); expect(result!.output_cost).toBeNull(); - expect(result!.upstream_inference_prompt_cost).toBe(4.25e-6); + expect(result!.upstream_inference_prompt_cost).toBe(4.25e-06); expect(result!.upstream_inference_completions_cost).toBe(0.004354); }); @@ -278,7 +278,7 @@ describe('extractUsageCostDetails', () => { cost_details: { upstream_inference_completions_cost: 0.0002275, upstream_inference_cost: 0.0003253, - upstream_inference_prompt_cost: 9.78e-5, + upstream_inference_prompt_cost: 9.78e-05, }, is_byok: true, prompt_tokens: 326, @@ -290,7 +290,7 @@ describe('extractUsageCostDetails', () => { expect(result!.total_cost).toBe(0.0003253); expect(result!.input_cost).toBeNull(); expect(result!.output_cost).toBeNull(); - expect(result!.upstream_inference_prompt_cost).toBe(9.78e-5); + 
expect(result!.upstream_inference_prompt_cost).toBe(9.78e-05); expect(result!.upstream_inference_completions_cost).toBe(0.0002275); }); @@ -352,37 +352,38 @@ describe('extractUsageCostDetails', () => { }); describe('applyUsageCostDetails', () => { - test('overrides costs with provider cost_details breakdown', () => { + test('applies gateway input/output/cached costs directly when full breakdown is present', () => { const record = createUsageRecord(); + // Extracted from: glm-5.1 via LLM Gateway (real response) const costDetails: ProviderCostDetails = { - total_cost: 0.00017465, - input_cost: 0.00002415, - output_cost: 0.0001505, - cached_input_cost: 0, + total_cost: 0.022101624, + input_cost: 0.00073836, + output_cost: 0.00041184, + cached_input_cost: 0.020951424, cache_write_input_cost: 0, - upstream_inference_cost: 0.00017465, - upstream_inference_prompt_cost: 0.00002415, - upstream_inference_completions_cost: 0.0001505, + upstream_inference_cost: 0.022101624, + upstream_inference_prompt_cost: 0.021689784, + upstream_inference_completions_cost: 0.00041184, request_cost: 0, web_search_cost: 0, image_input_cost: null, image_output_cost: null, audio_input_cost: null, - data_storage_cost: 0.00000106, + data_storage_cost: null, }; applyUsageCostDetails(record, costDetails); - expect(record.costTotal).toBe(0.00017465); + expect(record.costTotal).toBeCloseTo(0.022101624, 8); expect(record.costSource).toBe('provider_reported'); - expect(record.providerReportedCost).toBe(0.00017465); - expect(record.costInput).toBe(0.00002415); - expect(record.costOutput).toBe(0.0001505); - expect(record.costCached).toBe(0); + expect(record.providerReportedCost).toBe(0.022101624); + expect(record.costInput).toBe(0.00073836); + expect(record.costOutput).toBe(0.00041184); + expect(record.costCached).toBeCloseTo(0.020951424, 8); expect(record.costCacheWrite).toBe(0); }); - test('falls back to proportional distribution when no breakdown available', () => { + test('falls back to proportional 
distribution when no cost breakdown available', () => { const record = createUsageRecord(); // costInput=0.001, costOutput=0.002, costCached=0.0005, total=0.0035 const costDetails: ProviderCostDetails = { @@ -411,7 +412,7 @@ describe('applyUsageCostDetails', () => { expect(record.costCached).toBeCloseTo((0.0005 / 0.0035) * 0.007, 8); }); - test('attributes full cost to input when no breakdown and no prior costs', () => { + test('attributes full cost to input when no cost breakdown and no prior costs', () => { const record = createUsageRecord({ costInput: 0, costOutput: 0, @@ -445,7 +446,69 @@ describe('applyUsageCostDetails', () => { expect(record.costCacheWrite).toBe(0); }); - test('uses partial breakdown — only input_cost provided', () => { + test('splits upstream prompt cost between input and cached using existing cost ratio', () => { + const record = createUsageRecord(); + // createUsageRecord defaults: costInput=0.001, costCached=0.0005 + // Prompt ratio: input=0.001/(0.001+0.0005)=2/3, cached=0.0005/(0.001+0.0005)=1/3 + // Extracted from: z-ai/glm-5-turbo-20260315 (real response, cached_tokens=128/173 prompt tokens) + const costDetails: ProviderCostDetails = { + total_cost: 0.00021672, + input_cost: null, + output_cost: null, + cached_input_cost: null, + cache_write_input_cost: null, + upstream_inference_cost: 0.00021672, + upstream_inference_prompt_cost: 0.00008472, + upstream_inference_completions_cost: 0.000132, + request_cost: null, + web_search_cost: null, + image_input_cost: null, + image_output_cost: null, + audio_input_cost: null, + data_storage_cost: null, + }; + + applyUsageCostDetails(record, costDetails); + + expect(record.costTotal).toBe(0.00021672); + expect(record.costSource).toBe('provider_reported'); + expect(record.costOutput).toBe(0.000132); + // Prompt (0.00008472) split by record ratio: input=2/3, cached=1/3 + expect(record.costInput).toBeCloseTo((2 / 3) * 0.00008472, 8); + expect(record.costCached).toBeCloseTo((1 / 3) * 0.00008472, 8); + 
expect(record.costCacheWrite).toBe(0); + }); + + test('attributes full upstream prompt cost to input when no cached tokens', () => { + const record = createUsageRecord({ costCached: 0, costCacheWrite: 0, costTotal: 0.003 }); + // Extracted from: normal-tier real response (cached_tokens=0) + const costDetails: ProviderCostDetails = { + total_cost: 0.00435825, + input_cost: null, + output_cost: null, + cached_input_cost: null, + cache_write_input_cost: null, + upstream_inference_cost: null, + upstream_inference_prompt_cost: 4.25e-06, + upstream_inference_completions_cost: 0.004354, + request_cost: null, + web_search_cost: null, + image_input_cost: null, + image_output_cost: null, + audio_input_cost: null, + data_storage_cost: null, + }; + + applyUsageCostDetails(record, costDetails); + + expect(record.costTotal).toBe(0.00435825); + expect(record.costOutput).toBe(0.004354); + expect(record.costInput).toBe(4.25e-06); + expect(record.costCached).toBe(0); + expect(record.costCacheWrite).toBe(0); + }); + + test('uses partial gateway breakdown when only some per-bucket costs are available', () => { const record = createUsageRecord(); const costDetails: ProviderCostDetails = { total_cost: 0.005, @@ -481,9 +544,6 @@ describe('applyUsageCostDetails', () => { output_cost: 0.002, cached_input_cost: null, cache_write_input_cost: null, - upstream_inference_cost: null, - upstream_inference_prompt_cost: null, - upstream_inference_completions_cost: null, request_cost: null, web_search_cost: null, image_input_cost: null, @@ -514,15 +574,12 @@ describe('applyUsageCostDetails', () => { output_cost: 0.0001505, cached_input_cost: 0, cache_write_input_cost: 0, - upstream_inference_cost: 0.00017465, - upstream_inference_prompt_cost: 0.00002415, - upstream_inference_completions_cost: 0.0001505, request_cost: 0, web_search_cost: 0, - image_input_cost: null, - image_output_cost: null, - audio_input_cost: null, - data_storage_cost: 0.00000106, + image_input_cost: 0, + image_output_cost: 0, + 
audio_input_cost: 0, + data_storage_cost: 0, }; applyUsageCostDetails(record, costDetails); @@ -542,14 +599,11 @@ describe('applyUsageCostDetails', () => { output_cost: 0, cached_input_cost: 0, cache_write_input_cost: 0, - upstream_inference_cost: 0, - upstream_inference_prompt_cost: 0, - upstream_inference_completions_cost: 0, request_cost: 0, web_search_cost: 0, - image_input_cost: null, - image_output_cost: null, - audio_input_cost: null, + image_input_cost: 0, + image_output_cost: 0, + audio_input_cost: 0, data_storage_cost: 0, }; diff --git a/packages/backend/src/utils/provider-cost.ts b/packages/backend/src/utils/provider-cost.ts index e98b74b1..9d3d3b08 100644 --- a/packages/backend/src/utils/provider-cost.ts +++ b/packages/backend/src/utils/provider-cost.ts @@ -90,20 +90,55 @@ export function applyUsageCostDetails( usageRecord.costSource = 'provider_reported'; usageRecord.providerReportedCost = totalCost; - // Use the detailed cost breakdown when available + // Three tiers of provider cost reporting: + // 1. Superset: explicit per-bucket breakdown (input_cost, output_cost, cached_input_cost, cache_write_input_cost) + // 2. Normal: upstream_inference_prompt_cost/completions_cost split, but no cache granularity + // 3. 
Minimal: no breakdown at all — distribute proportionally from previously calculated costs + const inputCost = costDetails.input_cost; const outputCost = costDetails.output_cost; const cachedCost = costDetails.cached_input_cost; const cacheWriteCost = costDetails.cache_write_input_cost; - if (inputCost !== null || outputCost !== null || cachedCost !== null || cacheWriteCost !== null) { - // Provider gave us an explicit per-bucket breakdown — use it directly + if (inputCost !== null || cachedCost !== null || cacheWriteCost !== null) { + // Superset: provider gave us an explicit per-bucket breakdown — use it directly + // Note: output_cost alone being non-null is not enough to identify superset; + // it's also reported by normal-tier as upstream_inference_completions_cost. + // Check the input-side fields (which normal-tier does not report separately). usageRecord.costInput = Number((inputCost ?? 0).toFixed(8)); usageRecord.costOutput = Number((outputCost ?? 0).toFixed(8)); usageRecord.costCached = Number((cachedCost ?? 0).toFixed(8)); usageRecord.costCacheWrite = Number((cacheWriteCost ?? 0).toFixed(8)); + } else if ( + costDetails.upstream_inference_prompt_cost != null || + costDetails.upstream_inference_completions_cost != null + ) { + // Normal: upstream gave us prompt vs completions split, but no cache granularity. + // Use the upstream split for the input vs output totals, then preserve Plexus's + // own calculated ratio within the prompt portion for cache/non-cache distribution. + const promptTotal = costDetails.upstream_inference_prompt_cost ?? 0; + const completionsTotal = costDetails.upstream_inference_completions_cost ?? 0; + + usageRecord.costOutput = Number((completionsTotal ?? 
0).toFixed(8)); + + // Split the prompt portion by Plexus's own input/cached/cacheWrite ratio + const prevInput = usageRecord.costInput || 0; + const prevCached = usageRecord.costCached || 0; + const prevCacheWrite = usageRecord.costCacheWrite || 0; + const prevPromptTotal = prevInput + prevCached + prevCacheWrite; + + if (prevPromptTotal > 0) { + usageRecord.costInput = Number(((prevInput / prevPromptTotal) * promptTotal).toFixed(8)); + usageRecord.costCached = Number(((prevCached / prevPromptTotal) * promptTotal).toFixed(8)); + usageRecord.costCacheWrite = Number(((prevCacheWrite / prevPromptTotal) * promptTotal).toFixed(8)); + } else { + // No prior breakdown — attribute full prompt cost to input + usageRecord.costInput = Number(promptTotal.toFixed(8)); + usageRecord.costCached = 0; + usageRecord.costCacheWrite = 0; + } } else { - // No breakdown — distribute proportionally like we do for SSE `: cost` comments + // Minimal: no breakdown — distribute proportionally from previously calculated costs const prevInputCost = usageRecord.costInput || 0; const prevOutputCost = usageRecord.costOutput || 0; const prevCachedCost = usageRecord.costCached || 0; From 768cef8e56e312ad62dc000a64e34c209eaf4bff Mon Sep 17 00:00:00 2001 From: Harrison Conlin Date: Wed, 20 May 2026 10:33:23 +1000 Subject: [PATCH 4/4] test(provider-cost): expand coverage for edge cases and end-to-end flows Add tests for: zero-cost non-BYOK requests, OpenRouter markup (cost >> upstream sum), zero prompt tokens, heavy-cache-hit ratio split, end-to-end BYOK and non-BYOK extract+apply flows. Also normalise scientific notation literals and add missing upstream_inference_* fields to existing superset fixtures. 
Co-Authored-By: Claude Sonnet 4.6 --- .../src/utils/__tests__/provider-cost.test.ts | 219 +++++++++++++++++- 1 file changed, 213 insertions(+), 6 deletions(-) diff --git a/packages/backend/src/utils/__tests__/provider-cost.test.ts b/packages/backend/src/utils/__tests__/provider-cost.test.ts index 8e48c9f8..5449531e 100644 --- a/packages/backend/src/utils/__tests__/provider-cost.test.ts +++ b/packages/backend/src/utils/__tests__/provider-cost.test.ts @@ -232,7 +232,7 @@ describe('extractUsageCostDetails', () => { cost_details: { upstream_inference_completions_cost: 0.004354, upstream_inference_cost: null, - upstream_inference_prompt_cost: 4.25e-06, + upstream_inference_prompt_cost: 4.25e-6, }, is_byok: false, prompt_tokens: 17, @@ -244,7 +244,7 @@ describe('extractUsageCostDetails', () => { expect(result!.total_cost).toBe(0.00435825); expect(result!.input_cost).toBeNull(); expect(result!.output_cost).toBeNull(); - expect(result!.upstream_inference_prompt_cost).toBe(4.25e-06); + expect(result!.upstream_inference_prompt_cost).toBe(4.25e-6); expect(result!.upstream_inference_completions_cost).toBe(0.004354); }); @@ -278,7 +278,7 @@ describe('extractUsageCostDetails', () => { cost_details: { upstream_inference_completions_cost: 0.0002275, upstream_inference_cost: 0.0003253, - upstream_inference_prompt_cost: 9.78e-05, + upstream_inference_prompt_cost: 9.78e-5, }, is_byok: true, prompt_tokens: 326, @@ -290,7 +290,7 @@ describe('extractUsageCostDetails', () => { expect(result!.total_cost).toBe(0.0003253); expect(result!.input_cost).toBeNull(); expect(result!.output_cost).toBeNull(); - expect(result!.upstream_inference_prompt_cost).toBe(9.78e-05); + expect(result!.upstream_inference_prompt_cost).toBe(9.78e-5); expect(result!.upstream_inference_completions_cost).toBe(0.0002275); }); @@ -340,6 +340,89 @@ describe('extractUsageCostDetails', () => { expect(result!.cached_input_cost).toBe(0.020951424); }); + test('returns null when cost is 0 and upstream_inference_cost is null 
(non-BYOK zero-cost)', () => { + // Real response: stream_error — non-BYOK request that genuinely cost $0. + // The || fallback in total cost detection causes 0 || null → null, so extract + // returns null. This is acceptable: zero-cost requests have nothing to report. + const usage = { + prompt_tokens: 43, + completion_tokens: 10, + total_tokens: 53, + cost: 0, + is_byok: false, + prompt_tokens_details: { cached_tokens: 0, audio_tokens: 0 }, + cost_details: { + upstream_inference_cost: null, + upstream_inference_prompt_cost: 0, + upstream_inference_completions_cost: 0, + }, + completion_tokens_details: { reasoning_tokens: 11, image_tokens: 0 }, + }; + + expect(extractUsageCostDetails(usage)).toBeNull(); + }); + + test('handles cost much larger than upstream sum (OpenRouter markup)', () => { + // Real response: file_annotation — OpenRouter's cost includes provider overhead/markup + // that is not reflected in the upstream_inference_prompt/completions_cost fields. + // cost ($0.00216775) is ~13x the upstream sum ($0.00016775). 
+ const usage = { + completion_tokens: 80, + completion_tokens_details: { image_tokens: 0, reasoning_tokens: 64 }, + cost: 0.00216775, + cost_details: { + upstream_inference_completions_cost: 0.00016, + upstream_inference_cost: null, + upstream_inference_prompt_cost: 7.75e-6, + }, + is_byok: false, + prompt_tokens: 31, + prompt_tokens_details: { audio_tokens: 0, cached_tokens: 0, video_tokens: 0 }, + total_tokens: 111, + }; + + const result = extractUsageCostDetails(usage); + expect(result).not.toBeNull(); + // total_cost comes from usage.cost (not upstream sum) + expect(result!.total_cost).toBe(0.00216775); + // upstream fields preserved separately + expect(result!.upstream_inference_prompt_cost).toBe(7.75e-6); + expect(result!.upstream_inference_completions_cost).toBe(0.00016); + // no gateway fields + expect(result!.input_cost).toBeNull(); + expect(result!.output_cost).toBeNull(); + }); + + test('handles zero prompt tokens with all cost on completions', () => { + // Real response: video_url_public_api — prompt_tokens=0, all cost on output side. + // upstream_inference_prompt_cost=0, upstream_inference_cost equals cost. 
+ const usage = { + completion_tokens: 180, + completion_tokens_details: { image_tokens: 0, reasoning_tokens: 0 }, + cost: 0.00045, + cost_details: { + upstream_inference_completions_cost: 0.00045, + upstream_inference_cost: 0.00045, + upstream_inference_prompt_cost: 0, + }, + is_byok: false, + prompt_tokens: 0, + prompt_tokens_details: { + audio_tokens: 0, + cache_write_tokens: 0, + cached_tokens: 0, + video_tokens: 0, + }, + total_tokens: 180, + }; + + const result = extractUsageCostDetails(usage); + expect(result).not.toBeNull(); + expect(result!.total_cost).toBe(0.00045); + expect(result!.upstream_inference_prompt_cost).toBe(0); + expect(result!.upstream_inference_completions_cost).toBe(0.00045); + }); + test('returns null for negative total_cost', () => { const usage = { cost_details: { @@ -479,6 +562,45 @@ describe('applyUsageCostDetails', () => { expect(record.costCacheWrite).toBe(0); }); + test('splits upstream prompt cost by ratio when upstream_inference_cost is null (heavy cache hit)', () => { + // Real response: x-ai/grok-4 via OpenRouter — 679/687 prompt tokens cached. + // upstream_inference_cost is null; total comes from usage.cost instead. + // Prior costs use token-proportional amounts: costInput=0.00008 (8 tokens), + // costCached=0.00679 (679 tokens), prevPromptTotal=0.00687. 
+ const record = createUsageRecord({ + costInput: 0.00008, + costCached: 0.00679, + costCacheWrite: 0, + costTotal: 0.00687, + }); + const costDetails: ProviderCostDetails = { + total_cost: 0.00333825, + input_cost: null, + output_cost: null, + cached_input_cost: null, + cache_write_input_cost: null, + upstream_inference_cost: null, + upstream_inference_prompt_cost: 0.00053325, + upstream_inference_completions_cost: 0.002805, + request_cost: null, + web_search_cost: null, + image_input_cost: null, + image_output_cost: null, + audio_input_cost: null, + data_storage_cost: null, + }; + + applyUsageCostDetails(record, costDetails); + + expect(record.costTotal).toBe(0.00333825); + expect(record.costSource).toBe('provider_reported'); + expect(record.costOutput).toBe(0.002805); + // Prompt (0.00053325) split by prior ratio: input=0.00008/0.00687, cached=0.00679/0.00687 + expect(record.costInput).toBeCloseTo((0.00008 / 0.00687) * 0.00053325, 8); + expect(record.costCached).toBeCloseTo((0.00679 / 0.00687) * 0.00053325, 8); + expect(record.costCacheWrite).toBe(0); + }); + test('attributes full upstream prompt cost to input when no cached tokens', () => { const record = createUsageRecord({ costCached: 0, costCacheWrite: 0, costTotal: 0.003 }); // Extracted from: normal-tier real response (cached_tokens=0) @@ -489,7 +611,7 @@ describe('applyUsageCostDetails', () => { cached_input_cost: null, cache_write_input_cost: null, upstream_inference_cost: null, - upstream_inference_prompt_cost: 4.25e-06, + upstream_inference_prompt_cost: 4.25e-6, upstream_inference_completions_cost: 0.004354, request_cost: null, web_search_cost: null, @@ -503,11 +625,87 @@ describe('applyUsageCostDetails', () => { expect(record.costTotal).toBe(0.00435825); expect(record.costOutput).toBe(0.004354); - expect(record.costInput).toBe(4.25e-06); + expect(record.costInput).toBe(4.25e-6); expect(record.costCached).toBe(0); expect(record.costCacheWrite).toBe(0); }); + test('end-to-end BYOK: extract + apply uses 
upstream cost when usage.cost is 0', () => { + // Real response: google_nested_schema BYOK — cost=0, real cost in upstream_inference_cost. + // extractUsageCostDetails picks upstream_inference_cost as total; + // applyUsageCostDetails hits the normal-tier branch (no gateway fields, only upstream). + const usage = { + completion_tokens: 91, + cost: 0, + cost_details: { + upstream_inference_completions_cost: 0.0002275, + upstream_inference_cost: 0.0003253, + upstream_inference_prompt_cost: 9.78e-5, + }, + is_byok: true, + prompt_tokens: 326, + prompt_tokens_details: { cached_tokens: 0 }, + }; + + const extracted = extractUsageCostDetails(usage); + expect(extracted).not.toBeNull(); + expect(extracted!.total_cost).toBe(0.0003253); + + // Record has no prior cost breakdown (fresh record from a BYOK provider) + const record = createUsageRecord({ + costInput: 0, + costOutput: 0, + costCached: 0, + costCacheWrite: 0, + costTotal: 0, + }); + applyUsageCostDetails(record, extracted!); + + expect(record.costTotal).toBe(0.0003253); + expect(record.costSource).toBe('provider_reported'); + // Normal-tier: output from upstream, full prompt portion to input (no cached tokens in record) + expect(record.costOutput).toBe(0.0002275); + expect(record.costInput).toBe(9.78e-5); + expect(record.costCached).toBe(0); + expect(record.costCacheWrite).toBe(0); + }); + + test('end-to-end non-BYOK normal-tier: extract + apply', () => { + // Real response: usage.yaml second interaction — cost=0.00435825, only upstream fields. + // upstream_inference_cost is null (not BYOK), total comes from usage.cost. 
+ const usage = { + completion_tokens: 2177, + cost: 0.00435825, + cost_details: { + upstream_inference_completions_cost: 0.004354, + upstream_inference_cost: null, + upstream_inference_prompt_cost: 4.25e-6, + }, + is_byok: false, + prompt_tokens: 17, + prompt_tokens_details: { cached_tokens: 0 }, + }; + + const extracted = extractUsageCostDetails(usage); + expect(extracted).not.toBeNull(); + expect(extracted!.total_cost).toBe(0.00435825); + + // Record with no prior breakdown + const record = createUsageRecord({ + costInput: 0, + costOutput: 0, + costCached: 0, + costCacheWrite: 0, + costTotal: 0, + }); + applyUsageCostDetails(record, extracted!); + + expect(record.costTotal).toBe(0.00435825); + expect(record.costOutput).toBe(0.004354); + expect(record.costInput).toBe(4.25e-6); + expect(record.costCached).toBe(0); + }); + test('uses partial gateway breakdown when only some per-bucket costs are available', () => { const record = createUsageRecord(); const costDetails: ProviderCostDetails = { @@ -550,6 +748,9 @@ describe('applyUsageCostDetails', () => { image_output_cost: null, audio_input_cost: null, data_storage_cost: null, + upstream_inference_cost: null, + upstream_inference_prompt_cost: null, + upstream_inference_completions_cost: null, }; applyUsageCostDetails(record, costDetails); @@ -580,6 +781,9 @@ describe('applyUsageCostDetails', () => { image_output_cost: 0, audio_input_cost: 0, data_storage_cost: 0, + upstream_inference_cost: null, + upstream_inference_prompt_cost: null, + upstream_inference_completions_cost: null, }; applyUsageCostDetails(record, costDetails); @@ -605,6 +809,9 @@ describe('applyUsageCostDetails', () => { image_output_cost: 0, audio_input_cost: 0, data_storage_cost: 0, + upstream_inference_cost: null, + upstream_inference_prompt_cost: null, + upstream_inference_completions_cost: null, }; applyUsageCostDetails(record, costDetails);