diff --git a/packages/backend/src/utils/__tests__/provider-cost.test.ts b/packages/backend/src/utils/__tests__/provider-cost.test.ts index f314d77b..5449531e 100644 --- a/packages/backend/src/utils/__tests__/provider-cost.test.ts +++ b/packages/backend/src/utils/__tests__/provider-cost.test.ts @@ -124,42 +124,43 @@ describe('applyProviderReportedCost', () => { describe('extractUsageCostDetails', () => { test('extracts cost_details from the new usage format', () => { + // Real response: glm-5.1 via LLM Gateway (has both gateway and upstream fields) const usage = { - prompt_tokens: 23, - total_tokens: 66, - completion_tokens: 43, - estimated_cost: 0.00017465, + prompt_tokens: 90122, + completion_tokens: 104, + total_tokens: 90226, + cost: 0.022101624, prompt_tokens_details: { - cached_tokens: 0, + cached_tokens: 89536, cache_write_tokens: 0, + audio_tokens: 0, + video_tokens: 0, + image_tokens: 0, }, - cost: 0.00017465, cost_details: { - upstream_inference_cost: 0.00017465, - upstream_inference_prompt_cost: 0.00002415, - upstream_inference_completions_cost: 0.0001505, - total_cost: 0.00017465, - input_cost: 0.00002415, - output_cost: 0.0001505, - cached_input_cost: 0, + upstream_inference_cost: 0.022101624, + upstream_inference_prompt_cost: 0.021689784, + upstream_inference_completions_cost: 0.00041184, + total_cost: 0.022101624, + input_cost: 0.00073836, + output_cost: 0.00041184, + cached_input_cost: 0.020951424, cache_write_input_cost: 0, request_cost: 0, web_search_cost: 0, image_input_cost: null, image_output_cost: null, audio_input_cost: null, - data_storage_cost: 0.00000106, }, }; const result = extractUsageCostDetails(usage); expect(result).not.toBeNull(); - expect(result!.total_cost).toBe(0.00017465); - expect(result!.input_cost).toBe(0.00002415); - expect(result!.output_cost).toBe(0.0001505); - expect(result!.cached_input_cost).toBe(0); + expect(result!.total_cost).toBe(0.022101624); + expect(result!.input_cost).toBe(0.00073836); + expect(result!.output_cost).toBe(0.00041184); + expect(result!.cached_input_cost).toBe(0.020951424); expect(result!.cache_write_input_cost).toBe(0); - expect(result!.data_storage_cost).toBe(0.00000106); }); test('falls back to usage.cost when cost_details.total_cost is missing', () => { @@ -223,25 +224,40 @@ describe('extractUsageCostDetails', () => { expect(extractUsageCostDetails(undefined)).toBeNull(); }); - test('maps upstream_inference_prompt_cost as fallback for input_cost', () => { + test('keeps upstream prompt/completions fields separate from input_cost/output_cost', () => { + // Real response: normal-tier (no gateway input_cost/output_cost fields) const usage = { - cost: 0.01, + completion_tokens: 2177, + cost: 0.00435825, cost_details: { - upstream_inference_prompt_cost: 0.003, - upstream_inference_completions_cost: 0.007, + upstream_inference_completions_cost: 0.004354, + upstream_inference_cost: null, + upstream_inference_prompt_cost: 4.25e-6, }, + is_byok: false, + prompt_tokens: 17, + prompt_tokens_details: { cached_tokens: 0 }, }; const result = extractUsageCostDetails(usage); - expect(result!.input_cost).toBe(0.003); - expect(result!.output_cost).toBe(0.007); + expect(result).not.toBeNull(); + expect(result!.total_cost).toBe(0.00435825); + expect(result!.input_cost).toBeNull(); + expect(result!.output_cost).toBeNull(); + expect(result!.upstream_inference_prompt_cost).toBe(4.25e-6); + expect(result!.upstream_inference_completions_cost).toBe(0.004354); }); test('preserves null values for optional cost fields', () => { + // Real response: LLM Gateway — image/audio costs null for text-only models const usage = { - cost: 0.01, + cost: 0.022101624, cost_details: { - total_cost: 0.01, + total_cost: 0.022101624, + input_cost: 0.00073836, + output_cost: 0.00041184, + cached_input_cost: 0.020951424, + cache_write_input_cost: 0, image_input_cost: null, image_output_cost: null, audio_input_cost: null, @@ -254,6 +270,159 @@ describe('extractUsageCostDetails', () => { expect(result!.audio_input_cost).toBeNull(); }); + test('uses upstream_inference_cost as total when usage.cost is 0 (BYOK)', () => { + // Real response: BYOK — Plexus charges $0, actual cost reported in upstream_inference_cost + const usage = { + completion_tokens: 91, + cost: 0, + cost_details: { + upstream_inference_completions_cost: 0.0002275, + upstream_inference_cost: 0.0003253, + upstream_inference_prompt_cost: 9.78e-5, + }, + is_byok: true, + prompt_tokens: 326, + prompt_tokens_details: { cached_tokens: 0 }, + }; + + const result = extractUsageCostDetails(usage); + expect(result).not.toBeNull(); + expect(result!.total_cost).toBe(0.0003253); + expect(result!.input_cost).toBeNull(); + expect(result!.output_cost).toBeNull(); + expect(result!.upstream_inference_prompt_cost).toBe(9.78e-5); + expect(result!.upstream_inference_completions_cost).toBe(0.0002275); + }); + + test('aliases upstream_inference_input/output_cost to prompt/completions (Responses API)', () => { + // Real response: OpenAI Responses API uses _input/_output suffix rather than _prompt/_completions + const usage = { + input_tokens: 78, + input_tokens_details: { cached_tokens: 0 }, + output_tokens: 37, + total_tokens: 115, + cost: 0.0000113, + is_byok: false, + cost_details: { + upstream_inference_cost: null, + upstream_inference_input_cost: 0.0000039, + upstream_inference_output_cost: 0.0000074, + }, + }; + + const result = extractUsageCostDetails(usage); + expect(result).not.toBeNull(); + expect(result!.total_cost).toBe(0.0000113); + expect(result!.input_cost).toBeNull(); + expect(result!.output_cost).toBeNull(); + expect(result!.upstream_inference_prompt_cost).toBe(0.0000039); + expect(result!.upstream_inference_completions_cost).toBe(0.0000074); + }); + + test('uses input_cost/output_cost directly when present alongside upstream fields', () => { + // Real response: LLM Gateway includes both gateway fields (input_cost/output_cost/cached_input_cost) + // and upstream fields (upstream_inference_prompt/completions_cost); gateway fields take priority + const usage = { + cost: 0.022101624, + cost_details: { + total_cost: 0.022101624, + input_cost: 0.00073836, + output_cost: 0.00041184, + cached_input_cost: 0.020951424, + upstream_inference_prompt_cost: 0.021689784, + upstream_inference_completions_cost: 0.00041184, + }, + }; + + const result = extractUsageCostDetails(usage); + expect(result!.input_cost).toBe(0.00073836); + expect(result!.output_cost).toBe(0.00041184); + expect(result!.cached_input_cost).toBe(0.020951424); + }); + + test('returns null when cost is 0 and upstream_inference_cost is null (non-BYOK zero-cost)', () => { + // Real response: stream_error — non-BYOK request that genuinely cost $0. + // The || fallback in total cost detection causes 0 || null → null, so extract + // returns null. This is acceptable: zero-cost requests have nothing to report. + const usage = { + prompt_tokens: 43, + completion_tokens: 10, + total_tokens: 53, + cost: 0, + is_byok: false, + prompt_tokens_details: { cached_tokens: 0, audio_tokens: 0 }, + cost_details: { + upstream_inference_cost: null, + upstream_inference_prompt_cost: 0, + upstream_inference_completions_cost: 0, + }, + completion_tokens_details: { reasoning_tokens: 11, image_tokens: 0 }, + }; + + expect(extractUsageCostDetails(usage)).toBeNull(); + }); + + test('handles cost much larger than upstream sum (OpenRouter markup)', () => { + // Real response: file_annotation — OpenRouter's cost includes provider overhead/markup + // that is not reflected in the upstream_inference_prompt/completions_cost fields. + // cost ($0.00216775) is ~13x the upstream sum ($0.00016775). + const usage = { + completion_tokens: 80, + completion_tokens_details: { image_tokens: 0, reasoning_tokens: 64 }, + cost: 0.00216775, + cost_details: { + upstream_inference_completions_cost: 0.00016, + upstream_inference_cost: null, + upstream_inference_prompt_cost: 7.75e-6, + }, + is_byok: false, + prompt_tokens: 31, + prompt_tokens_details: { audio_tokens: 0, cached_tokens: 0, video_tokens: 0 }, + total_tokens: 111, + }; + + const result = extractUsageCostDetails(usage); + expect(result).not.toBeNull(); + // total_cost comes from usage.cost (not upstream sum) + expect(result!.total_cost).toBe(0.00216775); + // upstream fields preserved separately + expect(result!.upstream_inference_prompt_cost).toBe(7.75e-6); + expect(result!.upstream_inference_completions_cost).toBe(0.00016); + // no gateway fields + expect(result!.input_cost).toBeNull(); + expect(result!.output_cost).toBeNull(); + }); + + test('handles zero prompt tokens with all cost on completions', () => { + // Real response: video_url_public_api — prompt_tokens=0, all cost on output side. + // upstream_inference_prompt_cost=0, upstream_inference_cost equals cost. + const usage = { + completion_tokens: 180, + completion_tokens_details: { image_tokens: 0, reasoning_tokens: 0 }, + cost: 0.00045, + cost_details: { + upstream_inference_completions_cost: 0.00045, + upstream_inference_cost: 0.00045, + upstream_inference_prompt_cost: 0, + }, + is_byok: false, + prompt_tokens: 0, + prompt_tokens_details: { + audio_tokens: 0, + cache_write_tokens: 0, + cached_tokens: 0, + video_tokens: 0, + }, + total_tokens: 180, + }; + + const result = extractUsageCostDetails(usage); + expect(result).not.toBeNull(); + expect(result!.total_cost).toBe(0.00045); + expect(result!.upstream_inference_prompt_cost).toBe(0); + expect(result!.upstream_inference_completions_cost).toBe(0.00045); + }); + test('returns null for negative total_cost', () => { const usage = { cost_details: { @@ -266,37 +435,38 @@ describe('extractUsageCostDetails', () => { }); describe('applyUsageCostDetails', () => { - test('overrides costs with provider cost_details breakdown', () => { + test('applies gateway input/output/cached costs directly when full breakdown is present', () => { const record = createUsageRecord(); + // Extracted from: glm-5.1 via LLM Gateway (real response) const costDetails: ProviderCostDetails = { - total_cost: 0.00017465, - input_cost: 0.00002415, - output_cost: 0.0001505, - cached_input_cost: 0, + total_cost: 0.022101624, + input_cost: 0.00073836, + output_cost: 0.00041184, + cached_input_cost: 0.020951424, cache_write_input_cost: 0, - upstream_inference_cost: 0.00017465, - upstream_inference_prompt_cost: 0.00002415, - upstream_inference_completions_cost: 0.0001505, + upstream_inference_cost: 0.022101624, + upstream_inference_prompt_cost: 0.021689784, + upstream_inference_completions_cost: 0.00041184, request_cost: 0, web_search_cost: 0, image_input_cost: null, image_output_cost: null, audio_input_cost: null, - data_storage_cost: 0.00000106, + data_storage_cost: null, }; applyUsageCostDetails(record, costDetails); - expect(record.costTotal).toBe(0.00017465); + expect(record.costTotal).toBeCloseTo(0.022101624, 8); expect(record.costSource).toBe('provider_reported'); - expect(record.providerReportedCost).toBe(0.00017465); - expect(record.costInput).toBe(0.00002415); - expect(record.costOutput).toBe(0.0001505); - expect(record.costCached).toBe(0); + expect(record.providerReportedCost).toBe(0.022101624); + expect(record.costInput).toBe(0.00073836); + expect(record.costOutput).toBe(0.00041184); + expect(record.costCached).toBeCloseTo(0.020951424, 8); expect(record.costCacheWrite).toBe(0); }); - test('falls back to proportional distribution when no breakdown available', () => { + test('falls back to proportional distribution when no cost breakdown available', () => { const record = createUsageRecord(); // costInput=0.001, costOutput=0.002, costCached=0.0005, total=0.0035 const costDetails: ProviderCostDetails = { @@ -325,7 +495,7 @@ describe('applyUsageCostDetails', () => { expect(record.costCached).toBeCloseTo((0.0005 / 0.0035) * 0.007, 8); }); - test('attributes full cost to input when no breakdown and no prior costs', () => { + test('attributes full cost to input when no cost breakdown and no prior costs', () => { const record = createUsageRecord({ costInput: 0, costOutput: 0, @@ -359,7 +529,184 @@ describe('applyUsageCostDetails', () => { expect(record.costCacheWrite).toBe(0); }); - test('uses partial breakdown — only input_cost provided', () => { + test('splits upstream prompt cost between input and cached using existing cost ratio', () => { + const record = createUsageRecord(); + // createUsageRecord defaults: costInput=0.001, costCached=0.0005 + // Prompt ratio: input=0.001/(0.001+0.0005)=2/3, cached=0.0005/(0.001+0.0005)=1/3 + // Extracted from: z-ai/glm-5-turbo-20260315 (real response, cached_tokens=128/173 prompt tokens) + const costDetails: ProviderCostDetails = { + total_cost: 0.00021672, + input_cost: null, + output_cost: null, + cached_input_cost: null, + cache_write_input_cost: null, + upstream_inference_cost: 0.00021672, + upstream_inference_prompt_cost: 0.00008472, + upstream_inference_completions_cost: 0.000132, + request_cost: null, + web_search_cost: null, + image_input_cost: null, + image_output_cost: null, + audio_input_cost: null, + data_storage_cost: null, + }; + + applyUsageCostDetails(record, costDetails); + + expect(record.costTotal).toBe(0.00021672); + expect(record.costSource).toBe('provider_reported'); + expect(record.costOutput).toBe(0.000132); + // Prompt (0.00008472) split by record ratio: input=2/3, cached=1/3 + expect(record.costInput).toBeCloseTo((2 / 3) * 0.00008472, 8); + expect(record.costCached).toBeCloseTo((1 / 3) * 0.00008472, 8); + expect(record.costCacheWrite).toBe(0); + }); + + test('splits upstream prompt cost by ratio when upstream_inference_cost is null (heavy cache hit)', () => { + // Real response: x-ai/grok-4 via OpenRouter — 679/687 prompt tokens cached. + // upstream_inference_cost is null; total comes from usage.cost instead. + // Prior costs use token-proportional amounts: costInput=0.00008 (8 tokens), + // costCached=0.00679 (679 tokens), prevPromptTotal=0.00687. + const record = createUsageRecord({ + costInput: 0.00008, + costCached: 0.00679, + costCacheWrite: 0, + costTotal: 0.00687, + }); + const costDetails: ProviderCostDetails = { + total_cost: 0.00333825, + input_cost: null, + output_cost: null, + cached_input_cost: null, + cache_write_input_cost: null, + upstream_inference_cost: null, + upstream_inference_prompt_cost: 0.00053325, + upstream_inference_completions_cost: 0.002805, + request_cost: null, + web_search_cost: null, + image_input_cost: null, + image_output_cost: null, + audio_input_cost: null, + data_storage_cost: null, + }; + + applyUsageCostDetails(record, costDetails); + + expect(record.costTotal).toBe(0.00333825); + expect(record.costSource).toBe('provider_reported'); + expect(record.costOutput).toBe(0.002805); + // Prompt (0.00053325) split by prior ratio: input=0.00008/0.00687, cached=0.00679/0.00687 + expect(record.costInput).toBeCloseTo((0.00008 / 0.00687) * 0.00053325, 8); + expect(record.costCached).toBeCloseTo((0.00679 / 0.00687) * 0.00053325, 8); + expect(record.costCacheWrite).toBe(0); + }); + + test('attributes full upstream prompt cost to input when no cached tokens', () => { + const record = createUsageRecord({ costCached: 0, costCacheWrite: 0, costTotal: 0.003 }); + // Extracted from: normal-tier real response (cached_tokens=0) + const costDetails: ProviderCostDetails = { + total_cost: 0.00435825, + input_cost: null, + output_cost: null, + cached_input_cost: null, + cache_write_input_cost: null, + upstream_inference_cost: null, + upstream_inference_prompt_cost: 4.25e-6, + upstream_inference_completions_cost: 0.004354, + request_cost: null, + web_search_cost: null, + image_input_cost: null, + image_output_cost: null, + audio_input_cost: null, + data_storage_cost: null, + }; + + applyUsageCostDetails(record, costDetails); + + expect(record.costTotal).toBe(0.00435825); + expect(record.costOutput).toBe(0.004354); + expect(record.costInput).toBe(4.25e-6); + expect(record.costCached).toBe(0); + expect(record.costCacheWrite).toBe(0); + }); + + test('end-to-end BYOK: extract + apply uses upstream cost when usage.cost is 0', () => { + // Real response: google_nested_schema BYOK — cost=0, real cost in upstream_inference_cost. + // extractUsageCostDetails picks upstream_inference_cost as total; + // applyUsageCostDetails hits the normal-tier branch (no gateway fields, only upstream). + const usage = { + completion_tokens: 91, + cost: 0, + cost_details: { + upstream_inference_completions_cost: 0.0002275, + upstream_inference_cost: 0.0003253, + upstream_inference_prompt_cost: 9.78e-5, + }, + is_byok: true, + prompt_tokens: 326, + prompt_tokens_details: { cached_tokens: 0 }, + }; + + const extracted = extractUsageCostDetails(usage); + expect(extracted).not.toBeNull(); + expect(extracted!.total_cost).toBe(0.0003253); + + // Record has no prior cost breakdown (fresh record from a BYOK provider) + const record = createUsageRecord({ + costInput: 0, + costOutput: 0, + costCached: 0, + costCacheWrite: 0, + costTotal: 0, + }); + applyUsageCostDetails(record, extracted!); + + expect(record.costTotal).toBe(0.0003253); + expect(record.costSource).toBe('provider_reported'); + // Normal-tier: output from upstream, full prompt portion to input (no cached tokens in record) + expect(record.costOutput).toBe(0.0002275); + expect(record.costInput).toBe(9.78e-5); + expect(record.costCached).toBe(0); + expect(record.costCacheWrite).toBe(0); + }); + + test('end-to-end non-BYOK normal-tier: extract + apply', () => { + // Real response: usage.yaml second interaction — cost=0.00435825, only upstream fields. + // upstream_inference_cost is null (not BYOK), total comes from usage.cost. + const usage = { + completion_tokens: 2177, + cost: 0.00435825, + cost_details: { + upstream_inference_completions_cost: 0.004354, + upstream_inference_cost: null, + upstream_inference_prompt_cost: 4.25e-6, + }, + is_byok: false, + prompt_tokens: 17, + prompt_tokens_details: { cached_tokens: 0 }, + }; + + const extracted = extractUsageCostDetails(usage); + expect(extracted).not.toBeNull(); + expect(extracted!.total_cost).toBe(0.00435825); + + // Record with no prior breakdown + const record = createUsageRecord({ + costInput: 0, + costOutput: 0, + costCached: 0, + costCacheWrite: 0, + costTotal: 0, + }); + applyUsageCostDetails(record, extracted!); + + expect(record.costTotal).toBe(0.00435825); + expect(record.costOutput).toBe(0.004354); + expect(record.costInput).toBe(4.25e-6); + expect(record.costCached).toBe(0); + }); + + test('uses partial gateway breakdown when only some per-bucket costs are available', () => { const record = createUsageRecord(); const costDetails: ProviderCostDetails = { total_cost: 0.005, @@ -395,15 +742,15 @@ describe('applyUsageCostDetails', () => { output_cost: 0.002, cached_input_cost: null, cache_write_input_cost: null, - upstream_inference_cost: null, - upstream_inference_prompt_cost: null, - upstream_inference_completions_cost: null, request_cost: null, web_search_cost: null, image_input_cost: null, image_output_cost: null, audio_input_cost: null, data_storage_cost: null, + upstream_inference_cost: null, + upstream_inference_prompt_cost: null, + upstream_inference_completions_cost: null, }; applyUsageCostDetails(record, costDetails); @@ -428,15 +775,15 @@ describe('applyUsageCostDetails', () => { output_cost: 0.0001505, cached_input_cost: 0, cache_write_input_cost: 0, - upstream_inference_cost: 0.00017465, - upstream_inference_prompt_cost: 0.00002415, - upstream_inference_completions_cost: 0.0001505, request_cost: 0, web_search_cost: 0, - image_input_cost: null, - image_output_cost: null, - audio_input_cost: null, - data_storage_cost: 0.00000106, + image_input_cost: 0, + image_output_cost: 0, + audio_input_cost: 0, + data_storage_cost: 0, + upstream_inference_cost: null, + upstream_inference_prompt_cost: null, + upstream_inference_completions_cost: null, }; applyUsageCostDetails(record, costDetails); @@ -456,15 +803,15 @@ describe('applyUsageCostDetails', () => { output_cost: 0, cached_input_cost: 0, cache_write_input_cost: 0, - upstream_inference_cost: 0, - upstream_inference_prompt_cost: 0, - upstream_inference_completions_cost: 0, request_cost: 0, web_search_cost: 0, - image_input_cost: null, - image_output_cost: null, - audio_input_cost: null, + image_input_cost: 0, + image_output_cost: 0, + audio_input_cost: 0, data_storage_cost: 0, + upstream_inference_cost: null, + upstream_inference_prompt_cost: null, + upstream_inference_completions_cost: null, }; applyUsageCostDetails(record, costDetails); diff --git a/packages/backend/src/utils/provider-cost.ts b/packages/backend/src/utils/provider-cost.ts index e98b74b1..9d3d3b08 100644 --- a/packages/backend/src/utils/provider-cost.ts +++ b/packages/backend/src/utils/provider-cost.ts @@ -90,20 +90,55 @@ export function applyUsageCostDetails( usageRecord.costSource = 'provider_reported'; usageRecord.providerReportedCost = totalCost; - // Use the detailed cost breakdown when available + // Three tiers of provider cost reporting: + // 1. Superset: explicit per-bucket breakdown (input_cost, output_cost, cached_input_cost, cache_write_input_cost) + // 2. Normal: upstream_inference_prompt_cost/completions_cost split, but no cache granularity + // 3. Minimal: no breakdown at all — distribute proportionally from previously calculated costs + const inputCost = costDetails.input_cost; const outputCost = costDetails.output_cost; const cachedCost = costDetails.cached_input_cost; const cacheWriteCost = costDetails.cache_write_input_cost; - if (inputCost !== null || outputCost !== null || cachedCost !== null || cacheWriteCost !== null) { - // Provider gave us an explicit per-bucket breakdown — use it directly + if (inputCost !== null || cachedCost !== null || cacheWriteCost !== null) { + // Superset: provider gave us an explicit per-bucket breakdown — use it directly + // Note: output_cost alone being non-null is not enough to identify superset; + // it's also reported by normal-tier as upstream_inference_completions_cost. + // Check the input-side fields (which normal-tier does not report separately). usageRecord.costInput = Number((inputCost ?? 0).toFixed(8)); usageRecord.costOutput = Number((outputCost ?? 0).toFixed(8)); usageRecord.costCached = Number((cachedCost ?? 0).toFixed(8)); usageRecord.costCacheWrite = Number((cacheWriteCost ?? 0).toFixed(8)); + } else if ( + costDetails.upstream_inference_prompt_cost != null || + costDetails.upstream_inference_completions_cost != null + ) { + // Normal: upstream gave us prompt vs completions split, but no cache granularity. + // Use the upstream split for the input vs output totals, then preserve Plexus's + // own calculated ratio within the prompt portion for cache/non-cache distribution. + const promptTotal = costDetails.upstream_inference_prompt_cost ?? 0; + const completionsTotal = costDetails.upstream_inference_completions_cost ?? 0; + + usageRecord.costOutput = Number((completionsTotal ?? 0).toFixed(8)); + + // Split the prompt portion by Plexus's own input/cached/cacheWrite ratio + const prevInput = usageRecord.costInput || 0; + const prevCached = usageRecord.costCached || 0; + const prevCacheWrite = usageRecord.costCacheWrite || 0; + const prevPromptTotal = prevInput + prevCached + prevCacheWrite; + + if (prevPromptTotal > 0) { + usageRecord.costInput = Number(((prevInput / prevPromptTotal) * promptTotal).toFixed(8)); + usageRecord.costCached = Number(((prevCached / prevPromptTotal) * promptTotal).toFixed(8)); + usageRecord.costCacheWrite = Number(((prevCacheWrite / prevPromptTotal) * promptTotal).toFixed(8)); + } else { + // No prior breakdown — attribute full prompt cost to input + usageRecord.costInput = Number(promptTotal.toFixed(8)); + usageRecord.costCached = 0; + usageRecord.costCacheWrite = 0; + } } else { - // No breakdown — distribute proportionally like we do for SSE `: cost` comments + // Minimal: no breakdown — distribute proportionally from previously calculated costs const prevInputCost = usageRecord.costInput || 0; const prevOutputCost = usageRecord.costOutput || 0; const prevCachedCost = usageRecord.costCached || 0; diff --git a/packages/backend/src/utils/usage-normalizer.ts b/packages/backend/src/utils/usage-normalizer.ts index 714bcde8..a5302f04 100644 --- a/packages/backend/src/utils/usage-normalizer.ts +++ b/packages/backend/src/utils/usage-normalizer.ts @@ -53,19 +53,39 @@ export function extractUsageCostDetails(usage: any): ProviderCostDetails | null const details = usage?.cost_details; if (!details || typeof details !== 'object') return null; - // Validate that at least one cost field is a valid number - const totalCost = safeCost(details.total_cost ?? usage?.cost ?? usage?.estimated_cost); + // Determine total cost: + // 1. cost_details.total_cost + // 2. usage.cost or usage.estimated_cost (standard path) + // 3. cost_details.upstream_inference_cost (OpenRouter quirk) + let totalCost = safeCost(details.total_cost); + + const costFromUsage = safeCost(usage?.cost ?? usage?.estimated_cost); + const upstreamInferenceCost = safeCost(details.upstream_inference_cost); + + if (totalCost === null) { + // || not ?? — BYOK keys report usage.cost=0 (Plexus charges nothing), so a + // falsy 0 should fall through to upstreamInferenceCost which carries the + // actual provider cost. + totalCost = costFromUsage || upstreamInferenceCost; + } if (totalCost === null) return null; return { total_cost: totalCost, - input_cost: safeCost(details.input_cost ?? details.upstream_inference_prompt_cost), - output_cost: safeCost(details.output_cost ?? details.upstream_inference_completions_cost), + // upstream_inference_prompt_cost includes cached tokens (input_cost + cached_input_cost), + // so it can't be mapped directly to input_cost. The upstream fields are preserved + // here and dispatched separately in applyUsageCostDetails(). + input_cost: safeCost(details.input_cost), + output_cost: safeCost(details.output_cost), cached_input_cost: safeCost(details.cached_input_cost), cache_write_input_cost: safeCost(details.cache_write_input_cost), upstream_inference_cost: safeCost(details.upstream_inference_cost), - upstream_inference_prompt_cost: safeCost(details.upstream_inference_prompt_cost), - upstream_inference_completions_cost: safeCost(details.upstream_inference_completions_cost), + upstream_inference_prompt_cost: safeCost( + details.upstream_inference_prompt_cost ?? details.upstream_inference_input_cost + ), + upstream_inference_completions_cost: safeCost( + details.upstream_inference_completions_cost ?? details.upstream_inference_output_cost + ), request_cost: safeCost(details.request_cost), web_search_cost: safeCost(details.web_search_cost), image_input_cost: safeCost(details.image_input_cost),