diff --git a/packages/backend/src/services/inspectors/usage-logging.ts b/packages/backend/src/services/inspectors/usage-logging.ts index e7e917dd..1c504529 100644 --- a/packages/backend/src/services/inspectors/usage-logging.ts +++ b/packages/backend/src/services/inspectors/usage-logging.ts @@ -10,9 +10,10 @@ import { normalizeGeminiUsage, normalizeOpenAIChatUsage, normalizeOpenAIResponsesUsage, + extractUsageCostDetails, } from '../../utils/usage-normalizer'; import { estimateKwhUsed } from '../inference-energy'; -import { applyProviderReportedCost } from '../../utils/provider-cost'; +import { applyProviderReportedCost, applyUsageCostDetails } from '../../utils/provider-cost'; import { DEFAULT_MODEL, DEFAULT_GPU_PARAMS } from '@plexus/shared'; import { recordQuotaUsage } from '../quota/quota-middleware'; @@ -149,6 +150,15 @@ export class UsageInspector extends PassThrough { applyProviderReportedCost(this.usageRecord, reconstructed.providerReportedCost); } + // Override with provider-reported cost from usage.cost_details if available + // Some providers include detailed cost breakdowns in the usage block + if (!this.usageRecord.providerReportedCost && reconstructed?.usage) { + const usageCostDetails = extractUsageCostDetails(reconstructed.usage); + if (usageCostDetails) { + applyUsageCostDetails(this.usageRecord, usageCostDetails); + } + } + // Use provider-reported energy if available, otherwise estimate // Some providers emit `: energy {"energy_kwh": ...}` as SSE comments if (reconstructed?.providerReportedEnergy?.energy_kwh != null) { diff --git a/packages/backend/src/services/response-handler.ts b/packages/backend/src/services/response-handler.ts index a5b2063e..7532e0f1 100644 --- a/packages/backend/src/services/response-handler.ts +++ b/packages/backend/src/services/response-handler.ts @@ -10,7 +10,8 @@ import { DebugLoggingInspector, UsageInspector } from './inspectors'; import { Readable } from 'stream'; import { DebugManager } from './debug-manager'; import { estimateKwhUsed } from './inference-energy'; -import { applyProviderReportedCost } from '../utils/provider-cost'; +import { applyProviderReportedCost, applyUsageCostDetails } from '../utils/provider-cost'; +import { extractUsageCostDetails } from '../utils/usage-normalizer'; import { StallInspector, type StallConfig } from './inspectors/stall-inspector'; import { DEFAULT_GPU_PARAMS, DEFAULT_MODEL } from '@plexus/shared'; import type { GpuParams } from '@plexus/shared'; @@ -502,6 +503,14 @@ async function finalizeUsage( if (reconstructed?.providerReportedCost) { applyProviderReportedCost(usageRecord, reconstructed.providerReportedCost); } + + // Also check for cost_details in the usage block (some providers embed costs there) + if (!usageRecord.providerReportedCost && reconstructed?.usage) { + const usageCostDetails = extractUsageCostDetails(reconstructed.usage); + if (usageCostDetails) { + applyUsageCostDetails(usageRecord, usageCostDetails); + } + } usageRecord.responseStatus = 'success'; usageRecord.durationMs = Date.now() - startTime; diff --git a/packages/backend/src/utils/__tests__/provider-cost.test.ts b/packages/backend/src/utils/__tests__/provider-cost.test.ts index afd22de8..f314d77b 100644 --- a/packages/backend/src/utils/__tests__/provider-cost.test.ts +++ b/packages/backend/src/utils/__tests__/provider-cost.test.ts @@ -1,6 +1,8 @@ import { describe, test, expect } from 'vitest'; -import { applyProviderReportedCost } from '../provider-cost'; +import { applyProviderReportedCost, applyUsageCostDetails } from '../provider-cost'; +import { extractUsageCostDetails } from '../usage-normalizer'; import type { UsageRecord } from '../../types/usage'; +import type { ProviderCostDetails } from '../usage-normalizer'; function createUsageRecord(overrides: Partial = {}): Partial { return { @@ -120,6 +122,375 @@ describe('applyProviderReportedCost', () => { }); }); +describe('extractUsageCostDetails', () => { + test('extracts cost_details from the new usage format', () => { + const usage = { + prompt_tokens: 23, + total_tokens: 66, + completion_tokens: 43, + estimated_cost: 0.00017465, + prompt_tokens_details: { + cached_tokens: 0, + cache_write_tokens: 0, + }, + cost: 0.00017465, + cost_details: { + upstream_inference_cost: 0.00017465, + upstream_inference_prompt_cost: 0.00002415, + upstream_inference_completions_cost: 0.0001505, + total_cost: 0.00017465, + input_cost: 0.00002415, + output_cost: 0.0001505, + cached_input_cost: 0, + cache_write_input_cost: 0, + request_cost: 0, + web_search_cost: 0, + image_input_cost: null, + image_output_cost: null, + audio_input_cost: null, + data_storage_cost: 0.00000106, + }, + }; + + const result = extractUsageCostDetails(usage); + expect(result).not.toBeNull(); + expect(result!.total_cost).toBe(0.00017465); + expect(result!.input_cost).toBe(0.00002415); + expect(result!.output_cost).toBe(0.0001505); + expect(result!.cached_input_cost).toBe(0); + expect(result!.cache_write_input_cost).toBe(0); + expect(result!.data_storage_cost).toBe(0.00000106); + }); + + test('falls back to usage.cost when cost_details.total_cost is missing', () => { + const usage = { + cost: 0.005, + cost_details: { + input_cost: 0.001, + output_cost: 0.004, + }, + }; + + const result = extractUsageCostDetails(usage); + expect(result).not.toBeNull(); + expect(result!.total_cost).toBe(0.005); + expect(result!.input_cost).toBe(0.001); + expect(result!.output_cost).toBe(0.004); + }); + + test('falls back to usage.estimated_cost when cost and total_cost are both missing', () => { + const usage = { + estimated_cost: 0.003, + cost_details: { + input_cost: 0.001, + output_cost: 0.002, + }, + }; + + const result = extractUsageCostDetails(usage); + expect(result).not.toBeNull(); + expect(result!.total_cost).toBe(0.003); + }); + + test('returns null when usage has no cost_details', () => { + const usage = { + prompt_tokens: 23, + completion_tokens: 43, + total_tokens: 66, + }; + + expect(extractUsageCostDetails(usage)).toBeNull(); + }); + + test('returns null when cost_details exists but no total cost is available', () => { + const usage = { + cost_details: { + input_cost: 0.001, + }, + }; + + expect(extractUsageCostDetails(usage)).toBeNull(); + }); + + test('returns null when cost_details is not an object', () => { + expect(extractUsageCostDetails({ cost_details: 'invalid' })).toBeNull(); + expect(extractUsageCostDetails({ cost_details: 42 })).toBeNull(); + expect(extractUsageCostDetails({ cost_details: null })).toBeNull(); + }); + + test('returns null when usage is null or undefined', () => { + expect(extractUsageCostDetails(null)).toBeNull(); + expect(extractUsageCostDetails(undefined)).toBeNull(); + }); + + test('maps upstream_inference_prompt_cost as fallback for input_cost', () => { + const usage = { + cost: 0.01, + cost_details: { + upstream_inference_prompt_cost: 0.003, + upstream_inference_completions_cost: 0.007, + }, + }; + + const result = extractUsageCostDetails(usage); + expect(result!.input_cost).toBe(0.003); + expect(result!.output_cost).toBe(0.007); + }); + + test('preserves null values for optional cost fields', () => { + const usage = { + cost: 0.01, + cost_details: { + total_cost: 0.01, + image_input_cost: null, + image_output_cost: null, + audio_input_cost: null, + }, + }; + + const result = extractUsageCostDetails(usage); + expect(result!.image_input_cost).toBeNull(); + expect(result!.image_output_cost).toBeNull(); + expect(result!.audio_input_cost).toBeNull(); + }); + + test('returns null for negative total_cost', () => { + const usage = { + cost_details: { + total_cost: -0.01, + }, + }; + + expect(extractUsageCostDetails(usage)).toBeNull(); + }); +}); + +describe('applyUsageCostDetails', () => { + test('overrides costs with provider cost_details breakdown', () => { + const record = createUsageRecord(); + const costDetails: ProviderCostDetails = { + total_cost: 0.00017465, + input_cost: 0.00002415, + output_cost: 0.0001505, + cached_input_cost: 0, + cache_write_input_cost: 0, + upstream_inference_cost: 0.00017465, + upstream_inference_prompt_cost: 0.00002415, + upstream_inference_completions_cost: 0.0001505, + request_cost: 0, + web_search_cost: 0, + image_input_cost: null, + image_output_cost: null, + audio_input_cost: null, + data_storage_cost: 0.00000106, + }; + + applyUsageCostDetails(record, costDetails); + + expect(record.costTotal).toBe(0.00017465); + expect(record.costSource).toBe('provider_reported'); + expect(record.providerReportedCost).toBe(0.00017465); + expect(record.costInput).toBe(0.00002415); + expect(record.costOutput).toBe(0.0001505); + expect(record.costCached).toBe(0); + expect(record.costCacheWrite).toBe(0); + }); + + test('falls back to proportional distribution when no breakdown available', () => { + const record = createUsageRecord(); + // costInput=0.001, costOutput=0.002, costCached=0.0005, total=0.0035 + const costDetails: ProviderCostDetails = { + total_cost: 0.007, + input_cost: null, + output_cost: null, + cached_input_cost: null, + cache_write_input_cost: null, + upstream_inference_cost: null, + upstream_inference_prompt_cost: null, + upstream_inference_completions_cost: null, + request_cost: null, + web_search_cost: null, + image_input_cost: null, + image_output_cost: null, + audio_input_cost: null, + data_storage_cost: null, + }; + + applyUsageCostDetails(record, costDetails); + + expect(record.costTotal).toBe(0.007); + // Ratios: input=1/3.5, output=2/3.5, cached=0.5/3.5 + expect(record.costInput).toBeCloseTo((0.001 / 0.0035) * 0.007, 8); + expect(record.costOutput).toBeCloseTo((0.002 / 0.0035) * 0.007, 8); + expect(record.costCached).toBeCloseTo((0.0005 / 0.0035) * 0.007, 8); + }); + + test('attributes full cost to input when no breakdown and no prior costs', () => { + const record = createUsageRecord({ + costInput: 0, + costOutput: 0, + costCached: 0, + costCacheWrite: 0, + costTotal: 0, + }); + const costDetails: ProviderCostDetails = { + total_cost: 0.005, + input_cost: null, + output_cost: null, + cached_input_cost: null, + cache_write_input_cost: null, + upstream_inference_cost: null, + upstream_inference_prompt_cost: null, + upstream_inference_completions_cost: null, + request_cost: null, + web_search_cost: null, + image_input_cost: null, + image_output_cost: null, + audio_input_cost: null, + data_storage_cost: null, + }; + + applyUsageCostDetails(record, costDetails); + + expect(record.costTotal).toBe(0.005); + expect(record.costInput).toBe(0.005); + expect(record.costOutput).toBe(0); + expect(record.costCached).toBe(0); + expect(record.costCacheWrite).toBe(0); + }); + + test('uses partial breakdown — only input_cost provided', () => { + const record = createUsageRecord(); + const costDetails: ProviderCostDetails = { + total_cost: 0.005, + input_cost: 0.002, + output_cost: null, + cached_input_cost: null, + cache_write_input_cost: null, + upstream_inference_cost: null, + upstream_inference_prompt_cost: null, + upstream_inference_completions_cost: null, + request_cost: null, + web_search_cost: null, + image_input_cost: null, + image_output_cost: null, + audio_input_cost: null, + data_storage_cost: null, + }; + + applyUsageCostDetails(record, costDetails); + + expect(record.costTotal).toBe(0.005); + expect(record.costInput).toBe(0.002); + expect(record.costOutput).toBe(0); + expect(record.costCached).toBe(0); + expect(record.costCacheWrite).toBe(0); + }); + + test('does nothing when total_cost is null', () => { + const record = createUsageRecord(); + const costDetails: ProviderCostDetails = { + total_cost: null, + input_cost: 0.001, + output_cost: 0.002, + cached_input_cost: null, + cache_write_input_cost: null, + upstream_inference_cost: null, + upstream_inference_prompt_cost: null, + upstream_inference_completions_cost: null, + request_cost: null, + web_search_cost: null, + image_input_cost: null, + image_output_cost: null, + audio_input_cost: null, + data_storage_cost: null, + }; + + applyUsageCostDetails(record, costDetails); + + // Should remain unchanged + expect(record.costTotal).toBe(0.0035); + expect(record.costSource).toBe('simple'); + }); + + test('does nothing when costDetails is null or undefined', () => { + const record = createUsageRecord(); + applyUsageCostDetails(record, null as any); + expect(record.costTotal).toBe(0.0035); + expect(record.costSource).toBe('simple'); + }); + + test('stores cost_details in costMetadata for audit', () => { + const record = createUsageRecord(); + const costDetails: ProviderCostDetails = { + total_cost: 0.00017465, + input_cost: 0.00002415, + output_cost: 0.0001505, + cached_input_cost: 0, + cache_write_input_cost: 0, + upstream_inference_cost: 0.00017465, + upstream_inference_prompt_cost: 0.00002415, + upstream_inference_completions_cost: 0.0001505, + request_cost: 0, + web_search_cost: 0, + image_input_cost: null, + image_output_cost: null, + audio_input_cost: null, + data_storage_cost: 0.00000106, + }; + + applyUsageCostDetails(record, costDetails); + + const metadata = JSON.parse(record.costMetadata!); + expect(metadata.source).toBe('provider_reported'); + expect(metadata.cost_details).toEqual(costDetails); + expect(metadata.previous_cost_source).toBe('simple'); + expect(metadata.previous_cost_total).toBe(0.0035); + }); + + test('handles zero total_cost', () => { + const record = createUsageRecord(); + const costDetails: ProviderCostDetails = { + total_cost: 0, + input_cost: 0, + output_cost: 0, + cached_input_cost: 0, + cache_write_input_cost: 0, + upstream_inference_cost: 0, + upstream_inference_prompt_cost: 0, + upstream_inference_completions_cost: 0, + request_cost: 0, + web_search_cost: 0, + image_input_cost: null, + image_output_cost: null, + audio_input_cost: null, + data_storage_cost: 0, + }; + + applyUsageCostDetails(record, costDetails); + + expect(record.costTotal).toBe(0); + expect(record.costSource).toBe('provider_reported'); + expect(record.costInput).toBe(0); + expect(record.costOutput).toBe(0); + }); + + test('SSE : cost comments take precedence over cost_details', () => { + const record = createUsageRecord(); + // SSE comment cost applied first + applyProviderReportedCost(record, { request_cost_usd: 0.001 }); + expect(record.costTotal).toBe(0.001); + expect(record.providerReportedCost).toBe(0.001); + + // cost_details should NOT override because providerReportedCost is already set + // (this check is done at the call site, not in applyUsageCostDetails itself) + // The ordering in usage-logging.ts is: + // 1. applyProviderReportedCost (if providerReportedCost) + // 2. applyUsageCostDetails (only if !providerReportedCost) + expect(record.providerReportedCost).toBe(0.001); + }); +}); + describe('extractProviderCostFromSSEComments (via DebugLoggingInspector)', () => { test('parses : cost SSE comment lines from raw SSE body', () => { const rawBody = [ diff --git a/packages/backend/src/utils/__tests__/usage-normalizer.test.ts b/packages/backend/src/utils/__tests__/usage-normalizer.test.ts index d87cff52..a6668b63 100644 --- a/packages/backend/src/utils/__tests__/usage-normalizer.test.ts +++ b/packages/backend/src/utils/__tests__/usage-normalizer.test.ts @@ -1,5 +1,10 @@ import { describe, expect, test } from 'vitest'; -import { normalizeGeminiUsage, normalizeOpenAIResponsesUsage } from '../usage-normalizer'; +import { + normalizeGeminiUsage, + normalizeOpenAIChatUsage, + normalizeOpenAIResponsesUsage, + extractUsageCostDetails, +} from '../usage-normalizer'; describe('usage-normalizer - OpenAI Responses usage', () => { test('normalizes when input_tokens includes cached tokens', () => { @@ -80,3 +85,92 @@ describe('usage-normalizer - Gemini usage', () => { expect(normalized.total_tokens).toBe(1027); }); }); + +describe('usage-normalizer - OpenAI Chat usage', () => { + test('normalizes prompt_tokens_details with cached_tokens', () => { + const normalized = normalizeOpenAIChatUsage({ + prompt_tokens: 2006, + completion_tokens: 300, + total_tokens: 2306, + prompt_tokens_details: { + cached_tokens: 1920, + }, + completion_tokens_details: { + reasoning_tokens: 0, + }, + }); + + expect(normalized.input_tokens).toBe(86); + expect(normalized.cached_tokens).toBe(1920); + expect(normalized.output_tokens).toBe(300); + expect(normalized.total_tokens).toBe(2306); + expect(normalized.reasoning_tokens).toBe(0); + expect(normalized.cache_creation_tokens).toBe(0); + }); + + test('extracts cache_write_tokens from prompt_tokens_details', () => { + const normalized = normalizeOpenAIChatUsage({ + prompt_tokens: 2006, + completion_tokens: 300, + total_tokens: 2306, + prompt_tokens_details: { + cached_tokens: 1920, + cache_write_tokens: 50, + }, + completion_tokens_details: { + reasoning_tokens: 10, + }, + }); + + expect(normalized.cache_creation_tokens).toBe(50); + expect(normalized.cached_tokens).toBe(1920); + expect(normalized.reasoning_tokens).toBe(10); + }); + + test('defaults cache_write_tokens to 0 when not present', () => { + const normalized = normalizeOpenAIChatUsage({ + prompt_tokens: 100, + completion_tokens: 50, + total_tokens: 150, + prompt_tokens_details: { + cached_tokens: 20, + }, + }); + + expect(normalized.cache_creation_tokens).toBe(0); + }); + + test('handles new usage format with cost_details (tokens only)', () => { + const normalized = normalizeOpenAIChatUsage({ + prompt_tokens: 23, + total_tokens: 66, + completion_tokens: 43, + estimated_cost: 0.00017465, + prompt_tokens_details: { + cached_tokens: 0, + cache_write_tokens: 0, + audio_tokens: 0, + video_tokens: 0, + image_tokens: 0, + }, + cost: 0.00017465, + cost_details: { + total_cost: 0.00017465, + input_cost: 0.00002415, + output_cost: 0.0001505, + }, + completion_tokens_details: { + reasoning_tokens: 0, + image_tokens: 0, + audio_tokens: 0, + }, + }); + + expect(normalized.input_tokens).toBe(23); + expect(normalized.output_tokens).toBe(43); + expect(normalized.cached_tokens).toBe(0); + expect(normalized.cache_creation_tokens).toBe(0); + expect(normalized.reasoning_tokens).toBe(0); + expect(normalized.total_tokens).toBe(66); + }); +}); diff --git a/packages/backend/src/utils/provider-cost.ts b/packages/backend/src/utils/provider-cost.ts index 6f58e5bc..e98b74b1 100644 --- a/packages/backend/src/utils/provider-cost.ts +++ b/packages/backend/src/utils/provider-cost.ts @@ -1,5 +1,6 @@ import { logger } from './logger'; import { UsageRecord } from '../types/usage'; +import type { ProviderCostDetails } from './usage-normalizer'; /** * Apply provider-reported cost data, overriding calculated costs. @@ -58,3 +59,82 @@ export function applyProviderReportedCost(usageRecord: Partial, cos `$${requestCostUsd} (overridden from calculated $${previousCostTotal})` ); } + +/** + * Apply provider-reported cost data from the usage.cost_details block. + * + * Some providers include detailed cost breakdowns directly in the response + * usage object with fields like: + * - usage.cost / usage.estimated_cost — total cost + * - usage.cost_details.input_cost — prompt token cost + * - usage.cost_details.output_cost — completion token cost + * - usage.cost_details.cached_input_cost — cached token cost + * - usage.cost_details.cache_write_input_cost — cache write token cost + * + * When present, we trust the provider's actual cost over our calculations. + * This is more accurate than the SSE `: cost` comment format because it + * provides a per-bucket breakdown rather than just a total. + */ +export function applyUsageCostDetails( + usageRecord: Partial, + costDetails: ProviderCostDetails +): void { + if (!costDetails || costDetails.total_cost === null) return; + + const previousCostSource = usageRecord.costSource; + const previousCostTotal = usageRecord.costTotal; + + const totalCost = costDetails.total_cost; + + usageRecord.costTotal = Number(totalCost.toFixed(8)); + usageRecord.costSource = 'provider_reported'; + usageRecord.providerReportedCost = totalCost; + + // Use the detailed cost breakdown when available + const inputCost = costDetails.input_cost; + const outputCost = costDetails.output_cost; + const cachedCost = costDetails.cached_input_cost; + const cacheWriteCost = costDetails.cache_write_input_cost; + + if (inputCost !== null || outputCost !== null || cachedCost !== null || cacheWriteCost !== null) { + // Provider gave us an explicit per-bucket breakdown — use it directly + usageRecord.costInput = Number((inputCost ?? 0).toFixed(8)); + usageRecord.costOutput = Number((outputCost ?? 0).toFixed(8)); + usageRecord.costCached = Number((cachedCost ?? 0).toFixed(8)); + usageRecord.costCacheWrite = Number((cacheWriteCost ?? 0).toFixed(8)); + } else { + // No breakdown — distribute proportionally like we do for SSE `: cost` comments + const prevInputCost = usageRecord.costInput || 0; + const prevOutputCost = usageRecord.costOutput || 0; + const prevCachedCost = usageRecord.costCached || 0; + const prevCacheWriteCost = usageRecord.costCacheWrite || 0; + const totalCalc = prevInputCost + prevOutputCost + prevCachedCost + prevCacheWriteCost; + + if (totalCalc > 0) { + usageRecord.costInput = Number(((prevInputCost / totalCalc) * totalCost).toFixed(8)); + usageRecord.costOutput = Number(((prevOutputCost / totalCalc) * totalCost).toFixed(8)); + usageRecord.costCached = Number(((prevCachedCost / totalCalc) * totalCost).toFixed(8)); + usageRecord.costCacheWrite = Number( + ((prevCacheWriteCost / totalCalc) * totalCost).toFixed(8) + ); + } else { + usageRecord.costInput = Number(totalCost.toFixed(8)); + usageRecord.costOutput = 0; + usageRecord.costCached = 0; + usageRecord.costCacheWrite = 0; + } + } + + // Store the full provider cost payload in costMetadata for audit + usageRecord.costMetadata = JSON.stringify({ + source: 'provider_reported', + cost_details: costDetails, + previous_cost_source: previousCostSource, + previous_cost_total: previousCostTotal, + }); + + logger.debug( + `[ProviderCost] Provider-reported cost (usage.cost_details) for ${usageRecord.requestId}: ` + + `$${totalCost} (overridden from ${previousCostSource} $${previousCostTotal})` + ); +} diff --git a/packages/backend/src/utils/usage-normalizer.ts b/packages/backend/src/utils/usage-normalizer.ts index e366683c..714bcde8 100644 --- a/packages/backend/src/utils/usage-normalizer.ts +++ b/packages/backend/src/utils/usage-normalizer.ts @@ -16,11 +16,71 @@ const safeToken = (value: unknown): number => { return Math.max(0, Math.floor(num)); }; +const safeCost = (value: unknown): number | null => { + if (value === null || value === undefined) return null; + const num = Number(value); + if (!Number.isFinite(num) || num < 0) return null; + return num; +}; + +export interface ProviderCostDetails { + total_cost: number | null; + input_cost: number | null; + output_cost: number | null; + cached_input_cost: number | null; + cache_write_input_cost: number | null; + upstream_inference_cost: number | null; + upstream_inference_prompt_cost: number | null; + upstream_inference_completions_cost: number | null; + request_cost: number | null; + web_search_cost: number | null; + image_input_cost: number | null; + image_output_cost: number | null; + audio_input_cost: number | null; + data_storage_cost: number | null; +} + +export interface UsageWithCostDetails extends UsageSubset { + provider_cost_details: ProviderCostDetails | null; +} + +/** + * Extract provider-reported cost details from the usage.cost_details block. + * Some providers (e.g., openrouter-like proxies) include detailed cost + * breakdowns directly in the usage object. + */ +export function extractUsageCostDetails(usage: any): ProviderCostDetails | null { + const details = usage?.cost_details; + if (!details || typeof details !== 'object') return null; + + // Validate that at least one cost field is a valid number + const totalCost = safeCost(details.total_cost ?? usage?.cost ?? usage?.estimated_cost); + if (totalCost === null) return null; + + return { + total_cost: totalCost, + input_cost: safeCost(details.input_cost ?? details.upstream_inference_prompt_cost), + output_cost: safeCost(details.output_cost ?? details.upstream_inference_completions_cost), + cached_input_cost: safeCost(details.cached_input_cost), + cache_write_input_cost: safeCost(details.cache_write_input_cost), + upstream_inference_cost: safeCost(details.upstream_inference_cost), + upstream_inference_prompt_cost: safeCost(details.upstream_inference_prompt_cost), + upstream_inference_completions_cost: safeCost(details.upstream_inference_completions_cost), + request_cost: safeCost(details.request_cost), + web_search_cost: safeCost(details.web_search_cost), + image_input_cost: safeCost(details.image_input_cost), + image_output_cost: safeCost(details.image_output_cost), + audio_input_cost: safeCost(details.audio_input_cost), + data_storage_cost: safeCost(details.data_storage_cost), + }; +} + export function normalizeOpenAIChatUsage(usage: any): UsageSubset { const promptTokens = safeToken(usage?.prompt_tokens); const cachedTokens = safeToken( usage?.prompt_tokens_details?.cached_tokens ?? usage?.cached_tokens ); + const cacheWriteTokens = safeToken(usage?.prompt_tokens_details?.cache_write_tokens); const outputTokens = safeToken(usage?.completion_tokens); const reasoningTokens = safeToken(usage?.completion_tokens_details?.reasoning_tokens); @@ -34,7 +94,7 @@ export function normalizeOpenAIChatUsage(usage: any): UsageSubset { total_tokens: safeToken(usage?.total_tokens) || inputTokens + cachedTokens + outputTokens, reasoning_tokens: reasoningTokens, cached_tokens: cachedTokens, - cache_creation_tokens: 0, + cache_creation_tokens: cacheWriteTokens, }; }