From 72fd1ff16e407a19acd2a0bfb0cdaac9c2a5d598 Mon Sep 17 00:00:00 2001 From: mnajafian-nv Date: Tue, 2 Jun 2026 20:28:52 -0700 Subject: [PATCH 1/2] fix: surface explicit observability costs Signed-off-by: mnajafian-nv --- crates/core/src/observability/atif.rs | 58 +++++++++++-------- .../core/src/observability/openinference.rs | 34 ++++++++--- crates/core/tests/unit/atif_tests.rs | 40 ++++++++++++- .../tests/unit/observability/atof_tests.rs | 12 +++- .../unit/observability/openinference_tests.rs | 16 ++++- .../openclaw-plugin.mdx | 5 +- integrations/openclaw/test/llm-replay.test.ts | 2 + 7 files changed, 126 insertions(+), 41 deletions(-) diff --git a/crates/core/src/observability/atif.rs b/crates/core/src/observability/atif.rs index efc0dade..a09af56b 100644 --- a/crates/core/src/observability/atif.rs +++ b/crates/core/src/observability/atif.rs @@ -629,6 +629,7 @@ const TOKEN_USAGE_KNOWN_KEYS: &[&str] = &[ "completion_tokens", "cached_tokens", "cost_usd", + "cost", "prompt_token_ids", "completion_token_ids", "logprobs", @@ -638,7 +639,7 @@ const TOKEN_USAGE_KNOWN_KEYS: &[&str] = &[ /// /// Supports NeMo Relay `token_usage` and provider-native `usage` payloads. /// Populates `extra` with any unknown usage keys (e.g. reasoning_tokens or total_tokens). -/// Returns `None` if the response has no recognized token counts. +/// Returns `None` if the response has no recognized token or cost metrics. fn extract_metrics(output: &Json) -> Option { let usage = token_usage_object(output)?; let prompt = usage_u64(usage, &["prompt_tokens", "input_tokens"]); @@ -651,7 +652,10 @@ fn extract_metrics(output: &Json) -> Option { &["cache_read_input_tokens", "cache_creation_input_tokens"], ) }); - let cost = usage.get("cost_usd").and_then(Json::as_f64); + let cost = usage + .get("cost_usd") + .and_then(Json::as_f64) + .or_else(|| usage.get("cost")?.as_object()?.get("total")?.as_f64()); let prompt_ids = usage .get("prompt_token_ids") .and_then(Json::as_array) @@ -675,7 +679,7 @@ fn extract_metrics(output: &Json) -> Option { } else { Some(Json::Object(extra_map)) }; - if prompt.is_none() && completion.is_none() && cached.is_none() { + if prompt.is_none() && completion.is_none() && cached.is_none() && cost.is_none() { return None; } Some(AtifMetrics { @@ -1064,44 +1068,50 @@ fn event_extra(event: &Event) -> Json { Json::Object(extra) } -/// Compute aggregate `final_metrics` by summing token counts across all steps. +/// Compute aggregate `final_metrics` by summing metrics across all steps. /// -/// Always returns `Some(AtifFinalMetrics)` with `total_steps` set. Token/cost -/// fields are populated when at least one step carries metrics. +/// Always returns `Some(AtifFinalMetrics)` with `total_steps` set. Each token +/// or cost total is populated only when at least one step provides that field. fn compute_final_metrics(steps: &[AtifStep]) -> Option { let mut total_prompt: u64 = 0; let mut total_completion: u64 = 0; let mut total_cached: u64 = 0; let mut total_cost: f64 = 0.0; - let mut has_any = false; + let mut has_prompt = false; + let mut has_completion = false; + let mut has_cached = false; + let mut has_cost = false; for step in steps { if let Some(m) = &step.metrics { - has_any = true; - total_prompt += m.prompt_tokens.unwrap_or(0); - total_completion += m.completion_tokens.unwrap_or(0); - total_cached += m.cached_tokens.unwrap_or(0); - total_cost += m.cost_usd.unwrap_or(0.0); + if let Some(prompt_tokens) = m.prompt_tokens { + has_prompt = true; + total_prompt += prompt_tokens; + } + if let Some(completion_tokens) = m.completion_tokens { + has_completion = true; + total_completion += completion_tokens; + } + if let Some(cached_tokens) = m.cached_tokens { + has_cached = true; + total_cached += cached_tokens; + } + if let Some(cost) = m.cost_usd { + has_cost = true; + total_cost += cost; + } } } Some(AtifFinalMetrics { - total_prompt_tokens: if has_any { Some(total_prompt) } else { None }, - total_completion_tokens: if has_any { + total_prompt_tokens: if has_prompt { Some(total_prompt) } else { None }, + total_completion_tokens: if has_completion { Some(total_completion) } else { None }, - total_cached_tokens: if has_any && total_cached > 0 { - Some(total_cached) - } else { - None - }, - total_cost_usd: if has_any && total_cost > 0.0 { - Some(total_cost) - } else { - None - }, + total_cached_tokens: if has_cached { Some(total_cached) } else { None }, + total_cost_usd: if has_cost { Some(total_cost) } else { None }, total_steps: Some(steps.len() as u64), extra: None, }) diff --git a/crates/core/src/observability/openinference.rs b/crates/core/src/observability/openinference.rs index 990902fa..cb3264b0 100644 --- a/crates/core/src/observability/openinference.rs +++ b/crates/core/src/observability/openinference.rs @@ -687,6 +687,10 @@ fn start_attributes(event: &Event) -> Vec { fn end_attributes(event: &Event) -> Vec { let mut attributes = Vec::new(); + let is_llm = event + .category() + .is_some_and(|category| category.as_str() == "llm"); + push_serialized( &mut attributes, "nemo_relay.end.output_json", @@ -696,10 +700,7 @@ fn end_attributes(event: &Event) -> Vec { attributes.push(KeyValue::new(oi::output::VALUE, output)); attributes.push(KeyValue::new(oi::output::MIME_TYPE, mime_type)); } - let fallback_usage = if event - .category() - .is_some_and(|category| category.as_str() == "llm") - { + let fallback_usage = if is_llm { usage_from_manual_llm_output(event.output()) } else { None @@ -708,11 +709,7 @@ fn end_attributes(event: &Event) -> Vec { .annotated_response() .and_then(|response| response.usage.as_ref()) .or(fallback_usage.as_ref()); - if event - .category() - .is_some_and(|category| category.as_str() == "llm") - && let Some(usage) = usage - { + if is_llm && let Some(usage) = usage { if let Some(v) = usage.prompt_tokens { attributes.push(KeyValue::new(oi::llm::token_count::PROMPT, v as i64)); } @@ -735,9 +732,28 @@ fn end_attributes(event: &Event) -> Vec { )); } } + if is_llm && let Some(cost_total) = cost_total_from_manual_llm_output(event.output()) { + attributes.push(KeyValue::new(oi::llm::cost::TOTAL, cost_total)); + } attributes } +fn cost_total_from_manual_llm_output(output: Option<&Json>) -> Option { + let object = output?.as_object()?; + let usage = object.get("usage").and_then(Json::as_object); + let token_usage = object.get("token_usage").and_then(Json::as_object); + usage + .and_then(cost_total_from_usage) + .or_else(|| token_usage.and_then(cost_total_from_usage)) +} + +fn cost_total_from_usage(usage: &serde_json::Map) -> Option { + usage + .get("cost_usd") + .and_then(Json::as_f64) + .or_else(|| usage.get("cost")?.as_object()?.get("total")?.as_f64()) +} + fn usage_from_manual_llm_output(output: Option<&Json>) -> Option { let object = output?.as_object()?; let usage = object.get("usage").and_then(Json::as_object); diff --git a/crates/core/tests/unit/atif_tests.rs b/crates/core/tests/unit/atif_tests.rs index 16a533a7..0dffda5d 100644 --- a/crates/core/tests/unit/atif_tests.rs +++ b/crates/core/tests/unit/atif_tests.rs @@ -614,6 +614,7 @@ fn test_extract_metrics_supports_provider_usage_payloads() { "prompt_tokens": 10, "completion_tokens": 20, "total_tokens": 30, + "cost_usd": 0.001, "prompt_tokens_details": { "cached_tokens": 4 } @@ -627,19 +628,56 @@ fn test_extract_metrics_supports_provider_usage_payloads() { openai_metrics.extra.as_ref().unwrap()["total_tokens"], json!(30) ); + assert_eq!(openai_metrics.cost_usd, Some(0.001)); let anthropic_metrics = extract_metrics(&json!({ "usage": { "input_tokens": 11, "output_tokens": 22, "cache_read_input_tokens": 3, - "cache_creation_input_tokens": 5 + "cache_creation_input_tokens": 5, + "cost": { "total": 0.0042 } } })) .unwrap(); assert_eq!(anthropic_metrics.prompt_tokens, Some(11)); assert_eq!(anthropic_metrics.completion_tokens, Some(22)); assert_eq!(anthropic_metrics.cached_tokens, Some(8)); + assert_eq!(anthropic_metrics.cost_usd, Some(0.0042)); +} + +#[test] +fn test_final_metrics_preserve_explicit_zero_cost_without_fabricating_tokens() { + let final_metrics = compute_final_metrics(&[AtifStep { + step_id: 1, + source: "assistant".to_string(), + message: json!("done"), + timestamp: None, + model_name: None, + reasoning_effort: None, + reasoning_content: None, + tool_calls: None, + observation: None, + metrics: Some(AtifMetrics { + prompt_tokens: None, + completion_tokens: None, + cached_tokens: None, + cost_usd: Some(0.0), + prompt_token_ids: None, + completion_token_ids: None, + logprobs: None, + extra: None, + }), + llm_call_count: None, + is_copied_context: None, + extra: None, + }]) + .unwrap(); + + assert_eq!(final_metrics.total_prompt_tokens, None); + assert_eq!(final_metrics.total_completion_tokens, None); + assert_eq!(final_metrics.total_cached_tokens, None); + assert_eq!(final_metrics.total_cost_usd, Some(0.0)); } #[test] diff --git a/crates/core/tests/unit/observability/atof_tests.rs b/crates/core/tests/unit/observability/atof_tests.rs index 43831c0d..d78f3e23 100644 --- a/crates/core/tests/unit/observability/atof_tests.rs +++ b/crates/core/tests/unit/observability/atof_tests.rs @@ -301,7 +301,8 @@ fn subscriber_preserves_wire_format_llm_lifecycle_payloads_as_raw_jsonl() { "input_tokens": 11, "output_tokens": 7, "cache_read_input_tokens": 3, - "cache_creation_input_tokens": 5 + "cache_creation_input_tokens": 5, + "cost": {"total": 0.0042} } }), ), @@ -335,7 +336,8 @@ fn subscriber_preserves_wire_format_llm_lifecycle_payloads_as_raw_jsonl() { "input_tokens": 75, "output_tokens": 20, "total_tokens": 95, - "input_tokens_details": {"cached_tokens": 10} + "input_tokens_details": {"cached_tokens": 10}, + "cost_usd": 0.005 } }), ), @@ -371,7 +373,8 @@ fn subscriber_preserves_wire_format_llm_lifecycle_payloads_as_raw_jsonl() { "prompt_tokens": 3, "completion_tokens": 4, "total_tokens": 7, - "prompt_tokens_details": {"cached_tokens": 2} + "prompt_tokens_details": {"cached_tokens": 2}, + "cost_usd": 0.001 } }), ), @@ -407,6 +410,7 @@ fn subscriber_preserves_wire_format_llm_lifecycle_payloads_as_raw_jsonl() { assert_eq!(lines[1]["scope_category"], "end"); assert_eq!(lines[1]["data"]["content"][1]["type"], "tool_use"); assert_eq!(lines[1]["data"]["usage"]["cache_creation_input_tokens"], 5); + assert_eq!(lines[1]["data"]["usage"]["cost"]["total"], 0.0042); assert_eq!(lines[2]["metadata"]["gateway_path"], "/v1/responses"); assert_eq!(lines[2]["data"]["input"], "Find the weather."); @@ -415,6 +419,7 @@ fn subscriber_preserves_wire_format_llm_lifecycle_payloads_as_raw_jsonl() { lines[3]["data"]["usage"]["input_tokens_details"]["cached_tokens"], 10 ); + assert_eq!(lines[3]["data"]["usage"]["cost_usd"], 0.005); assert_eq!(lines[4]["metadata"]["gateway_path"], "/v1/chat/completions"); assert_eq!( @@ -429,6 +434,7 @@ fn subscriber_preserves_wire_format_llm_lifecycle_payloads_as_raw_jsonl() { lines[5]["data"]["usage"]["prompt_tokens_details"]["cached_tokens"], 2 ); + assert_eq!(lines[5]["data"]["usage"]["cost_usd"], 0.001); } #[test] diff --git a/crates/core/tests/unit/observability/openinference_tests.rs b/crates/core/tests/unit/observability/openinference_tests.rs index b203ae37..129bae4f 100644 --- a/crates/core/tests/unit/observability/openinference_tests.rs +++ b/crates/core/tests/unit/observability/openinference_tests.rs @@ -692,7 +692,8 @@ fn output_value_extracts_chat_completion_display_text() { "prompt_tokens": 3, "completion_tokens": 4, "total_tokens": 7, - "prompt_tokens_details": {"cached_tokens": 2} + "prompt_tokens_details": {"cached_tokens": 2}, + "cost_usd": 0.001 } })), )); @@ -718,6 +719,7 @@ fn output_value_extracts_chat_completion_display_text() { attributes.get("llm.token_count.prompt_details.cache_read"), Some(&"2".to_string()) ); + assert_eq!(attributes.get("llm.cost.total"), Some(&"0.001".to_string())); } #[test] @@ -767,7 +769,8 @@ fn output_value_extracts_openai_responses_display_text_and_usage() { "input_tokens": 75, "output_tokens": 20, "total_tokens": 95, - "input_tokens_details": {"cached_tokens": 10} + "input_tokens_details": {"cached_tokens": 10}, + "cost_usd": 0.005 } })), )); @@ -801,6 +804,7 @@ fn output_value_extracts_openai_responses_display_text_and_usage() { attributes.get("llm.token_count.prompt_details.cache_read"), Some(&"10".to_string()) ); + assert_eq!(attributes.get("llm.cost.total"), Some(&"0.005".to_string())); } #[test] @@ -1496,6 +1500,7 @@ fn llm_end_with_manual_usage_payload_emits_token_count_attributes() { attributes.get("llm.token_count.prompt_details.cache_write"), Some(&"10".to_string()) ); + assert!(!attributes.contains_key("llm.cost.total")); } #[test] @@ -1546,7 +1551,8 @@ fn anthropic_messages_output_emits_openinference_text_tool_and_usage_attributes( "input_tokens": 11, "output_tokens": 7, "cache_read_input_tokens": 3, - "cache_creation_input_tokens": 5 + "cache_creation_input_tokens": 5, + "cost": {"total": 0.0042} } })), Some( @@ -1593,6 +1599,10 @@ fn anthropic_messages_output_emits_openinference_text_tool_and_usage_attributes( attributes.get("llm.token_count.prompt_details.cache_write"), Some(&"5".to_string()) ); + assert_eq!( + attributes.get("llm.cost.total"), + Some(&"0.0042".to_string()) + ); } #[test] diff --git a/docs/supported-integrations/openclaw-plugin.mdx b/docs/supported-integrations/openclaw-plugin.mdx index 715c2aac..8832a7fd 100644 --- a/docs/supported-integrations/openclaw-plugin.mdx +++ b/docs/supported-integrations/openclaw-plugin.mdx @@ -281,7 +281,10 @@ emits diagnostic marks instead of inventing latency. This keeps traces honest and makes current fidelity boundaries explicit. When OpenClaw provides usage data, the plugin maps input, output, total, cache -read, cache write, and cost fields into OpenInference-friendly usage fields. +read, and cache write fields into OpenInference token-count attributes, and +maps explicit cost fields into OpenInference cost attributes. +NeMo Relay does not infer provider pricing; cost attributes are emitted only +when OpenClaw supplies explicit cost fields. ## Troubleshooting diff --git a/integrations/openclaw/test/llm-replay.test.ts b/integrations/openclaw/test/llm-replay.test.ts index 0e067de1..a68a67e8 100644 --- a/integrations/openclaw/test/llm-replay.test.ts +++ b/integrations/openclaw/test/llm-replay.test.ts @@ -125,6 +125,7 @@ describe('LLM replay', () => { output_tokens: 7, cache_read_input_tokens: 3, cache_creation_input_tokens: 5, + cost: { total: 0.0042 }, }, }; @@ -182,6 +183,7 @@ describe('LLM replay', () => { cache_read_tokens: 3, cache_write_tokens: 5, total_tokens: 18, + cost_usd: 0.0042, }); assert.deepEqual((response.openclaw as ResponseOpenClaw).assistant_tool_call_names, ['web_search']); }); From e78a59b1b332c80daa549ec4ebd8551c74deaa0d Mon Sep 17 00:00:00 2001 From: mnajafian-nv Date: Wed, 3 Jun 2026 08:16:52 -0700 Subject: [PATCH 2/2] Update docs/supported-integrations/openclaw-plugin.mdx Co-authored-by: Will Killian <2007799+willkill07@users.noreply.github.com> Signed-off-by: mnajafian-nv --- docs/supported-integrations/openclaw-plugin.mdx | 2 -- 1 file changed, 2 deletions(-) diff --git a/docs/supported-integrations/openclaw-plugin.mdx b/docs/supported-integrations/openclaw-plugin.mdx index 8832a7fd..c828886f 100644 --- a/docs/supported-integrations/openclaw-plugin.mdx +++ b/docs/supported-integrations/openclaw-plugin.mdx @@ -283,8 +283,6 @@ and makes current fidelity boundaries explicit. When OpenClaw provides usage data, the plugin maps input, output, total, cache read, and cache write fields into OpenInference token-count attributes, and maps explicit cost fields into OpenInference cost attributes. -NeMo Relay does not infer provider pricing; cost attributes are emitted only -when OpenClaw supplies explicit cost fields. ## Troubleshooting