From 34d6047275eba543fc2269df3aeb1309fc67f1ba Mon Sep 17 00:00:00 2001 From: suryaiyer95 Date: Fri, 20 Mar 2026 13:07:49 -0700 Subject: [PATCH] feat: track per-generation token usage in telemetry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Emit `generation` telemetry event on every LLM step-finish with model_id, provider_id, agent, finish_reason, cost, duration_ms, and token breakdown - Token fields are flat to comply with Azure App Insights custom measurements schema: `tokens_input`, `tokens_output`, and optionally `tokens_reasoning`, `tokens_cache_read`, `tokens_cache_write` - Optional token fields are only included when the provider actually returns them — reasoning only for reasoning models, cache fields only when active - Remove unused `TokensPayload` type and special-case serializer handler - Step duration tracked from `start-step` to `finish-step` events - Update telemetry.md with accurate generation event field description - Update existing tests for flat token field shape Co-Authored-By: Claude Sonnet 4.6 --- docs/docs/reference/telemetry.md | 2 +- .../opencode/src/altimate/telemetry/index.ts | 23 +++++---------- packages/opencode/src/session/processor.ts | 29 +++++++++++++++++++ .../opencode/test/session/processor.test.ts | 16 +++++----- .../opencode/test/telemetry/telemetry.test.ts | 16 ++++------ 5 files changed, 51 insertions(+), 35 deletions(-) diff --git a/docs/docs/reference/telemetry.md b/docs/docs/reference/telemetry.md index c3b72cc6d4..4535ae72b6 100644 --- a/docs/docs/reference/telemetry.md +++ b/docs/docs/reference/telemetry.md @@ -11,7 +11,7 @@ We collect the following categories of events: | `session_start` | A new CLI session begins | | `session_end` | A CLI session ends (includes duration) | | `session_forked` | A session is forked from an existing one | -| `generation` | An AI model generation completes (model ID, token counts, duration — no prompt content) | +| `generation` | An AI model 
generation (step) completes — model ID, provider ID, agent, finish reason, cost, duration, and token breakdown: input, output, and when available: reasoning tokens (reasoning models only), cache-read tokens (prompt cache hit), cache-write tokens (new cache entry). No prompt content. | | `tool_call` | A tool is invoked (tool name and category — no arguments or output) | | `native_call` | A native engine call completes (method name and duration — no arguments) | | `command` | A CLI command is executed (command name only) | diff --git a/packages/opencode/src/altimate/telemetry/index.ts b/packages/opencode/src/altimate/telemetry/index.ts index e0f0130e44..0477bbb3a7 100644 --- a/packages/opencode/src/altimate/telemetry/index.ts +++ b/packages/opencode/src/altimate/telemetry/index.ts @@ -14,14 +14,6 @@ export namespace Telemetry { const MAX_BUFFER_SIZE = 200 const REQUEST_TIMEOUT_MS = 10_000 - export type TokensPayload = { - input: number - output: number - reasoning: number - cache_read: number - cache_write: number - } - export type Event = | { type: "session_start" @@ -50,9 +42,15 @@ export namespace Telemetry { provider_id: string agent: string finish_reason: string - tokens: TokensPayload cost: number duration_ms: number + // Flat token fields — input/output are always sent; the optional ones are set only when the provider reports them. + // No nested objects: Azure App Insights custom measures must be top-level numbers. 
+ tokens_input: number + tokens_output: number + tokens_reasoning?: number // only for reasoning models + tokens_cache_read?: number // only when a cached prompt was reused + tokens_cache_write?: number // only when a new cache entry was written } | { type: "tool_call" @@ -571,14 +569,9 @@ export namespace Telemetry { } const measurements: Record = {} - // Flatten all fields — nested `tokens` object gets prefixed keys for (const [k, v] of Object.entries(fields)) { if (k === "session_id" || k === "project_id" || k === "_retried") continue - if (k === "tokens" && typeof v === "object" && v !== null) { - for (const [tk, tv] of Object.entries(v as Record)) { - if (typeof tv === "number") measurements[`tokens_${tk}`] = tv - } - } else if (typeof v === "number") { + if (typeof v === "number") { measurements[k] = v } else if (v !== undefined && v !== null) { properties[k] = typeof v === "object" ? JSON.stringify(v) : String(v) diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts index 70ea586b84..2add53f47c 100644 --- a/packages/opencode/src/session/processor.ts +++ b/packages/opencode/src/session/processor.ts @@ -16,6 +16,9 @@ import { PermissionNext } from "@/permission/next" import { Question } from "@/question" import { PartID } from "./schema" import type { SessionID, MessageID } from "./schema" +// altimate_change start — import Telemetry for per-generation token tracking +import { Telemetry } from "@/altimate/telemetry" +// altimate_change end export namespace SessionProcessor { const DOOM_LOOP_THRESHOLD = 3 @@ -35,6 +38,9 @@ export namespace SessionProcessor { let blocked = false let attempt = 0 let needsCompaction = false + // altimate_change start — per-step generation telemetry + let stepStartTime = Date.now() + // altimate_change end const result = { get message() { @@ -233,6 +239,9 @@ export namespace SessionProcessor { case "start-step": snapshot = await Snapshot.track() + // altimate_change start — record step 
start time for generation telemetry duration + stepStartTime = Date.now() + // altimate_change end await Session.updatePart({ id: PartID.ascending(), messageID: input.assistantMessage.id, @@ -251,6 +260,26 @@ export namespace SessionProcessor { input.assistantMessage.finish = value.finishReason input.assistantMessage.cost += usage.cost input.assistantMessage.tokens = usage.tokens + // altimate_change start — emit per-generation telemetry with token breakdown + // Optional fields are only included when the provider actually returns them. + Telemetry.track({ + type: "generation", + timestamp: Date.now(), + session_id: input.sessionID, + message_id: input.assistantMessage.id, + model_id: input.model.id, + provider_id: input.model.providerID, + agent: input.assistantMessage.agent, + finish_reason: value.finishReason ?? "unknown", + cost: usage.cost, + duration_ms: Date.now() - stepStartTime, + tokens_input: usage.tokens.input, + tokens_output: usage.tokens.output, + ...(value.usage.reasoningTokens !== undefined && { tokens_reasoning: usage.tokens.reasoning }), + ...(value.usage.cachedInputTokens !== undefined && { tokens_cache_read: usage.tokens.cache.read }), + ...(usage.tokens.cache.write > 0 && { tokens_cache_write: usage.tokens.cache.write }), + }) + // altimate_change end await Session.updatePart({ id: PartID.ascending(), reason: value.finishReason, diff --git a/packages/opencode/test/session/processor.test.ts b/packages/opencode/test/session/processor.test.ts index 857966a5a6..fd12e483d6 100644 --- a/packages/opencode/test/session/processor.test.ts +++ b/packages/opencode/test/session/processor.test.ts @@ -468,20 +468,18 @@ describe("generation telemetry", () => { provider_id: "anthropic", agent: "builder", finish_reason: "end_turn", - tokens: { - input: 1000, - output: 500, - reasoning: 200, - cache_read: 800, - cache_write: 100, - }, + tokens_input: 1000, + tokens_output: 500, + tokens_reasoning: 200, + tokens_cache_read: 800, + tokens_cache_write: 100, cost: 
0.05, duration_ms: 3000, } expect(event.model_id).toBe("claude-opus-4-6") - expect(event.tokens.input).toBe(1000) - expect(event.tokens.cache_read).toBe(800) + expect(event.tokens_input).toBe(1000) + expect(event.tokens_cache_read).toBe(800) expect(event.cost).toBe(0.05) expect(event.finish_reason).toBe("end_turn") }) diff --git a/packages/opencode/test/telemetry/telemetry.test.ts b/packages/opencode/test/telemetry/telemetry.test.ts index c7c5757365..b8f4b1fb1a 100644 --- a/packages/opencode/test/telemetry/telemetry.test.ts +++ b/packages/opencode/test/telemetry/telemetry.test.ts @@ -624,7 +624,7 @@ describe("telemetry.toAppInsightsEnvelopes (indirect)", () => { } }) - test("nested tokens object is flattened with tokens_ prefix", async () => { + test("flat token fields appear in measurements", async () => { const { fetchCalls, cleanup } = await initWithMockedFetch() try { Telemetry.track({ @@ -636,13 +636,11 @@ describe("telemetry.toAppInsightsEnvelopes (indirect)", () => { provider_id: "anthropic", agent: "builder", finish_reason: "end_turn", - tokens: { - input: 100, - output: 200, - reasoning: 50, - cache_read: 10, - cache_write: 5, - }, + tokens_input: 100, + tokens_output: 200, + tokens_reasoning: 50, + tokens_cache_read: 10, + tokens_cache_write: 5, cost: 0.01, duration_ms: 2000, }) @@ -656,8 +654,6 @@ describe("telemetry.toAppInsightsEnvelopes (indirect)", () => { expect(measurements.tokens_reasoning).toBe(50) expect(measurements.tokens_cache_read).toBe(10) expect(measurements.tokens_cache_write).toBe(5) - // Raw "tokens" key should not appear in properties - expect(envelopes[0].data.baseData.properties.tokens).toBeUndefined() } finally { cleanup() }