From 2038e2f22560a900a96488f0ac410dd380f14910 Mon Sep 17 00:00:00 2001 From: suryaiyer95 Date: Fri, 20 Mar 2026 01:19:29 -0700 Subject: [PATCH 1/2] feat: track per-generation token usage in telemetry (input/output/cache/reasoning) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Emit `generation` telemetry event on every LLM step-finish with model_id, provider_id, agent, finish_reason, cost, duration_ms, and token breakdown - Token fields are flat (no nested objects) to comply with Azure App Insights custom measurements schema: `tokens_input`, `tokens_output`, and optionally `tokens_reasoning`, `tokens_cache_read`, `tokens_cache_write` - Optional token fields are only included when the provider actually returns them — reasoning only for reasoning models, cache_read/write only when prompt caching is active — never defaulted to 0 - Step duration tracked from `start-step` to `finish-step` events - Adds `altimate_change` markers in `processor.ts` (upstream file) - Updates telemetry.md docs with accurate generation event description Co-Authored-By: Claude Sonnet 4.6 --- docs/docs/reference/telemetry.md | 2 +- .../opencode/src/altimate/telemetry/index.ts | 8 ++++- packages/opencode/src/session/processor.ts | 29 +++++++++++++++++++ 3 files changed, 37 insertions(+), 2 deletions(-) diff --git a/docs/docs/reference/telemetry.md b/docs/docs/reference/telemetry.md index c3b72cc6d4..4535ae72b6 100644 --- a/docs/docs/reference/telemetry.md +++ b/docs/docs/reference/telemetry.md @@ -11,7 +11,7 @@ We collect the following categories of events: | `session_start` | A new CLI session begins | | `session_end` | A CLI session ends (includes duration) | | `session_forked` | A session is forked from an existing one | -| `generation` | An AI model generation completes (model ID, token counts, duration — no prompt content) | +| `generation` | An AI model generation (step) completes — model ID, provider ID, agent, finish reason, cost, duration, and token breakdown: input, output, and when available: reasoning tokens (reasoning models only), cache-read tokens (prompt cache hit), cache-write tokens (new cache entry). No prompt content. | | `tool_call` | A tool is invoked (tool name and category — no arguments or output) | | `native_call` | A native engine call completes (method name and duration — no arguments) | | `command` | A CLI command is executed (command name only) | diff --git a/packages/opencode/src/altimate/telemetry/index.ts b/packages/opencode/src/altimate/telemetry/index.ts index e0f0130e44..204fd937eb 100644 --- a/packages/opencode/src/altimate/telemetry/index.ts +++ b/packages/opencode/src/altimate/telemetry/index.ts @@ -50,9 +50,15 @@ export namespace Telemetry { provider_id: string agent: string finish_reason: string - tokens: TokensPayload cost: number duration_ms: number + // Flat token fields — only present when data is available from the provider. + // No nested objects: Azure App Insights custom measures must be top-level numbers. + tokens_input: number + tokens_output: number + tokens_reasoning?: number // only for reasoning models + tokens_cache_read?: number // only when a cached prompt was reused + tokens_cache_write?: number // only when a new cache entry was written } | { type: "tool_call" diff --git a/packages/opencode/src/session/processor.ts b/packages/opencode/src/session/processor.ts index 70ea586b84..2fd644cfe2 100644 --- a/packages/opencode/src/session/processor.ts +++ b/packages/opencode/src/session/processor.ts @@ -16,6 +16,9 @@ import { PermissionNext } from "@/permission/next" import { Question } from "@/question" import { PartID } from "./schema" import type { SessionID, MessageID } from "./schema" +// altimate_change start — import Telemetry for per-generation token tracking +import { Telemetry } from "@/altimate/telemetry" +// altimate_change end export namespace SessionProcessor { const DOOM_LOOP_THRESHOLD = 3 @@ -47,6 +50,9 @@ export namespace SessionProcessor { log.info("process") needsCompaction = false const shouldBreak = (await Config.get()).experimental?.continue_loop_on_deny !== true + // altimate_change start — track step start time for generation telemetry duration_ms + let stepStartTime = Date.now() + // altimate_change end while (true) { try { let currentText: MessageV2.TextPart | undefined @@ -240,6 +246,9 @@ export namespace SessionProcessor { snapshot, type: "step-start", }) + // altimate_change start — record step start time for generation telemetry duration + stepStartTime = Date.now() + // altimate_change end break case "finish-step": @@ -251,6 +260,26 @@ export namespace SessionProcessor { input.assistantMessage.finish = value.finishReason input.assistantMessage.cost += usage.cost input.assistantMessage.tokens = usage.tokens + // altimate_change start — emit per-generation telemetry with token breakdown + // Only include token fields that are actually provided by the API (never default to 0). + Telemetry.track({ + type: "generation", + timestamp: Date.now(), + session_id: input.sessionID, + message_id: input.assistantMessage.id, + model_id: streamInput.model.id, + provider_id: streamInput.model.providerID, + agent: streamInput.agent.name, + finish_reason: value.finishReason, + cost: usage.cost, + duration_ms: Date.now() - stepStartTime, + tokens_input: usage.tokens.input, + tokens_output: usage.tokens.output, + ...(value.usage.reasoningTokens !== undefined && { tokens_reasoning: usage.tokens.reasoning }), + ...(value.usage.cachedInputTokens !== undefined && { tokens_cache_read: usage.tokens.cache.read }), + ...(usage.tokens.cache.write > 0 && { tokens_cache_write: usage.tokens.cache.write }), + }) + // altimate_change end await Session.updatePart({ id: PartID.ascending(), reason: value.finishReason, From a389e18980c4b9702bb03218e2131614baf84b24 Mon Sep 17 00:00:00 2001 From: suryaiyer95 Date: Fri, 20 Mar 2026 01:29:28 -0700 Subject: [PATCH 2/2] fix: remove unused TokensPayload type and update tests for flat token fields - Remove `TokensPayload` export (dead code since generation event now uses flat fields) - Update processor.test.ts: construct generation event with flat tokens_* fields - Update telemetry.test.ts: use flat tokens_* fields, rename test to reflect new shape Co-Authored-By: Claude Sonnet 4.6 --- .../opencode/src/altimate/telemetry/index.ts | 7 ------- packages/opencode/test/session/processor.test.ts | 16 +++++++--------- .../opencode/test/telemetry/telemetry.test.ts | 16 ++++++---------- 3 files changed, 13 insertions(+), 26 deletions(-) diff --git a/packages/opencode/src/altimate/telemetry/index.ts b/packages/opencode/src/altimate/telemetry/index.ts index 204fd937eb..ded93596cf 100644 --- a/packages/opencode/src/altimate/telemetry/index.ts +++ b/packages/opencode/src/altimate/telemetry/index.ts @@ -14,13 +14,6 @@ export namespace Telemetry { const MAX_BUFFER_SIZE = 200 const REQUEST_TIMEOUT_MS = 10_000 - export type TokensPayload = { - input: number - output: number - reasoning: number - cache_read: number - cache_write: number - } export type Event = | { diff --git a/packages/opencode/test/session/processor.test.ts b/packages/opencode/test/session/processor.test.ts index 857966a5a6..fd12e483d6 100644 --- a/packages/opencode/test/session/processor.test.ts +++ b/packages/opencode/test/session/processor.test.ts @@ -468,20 +468,18 @@ describe("generation telemetry", () => { provider_id: "anthropic", agent: "builder", finish_reason: "end_turn", - tokens: { - input: 1000, - output: 500, - reasoning: 200, - cache_read: 800, - cache_write: 100, - }, + tokens_input: 1000, + tokens_output: 500, + tokens_reasoning: 200, + tokens_cache_read: 800, + tokens_cache_write: 100, cost: 0.05, duration_ms: 3000, } expect(event.model_id).toBe("claude-opus-4-6") - expect(event.tokens.input).toBe(1000) - expect(event.tokens.cache_read).toBe(800) + expect(event.tokens_input).toBe(1000) + expect(event.tokens_cache_read).toBe(800) expect(event.cost).toBe(0.05) expect(event.finish_reason).toBe("end_turn") }) diff --git a/packages/opencode/test/telemetry/telemetry.test.ts b/packages/opencode/test/telemetry/telemetry.test.ts index c7c5757365..b8f4b1fb1a 100644 --- a/packages/opencode/test/telemetry/telemetry.test.ts +++ b/packages/opencode/test/telemetry/telemetry.test.ts @@ -624,7 +624,7 @@ describe("telemetry.toAppInsightsEnvelopes (indirect)", () => { } }) - test("nested tokens object is flattened with tokens_ prefix", async () => { + test("flat token fields appear in measurements", async () => { const { fetchCalls, cleanup } = await initWithMockedFetch() try { Telemetry.track({ @@ -636,13 +636,11 @@ describe("telemetry.toAppInsightsEnvelopes (indirect)", () => { provider_id: "anthropic", agent: "builder", finish_reason: "end_turn", - tokens: { - input: 100, - output: 200, - reasoning: 50, - cache_read: 10, - cache_write: 5, - }, + tokens_input: 100, + tokens_output: 200, + tokens_reasoning: 50, + tokens_cache_read: 10, + tokens_cache_write: 5, cost: 0.01, duration_ms: 2000, }) @@ -656,8 +654,6 @@ describe("telemetry.toAppInsightsEnvelopes (indirect)", () => { expect(measurements.tokens_reasoning).toBe(50) expect(measurements.tokens_cache_read).toBe(10) expect(measurements.tokens_cache_write).toBe(5) - // Raw "tokens" key should not appear in properties - expect(envelopes[0].data.baseData.properties.tokens).toBeUndefined() } finally { cleanup() }