DEVtheOPS
diff --git a/‎bun.lock‎
Lines changed: 4 additions & 0 deletions b/‎bun.lock‎
Lines changed: 4 additions & 0 deletions
diff --git a/‎package.json‎
Lines changed: 2 additions & 0 deletions b/‎package.json‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎src/config.ts‎
Lines changed: 9 additions & 0 deletions b/‎src/config.ts‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎src/handlers/message.ts‎
Lines changed: 182 additions & 60 deletions b/‎src/handlers/message.ts‎
Lines changed: 182 additions & 60 deletions
@@ -41,9 +41,11 @@
     "@opentelemetry/api": "^1.9.0",
     "@opentelemetry/exporter-logs-otlp-grpc": "^0.213.0",
     "@opentelemetry/exporter-metrics-otlp-grpc": "^0.213.0",
+    "@opentelemetry/exporter-trace-otlp-grpc": "^0.213.0",
     "@opentelemetry/resources": "^2.6.0",
     "@opentelemetry/sdk-logs": "^0.213.0",
     "@opentelemetry/sdk-metrics": "^2.6.0",
+    "@opentelemetry/sdk-trace-base": "^2.6.0",
     "@opentelemetry/semantic-conventions": "^1.40.0",
     "typescript": "^5.9.3"
   },
 
@@ -10,6 +10,7 @@ export type PluginConfig = {
   otlpHeaders: string | undefined
   resourceAttributes: string | undefined
   disabledMetrics: Set<string>
+  disabledTraces: Set<string>
 }
 
 /** Parses a positive integer from an environment variable, returning `fallback` if absent or invalid. */
@@ -41,6 +42,13 @@ export function loadConfig(): PluginConfig {
       .filter(Boolean),
   )
 
+  const disabledTraces = new Set(
+    (process.env["OPENCODE_DISABLE_TRACES"] ?? "")
+      .split(",")
+      .map(s => s.trim())
+      .filter(Boolean),
+  )
+
   return {
     enabled: !!process.env["OPENCODE_ENABLE_TELEMETRY"],
     endpoint: process.env["OPENCODE_OTLP_ENDPOINT"] ?? "http://localhost:4317",
@@ -50,6 +58,7 @@ export function loadConfig(): PluginConfig {
     otlpHeaders,
     resourceAttributes,
     disabledMetrics,
+    disabledTraces,
   }
 }
 
 
@@ -1,6 +1,7 @@
 import { SeverityNumber } from "@opentelemetry/api-logs"
+import { SpanStatusCode, SpanKind, context, trace } from "@opentelemetry/api"
 import type { AssistantMessage, EventMessageUpdated, EventMessagePartUpdated, ToolPart } from "@opencode-ai/sdk"
-import { errorSummary, setBoundedMap, accumulateSessionTotals, isMetricEnabled } from "../util.ts"
+import { errorSummary, setBoundedMap, accumulateSessionTotals, isMetricEnabled, isTraceEnabled } from "../util.ts"
 import type { HandlerContext } from "../types.ts"
 
 type SubtaskPart = {
@@ -13,8 +14,8 @@ type SubtaskPart = {
 }
 
 /**
- * Handles a completed assistant message: increments token and cost counters and emits
- * either an `api_request` or `api_error` log event depending on whether the message errored.
+ * Handles a completed assistant message: increments token and cost counters, emits
+ * either an `api_request` or `api_error` log event, and ends the LLM span for this message.
  * The `agent` attribute is sourced from the session totals, which are populated by the
  * `chat.message` hook when the user prompt is received.
  */
@@ -75,6 +76,28 @@ export function handleMessageUpdated(e: EventMessageUpdated, ctx: HandlerContext
     cost_usd: assistant.cost,
   })
 
+  const msgKey = `${sessionID}:${assistant.id}`
+  const msgSpan = ctx.messageSpans.get(msgKey)
+  if (msgSpan) {
+    msgSpan.setAttributes({
+      "gen_ai.usage.input_tokens": assistant.tokens.input,
+      "gen_ai.usage.output_tokens": assistant.tokens.output,
+      "gen_ai.usage.reasoning_tokens": assistant.tokens.reasoning,
+      "gen_ai.usage.cache_read_tokens": assistant.tokens.cache.read,
+      "gen_ai.usage.cache_creation_tokens": assistant.tokens.cache.write,
+      "gen_ai.response.finish_reason": assistant.error ? "error" : "stop",
+      cost_usd: assistant.cost,
+      duration_ms: duration,
+    })
+    if (assistant.error) {
+      msgSpan.setStatus({ code: SpanStatusCode.ERROR, message: errorSummary(assistant.error) })
+    } else {
+      msgSpan.setStatus({ code: SpanStatusCode.OK })
+    }
+    msgSpan.end(assistant.time.completed)
+    ctx.messageSpans.delete(msgKey)
+  }
+
   if (assistant.error) {
     ctx.logger.emit({
       severityNumber: SeverityNumber.ERROR,
@@ -137,9 +160,13 @@ export function handleMessageUpdated(e: EventMessageUpdated, ctx: HandlerContext
 
 /**
  * Tracks tool execution time between `running` and `completed`/`error` part updates,
- * records a `tool.duration` histogram measurement, and emits a `tool_result` log event.
- * Also handles `subtask` parts, incrementing the sub-agent invocation counter and emitting
- * a `subtask_invoked` log event.
+ * records a `tool.duration` histogram measurement, manages the tool child span, and emits
+ * a `tool_result` log event. Also handles `subtask` parts, incrementing the sub-agent
+ * invocation counter and emitting a `subtask_invoked` log event.
+ *
+ * For tool spans: on `running` a child span of the current session span is started and stored
+ * in `pendingToolSpans`. On `completed`/`error` the span is ended with appropriate status.
+ * If no `running` event was seen (out-of-order), a best-effort span is started and immediately ended.
  */
 export function handleMessagePartUpdated(e: EventMessagePartUpdated, ctx: HandlerContext) {
   const part = e.properties.part
@@ -175,70 +202,165 @@ export function handleMessagePartUpdated(e: EventMessagePartUpdated, ctx: Handle
     })
   }
 
-  if (part.type !== "tool") return
+  if (part.type === "tool") {
+    const toolPart = part as ToolPart
+    const key = `${toolPart.sessionID}:${toolPart.callID}`
 
-  const toolPart = part as ToolPart
-  const key = `${toolPart.sessionID}:${toolPart.callID}`
+    if (toolPart.state.status === "running") {
+      const toolSpan = isTraceEnabled("tool", ctx)
+        ? (() => {
+            const sessionSpan = ctx.sessionSpans.get(toolPart.sessionID)
+            const parentCtx = sessionSpan
+              ? trace.setSpan(context.active(), sessionSpan)
+              : context.active()
+            return ctx.tracer.startSpan(
+              `${ctx.tracePrefix}tool.${toolPart.tool}`,
+              {
+                startTime: toolPart.state.time.start,
+                kind: SpanKind.INTERNAL,
+                attributes: {
+                  "session.id": toolPart.sessionID,
+                  "tool.name": toolPart.tool,
+                  ...ctx.commonAttrs,
+                },
+              },
+              parentCtx,
+            )
+          })()
+        : undefined
+      setBoundedMap(ctx.pendingToolSpans, key, {
+        tool: toolPart.tool,
+        sessionID: toolPart.sessionID,
+        startMs: toolPart.state.time.start,
+        span: toolSpan,
+      })
+      ctx.log("debug", "otel: tool span started", { sessionID: toolPart.sessionID, tool: toolPart.tool, key })
+      return
+    }
 
-  if (toolPart.state.status === "running") {
-    setBoundedMap(ctx.pendingToolSpans, key, {
-      tool: toolPart.tool,
-      sessionID: toolPart.sessionID,
-      startMs: toolPart.state.time.start,
-    })
-    ctx.log("debug", "otel: tool span started", { sessionID: toolPart.sessionID, tool: toolPart.tool, key })
-    return
-  }
+    if (toolPart.state.status !== "completed" && toolPart.state.status !== "error") return
+
+    const pending = ctx.pendingToolSpans.get(key)
+    ctx.pendingToolSpans.delete(key)
+    const start = pending?.startMs ?? toolPart.state.time.start
+    const end = toolPart.state.time.end
+    if (end === undefined) return
+    const duration_ms = end - start
+    const success = toolPart.state.status === "completed"
 
-  if (toolPart.state.status !== "completed" && toolPart.state.status !== "error") return
+    if (isMetricEnabled("tool.duration", ctx)) {
+      ctx.instruments.toolDurationHistogram.record(duration_ms, {
+        ...ctx.commonAttrs,
+        "session.id": toolPart.sessionID,
+        tool_name: toolPart.tool,
+        success,
+      })
+    }
 
-  const span = ctx.pendingToolSpans.get(key)
-  ctx.pendingToolSpans.delete(key)
-  const start = span?.startMs ?? toolPart.state.time.start
-  const end = toolPart.state.time.end
-  if (end === undefined) return
-  const duration_ms = end - start
-  const success = toolPart.state.status === "completed"
+    if (isTraceEnabled("tool", ctx)) {
+      const toolSpan = pending?.span ?? (() => {
+        const sessionSpan = ctx.sessionSpans.get(toolPart.sessionID)
+        const parentCtx = sessionSpan
+          ? trace.setSpan(context.active(), sessionSpan)
+          : context.active()
+        return ctx.tracer.startSpan(
+          `${ctx.tracePrefix}tool.${toolPart.tool}`,
+          {
+            startTime: start,
+            kind: SpanKind.INTERNAL,
+            attributes: {
+              "session.id": toolPart.sessionID,
+              "tool.name": toolPart.tool,
+              ...ctx.commonAttrs,
+            },
+          },
+          parentCtx,
+        )
+      })()
+      toolSpan.setAttribute("tool.success", success)
+      if (success) {
+        const output = (toolPart.state as { output: string }).output
+        toolSpan.setAttribute("tool.result_size_bytes", Buffer.byteLength(output, "utf8"))
+        toolSpan.setStatus({ code: SpanStatusCode.OK })
+      } else {
+        const err = (toolPart.state as { error: string }).error
+        toolSpan.setAttribute("tool.error", err)
+        toolSpan.setStatus({ code: SpanStatusCode.ERROR, message: err })
+      }
+      toolSpan.end(end)
+    }
 
-  if (isMetricEnabled("tool.duration", ctx)) {
-    ctx.instruments.toolDurationHistogram.record(duration_ms, {
-      ...ctx.commonAttrs,
-      "session.id": toolPart.sessionID,
+    const sizeAttr = success
+      ? { tool_result_size_bytes: Buffer.byteLength((toolPart.state as { output: string }).output, "utf8") }
+      : { error: (toolPart.state as { error: string }).error }
+
+    ctx.logger.emit({
+      severityNumber: success ? SeverityNumber.INFO : SeverityNumber.ERROR,
+      severityText: success ? "INFO" : "ERROR",
+      timestamp: start,
+      observedTimestamp: Date.now(),
+      body: "tool_result",
+      attributes: {
+        "event.name": "tool_result",
+        "session.id": toolPart.sessionID,
+        tool_name: toolPart.tool,
+        success,
+        duration_ms,
+        ...sizeAttr,
+        ...ctx.commonAttrs,
+      },
+    })
+    ctx.log("debug", "otel: tool.duration histogram recorded", {
+      sessionID: toolPart.sessionID,
       tool_name: toolPart.tool,
+      duration_ms,
       success,
     })
-  }
-
-  const sizeAttr = success
-    ? { tool_result_size_bytes: Buffer.byteLength((toolPart.state as { output: string }).output, "utf8") }
-    : { error: (toolPart.state as { error: string }).error }
-
-  ctx.logger.emit({
-    severityNumber: success ? SeverityNumber.INFO : SeverityNumber.ERROR,
-    severityText: success ? "INFO" : "ERROR",
-    timestamp: start,
-    observedTimestamp: Date.now(),
-    body: "tool_result",
-    attributes: {
-      "event.name": "tool_result",
-      "session.id": toolPart.sessionID,
+    return ctx.log(success ? "info" : "error", "otel: tool_result", {
+      sessionID: toolPart.sessionID,
       tool_name: toolPart.tool,
       success,
       duration_ms,
-      ...sizeAttr,
-      ...ctx.commonAttrs,
+    })
+  }
+}
+
+/**
+ * Starts an LLM span for an assistant message when it first appears in `message.updated`.
+ * The span is parented to the session span and carries `gen_ai.*` semantic attributes for
+ * the model and provider. It is ended in `handleMessageUpdated` once the message completes.
+ *
+ * Only called for assistant messages that have not yet completed (`time.completed` absent).
+ */
+export function startMessageSpan(
+  sessionID: string,
+  messageID: string,
+  modelID: string,
+  providerID: string,
+  startTime: number,
+  ctx: HandlerContext,
+) {
+  if (!isTraceEnabled("llm", ctx)) return
+  const msgKey = `${sessionID}:${messageID}`
+  if (ctx.messageSpans.has(msgKey)) return
+  const sessionSpan = ctx.sessionSpans.get(sessionID)
+  const parentCtx = sessionSpan
+    ? trace.setSpan(context.active(), sessionSpan)
+    : context.active()
+
+  const msgSpan = ctx.tracer.startSpan(
+    "gen_ai.chat",
+    {
+      startTime,
+      kind: SpanKind.CLIENT,
+      attributes: {
+        "gen_ai.system": providerID,
+        "gen_ai.request.model": modelID,
+        "session.id": sessionID,
+        ...ctx.commonAttrs,
+      },
     },
-  })
-  ctx.log("debug", "otel: tool.duration histogram recorded", {
-    sessionID: toolPart.sessionID,
-    tool_name: toolPart.tool,
-    duration_ms,
-    success,
-  })
-  return ctx.log(success ? "info" : "error", "otel: tool_result", {
-    sessionID: toolPart.sessionID,
-    tool_name: toolPart.tool,
-    success,
-    duration_ms,
-  })
+    parentCtx,
+  )
+  setBoundedMap(ctx.messageSpans, msgKey, msgSpan)
 }