diff --git a/packages/voltcode/package.json b/packages/voltcode/package.json
index 648eca762..fa1341191 100644
--- a/packages/voltcode/package.json
+++ b/packages/voltcode/package.json
@@ -14,6 +14,7 @@
     "control-plane": "bun run src/control-plane/main.ts",
     "scheduler": "bun run src/control-plane/scheduler.ts",
     "check-status": "bun run script/check_status.ts",
+    "profile-llm-calls": "bun run scripts/profile-llm-calls.ts --cases short --depth 1 --breadth 1",
     "random": "echo 'Random script updated at $(date)' && echo 'Change queued successfully' && echo 'Another change made' && echo 'Yet another change' && echo 'One more change' && echo 'Final change' && echo 'Another final change' && echo 'Yet another final change'",
     "clean": "echo 'Cleaning up...' && rm -rf node_modules dist",
     "lint": "echo 'Running lint checks...' && bun test --coverage",
diff --git a/packages/voltcode/scripts/profile-llm-calls.ts b/packages/voltcode/scripts/profile-llm-calls.ts
new file mode 100644
index 000000000..1878c5915
--- /dev/null
+++ b/packages/voltcode/scripts/profile-llm-calls.ts
@@ -0,0 +1,521 @@
+import { Session } from "../src/session"
+import { SessionPrompt } from "../src/session/prompt"
+import { Provider } from "../src/provider/provider"
+import { bootstrap } from "../src/cli/bootstrap"
+import { LLMProfiler } from "../src/session/llm-profiler"
+import { Identifier } from "../src/id/id"
+import { promises as fs } from "fs"
+import { Log } from "../src/util/log"
+
+const MODEL = {
+  providerID: "anthropic",
+  modelID: "claude-opus-4-6",
+}
+
+type TestCase = {
+  name: string
+  depth: number
+  breadth: number
+}
+
+type RunResult = {
+  name: string
+  totalDurationMs: number
+  sessionID: string
+  sessions: string[]
+  calls: ReturnType<typeof LLMProfiler.snapshot>["calls"]
+}
+
+const CASES: TestCase[] = [
+  { name: "baseline-no-subtask", depth: 0, breadth: 0 },
+  { name: "single-subtask", depth: 1, breadth: 1 },
+  { name: "tree-depth-2-breadth-2", depth: 2, breadth: 2 },
+]
+
+function toArg(name: string, fallback: string): string {
+  const index = process.argv.findIndex((item) => item === `--${name}`)
+  if (index === -1) return fallback
+  return process.argv[index + 1] ?? fallback
+}
+
+async function readModel() {
+  return Provider.getModel(MODEL.providerID, MODEL.modelID)
+}
+
+function buildSubtaskPrompt(parts: { sessionID: string; depth: number; index: number }) {
+  return [
+    {
+      type: "subtask",
+      agent: "general",
+      description: `tree-${parts.depth}-node-${parts.index} for ${parts.sessionID}`,
+      prompt: "Perform this leaf task with one short sentence and stop.",
+      model: MODEL,
+    },
+  ] as const
+}
+
+function buildLeafPrompt() {
+  return [{ type: "text", text: "Reply with a short acknowledgement and stop." }]
+}
+
+async function collectChildren(
+  beforeMessageIDs: Set<string>,
+  sessionID: string,
+): Promise<string[]> {
+  const messages = await Session.messages({ sessionID })
+  const newMessages = messages.filter((message) => !beforeMessageIDs.has(message.info.id))
+  const childSessionIDs = new Set<string>()
+
+  for (const message of newMessages) {
+    if (message.info.role !== "assistant") continue
+    for (const part of message.parts) {
+      if (part.type !== "tool" || part.tool !== "task") continue
+      const state = part.state as { metadata?: { sessionId?: string }; output?: string }
+      const metaSession = state.metadata?.sessionId
+      const outputSession = typeof part.state === "object" && typeof state.output === "string" ? state.output : undefined
+      const fromMetadata = metaSession && childSessionIDs.has(metaSession) ? undefined : metaSession
+      const fromOutput = outputSession ? outputSession.match(/session_id:\s*([A-Za-z0-9_-]+)/i)?.[1] : undefined
+      const child = fromMetadata ?? fromOutput
+      if (child) childSessionIDs.add(child)
+        }
+      }
+
+  return [...childSessionIDs]
+}
+
+async function executeSubtaskTree(input: {
+  sessionID: string
+  depthRemaining: number
+  path: string
+  breadth: number
+  model: Awaited<ReturnType<typeof readModel>>
+  sessions: Set<string>
+}) {
+  input.sessions.add(input.sessionID)
+
+  const parts =
+    input.depthRemaining > 0
+      ? Array.from({ length: input.breadth }, (_, index) =>
+          buildSubtaskPrompt({
+            sessionID: input.sessionID,
+            depth: input.depthRemaining,
+            index: index + 1,
+          }),
+        ).flat()
+      : buildLeafPrompt()
+
+  const before = await Session.messages({ sessionID: input.sessionID })
+  const beforeIDs = new Set(before.map((msg) => msg.info.id))
+
+  await SessionPrompt.prompt({
+    sessionID: input.sessionID,
+    model: {
+      providerID: input.model.providerID,
+      modelID: input.model.id,
+    },
+    agent: "general",
+    parts,
+  })
+
+  if (input.depthRemaining === 0) return
+
+  const childSessions = await collectChildren(beforeIDs, input.sessionID)
+  for (let i = 0; i < childSessions.length; i++) {
+    await executeSubtaskTree({
+      sessionID: childSessions[i],
+      depthRemaining: input.depthRemaining - 1,
+      path: `${input.path}.${i + 1}`,
+      breadth: input.breadth,
+      model: input.model,
+      sessions: input.sessions,
+    })
+  }
+}
+
+function percentile(values: number[], ratio: number): number {
+  if (values.length === 0) return 0
+  const sorted = [...values].sort((a, b) => a - b)
+  const index = Math.min(sorted.length - 1, Math.max(0, Math.ceil(ratio * sorted.length) - 1))
+  return sorted[index] ?? 0
+}
+
+function hashPrefix(hash: string): string {
+  return hash.slice(0, 10)
+}
+
+function hashReuseStats(values: string[]) {
+  const seen = new Set<string>()
+  let reused = 0
+  for (const value of values) {
+    if (seen.has(value)) {
+      reused += 1
+    } else {
+      seen.add(value)
+    }
+  }
+  return { uniqueCount: seen.size, reusedCount: reused }
+}
+
+async function summarizeCase(result: RunResult) {
+  const allCalls = result.calls
+  const completed = allCalls.filter((call) => call.status === "completed")
+  const failed = allCalls.filter((call) => call.status === "failed")
+  const active = allCalls.filter((call) => call.status === "started")
+
+  const sessionDepth = new Map<string, number>()
+  const sessionParent = new Map<string, string | undefined>()
+  for (const id of result.sessions) {
+    if (sessionDepth.has(id)) continue
+    const root = await Session.get(id).catch(() => undefined)
+    if (!root) continue
+    sessionParent.set(id, root.parentID)
+    let depth = 0
+    let cursor: string | undefined = root.parentID
+    const visited = new Set<string>()
+    while (cursor) {
+      if (visited.has(cursor)) break
+      visited.add(cursor)
+      if (sessionDepth.has(cursor)) {
+        depth += sessionDepth.get(cursor)! + 1
+        break
+      }
+      const parent = await Session.get(cursor).catch(() => undefined)
+      if (!parent?.parentID) break
+      cursor = parent.parentID
+      depth += 1
+    }
+    sessionDepth.set(id, Math.max(0, depth))
+  }
+
+  const callsWithDepth = allCalls.map((call) => ({
+    ...call,
+    depth: sessionDepth.get(call.sessionID) ?? 0,
+  }))
+
+  const totalLlmMs = completed.reduce((sum, call) => sum + (call.totalMs ?? 0), 0)
+  const totalFirstChunk = completed.reduce((sum, call) => sum + (call.firstChunkMs ?? 0), 0)
+  const totalStreamCreated = completed.reduce((sum, call) => sum + (call.streamCreatedMs ?? 0), 0)
+
+  const completedByDepth = new Map<number, number[]>()
+  for (const call of completed) {
+    const depth = sessionDepth.get(call.sessionID) ?? 0
+    const bucket = completedByDepth.get(depth) ?? []
+    bucket.push(call.totalMs ?? 0)
+    completedByDepth.set(depth, bucket)
+  }
+
+  const tokenTotals = completed.reduce(
+    (acc, call) => {
+      acc.input += call.usage?.inputTokens ?? 0
+      acc.output += call.usage?.outputTokens ?? 0
+      acc.reasoning += call.usage?.reasoningTokens ?? 0
+      acc.cacheRead += call.usage?.cacheReadTokens ?? 0
+      acc.cacheWrite += call.usage?.cacheWriteTokens ?? 0
+      return acc
+    },
+    { input: 0, output: 0, reasoning: 0, cacheRead: 0, cacheWrite: 0 },
+  )
+
+  const processorCalls = completed.filter((call) => call.source === "session.processor")
+  const orderedCompleted = [...completed].sort((a, b) => a.startedAt - b.startedAt)
+  const firstCallBySession = new Map<string, (typeof completed)[number]>()
+  for (const call of orderedCompleted) {
+    if (!firstCallBySession.has(call.sessionID)) firstCallBySession.set(call.sessionID, call)
+  }
+
+  const processorHashes = processorCalls.map((call) => call.context.systemPromptHash)
+  const processorHistoryHashes = processorCalls.map((call) => call.context.messageHistoryHash)
+  const processorHashStats = hashReuseStats(processorHashes)
+  const historyHashStats = hashReuseStats(processorHistoryHashes)
+
+  let subtaskHashMatch = 0
+  let subtaskHashDiff = 0
+  let subtaskHashUnknown = 0
+  let subtaskHistoryMatch = 0
+  let subtaskHistoryDiff = 0
+  let subtaskHistoryUnknown = 0
+  let subtaskSystemMatchHistoryDiff = 0
+  for (const [sessionID, firstCall] of firstCallBySession) {
+    const parentID = sessionParent.get(sessionID)
+    if (!parentID) continue
+    const parentFirstCall = firstCallBySession.get(parentID)
+    if (!parentFirstCall) {
+      subtaskHashUnknown += 1
+      subtaskHistoryUnknown += 1
+      continue
+    }
+    const systemMatch = parentFirstCall.context.systemPromptHash === firstCall.context.systemPromptHash
+    const historyMatch = parentFirstCall.context.messageHistoryHash === firstCall.context.messageHistoryHash
+
+    if (systemMatch) {
+      subtaskHashMatch += 1
+      if (!historyMatch) {
+        subtaskSystemMatchHistoryDiff += 1
+      }
+    } else {
+      subtaskHashDiff += 1
+    }
+
+    if (historyMatch) {
+      subtaskHistoryMatch += 1
+    } else {
+      subtaskHistoryDiff += 1
+    }
+  }
+  const subtaskSessionsWithCallData = subtaskHashMatch + subtaskHashDiff + subtaskHashUnknown
+
+  const latency = completed.map((call) => call.totalMs ?? 0)
+  const ttft = completed
+    .map((call) => call.firstChunkMs ?? 0)
+    .filter((value) => value > 0)
+  const streamCreated = completed
+    .map((call) => call.streamCreatedMs ?? 0)
+    .filter((value) => value > 0)
+
+  console.log(`\n=== ${result.name} ===`)
+  console.log(`total runtime: ${Math.round(result.totalDurationMs)}ms`)
+  console.log(`sessions touched: ${result.sessions.length}`)
+  console.log(`llm calls: ${allCalls.length} total, ${completed.length} completed, ${failed.length} failed`)
+  if (active.length > 0) {
+    console.log(`active llm calls at snapshot: ${active.length}`)
+  }
+  console.log(`output tokens: ${tokenTotals.output} (input ${tokenTotals.input}, reasoning ${tokenTotals.reasoning})`)
+  console.log(
+    `cache tokens: read ${tokenTotals.cacheRead} (${Math.round((tokenTotals.cacheRead / Math.max(1, completed.length)) * 10_000) / 10_000} ` +
+      `avg, ${Math.round((tokenTotals.cacheRead / Math.max(1, tokenTotals.input + tokenTotals.output || 1)) * 100)}% of in+out), ` +
+      `write ${tokenTotals.cacheWrite}`,
+  )
+  console.log(
+    `llm latency total: ${Math.round(totalLlmMs)}ms (mean ${Math.round(totalLlmMs / Math.max(1, completed.length))}ms)`,
+  )
+  console.log(`ttft total: ${Math.round(totalFirstChunk)}ms`)
+  console.log(`stream-init total: ${Math.round(totalStreamCreated)}ms`)
+  if (result.totalDurationMs > 0) {
+    console.log(`llm share of wall time: ${((totalLlmMs / result.totalDurationMs) * 100).toFixed(1)}%`)
+  }
+  if (latency.length) {
+    console.log(
+      `latency p50=${percentile(latency, 0.5)}ms p95=${percentile(latency, 0.95)}ms p99=${percentile(latency, 0.99)}ms`,
+    )
+  }
+  if (ttft.length) {
+    console.log(`ttft p50=${percentile(ttft, 0.5)}ms p95=${percentile(ttft, 0.95)}ms p99=${percentile(ttft, 0.99)}ms`)
+  }
+  if (streamCreated.length) {
+    console.log(
+      `stream-init p50=${percentile(streamCreated, 0.5)}ms p95=${percentile(streamCreated, 0.95)}ms p99=${percentile(
+        streamCreated,
+        0.99,
+      )}ms`,
+    )
+  }
+  if (processorCalls.length > 0) {
+    console.log(
+      `system prompt hashes: ${processorCalls.length} processor calls, ${processorHashStats.uniqueCount} unique; ` +
+        `reused from earlier ${processorHashStats.reusedCount} (${Math.round((processorHashStats.reusedCount / processorCalls.length) * 100)}%)`,
+    )
+    console.log(
+      `full-prompt history hashes: ${processorCalls.length} processor calls, ${historyHashStats.uniqueCount} unique; ` +
+        `reused from earlier ${historyHashStats.reusedCount} (${Math.round((historyHashStats.reusedCount / processorCalls.length) * 100)}%)`,
+    )
+  }
+  if (subtaskSessionsWithCallData > 0) {
+    console.log(
+      `subtask first-call system hash: match=${subtaskHashMatch} diff=${subtaskHashDiff} unknown=${subtaskHashUnknown} ` +
+        `(match ${Math.round((subtaskHashMatch / subtaskSessionsWithCallData) * 100)}%)`,
+    )
+    console.log(
+      `subtask first-call full prompt hash: match=${subtaskHistoryMatch} diff=${subtaskHistoryDiff} unknown=${subtaskHistoryUnknown} ` +
+        `(match ${Math.round((subtaskHistoryMatch / subtaskSessionsWithCallData) * 100)}%)`,
+    )
+    if (subtaskHashMatch > 0) {
+      console.log(
+        `system-match but different full prompt: ${subtaskSystemMatchHistoryDiff} of ${subtaskHashMatch} ` +
+          `(${Math.round((subtaskSystemMatchHistoryDiff / Math.max(1, subtaskHashMatch)) * 100)}%)`,
+      )
+    }
+  }
+
+  const byDepthStats = [...completedByDepth.entries()]
+    .sort((a, b) => a[0] - b[0])
+    .map(([depth, items]) => `depth ${depth}: calls=${items.length} p50=${percentile(items, 0.5)}ms p95=${percentile(items, 0.95)}ms`)
+  if (byDepthStats.length) {
+    console.log("depth breakdown:")
+    for (const row of byDepthStats) console.log(`  ${row}`)
+  }
+
+  const bySource = new Map<string, number[]>()
+  for (const call of completed) {
+    const bucket = bySource.get(call.source) ?? []
+    bucket.push(call.totalMs ?? 0)
+    bySource.set(call.source, bucket)
+  }
+  const sourceStats = [...bySource.entries()]
+    .map(([source, items]) => `${source}: ${items.length} p50=${percentile(items, 0.5)}ms p95=${percentile(items, 0.95)}ms`)
+    .sort()
+  if (sourceStats.length) {
+    console.log("source breakdown:")
+    for (const row of sourceStats) console.log(`  ${row}`)
+  }
+
+  const slowest = [...completed]
+    .sort((a, b) => (b.totalMs ?? 0) - (a.totalMs ?? 0))
+    .slice(0, 5)
+    .map((call) => ({
+      callID: call.callID,
+      sessionID: call.sessionID,
+      depth: sessionDepth.get(call.sessionID),
+      ms: call.totalMs ?? 0,
+      outputTokens: call.usage?.outputTokens ?? 0,
+      promptChars: call.context.messageChars + call.context.systemChars,
+      serializedChars: call.context.serializedMessageChars,
+      model: `${call.providerID}/${call.modelID}`,
+      modelPromptTokens: call.context.approxPromptTokens,
+      systemPromptHash: call.context.systemPromptHash,
+      messageHistoryHash: call.context.messageHistoryHash,
+      cacheRead: call.usage?.cacheReadTokens ?? 0,
+      cacheWrite: call.usage?.cacheWriteTokens ?? 0,
+      ttft: call.firstChunkMs ?? 0,
+      streamMs: call.streamCreatedMs ?? 0,
+    }))
+
+  console.log("top slowest processor calls:")
+  for (const row of slowest) {
+    console.log(
+      `  ${row.callID} ${row.model} session=${row.sessionID} depth=${row.depth} ${row.ms}ms output=${row.outputTokens} ` +
+        `prompt=${row.promptChars} serialized=${row.serializedChars} approxTokens=${row.modelPromptTokens} hash=${hashPrefix(
+          row.systemPromptHash,
+        )} history=${hashPrefix(row.messageHistoryHash)} cacheRead=${row.cacheRead} cacheWrite=${row.cacheWrite} ` +
+          `ttft=${row.ttft} stream=${row.streamMs}`,
+    )
+  }
+
+  const tokenBySize = [...processorCalls].sort(
+    (a, b) => (b.context.messageChars + b.context.systemChars) - (a.context.messageChars + a.context.systemChars),
+  )
+  console.log("top context-size processor calls:")
+  for (const call of tokenBySize.slice(0, 3)) {
+    const promptChars = call.context.messageChars + call.context.systemChars
+    const tokens = call.usage?.outputTokens ?? 0
+    console.log(
+      `  ${call.callID} ${call.sessionID} depth=${sessionDepth.get(call.sessionID)} ${promptChars} chars, output=${tokens}, tools=${
+        call.context.toolNames.slice(0, 5).join(",")
+      }, systemHash=${hashPrefix(call.context.systemPromptHash)} historyHash=${hashPrefix(call.context.messageHistoryHash)} ` +
+        `cacheRead=${call.usage?.cacheReadTokens ?? 0} cacheWrite=${call.usage?.cacheWriteTokens ?? 0}`,
+    )
+  }
+
+  const sessionCallCount = new Map<string, number>()
+  const sessionDuration = new Map<string, number>()
+  for (const call of allCalls) {
+    sessionCallCount.set(call.sessionID, (sessionCallCount.get(call.sessionID) ?? 0) + 1)
+    sessionDuration.set(call.sessionID, sessionDuration.get(call.sessionID) ?? 0)
+    const base = sessionDuration.get(call.sessionID) ?? 0
+    sessionDuration.set(call.sessionID, base + (call.totalMs ?? 0))
+  }
+  const sessionRows = [...sessionDuration.entries()]
+    .sort((a, b) => b[1] - a[1])
+    .slice(0, 10)
+  if (sessionRows.length) {
+    console.log("sessions by llm time:")
+    for (const [sessionID, ms] of sessionRows) {
+      console.log(
+        `  ${sessionID} calls=${sessionCallCount.get(sessionID) ?? 0} total=${Math.round(ms)}ms avg=${
+          Math.round((ms / (sessionCallCount.get(sessionID) ?? 1)) || 0)
+        }ms`,
+      )
+    }
+  }
+}
+
+async function runCase(def: TestCase) {
+  const model = await readModel()
+  const root = await Session.createNext({
+    title: `profiling-${Identifier.ascending("session")}-${def.name}`,
+    directory: process.cwd(),
+    permission: [{ permission: "read", action: "allow", pattern: "*" }],
+  })
+  const sessions = new Set<string>()
+  const started = performance.now()
+  try {
+    await executeSubtaskTree({
+      sessionID: root.id,
+      depthRemaining: def.depth,
+      path: def.name,
+      breadth: def.breadth,
+      model,
+      sessions,
+    })
+  } finally {
+    // no-op cleanup is handled by caller
+  }
+  const totalDurationMs = performance.now() - started
+  const snapshot = LLMProfiler.snapshot()
+
+  return {
+    name: def.name,
+    totalDurationMs,
+    sessionID: root.id,
+    sessions: [...sessions],
+    calls: snapshot.calls,
+  }
+}
+
+async function main() {
+  const logLevelEnv = process.env.VOLTCODE_LOG_LEVEL?.toUpperCase()
+  const parsedLogLevel = ["TRACE", "DEBUG", "INFO", "WARN", "ERROR"].includes(logLevelEnv ?? "")
+    ? (logLevelEnv as Log.Level)
+    : "INFO"
+  await Log.init({
+    print: true,
+    dev: true,
+    level: parsedLogLevel,
+  })
+
+  const runDepth = Math.max(0, Number(toArg("depth", "1")))
+  const runBreadth = Math.max(1, Number(toArg("breadth", "1")))
+  const runModel = toArg("model", MODEL.modelID)
+  if (runModel) {
+    MODEL.modelID = runModel
+  }
+  const casesArg = toArg("cases", "short")
+  const selectedCases = casesArg === "full" ? CASES : [
+    { name: "baseline-no-subtask", depth: 0, breadth: 0 },
+    { name: "single-subtask", depth: 1, breadth: 1 },
+    { name: "short-tree", depth: runDepth, breadth: Math.min(runBreadth, 2) },
+  ] as TestCase[]
+  const exportJson = toArg("json", "")
+
+  const model = await Provider.getModel(MODEL.providerID, MODEL.modelID)
+  console.log(`Loaded model: ${model.providerID}/${model.id}`)
+  console.log(`Running ${selectedCases.length} case(s) with Opus 4.6`)
+
+  const results = []
+  for (const item of selectedCases) {
+    LLMProfiler.setEnabled(true)
+    LLMProfiler.clear()
+    const result = await runCase(item)
+    results.push(result)
+    await summarizeCase(result)
+    await Session.remove(result.sessionID)
+  }
+
+  if (exportJson) {
+    await fs.writeFile(
+      exportJson,
+      JSON.stringify(
+        {
+          model: `${MODEL.providerID}/${MODEL.modelID}`,
+          createdAt: new Date().toISOString(),
+          cases: results,
+        },
+        null,
+        2,
+      ),
+    )
+  }
+}
+
+await bootstrap(process.cwd(), async () => {
+  await main()
+})
diff --git a/packages/voltcode/scripts/subtask-response-latency-exp.ts b/packages/voltcode/scripts/subtask-response-latency-exp.ts
new file mode 100644
index 000000000..6e553a2b5
--- /dev/null
+++ b/packages/voltcode/scripts/subtask-response-latency-exp.ts
@@ -0,0 +1,106 @@
+import { Session } from "../src/session"
+import { SessionPrompt } from "../src/session/prompt"
+import { Provider } from "../src/provider/provider"
+import { LLMProfiler } from "../src/session/llm-profiler"
+import { Identifier } from "../src/id/id"
+import { Log } from "../src/util/log"
+import { Instance } from "../src/project/instance"
+
+function childTextChars(value: string) {
+  return value.length
+}
+
+async function runCase(name: string, parts: any[]) {
+  const model = await Provider.getModel("anthropic", "claude-opus-4-6")
+  const root = await Session.createNext({
+    title: `subtask-exp-${name}-${Identifier.ascending("session")}`,
+    directory: process.cwd(),
+    permission: [{ permission: "read", action: "allow", pattern: "*" }],
+  })
+
+  LLMProfiler.setEnabled(true)
+  LLMProfiler.clear()
+
+  const before = await Session.messages({ sessionID: root.id })
+  const beforeIds = new Set(before.map((x) => x.info.id))
+
+  await SessionPrompt.prompt({
+    sessionID: root.id,
+    model: { providerID: model.providerID, modelID: model.id },
+    agent: "general",
+    parts,
+  })
+
+  const after = await Session.messages({ sessionID: root.id })
+  const newMsgs = after.filter((m) => !beforeIds.has(m.info.id))
+  const toolParts = newMsgs
+    .flatMap((m) => m.parts)
+    .filter((p: any) => p.type === "tool" && p.tool === "task") as any[]
+
+  const allCalls = LLMProfiler.snapshot().calls
+  const completed = allCalls.filter((c) => c.status === "completed")
+
+  console.log(`\n=== ${name} ===`)
+  console.log(`root=${root.id} completedCalls=${completed.length}`)
+  for (const c of completed) {
+    console.log(
+      `${c.callID} session=${c.sessionID} source=${c.source} model=${c.providerID}/${c.modelID} outputTok=${c.usage?.outputTokens ?? 0} input=${c.usage?.inputTokens ?? 0} cacheRead=${c.usage?.cacheReadTokens ?? 0} cacheWrite=${c.usage?.cacheWriteTokens ?? 0} totalMs=${c.totalMs ?? 0}`,
+    )
+  }
+
+  if (toolParts.length === 0) {
+    console.log("No task tool outputs")
+  } else {
+    for (let i = 0; i < toolParts.length; i++) {
+      const part = toolParts[i]
+      const output = typeof part.state.output === "string" ? part.state.output : ""
+      const childSession = (part.state?.metadata?.sessionId || "") as string
+      console.log(`tool#${i + 1} outputChars=${output.length} session=${childSession}`)
+      console.log(`tool output preview=${JSON.stringify(output.slice(0, 260))}`)
+      console.log(`tool output tail=${JSON.stringify(output.slice(-260))}`)
+      if (childSession) {
+        const childMessages = await Session.messages({ sessionID: childSession })
+        const childTexts = childMessages
+          .flatMap((m) => m.parts)
+          .filter((p: any) => p.type === "text")
+          .map((p: any) => p.text)
+          .join("\n")
+        console.log(`childTextChars=${childTextChars(childTexts)} preview=${JSON.stringify(childTexts.slice(0, 260))}`)
+      }
+    }
+  }
+
+  await Session.remove(root.id)
+}
+
+await Instance.provide({
+  directory: process.cwd(),
+  init: async () => {
+    await Log.init({ print: false, dev: true, level: "ERROR" })
+  },
+  fn: async () => {
+    await runCase("short-subtask", [
+      {
+        type: "subtask",
+        agent: "general",
+        description: "short-task",
+        prompt: "Give a short 1-paragraph status update and stop.",
+        model: { providerID: "anthropic", modelID: "claude-opus-4-6" },
+      },
+    ])
+
+    await runCase("long-subtask", [
+      {
+        type: "subtask",
+        agent: "general",
+        description: "long-task",
+        prompt:
+          "You are asked to produce a very detailed design review for a hypothetical module named VoltScheduler, including at least 12 concrete risks, 8 recommended mitigations, 6 concrete follow-up test cases, and a short migration checklist. " +
+          "Be explicit and granular with names/examples. Include section headers. Return a full response around 700-900 words.",
+        model: { providerID: "anthropic", modelID: "claude-opus-4-6" },
+      },
+    ])
+
+    await runCase("baseline", [{ type: "text", text: "Reply with a concise one-sentence summary of how many subagents you think would help." }])
+  },
+})
diff --git a/packages/voltcode/src/cli/cmd/tui/component/task-tree-pane.tsx b/packages/voltcode/src/cli/cmd/tui/component/task-tree-pane.tsx
index 93d60b99e..68f6c9b76 100644
--- a/packages/voltcode/src/cli/cmd/tui/component/task-tree-pane.tsx
+++ b/packages/voltcode/src/cli/cmd/tui/component/task-tree-pane.tsx
@@ -36,6 +36,12 @@ interface ToolCallInfo {
   metadata?: Record<string, unknown>
 }
 
+interface ThinkingInfo {
+  text: string
+  isStreaming: boolean
+  startTime?: number
+}
+
 // Flatten jobs into a list for display
 function flattenJobs(jobs: JobTree[]): FlattenResult {
   if (jobs.length === 0) return { items: [], maxDepth: 0 }
@@ -429,12 +435,13 @@ function ToolCallDetailView(props: { call: ToolCallInfo; tick: number; width: nu
 
 function SelectedTaskDetails(props: {
   item: FlattenedItem | null
-  sessionData: { prompt: string; toolCalls: ToolCallInfo[] }
+  sessionData: { prompt: string; toolCalls: ToolCallInfo[]; thinking: ThinkingInfo | null }
   recentToolCalls: ToolCallInfo[]
   focusSection: "tree" | "toolcalls"
   selectedToolIndex: number
   paneWidth: number
   isJobRoot: boolean
+  thinking: ThinkingInfo | null
   onToolCallClick?: (index: number) => void
 }) {
   const { theme } = useTheme()
@@ -530,11 +537,80 @@ function SelectedTaskDetails(props: {
             )
           }}
         </For>
+        <Show when={props.thinking}>
+          {(thinking) => {
+            const thinkingIndex = () => props.recentToolCalls.length
+            const isSelected = () => props.focusSection === "toolcalls" && thinkingIndex() === props.selectedToolIndex
+            const bgColor = () => (isSelected() ? RGBA.fromInts(40, 40, 40, 255) : RGBA.fromInts(0, 0, 0, 0))
+            const label = () => {
+              const preview = thinking().text.replace(/\n/g, " ").slice(0, Math.max(20, props.paneWidth - 30))
+              return preview ? `${preview}${thinking().text.length > props.paneWidth - 30 ? "..." : ""}` : ""
+            }
+            return (
+              <box
+                flexDirection="row"
+                gap={1}
+                backgroundColor={bgColor()}
+                onMouseUp={() => props.onToolCallClick?.(thinkingIndex())}
+              >
+                <text fg={thinking().isStreaming ? theme.warning : theme.textMuted}>
+                  {thinking().isStreaming ? "◐" : "●"}
+                </text>
+                <text fg={theme.accent}>
+                  <i>{"thinking".padEnd(12)}</i>
+                </text>
+                <text fg={theme.textMuted}>{label()}</text>
+              </box>
+            )
+          }}
+        </Show>
       </scrollbox>
     </box>
   )
 }
 
+// Thinking/reasoning detail view — shows tail of CoT, streams in live
+function ThinkingDetailView(props: { thinking: ThinkingInfo; tick: number; width: number; height: number }) {
+  const { theme } = useTheme()
+
+  const lines = createMemo(() => {
+    // Force reactivity on tick so we re-render while streaming
+    void props.tick
+    const text = props.thinking.text
+    if (!text) return props.thinking.isStreaming ? ["(thinking...)"] : ["(no reasoning content)"]
+
+    const lineWidth = Math.max(40, props.width - 2)
+    // Wrap all text into lines
+    const allLines = wrapText(text, lineWidth, 10000)
+    // Show only what fits in the available height, biased toward the end
+    const maxLines = Math.max(1, props.height - 3)
+    if (allLines.length <= maxLines) return allLines
+    return allLines.slice(-maxLines)
+  })
+
+  const duration = () => {
+    if (!props.thinking.startTime) return ""
+    return formatLiveDuration(props.thinking.startTime, props.thinking.isStreaming ? undefined : Date.now(), props.tick)
+  }
+
+  return (
+    <box flexDirection="column">
+      <box flexDirection="row" gap={2}>
+        <text fg={theme.accent}>
+          <b>Thinking</b>
+          {props.thinking.isStreaming ? " (streaming)" : ""}
+        </text>
+        <Show when={duration()}>
+          <text fg={theme.textMuted}>{duration()}</text>
+        </Show>
+      </box>
+      <box flexDirection="column" paddingTop={1}>
+        <For each={lines()}>{(line) => <text fg={theme.textMuted}>{line}</text>}</For>
+      </box>
+    </box>
+  )
+}
+
 export function TaskTreePane(props: TaskTreePaneProps) {
   const { data } = useTaskTree()
   const { theme } = useTheme()
@@ -593,7 +669,7 @@ export function TaskTreePane(props: TaskTreePaneProps) {
     // For job-root, show only the root tool call
     if (!item || item.type === "job-root") {
       const job = item?.job
-      if (!job) return { prompt: "", toolCalls: [] as ToolCallInfo[] }
+      if (!job) return { prompt: "", toolCalls: [] as ToolCallInfo[], thinking: null as ThinkingInfo | null }
 
       // For job root, return just the job's tool call info
       const toolCall: ToolCallInfo = {
@@ -606,7 +682,7 @@ export function TaskTreePane(props: TaskTreePaneProps) {
         output: job.toolOutput,
         metadata: {},
       }
-      return { prompt: job.toolTitle, toolCalls: [toolCall] }
+      return { prompt: job.toolTitle, toolCalls: [toolCall], thinking: null as ThinkingInfo | null }
     }
 
     // For job-child, show the child session's tool calls
@@ -629,8 +705,9 @@ export function TaskTreePane(props: TaskTreePaneProps) {
       }
     }
 
-    // Collect all tool calls from all assistant messages
+    // Collect all tool calls and find reasoning parts from all assistant messages
     const toolCalls: ToolCallInfo[] = []
+    let thinking: ThinkingInfo | null = null
     for (const msg of messages) {
       if (msg.role === "assistant") {
         const parts = sync.data.part[msg.id] ?? []
@@ -649,11 +726,19 @@ export function TaskTreePane(props: TaskTreePaneProps) {
               metadata: toolPart.state.metadata,
             })
           }
+          if (part.type === "reasoning") {
+            const rp = part as ReasoningPart
+            thinking = {
+              text: rp.text,
+              isStreaming: !rp.time?.end,
+              startTime: rp.time?.start,
+            }
+          }
         }
       }
     }
 
-    return { prompt, toolCalls }
+    return { prompt, toolCalls, thinking }
   })
 
   // Get last 10 tool calls, newest at bottom
@@ -707,14 +792,27 @@ export function TaskTreePane(props: TaskTreePaneProps) {
     }
   }
 
+  // Total selectable items in the tool call section (tool calls + optional thinking)
+  const toolSectionCount = createMemo(() => {
+    const count = recentToolCalls().length
+    const thinking = selectedSessionData().thinking
+    return thinking ? count + 1 : count
+  })
+
+  // Whether the selected tool index points to the thinking entry
+  const isThinkingSelected = createMemo(() => {
+    const thinking = selectedSessionData().thinking
+    return thinking && selectedToolIndex() === recentToolCalls().length
+  })
+
   // Tool call list navigation
   function moveToolCall(direction: number) {
-    const calls = recentToolCalls()
-    if (calls.length === 0) return
+    const total = toolSectionCount()
+    if (total === 0) return
 
     let next = selectedToolIndex() + direction
     if (next < 0) next = 0
-    if (next >= calls.length) next = calls.length - 1
+    if (next >= total) next = total - 1
 
     setSelectedToolIndex(next)
   }
@@ -742,8 +840,8 @@ export function TaskTreePane(props: TaskTreePaneProps) {
     if (evt.name === "tab") {
       evt.preventDefault()
       if (focusSection() === "tree") {
-        const calls = recentToolCalls()
-        if (calls.length > 0) {
+        const total = toolSectionCount()
+        if (total > 0) {
           setFocusSection("toolcalls")
           if (selectedToolIndex() < 0) setSelectedToolIndex(0)
         }
@@ -796,8 +894,8 @@ export function TaskTreePane(props: TaskTreePaneProps) {
         const items = flattened().items
         if (items.length > 0) setSelectedIndex(items.length - 1)
       } else {
-        const calls = recentToolCalls()
-        if (calls.length > 0) setSelectedToolIndex(calls.length - 1)
+        const total = toolSectionCount()
+        if (total > 0) setSelectedToolIndex(total - 1)
       }
     }
   })
@@ -873,6 +971,7 @@ export function TaskTreePane(props: TaskTreePaneProps) {
             selectedToolIndex={selectedToolIndex()}
             paneWidth={paneWidth()}
             isJobRoot={isJobRootSelected()}
+            thinking={selectedSessionData().thinking}
             onToolCallClick={selectToolCall}
           />
         </box>
@@ -883,10 +982,22 @@ export function TaskTreePane(props: TaskTreePaneProps) {
         </box>
         <scrollbox height={sectionHeights().detailHeight}>
           <Show
-            when={selectedToolCall()}
-            fallback={<text fg={theme.textMuted}>Select a job or tool call to view details</text>}
+            when={isThinkingSelected() && selectedSessionData().thinking}
+            fallback={
+              <Show
+                when={selectedToolCall()}
+                fallback={<text fg={theme.textMuted}>Select a job or tool call to view details</text>}
+              >
+                <ToolCallDetailView call={selectedToolCall()!} tick={tick()} width={paneWidth()} />
+              </Show>
+            }
           >
-            <ToolCallDetailView call={selectedToolCall()!} tick={tick()} width={paneWidth()} />
+            <ThinkingDetailView
+              thinking={selectedSessionData().thinking!}
+              tick={tick()}
+              width={paneWidth()}
+              height={sectionHeights().detailHeight}
+            />
           </Show>
         </scrollbox>
       </box>
@@ -920,3 +1031,9 @@ interface ToolPart {
     time?: { start: number; end?: number }
   }
 }
+
+interface ReasoningPart {
+  type: "reasoning"
+  text: string
+  time?: { start: number; end?: number }
+}
diff --git a/packages/voltcode/src/session/llm-profiler.ts b/packages/voltcode/src/session/llm-profiler.ts
new file mode 100644
index 000000000..dea0e9feb
--- /dev/null
+++ b/packages/voltcode/src/session/llm-profiler.ts
@@ -0,0 +1,266 @@
+import { createHash } from "node:crypto"
+import { Identifier } from "@/id/id"
+
+export namespace LLMProfiler {
+  export type Source = "session.processor" | "session.summary" | "session.title" | "other"
+  export type Status = "started" | "completed" | "failed"
+
+  export type Usage = {
+    inputTokens: number
+    outputTokens: number
+    reasoningTokens: number
+    cacheReadTokens: number
+    cacheWriteTokens: number
+    cost: number
+  }
+
+  export type PromptMetrics = {
+    messageCount: number
+    systemMessageCount: number
+    systemChars: number
+    messageChars: number
+    serializedMessageChars: number
+    systemPromptHash: string
+    messageHistoryHash: string
+    toolCount: number
+    toolNames: string[]
+    approxPromptTokens: number
+  }
+
+  export type Call = {
+    callID: string
+    sessionID: string
+    modelID: string
+    providerID: string
+    source: Source
+    small: boolean
+    messageID?: string
+    agent?: string
+    startedAt: number
+    finishedAt?: number
+    streamCreatedAt?: number
+    streamCreatedMs?: number
+    firstChunkAt?: number
+    firstChunkMs?: number
+    totalMs?: number
+    usage?: Usage
+    status: Status
+    error?: string
+    finishReason?: string
+    context: PromptMetrics
+    statusCode?: string
+  }
+
+  type MutableCall = Omit<Call, "startedAt" | "context"> & {
+    startedAt: number
+    context: PromptMetrics
+  }
+
+  type StartInput = {
+    sessionID: string
+    modelID: string
+    providerID: string
+    source: Source
+    small: boolean
+    context: PromptMetrics
+    messageID?: string
+    agent?: string
+  }
+
+  const enabledEnv = process.env["VOLTCODE_LLM_PROFILE"]?.toLowerCase()
+  let enabled = enabledEnv === "1" || enabledEnv === "true" || enabledEnv === "yes"
+
+  const activeCalls = new Map<string, MutableCall>()
+  const completedCalls: MutableCall[] = []
+
+  const now = () => performance.now()
+
+  export function setEnabled(next: boolean) {
+    enabled = next
+  }
+
+  export function clear() {
+    activeCalls.clear()
+    completedCalls.length = 0
+  }
+
+  export function isEnabled() {
+    return enabled
+  }
+
+  function normalizeForHash(value: unknown): unknown {
+    if (value === null || typeof value !== "object") return value
+    if (typeof value === "bigint") return value.toString()
+    if (typeof value === "function") return undefined
+    if (Array.isArray(value)) return value.map((entry) => normalizeForHash(entry))
+    const source = value as Record<string, unknown>
+    const normalized: Record<string, unknown> = {}
+    for (const key of Object.keys(source).sort()) {
+      const next = normalizeForHash(source[key])
+      if (typeof next !== "undefined") normalized[key] = next
+    }
+    return normalized
+  }
+
+  function stableHashString(value: unknown): string {
+    return JSON.stringify(normalizeForHash(value))
+  }
+
+  export function start(input: StartInput): string {
+    if (!enabled) return ""
+    const callID = Identifier.ascending("part")
+    const call: MutableCall = {
+      ...input,
+      callID,
+      startedAt: now(),
+      status: "started",
+      source: input.source,
+      small: !!input.small,
+      streamCreatedMs: undefined,
+      firstChunkMs: undefined,
+      totalMs: undefined,
+      usage: undefined,
+      statusCode: undefined,
+      context: input.context,
+    }
+    activeCalls.set(callID, call)
+    return callID
+  }
+
+  export function ensure(callID: string, input: StartInput): string {
+    if (!enabled) return ""
+    if (callID) return callID
+    return start(input)
+  }
+
+  function get(callID: string) {
+    return activeCalls.get(callID)
+  }
+
+  function finalize(callID: string, status: Status) {
+    const call = activeCalls.get(callID)
+    if (!call) return
+    activeCalls.delete(callID)
+    call.status = status
+    call.finishedAt = now()
+    call.totalMs = Math.max(0, Math.round(call.finishedAt - call.startedAt))
+    completedCalls.push(call)
+  }
+
+  export function complete(callID: string, input: {
+    usage?: Usage
+    statusCode?: string
+    finishReason?: string
+    context?: Partial<PromptMetrics>
+  }) {
+    if (!enabled || !callID) return
+    const call = get(callID)
+    if (!call) return
+    if (input.context) {
+      call.context = {
+        ...call.context,
+        ...input.context,
+      }
+    }
+    if (input.usage) call.usage = input.usage
+    if (input.statusCode) call.statusCode = input.statusCode
+    if (input.finishReason) call.finishReason = input.finishReason
+    finalize(callID, "completed")
+  }
+
+  export function fail(callID: string, error: unknown) {
+    if (!enabled || !callID) return
+    const call = get(callID)
+    if (!call) return
+    if (error !== undefined) {
+      call.error = typeof error === "string" ? error : error instanceof Error ? error.message : String(error)
+    }
+    finalize(callID, "failed")
+  }
+
+  export function streamCreated(callID: string, ms: number) {
+    if (!enabled || !callID) return
+    const call = get(callID)
+    if (!call) return
+    call.streamCreatedAt = now()
+    call.streamCreatedMs = ms
+  }
+
+  export function firstChunk(callID: string, ms: number) {
+    if (!enabled || !callID) return
+    const call = get(callID)
+    if (!call) return
+    call.firstChunkAt = now()
+    call.firstChunkMs = ms
+  }
+
+  export function snapshot() {
+    return {
+      enabled,
+      calls: [...Array.from(activeCalls.values()), ...completedCalls].sort((a, b) => a.startedAt - b.startedAt),
+      activeCount: activeCalls.size,
+      completedCount: completedCalls.length,
+    }
+  }
+
+  export function buildPromptMetrics(input: {
+    system: string[]
+    messages: Array<{ content: string | unknown[] }>
+    tools: Record<string, unknown>
+    small: boolean
+  }): PromptMetrics {
+    const systemText = input.system.join("\n")
+    const systemPromptHash = createHash("sha256").update(systemText).digest("hex")
+    const messageHistoryHash = createHash("sha256")
+      .update(stableHashString({ system: input.system, messages: input.messages }))
+      .digest("hex")
+    const messageText = input.messages
+      .map((message) => {
+        const content = Array.isArray(message.content)
+          ? message.content
+              .map((part: any) => {
+                if (part == null) return ""
+                if (typeof part.text === "string") return part.text
+                if (typeof part.content === "string") return part.content
+                return JSON.stringify(part)
+              })
+              .join("")
+          : typeof message.content === "string"
+            ? message.content
+            : JSON.stringify(message.content ?? "")
+        return content
+      })
+      .join("\n")
+
+    const serializedMessageChars = JSON.stringify(input.messages).length
+    const messageChars = input.messages.reduce((sum, message) => {
+      const normalized = typeof message.content === "string"
+        ? message.content
+        : Array.isArray(message.content)
+          ? message.content
+              .map((part: any) => {
+                if (part == null) return ""
+                if (typeof part.text === "string") return part.text
+                if (typeof part.content === "string") return part.content
+                return JSON.stringify(part)
+              })
+              .join("")
+          : JSON.stringify(message.content ?? "")
+      return sum + normalized.length
+    }, 0)
+
+    const contextChars = messageText.length + systemText.length
+    return {
+      systemMessageCount: input.system.length,
+      systemChars: systemText.length,
+      messageCount: input.messages.length,
+      messageChars,
+      serializedMessageChars,
+      systemPromptHash,
+      messageHistoryHash,
+      toolCount: Object.keys(input.tools).length,
+      toolNames: Object.keys(input.tools),
+      approxPromptTokens: Math.max(1, Math.round(contextChars / 4)),
+    }
+  }
+}
diff --git a/packages/voltcode/src/session/llm.ts b/packages/voltcode/src/session/llm.ts
index 6449628dd..5eb066522 100644
--- a/packages/voltcode/src/session/llm.ts
+++ b/packages/voltcode/src/session/llm.ts
@@ -23,6 +23,7 @@ import { SystemPrompt } from "./system"
 import { Flag } from "@/flag/flag"
 import { PermissionNext } from "@/permission/next"
 import { Auth } from "@/auth"
+import { LLMProfiler } from "./llm-profiler"
 
 export namespace LLM {
   const log = Log.create({ service: "llm" })
@@ -38,6 +39,7 @@ export namespace LLM {
     abort: AbortSignal
     messages: ModelMessage[]
     small?: boolean
+    traceID?: string
     tools: Record<string, Tool>
     retries?: number
     /** Pre-assembled system prompt (already transformed by plugin). Skips inline assembly when provided. */
@@ -171,6 +173,41 @@ export namespace LLM {
       })
     }
 
+    const messages = [
+      ...(isCodex
+        ? [
+            {
+              role: "user",
+              content: system.join("\n\n"),
+            } as ModelMessage,
+          ]
+        : system.map(
+            (x): ModelMessage => ({
+              role: "system",
+              content: x,
+            }),
+          )),
+      ...input.messages,
+    ]
+
+    if (!input.traceID) {
+      input.traceID = LLMProfiler.start({
+        sessionID: input.sessionID,
+        source: "other",
+        modelID: input.model.id,
+        providerID: input.model.providerID,
+        small: input.small ?? false,
+        messageID: input.user.id,
+        agent: input.agent.name,
+        context: LLMProfiler.buildPromptMetrics({
+          system,
+          messages,
+          tools: input.tools,
+          small: input.small ?? false,
+        }),
+      })
+    }
+
     return streamText({
       onError(error) {
         l.error("stream error", {
@@ -229,22 +266,7 @@ export namespace LLM {
         ...input.model.headers,
       },
       maxRetries: input.retries ?? 0,
-      messages: [
-        ...(isCodex
-          ? [
-              {
-                role: "user",
-                content: system.join("\n\n"),
-              } as ModelMessage,
-            ]
-          : system.map(
-              (x): ModelMessage => ({
-                role: "system",
-                content: x,
-              }),
-            )),
-        ...input.messages,
-      ],
+      messages,
       model: wrapLanguageModel({
         model: language as any,
         middleware: [
diff --git a/packages/voltcode/src/session/processor.ts b/packages/voltcode/src/session/processor.ts
index ca1e2ba61..038743767 100644
--- a/packages/voltcode/src/session/processor.ts
+++ b/packages/voltcode/src/session/processor.ts
@@ -20,6 +20,7 @@ import { Flag } from "@/flag/flag"
 import { handleLargeToolOutput, LARGE_TOOL_OUTPUT_THRESHOLD } from "./large-tool-output"
 import type { LcmToolMetadata } from "./lcm/types"
 import { Token } from "@/util/token"
+import { LLMProfiler } from "./llm-profiler"
 
 export namespace SessionProcessor {
   const DOOM_LOOP_THRESHOLD = 3
@@ -336,11 +337,32 @@ export namespace SessionProcessor {
             let currentText: MessageV2.TextPart | undefined
             let reasoningMap: Record<string, MessageV2.ReasoningPart> = {}
             const llmStreamStart = performance.now()
-            stream = await LLM.stream(streamInput)
+            const callID = LLMProfiler.start({
+              sessionID: input.sessionID,
+              source: "session.processor",
+              modelID: input.model.id,
+              providerID: input.model.providerID,
+              small: streamInput.small ?? false,
+              messageID: input.assistantMessage.id,
+              agent: input.assistantMessage.agent,
+              context: LLMProfiler.buildPromptMetrics({
+                system: streamInput.system,
+                messages: streamInput.messages,
+                tools: streamInput.tools,
+                small: streamInput.small ?? false,
+              }),
+            })
+            streamInput.traceID = callID
+            const streamInputWithTrace = {
+              ...streamInput,
+              traceID: callID,
+            }
+            stream = await LLM.stream(streamInputWithTrace)
             log.trace("process.timing.llmStreamCreated", {
               sessionID: input.sessionID,
               ms: Math.round(performance.now() - llmStreamStart),
             })
+            LLMProfiler.streamCreated(callID, Math.round(performance.now() - llmStreamStart))
 
             // Use dedicated chunk timeout from Flag, separate from fetch timeout
             // Set to 0 to disable chunk timeout entirely
@@ -364,6 +386,7 @@ export namespace SessionProcessor {
                   sessionID: input.sessionID,
                   ms: Math.round(performance.now() - llmStreamStart),
                 })
+                LLMProfiler.firstChunk(callID, Math.round(performance.now() - llmStreamStart))
                 firstChunk = false
               }
               input.abort.throwIfAborted()
@@ -778,6 +801,17 @@ export namespace SessionProcessor {
                 input.assistantMessage.cost += usage.cost
                 input.assistantMessage.tokens = usage.tokens
                 await Session.updateMessage(input.assistantMessage)
+                LLMProfiler.complete(callID, {
+                  usage: {
+                    inputTokens: usage.tokens.input,
+                    outputTokens: usage.tokens.output,
+                    reasoningTokens: usage.tokens.reasoning,
+                    cacheReadTokens: usage.tokens.cache.read,
+                    cacheWriteTokens: usage.tokens.cache.write,
+                    cost: usage.cost,
+                  },
+                  finishReason: input.assistantMessage.finish,
+                })
                 log.info("usage.summary", {
                   sessionID: input.sessionID,
                   messageID: input.assistantMessage.id,
@@ -793,9 +827,14 @@ export namespace SessionProcessor {
                   providerID: input.model.providerID,
                   modelID: input.model.id,
                 })
+                LLMProfiler.fail(callID, "usage missing")
               }
             }
           } catch (e: any) {
+            const callID = streamInput.traceID
+            if (callID) {
+              LLMProfiler.fail(callID, e)
+            }
             log.error("process", {
               error: e,
               stack: JSON.stringify(e.stack),
diff --git a/packages/voltcode/src/session/prompt.ts b/packages/voltcode/src/session/prompt.ts
index 9e158a2a8..305deafbe 100644
--- a/packages/voltcode/src/session/prompt.ts
+++ b/packages/voltcode/src/session/prompt.ts
@@ -1980,6 +1980,7 @@ export namespace SessionPrompt {
             })
           },
         }
+        const subtaskStart = performance.now()
         const result = await taskTool.execute(taskArgs, taskCtx).catch((error) => {
           executionError = error
           log.error("subtask execution failed", { error, agent: task.agent, description: task.description })
@@ -1994,6 +1995,14 @@ export namespace SessionPrompt {
           },
           result,
         )
+        log.trace("loop.timing.subtaskExecution", {
+          sessionID,
+          messageID: assistantMessage.id,
+          task: task.agent,
+          ms: Math.round(performance.now() - subtaskStart),
+          success: !executionError,
+          childSessionID: result?.metadata?.sessionId,
+        })
         assistantMessage.finish = "tool-calls"
         assistantMessage.time.completed = Date.now()
         await Session.updateMessage(assistantMessage)