From b54618c7cef1c49f7028a2057e01396039da2e47 Mon Sep 17 00:00:00 2001 From: Theodore Blackman Date: Fri, 6 Mar 2026 21:00:13 -0800 Subject: [PATCH 1/2] feat: show model thinking/reasoning in task tree viewer When a sub-task's model is emitting chain-of-thought reasoning, the task tree viewer now shows a "thinking" entry at the end of the tool call list. Selecting it displays the reasoning text in the detail pane, streaming in live and showing the most recent tokens when content exceeds available space. Co-Authored-By: Claude Opus 4.6 --- .../cli/cmd/tui/component/task-tree-pane.tsx | 147 ++++++++++++++++-- 1 file changed, 132 insertions(+), 15 deletions(-) diff --git a/packages/voltcode/src/cli/cmd/tui/component/task-tree-pane.tsx b/packages/voltcode/src/cli/cmd/tui/component/task-tree-pane.tsx index 93d60b99e..68f6c9b76 100644 --- a/packages/voltcode/src/cli/cmd/tui/component/task-tree-pane.tsx +++ b/packages/voltcode/src/cli/cmd/tui/component/task-tree-pane.tsx @@ -36,6 +36,12 @@ interface ToolCallInfo { metadata?: Record } +interface ThinkingInfo { + text: string + isStreaming: boolean + startTime?: number +} + // Flatten jobs into a list for display function flattenJobs(jobs: JobTree[]): FlattenResult { if (jobs.length === 0) return { items: [], maxDepth: 0 } @@ -429,12 +435,13 @@ function ToolCallDetailView(props: { call: ToolCallInfo; tick: number; width: nu function SelectedTaskDetails(props: { item: FlattenedItem | null - sessionData: { prompt: string; toolCalls: ToolCallInfo[] } + sessionData: { prompt: string; toolCalls: ToolCallInfo[]; thinking: ThinkingInfo | null } recentToolCalls: ToolCallInfo[] focusSection: "tree" | "toolcalls" selectedToolIndex: number paneWidth: number isJobRoot: boolean + thinking: ThinkingInfo | null onToolCallClick?: (index: number) => void }) { const { theme } = useTheme() @@ -530,11 +537,80 @@ function SelectedTaskDetails(props: { ) }} + + {(thinking) => { + const thinkingIndex = () => props.recentToolCalls.length + const isSelected = () => props.focusSection === "toolcalls" && thinkingIndex() === props.selectedToolIndex + const bgColor = () => (isSelected() ? RGBA.fromInts(40, 40, 40, 255) : RGBA.fromInts(0, 0, 0, 0)) + const label = () => { + const preview = thinking().text.replace(/\n/g, " ").slice(0, Math.max(20, props.paneWidth - 30)) + return preview ? `${preview}${thinking().text.length > props.paneWidth - 30 ? "..." : ""}` : "" + } + return ( + props.onToolCallClick?.(thinkingIndex())} + > + + {thinking().isStreaming ? "◐" : "●"} + + + {"thinking".padEnd(12)} + + {label()} + + ) + }} + ) } +// Thinking/reasoning detail view — shows tail of CoT, streams in live +function ThinkingDetailView(props: { thinking: ThinkingInfo; tick: number; width: number; height: number }) { + const { theme } = useTheme() + + const lines = createMemo(() => { + // Force reactivity on tick so we re-render while streaming + void props.tick + const text = props.thinking.text + if (!text) return props.thinking.isStreaming ? ["(thinking...)"] : ["(no reasoning content)"] + + const lineWidth = Math.max(40, props.width - 2) + // Wrap all text into lines + const allLines = wrapText(text, lineWidth, 10000) + // Show only what fits in the available height, biased toward the end + const maxLines = Math.max(1, props.height - 3) + if (allLines.length <= maxLines) return allLines + return allLines.slice(-maxLines) + }) + + const duration = () => { + if (!props.thinking.startTime) return "" + return formatLiveDuration(props.thinking.startTime, props.thinking.isStreaming ? undefined : Date.now(), props.tick) + } + + return ( + + + + Thinking + {props.thinking.isStreaming ? " (streaming)" : ""} + + + {duration()} + + + + {(line) => {line}} + + + ) +} + export function TaskTreePane(props: TaskTreePaneProps) { const { data } = useTaskTree() const { theme } = useTheme() @@ -593,7 +669,7 @@ export function TaskTreePane(props: TaskTreePaneProps) { // For job-root, show only the root tool call if (!item || item.type === "job-root") { const job = item?.job - if (!job) return { prompt: "", toolCalls: [] as ToolCallInfo[] } + if (!job) return { prompt: "", toolCalls: [] as ToolCallInfo[], thinking: null as ThinkingInfo | null } // For job root, return just the job's tool call info const toolCall: ToolCallInfo = { @@ -606,7 +682,7 @@ export function TaskTreePane(props: TaskTreePaneProps) { output: job.toolOutput, metadata: {}, } - return { prompt: job.toolTitle, toolCalls: [toolCall] } + return { prompt: job.toolTitle, toolCalls: [toolCall], thinking: null as ThinkingInfo | null } } // For job-child, show the child session's tool calls @@ -629,8 +705,9 @@ export function TaskTreePane(props: TaskTreePaneProps) { } } - // Collect all tool calls from all assistant messages + // Collect all tool calls and find reasoning parts from all assistant messages const toolCalls: ToolCallInfo[] = [] + let thinking: ThinkingInfo | null = null for (const msg of messages) { if (msg.role === "assistant") { const parts = sync.data.part[msg.id] ?? [] @@ -649,11 +726,19 @@ export function TaskTreePane(props: TaskTreePaneProps) { metadata: toolPart.state.metadata, }) } + if (part.type === "reasoning") { + const rp = part as ReasoningPart + thinking = { + text: rp.text, + isStreaming: !rp.time?.end, + startTime: rp.time?.start, + } + } } } } - return { prompt, toolCalls } + return { prompt, toolCalls, thinking } }) // Get last 10 tool calls, newest at bottom @@ -707,14 +792,27 @@ export function TaskTreePane(props: TaskTreePaneProps) { } } + // Total selectable items in the tool call section (tool calls + optional thinking) + const toolSectionCount = createMemo(() => { + const count = recentToolCalls().length + const thinking = selectedSessionData().thinking + return thinking ? count + 1 : count + }) + + // Whether the selected tool index points to the thinking entry + const isThinkingSelected = createMemo(() => { + const thinking = selectedSessionData().thinking + return thinking && selectedToolIndex() === recentToolCalls().length + }) + // Tool call list navigation function moveToolCall(direction: number) { - const calls = recentToolCalls() - if (calls.length === 0) return + const total = toolSectionCount() + if (total === 0) return let next = selectedToolIndex() + direction if (next < 0) next = 0 - if (next >= calls.length) next = calls.length - 1 + if (next >= total) next = total - 1 setSelectedToolIndex(next) } @@ -742,8 +840,8 @@ export function TaskTreePane(props: TaskTreePaneProps) { if (evt.name === "tab") { evt.preventDefault() if (focusSection() === "tree") { - const calls = recentToolCalls() - if (calls.length > 0) { + const total = toolSectionCount() + if (total > 0) { setFocusSection("toolcalls") if (selectedToolIndex() < 0) setSelectedToolIndex(0) } @@ -796,8 +894,8 @@ export function TaskTreePane(props: TaskTreePaneProps) { const items = flattened().items if (items.length > 0) setSelectedIndex(items.length - 1) } else { - const calls = recentToolCalls() - if (calls.length > 0) setSelectedToolIndex(calls.length - 1) + const total = toolSectionCount() + if (total > 0) setSelectedToolIndex(total - 1) } } }) @@ -873,6 +971,7 @@ export function TaskTreePane(props: TaskTreePaneProps) { selectedToolIndex={selectedToolIndex()} paneWidth={paneWidth()} isJobRoot={isJobRootSelected()} + thinking={selectedSessionData().thinking} onToolCallClick={selectToolCall} /> @@ -883,10 +982,22 @@ export function TaskTreePane(props: TaskTreePaneProps) { Select a job or tool call to view details} + when={isThinkingSelected() && selectedSessionData().thinking} + fallback={ + Select a job or tool call to view details} + > + + + } > - + @@ -920,3 +1031,9 @@ interface ToolPart { time?: { start: number; end?: number } } } + +interface ReasoningPart { + type: "reasoning" + text: string + time?: { start: number; end?: number } +} From d56a6959fb066f4b5ee88a856eb080d24530e16f Mon Sep 17 00:00:00 2001 From: Theodore Blackman Date: Wed, 11 Mar 2026 11:00:16 -0700 Subject: [PATCH 2/2] feat: add LLM call profiling infrastructure Adds LLMProfiler for tracing LLM calls end-to-end (stream creation, first chunk, completion, failure) with prompt metrics. Includes profiling scripts and subtask execution timing. Co-Authored-By: Claude Opus 4.6 --- packages/voltcode/package.json | 1 + .../voltcode/scripts/profile-llm-calls.ts | 521 ++++++++++++++++++ .../scripts/subtask-response-latency-exp.ts | 106 ++++ packages/voltcode/src/session/llm-profiler.ts | 266 +++++++++ packages/voltcode/src/session/llm.ts | 54 +- packages/voltcode/src/session/processor.ts | 41 +- packages/voltcode/src/session/prompt.ts | 9 + 7 files changed, 981 insertions(+), 17 deletions(-) create mode 100644 packages/voltcode/scripts/profile-llm-calls.ts create mode 100644 packages/voltcode/scripts/subtask-response-latency-exp.ts create mode 100644 packages/voltcode/src/session/llm-profiler.ts diff --git a/packages/voltcode/package.json b/packages/voltcode/package.json index 648eca762..fa1341191 100644 --- a/packages/voltcode/package.json +++ b/packages/voltcode/package.json @@ -14,6 +14,7 @@ "control-plane": "bun run src/control-plane/main.ts", "scheduler": "bun run src/control-plane/scheduler.ts", "check-status": "bun run script/check_status.ts", + "profile-llm-calls": "bun run scripts/profile-llm-calls.ts --cases short --depth 1 --breadth 1", "random": "echo 'Random script updated at $(date)' && echo 'Change queued successfully' && echo 'Another change made' && echo 'Yet another change' && echo 'One more change' && echo 'Final change' && echo 'Another final change' && echo 'Yet another final change'", "clean": "echo 'Cleaning up...' && rm -rf node_modules dist", "lint": "echo 'Running lint checks...' && bun test --coverage", diff --git a/packages/voltcode/scripts/profile-llm-calls.ts b/packages/voltcode/scripts/profile-llm-calls.ts new file mode 100644 index 000000000..1878c5915 --- /dev/null +++ b/packages/voltcode/scripts/profile-llm-calls.ts @@ -0,0 +1,521 @@ +import { Session } from "../src/session" +import { SessionPrompt } from "../src/session/prompt" +import { Provider } from "../src/provider/provider" +import { bootstrap } from "../src/cli/bootstrap" +import { LLMProfiler } from "../src/session/llm-profiler" +import { Identifier } from "../src/id/id" +import { promises as fs } from "fs" +import { Log } from "../src/util/log" + +const MODEL = { + providerID: "anthropic", + modelID: "claude-opus-4-6", +} + +type TestCase = { + name: string + depth: number + breadth: number +} + +type RunResult = { + name: string + totalDurationMs: number + sessionID: string + sessions: string[] + calls: ReturnType["calls"] +} + +const CASES: TestCase[] = [ + { name: "baseline-no-subtask", depth: 0, breadth: 0 }, + { name: "single-subtask", depth: 1, breadth: 1 }, + { name: "tree-depth-2-breadth-2", depth: 2, breadth: 2 }, +] + +function toArg(name: string, fallback: string): string { + const index = process.argv.findIndex((item) => item === `--${name}`) + if (index === -1) return fallback + return process.argv[index + 1] ?? fallback +} + +async function readModel() { + return Provider.getModel(MODEL.providerID, MODEL.modelID) +} + +function buildSubtaskPrompt(parts: { sessionID: string; depth: number; index: number }) { + return [ + { + type: "subtask", + agent: "general", + description: `tree-${parts.depth}-node-${parts.index} for ${parts.sessionID}`, + prompt: "Perform this leaf task with one short sentence and stop.", + model: MODEL, + }, + ] as const +} + +function buildLeafPrompt() { + return [{ type: "text", text: "Reply with a short acknowledgement and stop." }] +} + +async function collectChildren( + beforeMessageIDs: Set, + sessionID: string, +): Promise { + const messages = await Session.messages({ sessionID }) + const newMessages = messages.filter((message) => !beforeMessageIDs.has(message.info.id)) + const childSessionIDs = new Set() + + for (const message of newMessages) { + if (message.info.role !== "assistant") continue + for (const part of message.parts) { + if (part.type !== "tool" || part.tool !== "task") continue + const state = part.state as { metadata?: { sessionId?: string }; output?: string } + const metaSession = state.metadata?.sessionId + const outputSession = typeof part.state === "object" && typeof state.output === "string" ? state.output : undefined + const fromMetadata = metaSession && childSessionIDs.has(metaSession) ? undefined : metaSession + const fromOutput = outputSession ? outputSession.match(/session_id:\s*([A-Za-z0-9_-]+)/i)?.[1] : undefined + const child = fromMetadata ?? fromOutput + if (child) childSessionIDs.add(child) + } + } + + return [...childSessionIDs] +} + +async function executeSubtaskTree(input: { + sessionID: string + depthRemaining: number + path: string + breadth: number + model: Awaited> + sessions: Set +}) { + input.sessions.add(input.sessionID) + + const parts = + input.depthRemaining > 0 + ? Array.from({ length: input.breadth }, (_, index) => + buildSubtaskPrompt({ + sessionID: input.sessionID, + depth: input.depthRemaining, + index: index + 1, + }), + ).flat() + : buildLeafPrompt() + + const before = await Session.messages({ sessionID: input.sessionID }) + const beforeIDs = new Set(before.map((msg) => msg.info.id)) + + await SessionPrompt.prompt({ + sessionID: input.sessionID, + model: { + providerID: input.model.providerID, + modelID: input.model.id, + }, + agent: "general", + parts, + }) + + if (input.depthRemaining === 0) return + + const childSessions = await collectChildren(beforeIDs, input.sessionID) + for (let i = 0; i < childSessions.length; i++) { + await executeSubtaskTree({ + sessionID: childSessions[i], + depthRemaining: input.depthRemaining - 1, + path: `${input.path}.${i + 1}`, + breadth: input.breadth, + model: input.model, + sessions: input.sessions, + }) + } +} + +function percentile(values: number[], ratio: number): number { + if (values.length === 0) return 0 + const sorted = [...values].sort((a, b) => a - b) + const index = Math.min(sorted.length - 1, Math.max(0, Math.ceil(ratio * sorted.length) - 1)) + return sorted[index] ?? 0 +} + +function hashPrefix(hash: string): string { + return hash.slice(0, 10) +} + +function hashReuseStats(values: string[]) { + const seen = new Set() + let reused = 0 + for (const value of values) { + if (seen.has(value)) { + reused += 1 + } else { + seen.add(value) + } + } + return { uniqueCount: seen.size, reusedCount: reused } +} + +async function summarizeCase(result: RunResult) { + const allCalls = result.calls + const completed = allCalls.filter((call) => call.status === "completed") + const failed = allCalls.filter((call) => call.status === "failed") + const active = allCalls.filter((call) => call.status === "started") + + const sessionDepth = new Map() + const sessionParent = new Map() + for (const id of result.sessions) { + if (sessionDepth.has(id)) continue + const root = await Session.get(id).catch(() => undefined) + if (!root) continue + sessionParent.set(id, root.parentID) + let depth = 0 + let cursor: string | undefined = root.parentID + const visited = new Set() + while (cursor) { + if (visited.has(cursor)) break + visited.add(cursor) + if (sessionDepth.has(cursor)) { + depth += sessionDepth.get(cursor)! + 1 + break + } + const parent = await Session.get(cursor).catch(() => undefined) + if (!parent?.parentID) break + cursor = parent.parentID + depth += 1 + } + sessionDepth.set(id, Math.max(0, depth)) + } + + const callsWithDepth = allCalls.map((call) => ({ + ...call, + depth: sessionDepth.get(call.sessionID) ?? 0, + })) + + const totalLlmMs = completed.reduce((sum, call) => sum + (call.totalMs ?? 0), 0) + const totalFirstChunk = completed.reduce((sum, call) => sum + (call.firstChunkMs ?? 0), 0) + const totalStreamCreated = completed.reduce((sum, call) => sum + (call.streamCreatedMs ?? 0), 0) + + const completedByDepth = new Map() + for (const call of completed) { + const depth = sessionDepth.get(call.sessionID) ?? 0 + const bucket = completedByDepth.get(depth) ?? [] + bucket.push(call.totalMs ?? 0) + completedByDepth.set(depth, bucket) + } + + const tokenTotals = completed.reduce( + (acc, call) => { + acc.input += call.usage?.inputTokens ?? 0 + acc.output += call.usage?.outputTokens ?? 0 + acc.reasoning += call.usage?.reasoningTokens ?? 0 + acc.cacheRead += call.usage?.cacheReadTokens ?? 0 + acc.cacheWrite += call.usage?.cacheWriteTokens ?? 0 + return acc + }, + { input: 0, output: 0, reasoning: 0, cacheRead: 0, cacheWrite: 0 }, + ) + + const processorCalls = completed.filter((call) => call.source === "session.processor") + const orderedCompleted = [...completed].sort((a, b) => a.startedAt - b.startedAt) + const firstCallBySession = new Map() + for (const call of orderedCompleted) { + if (!firstCallBySession.has(call.sessionID)) firstCallBySession.set(call.sessionID, call) + } + + const processorHashes = processorCalls.map((call) => call.context.systemPromptHash) + const processorHistoryHashes = processorCalls.map((call) => call.context.messageHistoryHash) + const processorHashStats = hashReuseStats(processorHashes) + const historyHashStats = hashReuseStats(processorHistoryHashes) + + let subtaskHashMatch = 0 + let subtaskHashDiff = 0 + let subtaskHashUnknown = 0 + let subtaskHistoryMatch = 0 + let subtaskHistoryDiff = 0 + let subtaskHistoryUnknown = 0 + let subtaskSystemMatchHistoryDiff = 0 + for (const [sessionID, firstCall] of firstCallBySession) { + const parentID = sessionParent.get(sessionID) + if (!parentID) continue + const parentFirstCall = firstCallBySession.get(parentID) + if (!parentFirstCall) { + subtaskHashUnknown += 1 + subtaskHistoryUnknown += 1 + continue + } + const systemMatch = parentFirstCall.context.systemPromptHash === firstCall.context.systemPromptHash + const historyMatch = parentFirstCall.context.messageHistoryHash === firstCall.context.messageHistoryHash + + if (systemMatch) { + subtaskHashMatch += 1 + if (!historyMatch) { + subtaskSystemMatchHistoryDiff += 1 + } + } else { + subtaskHashDiff += 1 + } + + if (historyMatch) { + subtaskHistoryMatch += 1 + } else { + subtaskHistoryDiff += 1 + } + } + const subtaskSessionsWithCallData = subtaskHashMatch + subtaskHashDiff + subtaskHashUnknown + + const latency = completed.map((call) => call.totalMs ?? 0) + const ttft = completed + .map((call) => call.firstChunkMs ?? 0) + .filter((value) => value > 0) + const streamCreated = completed + .map((call) => call.streamCreatedMs ?? 0) + .filter((value) => value > 0) + + console.log(`\n=== ${result.name} ===`) + console.log(`total runtime: ${Math.round(result.totalDurationMs)}ms`) + console.log(`sessions touched: ${result.sessions.length}`) + console.log(`llm calls: ${allCalls.length} total, ${completed.length} completed, ${failed.length} failed`) + if (active.length > 0) { + console.log(`active llm calls at snapshot: ${active.length}`) + } + console.log(`output tokens: ${tokenTotals.output} (input ${tokenTotals.input}, reasoning ${tokenTotals.reasoning})`) + console.log( + `cache tokens: read ${tokenTotals.cacheRead} (${Math.round((tokenTotals.cacheRead / Math.max(1, completed.length)) * 10_000) / 10_000} ` + + `avg, ${Math.round((tokenTotals.cacheRead / Math.max(1, tokenTotals.input + tokenTotals.output || 1)) * 100)}% of in+out), ` + + `write ${tokenTotals.cacheWrite}`, + ) + console.log( + `llm latency total: ${Math.round(totalLlmMs)}ms (mean ${Math.round(totalLlmMs / Math.max(1, completed.length))}ms)`, + ) + console.log(`ttft total: ${Math.round(totalFirstChunk)}ms`) + console.log(`stream-init total: ${Math.round(totalStreamCreated)}ms`) + if (result.totalDurationMs > 0) { + console.log(`llm share of wall time: ${((totalLlmMs / result.totalDurationMs) * 100).toFixed(1)}%`) + } + if (latency.length) { + console.log( + `latency p50=${percentile(latency, 0.5)}ms p95=${percentile(latency, 0.95)}ms p99=${percentile(latency, 0.99)}ms`, + ) + } + if (ttft.length) { + console.log(`ttft p50=${percentile(ttft, 0.5)}ms p95=${percentile(ttft, 0.95)}ms p99=${percentile(ttft, 0.99)}ms`) + } + if (streamCreated.length) { + console.log( + `stream-init p50=${percentile(streamCreated, 0.5)}ms p95=${percentile(streamCreated, 0.95)}ms p99=${percentile( + streamCreated, + 0.99, + )}ms`, + ) + } + if (processorCalls.length > 0) { + console.log( + `system prompt hashes: ${processorCalls.length} processor calls, ${processorHashStats.uniqueCount} unique; ` + + `reused from earlier ${processorHashStats.reusedCount} (${Math.round((processorHashStats.reusedCount / processorCalls.length) * 100)}%)`, + ) + console.log( + `full-prompt history hashes: ${processorCalls.length} processor calls, ${historyHashStats.uniqueCount} unique; ` + + `reused from earlier ${historyHashStats.reusedCount} (${Math.round((historyHashStats.reusedCount / processorCalls.length) * 100)}%)`, + ) + } + if (subtaskSessionsWithCallData > 0) { + console.log( + `subtask first-call system hash: match=${subtaskHashMatch} diff=${subtaskHashDiff} unknown=${subtaskHashUnknown} ` + + `(match ${Math.round((subtaskHashMatch / subtaskSessionsWithCallData) * 100)}%)`, + ) + console.log( + `subtask first-call full prompt hash: match=${subtaskHistoryMatch} diff=${subtaskHistoryDiff} unknown=${subtaskHistoryUnknown} ` + + `(match ${Math.round((subtaskHistoryMatch / subtaskSessionsWithCallData) * 100)}%)`, + ) + if (subtaskHashMatch > 0) { + console.log( + `system-match but different full prompt: ${subtaskSystemMatchHistoryDiff} of ${subtaskHashMatch} ` + + `(${Math.round((subtaskSystemMatchHistoryDiff / Math.max(1, subtaskHashMatch)) * 100)}%)`, + ) + } + } + + const byDepthStats = [...completedByDepth.entries()] + .sort((a, b) => a[0] - b[0]) + .map(([depth, items]) => `depth ${depth}: calls=${items.length} p50=${percentile(items, 0.5)}ms p95=${percentile(items, 0.95)}ms`) + if (byDepthStats.length) { + console.log("depth breakdown:") + for (const row of byDepthStats) console.log(` ${row}`) + } + + const bySource = new Map() + for (const call of completed) { + const bucket = bySource.get(call.source) ?? [] + bucket.push(call.totalMs ?? 0) + bySource.set(call.source, bucket) + } + const sourceStats = [...bySource.entries()] + .map(([source, items]) => `${source}: ${items.length} p50=${percentile(items, 0.5)}ms p95=${percentile(items, 0.95)}ms`) + .sort() + if (sourceStats.length) { + console.log("source breakdown:") + for (const row of sourceStats) console.log(` ${row}`) + } + + const slowest = [...completed] + .sort((a, b) => (b.totalMs ?? 0) - (a.totalMs ?? 0)) + .slice(0, 5) + .map((call) => ({ + callID: call.callID, + sessionID: call.sessionID, + depth: sessionDepth.get(call.sessionID), + ms: call.totalMs ?? 0, + outputTokens: call.usage?.outputTokens ?? 0, + promptChars: call.context.messageChars + call.context.systemChars, + serializedChars: call.context.serializedMessageChars, + model: `${call.providerID}/${call.modelID}`, + modelPromptTokens: call.context.approxPromptTokens, + systemPromptHash: call.context.systemPromptHash, + messageHistoryHash: call.context.messageHistoryHash, + cacheRead: call.usage?.cacheReadTokens ?? 0, + cacheWrite: call.usage?.cacheWriteTokens ?? 0, + ttft: call.firstChunkMs ?? 0, + streamMs: call.streamCreatedMs ?? 0, + })) + + console.log("top slowest processor calls:") + for (const row of slowest) { + console.log( + ` ${row.callID} ${row.model} session=${row.sessionID} depth=${row.depth} ${row.ms}ms output=${row.outputTokens} ` + + `prompt=${row.promptChars} serialized=${row.serializedChars} approxTokens=${row.modelPromptTokens} hash=${hashPrefix( + row.systemPromptHash, + )} history=${hashPrefix(row.messageHistoryHash)} cacheRead=${row.cacheRead} cacheWrite=${row.cacheWrite} ` + + `ttft=${row.ttft} stream=${row.streamMs}`, + ) + } + + const tokenBySize = [...processorCalls].sort( + (a, b) => (b.context.messageChars + b.context.systemChars) - (a.context.messageChars + a.context.systemChars), + ) + console.log("top context-size processor calls:") + for (const call of tokenBySize.slice(0, 3)) { + const promptChars = call.context.messageChars + call.context.systemChars + const tokens = call.usage?.outputTokens ?? 0 + console.log( + ` ${call.callID} ${call.sessionID} depth=${sessionDepth.get(call.sessionID)} ${promptChars} chars, output=${tokens}, tools=${ + call.context.toolNames.slice(0, 5).join(",") + }, systemHash=${hashPrefix(call.context.systemPromptHash)} historyHash=${hashPrefix(call.context.messageHistoryHash)} ` + + `cacheRead=${call.usage?.cacheReadTokens ?? 0} cacheWrite=${call.usage?.cacheWriteTokens ?? 0}`, + ) + } + + const sessionCallCount = new Map() + const sessionDuration = new Map() + for (const call of allCalls) { + sessionCallCount.set(call.sessionID, (sessionCallCount.get(call.sessionID) ?? 0) + 1) + sessionDuration.set(call.sessionID, sessionDuration.get(call.sessionID) ?? 0) + const base = sessionDuration.get(call.sessionID) ?? 0 + sessionDuration.set(call.sessionID, base + (call.totalMs ?? 0)) + } + const sessionRows = [...sessionDuration.entries()] + .sort((a, b) => b[1] - a[1]) + .slice(0, 10) + if (sessionRows.length) { + console.log("sessions by llm time:") + for (const [sessionID, ms] of sessionRows) { + console.log( + ` ${sessionID} calls=${sessionCallCount.get(sessionID) ?? 0} total=${Math.round(ms)}ms avg=${ + Math.round((ms / (sessionCallCount.get(sessionID) ?? 1)) || 0) + }ms`, + ) + } + } +} + +async function runCase(def: TestCase) { + const model = await readModel() + const root = await Session.createNext({ + title: `profiling-${Identifier.ascending("session")}-${def.name}`, + directory: process.cwd(), + permission: [{ permission: "read", action: "allow", pattern: "*" }], + }) + const sessions = new Set() + const started = performance.now() + try { + await executeSubtaskTree({ + sessionID: root.id, + depthRemaining: def.depth, + path: def.name, + breadth: def.breadth, + model, + sessions, + }) + } finally { + // no-op cleanup is handled by caller + } + const totalDurationMs = performance.now() - started + const snapshot = LLMProfiler.snapshot() + + return { + name: def.name, + totalDurationMs, + sessionID: root.id, + sessions: [...sessions], + calls: snapshot.calls, + } +} + +async function main() { + const logLevelEnv = process.env.VOLTCODE_LOG_LEVEL?.toUpperCase() + const parsedLogLevel = ["TRACE", "DEBUG", "INFO", "WARN", "ERROR"].includes(logLevelEnv ?? "") + ? (logLevelEnv as Log.Level) + : "INFO" + await Log.init({ + print: true, + dev: true, + level: parsedLogLevel, + }) + + const runDepth = Math.max(0, Number(toArg("depth", "1"))) + const runBreadth = Math.max(1, Number(toArg("breadth", "1"))) + const runModel = toArg("model", MODEL.modelID) + if (runModel) { + MODEL.modelID = runModel + } + const casesArg = toArg("cases", "short") + const selectedCases = casesArg === "full" ? CASES : [ + { name: "baseline-no-subtask", depth: 0, breadth: 0 }, + { name: "single-subtask", depth: 1, breadth: 1 }, + { name: "short-tree", depth: runDepth, breadth: Math.min(runBreadth, 2) }, + ] as TestCase[] + const exportJson = toArg("json", "") + + const model = await Provider.getModel(MODEL.providerID, MODEL.modelID) + console.log(`Loaded model: ${model.providerID}/${model.id}`) + console.log(`Running ${selectedCases.length} case(s) with Opus 4.6`) + + const results = [] + for (const item of selectedCases) { + LLMProfiler.setEnabled(true) + LLMProfiler.clear() + const result = await runCase(item) + results.push(result) + await summarizeCase(result) + await Session.remove(result.sessionID) + } + + if (exportJson) { + await fs.writeFile( + exportJson, + JSON.stringify( + { + model: `${MODEL.providerID}/${MODEL.modelID}`, + createdAt: new Date().toISOString(), + cases: results, + }, + null, + 2, + ), + ) + } +} + +await bootstrap(process.cwd(), async () => { + await main() +}) diff --git a/packages/voltcode/scripts/subtask-response-latency-exp.ts b/packages/voltcode/scripts/subtask-response-latency-exp.ts new file mode 100644 index 000000000..6e553a2b5 --- /dev/null +++ b/packages/voltcode/scripts/subtask-response-latency-exp.ts @@ -0,0 +1,106 @@ +import { Session } from "../src/session" +import { SessionPrompt } from "../src/session/prompt" +import { Provider } from "../src/provider/provider" +import { LLMProfiler } from "../src/session/llm-profiler" +import { Identifier } from "../src/id/id" +import { Log } from "../src/util/log" +import { Instance } from "../src/project/instance" + +function childTextChars(value: string) { + return value.length +} + +async function runCase(name: string, parts: any[]) { + const model = await Provider.getModel("anthropic", "claude-opus-4-6") + const root = await Session.createNext({ + title: `subtask-exp-${name}-${Identifier.ascending("session")}`, + directory: process.cwd(), + permission: [{ permission: "read", action: "allow", pattern: "*" }], + }) + + LLMProfiler.setEnabled(true) + LLMProfiler.clear() + + const before = await Session.messages({ sessionID: root.id }) + const beforeIds = new Set(before.map((x) => x.info.id)) + + await SessionPrompt.prompt({ + sessionID: root.id, + model: { providerID: model.providerID, modelID: model.id }, + agent: "general", + parts, + }) + + const after = await Session.messages({ sessionID: root.id }) + const newMsgs = after.filter((m) => !beforeIds.has(m.info.id)) + const toolParts = newMsgs + .flatMap((m) => m.parts) + .filter((p: any) => p.type === "tool" && p.tool === "task") as any[] + + const allCalls = LLMProfiler.snapshot().calls + const completed = allCalls.filter((c) => c.status === "completed") + + console.log(`\n=== ${name} ===`) + console.log(`root=${root.id} completedCalls=${completed.length}`) + for (const c of completed) { + console.log( + `${c.callID} session=${c.sessionID} source=${c.source} model=${c.providerID}/${c.modelID} outputTok=${c.usage?.outputTokens ?? 0} input=${c.usage?.inputTokens ?? 0} cacheRead=${c.usage?.cacheReadTokens ?? 0} cacheWrite=${c.usage?.cacheWriteTokens ?? 0} totalMs=${c.totalMs ?? 0}`, + ) + } + + if (toolParts.length === 0) { + console.log("No task tool outputs") + } else { + for (let i = 0; i < toolParts.length; i++) { + const part = toolParts[i] + const output = typeof part.state.output === "string" ? part.state.output : "" + const childSession = (part.state?.metadata?.sessionId || "") as string + console.log(`tool#${i + 1} outputChars=${output.length} session=${childSession}`) + console.log(`tool output preview=${JSON.stringify(output.slice(0, 260))}`) + console.log(`tool output tail=${JSON.stringify(output.slice(-260))}`) + if (childSession) { + const childMessages = await Session.messages({ sessionID: childSession }) + const childTexts = childMessages + .flatMap((m) => m.parts) + .filter((p: any) => p.type === "text") + .map((p: any) => p.text) + .join("\n") + console.log(`childTextChars=${childTextChars(childTexts)} preview=${JSON.stringify(childTexts.slice(0, 260))}`) + } + } + } + + await Session.remove(root.id) +} + +await Instance.provide({ + directory: process.cwd(), + init: async () => { + await Log.init({ print: false, dev: true, level: "ERROR" }) + }, + fn: async () => { + await runCase("short-subtask", [ + { + type: "subtask", + agent: "general", + description: "short-task", + prompt: "Give a short 1-paragraph status update and stop.", + model: { providerID: "anthropic", modelID: "claude-opus-4-6" }, + }, + ]) + + await runCase("long-subtask", [ + { + type: "subtask", + agent: "general", + description: "long-task", + prompt: + "You are asked to produce a very detailed design review for a hypothetical module named VoltScheduler, including at least 12 concrete risks, 8 recommended mitigations, 6 concrete follow-up test cases, and a short migration checklist. " + + "Be explicit and granular with names/examples. Include section headers. Return a full response around 700-900 words.", + model: { providerID: "anthropic", modelID: "claude-opus-4-6" }, + }, + ]) + + await runCase("baseline", [{ type: "text", text: "Reply with a concise one-sentence summary of how many subagents you think would help." }]) + }, +}) diff --git a/packages/voltcode/src/session/llm-profiler.ts b/packages/voltcode/src/session/llm-profiler.ts new file mode 100644 index 000000000..dea0e9feb --- /dev/null +++ b/packages/voltcode/src/session/llm-profiler.ts @@ -0,0 +1,266 @@ +import { createHash } from "node:crypto" +import { Identifier } from "@/id/id" + +export namespace LLMProfiler { + export type Source = "session.processor" | "session.summary" | "session.title" | "other" + export type Status = "started" | "completed" | "failed" + + export type Usage = { + inputTokens: number + outputTokens: number + reasoningTokens: number + cacheReadTokens: number + cacheWriteTokens: number + cost: number + } + + export type PromptMetrics = { + messageCount: number + systemMessageCount: number + systemChars: number + messageChars: number + serializedMessageChars: number + systemPromptHash: string + messageHistoryHash: string + toolCount: number + toolNames: string[] + approxPromptTokens: number + } + + export type Call = { + callID: string + sessionID: string + modelID: string + providerID: string + source: Source + small: boolean + messageID?: string + agent?: string + startedAt: number + finishedAt?: number + streamCreatedAt?: number + streamCreatedMs?: number + firstChunkAt?: number + firstChunkMs?: number + totalMs?: number + usage?: Usage + status: Status + error?: string + finishReason?: string + context: PromptMetrics + statusCode?: string + } + + type MutableCall = Omit & { + startedAt: number + context: PromptMetrics + } + + type StartInput = { + sessionID: string + modelID: string + providerID: string + source: Source + small: boolean + context: PromptMetrics + messageID?: string + agent?: string + } + + const enabledEnv = process.env["VOLTCODE_LLM_PROFILE"]?.toLowerCase() + let enabled = enabledEnv === "1" || enabledEnv === "true" || enabledEnv === "yes" + + const activeCalls = new Map() + const completedCalls: MutableCall[] = [] + + const now = () => performance.now() + + export function setEnabled(next: boolean) { + enabled = next + } + + export function clear() { + activeCalls.clear() + completedCalls.length = 0 + } + + export function isEnabled() { + return enabled + } + + function normalizeForHash(value: unknown): unknown { + if (value === null || typeof value !== "object") return value + if (typeof value === "bigint") return value.toString() + if (typeof value === "function") return undefined + if (Array.isArray(value)) return value.map((entry) => normalizeForHash(entry)) + const source = value as Record + const normalized: Record = {} + for (const key of Object.keys(source).sort()) { + const next = normalizeForHash(source[key]) + if (typeof next !== "undefined") normalized[key] = next + } + return normalized + } + + function stableHashString(value: unknown): string { + return JSON.stringify(normalizeForHash(value)) + } + + export function start(input: StartInput): string { + if (!enabled) return "" + const callID = Identifier.ascending("part") + const call: MutableCall = { + ...input, + callID, + startedAt: now(), + status: "started", + source: input.source, + small: !!input.small, + streamCreatedMs: undefined, + firstChunkMs: undefined, + totalMs: undefined, + usage: undefined, + statusCode: undefined, + context: input.context, + } + activeCalls.set(callID, call) + return callID + } + + export function ensure(callID: string, input: StartInput): string { + if (!enabled) return "" + if (callID) return callID + return start(input) + } + + function get(callID: string) { + return activeCalls.get(callID) + } + + function finalize(callID: string, status: Status) { + const call = activeCalls.get(callID) + if (!call) return + activeCalls.delete(callID) + call.status = status + call.finishedAt = now() + call.totalMs = Math.max(0, Math.round(call.finishedAt - call.startedAt)) + completedCalls.push(call) + } + + export function complete(callID: string, input: { + usage?: Usage + statusCode?: string + finishReason?: string + context?: Partial + }) { + if (!enabled || !callID) return + const call = get(callID) + if (!call) return + if (input.context) { + call.context = { + ...call.context, + ...input.context, + } + } + if (input.usage) call.usage = input.usage + if (input.statusCode) call.statusCode = input.statusCode + if (input.finishReason) call.finishReason = input.finishReason + finalize(callID, "completed") + } + + export function fail(callID: string, error: unknown) { + if (!enabled || !callID) return + const call = get(callID) + if (!call) return + if (error !== undefined) { + call.error = typeof error === "string" ? error : error instanceof Error ? error.message : String(error) + } + finalize(callID, "failed") + } + + export function streamCreated(callID: string, ms: number) { + if (!enabled || !callID) return + const call = get(callID) + if (!call) return + call.streamCreatedAt = now() + call.streamCreatedMs = ms + } + + export function firstChunk(callID: string, ms: number) { + if (!enabled || !callID) return + const call = get(callID) + if (!call) return + call.firstChunkAt = now() + call.firstChunkMs = ms + } + + export function snapshot() { + return { + enabled, + calls: [...Array.from(activeCalls.values()), ...completedCalls].sort((a, b) => a.startedAt - b.startedAt), + activeCount: activeCalls.size, + completedCount: completedCalls.length, + } + } + + export function buildPromptMetrics(input: { + system: string[] + messages: Array<{ content: string | unknown[] }> + tools: Record + small: boolean + }): PromptMetrics { + const systemText = input.system.join("\n") + const systemPromptHash = createHash("sha256").update(systemText).digest("hex") + const messageHistoryHash = createHash("sha256") + .update(stableHashString({ system: input.system, messages: input.messages })) + .digest("hex") + const messageText = input.messages + .map((message) => { + const content = Array.isArray(message.content) + ? message.content + .map((part: any) => { + if (part == null) return "" + if (typeof part.text === "string") return part.text + if (typeof part.content === "string") return part.content + return JSON.stringify(part) + }) + .join("") + : typeof message.content === "string" + ? message.content + : JSON.stringify(message.content ?? "") + return content + }) + .join("\n") + + const serializedMessageChars = JSON.stringify(input.messages).length + const messageChars = input.messages.reduce((sum, message) => { + const normalized = typeof message.content === "string" + ? message.content + : Array.isArray(message.content) + ? message.content + .map((part: any) => { + if (part == null) return "" + if (typeof part.text === "string") return part.text + if (typeof part.content === "string") return part.content + return JSON.stringify(part) + }) + .join("") + : JSON.stringify(message.content ?? "") + return sum + normalized.length + }, 0) + + const contextChars = messageText.length + systemText.length + return { + systemMessageCount: input.system.length, + systemChars: systemText.length, + messageCount: input.messages.length, + messageChars, + serializedMessageChars, + systemPromptHash, + messageHistoryHash, + toolCount: Object.keys(input.tools).length, + toolNames: Object.keys(input.tools), + approxPromptTokens: Math.max(1, Math.round(contextChars / 4)), + } + } +} diff --git a/packages/voltcode/src/session/llm.ts b/packages/voltcode/src/session/llm.ts index 6449628dd..5eb066522 100644 --- a/packages/voltcode/src/session/llm.ts +++ b/packages/voltcode/src/session/llm.ts @@ -23,6 +23,7 @@ import { SystemPrompt } from "./system" import { Flag } from "@/flag/flag" import { PermissionNext } from "@/permission/next" import { Auth } from "@/auth" +import { LLMProfiler } from "./llm-profiler" export namespace LLM { const log = Log.create({ service: "llm" }) @@ -38,6 +39,7 @@ export namespace LLM { abort: AbortSignal messages: ModelMessage[] small?: boolean + traceID?: string tools: Record retries?: number /** Pre-assembled system prompt (already transformed by plugin). Skips inline assembly when provided. */ @@ -171,6 +173,41 @@ export namespace LLM { }) } + const messages = [ + ...(isCodex + ? [ + { + role: "user", + content: system.join("\n\n"), + } as ModelMessage, + ] + : system.map( + (x): ModelMessage => ({ + role: "system", + content: x, + }), + )), + ...input.messages, + ] + + if (!input.traceID) { + input.traceID = LLMProfiler.start({ + sessionID: input.sessionID, + source: "other", + modelID: input.model.id, + providerID: input.model.providerID, + small: input.small ?? false, + messageID: input.user.id, + agent: input.agent.name, + context: LLMProfiler.buildPromptMetrics({ + system, + messages, + tools: input.tools, + small: input.small ?? false, + }), + }) + } + return streamText({ onError(error) { l.error("stream error", { @@ -229,22 +266,7 @@ export namespace LLM { ...input.model.headers, }, maxRetries: input.retries ?? 0, - messages: [ - ...(isCodex - ? [ - { - role: "user", - content: system.join("\n\n"), - } as ModelMessage, - ] - : system.map( - (x): ModelMessage => ({ - role: "system", - content: x, - }), - )), - ...input.messages, - ], + messages, model: wrapLanguageModel({ model: language as any, middleware: [ diff --git a/packages/voltcode/src/session/processor.ts b/packages/voltcode/src/session/processor.ts index ca1e2ba61..038743767 100644 --- a/packages/voltcode/src/session/processor.ts +++ b/packages/voltcode/src/session/processor.ts @@ -20,6 +20,7 @@ import { Flag } from "@/flag/flag" import { handleLargeToolOutput, LARGE_TOOL_OUTPUT_THRESHOLD } from "./large-tool-output" import type { LcmToolMetadata } from "./lcm/types" import { Token } from "@/util/token" +import { LLMProfiler } from "./llm-profiler" export namespace SessionProcessor { const DOOM_LOOP_THRESHOLD = 3 @@ -336,11 +337,32 @@ export namespace SessionProcessor { let currentText: MessageV2.TextPart | undefined let reasoningMap: Record = {} const llmStreamStart = performance.now() - stream = await LLM.stream(streamInput) + const callID = LLMProfiler.start({ + sessionID: input.sessionID, + source: "session.processor", + modelID: input.model.id, + providerID: input.model.providerID, + small: streamInput.small ?? false, + messageID: input.assistantMessage.id, + agent: input.assistantMessage.agent, + context: LLMProfiler.buildPromptMetrics({ + system: streamInput.system, + messages: streamInput.messages, + tools: streamInput.tools, + small: streamInput.small ?? false, + }), + }) + streamInput.traceID = callID + const streamInputWithTrace = { + ...streamInput, + traceID: callID, + } + stream = await LLM.stream(streamInputWithTrace) log.trace("process.timing.llmStreamCreated", { sessionID: input.sessionID, ms: Math.round(performance.now() - llmStreamStart), }) + LLMProfiler.streamCreated(callID, Math.round(performance.now() - llmStreamStart)) // Use dedicated chunk timeout from Flag, separate from fetch timeout // Set to 0 to disable chunk timeout entirely @@ -364,6 +386,7 @@ export namespace SessionProcessor { sessionID: input.sessionID, ms: Math.round(performance.now() - llmStreamStart), }) + LLMProfiler.firstChunk(callID, Math.round(performance.now() - llmStreamStart)) firstChunk = false } input.abort.throwIfAborted() @@ -778,6 +801,17 @@ export namespace SessionProcessor { input.assistantMessage.cost += usage.cost input.assistantMessage.tokens = usage.tokens await Session.updateMessage(input.assistantMessage) + LLMProfiler.complete(callID, { + usage: { + inputTokens: usage.tokens.input, + outputTokens: usage.tokens.output, + reasoningTokens: usage.tokens.reasoning, + cacheReadTokens: usage.tokens.cache.read, + cacheWriteTokens: usage.tokens.cache.write, + cost: usage.cost, + }, + finishReason: input.assistantMessage.finish, + }) log.info("usage.summary", { sessionID: input.sessionID, messageID: input.assistantMessage.id, @@ -793,9 +827,14 @@ export namespace SessionProcessor { providerID: input.model.providerID, modelID: input.model.id, }) + LLMProfiler.fail(callID, "usage missing") } } } catch (e: any) { + const callID = streamInput.traceID + if (callID) { + LLMProfiler.fail(callID, e) + } log.error("process", { error: e, stack: JSON.stringify(e.stack), diff --git a/packages/voltcode/src/session/prompt.ts b/packages/voltcode/src/session/prompt.ts index 9e158a2a8..305deafbe 100644 --- a/packages/voltcode/src/session/prompt.ts +++ b/packages/voltcode/src/session/prompt.ts @@ -1980,6 +1980,7 @@ export namespace SessionPrompt { }) }, } + const subtaskStart = performance.now() const result = await taskTool.execute(taskArgs, taskCtx).catch((error) => { executionError = error log.error("subtask execution failed", { error, agent: task.agent, description: task.description }) @@ -1994,6 +1995,14 @@ export namespace SessionPrompt { }, result, ) + log.trace("loop.timing.subtaskExecution", { + sessionID, + messageID: assistantMessage.id, + task: task.agent, + ms: Math.round(performance.now() - subtaskStart), + success: !executionError, + childSessionID: result?.metadata?.sessionId, + }) assistantMessage.finish = "tool-calls" assistantMessage.time.completed = Date.now() await Session.updateMessage(assistantMessage)