diff --git a/packages/voltcode/package.json b/packages/voltcode/package.json index 648eca762..fa1341191 100644 --- a/packages/voltcode/package.json +++ b/packages/voltcode/package.json @@ -14,6 +14,7 @@ "control-plane": "bun run src/control-plane/main.ts", "scheduler": "bun run src/control-plane/scheduler.ts", "check-status": "bun run script/check_status.ts", + "profile-llm-calls": "bun run scripts/profile-llm-calls.ts --cases short --depth 1 --breadth 1", "random": "echo 'Random script updated at $(date)' && echo 'Change queued successfully' && echo 'Another change made' && echo 'Yet another change' && echo 'One more change' && echo 'Final change' && echo 'Another final change' && echo 'Yet another final change'", "clean": "echo 'Cleaning up...' && rm -rf node_modules dist", "lint": "echo 'Running lint checks...' && bun test --coverage", diff --git a/packages/voltcode/scripts/profile-llm-calls.ts b/packages/voltcode/scripts/profile-llm-calls.ts new file mode 100644 index 000000000..1878c5915 --- /dev/null +++ b/packages/voltcode/scripts/profile-llm-calls.ts @@ -0,0 +1,521 @@ +import { Session } from "../src/session" +import { SessionPrompt } from "../src/session/prompt" +import { Provider } from "../src/provider/provider" +import { bootstrap } from "../src/cli/bootstrap" +import { LLMProfiler } from "../src/session/llm-profiler" +import { Identifier } from "../src/id/id" +import { promises as fs } from "fs" +import { Log } from "../src/util/log" + +const MODEL = { + providerID: "anthropic", + modelID: "claude-opus-4-6", +} + +type TestCase = { + name: string + depth: number + breadth: number +} + +type RunResult = { + name: string + totalDurationMs: number + sessionID: string + sessions: string[] + calls: ReturnType["calls"] +} + +const CASES: TestCase[] = [ + { name: "baseline-no-subtask", depth: 0, breadth: 0 }, + { name: "single-subtask", depth: 1, breadth: 1 }, + { name: "tree-depth-2-breadth-2", depth: 2, breadth: 2 }, +] + +function toArg(name: string, fallback: string): string { + const index = process.argv.findIndex((item) => item === `--${name}`) + if (index === -1) return fallback + return process.argv[index + 1] ?? fallback +} + +async function readModel() { + return Provider.getModel(MODEL.providerID, MODEL.modelID) +} + +function buildSubtaskPrompt(parts: { sessionID: string; depth: number; index: number }) { + return [ + { + type: "subtask", + agent: "general", + description: `tree-${parts.depth}-node-${parts.index} for ${parts.sessionID}`, + prompt: "Perform this leaf task with one short sentence and stop.", + model: MODEL, + }, + ] as const +} + +function buildLeafPrompt() { + return [{ type: "text", text: "Reply with a short acknowledgement and stop." }] +} + +async function collectChildren( + beforeMessageIDs: Set, + sessionID: string, +): Promise { + const messages = await Session.messages({ sessionID }) + const newMessages = messages.filter((message) => !beforeMessageIDs.has(message.info.id)) + const childSessionIDs = new Set() + + for (const message of newMessages) { + if (message.info.role !== "assistant") continue + for (const part of message.parts) { + if (part.type !== "tool" || part.tool !== "task") continue + const state = part.state as { metadata?: { sessionId?: string }; output?: string } + const metaSession = state.metadata?.sessionId + const outputSession = typeof part.state === "object" && typeof state.output === "string" ? state.output : undefined + const fromMetadata = metaSession && childSessionIDs.has(metaSession) ? undefined : metaSession + const fromOutput = outputSession ? outputSession.match(/session_id:\s*([A-Za-z0-9_-]+)/i)?.[1] : undefined + const child = fromMetadata ?? fromOutput + if (child) childSessionIDs.add(child) + } + } + + return [...childSessionIDs] +} + +async function executeSubtaskTree(input: { + sessionID: string + depthRemaining: number + path: string + breadth: number + model: Awaited> + sessions: Set +}) { + input.sessions.add(input.sessionID) + + const parts = + input.depthRemaining > 0 + ? Array.from({ length: input.breadth }, (_, index) => + buildSubtaskPrompt({ + sessionID: input.sessionID, + depth: input.depthRemaining, + index: index + 1, + }), + ).flat() + : buildLeafPrompt() + + const before = await Session.messages({ sessionID: input.sessionID }) + const beforeIDs = new Set(before.map((msg) => msg.info.id)) + + await SessionPrompt.prompt({ + sessionID: input.sessionID, + model: { + providerID: input.model.providerID, + modelID: input.model.id, + }, + agent: "general", + parts, + }) + + if (input.depthRemaining === 0) return + + const childSessions = await collectChildren(beforeIDs, input.sessionID) + for (let i = 0; i < childSessions.length; i++) { + await executeSubtaskTree({ + sessionID: childSessions[i], + depthRemaining: input.depthRemaining - 1, + path: `${input.path}.${i + 1}`, + breadth: input.breadth, + model: input.model, + sessions: input.sessions, + }) + } +} + +function percentile(values: number[], ratio: number): number { + if (values.length === 0) return 0 + const sorted = [...values].sort((a, b) => a - b) + const index = Math.min(sorted.length - 1, Math.max(0, Math.ceil(ratio * sorted.length) - 1)) + return sorted[index] ?? 0 +} + +function hashPrefix(hash: string): string { + return hash.slice(0, 10) +} + +function hashReuseStats(values: string[]) { + const seen = new Set() + let reused = 0 + for (const value of values) { + if (seen.has(value)) { + reused += 1 + } else { + seen.add(value) + } + } + return { uniqueCount: seen.size, reusedCount: reused } +} + +async function summarizeCase(result: RunResult) { + const allCalls = result.calls + const completed = allCalls.filter((call) => call.status === "completed") + const failed = allCalls.filter((call) => call.status === "failed") + const active = allCalls.filter((call) => call.status === "started") + + const sessionDepth = new Map() + const sessionParent = new Map() + for (const id of result.sessions) { + if (sessionDepth.has(id)) continue + const root = await Session.get(id).catch(() => undefined) + if (!root) continue + sessionParent.set(id, root.parentID) + let depth = 0 + let cursor: string | undefined = root.parentID + const visited = new Set() + while (cursor) { + if (visited.has(cursor)) break + visited.add(cursor) + if (sessionDepth.has(cursor)) { + depth += sessionDepth.get(cursor)! + 1 + break + } + const parent = await Session.get(cursor).catch(() => undefined) + if (!parent?.parentID) break + cursor = parent.parentID + depth += 1 + } + sessionDepth.set(id, Math.max(0, depth)) + } + + const callsWithDepth = allCalls.map((call) => ({ + ...call, + depth: sessionDepth.get(call.sessionID) ?? 0, + })) + + const totalLlmMs = completed.reduce((sum, call) => sum + (call.totalMs ?? 0), 0) + const totalFirstChunk = completed.reduce((sum, call) => sum + (call.firstChunkMs ?? 0), 0) + const totalStreamCreated = completed.reduce((sum, call) => sum + (call.streamCreatedMs ?? 0), 0) + + const completedByDepth = new Map() + for (const call of completed) { + const depth = sessionDepth.get(call.sessionID) ?? 0 + const bucket = completedByDepth.get(depth) ?? [] + bucket.push(call.totalMs ?? 0) + completedByDepth.set(depth, bucket) + } + + const tokenTotals = completed.reduce( + (acc, call) => { + acc.input += call.usage?.inputTokens ?? 0 + acc.output += call.usage?.outputTokens ?? 0 + acc.reasoning += call.usage?.reasoningTokens ?? 0 + acc.cacheRead += call.usage?.cacheReadTokens ?? 0 + acc.cacheWrite += call.usage?.cacheWriteTokens ?? 0 + return acc + }, + { input: 0, output: 0, reasoning: 0, cacheRead: 0, cacheWrite: 0 }, + ) + + const processorCalls = completed.filter((call) => call.source === "session.processor") + const orderedCompleted = [...completed].sort((a, b) => a.startedAt - b.startedAt) + const firstCallBySession = new Map() + for (const call of orderedCompleted) { + if (!firstCallBySession.has(call.sessionID)) firstCallBySession.set(call.sessionID, call) + } + + const processorHashes = processorCalls.map((call) => call.context.systemPromptHash) + const processorHistoryHashes = processorCalls.map((call) => call.context.messageHistoryHash) + const processorHashStats = hashReuseStats(processorHashes) + const historyHashStats = hashReuseStats(processorHistoryHashes) + + let subtaskHashMatch = 0 + let subtaskHashDiff = 0 + let subtaskHashUnknown = 0 + let subtaskHistoryMatch = 0 + let subtaskHistoryDiff = 0 + let subtaskHistoryUnknown = 0 + let subtaskSystemMatchHistoryDiff = 0 + for (const [sessionID, firstCall] of firstCallBySession) { + const parentID = sessionParent.get(sessionID) + if (!parentID) continue + const parentFirstCall = firstCallBySession.get(parentID) + if (!parentFirstCall) { + subtaskHashUnknown += 1 + subtaskHistoryUnknown += 1 + continue + } + const systemMatch = parentFirstCall.context.systemPromptHash === firstCall.context.systemPromptHash + const historyMatch = parentFirstCall.context.messageHistoryHash === firstCall.context.messageHistoryHash + + if (systemMatch) { + subtaskHashMatch += 1 + if (!historyMatch) { + subtaskSystemMatchHistoryDiff += 1 + } + } else { + subtaskHashDiff += 1 + } + + if (historyMatch) { + subtaskHistoryMatch += 1 + } else { + subtaskHistoryDiff += 1 + } + } + const subtaskSessionsWithCallData = subtaskHashMatch + subtaskHashDiff + subtaskHashUnknown + + const latency = completed.map((call) => call.totalMs ?? 0) + const ttft = completed + .map((call) => call.firstChunkMs ?? 0) + .filter((value) => value > 0) + const streamCreated = completed + .map((call) => call.streamCreatedMs ?? 0) + .filter((value) => value > 0) + + console.log(`\n=== ${result.name} ===`) + console.log(`total runtime: ${Math.round(result.totalDurationMs)}ms`) + console.log(`sessions touched: ${result.sessions.length}`) + console.log(`llm calls: ${allCalls.length} total, ${completed.length} completed, ${failed.length} failed`) + if (active.length > 0) { + console.log(`active llm calls at snapshot: ${active.length}`) + } + console.log(`output tokens: ${tokenTotals.output} (input ${tokenTotals.input}, reasoning ${tokenTotals.reasoning})`) + console.log( + `cache tokens: read ${tokenTotals.cacheRead} (${Math.round((tokenTotals.cacheRead / Math.max(1, completed.length)) * 10_000) / 10_000} ` + + `avg, ${Math.round((tokenTotals.cacheRead / Math.max(1, tokenTotals.input + tokenTotals.output || 1)) * 100)}% of in+out), ` + + `write ${tokenTotals.cacheWrite}`, + ) + console.log( + `llm latency total: ${Math.round(totalLlmMs)}ms (mean ${Math.round(totalLlmMs / Math.max(1, completed.length))}ms)`, + ) + console.log(`ttft total: ${Math.round(totalFirstChunk)}ms`) + console.log(`stream-init total: ${Math.round(totalStreamCreated)}ms`) + if (result.totalDurationMs > 0) { + console.log(`llm share of wall time: ${((totalLlmMs / result.totalDurationMs) * 100).toFixed(1)}%`) + } + if (latency.length) { + console.log( + `latency p50=${percentile(latency, 0.5)}ms p95=${percentile(latency, 0.95)}ms p99=${percentile(latency, 0.99)}ms`, + ) + } + if (ttft.length) { + console.log(`ttft p50=${percentile(ttft, 0.5)}ms p95=${percentile(ttft, 0.95)}ms p99=${percentile(ttft, 0.99)}ms`) + } + if (streamCreated.length) { + console.log( + `stream-init p50=${percentile(streamCreated, 0.5)}ms p95=${percentile(streamCreated, 0.95)}ms p99=${percentile( + streamCreated, + 0.99, + )}ms`, + ) + } + if (processorCalls.length > 0) { + console.log( + `system prompt hashes: ${processorCalls.length} processor calls, ${processorHashStats.uniqueCount} unique; ` + + `reused from earlier ${processorHashStats.reusedCount} (${Math.round((processorHashStats.reusedCount / processorCalls.length) * 100)}%)`, + ) + console.log( + `full-prompt history hashes: ${processorCalls.length} processor calls, ${historyHashStats.uniqueCount} unique; ` + + `reused from earlier ${historyHashStats.reusedCount} (${Math.round((historyHashStats.reusedCount / processorCalls.length) * 100)}%)`, + ) + } + if (subtaskSessionsWithCallData > 0) { + console.log( + `subtask first-call system hash: match=${subtaskHashMatch} diff=${subtaskHashDiff} unknown=${subtaskHashUnknown} ` + + `(match ${Math.round((subtaskHashMatch / subtaskSessionsWithCallData) * 100)}%)`, + ) + console.log( + `subtask first-call full prompt hash: match=${subtaskHistoryMatch} diff=${subtaskHistoryDiff} unknown=${subtaskHistoryUnknown} ` + + `(match ${Math.round((subtaskHistoryMatch / subtaskSessionsWithCallData) * 100)}%)`, + ) + if (subtaskHashMatch > 0) { + console.log( + `system-match but different full prompt: ${subtaskSystemMatchHistoryDiff} of ${subtaskHashMatch} ` + + `(${Math.round((subtaskSystemMatchHistoryDiff / Math.max(1, subtaskHashMatch)) * 100)}%)`, + ) + } + } + + const byDepthStats = [...completedByDepth.entries()] + .sort((a, b) => a[0] - b[0]) + .map(([depth, items]) => `depth ${depth}: calls=${items.length} p50=${percentile(items, 0.5)}ms p95=${percentile(items, 0.95)}ms`) + if (byDepthStats.length) { + console.log("depth breakdown:") + for (const row of byDepthStats) console.log(` ${row}`) + } + + const bySource = new Map() + for (const call of completed) { + const bucket = bySource.get(call.source) ?? [] + bucket.push(call.totalMs ?? 0) + bySource.set(call.source, bucket) + } + const sourceStats = [...bySource.entries()] + .map(([source, items]) => `${source}: ${items.length} p50=${percentile(items, 0.5)}ms p95=${percentile(items, 0.95)}ms`) + .sort() + if (sourceStats.length) { + console.log("source breakdown:") + for (const row of sourceStats) console.log(` ${row}`) + } + + const slowest = [...completed] + .sort((a, b) => (b.totalMs ?? 0) - (a.totalMs ?? 0)) + .slice(0, 5) + .map((call) => ({ + callID: call.callID, + sessionID: call.sessionID, + depth: sessionDepth.get(call.sessionID), + ms: call.totalMs ?? 0, + outputTokens: call.usage?.outputTokens ?? 0, + promptChars: call.context.messageChars + call.context.systemChars, + serializedChars: call.context.serializedMessageChars, + model: `${call.providerID}/${call.modelID}`, + modelPromptTokens: call.context.approxPromptTokens, + systemPromptHash: call.context.systemPromptHash, + messageHistoryHash: call.context.messageHistoryHash, + cacheRead: call.usage?.cacheReadTokens ?? 0, + cacheWrite: call.usage?.cacheWriteTokens ?? 0, + ttft: call.firstChunkMs ?? 0, + streamMs: call.streamCreatedMs ?? 0, + })) + + console.log("top slowest processor calls:") + for (const row of slowest) { + console.log( + ` ${row.callID} ${row.model} session=${row.sessionID} depth=${row.depth} ${row.ms}ms output=${row.outputTokens} ` + + `prompt=${row.promptChars} serialized=${row.serializedChars} approxTokens=${row.modelPromptTokens} hash=${hashPrefix( + row.systemPromptHash, + )} history=${hashPrefix(row.messageHistoryHash)} cacheRead=${row.cacheRead} cacheWrite=${row.cacheWrite} ` + + `ttft=${row.ttft} stream=${row.streamMs}`, + ) + } + + const tokenBySize = [...processorCalls].sort( + (a, b) => (b.context.messageChars + b.context.systemChars) - (a.context.messageChars + a.context.systemChars), + ) + console.log("top context-size processor calls:") + for (const call of tokenBySize.slice(0, 3)) { + const promptChars = call.context.messageChars + call.context.systemChars + const tokens = call.usage?.outputTokens ?? 0 + console.log( + ` ${call.callID} ${call.sessionID} depth=${sessionDepth.get(call.sessionID)} ${promptChars} chars, output=${tokens}, tools=${ + call.context.toolNames.slice(0, 5).join(",") + }, systemHash=${hashPrefix(call.context.systemPromptHash)} historyHash=${hashPrefix(call.context.messageHistoryHash)} ` + + `cacheRead=${call.usage?.cacheReadTokens ?? 0} cacheWrite=${call.usage?.cacheWriteTokens ?? 0}`, + ) + } + + const sessionCallCount = new Map() + const sessionDuration = new Map() + for (const call of allCalls) { + sessionCallCount.set(call.sessionID, (sessionCallCount.get(call.sessionID) ?? 0) + 1) + sessionDuration.set(call.sessionID, sessionDuration.get(call.sessionID) ?? 0) + const base = sessionDuration.get(call.sessionID) ?? 0 + sessionDuration.set(call.sessionID, base + (call.totalMs ?? 0)) + } + const sessionRows = [...sessionDuration.entries()] + .sort((a, b) => b[1] - a[1]) + .slice(0, 10) + if (sessionRows.length) { + console.log("sessions by llm time:") + for (const [sessionID, ms] of sessionRows) { + console.log( + ` ${sessionID} calls=${sessionCallCount.get(sessionID) ?? 0} total=${Math.round(ms)}ms avg=${ + Math.round((ms / (sessionCallCount.get(sessionID) ?? 1)) || 0) + }ms`, + ) + } + } +} + +async function runCase(def: TestCase) { + const model = await readModel() + const root = await Session.createNext({ + title: `profiling-${Identifier.ascending("session")}-${def.name}`, + directory: process.cwd(), + permission: [{ permission: "read", action: "allow", pattern: "*" }], + }) + const sessions = new Set() + const started = performance.now() + try { + await executeSubtaskTree({ + sessionID: root.id, + depthRemaining: def.depth, + path: def.name, + breadth: def.breadth, + model, + sessions, + }) + } finally { + // no-op cleanup is handled by caller + } + const totalDurationMs = performance.now() - started + const snapshot = LLMProfiler.snapshot() + + return { + name: def.name, + totalDurationMs, + sessionID: root.id, + sessions: [...sessions], + calls: snapshot.calls, + } +} + +async function main() { + const logLevelEnv = process.env.VOLTCODE_LOG_LEVEL?.toUpperCase() + const parsedLogLevel = ["TRACE", "DEBUG", "INFO", "WARN", "ERROR"].includes(logLevelEnv ?? "") + ? (logLevelEnv as Log.Level) + : "INFO" + await Log.init({ + print: true, + dev: true, + level: parsedLogLevel, + }) + + const runDepth = Math.max(0, Number(toArg("depth", "1"))) + const runBreadth = Math.max(1, Number(toArg("breadth", "1"))) + const runModel = toArg("model", MODEL.modelID) + if (runModel) { + MODEL.modelID = runModel + } + const casesArg = toArg("cases", "short") + const selectedCases = casesArg === "full" ? CASES : [ + { name: "baseline-no-subtask", depth: 0, breadth: 0 }, + { name: "single-subtask", depth: 1, breadth: 1 }, + { name: "short-tree", depth: runDepth, breadth: Math.min(runBreadth, 2) }, + ] as TestCase[] + const exportJson = toArg("json", "") + + const model = await Provider.getModel(MODEL.providerID, MODEL.modelID) + console.log(`Loaded model: ${model.providerID}/${model.id}`) + console.log(`Running ${selectedCases.length} case(s) with Opus 4.6`) + + const results = [] + for (const item of selectedCases) { + LLMProfiler.setEnabled(true) + LLMProfiler.clear() + const result = await runCase(item) + results.push(result) + await summarizeCase(result) + await Session.remove(result.sessionID) + } + + if (exportJson) { + await fs.writeFile( + exportJson, + JSON.stringify( + { + model: `${MODEL.providerID}/${MODEL.modelID}`, + createdAt: new Date().toISOString(), + cases: results, + }, + null, + 2, + ), + ) + } +} + +await bootstrap(process.cwd(), async () => { + await main() +}) diff --git a/packages/voltcode/scripts/subtask-response-latency-exp.ts b/packages/voltcode/scripts/subtask-response-latency-exp.ts new file mode 100644 index 000000000..6e553a2b5 --- /dev/null +++ b/packages/voltcode/scripts/subtask-response-latency-exp.ts @@ -0,0 +1,106 @@ +import { Session } from "../src/session" +import { SessionPrompt } from "../src/session/prompt" +import { Provider } from "../src/provider/provider" +import { LLMProfiler } from "../src/session/llm-profiler" +import { Identifier } from "../src/id/id" +import { Log } from "../src/util/log" +import { Instance } from "../src/project/instance" + +function childTextChars(value: string) { + return value.length +} + +async function runCase(name: string, parts: any[]) { + const model = await Provider.getModel("anthropic", "claude-opus-4-6") + const root = await Session.createNext({ + title: `subtask-exp-${name}-${Identifier.ascending("session")}`, + directory: process.cwd(), + permission: [{ permission: "read", action: "allow", pattern: "*" }], + }) + + LLMProfiler.setEnabled(true) + LLMProfiler.clear() + + const before = await Session.messages({ sessionID: root.id }) + const beforeIds = new Set(before.map((x) => x.info.id)) + + await SessionPrompt.prompt({ + sessionID: root.id, + model: { providerID: model.providerID, modelID: model.id }, + agent: "general", + parts, + }) + + const after = await Session.messages({ sessionID: root.id }) + const newMsgs = after.filter((m) => !beforeIds.has(m.info.id)) + const toolParts = newMsgs + .flatMap((m) => m.parts) + .filter((p: any) => p.type === "tool" && p.tool === "task") as any[] + + const allCalls = LLMProfiler.snapshot().calls + const completed = allCalls.filter((c) => c.status === "completed") + + console.log(`\n=== ${name} ===`) + console.log(`root=${root.id} completedCalls=${completed.length}`) + for (const c of completed) { + console.log( + `${c.callID} session=${c.sessionID} source=${c.source} model=${c.providerID}/${c.modelID} outputTok=${c.usage?.outputTokens ?? 0} input=${c.usage?.inputTokens ?? 0} cacheRead=${c.usage?.cacheReadTokens ?? 0} cacheWrite=${c.usage?.cacheWriteTokens ?? 0} totalMs=${c.totalMs ?? 0}`, + ) + } + + if (toolParts.length === 0) { + console.log("No task tool outputs") + } else { + for (let i = 0; i < toolParts.length; i++) { + const part = toolParts[i] + const output = typeof part.state.output === "string" ? part.state.output : "" + const childSession = (part.state?.metadata?.sessionId || "") as string + console.log(`tool#${i + 1} outputChars=${output.length} session=${childSession}`) + console.log(`tool output preview=${JSON.stringify(output.slice(0, 260))}`) + console.log(`tool output tail=${JSON.stringify(output.slice(-260))}`) + if (childSession) { + const childMessages = await Session.messages({ sessionID: childSession }) + const childTexts = childMessages + .flatMap((m) => m.parts) + .filter((p: any) => p.type === "text") + .map((p: any) => p.text) + .join("\n") + console.log(`childTextChars=${childTextChars(childTexts)} preview=${JSON.stringify(childTexts.slice(0, 260))}`) + } + } + } + + await Session.remove(root.id) +} + +await Instance.provide({ + directory: process.cwd(), + init: async () => { + await Log.init({ print: false, dev: true, level: "ERROR" }) + }, + fn: async () => { + await runCase("short-subtask", [ + { + type: "subtask", + agent: "general", + description: "short-task", + prompt: "Give a short 1-paragraph status update and stop.", + model: { providerID: "anthropic", modelID: "claude-opus-4-6" }, + }, + ]) + + await runCase("long-subtask", [ + { + type: "subtask", + agent: "general", + description: "long-task", + prompt: + "You are asked to produce a very detailed design review for a hypothetical module named VoltScheduler, including at least 12 concrete risks, 8 recommended mitigations, 6 concrete follow-up test cases, and a short migration checklist. " + + "Be explicit and granular with names/examples. Include section headers. Return a full response around 700-900 words.", + model: { providerID: "anthropic", modelID: "claude-opus-4-6" }, + }, + ]) + + await runCase("baseline", [{ type: "text", text: "Reply with a concise one-sentence summary of how many subagents you think would help." }]) + }, +}) diff --git a/packages/voltcode/src/cli/cmd/tui/component/task-tree-pane.tsx b/packages/voltcode/src/cli/cmd/tui/component/task-tree-pane.tsx index 93d60b99e..68f6c9b76 100644 --- a/packages/voltcode/src/cli/cmd/tui/component/task-tree-pane.tsx +++ b/packages/voltcode/src/cli/cmd/tui/component/task-tree-pane.tsx @@ -36,6 +36,12 @@ interface ToolCallInfo { metadata?: Record } +interface ThinkingInfo { + text: string + isStreaming: boolean + startTime?: number +} + // Flatten jobs into a list for display function flattenJobs(jobs: JobTree[]): FlattenResult { if (jobs.length === 0) return { items: [], maxDepth: 0 } @@ -429,12 +435,13 @@ function ToolCallDetailView(props: { call: ToolCallInfo; tick: number; width: nu function SelectedTaskDetails(props: { item: FlattenedItem | null - sessionData: { prompt: string; toolCalls: ToolCallInfo[] } + sessionData: { prompt: string; toolCalls: ToolCallInfo[]; thinking: ThinkingInfo | null } recentToolCalls: ToolCallInfo[] focusSection: "tree" | "toolcalls" selectedToolIndex: number paneWidth: number isJobRoot: boolean + thinking: ThinkingInfo | null onToolCallClick?: (index: number) => void }) { const { theme } = useTheme() @@ -530,11 +537,80 @@ function SelectedTaskDetails(props: { ) }} + + {(thinking) => { + const thinkingIndex = () => props.recentToolCalls.length + const isSelected = () => props.focusSection === "toolcalls" && thinkingIndex() === props.selectedToolIndex + const bgColor = () => (isSelected() ? RGBA.fromInts(40, 40, 40, 255) : RGBA.fromInts(0, 0, 0, 0)) + const label = () => { + const preview = thinking().text.replace(/\n/g, " ").slice(0, Math.max(20, props.paneWidth - 30)) + return preview ? `${preview}${thinking().text.length > props.paneWidth - 30 ? "..." : ""}` : "" + } + return ( + props.onToolCallClick?.(thinkingIndex())} + > + + {thinking().isStreaming ? "◐" : "●"} + + + {"thinking".padEnd(12)} + + {label()} + + ) + }} + ) } +// Thinking/reasoning detail view — shows tail of CoT, streams in live +function ThinkingDetailView(props: { thinking: ThinkingInfo; tick: number; width: number; height: number }) { + const { theme } = useTheme() + + const lines = createMemo(() => { + // Force reactivity on tick so we re-render while streaming + void props.tick + const text = props.thinking.text + if (!text) return props.thinking.isStreaming ? ["(thinking...)"] : ["(no reasoning content)"] + + const lineWidth = Math.max(40, props.width - 2) + // Wrap all text into lines + const allLines = wrapText(text, lineWidth, 10000) + // Show only what fits in the available height, biased toward the end + const maxLines = Math.max(1, props.height - 3) + if (allLines.length <= maxLines) return allLines + return allLines.slice(-maxLines) + }) + + const duration = () => { + if (!props.thinking.startTime) return "" + return formatLiveDuration(props.thinking.startTime, props.thinking.isStreaming ? undefined : Date.now(), props.tick) + } + + return ( + + + + Thinking + {props.thinking.isStreaming ? " (streaming)" : ""} + + + {duration()} + + + + {(line) => {line}} + + + ) +} + export function TaskTreePane(props: TaskTreePaneProps) { const { data } = useTaskTree() const { theme } = useTheme() @@ -593,7 +669,7 @@ export function TaskTreePane(props: TaskTreePaneProps) { // For job-root, show only the root tool call if (!item || item.type === "job-root") { const job = item?.job - if (!job) return { prompt: "", toolCalls: [] as ToolCallInfo[] } + if (!job) return { prompt: "", toolCalls: [] as ToolCallInfo[], thinking: null as ThinkingInfo | null } // For job root, return just the job's tool call info const toolCall: ToolCallInfo = { @@ -606,7 +682,7 @@ export function TaskTreePane(props: TaskTreePaneProps) { output: job.toolOutput, metadata: {}, } - return { prompt: job.toolTitle, toolCalls: [toolCall] } + return { prompt: job.toolTitle, toolCalls: [toolCall], thinking: null as ThinkingInfo | null } } // For job-child, show the child session's tool calls @@ -629,8 +705,9 @@ export function TaskTreePane(props: TaskTreePaneProps) { } } - // Collect all tool calls from all assistant messages + // Collect all tool calls and find reasoning parts from all assistant messages const toolCalls: ToolCallInfo[] = [] + let thinking: ThinkingInfo | null = null for (const msg of messages) { if (msg.role === "assistant") { const parts = sync.data.part[msg.id] ?? [] @@ -649,11 +726,19 @@ export function TaskTreePane(props: TaskTreePaneProps) { metadata: toolPart.state.metadata, }) } + if (part.type === "reasoning") { + const rp = part as ReasoningPart + thinking = { + text: rp.text, + isStreaming: !rp.time?.end, + startTime: rp.time?.start, + } + } } } } - return { prompt, toolCalls } + return { prompt, toolCalls, thinking } }) // Get last 10 tool calls, newest at bottom @@ -707,14 +792,27 @@ export function TaskTreePane(props: TaskTreePaneProps) { } } + // Total selectable items in the tool call section (tool calls + optional thinking) + const toolSectionCount = createMemo(() => { + const count = recentToolCalls().length + const thinking = selectedSessionData().thinking + return thinking ? count + 1 : count + }) + + // Whether the selected tool index points to the thinking entry + const isThinkingSelected = createMemo(() => { + const thinking = selectedSessionData().thinking + return thinking && selectedToolIndex() === recentToolCalls().length + }) + // Tool call list navigation function moveToolCall(direction: number) { - const calls = recentToolCalls() - if (calls.length === 0) return + const total = toolSectionCount() + if (total === 0) return let next = selectedToolIndex() + direction if (next < 0) next = 0 - if (next >= calls.length) next = calls.length - 1 + if (next >= total) next = total - 1 setSelectedToolIndex(next) } @@ -742,8 +840,8 @@ export function TaskTreePane(props: TaskTreePaneProps) { if (evt.name === "tab") { evt.preventDefault() if (focusSection() === "tree") { - const calls = recentToolCalls() - if (calls.length > 0) { + const total = toolSectionCount() + if (total > 0) { setFocusSection("toolcalls") if (selectedToolIndex() < 0) setSelectedToolIndex(0) } @@ -796,8 +894,8 @@ export function TaskTreePane(props: TaskTreePaneProps) { const items = flattened().items if (items.length > 0) setSelectedIndex(items.length - 1) } else { - const calls = recentToolCalls() - if (calls.length > 0) setSelectedToolIndex(calls.length - 1) + const total = toolSectionCount() + if (total > 0) setSelectedToolIndex(total - 1) } } }) @@ -873,6 +971,7 @@ export function TaskTreePane(props: TaskTreePaneProps) { selectedToolIndex={selectedToolIndex()} paneWidth={paneWidth()} isJobRoot={isJobRootSelected()} + thinking={selectedSessionData().thinking} onToolCallClick={selectToolCall} /> @@ -883,10 +982,22 @@ export function TaskTreePane(props: TaskTreePaneProps) { Select a job or tool call to view details} + when={isThinkingSelected() && selectedSessionData().thinking} + fallback={ + Select a job or tool call to view details} + > + + + } > - + @@ -920,3 +1031,9 @@ interface ToolPart { time?: { start: number; end?: number } } } + +interface ReasoningPart { + type: "reasoning" + text: string + time?: { start: number; end?: number } +} diff --git a/packages/voltcode/src/session/llm-profiler.ts b/packages/voltcode/src/session/llm-profiler.ts new file mode 100644 index 000000000..dea0e9feb --- /dev/null +++ b/packages/voltcode/src/session/llm-profiler.ts @@ -0,0 +1,266 @@ +import { createHash } from "node:crypto" +import { Identifier } from "@/id/id" + +export namespace LLMProfiler { + export type Source = "session.processor" | "session.summary" | "session.title" | "other" + export type Status = "started" | "completed" | "failed" + + export type Usage = { + inputTokens: number + outputTokens: number + reasoningTokens: number + cacheReadTokens: number + cacheWriteTokens: number + cost: number + } + + export type PromptMetrics = { + messageCount: number + systemMessageCount: number + systemChars: number + messageChars: number + serializedMessageChars: number + systemPromptHash: string + messageHistoryHash: string + toolCount: number + toolNames: string[] + approxPromptTokens: number + } + + export type Call = { + callID: string + sessionID: string + modelID: string + providerID: string + source: Source + small: boolean + messageID?: string + agent?: string + startedAt: number + finishedAt?: number + streamCreatedAt?: number + streamCreatedMs?: number + firstChunkAt?: number + firstChunkMs?: number + totalMs?: number + usage?: Usage + status: Status + error?: string + finishReason?: string + context: PromptMetrics + statusCode?: string + } + + type MutableCall = Omit & { + startedAt: number + context: PromptMetrics + } + + type StartInput = { + sessionID: string + modelID: string + providerID: string + source: Source + small: boolean + context: PromptMetrics + messageID?: string + agent?: string + } + + const enabledEnv = process.env["VOLTCODE_LLM_PROFILE"]?.toLowerCase() + let enabled = enabledEnv === "1" || enabledEnv === "true" || enabledEnv === "yes" + + const activeCalls = new Map() + const completedCalls: MutableCall[] = [] + + const now = () => performance.now() + + export function setEnabled(next: boolean) { + enabled = next + } + + export function clear() { + activeCalls.clear() + completedCalls.length = 0 + } + + export function isEnabled() { + return enabled + } + + function normalizeForHash(value: unknown): unknown { + if (value === null || typeof value !== "object") return value + if (typeof value === "bigint") return value.toString() + if (typeof value === "function") return undefined + if (Array.isArray(value)) return value.map((entry) => normalizeForHash(entry)) + const source = value as Record + const normalized: Record = {} + for (const key of Object.keys(source).sort()) { + const next = normalizeForHash(source[key]) + if (typeof next !== "undefined") normalized[key] = next + } + return normalized + } + + function stableHashString(value: unknown): string { + return JSON.stringify(normalizeForHash(value)) + } + + export function start(input: StartInput): string { + if (!enabled) return "" + const callID = Identifier.ascending("part") + const call: MutableCall = { + ...input, + callID, + startedAt: now(), + status: "started", + source: input.source, + small: !!input.small, + streamCreatedMs: undefined, + firstChunkMs: undefined, + totalMs: undefined, + usage: undefined, + statusCode: undefined, + context: input.context, + } + activeCalls.set(callID, call) + return callID + } + + export function ensure(callID: string, input: StartInput): string { + if (!enabled) return "" + if (callID) return callID + return start(input) + } + + function get(callID: string) { + return activeCalls.get(callID) + } + + function finalize(callID: string, status: Status) { + const call = activeCalls.get(callID) + if (!call) return + activeCalls.delete(callID) + call.status = status + call.finishedAt = now() + call.totalMs = Math.max(0, Math.round(call.finishedAt - call.startedAt)) + completedCalls.push(call) + } + + export function complete(callID: string, input: { + usage?: Usage + statusCode?: string + finishReason?: string + context?: Partial + }) { + if (!enabled || !callID) return + const call = get(callID) + if (!call) return + if (input.context) { + call.context = { + ...call.context, + ...input.context, + } + } + if (input.usage) call.usage = input.usage + if (input.statusCode) call.statusCode = input.statusCode + if (input.finishReason) call.finishReason = input.finishReason + finalize(callID, "completed") + } + + export function fail(callID: string, error: unknown) { + if (!enabled || !callID) return + const call = get(callID) + if (!call) return + if (error !== undefined) { + call.error = typeof error === "string" ? error : error instanceof Error ? error.message : String(error) + } + finalize(callID, "failed") + } + + export function streamCreated(callID: string, ms: number) { + if (!enabled || !callID) return + const call = get(callID) + if (!call) return + call.streamCreatedAt = now() + call.streamCreatedMs = ms + } + + export function firstChunk(callID: string, ms: number) { + if (!enabled || !callID) return + const call = get(callID) + if (!call) return + call.firstChunkAt = now() + call.firstChunkMs = ms + } + + export function snapshot() { + return { + enabled, + calls: [...Array.from(activeCalls.values()), ...completedCalls].sort((a, b) => a.startedAt - b.startedAt), + activeCount: activeCalls.size, + completedCount: completedCalls.length, + } + } + + export function buildPromptMetrics(input: { + system: string[] + messages: Array<{ content: string | unknown[] }> + tools: Record + small: boolean + }): PromptMetrics { + const systemText = input.system.join("\n") + const systemPromptHash = createHash("sha256").update(systemText).digest("hex") + const messageHistoryHash = createHash("sha256") + .update(stableHashString({ system: input.system, messages: input.messages })) + .digest("hex") + const messageText = input.messages + .map((message) => { + const content = Array.isArray(message.content) + ? message.content + .map((part: any) => { + if (part == null) return "" + if (typeof part.text === "string") return part.text + if (typeof part.content === "string") return part.content + return JSON.stringify(part) + }) + .join("") + : typeof message.content === "string" + ? message.content + : JSON.stringify(message.content ?? "") + return content + }) + .join("\n") + + const serializedMessageChars = JSON.stringify(input.messages).length + const messageChars = input.messages.reduce((sum, message) => { + const normalized = typeof message.content === "string" + ? message.content + : Array.isArray(message.content) + ? message.content + .map((part: any) => { + if (part == null) return "" + if (typeof part.text === "string") return part.text + if (typeof part.content === "string") return part.content + return JSON.stringify(part) + }) + .join("") + : JSON.stringify(message.content ?? "") + return sum + normalized.length + }, 0) + + const contextChars = messageText.length + systemText.length + return { + systemMessageCount: input.system.length, + systemChars: systemText.length, + messageCount: input.messages.length, + messageChars, + serializedMessageChars, + systemPromptHash, + messageHistoryHash, + toolCount: Object.keys(input.tools).length, + toolNames: Object.keys(input.tools), + approxPromptTokens: Math.max(1, Math.round(contextChars / 4)), + } + } +} diff --git a/packages/voltcode/src/session/llm.ts b/packages/voltcode/src/session/llm.ts index 6449628dd..5eb066522 100644 --- a/packages/voltcode/src/session/llm.ts +++ b/packages/voltcode/src/session/llm.ts @@ -23,6 +23,7 @@ import { SystemPrompt } from "./system" import { Flag } from "@/flag/flag" import { PermissionNext } from "@/permission/next" import { Auth } from "@/auth" +import { LLMProfiler } from "./llm-profiler" export namespace LLM { const log = Log.create({ service: "llm" }) @@ -38,6 +39,7 @@ export namespace LLM { abort: AbortSignal messages: ModelMessage[] small?: boolean + traceID?: string tools: Record retries?: number /** Pre-assembled system prompt (already transformed by plugin). Skips inline assembly when provided. */ @@ -171,6 +173,41 @@ export namespace LLM { }) } + const messages = [ + ...(isCodex + ? [ + { + role: "user", + content: system.join("\n\n"), + } as ModelMessage, + ] + : system.map( + (x): ModelMessage => ({ + role: "system", + content: x, + }), + )), + ...input.messages, + ] + + if (!input.traceID) { + input.traceID = LLMProfiler.start({ + sessionID: input.sessionID, + source: "other", + modelID: input.model.id, + providerID: input.model.providerID, + small: input.small ?? false, + messageID: input.user.id, + agent: input.agent.name, + context: LLMProfiler.buildPromptMetrics({ + system, + messages, + tools: input.tools, + small: input.small ?? false, + }), + }) + } + return streamText({ onError(error) { l.error("stream error", { @@ -229,22 +266,7 @@ export namespace LLM { ...input.model.headers, }, maxRetries: input.retries ?? 0, - messages: [ - ...(isCodex - ? [ - { - role: "user", - content: system.join("\n\n"), - } as ModelMessage, - ] - : system.map( - (x): ModelMessage => ({ - role: "system", - content: x, - }), - )), - ...input.messages, - ], + messages, model: wrapLanguageModel({ model: language as any, middleware: [ diff --git a/packages/voltcode/src/session/processor.ts b/packages/voltcode/src/session/processor.ts index ca1e2ba61..038743767 100644 --- a/packages/voltcode/src/session/processor.ts +++ b/packages/voltcode/src/session/processor.ts @@ -20,6 +20,7 @@ import { Flag } from "@/flag/flag" import { handleLargeToolOutput, LARGE_TOOL_OUTPUT_THRESHOLD } from "./large-tool-output" import type { LcmToolMetadata } from "./lcm/types" import { Token } from "@/util/token" +import { LLMProfiler } from "./llm-profiler" export namespace SessionProcessor { const DOOM_LOOP_THRESHOLD = 3 @@ -336,11 +337,32 @@ export namespace SessionProcessor { let currentText: MessageV2.TextPart | undefined let reasoningMap: Record = {} const llmStreamStart = performance.now() - stream = await LLM.stream(streamInput) + const callID = LLMProfiler.start({ + sessionID: input.sessionID, + source: "session.processor", + modelID: input.model.id, + providerID: input.model.providerID, + small: streamInput.small ?? false, + messageID: input.assistantMessage.id, + agent: input.assistantMessage.agent, + context: LLMProfiler.buildPromptMetrics({ + system: streamInput.system, + messages: streamInput.messages, + tools: streamInput.tools, + small: streamInput.small ?? false, + }), + }) + streamInput.traceID = callID + const streamInputWithTrace = { + ...streamInput, + traceID: callID, + } + stream = await LLM.stream(streamInputWithTrace) log.trace("process.timing.llmStreamCreated", { sessionID: input.sessionID, ms: Math.round(performance.now() - llmStreamStart), }) + LLMProfiler.streamCreated(callID, Math.round(performance.now() - llmStreamStart)) // Use dedicated chunk timeout from Flag, separate from fetch timeout // Set to 0 to disable chunk timeout entirely @@ -364,6 +386,7 @@ export namespace SessionProcessor { sessionID: input.sessionID, ms: Math.round(performance.now() - llmStreamStart), }) + LLMProfiler.firstChunk(callID, Math.round(performance.now() - llmStreamStart)) firstChunk = false } input.abort.throwIfAborted() @@ -778,6 +801,17 @@ export namespace SessionProcessor { input.assistantMessage.cost += usage.cost input.assistantMessage.tokens = usage.tokens await Session.updateMessage(input.assistantMessage) + LLMProfiler.complete(callID, { + usage: { + inputTokens: usage.tokens.input, + outputTokens: usage.tokens.output, + reasoningTokens: usage.tokens.reasoning, + cacheReadTokens: usage.tokens.cache.read, + cacheWriteTokens: usage.tokens.cache.write, + cost: usage.cost, + }, + finishReason: input.assistantMessage.finish, + }) log.info("usage.summary", { sessionID: input.sessionID, messageID: input.assistantMessage.id, @@ -793,9 +827,14 @@ export namespace SessionProcessor { providerID: input.model.providerID, modelID: input.model.id, }) + LLMProfiler.fail(callID, "usage missing") } } } catch (e: any) { + const callID = streamInput.traceID + if (callID) { + LLMProfiler.fail(callID, e) + } log.error("process", { error: e, stack: JSON.stringify(e.stack), diff --git a/packages/voltcode/src/session/prompt.ts b/packages/voltcode/src/session/prompt.ts index 9e158a2a8..305deafbe 100644 --- a/packages/voltcode/src/session/prompt.ts +++ b/packages/voltcode/src/session/prompt.ts @@ -1980,6 +1980,7 @@ export namespace SessionPrompt { }) }, } + const subtaskStart = performance.now() const result = await taskTool.execute(taskArgs, taskCtx).catch((error) => { executionError = error log.error("subtask execution failed", { error, agent: task.agent, description: task.description }) @@ -1994,6 +1995,14 @@ export namespace SessionPrompt { }, result, ) + log.trace("loop.timing.subtaskExecution", { + sessionID, + messageID: assistantMessage.id, + task: task.agent, + ms: Math.round(performance.now() - subtaskStart), + success: !executionError, + childSessionID: result?.metadata?.sessionId, + }) assistantMessage.finish = "tool-calls" assistantMessage.time.completed = Date.now() await Session.updateMessage(assistantMessage)