From 78a439cbd1917597eb4f4c61c8e5ab1383a1bb25 Mon Sep 17 00:00:00 2001 From: musnows Date: Sun, 28 Jun 2026 18:51:24 +0800 Subject: [PATCH 01/18] feat(runtime): add background shell sessions with output persistence Support long-running bash in the background with XML completion notices, idle-turn UI separation, 10k output summaries, full logs under thread data, and sandbox-safe output_file reads. Co-authored-by: Cursor --- .../adapters/tool/background-shell-tool.ts | 143 ++++++++ kun/src/adapters/tool/builtin-bash-tool.ts | 322 +++++++++++++----- kun/src/adapters/tool/builtin-tool-types.ts | 27 ++ .../tool/builtin-tool-utils.symlink.test.ts | 15 + kun/src/adapters/tool/builtin-tool-utils.ts | 13 + kun/src/contracts/background-shell.ts | 35 ++ kun/src/contracts/events.ts | 26 +- kun/src/contracts/items.ts | 4 + kun/src/contracts/turns.ts | 7 +- kun/src/domain/item.ts | 2 + kun/src/loop/agent-loop.ts | 19 +- kun/src/loop/steering-queue.ts | 24 +- kun/src/ports/tool-host.ts | 2 + kun/src/server/routes/background-shells.ts | 42 +++ kun/src/server/routes/index.ts | 17 + kun/src/server/routes/server-runtime.ts | 2 + kun/src/server/routes/turns.ts | 8 +- kun/src/server/runtime-factory.ts | 33 +- kun/src/services/background-shell-notice.ts | 74 ++++ kun/src/services/background-shell-output.ts | 137 ++++++++ kun/src/services/background-shell-runtime.ts | 221 ++++++++++++ kun/src/services/turn-service.ts | 19 +- kun/tests/background-shell-notice.test.ts | 77 +++++ kun/tests/background-shell-output.test.ts | 79 +++++ kun/tests/background-shell-runtime.test.ts | 89 +++++ kun/tests/builtin-tools.test.ts | 290 +++++++++++++--- src/main/ipc/app-ipc-schemas.ts | 9 +- src/renderer/src/agent/kun-contract.ts | 1 + src/renderer/src/agent/kun-mapper.ts | 3 + src/renderer/src/agent/types.ts | 1 + src/renderer/src/components/Workbench.tsx | 4 +- .../chat/BackgroundShellOverlay.tsx | 199 +++++++++++ .../src/components/chat/MessageTimeline.tsx | 7 + .../chat/message-timeline-bubbles.tsx | 
94 ++++- .../chat/message-timeline-process.tsx | 10 +- .../chat/message-timeline-turns.test.ts | 26 +- .../components/chat/message-timeline-turns.ts | 11 + src/renderer/src/locales/en/common.json | 6 + src/renderer/src/locales/zh/common.json | 6 + .../src/store/chat-store-runtime-helpers.ts | 2 + src/shared/background-shell-notice.ts | 43 +++ src/shared/kun-endpoints.ts | 10 + 42 files changed, 1987 insertions(+), 172 deletions(-) create mode 100644 kun/src/adapters/tool/background-shell-tool.ts create mode 100644 kun/src/contracts/background-shell.ts create mode 100644 kun/src/server/routes/background-shells.ts create mode 100644 kun/src/services/background-shell-notice.ts create mode 100644 kun/src/services/background-shell-output.ts create mode 100644 kun/src/services/background-shell-runtime.ts create mode 100644 kun/tests/background-shell-notice.test.ts create mode 100644 kun/tests/background-shell-output.test.ts create mode 100644 kun/tests/background-shell-runtime.test.ts create mode 100644 src/renderer/src/components/chat/BackgroundShellOverlay.tsx create mode 100644 src/shared/background-shell-notice.ts diff --git a/kun/src/adapters/tool/background-shell-tool.ts b/kun/src/adapters/tool/background-shell-tool.ts new file mode 100644 index 000000000..7804e5de4 --- /dev/null +++ b/kun/src/adapters/tool/background-shell-tool.ts @@ -0,0 +1,143 @@ +import { LocalToolHost, type LocalTool } from './local-tool-host.js' +import { withToolBoundary } from './builtin-tool-utils.js' +import type { BackgroundShellRecordInput } from './builtin-tool-types.js' +import { + isBashSessionId, + listBashSessionRecords, + pollBashSession, + readBashSessionPayload, + stopBashSessionById, + writeBashSessionStdin +} from './builtin-bash-tool.js' + +export type BackgroundShellToolOptions = { + listBackgroundSessions?: (threadId?: string) => readonly BackgroundShellRecordInput[] +} + + +function normalizeYieldSeconds(value: unknown): number { + const raw = typeof value === 'number' && 
Number.isFinite(value) ? Math.floor(value) : 10 + return Math.max(1, Math.min(60, raw)) +} + +export function createBackgroundShellTool(options: BackgroundShellToolOptions = {}): LocalTool { + return LocalToolHost.defineTool({ + name: 'background_shell', + description: + 'Manage shell sessions started with bash background=true. The bash tool assigns an 8-character session_id when starting a background command; use that id here. action="list" lists running sessions by default (set include_finished=true to also show completed/stopped/failed sessions; optional thread_only). action="read" returns a non-blocking output snapshot. action="poll" waits up to yield_seconds for more output or exit. action="write" sends stdin via input. action="stop" terminates a running session.', + inputSchema: { + type: 'object', + properties: { + action: { + type: 'string', + enum: ['list', 'read', 'poll', 'write', 'stop'] + }, + session_id: { + type: 'string', + description: 'Required for read, poll, write, and stop. The 8-character id returned by bash when background=true.' + }, + yield_seconds: { type: 'number' }, + include_finished: { type: 'boolean', default: false }, + thread_only: { type: 'boolean', default: true }, + input: { type: 'string' } + }, + required: ['action'], + additionalProperties: false + }, + policy: 'auto', + toolKind: 'tool_call', + execute: async (args, context) => + withToolBoundary(async () => { + const action = typeof args.action === 'string' ? args.action.trim() : '' + if (action === 'list') { + const threadOnly = args.thread_only !== false + const threadId = threadOnly ? context.threadId : undefined + let sessions = options.listBackgroundSessions + ? 
[...options.listBackgroundSessions(threadId)] + : await listBashSessionRecords(threadId) + if (args.include_finished !== true) { + sessions = sessions.filter((session) => session.status === 'running') + } + return { + output: { + sessions: sessions.map((session) => ({ + session_id: session.id, + command: session.command, + cwd: session.cwd, + shell: session.shell, + status: session.status, + started_at: session.startedAt, + ...(session.finishedAt ? { finished_at: session.finishedAt } : {}), + exit_code: session.exitCode, + output: session.output, + output_preview: session.output, + ...(session.outputTruncated ? { output_truncated: true } : {}), + ...(session.outputFilePath ? { output_file: session.outputFilePath } : {}), + detached: session.detached + })), + running: sessions.filter((session) => session.status === 'running').length + } + } + } + + const sessionId = typeof args.session_id === 'string' ? args.session_id.trim() : '' + if (!sessionId) { + return { output: { error: 'session_id is required' }, isError: true } + } + if (!isBashSessionId(sessionId)) { + return { + output: { + error: 'session_id must be the 8-character id returned by bash when background=true', + session_id: sessionId + }, + isError: true + } + } + + if (action === 'read') { + const payload = await readBashSessionPayload(sessionId) + if (!payload) { + return { output: { error: 'background shell session not found', session_id: sessionId }, isError: true } + } + return { output: payload, isError: payload.status === 'failed' } + } + + if (action === 'stop') { + const stopped = await stopBashSessionById(sessionId) + const payload = await readBashSessionPayload(sessionId) + if (!payload) { + return { + output: { error: 'background shell session not found', session_id: sessionId, stopped }, + isError: true + } + } + return { + output: { ...payload, stop_sent: stopped }, + isError: payload.status === 'running' || payload.status === 'failed' + } + } + + if (action === 'write') { + const payload = 
await writeBashSessionStdin( + sessionId, + typeof args.input === 'string' ? args.input : '', + normalizeYieldSeconds(args.yield_seconds) + ) + if (!payload) { + return { output: { error: 'background shell session not found', session_id: sessionId }, isError: true } + } + return { output: payload, isError: payload.status === 'failed' } + } + + if (action === 'poll') { + const payload = await pollBashSession(sessionId, normalizeYieldSeconds(args.yield_seconds)) + if (!payload) { + return { output: { error: 'background shell session not found', session_id: sessionId }, isError: true } + } + return { output: payload, isError: payload.status === 'failed' } + } + + return { output: { error: `unsupported background_shell action: ${action}` }, isError: true } + }) + }) +} diff --git a/kun/src/adapters/tool/builtin-bash-tool.ts b/kun/src/adapters/tool/builtin-bash-tool.ts index 1024d34c9..e83ebe30a 100644 --- a/kun/src/adapters/tool/builtin-bash-tool.ts +++ b/kun/src/adapters/tool/builtin-bash-tool.ts @@ -1,11 +1,14 @@ import { mkdir } from 'node:fs/promises' -import { randomUUID } from 'node:crypto' +import { randomBytes } from 'node:crypto' import { spawn, type ChildProcessWithoutNullStreams } from 'node:child_process' import { LocalToolHost, type LocalTool } from './local-tool-host.js' import { OutputAccumulator } from './output-accumulator.js' import { DEFAULT_MAX_BYTES, DEFAULT_MAX_LINES, formatSize } from './truncate.js' -import type { BashLocalToolOptions, TextSlice, TruncateMode } from './builtin-tool-types.js' +import type { BashLocalToolOptions, TextSlice, TruncateMode, BackgroundShellRecordInput } from './builtin-tool-types.js' import { DEFAULT_BASH_TIMEOUT_SECONDS } from './builtin-tool-types.js' +import { + BackgroundShellOutputWriter +} from '../../services/background-shell-output.js' import { describeKind, normalizePositiveInteger, @@ -28,6 +31,8 @@ type BashSessionStatus = 'running' | 'completed' | 'stopped' | 'failed' type BashSession = { id: string + 
threadId?: string + turnId?: string command: string cwd: string shell: string @@ -40,7 +45,9 @@ type BashSession = { error?: string stopRequested: boolean finalized: boolean + detached: boolean exitWaiters: Set<() => void> + outputWriter?: BackgroundShellOutputWriter } type BashPayload = { @@ -66,6 +73,9 @@ type BashPayload = { partial?: boolean stop_sent?: boolean error?: string + output_file?: string + output_truncated?: boolean + output_total_chars?: number } const bashSessions = new Map() @@ -241,8 +251,24 @@ function sleep(ms: number): Promise { return new Promise((resolve) => setTimeout(resolve, ms)) } +const SESSION_ID_ALPHABET = 'abcdefghijklmnopqrstuvwxyz0123456789' +const SESSION_ID_LENGTH = 8 +const SESSION_ID_PATTERN = /^[a-z0-9]{8}$/ + function nextSessionId(): string { - return `bash_${randomUUID().replace(/-/g, '').slice(0, 12)}` + for (let attempt = 0; attempt < 64; attempt++) { + const bytes = randomBytes(SESSION_ID_LENGTH) + let id = '' + for (let i = 0; i < SESSION_ID_LENGTH; i++) { + id += SESSION_ID_ALPHABET[bytes[i]! 
% SESSION_ID_ALPHABET.length] + } + if (!bashSessions.has(id)) return id + } + throw new Error('failed to allocate unique bash session id') +} + +export function isBashSessionId(value: unknown): value is string { + return typeof value === 'string' && SESSION_ID_PATTERN.test(value) } function textSliceFromSnapshot(snapshot: ReturnType): TextSlice { @@ -297,13 +323,56 @@ async function finalizeSessionOutput(session: BashSession): Promise { await sleep(SESSION_EXIT_FLUSH_MS) session.output.finish() await session.output.closeTempFile() + await session.outputWriter?.close() session.finalized = true } +async function backgroundSessionPayload( + session: BashSession, + options: { stopSent?: boolean } = {} +): Promise { + if (session.status !== 'running') { + await finalizeSessionOutput(session) + } + const fields = await backgroundShellOutputFields(session) + return { + command: session.command, + cwd: session.cwd, + shell: session.shell, + exit_code: session.exitCode, + output: fields.output, + full_output_path: fields.output_file || null, + truncation: fields.output_truncated + ? { + total_lines: 0, + output_lines: 0, + total_bytes: fields.output_total_chars, + output_bytes: Buffer.byteLength(fields.output, 'utf8'), + truncated_by: 'bytes', + last_line_partial: false + } + : null, + output_file: fields.output_file, + output_truncated: fields.output_truncated, + output_total_chars: fields.output_total_chars, + session_id: session.id, + status: session.status, + started_at: session.startedAt, + ...(session.finishedAt ? { finished_at: session.finishedAt } : {}), + ...(typeof session.child.pid === 'number' ? { pid: session.child.pid } : {}), + ...(session.status === 'running' ? { partial: true } : {}), + ...(options.stopSent ? { stop_sent: true } : {}), + ...(session.error ? 
{ error: session.error } : {}) + } +} + async function sessionPayload( session: BashSession, options: { stopSent?: boolean } = {} ): Promise { + if (session.outputWriter) { + return backgroundSessionPayload(session, options) + } if (session.status !== 'running') { await finalizeSessionOutput(session) } @@ -377,21 +446,132 @@ function normalizeYieldSeconds(value: unknown): number { return Math.max(1, Math.min(MAX_BASH_YIELD_SECONDS, raw)) } +function recordFromSession( + session: BashSession, + output: string, + truncated?: boolean, + detached = false, + outputFilePath?: string +): BackgroundShellRecordInput { + return { + id: session.id, + threadId: session.threadId ?? '', + turnId: session.turnId ?? '', + command: session.command, + cwd: session.cwd, + shell: session.shell, + status: session.status, + startedAt: session.startedAt, + ...(session.finishedAt ? { finishedAt: session.finishedAt } : {}), + exitCode: session.exitCode, + output, + ...(truncated ? { outputTruncated: true } : {}), + ...(outputFilePath ? { outputFilePath } : {}), + ...(session.error ? 
{ error: session.error } : {}), + detached + } +} + +async function backgroundShellOutputFields(session: BashSession): Promise<{ + output: string + output_truncated: boolean + output_total_chars: number + output_file: string +}> { + const writer = session.outputWriter + if (!writer) { + return { + output: '', + output_truncated: false, + output_total_chars: 0, + output_file: '' + } + } + const fields = await writer.buildReturnFields() + return { + output: fields.summary, + output_truncated: fields.truncated, + output_total_chars: fields.totalChars, + output_file: fields.output_file + } +} + +async function recordFromBackgroundSession(session: BashSession, detached: boolean): Promise { + const fields = await backgroundShellOutputFields(session) + return recordFromSession( + session, + fields.output, + fields.output_truncated, + detached, + fields.output_file + ) +} + function sessionById(sessionId: unknown): BashSession | null { const id = typeof sessionId === 'string' ? sessionId.trim() : '' return id ? bashSessions.get(id) ?? 
null : null } -async function startBashSession( +export async function stopBashSessionById(sessionId: string): Promise { + const session = sessionById(sessionId) + if (!session || session.status !== 'running') return false + stopSession(session) + await waitForSessionExitOrDelay(session, STOP_GRACE_MS) + return session.status !== 'running' +} + +export async function readBashSessionPayload(sessionId: string): Promise { + const session = sessionById(sessionId) + if (!session) return null + return sessionPayload(session) +} + +export async function listBashSessionRecords(threadId?: string): Promise { + const records: BackgroundShellRecordInput[] = [] + for (const session of bashSessions.values()) { + if (threadId && session.threadId !== threadId) continue + records.push(await recordFromBackgroundSession(session, session.detached)) + } + return records.sort((a, b) => b.startedAt.localeCompare(a.startedAt)) +} + +export async function pollBashSession(sessionId: string, yieldSeconds: number): Promise { + const session = sessionById(sessionId) + if (!session) return null + await waitForSessionExitOrDelay(session, normalizeYieldSeconds(yieldSeconds) * 1000) + return sessionPayload(session) +} + +export async function writeBashSessionStdin( + sessionId: string, + input: string, + yieldSeconds: number +): Promise { + const session = sessionById(sessionId) + if (!session) return null + if (session.status !== 'running') return sessionPayload(session) + session.child.stdin.write(input) + await waitForSessionExitOrDelay(session, normalizeYieldSeconds(yieldSeconds) * 1000) + return sessionPayload(session) +} + +async function startBackgroundBashSession( input: { command: string cwd: string + threadId: string + turnId: string signal: AbortSignal timeoutSeconds: number - yieldSeconds: number + detached: boolean + dataDir?: string }, + hooks: BashLocalToolOptions['backgroundShell'], onUpdate?: (update: { output: unknown; isError?: boolean }) => Promise | void ): Promise<{ payload: 
BashPayload; isError?: boolean }> { + if (!input.dataDir?.trim()) { + throw new Error('background shell sessions require runtime dataDir') + } await mkdir(input.cwd, { recursive: true }) const shellRuntime = shellRuntimeInfo() const child = spawn(shellRuntime.shell, shellCommandArgs(shellRuntime, input.command), { @@ -401,22 +581,38 @@ async function startBashSession( stdio: ['pipe', 'pipe', 'pipe'], windowsHide: true }) + const sessionId = nextSessionId() + const outputWriter = new BackgroundShellOutputWriter(input.dataDir, input.threadId, sessionId) + await outputWriter.open() const session: BashSession = { - id: nextSessionId(), + id: sessionId, + threadId: input.threadId, + turnId: input.turnId, command: input.command, cwd: input.cwd, shell: shellRuntime.name, child, output: createOutputAccumulator(), + outputWriter, startedAt: new Date().toISOString(), exitCode: null, status: 'running', stopRequested: false, finalized: false, + detached: input.detached, exitWaiters: new Set() } bashSessions.set(session.id, session) + const notifyUpdated = async () => { + if (!hooks) return + await hooks.onSessionUpdated?.(await recordFromBackgroundSession(session, input.detached)) + } + const notifySettled = async () => { + if (!hooks) return + await hooks.onSessionSettled?.(await recordFromBackgroundSession(session, input.detached)) + } + let updateDirty = false let updateTimer: NodeJS.Timeout | undefined let lastUpdateAt = 0 @@ -425,7 +621,9 @@ async function startBashSession( if (!liveUpdates || !onUpdate || !updateDirty) return updateDirty = false lastUpdateAt = Date.now() - await onUpdate({ output: await sessionPayload(session) }) + const payload = await sessionPayload(session) + await onUpdate({ output: payload }) + void notifyUpdated() } const scheduleUpdate = () => { if (!liveUpdates || !onUpdate) return @@ -443,49 +641,38 @@ async function startBashSession( } const handleData = (chunk: Buffer | string) => { if (session.finalized) return - 
session.output.append(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk)) + const buffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk) + session.output.append(buffer) + session.outputWriter?.append(buffer) scheduleUpdate() } child.stdout.on('data', handleData) child.stderr.on('data', handleData) child.once('error', (error) => { settleSession(session, 'failed', null, error.message) + void notifySettled() }) child.once('exit', (code) => { settleSession(session, session.stopRequested ? 'stopped' : 'completed', code) + void notifySettled() }) - const onAbort = () => stopSession(session) - input.signal.addEventListener('abort', onAbort, { once: true }) - const timeoutMs = input.timeoutSeconds * 1000 - const yieldMs = Math.min(input.yieldSeconds * 1000, timeoutMs) - const exited = await waitForSessionExitOrDelay(session, yieldMs) - input.signal.removeEventListener('abort', onAbort) - if (updateTimer) clearTimeout(updateTimer) - - if (input.signal.aborted) { - liveUpdates = false - stopSession(session) - throw new Error('command aborted') - } - if (!exited && timeoutMs <= yieldMs) { - liveUpdates = false - stopSession(session) - await waitForSessionExitOrDelay(session, STOP_GRACE_MS) - throw new Error(`command timed out after ${input.timeoutSeconds} seconds`) - } - - if (exited) { - await emitUpdate() - liveUpdates = false - const payload = await sessionPayload(session) - if (session.status === 'failed') return { payload, isError: true } - return { payload, isError: session.exitCode !== null && session.exitCode !== 0 } + const initialPayload = await sessionPayload(session) + await hooks?.onSessionStarted?.(await recordFromBackgroundSession(session, input.detached)) + + if (input.detached) { + const timeoutMs = input.timeoutSeconds * 1000 + const timeoutTimer = setTimeout(() => { + if (session.status !== 'running') return + stopSession(session) + }, timeoutMs) + timeoutTimer.unref?.() + child.once('exit', () => clearTimeout(timeoutTimer)) + child.once('error', () 
=> clearTimeout(timeoutTimer)) + return { payload: initialPayload } } - await emitUpdate() - liveUpdates = false - return { payload: await sessionPayload(session) } + throw new Error('startBackgroundBashSession requires detached=true') } function appendTruncationNotice(text: string, truncated: TextSlice, mode: TruncateMode): string { @@ -499,22 +686,18 @@ function appendTruncationNotice(text: string, truncated: TextSlice, mode: Trunca export function createBashLocalTool(options: BashLocalToolOptions = {}): LocalTool { const bashOps = options.operations + const shellHooks = options.backgroundShell + const backgroundShellDataDir = options.backgroundShellDataDir const shellRuntime = shellRuntimeInfo() return LocalToolHost.defineTool({ name: 'bash', - description: `Execute a shell command in the workspace using the host platform shell. Current shell: ${shellRuntime.name}. Use ${shellRuntime.syntax} syntax. Return combined stdout and stderr. Long-running commands return a session_id; use action="poll" to block up to yield_seconds (default ${DEFAULT_BASH_YIELD_SECONDS}s, max ${MAX_BASH_YIELD_SECONDS}s) waiting for more output or process exit, action="write" with input to send stdin, or action="stop" to terminate the session.`, + description: `Execute a shell command in the workspace using the host platform shell. Current shell: ${shellRuntime.name}. Use ${shellRuntime.syntax} syntax. Return combined stdout and stderr. Runs synchronously by default (background defaults to false). Set background=true to start a detached session that keeps running after the turn ends; the tool assigns an 8-character session_id in the response. 
Use the background_shell tool to list, read, poll, write, or stop background sessions.`, inputSchema: { type: 'object', properties: { command: { type: 'string' }, timeout: { type: 'number' }, - yield_seconds: { type: 'number' }, - action: { - type: 'string', - enum: ['run', 'poll', 'write', 'stop'] - }, - session_id: { type: 'string' }, - input: { type: 'string' } + background: { type: 'boolean', default: false } }, required: [], additionalProperties: false @@ -522,53 +705,34 @@ export function createBashLocalTool(options: BashLocalToolOptions = {}): LocalTo policy: 'on-request', toolKind: 'command_execution', execute: async (args, context, onUpdate) => withToolBoundary(async () => { - const action = typeof args.action === 'string' ? args.action.trim() : '' - if (action && action !== 'run') { - if (action !== 'poll' && action !== 'write' && action !== 'stop') { - return { output: { error: `unsupported bash session action: ${action}` }, isError: true } - } - const session = sessionById(args.session_id) - if (!session) { - return { output: { error: 'bash session not found', session_id: args.session_id ?? null }, isError: true } - } - if (action === 'write') { - if (session.status !== 'running') { - return { output: await sessionPayload(session), isError: true } - } - const input = typeof args.input === 'string' ? 
args.input : '' - session.child.stdin.write(input) - await waitForSessionExitOrDelay(session, normalizeYieldSeconds(args.yield_seconds) * 1000) - const payload = await sessionPayload(session) - return { output: payload, isError: payload.status === 'failed' } - } - if (action === 'stop') { - stopSession(session) - await waitForSessionExitOrDelay(session, STOP_GRACE_MS) - const payload = await sessionPayload(session, { stopSent: true }) - return { output: payload, isError: session.status === 'running' || session.status === 'failed' } - } - await waitForSessionExitOrDelay(session, normalizeYieldSeconds(args.yield_seconds) * 1000) - return { output: await sessionPayload(session), isError: session.status === 'failed' } - } - const command = typeof args.command === 'string' ? args.command : '' if (!command.trim()) return { output: { error: 'command is required' }, isError: true } const timeout = normalizePositiveInteger( args.timeout, options.defaultTimeoutSeconds ?? DEFAULT_BASH_TIMEOUT_SECONDS ) - const yieldSeconds = normalizeYieldSeconds(args.yield_seconds) + const background = args.background === true const cwd = workspaceRoot(context.workspace) try { - if (!bashOps?.exec) { - const result = await startBashSession( + if (background) { + if (bashOps?.exec) { + return { + output: { error: 'background sessions are not supported with custom bash exec operations' }, + isError: true + } + } + const result = await startBackgroundBashSession( { command, cwd, + threadId: context.threadId, + turnId: context.turnId, signal: context.abortSignal, timeoutSeconds: timeout, - yieldSeconds + detached: true, + dataDir: backgroundShellDataDir }, + shellHooks, onUpdate ) return { @@ -582,7 +746,7 @@ export function createBashLocalTool(options: BashLocalToolOptions = {}): LocalTo context.abortSignal, timeout, onUpdate, - bashOps.exec + bashOps?.exec ) const payload = resultPayload({ command, diff --git a/kun/src/adapters/tool/builtin-tool-types.ts 
b/kun/src/adapters/tool/builtin-tool-types.ts index 6436d1405..24f0f1873 100644 --- a/kun/src/adapters/tool/builtin-tool-types.ts +++ b/kun/src/adapters/tool/builtin-tool-types.ts @@ -120,9 +120,36 @@ export type ReadLocalToolOptions = { operations?: ReadLocalToolOperations } +export type BackgroundShellRecordInput = { + id: string + threadId: string + turnId: string + command: string + cwd: string + shell: string + status: 'running' | 'completed' | 'stopped' | 'failed' + startedAt: string + finishedAt?: string + exitCode: number | null + output: string + outputTruncated?: boolean + outputFilePath?: string + error?: string + detached: boolean +} + +export type BackgroundShellHooks = { + onSessionStarted?: (record: BackgroundShellRecordInput) => void | Promise + onSessionUpdated?: (record: BackgroundShellRecordInput) => void | Promise + onSessionSettled?: (record: BackgroundShellRecordInput) => void | Promise + isDetachedSession?: (sessionId: string) => boolean +} + export type BashLocalToolOptions = { defaultTimeoutSeconds?: number operations?: BashLocalToolOperations + backgroundShell?: BackgroundShellHooks + backgroundShellDataDir?: string } export type WriteLocalToolOptions = { diff --git a/kun/src/adapters/tool/builtin-tool-utils.symlink.test.ts b/kun/src/adapters/tool/builtin-tool-utils.symlink.test.ts index e2fd5ff01..0a360b6c7 100644 --- a/kun/src/adapters/tool/builtin-tool-utils.symlink.test.ts +++ b/kun/src/adapters/tool/builtin-tool-utils.symlink.test.ts @@ -4,6 +4,7 @@ import { join } from 'node:path' import { afterEach, beforeEach, describe, expect, it } from 'vitest' import type { ToolHostContext } from '../../ports/tool-host.js' import { resolveWorkspacePath } from './builtin-tool-utils.js' +import { resolveBackgroundShellOutputPaths } from '../../services/background-shell-output.js' function context(workspace: string): ToolHostContext { return { @@ -114,6 +115,20 @@ describe('resolveWorkspacePath sandbox mode', () => { ).rejects.toThrow(/escapes the 
workspace root/) }) + it('allows background shell output files outside the workspace in read-only sandbox', async () => { + const runtimeDataDir = join(base, 'runtime-data') + const { outputFilePath } = resolveBackgroundShellOutputPaths(runtimeDataDir, 'thr_1', 'abcd1234') + await mkdir(join(runtimeDataDir, 'threads', 'thr_1', 'background-shells'), { recursive: true }) + await writeFile(outputFilePath, 'full log') + const resolved = await resolveWorkspacePath(outputFilePath, { + ...context(workspace), + sandboxMode: 'read-only', + runtimeDataDir, + threadId: 'thr_1' + }) + expect(resolved.absolutePath).toBe(outputFilePath) + }) + it('does not require the workspace root to exist under danger-full-access', async () => { const missingWs = join(base, 'does-not-exist') const target = join(outside, 'sys.txt') diff --git a/kun/src/adapters/tool/builtin-tool-utils.ts b/kun/src/adapters/tool/builtin-tool-utils.ts index e60445ab0..c2d63f6a1 100644 --- a/kun/src/adapters/tool/builtin-tool-utils.ts +++ b/kun/src/adapters/tool/builtin-tool-utils.ts @@ -4,6 +4,7 @@ import { spawn, spawnSync, type ChildProcess } from 'node:child_process' import { basename, dirname, isAbsolute, join, relative, resolve, sep, win32 } from 'node:path' import type { ToolHostContext } from '../../ports/tool-host.js' import { effectiveSandboxMode } from './sandbox-policy.js' +import { isBackgroundShellOutputPath } from '../../services/background-shell-output.js' import type { EditInstruction, FsStats, @@ -65,6 +66,18 @@ export async function resolveWorkspacePath(inputPath: string, context: ToolHostC }> { const root = workspaceRoot(context.workspace) const lexicalAbsolutePath = isAbsolute(inputPath) ? 
resolve(inputPath) : resolve(root, inputPath) + if ( + isBackgroundShellOutputPath(lexicalAbsolutePath, { + runtimeDataDir: context.runtimeDataDir, + threadId: context.threadId + }) + ) { + return { + workspaceRoot: root, + absolutePath: resolve(lexicalAbsolutePath), + relativePath: relative(root, resolve(lexicalAbsolutePath)) || '.' + } + } // In full-access mode the workspace boundary is not enforced: the user has // explicitly opted into reaching paths outside the workspace. This mirrors // canWritePath(), which already permits writes anywhere under diff --git a/kun/src/contracts/background-shell.ts b/kun/src/contracts/background-shell.ts new file mode 100644 index 000000000..3d7f06758 --- /dev/null +++ b/kun/src/contracts/background-shell.ts @@ -0,0 +1,35 @@ +import { z } from 'zod' + +export const BackgroundShellStatus = z.enum(['running', 'completed', 'stopped', 'failed']) +export type BackgroundShellStatus = z.infer + +export const BackgroundShellRecord = z.object({ + id: z.string().min(1), + threadId: z.string().min(1), + turnId: z.string().min(1), + command: z.string(), + cwd: z.string(), + shell: z.string(), + status: BackgroundShellStatus, + startedAt: z.string(), + finishedAt: z.string().optional(), + exitCode: z.number().int().nullable(), + output: z.string(), + outputTruncated: z.boolean().optional(), + outputFilePath: z.string().optional(), + error: z.string().optional(), + detached: z.boolean() +}).strict() +export type BackgroundShellRecord = z.infer + +export const BackgroundShellListResponse = z.object({ + sessions: z.array(BackgroundShellRecord), + running: z.number().int().nonnegative() +}).strict() +export type BackgroundShellListResponse = z.infer + +export const BackgroundShellStopResponse = z.object({ + sessionId: z.string().min(1), + stopped: z.boolean() +}).strict() +export type BackgroundShellStopResponse = z.infer diff --git a/kun/src/contracts/events.ts b/kun/src/contracts/events.ts index 45e0c4797..05d07c98c 100644 --- 
a/kun/src/contracts/events.ts +++ b/kun/src/contracts/events.ts @@ -1,5 +1,5 @@ import { z } from 'zod' -import { TurnItem } from './items.js' +import { TurnItem, UserMessageSource } from './items.js' import { ThreadGoalSchema, ThreadTodoListSchema } from './threads.js' import { UsageSnapshotSchema } from './usage.js' import { RuntimeErrorSeverity } from './errors.js' @@ -39,6 +39,9 @@ export const RuntimeEventKind = z.enum([ 'goal_cleared', 'todos_updated', 'todos_cleared', + 'bash_session_started', + 'bash_session_updated', + 'bash_session_completed', 'pipeline_stage', 'usage', 'error', @@ -125,6 +128,8 @@ export const TurnLifecycleEvent = RuntimeEventBase.extend({ ]), status: z.string().optional(), text: z.string().optional(), + displayText: z.string().optional(), + messageSource: UserMessageSource.optional(), message: z.string().optional(), code: z.string().optional(), details: z.unknown().optional(), @@ -226,6 +231,24 @@ export const TodoEvent = RuntimeEventBase.extend({ }) export type TodoEvent = z.infer +export const BashSessionEvent = RuntimeEventBase.extend({ + kind: z.enum(['bash_session_started', 'bash_session_updated', 'bash_session_completed']), + sessionId: z.string().min(1), + command: z.string(), + cwd: z.string(), + shell: z.string(), + status: z.enum(['running', 'completed', 'stopped', 'failed']), + startedAt: z.string(), + finishedAt: z.string().optional(), + exitCode: z.number().int().nullable().optional(), + detached: z.boolean(), + output: z.string().default(''), + outputTruncated: z.boolean().optional(), + outputFilePath: z.string().optional(), + error: z.string().optional() +}) +export type BashSessionEvent = z.infer + export const UsageEvent = RuntimeEventBase.extend({ kind: z.literal('usage'), model: z.string().optional(), @@ -268,6 +291,7 @@ export const RuntimeEvent = z.discriminatedUnion('kind', [ CompactionEvent, GoalEvent, TodoEvent, + BashSessionEvent, PipelineStageEvent, UsageEvent, ErrorEvent, diff --git 
a/kun/src/contracts/items.ts b/kun/src/contracts/items.ts index d75e927d2..622608384 100644 --- a/kun/src/contracts/items.ts +++ b/kun/src/contracts/items.ts @@ -51,10 +51,14 @@ export const UserFileReferenceSchema = z.object({ }) export type UserFileReference = z.infer +export const UserMessageSource = z.enum(['background_shell']) +export type UserMessageSource = z.infer + export const UserTurnItem = TurnItemBase.extend({ kind: z.literal('user_message'), text: z.string(), displayText: z.string().optional(), + messageSource: UserMessageSource.optional(), attachmentIds: z.array(z.string().min(1)).optional(), fileReferences: z.array(UserFileReferenceSchema).optional(), workspaceCheckpointId: z.string().min(1).optional() diff --git a/kun/src/contracts/turns.ts b/kun/src/contracts/turns.ts index c908fb4c6..1e5d14516 100644 --- a/kun/src/contracts/turns.ts +++ b/kun/src/contracts/turns.ts @@ -1,5 +1,5 @@ import { z } from 'zod' -import { TurnItem, UserFileReferenceSchema } from './items.js' +import { TurnItem, UserFileReferenceSchema, UserMessageSource } from './items.js' import { isGuiPlanRelativePath } from '../shared/gui-plan.js' import { ApprovalPolicySchema, SandboxModeSchema } from './policy.js' @@ -91,6 +91,7 @@ export type Turn = z.infer export const StartTurnRequest = z.object({ prompt: z.string().min(1), displayText: z.string().optional(), + messageSource: UserMessageSource.optional(), model: z.string().optional(), reasoningEffort: TurnReasoningEffortSchema.optional(), approvalPolicy: ApprovalPolicySchema.optional(), @@ -135,7 +136,9 @@ export const StartTurnResponse = z.object({ export type StartTurnResponse = z.infer export const SteerTurnRequest = z.object({ - text: z.string().min(1) + text: z.string().min(1), + displayText: z.string().optional(), + messageSource: UserMessageSource.optional() }) export type SteerTurnRequest = z.infer diff --git a/kun/src/domain/item.ts b/kun/src/domain/item.ts index 35f12ffbe..3263d8813 100644 --- a/kun/src/domain/item.ts 
+++ b/kun/src/domain/item.ts @@ -9,6 +9,7 @@ export function makeUserItem(input: { threadId: string text: string displayText?: string + messageSource?: 'background_shell' attachmentIds?: string[] fileReferences?: Array<{ path: string; relativePath: string; name: string; kind?: 'file' | 'directory' }> workspaceCheckpointId?: string @@ -34,6 +35,7 @@ export function makeUserItem(input: { kind: 'user_message', text: input.text, ...(displayText && displayText !== input.text ? { displayText } : {}), + ...(input.messageSource ? { messageSource: input.messageSource } : {}), ...(attachmentIds?.length ? { attachmentIds } : {}), ...(fileReferences?.length ? { fileReferences } : {}), ...(input.workspaceCheckpointId ? { workspaceCheckpointId: input.workspaceCheckpointId } : {}) diff --git a/kun/src/loop/agent-loop.ts b/kun/src/loop/agent-loop.ts index acd38a82b..b9ff86220 100644 --- a/kun/src/loop/agent-loop.ts +++ b/kun/src/loop/agent-loop.ts @@ -672,6 +672,8 @@ export type AgentLoopOptions = { skillRuntime?: SkillRuntime attachmentStore?: AttachmentStore memoryStore?: MemoryStore + /** Kun runtime data root for sandbox-safe background shell output reads. */ + runtimeDataDir?: string tokenEconomy?: TokenEconomyConfig contextCompaction?: ContextCompactionConfig /** Internal-LLM role model routing (smallModel slot + title/summary/codeReview overrides). */ @@ -1269,18 +1271,15 @@ export class AgentLoop { private async drainSteering(threadId: string, turnId: string, signal: AbortSignal): Promise { const pending = this.opts.steering.drain() if (pending.length === 0) return - for (const text of pending) { - const item: TurnItem = { + for (const entry of pending) { + const item = makeUserItem({ id: this.opts.ids.next('item_steered'), turnId, threadId, - role: 'user', - status: 'completed', - createdAt: this.opts.nowIso(), - finishedAt: this.opts.nowIso(), - kind: 'user_message', - text - } + text: entry.text, + ...(entry.displayText ? 
{ displayText: entry.displayText } : {}), + ...(entry.messageSource ? { messageSource: entry.messageSource } : {}) + }) await this.opts.turns.applyItem(threadId, item) } void signal @@ -1466,6 +1465,7 @@ export class AgentLoop { ...(this.opts.blockedSkillIds ? { blockedSkillIds: this.opts.blockedSkillIds } : {}), approvalPolicy, sandboxMode, + ...(this.opts.runtimeDataDir ? { runtimeDataDir: this.opts.runtimeDataDir } : {}), abortSignal: signal, awaitApproval: async () => 'allow', ...(userInputDisabled @@ -2244,6 +2244,7 @@ export class AgentLoop { ...(this.opts.blockedSkillIds ? { blockedSkillIds: this.opts.blockedSkillIds } : {}), approvalPolicy: input.approvalPolicy, sandboxMode: input.sandboxMode, + ...(this.opts.runtimeDataDir ? { runtimeDataDir: this.opts.runtimeDataDir } : {}), abortSignal: input.signal, awaitApproval: async (approval) => { await this.opts.events.record({ diff --git a/kun/src/loop/steering-queue.ts b/kun/src/loop/steering-queue.ts index da1959e72..928ba7626 100644 --- a/kun/src/loop/steering-queue.ts +++ b/kun/src/loop/steering-queue.ts @@ -4,8 +4,14 @@ * as user inputs at the next safe loop boundary. The queue is cleared * on turn completion or interruption. */ +export type SteeringEntry = { + text: string + displayText?: string + messageSource?: 'background_shell' +} + export class SteeringQueue { - private readonly buffer: string[] = [] + private readonly buffer: SteeringEntry[] = [] private turnId: string | null = null setTurn(turnId: string | null): void { @@ -15,14 +21,18 @@ export class SteeringQueue { this.turnId = turnId } - enqueue(turnId: string, text: string): void { + enqueue(turnId: string, entry: SteeringEntry): void { if (this.turnId !== turnId) { this.buffer.length = 0 this.turnId = turnId } - const trimmed = text.trim() - if (!trimmed) return - this.buffer.push(trimmed) + const text = entry.text.trim() + if (!text) return + this.buffer.push({ + text, + ...(entry.displayText?.trim() ? 
{ displayText: entry.displayText.trim() } : {}), + ...(entry.messageSource ? { messageSource: entry.messageSource } : {}) + }) } /** @@ -30,7 +40,7 @@ export class SteeringQueue { * this at safe boundaries (after a model response, before the next * model request). Returns an empty array when nothing is pending. */ - drain(): string[] { + drain(): SteeringEntry[] { if (this.buffer.length === 0) return [] const out = [...this.buffer] this.buffer.length = 0 @@ -41,7 +51,7 @@ export class SteeringQueue { * Peek at the queued text without removing it. Used by the UI to * show pending steering in a "pending injection" indicator. */ - peek(): string[] { + peek(): SteeringEntry[] { return [...this.buffer] } diff --git a/kun/src/ports/tool-host.ts b/kun/src/ports/tool-host.ts index 2dd1669be..95765cc56 100644 --- a/kun/src/ports/tool-host.ts +++ b/kun/src/ports/tool-host.ts @@ -91,6 +91,8 @@ export type ToolHostContext = { approvalPolicy: ApprovalPolicy /** Filesystem/command sandbox selected for this turn. Defaults at execution time for old callers. */ sandboxMode?: SandboxMode + /** Kun runtime data root; used to allow sandbox-safe reads of background shell output files. */ + runtimeDataDir?: string abortSignal: AbortSignal /** Resolves a pending approval with the user's decision. 
*/ awaitApproval: (approval: ApprovalRequest) => Promise<'allow' | 'deny'> diff --git a/kun/src/server/routes/background-shells.ts b/kun/src/server/routes/background-shells.ts new file mode 100644 index 000000000..0c519f357 --- /dev/null +++ b/kun/src/server/routes/background-shells.ts @@ -0,0 +1,42 @@ +import type { BackgroundShellRuntime } from '../../services/background-shell-runtime.js' +import { jsonResponse, type JsonResponse } from '../response.js' +import { ERRORS } from './runtime-error.js' + +export async function backgroundShellList( + runtime: BackgroundShellRuntime | undefined, + request: Request +): Promise { + if (!runtime) { + return jsonResponse({ sessions: [], running: 0 }) + } + const url = new URL(request.url) + const threadId = url.searchParams.get('thread_id') ?? undefined + const sessions = runtime.listSessions(threadId) + return jsonResponse({ + sessions, + running: sessions.filter((session) => session.status === 'running').length + }) +} + +export async function backgroundShellGet( + runtime: BackgroundShellRuntime | undefined, + sessionId: string +): Promise { + if (!runtime) return ERRORS.unavailable('background shell runtime is unavailable') + if (!sessionId.trim()) return ERRORS.validation('sessionId is required', []) + const session = runtime.getSession(sessionId) + if (!session) return ERRORS.notFound(`background shell not found: ${sessionId}`) + return jsonResponse(session) +} + +export async function backgroundShellStop( + runtime: BackgroundShellRuntime | undefined, + sessionId: string +): Promise { + if (!runtime) return ERRORS.unavailable('background shell runtime is unavailable') + if (!sessionId.trim()) return ERRORS.validation('sessionId is required', []) + const stopped = await runtime.stopSession(sessionId) + return jsonResponse({ sessionId, stopped }) +} + +export { ERRORS as BackgroundShellErrors } diff --git a/kun/src/server/routes/index.ts b/kun/src/server/routes/index.ts index 11cf12a17..51d59671e 100644 --- 
a/kun/src/server/routes/index.ts +++ b/kun/src/server/routes/index.ts @@ -51,6 +51,11 @@ import { delegationDiagnostics, delegationProfiles } from './delegation.js' +import { + backgroundShellGet, + backgroundShellList, + backgroundShellStop +} from './background-shells.js' import { isAuthorized, bearerToken } from '../auth.js' import { ERRORS } from './runtime-error.js' import type { ServerRuntime } from './server-runtime.js' @@ -150,6 +155,18 @@ export function buildRouter(runtime: ServerRuntime): Router { if (!authorize(request, runtime)) return ERRORS.unauthorized() return delegationAbort(runtime.delegationRuntime, ctx.params.childId) }) + router.add('GET', '/v1/background-shells', async (request) => { + if (!authorize(request, runtime)) return ERRORS.unauthorized() + return backgroundShellList(runtime.backgroundShellRuntime, request) + }) + router.add('GET', '/v1/background-shells/:sessionId', async (request, ctx) => { + if (!authorize(request, runtime)) return ERRORS.unauthorized() + return backgroundShellGet(runtime.backgroundShellRuntime, ctx.params.sessionId) + }) + router.add('POST', '/v1/background-shells/:sessionId/stop', async (request, ctx) => { + if (!authorize(request, runtime)) return ERRORS.unauthorized() + return backgroundShellStop(runtime.backgroundShellRuntime, ctx.params.sessionId) + }) router.add('GET', '/v1/workspace/status', async (request) => { if (!authorize(request, runtime)) return ERRORS.unauthorized() const url = new URL(request.url) diff --git a/kun/src/server/routes/server-runtime.ts b/kun/src/server/routes/server-runtime.ts index 74ca31329..b7f905f96 100644 --- a/kun/src/server/routes/server-runtime.ts +++ b/kun/src/server/routes/server-runtime.ts @@ -27,6 +27,7 @@ import type { MemoryDiagnostics } from '../../contracts/memory.js' import type { MemoryStore } from '../../memory/memory-store.js' import type { ReviewTarget } from '../../contracts/review.js' import type { DelegationRuntime } from 
'../../delegation/delegation-runtime.js' +import type { BackgroundShellRuntime } from '../../services/background-shell-runtime.js' import type { ModelClient } from '../../ports/model-client.js' import type { RolesConfig } from '../../config/kun-config.js' import type { ImmutablePrefix } from '../../cache/immutable-prefix.js' @@ -71,6 +72,7 @@ export type ServerRuntime = { * listing. Optional so test scaffolds can omit it. */ delegationRuntime?: DelegationRuntime + backgroundShellRuntime?: BackgroundShellRuntime /** * Default ModelClient + model id for one-shot completions outside the * agent loop (e.g. AI-generated subagent profiles). Optional so test diff --git a/kun/src/server/routes/turns.ts b/kun/src/server/routes/turns.ts index 7b438f829..af113d5e4 100644 --- a/kun/src/server/routes/turns.ts +++ b/kun/src/server/routes/turns.ts @@ -53,7 +53,13 @@ export async function steerTurn( if (!parsed.success) { return ERRORS.validation('invalid steer turn body', parsed.error.issues) } - await turns.steerTurn({ threadId, turnId, text: parsed.data.text }) + await turns.steerTurn({ + threadId, + turnId, + text: parsed.data.text, + ...(parsed.data.displayText ? { displayText: parsed.data.displayText } : {}), + ...(parsed.data.messageSource ? 
{ messageSource: parsed.data.messageSource } : {}) + }) return jsonResponse({ ok: true }) } diff --git a/kun/src/server/runtime-factory.ts b/kun/src/server/runtime-factory.ts index a493fdfb7..399c1363e 100644 --- a/kun/src/server/runtime-factory.ts +++ b/kun/src/server/runtime-factory.ts @@ -78,6 +78,10 @@ import { resolveConfiguredHooks, type HooksConfig } from '../hooks/hook-config.j import { FileMemoryStore } from '../memory/memory-store.js' import { DelegationRuntime, FileDelegationStore } from '../delegation/delegation-runtime.js' import { createChildAgentExecutor } from '../delegation/child-agent-executor.js' +import { BackgroundShellRuntime } from '../services/background-shell-runtime.js' +import { stopBashSessionById, createBashLocalTool } from '../adapters/tool/builtin-bash-tool.js' +import { createBackgroundShellTool } from '../adapters/tool/background-shell-tool.js' +import type { LocalTool } from '../adapters/tool/local-tool-host.js' export type KunServeRuntimeOptions = { host: string @@ -237,6 +241,28 @@ export async function createKunServeRuntime( ids, nowIso }) + const backgroundShellRuntime = new BackgroundShellRuntime({ + events, + threadStore, + turns: turnService, + nowIso + }) + backgroundShellRuntime.bindStopHandler(stopBashSessionById) + const backgroundShellTool = createBackgroundShellTool({ + listBackgroundSessions: (threadId) => backgroundShellRuntime.listSessions(threadId) + }) + const withBackgroundShellTools = (tools: LocalTool[]): LocalTool[] => { + const mapped = tools.map((tool) => + tool.name === 'bash' + ? 
createBashLocalTool({ + backgroundShell: backgroundShellRuntime.bashHooks(), + backgroundShellDataDir: input.dataDir + }) + : tool + ) + const withoutBackgroundShell = mapped.filter((tool) => tool.name !== 'background_shell') + return [...withoutBackgroundShell, backgroundShellTool] + } const reviewService = new ReviewService({ threadStore, turns: turnService, @@ -281,7 +307,7 @@ export async function createKunServeRuntime( kind: 'built-in' as const, enabled: true, available: true, - tools: buildDefaultLocalTools() + tools: withBackgroundShellTools(buildDefaultLocalTools()) }, ...mcpProviders.providers, ...webProviders.providers, @@ -496,6 +522,7 @@ export async function createKunServeRuntime( ...(resolvedHooks.length ? { hooks: resolvedHooks } : {}), ...(attachmentStore ? { attachmentStore } : {}), ...(memoryStore ? { memoryStore } : {}), + runtimeDataDir: options.dataDir, onPlanWritten: async ({ threadId, planId, relativePath, markdown }) => { await threadService.syncTodosFromPlan(threadId, { planId, @@ -505,6 +532,9 @@ export async function createKunServeRuntime( }) } }) + backgroundShellRuntime.bindAgentLoop({ + runTurn: (threadId, turnId) => loop.runTurn(threadId, turnId) + }) const startedAt = options.startedAt ?? nowIso() return { threadService, @@ -522,6 +552,7 @@ export async function createKunServeRuntime( ...(attachmentStore ? { attachmentStore } : {}), ...(memoryStore ? { memoryStore } : {}), ...(delegationRuntime ? { delegationRuntime } : {}), + backgroundShellRuntime, modelClient, defaultModel: options.model, ...(options.roles ? 
{ roles: options.roles } : {}), diff --git a/kun/src/services/background-shell-notice.ts b/kun/src/services/background-shell-notice.ts new file mode 100644 index 000000000..aabdb3728 --- /dev/null +++ b/kun/src/services/background-shell-notice.ts @@ -0,0 +1,74 @@ +import type { BackgroundShellRecord } from '../contracts/background-shell.js' + +export type BackgroundShellCompletionNotice = { + sessionId: string + command: string + exitCode: number + outputPreview: string + hint: string +} + +function escapeXml(text: string): string { + return text + .replace(/&/g, '&') + .replace(//g, '>') + .replace(/"/g, '"') +} + +function unescapeXml(text: string): string { + return text + .replace(/"/g, '"') + .replace(/>/g, '>') + .replace(/</g, '<') + .replace(/&/g, '&') +} + +function summarizeOutput(output: string, max = 400): string { + const trimmed = output.trim() + if (trimmed.length <= max) return trimmed + return `${trimmed.slice(0, max)}…` +} + +function readXmlTag(xml: string, tag: string): string | null { + const match = xml.match(new RegExp(`<${tag}>([\\s\\S]*?)`)) + if (!match) return null + return unescapeXml(match[1].trim()) +} + +export function formatBackgroundShellCompletionNotice(record: BackgroundShellRecord): string { + const sessionId = record.id + const outputPreview = summarizeOutput(record.output) || '(empty)' + const hint = record.outputFilePath + ? `Full output is saved at ${record.outputFilePath}. Use background_shell action="read" with session_id="${sessionId}" for a fresh summary.` + : `Use background_shell action="read" with session_id="${sessionId}" to inspect the full output.` + const lines = [ + '', + `${escapeXml(sessionId)}`, + `${escapeXml(record.command)}`, + `${record.exitCode ?? 0}`, + `${escapeXml(outputPreview)}`, + ...(record.outputFilePath ? 
[`${escapeXml(record.outputFilePath)}`] : []), + `${escapeXml(hint)}`, + '' + ] + return lines.join('\n') +} + +export function parseBackgroundShellCompletionNotice(text: string): BackgroundShellCompletionNotice | null { + const trimmed = text.trim() + if (!trimmed.includes('')) return null + const sessionId = readXmlTag(trimmed, 'session_id') + const command = readXmlTag(trimmed, 'command') + const exitCodeRaw = readXmlTag(trimmed, 'exit_code') + const outputPreview = readXmlTag(trimmed, 'output_preview') + const hint = readXmlTag(trimmed, 'hint') + if (!sessionId || !command || exitCodeRaw === null || outputPreview === null || !hint) return null + const exitCode = Number.parseInt(exitCodeRaw, 10) + if (!Number.isFinite(exitCode)) return null + return { sessionId, command, exitCode, outputPreview, hint } +} + +export function backgroundShellNoticeDisplayText(sessionId: string): string { + return `Background shell ${sessionId} completed` +} diff --git a/kun/src/services/background-shell-output.ts b/kun/src/services/background-shell-output.ts new file mode 100644 index 000000000..66fd6a0cd --- /dev/null +++ b/kun/src/services/background-shell-output.ts @@ -0,0 +1,137 @@ +import { mkdir, readFile, writeFile } from 'node:fs/promises' +import { createWriteStream, type WriteStream } from 'node:fs' +import { isAbsolute, join, relative, resolve, sep } from 'node:path' + +/** Shared per-thread folder for all background shell logs (alongside messages.jsonl). 
*/ +export const BACKGROUND_SHELL_OUTPUT_SUBDIR = 'background-shells' +export const DEFAULT_BACKGROUND_SHELL_OUTPUT_SUMMARY_MAX_CHARS = 10_000 +export const BACKGROUND_SHELL_OUTPUT_TRUNCATION_NOTICE = + '\n[background shell output truncated; use output_file for the full log]' + +export type BackgroundShellOutputPaths = { + outputDir: string + outputFilePath: string +} + +export type BackgroundShellOutputSummary = { + summary: string + truncated: boolean + totalChars: number +} + +export function resolveBackgroundShellOutputDir(dataDir: string, threadId: string): string { + return join(resolve(dataDir, 'threads', threadId), BACKGROUND_SHELL_OUTPUT_SUBDIR) +} + +export function resolveBackgroundShellOutputPaths( + dataDir: string, + threadId: string, + sessionId: string +): BackgroundShellOutputPaths { + const outputDir = resolveBackgroundShellOutputDir(dataDir, threadId) + const outputFilePath = resolve(outputDir, `${sessionId}.output`) + return { outputDir, outputFilePath } +} + +export function isBackgroundShellOutputPath( + absolutePath: string, + options: { runtimeDataDir?: string; threadId?: string } +): boolean { + const dataDir = options.runtimeDataDir?.trim() + if (!dataDir) return false + const normalized = resolve(absolutePath) + const threadId = options.threadId?.trim() + if (threadId) { + const dir = resolveBackgroundShellOutputDir(dataDir, threadId) + if (!normalized.startsWith(`${dir}${sep}`) && normalized !== dir) return false + return normalized.endsWith('.output') + } + const threadsRoot = resolve(dataDir, 'threads') + const rel = relative(threadsRoot, normalized) + if (!rel || rel.startsWith('..') || isAbsolute(rel)) return false + const parts = rel.split(sep) + return parts.length === 3 && parts[1] === BACKGROUND_SHELL_OUTPUT_SUBDIR && parts[2]?.endsWith('.output') === true +} + +export function summarizeBackgroundShellOutput( + fullOutput: string, + maxChars = DEFAULT_BACKGROUND_SHELL_OUTPUT_SUMMARY_MAX_CHARS +): BackgroundShellOutputSummary { + 
const chars = [...fullOutput] + const totalChars = chars.length + if (totalChars <= maxChars) { + return { summary: fullOutput, truncated: false, totalChars } + } + const noticeChars = [...BACKGROUND_SHELL_OUTPUT_TRUNCATION_NOTICE].length + const bodyBudget = Math.max(1, maxChars - noticeChars) + const body = chars.slice(-bodyBudget).join('') + return { + summary: `${body}${BACKGROUND_SHELL_OUTPUT_TRUNCATION_NOTICE}`, + truncated: true, + totalChars + } +} + +export async function readBackgroundShellOutputSummary( + outputFilePath: string, + maxChars = DEFAULT_BACKGROUND_SHELL_OUTPUT_SUMMARY_MAX_CHARS +): Promise { + try { + const full = await readFile(outputFilePath, 'utf-8') + return summarizeBackgroundShellOutput(full, maxChars) + } catch { + return { summary: '', truncated: false, totalChars: 0 } + } +} + +export class BackgroundShellOutputWriter { + private stream: WriteStream | undefined + private closed = false + + readonly paths: BackgroundShellOutputPaths + + constructor(dataDir: string, threadId: string, sessionId: string) { + this.paths = resolveBackgroundShellOutputPaths(dataDir, threadId, sessionId) + } + + async open(): Promise { + await mkdir(this.paths.outputDir, { recursive: true }) + await writeFile(this.paths.outputFilePath, '', 'utf-8') + this.stream = createWriteStream(this.paths.outputFilePath, { flags: 'a' }) + } + + append(chunk: Buffer | string): void { + if (this.closed) return + if (!this.stream) { + throw new Error('background shell output writer is not open') + } + this.stream.write(chunk) + } + + async close(): Promise { + if (this.closed) return + this.closed = true + if (!this.stream) { + await mkdir(this.paths.outputDir, { recursive: true }) + await writeFile(this.paths.outputFilePath, '', 'utf-8') + return + } + const stream = this.stream + this.stream = undefined + await new Promise((resolvePromise, reject) => { + stream.once('finish', resolvePromise) + stream.once('error', reject) + stream.end() + }) + } + + async 
buildReturnFields( + maxChars = DEFAULT_BACKGROUND_SHELL_OUTPUT_SUMMARY_MAX_CHARS + ): Promise { + const summary = await readBackgroundShellOutputSummary(this.paths.outputFilePath, maxChars) + return { + ...summary, + output_file: this.paths.outputFilePath + } + } +} diff --git a/kun/src/services/background-shell-runtime.ts b/kun/src/services/background-shell-runtime.ts new file mode 100644 index 000000000..63ad93eab --- /dev/null +++ b/kun/src/services/background-shell-runtime.ts @@ -0,0 +1,221 @@ +import type { BackgroundShellRecord, BackgroundShellStatus } from '../contracts/background-shell.js' +import type { RuntimeEventRecorder } from './runtime-event-recorder.js' +import type { ThreadStore } from '../ports/thread-store.js' +import type { TurnService } from './turn-service.js' +import type { BackgroundShellHooks } from '../adapters/tool/builtin-tool-types.js' +import { + backgroundShellNoticeDisplayText, + formatBackgroundShellCompletionNotice +} from './background-shell-notice.js' + +export type BackgroundShellRuntimeDeps = { + events: RuntimeEventRecorder + threadStore: ThreadStore + turns: TurnService + nowIso: () => string +} + +type RunTurnFn = (threadId: string, turnId: string) => Promise + +export class BackgroundShellRuntime { + private readonly sessions = new Map() + private readonly detachedIds = new Set() + private runTurn: RunTurnFn | null = null + + constructor(private readonly deps: BackgroundShellRuntimeDeps) {} + + bindAgentLoop(input: { runTurn: RunTurnFn }): void { + this.runTurn = input.runTurn + } + + bashHooks(): BackgroundShellHooks { + return { + onSessionStarted: (record) => this.handleSessionStarted(record), + onSessionUpdated: (record) => this.handleSessionUpdated(record), + onSessionSettled: (record) => this.handleSessionSettled(record), + isDetachedSession: (sessionId) => this.detachedIds.has(sessionId) + } + } + + listSessions(threadId?: string): BackgroundShellRecord[] { + const all = [...this.sessions.values()] + const filtered 
= threadId ? all.filter((session) => session.threadId === threadId) : all + return filtered.sort((a, b) => b.startedAt.localeCompare(a.startedAt)) + } + + getSession(sessionId: string): BackgroundShellRecord | null { + return this.sessions.get(sessionId) ?? null + } + + private stopHandler: ((sessionId: string) => Promise) | null = null + + bindStopHandler(handler: (sessionId: string) => Promise): void { + this.stopHandler = handler + } + + async stopSession(sessionId: string): Promise { + if (!this.stopHandler) return false + return this.stopHandler(sessionId) + } + + markDetached(sessionId: string): void { + this.detachedIds.add(sessionId) + } + + unmarkDetached(sessionId: string): void { + this.detachedIds.delete(sessionId) + } + + upsertSession(record: BackgroundShellRecord): void { + this.sessions.set(record.id, record) + } + + removeSession(sessionId: string): void { + this.sessions.delete(sessionId) + this.detachedIds.delete(sessionId) + } + + private sessionEventOutput(record: BackgroundShellRecord): { + output: string + outputTruncated?: true + outputFilePath?: string + } { + return { + output: record.output, + ...(record.outputTruncated ? { outputTruncated: true as const } : {}), + ...(record.outputFilePath ? 
{ outputFilePath: record.outputFilePath } : {}) + } + } + + private async handleSessionStarted(record: BackgroundShellRecord): Promise { + this.sessions.set(record.id, record) + if (record.detached) this.detachedIds.add(record.id) + await this.deps.events.record({ + kind: 'bash_session_started', + threadId: record.threadId, + turnId: record.turnId, + sessionId: record.id, + command: record.command, + cwd: record.cwd, + shell: record.shell, + status: record.status, + startedAt: record.startedAt, + detached: record.detached, + ...this.sessionEventOutput(record) + }) + } + + private async handleSessionUpdated(record: BackgroundShellRecord): Promise { + this.sessions.set(record.id, record) + await this.deps.events.record({ + kind: 'bash_session_updated', + threadId: record.threadId, + turnId: record.turnId, + sessionId: record.id, + command: record.command, + cwd: record.cwd, + shell: record.shell, + status: record.status, + startedAt: record.startedAt, + ...(record.finishedAt ? { finishedAt: record.finishedAt } : {}), + exitCode: record.exitCode, + detached: record.detached, + ...this.sessionEventOutput(record), + ...(record.error ? { error: record.error } : {}) + }) + } + + private async handleSessionSettled(record: BackgroundShellRecord): Promise { + this.sessions.set(record.id, record) + await this.deps.events.record({ + kind: 'bash_session_completed', + threadId: record.threadId, + turnId: record.turnId, + sessionId: record.id, + command: record.command, + cwd: record.cwd, + shell: record.shell, + status: record.status, + startedAt: record.startedAt, + ...(record.finishedAt ? { finishedAt: record.finishedAt } : {}), + exitCode: record.exitCode, + detached: record.detached, + ...this.sessionEventOutput(record), + ...(record.error ? 
{ error: record.error } : {}) + }) + if (record.detached && record.status === 'completed' && record.exitCode === 0) { + await this.notifyAgent(record) + } + if (record.status !== 'running') { + this.detachedIds.delete(record.id) + } + } + + private async notifyAgent(record: BackgroundShellRecord): Promise { + const thread = await this.deps.threadStore.get(record.threadId) + if (!thread) return + const notice = formatBackgroundShellCompletionNotice(record) + const displayText = backgroundShellNoticeDisplayText(record.id) + const noticeMeta = { + displayText, + messageSource: 'background_shell' as const + } + if (thread.status === 'running') { + const runningTurn = [...thread.turns].reverse().find((turn) => turn.status === 'running') + if (runningTurn) { + await this.deps.turns.steerTurn({ + threadId: record.threadId, + turnId: runningTurn.id, + text: notice, + ...noticeMeta + }) + return + } + } + if (!this.runTurn) return + const started = await this.deps.turns.startTurn({ + threadId: record.threadId, + request: { + prompt: notice, + ...noticeMeta + } + }) + void this.runTurn(record.threadId, started.turnId) + } +} + +export function toBackgroundShellRecord(input: { + id: string + threadId: string + turnId: string + command: string + cwd: string + shell: string + status: BackgroundShellStatus + startedAt: string + finishedAt?: string + exitCode: number | null + output: string + outputTruncated?: boolean + outputFilePath?: string + error?: string + detached: boolean +}): BackgroundShellRecord { + return { + id: input.id, + threadId: input.threadId, + turnId: input.turnId, + command: input.command, + cwd: input.cwd, + shell: input.shell, + status: input.status, + startedAt: input.startedAt, + ...(input.finishedAt ? { finishedAt: input.finishedAt } : {}), + exitCode: input.exitCode, + output: input.output, + ...(input.outputTruncated ? { outputTruncated: true } : {}), + ...(input.outputFilePath ? { outputFilePath: input.outputFilePath } : {}), + ...(input.error ? 
{ error: input.error } : {}), + detached: input.detached + } +} diff --git a/kun/src/services/turn-service.ts b/kun/src/services/turn-service.ts index 8d9c1eed6..4adfdf4ef 100644 --- a/kun/src/services/turn-service.ts +++ b/kun/src/services/turn-service.ts @@ -88,6 +88,7 @@ export class TurnService { threadId: input.threadId, text: input.request.prompt, displayText: input.request.displayText, + messageSource: input.request.messageSource, attachmentIds: input.request.attachmentIds ?? [], fileReferences: input.request.fileReferences ?? [], workspaceCheckpointId: input.request.workspaceCheckpointId @@ -156,13 +157,25 @@ export class TurnService { } } - async steerTurn(input: { threadId: string; turnId: string; text: string }): Promise { - this.deps.steering.enqueue(input.turnId, input.text) + async steerTurn(input: { + threadId: string + turnId: string + text: string + displayText?: string + messageSource?: 'background_shell' + }): Promise { + this.deps.steering.enqueue(input.turnId, { + text: input.text, + ...(input.displayText ? { displayText: input.displayText } : {}), + ...(input.messageSource ? { messageSource: input.messageSource } : {}) + }) await this.deps.events.record({ kind: 'turn_steered', threadId: input.threadId, turnId: input.turnId, - text: input.text + text: input.text, + ...(input.displayText ? { displayText: input.displayText } : {}), + ...(input.messageSource ? 
{ messageSource: input.messageSource } : {}) }) } diff --git a/kun/tests/background-shell-notice.test.ts b/kun/tests/background-shell-notice.test.ts new file mode 100644 index 000000000..32b3d7c5e --- /dev/null +++ b/kun/tests/background-shell-notice.test.ts @@ -0,0 +1,77 @@ +import { describe, expect, it } from 'vitest' +import { + backgroundShellNoticeDisplayText, + formatBackgroundShellCompletionNotice, + parseBackgroundShellCompletionNotice +} from '../src/services/background-shell-notice.js' + +describe('background-shell-notice', () => { + it('formats and parses completion notices as xml', () => { + const xml = formatBackgroundShellCompletionNotice({ + id: 'abcd1234', + threadId: 'thr_1', + turnId: 'turn_1', + command: 'npm run build', + cwd: '/tmp', + shell: 'bash', + status: 'completed', + startedAt: '2026-01-01T00:00:00.000Z', + finishedAt: '2026-01-01T00:00:05.000Z', + exitCode: 0, + output: 'build ok', + detached: true + }) + expect(xml).toContain('') + expect(xml).toContain('abcd1234') + expect(xml).toContain('npm run build') + expect(parseBackgroundShellCompletionNotice(xml)).toEqual({ + sessionId: 'abcd1234', + command: 'npm run build', + exitCode: 0, + outputPreview: 'build ok', + hint: expect.stringContaining('background_shell action="read"') + }) + }) + + it('escapes xml characters in command and output preview', () => { + const xml = formatBackgroundShellCompletionNotice({ + id: 'sess1', + threadId: 'thr_1', + turnId: 'turn_1', + command: 'echo "&"', + cwd: '/tmp', + shell: 'bash', + status: 'completed', + startedAt: '2026-01-01T00:00:00.000Z', + exitCode: 0, + output: '', + detached: true + }) + expect(xml).toContain('echo "<tag>&"') + expect(parseBackgroundShellCompletionNotice(xml)?.command).toBe('echo "&"') + expect(parseBackgroundShellCompletionNotice(xml)?.outputPreview).toBe('') + }) + + it('includes the output file path in completion notices when available', () => { + const xml = formatBackgroundShellCompletionNotice({ + id: 'abcd1234', + 
threadId: 'thr_1', + turnId: 'turn_1', + command: 'npm run build', + cwd: '/tmp', + shell: 'bash', + status: 'completed', + startedAt: '2026-01-01T00:00:00.000Z', + exitCode: 0, + output: 'ok', + outputFilePath: '/data/threads/thr_1/background-shells/abcd1234.output', + detached: true + }) + expect(xml).toContain('/data/threads/thr_1/background-shells/abcd1234.output') + expect(xml).not.toContain('') + }) + + it('builds a short display label for the renderer', () => { + expect(backgroundShellNoticeDisplayText('abcd1234')).toBe('Background shell abcd1234 completed') + }) +}) diff --git a/kun/tests/background-shell-output.test.ts b/kun/tests/background-shell-output.test.ts new file mode 100644 index 000000000..30019cd9b --- /dev/null +++ b/kun/tests/background-shell-output.test.ts @@ -0,0 +1,79 @@ +import { mkdtemp, readFile, rm } from 'node:fs/promises' +import { tmpdir } from 'node:os' +import { join, resolve } from 'node:path' +import { afterEach, describe, expect, it } from 'vitest' +import { + BACKGROUND_SHELL_OUTPUT_SUBDIR, + BACKGROUND_SHELL_OUTPUT_TRUNCATION_NOTICE, + BackgroundShellOutputWriter, + DEFAULT_BACKGROUND_SHELL_OUTPUT_SUMMARY_MAX_CHARS, + isBackgroundShellOutputPath, + readBackgroundShellOutputSummary, + resolveBackgroundShellOutputPaths, + summarizeBackgroundShellOutput +} from '../src/services/background-shell-output.js' + +describe('background-shell-output', () => { + let tempDir = '' + + afterEach(async () => { + if (tempDir) await rm(tempDir, { recursive: true, force: true }) + }) + + it('stores all session logs under one thread-scoped folder', async () => { + tempDir = await mkdtemp(join(tmpdir(), 'kun-bg-shell-output-')) + const first = resolveBackgroundShellOutputPaths(tempDir, 'thr_1', 'aaaa1111') + const second = resolveBackgroundShellOutputPaths(tempDir, 'thr_1', 'bbbb2222') + expect(first.outputDir).toBe(second.outputDir) + expect(first.outputDir).toContain(`${BACKGROUND_SHELL_OUTPUT_SUBDIR}`) + 
expect(first.outputFilePath.endsWith('aaaa1111.output')).toBe(true) + expect(second.outputFilePath.endsWith('bbbb2222.output')).toBe(true) + expect(resolve(first.outputFilePath)).toBe(first.outputFilePath) + }) + + it('appends a truncation notice to summarized output', () => { + const full = 'x'.repeat(DEFAULT_BACKGROUND_SHELL_OUTPUT_SUMMARY_MAX_CHARS + 50) + const summary = summarizeBackgroundShellOutput(full) + expect(summary.truncated).toBe(true) + expect(summary.summary.endsWith(BACKGROUND_SHELL_OUTPUT_TRUNCATION_NOTICE)).toBe(true) + expect([...summary.summary].length).toBeLessThanOrEqual(DEFAULT_BACKGROUND_SHELL_OUTPUT_SUMMARY_MAX_CHARS) + }) + + it('always creates an output file and summarizes from disk', async () => { + tempDir = await mkdtemp(join(tmpdir(), 'kun-bg-shell-output-')) + const writer = new BackgroundShellOutputWriter(tempDir, 'thr_1', 'sess1234') + await writer.open() + writer.append('hello\n') + writer.append('x'.repeat(DEFAULT_BACKGROUND_SHELL_OUTPUT_SUMMARY_MAX_CHARS + 50)) + const live = await writer.buildReturnFields() + expect(live.output_file).toContain('sess1234.output') + expect(live.truncated).toBe(true) + expect(live.summary).toContain(BACKGROUND_SHELL_OUTPUT_TRUNCATION_NOTICE) + await writer.close() + const persisted = await readFile(live.output_file, 'utf-8') + expect(persisted.startsWith('hello\n')).toBe(true) + const summary = await readBackgroundShellOutputSummary(live.output_file) + expect(summary.truncated).toBe(true) + }) + + it('creates an empty output file even when no bytes were written', async () => { + tempDir = await mkdtemp(join(tmpdir(), 'kun-bg-shell-output-')) + const writer = new BackgroundShellOutputWriter(tempDir, 'thr_1', 'empty01') + await writer.open() + await writer.close() + const fields = await writer.buildReturnFields() + expect(await readFile(fields.output_file, 'utf-8')).toBe('') + expect(summarizeBackgroundShellOutput('').truncated).toBe(false) + }) + + it('recognizes background shell output paths for 
sandbox read bypass', async () => { + tempDir = await mkdtemp(join(tmpdir(), 'kun-bg-shell-output-')) + const { outputFilePath } = resolveBackgroundShellOutputPaths(tempDir, 'thr_1', 'sess1234') + expect( + isBackgroundShellOutputPath(outputFilePath, { runtimeDataDir: tempDir, threadId: 'thr_1' }) + ).toBe(true) + expect(isBackgroundShellOutputPath('/tmp/other.log', { runtimeDataDir: tempDir, threadId: 'thr_1' })).toBe( + false + ) + }) +}) diff --git a/kun/tests/background-shell-runtime.test.ts b/kun/tests/background-shell-runtime.test.ts new file mode 100644 index 000000000..54bc59a2b --- /dev/null +++ b/kun/tests/background-shell-runtime.test.ts @@ -0,0 +1,89 @@ +import { describe, expect, it, vi } from 'vitest' +import { BackgroundShellRuntime } from '../src/services/background-shell-runtime.js' + +describe('BackgroundShellRuntime', () => { + it('steers a running turn when a detached shell completes successfully', async () => { + const steerTurn = vi.fn(async () => undefined) + const startTurn = vi.fn(async () => ({ threadId: 'thr_1', turnId: 'turn_new', userMessageItemId: 'item_1' })) + const runTurn = vi.fn(async () => undefined) + const runtime = new BackgroundShellRuntime({ + events: { record: vi.fn(async () => undefined) }, + threadStore: { + get: vi.fn(async () => ({ + id: 'thr_1', + status: 'running', + turns: [{ id: 'turn_1', status: 'running' }] + })) + }, + turns: { steerTurn, startTurn }, + nowIso: () => '2026-01-01T00:00:00.000Z' + }) + runtime.bindAgentLoop({ runTurn }) + await runtime.bashHooks().onSessionSettled?.({ + id: 'abcd1234', + threadId: 'thr_1', + turnId: 'turn_1', + command: 'npm test', + cwd: '/tmp', + shell: 'bash', + status: 'completed', + startedAt: '2026-01-01T00:00:00.000Z', + finishedAt: '2026-01-01T00:00:05.000Z', + exitCode: 0, + output: 'ok', + detached: true + }) + expect(steerTurn).toHaveBeenCalledWith({ + threadId: 'thr_1', + turnId: 'turn_1', + text: expect.stringContaining('abcd1234'), + displayText: 'Background shell 
abcd1234 completed', + messageSource: 'background_shell' + }) + expect(startTurn).not.toHaveBeenCalled() + expect(runTurn).not.toHaveBeenCalled() + }) + + it('starts a new turn with messageSource when the thread is idle', async () => { + const steerTurn = vi.fn(async () => undefined) + const startTurn = vi.fn(async () => ({ threadId: 'thr_1', turnId: 'turn_new', userMessageItemId: 'item_1' })) + const runTurn = vi.fn(async () => undefined) + const runtime = new BackgroundShellRuntime({ + events: { record: vi.fn(async () => undefined) }, + threadStore: { + get: vi.fn(async () => ({ + id: 'thr_1', + status: 'idle', + turns: [{ id: 'turn_1', status: 'completed' }] + })) + }, + turns: { steerTurn, startTurn }, + nowIso: () => '2026-01-01T00:00:00.000Z' + }) + runtime.bindAgentLoop({ runTurn }) + await runtime.bashHooks().onSessionSettled?.({ + id: 'abcd1234', + threadId: 'thr_1', + turnId: 'turn_1', + command: 'npm test', + cwd: '/tmp', + shell: 'bash', + status: 'completed', + startedAt: '2026-01-01T00:00:00.000Z', + finishedAt: '2026-01-01T00:00:05.000Z', + exitCode: 0, + output: 'ok', + detached: true + }) + expect(startTurn).toHaveBeenCalledWith({ + threadId: 'thr_1', + request: { + prompt: expect.stringContaining(''), + displayText: 'Background shell abcd1234 completed', + messageSource: 'background_shell' + } + }) + expect(runTurn).toHaveBeenCalledWith('thr_1', 'turn_new') + expect(steerTurn).not.toHaveBeenCalled() + }) +}) diff --git a/kun/tests/builtin-tools.test.ts b/kun/tests/builtin-tools.test.ts index a5d3a5716..6a8678e12 100644 --- a/kun/tests/builtin-tools.test.ts +++ b/kun/tests/builtin-tools.test.ts @@ -43,6 +43,7 @@ import { createLsTool, createLsToolDefinition } from '../src/adapters/tool/builtin-tools.js' +import { createBackgroundShellTool } from '../src/adapters/tool/background-shell-tool.js' import { createReadTool as createReadToolFromModule } from '../src/adapters/tool/read.js' import { createBashTool as createBashToolFromModule } from 
'../src/adapters/tool/bash.js' import { createEditTool as createEditToolFromModule } from '../src/adapters/tool/edit.js' @@ -92,15 +93,27 @@ async function executeTool( describe('Kun built-in tools', () => { let workspace: string + let backgroundShellDataDir: string let host: LocalToolHost + function createBackgroundBashLocalTool( + options: Parameters[0] = {} + ): ReturnType { + return createBashLocalTool({ + ...options, + backgroundShellDataDir + }) + } + beforeEach(async () => { workspace = await mkdtemp(join(tmpdir(), 'kun-tools-')) + backgroundShellDataDir = await mkdtemp(join(tmpdir(), 'kun-bg-shell-data-')) host = new LocalToolHost({ tools: defaultLocalTools }) }) afterEach(async () => { await rm(workspace, { recursive: true, force: true }) + await rm(backgroundShellDataDir, { recursive: true, force: true }) }) it('advertises the pi-style built-in tool family by default', async () => { @@ -561,89 +574,256 @@ describe('Kun built-in tools', () => { expect(Date.now() - startedAt).toBeLessThan(1500) }) - it('returns a pollable bash session for foreground long-running commands', async () => { + it('blocks foreground bash commands until the process exits', async () => { const startedAt = Date.now() const output = await executeTool(host, workspace, 'bash', { - command: 'echo ready; sleep 5', - yield_seconds: 1, + command: 'echo ready; sleep 2; echo done', timeout: 10 }) - expect(output.exit_code).toBe(null) - expect(output.status).toBe('running') - expect(typeof output.session_id).toBe('string') + expect(output.exit_code).toBe(0) expect(String(output.output)).toContain('ready') - expect(Date.now() - startedAt).toBeLessThan(2500) + expect(String(output.output)).toContain('done') + expect(output.session_id).toBeUndefined() + expect(Date.now() - startedAt).toBeGreaterThanOrEqual(1800) + }) - const stopped = await executeTool(host, workspace, 'bash', { - action: 'stop', - session_id: String(output.session_id) + it('returns immediately for background bash sessions and 
keeps running after abort', async () => { + const hooks = { + started: [] as string[], + settled: [] as string[] + } + const backgroundHost = new LocalToolHost({ + tools: [ + createBackgroundBashLocalTool({ + backgroundShell: { + onSessionStarted: async (record) => { + hooks.started.push(record.id) + }, + onSessionSettled: async (record) => { + hooks.settled.push(record.id) + }, + isDetachedSession: (sessionId) => hooks.started.includes(sessionId) + } + }), + createBackgroundShellTool() + ] }) - expect(stopped.status).toBe('stopped') - expect(stopped.stop_sent).toBe(true) + const abortController = new AbortController() + const startedAt = Date.now() + const output = await backgroundHost.execute( + { + callId: 'call_bash_background', + toolName: 'bash', + arguments: { + command: 'echo bg-ready; sleep 5; echo bg-done', + background: true, + timeout: 10 + } + }, + buildContext(workspace, { abortSignal: abortController.signal }) + ) + expect(output.item.kind).toBe('tool_result') + if (output.item.kind !== 'tool_result') throw new Error('expected tool_result') + const payload = output.item.output as Record + expect(payload.status).toBe('running') + expect(typeof payload.session_id).toBe('string') + expect(String(payload.session_id)).toMatch(/^[a-z0-9]{8}$/) + expect(typeof payload.output_file).toBe('string') + expect(String(payload.output_file)).toMatch(/\.output$/) + expect(Date.now() - startedAt).toBeLessThan(500) + expect(hooks.started).toHaveLength(1) + + abortController.abort() + await new Promise((resolve) => setTimeout(resolve, 2500)) + const read = await backgroundHost.execute( + { + callId: 'call_bash_background_read', + toolName: 'background_shell', + arguments: { + action: 'read', + session_id: String(payload.session_id) + } + }, + buildContext(workspace) + ) + expect(read.item.kind).toBe('tool_result') + if (read.item.kind !== 'tool_result') throw new Error('expected tool_result') + const readPayload = read.item.output as Record + 
expect(readPayload.status).toBe('running') + + await backgroundHost.execute( + { + callId: 'call_bash_background_stop', + toolName: 'background_shell', + arguments: { + action: 'stop', + session_id: String(payload.session_id) + } + }, + buildContext(workspace) + ) + expect(hooks.settled.length).toBeGreaterThanOrEqual(1) }) - it('polls completed bash sessions for final output', async () => { - const output = await executeTool(host, workspace, 'bash', { - command: 'echo ready; sleep 2; echo done', - yield_seconds: 1, - timeout: 10 + it('polls completed background shell sessions via background_shell', async () => { + const backgroundHost = new LocalToolHost({ + tools: [createBackgroundBashLocalTool(), createBackgroundShellTool()] }) - - expect(output.status).toBe('running') + const started = await backgroundHost.execute( + { + callId: 'call_bash_bg_poll', + toolName: 'bash', + arguments: { + command: 'echo ready; sleep 2; echo done', + background: true, + timeout: 10 + } + }, + buildContext(workspace) + ) + expect(started.item.kind).toBe('tool_result') + if (started.item.kind !== 'tool_result') throw new Error('expected tool_result') + const sessionId = String((started.item.output as { session_id?: string }).session_id) await new Promise((resolve) => setTimeout(resolve, 2500)) - const polled = await executeTool(host, workspace, 'bash', { + const polled = await executeTool(backgroundHost, workspace, 'background_shell', { action: 'poll', - session_id: String(output.session_id) + session_id: sessionId, + yield_seconds: 1 }) expect(polled.status).toBe('completed') expect(polled.exit_code).toBe(0) expect(String(polled.output)).toContain('done') + expect(typeof polled.output_file).toBe('string') }) - it('blocks the poll action for at least yield_seconds while the session keeps running', async () => { - const output = await executeTool(host, workspace, 'bash', { - command: 'echo ready; sleep 5; echo done', - yield_seconds: 1, - timeout: 10 + it('lists background shell 
sessions via background_shell', async () => { + const backgroundHost = new LocalToolHost({ + tools: [ + createBackgroundBashLocalTool(), + createBackgroundShellTool({ + listBackgroundSessions: () => [ + { + id: 'abcd1234', + threadId: 'thr_1', + turnId: 'turn_1', + command: 'sleep 10', + cwd: workspace, + shell: 'bash', + status: 'running', + startedAt: '2026-01-01T00:00:00.000Z', + exitCode: null, + output: 'running', + detached: true + } + ] + }) + ] }) - expect(output.status).toBe('running') - - const startedAt = Date.now() - const polled = await executeTool(host, workspace, 'bash', { - action: 'poll', - session_id: String(output.session_id), - yield_seconds: 2 + await backgroundHost.execute( + { + callId: 'call_bash_bg', + toolName: 'bash', + arguments: { command: 'echo hi', background: true, timeout: 10 } + }, + buildContext(workspace) + ) + const listed = await executeTool(backgroundHost, workspace, 'background_shell', { + action: 'list', + thread_only: false }) - const elapsed = Date.now() - startedAt - expect(elapsed).toBeGreaterThanOrEqual(1800) - expect(polled.status).toBe('running') + expect(listed.running).toBe(1) + expect((listed.sessions as Array<{ session_id?: string }>)?.[0]?.session_id).toBe('abcd1234') + }) - await executeTool(host, workspace, 'bash', { - action: 'stop', - session_id: String(output.session_id) + it('persists full background shell output to the thread record directory', async () => { + const backgroundHost = new LocalToolHost({ + tools: [createBackgroundBashLocalTool(), createBackgroundShellTool()] }) + const started = await backgroundHost.execute( + { + callId: 'call_bash_bg_output_file', + toolName: 'bash', + arguments: { + command: "node -e \"process.stdout.write('line-one\\n'); process.stdout.write('x'.repeat(10050))\"", + background: true, + timeout: 10 + } + }, + buildContext(workspace) + ) + expect(started.item.kind).toBe('tool_result') + if (started.item.kind !== 'tool_result') throw new Error('expected tool_result') + 
const payload = started.item.output as Record + const outputFile = String(payload.output_file) + expect(outputFile).toContain('background-shells') + expect(outputFile.endsWith(`${String(payload.session_id)}.output`)).toBe(true) + await new Promise((resolve) => setTimeout(resolve, 500)) + const full = await readFile(outputFile, 'utf-8') + expect(full.startsWith('line-one\n')).toBe(true) + expect([...full].length).toBeGreaterThan(10_000) + const read = await executeTool(backgroundHost, workspace, 'background_shell', { + action: 'read', + session_id: String(payload.session_id) + }) + expect(read.output_truncated).toBe(true) + expect(String(read.output)).toContain('[background shell output truncated') + expect(read.output_file).toBe(outputFile) }) - it('returns from poll early once the session exits before yield_seconds', async () => { - const output = await executeTool(host, workspace, 'bash', { - command: 'echo ready; sleep 1; echo done', - yield_seconds: 1, - timeout: 10 + it('hides finished background shell sessions from list unless include_finished=true', async () => { + const backgroundHost = new LocalToolHost({ + tools: [ + createBackgroundShellTool({ + listBackgroundSessions: () => [ + { + id: 'runng001', + threadId: 'thr_1', + turnId: 'turn_1', + command: 'sleep 10', + cwd: workspace, + shell: 'bash', + status: 'running', + startedAt: '2026-01-01T00:00:00.000Z', + exitCode: null, + output: 'running', + detached: true + }, + { + id: 'done0001', + threadId: 'thr_1', + turnId: 'turn_1', + command: 'echo done', + cwd: workspace, + shell: 'bash', + status: 'completed', + startedAt: '2026-01-01T00:00:00.000Z', + finishedAt: '2026-01-01T00:00:05.000Z', + exitCode: 0, + output: 'done', + detached: true + } + ] + }) + ] + }) + const runningOnly = await executeTool(backgroundHost, workspace, 'background_shell', { + action: 'list', + thread_only: false }) - expect(output.status).toBe('running') + expect(runningOnly.running).toBe(1) + expect((runningOnly.sessions as 
Array<{ session_id?: string }>).map((s) => s.session_id)).toEqual(['runng001']) - const startedAt = Date.now() - const polled = await executeTool(host, workspace, 'bash', { - action: 'poll', - session_id: String(output.session_id), - yield_seconds: 10 + const withFinished = await executeTool(backgroundHost, workspace, 'background_shell', { + action: 'list', + thread_only: false, + include_finished: true }) - const elapsed = Date.now() - startedAt - expect(elapsed).toBeLessThan(3000) - expect(polled.status).toBe('completed') - expect(polled.exit_code).toBe(0) - expect(String(polled.output)).toContain('done') + expect(withFinished.running).toBe(1) + expect((withFinished.sessions as Array<{ session_id?: string }>).map((s) => s.session_id)).toEqual([ + 'runng001', + 'done0001' + ]) }) it('includes the active shell in bash partial updates', async () => { diff --git a/src/main/ipc/app-ipc-schemas.ts b/src/main/ipc/app-ipc-schemas.ts index 756af1af5..708704d14 100644 --- a/src/main/ipc/app-ipc-schemas.ts +++ b/src/main/ipc/app-ipc-schemas.ts @@ -26,7 +26,9 @@ import { KUN_THREAD_TEMPLATE, KUN_USER_INPUT_TEMPLATE, KUN_USAGE_TEMPLATE, - KUN_DEBUG_LLM_ROUNDS_TEMPLATE + KUN_DEBUG_LLM_ROUNDS_TEMPLATE, + KUN_BACKGROUND_SHELLS_TEMPLATE, + KUN_BACKGROUND_SHELL_TEMPLATE } from '../../shared/kun-endpoints' import { IMAGE_GENERATION_PROTOCOLS, @@ -173,7 +175,10 @@ const ENDPOINTS: readonly EndpointTemplate[] = [ compileEndpoint(KUN_USER_INPUT_TEMPLATE, ['POST']), compileEndpoint(KUN_SESSION_RESUME_TEMPLATE, ['POST']), compileEndpoint(KUN_USAGE_TEMPLATE, ['GET']), - compileEndpoint(KUN_DEBUG_LLM_ROUNDS_TEMPLATE, ['GET']) + compileEndpoint(KUN_DEBUG_LLM_ROUNDS_TEMPLATE, ['GET']), + compileEndpoint(KUN_BACKGROUND_SHELLS_TEMPLATE, ['GET']), + compileEndpoint(KUN_BACKGROUND_SHELL_TEMPLATE, ['GET']), + compileEndpoint(`${KUN_BACKGROUND_SHELL_TEMPLATE}/stop`, ['POST']) ] function isAllowedRuntimeRequest(value: { path: string; method?: string }): boolean { diff --git 
a/src/renderer/src/agent/kun-contract.ts b/src/renderer/src/agent/kun-contract.ts index e6e36d183..3e6eba17e 100644 --- a/src/renderer/src/agent/kun-contract.ts +++ b/src/renderer/src/agent/kun-contract.ts @@ -378,6 +378,7 @@ export type CoreTurnItemJson = { kind: string text?: string displayText?: string + messageSource?: 'background_shell' toolName?: string callId?: string toolKind?: 'tool_call' | 'command_execution' | 'file_change' diff --git a/src/renderer/src/agent/kun-mapper.ts b/src/renderer/src/agent/kun-mapper.ts index a6f321e5f..08ab7aef7 100644 --- a/src/renderer/src/agent/kun-mapper.ts +++ b/src/renderer/src/agent/kun-mapper.ts @@ -296,6 +296,9 @@ function applyRuntimeDisclosureMeta( if (displayText && displayText !== item.text?.trim()) { meta.displayText = displayText } + if (item.messageSource === 'background_shell') { + meta.messageSource = 'background_shell' + } if (attachmentIds) meta.attachmentIds = attachmentIds if (fileReferences) meta.fileReferences = fileReferences if (activeSkillIds) meta.activeSkillIds = activeSkillIds diff --git a/src/renderer/src/agent/types.ts b/src/renderer/src/agent/types.ts index 7a6bbac5d..86a3a4029 100644 --- a/src/renderer/src/agent/types.ts +++ b/src/renderer/src/agent/types.ts @@ -72,6 +72,7 @@ export type WebCitationSource = { export type RuntimeDisclosureMetadata = { displayText?: string + messageSource?: 'background_shell' turnId?: string workspaceCheckpointId?: string attachmentIds?: string[] diff --git a/src/renderer/src/components/Workbench.tsx b/src/renderer/src/components/Workbench.tsx index 3f4890327..89eb3ca32 100644 --- a/src/renderer/src/components/Workbench.tsx +++ b/src/renderer/src/components/Workbench.tsx @@ -49,6 +49,7 @@ import { type ComposerExecutionSettings, type ComposerFileReference } from './chat/FloatingComposer' +import { BackgroundShellOverlay } from './chat/BackgroundShellOverlay' import { ChatFileTreePanel, type ChatFileTreeReference } from './chat/ChatFileTreePanel' import { 
composerReasoningEffortRequestValue, @@ -2764,7 +2765,8 @@ export function Workbench(): ReactElement { {uiModeCameosEnabled && !focusModeEnabled ? : null} {!focusModeEnabled ? : null} -
+
+ {activeThreadRelation === 'side' && activeThreadParentId ? ( { + const query = threadId ? `?thread_id=${encodeURIComponent(threadId)}` : '' + const result = await rendererRuntimeClient.runtimeRequest(`${KUN_BACKGROUND_SHELLS_PATH}${query}`) + if (!result.ok) return { sessions: [], running: 0 } + try { + return JSON.parse(result.body) as BackgroundShellListResponse + } catch { + return { sessions: [], running: 0 } + } +} + +async function stopBackgroundShell(sessionId: string): Promise { + await rendererRuntimeClient.runtimeRequest(kunBackgroundShellStopPath(sessionId), 'POST') +} + +type BackgroundShellOverlayProps = { + runtimeReady?: boolean +} + +export function BackgroundShellOverlay({ + runtimeReady = false +}: BackgroundShellOverlayProps): ReactElement | null { + const { t } = useTranslation('chat') + const [open, setOpen] = useState(false) + const [sessions, setSessions] = useState([]) + const [selectedId, setSelectedId] = useState(null) + + const refresh = useCallback(async () => { + if (!runtimeReady) return + const data = await fetchBackgroundShells() + setSessions(data.sessions) + }, [runtimeReady]) + + useEffect(() => { + void refresh() + if (!runtimeReady) return + const timer = window.setInterval(() => { + void refresh() + }, 2000) + return () => window.clearInterval(timer) + }, [refresh, runtimeReady]) + + const runningCount = useMemo( + () => sessions.filter((session) => session.status === 'running').length, + [sessions] + ) + const selected = useMemo( + () => sessions.find((session) => session.id === selectedId) ?? sessions[0] ?? null, + [selectedId, sessions] + ) + + if (runningCount <= 0 && !open) return null + + const handleStop = async (sessionId: string) => { + await stopBackgroundShell(sessionId) + await refresh() + } + + return ( +
+ {open ? ( +
+
+
+

+ {t('backgroundShells.title', { defaultValue: 'Background shells' })} +

+

+ {t('backgroundShells.runningCount', { + defaultValue: '{{count}} running', + count: runningCount + })} +

+
+ +
+
+ {sessions.length === 0 ? ( +

+ {t('backgroundShells.empty', { defaultValue: 'No background shells.' })} +

+ ) : ( + sessions.map((session) => { + const active = selected?.id === session.id + return ( + + ) + }) + )} +
+ {selected ? ( +
+
+

{selected.command}

+ {selected.status === 'running' ? ( + + ) : null} +
+
+                {selected.output.trim() || t('backgroundShells.noOutput', { defaultValue: '(no output yet)' })}
+              
+ {selected.outputFilePath ? ( +

+ {t('backgroundShells.outputFile', { defaultValue: 'Full output' })}: {selected.outputFilePath} + {selected.outputTruncated + ? ` · ${t('backgroundShells.outputTruncated', { defaultValue: 'preview truncated' })}` + : ''} +

+ ) : null} +
+ ) : null} +
+ ) : null} + +
+ ) +} diff --git a/src/renderer/src/components/chat/MessageTimeline.tsx b/src/renderer/src/components/chat/MessageTimeline.tsx index 402ae0847..f7dccff03 100644 --- a/src/renderer/src/components/chat/MessageTimeline.tsx +++ b/src/renderer/src/components/chat/MessageTimeline.tsx @@ -22,6 +22,7 @@ import type { UiPluginLabelKey } from '@shared/ui-plugin' import { useUiPluginWorkLabel } from '../../store/ui-plugin-store' import { groupTurns, + isBackgroundShellNoticeBlock, sameTurnContent, splitThink, stableTurnKey, @@ -90,6 +91,12 @@ function blockScrollStamp(block: ChatBlock | undefined): string { } function turnPreview(turn: Turn, fallback: string): string { + if (turn.user && isBackgroundShellNoticeBlock(turn.user)) { + const display = turn.user.meta?.displayText?.trim() + if (display) { + return display.length > 48 ? `${display.slice(0, 47).trimEnd()}...` : display + } + } const text = turn.user?.text.trim() ?? '' if (!text) return fallback const oneLine = text.replace(/\s+/g, ' ') diff --git a/src/renderer/src/components/chat/message-timeline-bubbles.tsx b/src/renderer/src/components/chat/message-timeline-bubbles.tsx index d5a663ef2..14be7c082 100644 --- a/src/renderer/src/components/chat/message-timeline-bubbles.tsx +++ b/src/renderer/src/components/chat/message-timeline-bubbles.tsx @@ -3,13 +3,15 @@ import { memo, useEffect, useMemo, useRef, useState } from 'react' import ReactMarkdown from 'react-markdown' import remarkGfm from 'remark-gfm' import { useTranslation } from 'react-i18next' -import { ArrowDown, Check, ChevronDown, ChevronRight, Copy, Download, File, FileEdit, GitFork, ImageIcon, Loader2, MessageSquareQuote, PencilLine, RotateCcw, Terminal, Video, Wrench } from 'lucide-react' +import { ArrowDown, Check, ChevronDown, ChevronRight, Copy, Download, File, FileEdit, GitFork, ImageIcon, Loader2, MessageSquareQuote, PencilLine, RotateCcw, SquareTerminal, Terminal, Video, Wrench } from 'lucide-react' import type { AttachmentReference, ChatBlock, 
GeneratedFileReference, RuntimeDisclosureMetadata, ToolBlock, UserFileReference, UserInputAnswer } from '../../agent/types' import { extractUnifiedDiffText } from '../../lib/diff-stats' import { useChatStore } from '../../store/chat-store' import { getProvider } from '../../agent/registry' import { parseWritePromptForDisplay } from '../../write/quoted-selection' import { parseClawUserPromptForDisplay, type ClawUserPromptDisplay } from '@shared/app-settings' +import { parseBackgroundShellCompletionNotice } from '@shared/background-shell-notice' +import { isBackgroundShellNoticeBlock } from './message-timeline-turns' import { openWorkspacePathInEditor } from '../../lib/open-workspace-path' import { DiffView } from '../DiffView' import { AssistantMarkdown } from './AssistantMarkdown' @@ -20,6 +22,93 @@ import { answersByQuestionId, shouldShowQuestionHeader } from './user-input-pane const COPY_FEEDBACK_RESET_MS = 1600 +function BackgroundShellNoticeBubble({ + block, + nested = false +}: { + block: Extract + nested?: boolean +}): ReactElement { + const { t } = useTranslation('common') + const [outputExpanded, setOutputExpanded] = useState(false) + const parsed = useMemo(() => parseBackgroundShellCompletionNotice(block.text), [block.text]) + const title = + block.meta?.displayText?.trim() || + t('backgroundShellNotice.title', { defaultValue: 'Background shell completed' }) + const outputPreview = parsed?.outputPreview ?? '' + const canExpandOutput = outputPreview.length > 180 + + return ( +
+
+
+ +
+

{title}

+ {parsed ? ( +
+
+
+ {t('backgroundShellNotice.sessionId', { defaultValue: 'Session' })} +
+
{parsed.sessionId}
+
+
+
+ {t('backgroundShellNotice.command', { defaultValue: 'Command' })} +
+
{parsed.command}
+
+
+
+ {t('backgroundShellNotice.exitCode', { defaultValue: 'Exit code' })} +
+
{parsed.exitCode}
+
+
+ ) : null} + {outputPreview ? ( +
+ +
+                  {outputPreview}
+                
+
+ ) : null} + {parsed?.outputFile ? ( +

+ {t('backgroundShellNotice.outputFile', { defaultValue: 'Full output file' })}: {parsed.outputFile} +

+ ) : null} +
+
+
+
+ ) +} + /** * User message bubble with hover affordance to rewind/edit. Click the rewind * pill, the bubble flips into a textarea, and Resend submits an edited @@ -1253,6 +1342,9 @@ function MessageBubbleImpl({ }): ReactElement { const { t, i18n } = useTranslation('common') const resolveApproval = useChatStore((s) => s.resolveApproval) + if (block.kind === 'user' && isBackgroundShellNoticeBlock(block)) { + return + } if (block.kind === 'user') { return } diff --git a/src/renderer/src/components/chat/message-timeline-process.tsx b/src/renderer/src/components/chat/message-timeline-process.tsx index 2c5f8a5fd..ffa9146a1 100644 --- a/src/renderer/src/components/chat/message-timeline-process.tsx +++ b/src/renderer/src/components/chat/message-timeline-process.tsx @@ -25,7 +25,7 @@ import { useChatStore } from '../../store/chat-store' import { DiffView } from '../DiffView' import { AssistantMarkdown } from './AssistantMarkdown' import { MessageBubble } from './message-timeline-bubbles' -import { blockHasPendingRuntimeWork, splitThink } from './message-timeline-turns' +import { blockHasPendingRuntimeWork, isBackgroundShellNoticeBlock, splitThink } from './message-timeline-turns' import { formatDuration, formatToolTitle } from './message-timeline-tools' import { SubagentGroup } from './SubagentCallCard' @@ -848,6 +848,7 @@ type ProcessDetail = | { kind: 'tool'; text: string; isPatch: boolean; isError: boolean; filePath?: string } | { kind: 'approval' } | { kind: 'user_input' } + | { kind: 'background_shell' } | { kind: 'text'; text: string } function summarizeProcessText(text: string, max = 96): string { @@ -1101,6 +1102,7 @@ function getProcessDetail(block: ChatBlock, summaryText?: string): ProcessDetail } if (block.kind === 'approval') return { kind: 'approval' } if (block.kind === 'user_input') return { kind: 'user_input' } + if (isBackgroundShellNoticeBlock(block)) return { kind: 'background_shell' } if (block.kind === 'system' && block.text.trim()) { if 
(block.detail?.trim()) return { kind: 'text', text: block.detail } // Short system messages already fit in the summary line — skip the @@ -1171,6 +1173,9 @@ function ProcessEntryDetail({ if (detail.kind === 'user_input' && block.kind === 'user_input') { return } + if (detail.kind === 'background_shell' && block.kind === 'user') { + return + } return null } @@ -1187,6 +1192,9 @@ function describeProcessBlock( if (block.kind === 'tool') { return summarizeToolBlock(block, t) } + if (isBackgroundShellNoticeBlock(block)) { + return block.meta?.displayText?.trim() || t('backgroundShellNotice.title', { defaultValue: 'Background shell completed' }) + } if (block.kind === 'compaction') { if (block.status === 'running') return t('compactionRunning') if (block.status === 'error') return block.summary || t('compactionFailed') diff --git a/src/renderer/src/components/chat/message-timeline-turns.test.ts b/src/renderer/src/components/chat/message-timeline-turns.test.ts index 83ee0901a..45117d708 100644 --- a/src/renderer/src/components/chat/message-timeline-turns.test.ts +++ b/src/renderer/src/components/chat/message-timeline-turns.test.ts @@ -29,16 +29,24 @@ describe('message timeline turns', () => { expect(sameTurnContent(first, second)).toBe(true) }) - it('detects updates to a block inside an otherwise stable turn', () => { - const firstBlocks: ChatBlock[] = [ - { kind: 'user', id: 'user_1', text: 'Hello' }, - { kind: 'assistant', id: 'assistant_1', text: 'Hi' } - ] - const nextBlocks: ChatBlock[] = [ - firstBlocks[0], - { kind: 'assistant', id: 'assistant_1', text: 'Hi again' } + it('keeps background shell notices inside the current turn instead of splitting it', () => { + const notice: ChatBlock = { + kind: 'user', + id: 'notice_1', + text: 'abcd1234', + meta: { messageSource: 'background_shell', displayText: 'Background shell abcd1234 completed' } + } + const blocks: ChatBlock[] = [ + { kind: 'user', id: 'user_1', text: 'Run build in background' }, + { kind: 'assistant', 
id: 'assistant_1', text: 'Started.' }, + notice, + { kind: 'assistant', id: 'assistant_2', text: 'Build finished.' } ] - expect(sameTurnContent(groupTurns(firstBlocks)[0], groupTurns(nextBlocks)[0])).toBe(false) + const turns = groupTurns(blocks) + + expect(turns).toHaveLength(1) + expect(turns[0]?.user?.id).toBe('user_1') + expect(turns[0]?.blocks.map((block) => block.id)).toEqual(['assistant_1', 'notice_1', 'assistant_2']) }) }) diff --git a/src/renderer/src/components/chat/message-timeline-turns.ts b/src/renderer/src/components/chat/message-timeline-turns.ts index ec2ab88af..7afeb033b 100644 --- a/src/renderer/src/components/chat/message-timeline-turns.ts +++ b/src/renderer/src/components/chat/message-timeline-turns.ts @@ -1,16 +1,26 @@ import type { ChatBlock } from '../../agent/types' +import { isBackgroundShellNoticeSource } from '@shared/background-shell-notice' export type Turn = { user?: Extract blocks: ChatBlock[] } +export function isBackgroundShellNoticeBlock(block: ChatBlock): boolean { + return block.kind === 'user' && isBackgroundShellNoticeSource(block.meta?.messageSource) +} + export function groupTurns(blocks: ChatBlock[]): Turn[] { const turns: Turn[] = [] let current: Turn | null = null for (const block of blocks) { if (block.kind === 'user') { + if (isBackgroundShellNoticeBlock(block)) { + if (!current) current = { blocks: [] } + current.blocks.push(block) + continue + } if (current) turns.push(current) current = { user: block, blocks: [] } continue @@ -56,6 +66,7 @@ export function blockHasPendingRuntimeWork(block: ChatBlock): boolean { export function isProcessBlock(block: ChatBlock): boolean { return ( + isBackgroundShellNoticeBlock(block) || block.kind === 'reasoning' || block.kind === 'tool' || block.kind === 'compaction' || diff --git a/src/renderer/src/locales/en/common.json b/src/renderer/src/locales/en/common.json index c3b4fb1bf..62e43bc41 100644 --- a/src/renderer/src/locales/en/common.json +++ 
b/src/renderer/src/locales/en/common.json @@ -1998,6 +1998,12 @@ "threadForkPointFrom": "Branch from {{title}} starts here", "compactionRunning": "Compacting context", "compactionManualCompleted": "Compacted context", + "backgroundShellNotice.title": "Background shell completed", + "backgroundShellNotice.sessionId": "Session", + "backgroundShellNotice.command": "Command", + "backgroundShellNotice.exitCode": "Exit code", + "backgroundShellNotice.outputPreview": "Output preview", + "backgroundShellNotice.outputFile": "Full output file", "compactionManualCompletedWithTokens": "Compacted context · ~{{tokens}} tokens freed", "compactionAutoCompleted": "Auto-compacted context", "compactionAutoCompletedWithTokens": "Auto-compacted context · ~{{tokens}} tokens freed", diff --git a/src/renderer/src/locales/zh/common.json b/src/renderer/src/locales/zh/common.json index 831726da9..3c8105c5f 100644 --- a/src/renderer/src/locales/zh/common.json +++ b/src/renderer/src/locales/zh/common.json @@ -1998,6 +1998,12 @@ "threadForkPointFrom": "从「{{title}}」分叉,后续从这里开始", "compactionRunning": "正在压缩上下文", "compactionManualCompleted": "已压缩上下文", + "backgroundShellNotice.title": "后台 shell 已完成", + "backgroundShellNotice.sessionId": "会话", + "backgroundShellNotice.command": "命令", + "backgroundShellNotice.exitCode": "退出码", + "backgroundShellNotice.outputPreview": "输出预览", + "backgroundShellNotice.outputFile": "完整输出文件", "compactionManualCompletedWithTokens": "已压缩上下文 · 释放约 {{tokens}} tokens", "compactionAutoCompleted": "已自动压缩上下文", "compactionAutoCompletedWithTokens": "已自动压缩上下文 · 释放约 {{tokens}} tokens", diff --git a/src/renderer/src/store/chat-store-runtime-helpers.ts b/src/renderer/src/store/chat-store-runtime-helpers.ts index 375dae170..bd592f88f 100644 --- a/src/renderer/src/store/chat-store-runtime-helpers.ts +++ b/src/renderer/src/store/chat-store-runtime-helpers.ts @@ -4,6 +4,7 @@ import type { RuntimeDisclosureMetadata, UserMessageEventPayload } from '../agent/types' +import { 
isBackgroundShellNoticeSource } from '@shared/background-shell-notice' import { normalizeWorkspaceRoot } from '../lib/workspace-path' import { shouldAutoTitleThread } from '../lib/thread-title' import type { ChatState } from './chat-store-types' @@ -40,6 +41,7 @@ export function threadHasPendingRuntimeWork(blocks: ChatBlock[]): boolean { for (const block of blocks) { if (block.kind === 'user') { + if (isBackgroundShellNoticeSource(block.meta?.messageSource)) continue pendingInCurrentTurn = false continue } diff --git a/src/shared/background-shell-notice.ts b/src/shared/background-shell-notice.ts new file mode 100644 index 000000000..720542c9e --- /dev/null +++ b/src/shared/background-shell-notice.ts @@ -0,0 +1,43 @@ +export type BackgroundShellCompletionNotice = { + sessionId: string + command: string + exitCode: number + outputPreview: string + outputFile?: string + hint: string +} + +function unescapeXml(text: string): string { + return text + .replace(/"/g, '"') + .replace(/>/g, '>') + .replace(/</g, '<') + .replace(/&/g, '&') +} + +function readXmlTag(xml: string, tag: string): string | null { + const match = xml.match(new RegExp(`<${tag}>([\\s\\S]*?)`)) + if (!match) return null + return unescapeXml(match[1].trim()) +} + +export function parseBackgroundShellCompletionNotice(text: string): BackgroundShellCompletionNotice | null { + const trimmed = text.trim() + if (!trimmed.includes('')) return null + const sessionId = readXmlTag(trimmed, 'session_id') + const command = readXmlTag(trimmed, 'command') + const exitCodeRaw = readXmlTag(trimmed, 'exit_code') + const outputPreview = readXmlTag(trimmed, 'output_preview') + const outputFile = readXmlTag(trimmed, 'output_file') ?? 
undefined + const hint = readXmlTag(trimmed, 'hint') + if (!sessionId || !command || exitCodeRaw === null || outputPreview === null || !hint) return null + const exitCode = Number.parseInt(exitCodeRaw, 10) + if (!Number.isFinite(exitCode)) return null + return { sessionId, command, exitCode, outputPreview, ...(outputFile ? { outputFile } : {}), hint } +} + +export function isBackgroundShellNoticeSource( + messageSource: unknown +): messageSource is 'background_shell' { + return messageSource === 'background_shell' +} diff --git a/src/shared/kun-endpoints.ts b/src/shared/kun-endpoints.ts index 677659edb..865262c0b 100644 --- a/src/shared/kun-endpoints.ts +++ b/src/shared/kun-endpoints.ts @@ -121,6 +121,16 @@ export const KUN_USAGE_TEMPLATE = '/v1/usage' export const KUN_DEBUG_LLM_ROUNDS_PATH = '/v1/debug/llm-rounds' export const KUN_DEBUG_LLM_ROUNDS_TEMPLATE = '/v1/debug/llm-rounds' +export const KUN_BACKGROUND_SHELLS_PATH = '/v1/background-shells' +export const KUN_BACKGROUND_SHELLS_TEMPLATE = '/v1/background-shells' +export const KUN_BACKGROUND_SHELL_TEMPLATE = '/v1/background-shells/{sessionId}' +export function kunBackgroundShellPath(sessionId: string): string { + return `/v1/background-shells/${encodeURIComponent(sessionId)}` +} +export function kunBackgroundShellStopPath(sessionId: string): string { + return `${kunBackgroundShellPath(sessionId)}/stop` +} + /** Thread mode shared with the Kun contract. 
*/ export type KunThreadMode = 'agent' | 'plan' From 1a04c343aab169b52b693afded752bec8409c73c Mon Sep 17 00:00:00 2001 From: musnows Date: Sun, 28 Jun 2026 18:53:30 +0800 Subject: [PATCH 02/18] fix(runtime): use options.dataDir for background shell output path Co-authored-by: Cursor --- kun/src/server/runtime-factory.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kun/src/server/runtime-factory.ts b/kun/src/server/runtime-factory.ts index 399c1363e..81a7292f4 100644 --- a/kun/src/server/runtime-factory.ts +++ b/kun/src/server/runtime-factory.ts @@ -256,7 +256,7 @@ export async function createKunServeRuntime( tool.name === 'bash' ? createBashLocalTool({ backgroundShell: backgroundShellRuntime.bashHooks(), - backgroundShellDataDir: input.dataDir + backgroundShellDataDir: options.dataDir }) : tool ) From 7fd7bd364ba5fa27c988216ee1e5f6ad8d2a86b4 Mon Sep 17 00:00:00 2001 From: musnows Date: Sun, 28 Jun 2026 18:54:09 +0800 Subject: [PATCH 03/18] fix(renderer): correct runtime-client import in BackgroundShellOverlay Co-authored-by: Cursor --- src/renderer/src/components/chat/BackgroundShellOverlay.tsx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/renderer/src/components/chat/BackgroundShellOverlay.tsx b/src/renderer/src/components/chat/BackgroundShellOverlay.tsx index f2b29adfd..0a5cbea1a 100644 --- a/src/renderer/src/components/chat/BackgroundShellOverlay.tsx +++ b/src/renderer/src/components/chat/BackgroundShellOverlay.tsx @@ -6,7 +6,7 @@ import { KUN_BACKGROUND_SHELLS_PATH, kunBackgroundShellStopPath } from '@shared/kun-endpoints' -import { rendererRuntimeClient } from '../agent/runtime-client' +import { rendererRuntimeClient } from '../../agent/runtime-client' type BackgroundShellSession = { id: string From 2df95804ac3e7c716f994e5739f981ffed3ca2f4 Mon Sep 17 00:00:00 2001 From: musnows Date: Sun, 28 Jun 2026 19:55:00 +0800 Subject: [PATCH 04/18] fix(settings): decouple memory and skill loading from current workspace 
Settings should show global Kun state instead of filtering memories and skill roots by the sidebar's selected project. List all memories in the settings panel, require an explicit target path when creating scoped memories, and mutate records using their stored workspace paths. Co-authored-by: Cursor --- kun/src/memory/memory-store.ts | 6 +-- kun/src/server/routes/memory.ts | 3 +- kun/tests/memory-store.test.ts | 21 +++++++++ src/renderer/src/agent/kun-runtime.ts | 5 ++- src/renderer/src/agent/types.ts | 2 +- src/renderer/src/components/SettingsView.tsx | 37 ++++++++++------ .../settings-section-memory.test.ts | 17 +++++--- .../components/settings-section-memory.tsx | 21 ++++++++- .../src/lib/load-kun-diagnostics.test.ts | 43 +++++++++++++++++-- src/renderer/src/lib/load-kun-diagnostics.ts | 9 +++- src/renderer/src/locales/en/settings.json | 1 + src/renderer/src/locales/zh/settings.json | 1 + 12 files changed, 134 insertions(+), 32 deletions(-) diff --git a/kun/src/memory/memory-store.ts b/kun/src/memory/memory-store.ts index 04f95c8c5..1134ab92b 100644 --- a/kun/src/memory/memory-store.ts +++ b/kun/src/memory/memory-store.ts @@ -13,7 +13,7 @@ export interface MemoryStore { create(input: MemoryCreateRequest): Promise update(id: string, patch: MemoryUpdateRequest, access?: MemoryAccess): Promise delete(id: string, access?: MemoryAccess): Promise - list(filter?: { workspace?: string; includeDeleted?: boolean }): Promise + list(filter?: { workspace?: string; includeDeleted?: boolean; all?: boolean }): Promise retrieve(input: { query: string; workspace?: string; limit: number }): Promise diagnostics(): Promise setLastInjected(ids: string[]): void @@ -84,11 +84,11 @@ export class FileMemoryStore implements MemoryStore { return next } - async list(filter: { workspace?: string; includeDeleted?: boolean } = {}): Promise { + async list(filter: { workspace?: string; includeDeleted?: boolean; all?: boolean } = {}): Promise { const records = await this.readAll() return records 
.filter((record) => filter.includeDeleted || !record.deletedAt) - .filter((record) => inScope(record, filter.workspace)) + .filter((record) => filter.all || inScope(record, filter.workspace)) .sort((a, b) => b.updatedAt.localeCompare(a.updatedAt)) } diff --git a/kun/src/server/routes/memory.ts b/kun/src/server/routes/memory.ts index 2ca8132dc..2d4a7cee0 100644 --- a/kun/src/server/routes/memory.ts +++ b/kun/src/server/routes/memory.ts @@ -10,7 +10,8 @@ export async function listMemories(store: MemoryStore | undefined, request: Requ return jsonResponse({ memories: await store.list({ workspace: url.searchParams.get('workspace') ?? undefined, - includeDeleted: url.searchParams.get('include_deleted') === 'true' + includeDeleted: url.searchParams.get('include_deleted') === 'true', + all: url.searchParams.get('all') === 'true' }) }) } diff --git a/kun/tests/memory-store.test.ts b/kun/tests/memory-store.test.ts index e8aed9046..9a2526212 100644 --- a/kun/tests/memory-store.test.ts +++ b/kun/tests/memory-store.test.ts @@ -236,6 +236,27 @@ describe('Memory store and recall', () => { expect(hits).toEqual([]) }) + it('lists every memory for settings management when all=true', async () => { + const store = createStore() + await store.create({ + content: 'Project Alpha deploys with pnpm', + scope: 'project', + workspace: '/tmp/project-alpha' + }) + await store.create({ + content: 'Other workspace preference', + scope: 'workspace', + workspace: '/tmp/other' + }) + await store.create({ + content: 'User prefers concise answers', + scope: 'user' + }) + + expect(await store.list({ workspace: '/tmp/project-alpha' })).toHaveLength(2) + expect(await store.list({ all: true })).toHaveLength(3) + }) + it('isolates project memories and scope-protects mutations', async () => { const store = createStore() const memory = await store.create({ diff --git a/src/renderer/src/agent/kun-runtime.ts b/src/renderer/src/agent/kun-runtime.ts index 79cba80c7..783d7be19 100644 --- 
a/src/renderer/src/agent/kun-runtime.ts +++ b/src/renderer/src/agent/kun-runtime.ts @@ -705,10 +705,11 @@ export class KunRuntimeProvider implements AgentProvider { ) } - async listMemories(options: { workspace?: string; includeDeleted?: boolean } = {}): Promise { + async listMemories(options: { workspace?: string; includeDeleted?: boolean; all?: boolean } = {}): Promise { const query = buildQuery({ workspace: options.workspace, - include_deleted: options.includeDeleted + include_deleted: options.includeDeleted, + all: options.all }) const response = await rendererRuntimeClient.runtimeRequest(`${KUN_MEMORY_PATH}${query}`, 'GET') if (!response.ok) { diff --git a/src/renderer/src/agent/types.ts b/src/renderer/src/agent/types.ts index 7a6bbac5d..69e4cafa7 100644 --- a/src/renderer/src/agent/types.ts +++ b/src/renderer/src/agent/types.ts @@ -512,7 +512,7 @@ export interface AgentProvider { attachmentId: string, options?: { threadId?: string; workspace?: string } ): Promise - listMemories?(options?: { workspace?: string; includeDeleted?: boolean }): Promise + listMemories?(options?: { workspace?: string; includeDeleted?: boolean; all?: boolean }): Promise createMemory?(input: { content: string scope?: 'user' | 'workspace' | 'project' diff --git a/src/renderer/src/components/SettingsView.tsx b/src/renderer/src/components/SettingsView.tsx index 03c3c6c1a..41aff377d 100644 --- a/src/renderer/src/components/SettingsView.tsx +++ b/src/renderer/src/components/SettingsView.tsx @@ -152,7 +152,6 @@ export function SettingsView(): ReactElement { const formTheme = form?.theme const formUiFontScale = form?.uiFontScale const writeTypography = form?.write?.typography - const formWorkspaceRoot = form?.workspaceRoot const formKun = form ? 
getKunRuntimeSettings(form) : null const formPort = formKun?.port const formGuiUpdateChannel = form?.guiUpdate?.channel @@ -388,15 +387,16 @@ export function SettingsView(): ReactElement { if (typeof window.kunGui?.listSkillRoots !== 'function') return setSkillRootsLoading(true) try { - const workspaceRoot = normalizeWorkspaceRoot(expandHomePath(formWorkspaceRoot ?? '')) - const result = await window.kunGui.listSkillRoots(workspaceRoot || undefined) + // Settings is global: list every configured skill root from persisted + // settings, not the sidebar's currently selected project workspace. + const result = await window.kunGui.listSkillRoots() if (result.ok) setSkillRoots(result.roots) } catch { /* listing skill roots is best-effort; keep the last known list */ } finally { setSkillRootsLoading(false) } - }, [expandHomePath, formWorkspaceRoot]) + }, []) useEffect(() => { if (category !== 'agents') return @@ -492,9 +492,7 @@ export function SettingsView(): ReactElement { setRuntimeDiagnosticsBusy(true) setRuntimeDiagnosticsNotice(null) try { - const loaded = await loadKunDiagnostics(provider, { - workspace: normalizeWorkspaceRoot(expandHomePath(formWorkspaceRoot ?? 
'')) - }) + const loaded = await loadKunDiagnostics(provider, { listAllMemories: true }) if (loaded.runtimeInfo !== undefined) setRuntimeInfo(loaded.runtimeInfo) if (loaded.toolDiagnostics !== undefined) setToolDiagnostics(loaded.toolDiagnostics) if (loaded.memoryRecords !== undefined) setMemoryRecords(loaded.memoryRecords) @@ -512,7 +510,7 @@ export function SettingsView(): ReactElement { } finally { setRuntimeDiagnosticsBusy(false) } - }, [expandHomePath, formWorkspaceRoot]) + }, []) useEffect(() => { if (category !== 'agents' && category !== 'memory') return @@ -535,18 +533,31 @@ export function SettingsView(): ReactElement { void refreshMemoryDiagnostics() }, [category, memoryRecords]) + const memoryMutationWorkspace = useCallback((memoryId: string): string | undefined => { + const record = memoryRecords.find((item) => item.id === memoryId) + if (!record || record.scope === 'user') return undefined + if (record.scope === 'project') { + return record.project ?? record.workspace + } + return record.workspace + }, [memoryRecords]) + const createMemoryRecord = async (input: { content: string scope?: 'user' | 'workspace' | 'project' + targetPath?: string tags?: string[] confidence?: number }): Promise => { const provider = getProvider() if (typeof provider.createMemory !== 'function') return false try { - const workspace = normalizeWorkspaceRoot(formWorkspaceRoot) + const workspace = normalizeWorkspaceRoot(expandHomePath(input.targetPath ?? '')) const memory = await provider.createMemory({ - ...input, + content: input.content, + scope: input.scope, + tags: input.tags, + confidence: input.confidence, ...(input.scope === 'user' ? {} : { workspace }), ...(input.scope === 'project' ? 
{ project: workspace } : {}) }) @@ -569,7 +580,7 @@ export function SettingsView(): ReactElement { if (typeof provider.updateMemory !== 'function') return false try { const memory = await provider.updateMemory(memoryId, patch, { - workspace: normalizeWorkspaceRoot(formWorkspaceRoot) + workspace: memoryMutationWorkspace(memoryId) }) setMemoryRecords((records) => records.map((record) => (record.id === memoryId ? memory : record))) return true @@ -587,7 +598,7 @@ export function SettingsView(): ReactElement { if (typeof provider.updateMemory !== 'function') return try { const memory = await provider.updateMemory(memoryId, { disabled: true }, { - workspace: normalizeWorkspaceRoot(formWorkspaceRoot) + workspace: memoryMutationWorkspace(memoryId) }) setMemoryRecords((records) => records.map((record) => record.id === memoryId ? memory : record)) } catch (error) { @@ -603,7 +614,7 @@ export function SettingsView(): ReactElement { if (typeof provider.deleteMemory !== 'function') return try { await provider.deleteMemory(memoryId, { - workspace: normalizeWorkspaceRoot(formWorkspaceRoot) + workspace: memoryMutationWorkspace(memoryId) }) setMemoryRecords((records) => records.filter((record) => record.id !== memoryId)) } catch (error) { diff --git a/src/renderer/src/components/settings-section-memory.test.ts b/src/renderer/src/components/settings-section-memory.test.ts index af811648e..b2e1a303b 100644 --- a/src/renderer/src/components/settings-section-memory.test.ts +++ b/src/renderer/src/components/settings-section-memory.test.ts @@ -187,7 +187,7 @@ describe('isMemoryDraftDirty', () => { it('returns false in view mode regardless of draft', () => { const record = sampleRecord() const dialog: MemoryDialogState = { mode: 'view', memory: record } - const draft: MemoryDraft = { content: 'totally different', scope: 'user', tags: 'x', confidence: 0 } + const draft: MemoryDraft = { content: 'totally different', scope: 'user', targetPath: '', tags: 'x', confidence: 0 } 
expect(isMemoryDraftDirty(dialog, draft)).toBe(false) }) @@ -197,6 +197,7 @@ describe('isMemoryDraftDirty', () => { const draft: MemoryDraft = { content: record.content, scope: record.scope, + targetPath: record.workspace ?? '', tags: 'summary, kook-bot', confidence: record.confidence ?? 1 } @@ -209,6 +210,7 @@ describe('isMemoryDraftDirty', () => { const baseline: MemoryDraft = { content: record.content, scope: record.scope, + targetPath: record.workspace ?? '', tags: 'summary', confidence: 1 } @@ -219,15 +221,15 @@ describe('isMemoryDraftDirty', () => { it('returns false in create mode for an empty draft on the default scope', () => { const dialog: MemoryDialogState = { mode: 'create' } - const draft: MemoryDraft = { content: ' ', scope: 'workspace', tags: ' ', confidence: 1 } + const draft: MemoryDraft = { content: ' ', scope: 'workspace', targetPath: '', tags: ' ', confidence: 1 } expect(isMemoryDraftDirty(dialog, draft)).toBe(false) }) it('returns true in create mode when any field changes from the empty default', () => { const dialog: MemoryDialogState = { mode: 'create' } - expect(isMemoryDraftDirty(dialog, { content: 'hello', scope: 'workspace', tags: '', confidence: 1 })).toBe(true) - expect(isMemoryDraftDirty(dialog, { content: '', scope: 'workspace', tags: 'tag', confidence: 1 })).toBe(true) - expect(isMemoryDraftDirty(dialog, { content: '', scope: 'user', tags: '', confidence: 1 })).toBe(true) + expect(isMemoryDraftDirty(dialog, { content: 'hello', scope: 'workspace', targetPath: '', tags: '', confidence: 1 })).toBe(true) + expect(isMemoryDraftDirty(dialog, { content: '', scope: 'workspace', targetPath: '', tags: 'tag', confidence: 1 })).toBe(true) + expect(isMemoryDraftDirty(dialog, { content: '', scope: 'user', targetPath: '', tags: '', confidence: 1 })).toBe(true) }) }) @@ -238,6 +240,7 @@ describe('attemptCloseMemoryDialog', () => { const draft: MemoryDraft = { content: record.content, scope: record.scope, + targetPath: record.workspace ?? 
'', tags: 'summary', confidence: 1 } @@ -254,7 +257,7 @@ describe('attemptCloseMemoryDialog', () => { const close = vi.fn() const result = await attemptCloseMemoryDialog({ dialog: null, - draft: { content: 'anything', scope: 'workspace', tags: '', confidence: 1 }, + draft: { content: 'anything', scope: 'workspace', targetPath: '', tags: '', confidence: 1 }, confirm, close }) @@ -269,6 +272,7 @@ describe('attemptCloseMemoryDialog', () => { const draft: MemoryDraft = { content: 'EDITED content', scope: record.scope, + targetPath: record.workspace ?? '', tags: 'summary', confidence: 1 } @@ -285,6 +289,7 @@ describe('attemptCloseMemoryDialog', () => { const draft: MemoryDraft = { content: 'half-typed thought', scope: 'workspace', + targetPath: '', tags: '', confidence: 1 } diff --git a/src/renderer/src/components/settings-section-memory.tsx b/src/renderer/src/components/settings-section-memory.tsx index fa839cc31..694c4d48b 100644 --- a/src/renderer/src/components/settings-section-memory.tsx +++ b/src/renderer/src/components/settings-section-memory.tsx @@ -10,6 +10,7 @@ type MemoryScope = 'user' | 'workspace' | 'project' export type MemoryDraft = { content: string scope: MemoryScope + targetPath: string tags: string confidence: number } @@ -22,6 +23,7 @@ export type MemoryDialogState = const EMPTY_DRAFT: MemoryDraft = { content: '', scope: 'workspace', + targetPath: '', tags: '', confidence: 1 } @@ -64,6 +66,7 @@ export function isMemoryDraftDirty( return ( draft.content.trim() !== '' || draft.tags.trim() !== '' || + draft.targetPath.trim() !== '' || draft.scope !== DEFAULT_DRAFT_SCOPE ) } @@ -135,6 +138,7 @@ export function MemorySettingsSection({ ctx }: { ctx: Record }): Re setDraft({ content: record.content, scope: record.scope, + targetPath: projectForMemory(record) ?? '', tags: (record.tags ?? []).join(', '), confidence: record.confidence ?? 
1 }) @@ -164,11 +168,14 @@ export function MemorySettingsSection({ ctx }: { ctx: Record }): Re const saveDraft = async (): Promise => { const trimmed = draft.content.trim() if (!trimmed) return + const targetPath = draft.targetPath.trim() + if (dialog?.mode === 'create' && draft.scope !== 'user' && !targetPath) return let ok = false if (dialog?.mode === 'create') { ok = await createMemoryRecord({ content: trimmed, scope: draft.scope, + ...(draft.scope === 'user' ? {} : { targetPath }), tags: parseTags(draft.tags), confidence: draft.confidence }) @@ -465,6 +472,15 @@ function MemoryRecordDialog({ ) : null} + {dialog.mode === 'create' && draft.scope !== 'user' ? ( + onDraftChange((prev) => ({ ...prev, targetPath: e.target.value }))} + placeholder={t('memoryTargetPathPlaceholder')} + className="min-w-[200px] flex-1 rounded-lg border border-ds-border-muted bg-ds-surface-subtle px-2 py-1 text-[12px] text-ds-ink outline-none" + /> + ) : null} {t('memorySave')} diff --git a/src/renderer/src/lib/load-kun-diagnostics.test.ts b/src/renderer/src/lib/load-kun-diagnostics.test.ts index f89708ff7..69982bf9b 100644 --- a/src/renderer/src/lib/load-kun-diagnostics.test.ts +++ b/src/renderer/src/lib/load-kun-diagnostics.test.ts @@ -9,10 +9,13 @@ describe('loadKunDiagnostics', () => { const provider = { getRuntimeInfo: async () => runtimeInfo, getToolDiagnostics: async () => toolDiagnostics, - listMemories: async () => memoryRecords + listMemories: async (options?: { all?: boolean; includeDeleted?: boolean }) => { + expect(options).toEqual({ all: true, includeDeleted: false }) + return memoryRecords + } } - const loaded = await loadKunDiagnostics(provider, { workspace: '/tmp/project' }) + const loaded = await loadKunDiagnostics(provider) expect(loaded.runtimeInfo).toBe(runtimeInfo) expect(loaded.toolDiagnostics).toBe(toolDiagnostics) @@ -20,6 +23,40 @@ describe('loadKunDiagnostics', () => { expect(loaded.errors).toEqual([]) }) + it('loads all memories by default for global settings 
diagnostics', async () => { + const memoryRecords = [{ id: 'mem_1', content: 'remember this' }] as any + const provider = { + getRuntimeInfo: async () => null, + getToolDiagnostics: async () => null, + listMemories: async (options?: { all?: boolean }) => { + expect(options).toEqual({ all: true, includeDeleted: false }) + return memoryRecords + } + } + + const loaded = await loadKunDiagnostics(provider) + + expect(loaded.memoryRecords).toBe(memoryRecords) + expect(loaded.errors).toEqual([]) + }) + + it('can scope memory loading to the current workspace when explicitly requested', async () => { + const memoryRecords = [{ id: 'mem_ws', content: 'workspace only' }] as any + const provider = { + getRuntimeInfo: async () => null, + getToolDiagnostics: async () => null, + listMemories: async (options?: { all?: boolean }) => { + expect(options).toEqual({ includeDeleted: false }) + return memoryRecords + } + } + + const loaded = await loadKunDiagnostics(provider, { listAllMemories: false }) + + expect(loaded.memoryRecords).toBe(memoryRecords) + expect(loaded.errors).toEqual([]) + }) + it('keeps successful diagnostics when memory loading fails', async () => { const runtimeInfo = { pid: 42 } as any const toolDiagnostics = { providers: [{ id: 'builtin' }], mcpServers: [] } as any @@ -31,7 +68,7 @@ describe('loadKunDiagnostics', () => { } } - const loaded = await loadKunDiagnostics(provider, { workspace: '/tmp/project' }) + const loaded = await loadKunDiagnostics(provider) expect(loaded.runtimeInfo).toBe(runtimeInfo) expect(loaded.toolDiagnostics).toBe(toolDiagnostics) diff --git a/src/renderer/src/lib/load-kun-diagnostics.ts b/src/renderer/src/lib/load-kun-diagnostics.ts index c3bd683f1..a6d6f0761 100644 --- a/src/renderer/src/lib/load-kun-diagnostics.ts +++ b/src/renderer/src/lib/load-kun-diagnostics.ts @@ -17,13 +17,18 @@ export type LoadedKunDiagnostics = { export async function loadKunDiagnostics( provider: DiagnosticsProvider, - options: { workspace?: string } = {} + 
options: { listAllMemories?: boolean } = {} ): Promise { + const listAllMemories = options.listAllMemories !== false const [runtimeInfo, toolDiagnostics, memoryRecords] = await Promise.allSettled([ provider.getRuntimeInfo ? provider.getRuntimeInfo() : Promise.resolve(null), provider.getToolDiagnostics ? provider.getToolDiagnostics() : Promise.resolve(null), provider.listMemories - ? provider.listMemories({ workspace: options.workspace, includeDeleted: false }) + ? provider.listMemories( + listAllMemories + ? { all: true, includeDeleted: false } + : { includeDeleted: false } + ) : Promise.resolve([]) ]) diff --git a/src/renderer/src/locales/en/settings.json b/src/renderer/src/locales/en/settings.json index 52717ceec..f1ec86fee 100644 --- a/src/renderer/src/locales/en/settings.json +++ b/src/renderer/src/locales/en/settings.json @@ -1231,6 +1231,7 @@ "memoryCancel": "Cancel", "memoryEmpty": "No memory records yet. The assistant will create them automatically as it learns your preferences, or add one manually.", "memoryContentPlaceholder": "What should the assistant remember? e.g. 
\"Prefer TypeScript with 2-space indentation.\"", + "memoryTargetPathPlaceholder": "Absolute path to the workspace or project directory", "memoryTagsPlaceholder": "Tags, comma-separated", "memoryConfidence": "Confidence", "memoryProject": "Project", diff --git a/src/renderer/src/locales/zh/settings.json b/src/renderer/src/locales/zh/settings.json index 00cfdbe8f..d1e0cd08a 100644 --- a/src/renderer/src/locales/zh/settings.json +++ b/src/renderer/src/locales/zh/settings.json @@ -1231,6 +1231,7 @@ "memoryCancel": "取消", "memoryEmpty": "暂无记忆记录。助手会在了解你的偏好后自动创建,也可以手动添加。", "memoryContentPlaceholder": "想让助手记住什么?例如:「偏好 TypeScript,2 空格缩进」", + "memoryTargetPathPlaceholder": "工作区或项目目录的绝对路径", "memoryTagsPlaceholder": "标签,逗号分隔", "memoryConfidence": "置信度", "memoryProject": "所属项目", From a47c78bdbd4819e68dc2fb078342242ee86e5f98 Mon Sep 17 00:00:00 2001 From: musnows Date: Sun, 28 Jun 2026 20:26:12 +0800 Subject: [PATCH 05/18] feat(settings): add configurable conversation text width Let users adjust chat message and composer column width from General settings via a persisted CSS variable. 
Co-authored-by: Cursor --- src/main/claw-runtime.test.ts | 1 + src/main/claw-schedule-mcp-config.test.ts | 1 + src/main/claw-scheduled-task-detector.test.ts | 1 + src/main/ipc/app-ipc-schemas.ts | 4 +- .../ipc/register-app-ipc-handlers.test.ts | 1 + src/main/kun-process.test.ts | 1 + src/main/kun-regression.test.ts | 1 + src/main/runtime/kun-adapter.test.ts | 1 + src/main/runtime/managed-runtime-idle.test.ts | 1 + src/main/schedule-runtime.test.ts | 1 + src/main/services/skill-service.test.ts | 1 + .../write-inline-completion-service.test.ts | 1 + src/main/settings-store.ts | 2 + src/main/upstream-models.test.ts | 1 + src/main/workflow-runtime.nodes.test.ts | 1 + src/main/workflow-runtime.run.test.ts | 1 + src/renderer/src/agent/kun-runtime.test.ts | 1 + src/renderer/src/agent/runtime-client.test.ts | 1 + src/renderer/src/components/SettingsView.tsx | 7 ++- .../src/components/chat/ChatStarterGrid.tsx | 2 +- .../src/components/chat/FloatingComposer.tsx | 2 +- .../chat/InitialSessionUsageHeatmap.tsx | 2 +- .../src/components/chat/MessageTimeline.tsx | 2 +- .../chat/message-timeline-empty.tsx | 4 +- .../components/settings-section-claw.test.ts | 1 + .../settings-section-general.test.ts | 1 + .../components/settings-section-general.tsx | 63 +++++++++++++++++++ src/renderer/src/components/settings-utils.ts | 3 + src/renderer/src/lib/apply-theme.ts | 9 ++- .../src/lib/claw-model-options.test.ts | 1 + .../src/lib/settings-home-paths.test.ts | 1 + src/renderer/src/locales/en/settings.json | 6 ++ src/renderer/src/locales/zh/settings.json | 6 ++ .../src/store/chat-store-app-actions.test.ts | 1 + .../src/store/chat-store-app-actions.ts | 3 + .../chat-store-navigation-actions.test.ts | 1 + .../store/chat-store-navigation-actions.ts | 2 + .../store/chat-store-thread-actions.test.ts | 1 + src/renderer/src/store/chat-store.ts | 2 + src/renderer/src/styles/base-shell.css | 6 +- src/shared/app-settings-normalize.ts | 2 + src/shared/app-settings-provider.test.ts | 1 + 
src/shared/app-settings-types.ts | 14 +++++ src/shared/app-settings.test.ts | 16 +++++ 44 files changed, 170 insertions(+), 11 deletions(-) diff --git a/src/main/claw-runtime.test.ts b/src/main/claw-runtime.test.ts index d543d54a2..7a1932563 100644 --- a/src/main/claw-runtime.test.ts +++ b/src/main/claw-runtime.test.ts @@ -25,6 +25,7 @@ function buildSettings(): AppSettingsV1 { locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider: defaultModelProviderSettings(), agents: { kun: defaultKunRuntimeSettings() diff --git a/src/main/claw-schedule-mcp-config.test.ts b/src/main/claw-schedule-mcp-config.test.ts index dc0bbe9eb..d65d469a7 100644 --- a/src/main/claw-schedule-mcp-config.test.ts +++ b/src/main/claw-schedule-mcp-config.test.ts @@ -34,6 +34,7 @@ function createSettings(patch: Partial = locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider: defaultModelProviderSettings(), agents: { kun: defaultKunRuntimeSettings() diff --git a/src/main/claw-scheduled-task-detector.test.ts b/src/main/claw-scheduled-task-detector.test.ts index 82b717694..0d37b585d 100644 --- a/src/main/claw-scheduled-task-detector.test.ts +++ b/src/main/claw-scheduled-task-detector.test.ts @@ -28,6 +28,7 @@ function settings(endpointFormat: ModelEndpointFormat): AppSettingsV1 { locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider, agents: { kun: defaultKunRuntimeSettings() diff --git a/src/main/ipc/app-ipc-schemas.ts b/src/main/ipc/app-ipc-schemas.ts index 756af1af5..8945ca890 100644 --- a/src/main/ipc/app-ipc-schemas.ts +++ b/src/main/ipc/app-ipc-schemas.ts @@ -46,7 +46,7 @@ import { } from '../../shared/app-settings' import { DESKTOP_COMMANDS } from '../../shared/kun-gui-api' import { GUI_UPDATE_CHANNELS } from '../../shared/gui-update' -import { WINDOW_CLOSE_ACTIONS, UI_FONT_SCALE_MIN, UI_FONT_SCALE_MAX } from '../../shared/app-settings' +import { WINDOW_CLOSE_ACTIONS, 
CHAT_CONTENT_MAX_WIDTH_MIN, CHAT_CONTENT_MAX_WIDTH_MAX, UI_FONT_SCALE_MIN, UI_FONT_SCALE_MAX } from '../../shared/app-settings' import { KEYBOARD_SHORTCUT_COMMANDS } from '../../shared/keyboard-shortcuts' import { WRITE_EXPORT_FORMATS } from '../../shared/write-export' import { WRITE_INFOGRAPHIC_MAX_TEXT_CHARS } from '../../shared/write-infographic' @@ -211,6 +211,7 @@ const uiFontScaleSchema = z.union([ z.number().min(UI_FONT_SCALE_MIN).max(UI_FONT_SCALE_MAX), z.enum(['small', 'medium', 'large']) ]) +const chatContentMaxWidthSchema = z.number().min(CHAT_CONTENT_MAX_WIDTH_MIN).max(CHAT_CONTENT_MAX_WIDTH_MAX) const hexColorSchema = z.string().trim().regex(/^#[0-9a-fA-F]{6}$/) const approvalPolicySchema = z.enum(['always', 'on-request', 'untrusted', 'never', 'auto', 'suggest']) const sandboxModeSchema = z.enum(['read-only', 'workspace-write', 'danger-full-access', 'external-sandbox']) @@ -1347,6 +1348,7 @@ const settingsPatchObjectSchema = z.object({ locale: localeSchema.optional(), theme: themeSchema.optional(), uiFontScale: uiFontScaleSchema.optional(), + chatContentMaxWidthPx: chatContentMaxWidthSchema.optional(), cursorSpotlight: z.boolean().optional(), cursorSpotlightColor: hexColorSchema.optional(), provider: modelProviderPatchSchema.optional(), diff --git a/src/main/ipc/register-app-ipc-handlers.test.ts b/src/main/ipc/register-app-ipc-handlers.test.ts index bc6ae84bb..73128b2e5 100644 --- a/src/main/ipc/register-app-ipc-handlers.test.ts +++ b/src/main/ipc/register-app-ipc-handlers.test.ts @@ -37,6 +37,7 @@ function settings(): AppSettingsV1 { locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider: defaultModelProviderSettings(), agents: { kun: defaultKunRuntimeSettings() diff --git a/src/main/kun-process.test.ts b/src/main/kun-process.test.ts index 079d3509f..b902c4b45 100644 --- a/src/main/kun-process.test.ts +++ b/src/main/kun-process.test.ts @@ -34,6 +34,7 @@ function createSettings(binaryPath: string): AppSettingsV1 { 
locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider: defaultModelProviderSettings(), agents: { kun: { diff --git a/src/main/kun-regression.test.ts b/src/main/kun-regression.test.ts index b3344d56c..764247151 100644 --- a/src/main/kun-regression.test.ts +++ b/src/main/kun-regression.test.ts @@ -113,6 +113,7 @@ describe('Kun single-agent regression', () => { locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider: defaultModelProviderSettings(), agents: { kun: defaultKunRuntimeSettings(19000) diff --git a/src/main/runtime/kun-adapter.test.ts b/src/main/runtime/kun-adapter.test.ts index ccc5b6a89..f61a70a36 100644 --- a/src/main/runtime/kun-adapter.test.ts +++ b/src/main/runtime/kun-adapter.test.ts @@ -22,6 +22,7 @@ function settingsForPort(port: number): AppSettingsV1 { locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider: defaultModelProviderSettings(), agents: { kun: { diff --git a/src/main/runtime/managed-runtime-idle.test.ts b/src/main/runtime/managed-runtime-idle.test.ts index 9d6852413..ba3fd9a77 100644 --- a/src/main/runtime/managed-runtime-idle.test.ts +++ b/src/main/runtime/managed-runtime-idle.test.ts @@ -21,6 +21,7 @@ const settings: AppSettingsV1 = { locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider: defaultModelProviderSettings(), agents: { kun: defaultKunRuntimeSettings() }, workspaceRoot: '/tmp/workspace', diff --git a/src/main/schedule-runtime.test.ts b/src/main/schedule-runtime.test.ts index 75dbcc131..180b86cab 100644 --- a/src/main/schedule-runtime.test.ts +++ b/src/main/schedule-runtime.test.ts @@ -84,6 +84,7 @@ function settingsWith( locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider: defaultModelProviderSettings(), agents: { kun: { diff --git a/src/main/services/skill-service.test.ts b/src/main/services/skill-service.test.ts index e076578de..767f840f1 100644 --- 
a/src/main/services/skill-service.test.ts +++ b/src/main/services/skill-service.test.ts @@ -290,6 +290,7 @@ describe('skill-service', () => { locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider: defaultModelProviderSettings(), agents: { kun: defaultKunRuntimeSettings() }, workspaceRoot, diff --git a/src/main/services/write-inline-completion-service.test.ts b/src/main/services/write-inline-completion-service.test.ts index 3bf961c58..224c34f5b 100644 --- a/src/main/services/write-inline-completion-service.test.ts +++ b/src/main/services/write-inline-completion-service.test.ts @@ -30,6 +30,7 @@ function createSettings(patch: Partial ({ locale: 'en', theme: 'system', uiFontScale: DEFAULT_UI_FONT_SCALE, + chatContentMaxWidthPx: DEFAULT_CHAT_CONTENT_MAX_WIDTH_PX, cursorSpotlight: true, cursorSpotlightColor: DEFAULT_CURSOR_SPOTLIGHT_COLOR, provider: defaultModelProviderSettings(), diff --git a/src/main/upstream-models.test.ts b/src/main/upstream-models.test.ts index 77da1dba9..c6764c0ab 100644 --- a/src/main/upstream-models.test.ts +++ b/src/main/upstream-models.test.ts @@ -23,6 +23,7 @@ function settings(dataDir: string, model = 'settings-model'): AppSettingsV1 { locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider: { ...provider, providers: [ diff --git a/src/main/workflow-runtime.nodes.test.ts b/src/main/workflow-runtime.nodes.test.ts index 9c6aa2a88..9cb4b06d1 100644 --- a/src/main/workflow-runtime.nodes.test.ts +++ b/src/main/workflow-runtime.nodes.test.ts @@ -106,6 +106,7 @@ function buildSettings( locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider: defaultModelProviderSettings(), agents: { kun: { ...defaultKunRuntimeSettings(), model: 'test-model', apiKey: 'test-key' } }, workspaceRoot: '/tmp/workflow-workspace', diff --git a/src/main/workflow-runtime.run.test.ts b/src/main/workflow-runtime.run.test.ts index 665f5459f..3188206a6 100644 --- 
a/src/main/workflow-runtime.run.test.ts +++ b/src/main/workflow-runtime.run.test.ts @@ -47,6 +47,7 @@ function settingsWithWorkflows(workflows: WorkflowV1[], modules: WorkflowCustomM locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider: defaultModelProviderSettings(), agents: { kun: { ...defaultKunRuntimeSettings(), model: 'test-model', apiKey: 'test-key' } }, workspaceRoot: '/tmp/workflow-workspace', diff --git a/src/renderer/src/agent/kun-runtime.test.ts b/src/renderer/src/agent/kun-runtime.test.ts index 9b44387c2..e07fbf946 100644 --- a/src/renderer/src/agent/kun-runtime.test.ts +++ b/src/renderer/src/agent/kun-runtime.test.ts @@ -21,6 +21,7 @@ function settings(): AppSettingsV1 { locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider: defaultModelProviderSettings(), agents: { kun: defaultKunRuntimeSettings() diff --git a/src/renderer/src/agent/runtime-client.test.ts b/src/renderer/src/agent/runtime-client.test.ts index 7980674c9..96c9404b0 100644 --- a/src/renderer/src/agent/runtime-client.test.ts +++ b/src/renderer/src/agent/runtime-client.test.ts @@ -18,6 +18,7 @@ function settings(apiKey: string): AppSettingsV1 { locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider: defaultModelProviderSettings(), agents: { kun: { diff --git a/src/renderer/src/components/SettingsView.tsx b/src/renderer/src/components/SettingsView.tsx index 03c3c6c1a..bb783f3fc 100644 --- a/src/renderer/src/components/SettingsView.tsx +++ b/src/renderer/src/components/SettingsView.tsx @@ -25,6 +25,7 @@ import type { } from '../agent/kun-contract' import type { WriteInlineCompletionDebugEntry } from '@shared/write-inline-completion' import { + applyChatContentMaxWidth, applyCursorSpotlight, applyCursorSpotlightColor, applyTheme, @@ -151,6 +152,7 @@ export function SettingsView(): ReactElement { const permissionsSectionRef = useRef(null) const formTheme = form?.theme const formUiFontScale = 
form?.uiFontScale + const formChatContentMaxWidthPx = form?.chatContentMaxWidthPx const writeTypography = form?.write?.typography const formWorkspaceRoot = form?.workspaceRoot const formKun = form ? getKunRuntimeSettings(form) : null @@ -207,10 +209,11 @@ export function SettingsView(): ReactElement { }, []) useEffect(() => { - if (!formTheme || !formUiFontScale) return + if (!formTheme || formUiFontScale == null || formChatContentMaxWidthPx == null) return applyTheme(formTheme) applyUiFontScale(formUiFontScale) - }, [formTheme, formUiFontScale]) + applyChatContentMaxWidth(formChatContentMaxWidthPx) + }, [formTheme, formUiFontScale, formChatContentMaxWidthPx]) useEffect(() => { if (typeof formCursorSpotlight === 'boolean') { diff --git a/src/renderer/src/components/chat/ChatStarterGrid.tsx b/src/renderer/src/components/chat/ChatStarterGrid.tsx index 75c73c295..4ece60ba1 100644 --- a/src/renderer/src/components/chat/ChatStarterGrid.tsx +++ b/src/renderer/src/components/chat/ChatStarterGrid.tsx @@ -49,7 +49,7 @@ export function ChatStarterGrid({ }): ReactElement { const { t } = useTranslation('common') return ( -
+
{CHAT_STARTERS.map((starter) => (
} /> + +
+ + setChatContentMaxWidthPx(Number(e.target.value))} + /> + +
+ +
+ setChatContentMaxWidthPx(Number(e.target.value))} + /> + px +
+ +
+
+
+ } + /> void) | null = null @@ -49,6 +51,11 @@ export function applyUiFontScale(scale: UiFontScale): void { root.style.setProperty('--ds-ui-scale', String(normalizeUiFontScale(scale))) } +export function applyChatContentMaxWidth(widthPx: ChatContentMaxWidthPx): void { + const root = document.documentElement + root.style.setProperty('--ds-chat-content-max-width', `${normalizeChatContentMaxWidth(widthPx)}px`) +} + export function applyCursorSpotlight(enabled: boolean): void { document.documentElement.dataset.cursorSpotlight = enabled ? 'on' : 'off' } diff --git a/src/renderer/src/lib/claw-model-options.test.ts b/src/renderer/src/lib/claw-model-options.test.ts index 07eac8069..e3295aa35 100644 --- a/src/renderer/src/lib/claw-model-options.test.ts +++ b/src/renderer/src/lib/claw-model-options.test.ts @@ -26,6 +26,7 @@ function buildSettings(models: string[]): AppSettingsV1 { locale: 'en', theme: 'system', uiFontScale: 0.88, + chatContentMaxWidthPx: 896, provider, agents: { kun: defaultKunRuntimeSettings() }, workspaceRoot: '/tmp/workspace', diff --git a/src/renderer/src/lib/settings-home-paths.test.ts b/src/renderer/src/lib/settings-home-paths.test.ts index 240aad019..69c2707c6 100644 --- a/src/renderer/src/lib/settings-home-paths.test.ts +++ b/src/renderer/src/lib/settings-home-paths.test.ts @@ -93,6 +93,7 @@ function settings(): AppSettingsV1 { locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, cursorSpotlight: true, provider: defaultModelProviderSettings(), agents: { diff --git a/src/renderer/src/locales/en/settings.json b/src/renderer/src/locales/en/settings.json index 52717ceec..830326619 100644 --- a/src/renderer/src/locales/en/settings.json +++ b/src/renderer/src/locales/en/settings.json @@ -188,6 +188,12 @@ "fontScaleMedium": "Medium", "fontScaleLarge": "Large", "fontScaleCurrent": "Current: {{value}}", + "chatContentMaxWidth": "Conversation text width", + "chatContentMaxWidthDesc": "Adjust how wide chat messages and the composer 
are displayed.", + "chatContentMaxWidthNarrow": "Narrow", + "chatContentMaxWidthWide": "Wide", + "chatContentMaxWidthDecrease": "Decrease conversation text width", + "chatContentMaxWidthIncrease": "Increase conversation text width", "cursorSpotlight": "Interactive effects", "cursorSpotlightDesc": "Show a soft cursor-follow spotlight on the title bar and sidebar.", "cursorSpotlightColor": "Interaction effect color", diff --git a/src/renderer/src/locales/zh/settings.json b/src/renderer/src/locales/zh/settings.json index 00cfdbe8f..02d4437dc 100644 --- a/src/renderer/src/locales/zh/settings.json +++ b/src/renderer/src/locales/zh/settings.json @@ -188,6 +188,12 @@ "fontScaleMedium": "中", "fontScaleLarge": "大", "fontScaleCurrent": "当前:{{value}}", + "chatContentMaxWidth": "对话文字宽度", + "chatContentMaxWidthDesc": "调整对话消息与输入框的正文显示宽度。", + "chatContentMaxWidthNarrow": "窄", + "chatContentMaxWidthWide": "宽", + "chatContentMaxWidthDecrease": "减小对话文字宽度", + "chatContentMaxWidthIncrease": "增大对话文字宽度", "cursorSpotlight": "交互特效", "cursorSpotlightDesc": "在置顶栏和侧边栏显示跟随鼠标的柔和高光。", "cursorSpotlightColor": "交互特效颜色", diff --git a/src/renderer/src/store/chat-store-app-actions.test.ts b/src/renderer/src/store/chat-store-app-actions.test.ts index a4a21aadb..2eaf89db4 100644 --- a/src/renderer/src/store/chat-store-app-actions.test.ts +++ b/src/renderer/src/store/chat-store-app-actions.test.ts @@ -84,6 +84,7 @@ function buildHarness(fetchModelsResult: FetchModelsResult): { }, applyTheme: () => undefined, applyUiFontScale: () => undefined, + applyChatContentMaxWidth: () => undefined, applyCursorSpotlight: () => undefined, applyCursorSpotlightColor: () => undefined, applyWriteTypography: () => undefined, diff --git a/src/renderer/src/store/chat-store-app-actions.ts b/src/renderer/src/store/chat-store-app-actions.ts index 8f61fcbb9..7829fd91f 100644 --- a/src/renderer/src/store/chat-store-app-actions.ts +++ b/src/renderer/src/store/chat-store-app-actions.ts @@ -33,6 +33,7 @@ type 
CreateAppActionsOptions = { setComposerModelLoadPromise: (promise: Promise | null) => void applyTheme: (theme: AppSettingsV1['theme']) => void applyUiFontScale: (scale: AppSettingsV1['uiFontScale']) => void + applyChatContentMaxWidth: (widthPx: AppSettingsV1['chatContentMaxWidthPx']) => void applyCursorSpotlight: (enabled: boolean) => void applyCursorSpotlightColor: (color: AppSettingsV1['cursorSpotlightColor']) => void applyWriteTypography: (typography: AppSettingsV1['write']['typography']) => void @@ -75,6 +76,7 @@ export function createAppActions(options: CreateAppActionsOptions): Pick< setComposerModelLoadPromise, applyTheme, applyUiFontScale, + applyChatContentMaxWidth, applyCursorSpotlight, applyCursorSpotlightColor, applyWriteTypography, @@ -239,6 +241,7 @@ export function createAppActions(options: CreateAppActionsOptions): Pick< const workspaceRoot = normalizeWorkspaceRoot(settings.workspaceRoot) applyTheme(settings.theme) applyUiFontScale(settings.uiFontScale) + applyChatContentMaxWidth(settings.chatContentMaxWidthPx) applyCursorSpotlight(settings.cursorSpotlight !== false) applyCursorSpotlightColor(settings.cursorSpotlightColor) if (settings.write?.typography) applyWriteTypography(settings.write.typography) diff --git a/src/renderer/src/store/chat-store-navigation-actions.test.ts b/src/renderer/src/store/chat-store-navigation-actions.test.ts index 430b98d6a..7713ec613 100644 --- a/src/renderer/src/store/chat-store-navigation-actions.test.ts +++ b/src/renderer/src/store/chat-store-navigation-actions.test.ts @@ -230,6 +230,7 @@ describe('onClawChannelActivity routes through subscribeThreadEventsLive (not se }, theme: 'dark', uiFontScale: 1, + chatContentMaxWidthPx: 896, locale: 'en', agents: { kun: { apiKey: 'test-key', model: 'deepseek-v4-pro', baseUrl: '' } }, disabledSkillIds: [] diff --git a/src/renderer/src/store/chat-store-navigation-actions.ts b/src/renderer/src/store/chat-store-navigation-actions.ts index 12df5a486..24f283701 100644 --- 
a/src/renderer/src/store/chat-store-navigation-actions.ts +++ b/src/renderer/src/store/chat-store-navigation-actions.ts @@ -3,6 +3,7 @@ import { getProvider } from '../agent/registry' import { rendererRuntimeClient } from '../agent/runtime-client' import i18n from '../i18n' import { + applyChatContentMaxWidth, applyCursorSpotlight, applyCursorSpotlightColor, applyTheme, @@ -376,6 +377,7 @@ export function createNavigationActions( const needsInitialSetup = !getActiveAgentApiKey(settings).trim() applyTheme(settings.theme) applyUiFontScale(settings.uiFontScale) + applyChatContentMaxWidth(settings.chatContentMaxWidthPx) applyCursorSpotlight(settings.cursorSpotlight !== false) applyCursorSpotlightColor(settings.cursorSpotlightColor) if (settings.write?.typography) applyWriteTypography(settings.write.typography) diff --git a/src/renderer/src/store/chat-store-thread-actions.test.ts b/src/renderer/src/store/chat-store-thread-actions.test.ts index b4527ffa9..2c873f355 100644 --- a/src/renderer/src/store/chat-store-thread-actions.test.ts +++ b/src/renderer/src/store/chat-store-thread-actions.test.ts @@ -475,6 +475,7 @@ describe('chat-store-thread-actions createThread conversation mode', () => { locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider: { providers: [], apiKey: '', baseUrl: '', proxy: { enabled: false } }, agents: { kun: { model: 'deepseek-v4-pro', apiKey: 'k', baseUrl: '' } }, workspaceRoot: '/tmp/workspace', diff --git a/src/renderer/src/store/chat-store.ts b/src/renderer/src/store/chat-store.ts index 6ca9589e4..4bf30978c 100644 --- a/src/renderer/src/store/chat-store.ts +++ b/src/renderer/src/store/chat-store.ts @@ -4,6 +4,7 @@ import { getProvider } from '../agent/registry' import { rendererRuntimeClient } from '../agent/runtime-client' import i18n from '../i18n' import { + applyChatContentMaxWidth, applyCursorSpotlight, applyCursorSpotlightColor, applyDocumentLocale, @@ -162,6 +163,7 @@ export const useChatStore = 
create((set, get) => ({ }, applyTheme, applyUiFontScale, + applyChatContentMaxWidth, applyCursorSpotlight, applyCursorSpotlightColor, applyWriteTypography, diff --git a/src/renderer/src/styles/base-shell.css b/src/renderer/src/styles/base-shell.css index 37e00fd91..7f4262245 100644 --- a/src/renderer/src/styles/base-shell.css +++ b/src/renderer/src/styles/base-shell.css @@ -2849,7 +2849,11 @@ pre { } .ds-chat-column-inset { - padding-inline: clamp(0.75rem, calc((100% - 48rem) / 2 + 1rem), 2rem); + padding-inline: clamp(0.75rem, calc((100% - var(--ds-chat-content-max-width, 56rem)) / 2 + 1rem), 2rem); +} + +.ds-chat-content-max-width { + max-width: var(--ds-chat-content-max-width, 56rem); } .ds-chat-stage { diff --git a/src/shared/app-settings-normalize.ts b/src/shared/app-settings-normalize.ts index f98773a00..99f6f55ca 100644 --- a/src/shared/app-settings-normalize.ts +++ b/src/shared/app-settings-normalize.ts @@ -6,6 +6,7 @@ import { DEFAULT_CURSOR_SPOTLIGHT_COLOR, DEFAULT_LOG_RETENTION_DAYS, normalizeGuiUpdateChannel, + normalizeChatContentMaxWidth, normalizeUiFontScale, type AppBehaviorConfigV1, type AppSettingsV1, @@ -78,6 +79,7 @@ export function normalizeAppSettings(settings: AppSettingsV1): AppSettingsV1 { ? 
maybeSettings.theme : 'system', uiFontScale: normalizeUiFontScale(maybeSettings.uiFontScale), + chatContentMaxWidthPx: normalizeChatContentMaxWidth(maybeSettings.chatContentMaxWidthPx), cursorSpotlight: maybeSettings.cursorSpotlight !== false, cursorSpotlightColor: normalizeCursorSpotlightColor(maybeSettings.cursorSpotlightColor), provider: providerSettings, diff --git a/src/shared/app-settings-provider.test.ts b/src/shared/app-settings-provider.test.ts index 5f9a92223..3519d34bf 100644 --- a/src/shared/app-settings-provider.test.ts +++ b/src/shared/app-settings-provider.test.ts @@ -44,6 +44,7 @@ function settings(): AppSettingsV1 { locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider: { ...defaultModelProviderSettings(), providers: [ diff --git a/src/shared/app-settings-types.ts b/src/shared/app-settings-types.ts index f61164b86..07b18d2a4 100644 --- a/src/shared/app-settings-types.ts +++ b/src/shared/app-settings-types.ts @@ -43,6 +43,19 @@ export function normalizeUiFontScale(value: unknown): UiFontScale { if (!Number.isFinite(num)) return DEFAULT_UI_FONT_SCALE return Math.min(UI_FONT_SCALE_MAX, Math.max(UI_FONT_SCALE_MIN, Math.round(num * 100) / 100)) } +/** Max width of the main chat message column, in CSS pixels. */ +export type ChatContentMaxWidthPx = number +export const CHAT_CONTENT_MAX_WIDTH_MIN = 640 +export const CHAT_CONTENT_MAX_WIDTH_MAX = 1200 +export const DEFAULT_CHAT_CONTENT_MAX_WIDTH_PX = 896 +export function normalizeChatContentMaxWidth(value: unknown): ChatContentMaxWidthPx { + const num = typeof value === 'number' ? 
value : Number(value) + if (!Number.isFinite(num)) return DEFAULT_CHAT_CONTENT_MAX_WIDTH_PX + return Math.min( + CHAT_CONTENT_MAX_WIDTH_MAX, + Math.max(CHAT_CONTENT_MAX_WIDTH_MIN, Math.round(num / 8) * 8) + ) +} export type ScheduleRunMode = 'agent' | 'plan' export type ScheduleKind = 'manual' | 'interval' | 'daily' | 'at' export type ScheduleTaskStatus = 'idle' | 'queued' | 'running' | 'success' | 'error' @@ -1748,6 +1761,7 @@ export type AppSettingsV1 = { locale: 'en' | 'zh' theme: 'system' | 'light' | 'dark' uiFontScale: UiFontScale + chatContentMaxWidthPx: ChatContentMaxWidthPx cursorSpotlight?: boolean cursorSpotlightColor?: string provider: ModelProviderSettingsV1 diff --git a/src/shared/app-settings.test.ts b/src/shared/app-settings.test.ts index 2b7478b46..569d4177c 100644 --- a/src/shared/app-settings.test.ts +++ b/src/shared/app-settings.test.ts @@ -32,6 +32,7 @@ import { isKunRuntimeInsecure, migrateLegacyAppSettings, normalizeAppSettings, + normalizeChatContentMaxWidth, parseClawUserPromptForDisplay, inferModelEndpointFormatFromUrl, kunToolPermissionModeFromSettings, @@ -52,6 +53,7 @@ function settings(): AppSettingsV1 { locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider: defaultModelProviderSettings(), agents: { kun: defaultKunRuntimeSettings() @@ -74,6 +76,20 @@ function settings(): AppSettingsV1 { } } +describe('chat content max width', () => { + it('defaults invalid values to 896px', () => { + expect(normalizeChatContentMaxWidth(undefined)).toBe(896) + expect(normalizeChatContentMaxWidth('bad')).toBe(896) + }) + + it('clamps and rounds to 8px steps', () => { + expect(normalizeChatContentMaxWidth(500)).toBe(640) + expect(normalizeChatContentMaxWidth(896)).toBe(896) + expect(normalizeChatContentMaxWidth(1300)).toBe(1200) + expect(normalizeChatContentMaxWidth(905)).toBe(904) + }) +}) + describe('model endpoint format inference', () => { it('treats /completions custom endpoints as Chat Completions-shaped', () => { 
expect(inferModelEndpointFormatFromUrl('https://api.example.com/custom/completions')).toBe('chat_completions') From 88c11a99d27a9cd5002044ca6a85aa15d2d70bba Mon Sep 17 00:00:00 2001 From: musnows Date: Sun, 28 Jun 2026 20:41:07 +0800 Subject: [PATCH 06/18] feat(chat): show memory summary on injected memory chip hover Persist injected memory previews on turns and surface them in timeline tooltips so users can inspect what context was applied without opening settings. Co-authored-by: Cursor --- .../adapters/hybrid/hybrid-thread-store.ts | 5 ++ kun/src/contracts/turns.ts | 7 ++ kun/src/domain/turn.ts | 1 + kun/src/loop/agent-loop.ts | 5 ++ kun/src/services/thread-service.ts | 2 + kun/src/services/turn-service.ts | 4 + kun/src/shared/memory-preview.ts | 5 ++ src/renderer/src/agent/kun-contract.ts | 2 + src/renderer/src/agent/kun-mapper.ts | 18 ++++ src/renderer/src/agent/kun-runtime.ts | 1 + src/renderer/src/agent/types.ts | 1 + .../src/components/chat/MessageTimeline.tsx | 3 + .../chat/injected-memory-lookup.test.ts | 29 +++++++ .../chat/injected-memory-lookup.tsx | 86 +++++++++++++++++++ .../chat/injected-memory-meta-chip.tsx | 84 ++++++++++++++++++ .../chat/message-timeline-bubbles.tsx | 5 +- .../chat/message-timeline-process.tsx | 5 +- src/renderer/src/lib/memory-preview.ts | 5 ++ 18 files changed, 262 insertions(+), 6 deletions(-) create mode 100644 kun/src/shared/memory-preview.ts create mode 100644 src/renderer/src/components/chat/injected-memory-lookup.test.ts create mode 100644 src/renderer/src/components/chat/injected-memory-lookup.tsx create mode 100644 src/renderer/src/components/chat/injected-memory-meta-chip.tsx create mode 100644 src/renderer/src/lib/memory-preview.ts diff --git a/kun/src/adapters/hybrid/hybrid-thread-store.ts b/kun/src/adapters/hybrid/hybrid-thread-store.ts index e9578db0a..4fc5873e9 100644 --- a/kun/src/adapters/hybrid/hybrid-thread-store.ts +++ b/kun/src/adapters/hybrid/hybrid-thread-store.ts @@ -932,6 +932,10 @@ function 
mergeTurnMetadata(previous: Turn, next: Turn): Turn { attachmentIds: mergeStringArrays(previous.attachmentIds, next.attachmentIds), activeSkillIds: mergeStringArrays(previous.activeSkillIds, next.activeSkillIds), injectedMemoryIds: mergeStringArrays(previous.injectedMemoryIds, next.injectedMemoryIds), + injectedMemorySummaries: + next.injectedMemorySummaries.length > 0 + ? next.injectedMemorySummaries + : previous.injectedMemorySummaries, items: mergeTurnItems(previous.items, next.items) } } @@ -971,6 +975,7 @@ function turnFromItems(threadId: string, turnId: string, items: TurnItem[], fall attachmentIds: attachmentIdsFromItems(items), activeSkillIds: [], injectedMemoryIds: [], + injectedMemorySummaries: [], createdAt, finishedAt: hasOpenItem ? undefined : items[items.length - 1]?.finishedAt ?? fallbackTime, items diff --git a/kun/src/contracts/turns.ts b/kun/src/contracts/turns.ts index c908fb4c6..7e68d43e6 100644 --- a/kun/src/contracts/turns.ts +++ b/kun/src/contracts/turns.ts @@ -50,6 +50,12 @@ export const TurnStatus = z.enum([ ]) export type TurnStatus = z.infer +export const InjectedMemorySummarySchema = z.object({ + id: z.string().min(1), + content: z.string() +}) +export type InjectedMemorySummary = z.infer + export const TurnSchema = z.object({ id: z.string().min(1), threadId: z.string().min(1), @@ -66,6 +72,7 @@ export const TurnSchema = z.object({ attachmentIds: z.array(z.string().min(1)).default([]), activeSkillIds: z.array(z.string().min(1)).default([]), injectedMemoryIds: z.array(z.string().min(1)).default([]), + injectedMemorySummaries: z.array(InjectedMemorySummarySchema).default([]), skillInjectionBytes: z.number().int().nonnegative().optional(), workspaceCheckpointId: z.string().min(1).optional(), toolCatalogFingerprint: z.string().optional(), diff --git a/kun/src/domain/turn.ts b/kun/src/domain/turn.ts index 0560bb87e..88dbec125 100644 --- a/kun/src/domain/turn.ts +++ b/kun/src/domain/turn.ts @@ -30,6 +30,7 @@ export function 
createTurnRecord(input: { attachmentIds: [...(input.attachmentIds ?? [])], activeSkillIds: [], injectedMemoryIds: [], + injectedMemorySummaries: [], ...(model ? { model } : {}), ...(reasoningEffort ? { reasoningEffort } : {}), ...(input.guiPlan ? { guiPlan: input.guiPlan } : {}), diff --git a/kun/src/loop/agent-loop.ts b/kun/src/loop/agent-loop.ts index acd38a82b..8fb2ddbaa 100644 --- a/kun/src/loop/agent-loop.ts +++ b/kun/src/loop/agent-loop.ts @@ -54,6 +54,7 @@ import { makeErrorItem } from '../domain/item.js' import { touchThread } from '../domain/thread.js' +import { memoryPreview } from '../shared/memory-preview.js' import { repairModelHistoryItems } from '../domain/model-history-repair.js' import type { TurnItem } from '../contracts/items.js' import type { ThreadGoal, ThreadTodoList } from '../contracts/threads.js' @@ -1509,6 +1510,10 @@ export class AgentLoop { activeSkillIds: skillResolution.activeSkillIds, skillInjectionBytes: skillResolution.injectedBytes, injectedMemoryIds: memories.map((memory) => memory.id), + injectedMemorySummaries: memories.map((memory) => ({ + id: memory.id, + content: memoryPreview(memory.content) + })), toolCatalogFingerprint: toolCatalog.fingerprint, toolCatalogToolCount: toolCatalog.toolCount, toolCatalogDrift: toolCatalogDrift.kind !== 'none' diff --git a/kun/src/services/thread-service.ts b/kun/src/services/thread-service.ts index 796c50620..245282936 100644 --- a/kun/src/services/thread-service.ts +++ b/kun/src/services/thread-service.ts @@ -749,6 +749,7 @@ function rebuildTurnsFromItems(input: { attachmentIds: [], activeSkillIds: [], injectedMemoryIds: [], + injectedMemorySummaries: [], createdAt: input.now, finishedAt: input.now, items: [] @@ -767,6 +768,7 @@ function rebuildTurnsFromItems(input: { attachmentIds: attachmentIdsFromItems(items), activeSkillIds: [], injectedMemoryIds: [], + injectedMemorySummaries: [], createdAt: items[0]?.createdAt ?? 
input.now, finishedAt: input.now, items diff --git a/kun/src/services/turn-service.ts b/kun/src/services/turn-service.ts index 8d9c1eed6..699dd1b57 100644 --- a/kun/src/services/turn-service.ts +++ b/kun/src/services/turn-service.ts @@ -446,6 +446,7 @@ export class TurnService { Partial, | 'activeSkillIds' | 'injectedMemoryIds' + | 'injectedMemorySummaries' | 'skillInjectionBytes' | 'toolCatalogFingerprint' | 'toolCatalogToolCount' @@ -460,6 +461,9 @@ export class TurnService { ...turn, ...(patch.activeSkillIds ? { activeSkillIds: [...patch.activeSkillIds] } : {}), ...(patch.injectedMemoryIds ? { injectedMemoryIds: [...patch.injectedMemoryIds] } : {}), + ...(patch.injectedMemorySummaries + ? { injectedMemorySummaries: [...patch.injectedMemorySummaries] } + : {}), ...(patch.skillInjectionBytes !== undefined ? { skillInjectionBytes: patch.skillInjectionBytes } : {}), ...(patch.toolCatalogFingerprint ? { toolCatalogFingerprint: patch.toolCatalogFingerprint } : {}), ...(patch.toolCatalogToolCount !== undefined ? 
{ toolCatalogToolCount: patch.toolCatalogToolCount } : {}), diff --git a/kun/src/shared/memory-preview.ts b/kun/src/shared/memory-preview.ts new file mode 100644 index 000000000..aa5e866ad --- /dev/null +++ b/kun/src/shared/memory-preview.ts @@ -0,0 +1,5 @@ +export function memoryPreview(content: string, maxLength = 200): string { + const compact = content.replace(/\s+/g, ' ').trim() + if (compact.length <= maxLength) return compact + return `${compact.slice(0, maxLength).trimEnd()}...` +} diff --git a/src/renderer/src/agent/kun-contract.ts b/src/renderer/src/agent/kun-contract.ts index e6e36d183..54794c4f2 100644 --- a/src/renderer/src/agent/kun-contract.ts +++ b/src/renderer/src/agent/kun-contract.ts @@ -362,6 +362,7 @@ export type CoreTurnJson = { attachmentIds?: string[] activeSkillIds?: string[] injectedMemoryIds?: string[] + injectedMemorySummaries?: Array<{ id: string; content: string }> skillInjectionBytes?: number workspaceCheckpointId?: string error?: string @@ -409,6 +410,7 @@ export type CoreTurnItemJson = { workspaceCheckpointId?: string activeSkillIds?: string[] injectedMemoryIds?: string[] + injectedMemorySummaries?: Array<{ id: string; content: string }> skillInjectionBytes?: number target?: CoreReviewTargetJson title?: string diff --git a/src/renderer/src/agent/kun-mapper.ts b/src/renderer/src/agent/kun-mapper.ts index a6f321e5f..1f4a477f5 100644 --- a/src/renderer/src/agent/kun-mapper.ts +++ b/src/renderer/src/agent/kun-mapper.ts @@ -278,6 +278,22 @@ function normalizeUserFileReferences(value: unknown): Array<{ return references.length > 0 ? references : undefined } +function normalizeInjectedMemorySummaries( + value: unknown +): Array<{ id: string; content: string }> | undefined { + if (!Array.isArray(value)) return undefined + const summaries = value + .map((entry) => { + if (!entry || typeof entry !== 'object') return null + const raw = entry as Record + const id = typeof raw.id === 'string' && raw.id.trim() ? 
raw.id.trim() : '' + const content = typeof raw.content === 'string' && raw.content.trim() ? raw.content.trim() : '' + return id && content ? { id, content } : null + }) + .filter((entry): entry is { id: string; content: string } => entry !== null) + return summaries.length > 0 ? summaries : undefined +} + function applyRuntimeDisclosureMeta( meta: Record, item: CoreTurnItemJson, @@ -290,6 +306,7 @@ function applyRuntimeDisclosureMeta( const attachmentIds = stringArray(item.attachmentIds) const activeSkillIds = stringArray(item.activeSkillIds) const injectedMemoryIds = stringArray(item.injectedMemoryIds) + const injectedMemorySummaries = normalizeInjectedMemorySummaries(item.injectedMemorySummaries) const fileReferences = normalizeUserFileReferences(item.fileReferences) const normalizedChild = normalizeChildMetadata(child) const displayText = typeof item.displayText === 'string' ? item.displayText.trim() : '' @@ -300,6 +317,7 @@ function applyRuntimeDisclosureMeta( if (fileReferences) meta.fileReferences = fileReferences if (activeSkillIds) meta.activeSkillIds = activeSkillIds if (injectedMemoryIds) meta.injectedMemoryIds = injectedMemoryIds + if (injectedMemorySummaries) meta.injectedMemorySummaries = injectedMemorySummaries if (typeof item.skillInjectionBytes === 'number') { meta.skillInjectionBytes = item.skillInjectionBytes } diff --git a/src/renderer/src/agent/kun-runtime.ts b/src/renderer/src/agent/kun-runtime.ts index 79cba80c7..9df1b1f52 100644 --- a/src/renderer/src/agent/kun-runtime.ts +++ b/src/renderer/src/agent/kun-runtime.ts @@ -217,6 +217,7 @@ export class KunRuntimeProvider implements AgentProvider { attachmentIds: turn.attachmentIds, activeSkillIds: turn.activeSkillIds, injectedMemoryIds: turn.injectedMemoryIds, + injectedMemorySummaries: turn.injectedMemorySummaries, skillInjectionBytes: turn.skillInjectionBytes, workspaceCheckpointId: item.workspaceCheckpointId ?? 
turn.workspaceCheckpointId })) diff --git a/src/renderer/src/agent/types.ts b/src/renderer/src/agent/types.ts index 7a6bbac5d..1051e5ee9 100644 --- a/src/renderer/src/agent/types.ts +++ b/src/renderer/src/agent/types.ts @@ -80,6 +80,7 @@ export type RuntimeDisclosureMetadata = { generatedFiles?: GeneratedFileReference[] activeSkillIds?: string[] injectedMemoryIds?: string[] + injectedMemorySummaries?: Array<{ id: string; content: string }> skillInjectionBytes?: number child?: RuntimeChildMetadata sources?: WebCitationSource[] diff --git a/src/renderer/src/components/chat/MessageTimeline.tsx b/src/renderer/src/components/chat/MessageTimeline.tsx index 402ae0847..3cfa1e96c 100644 --- a/src/renderer/src/components/chat/MessageTimeline.tsx +++ b/src/renderer/src/components/chat/MessageTimeline.tsx @@ -28,6 +28,7 @@ import { type Turn } from './message-timeline-turns' import { extractPlanMetadataFromBlock } from '../../plan/plan-tool' +import { InjectedMemoryLookupProvider } from './injected-memory-lookup' import { planDisplayNameFromRelativePath } from '../../plan/plan-path' export { summarizeToolBlock } from './message-timeline-process' @@ -255,6 +256,7 @@ export function MessageTimeline({ } return ( +
{visibleTurnAnchors.length > 2 ? (
+ ) } diff --git a/src/renderer/src/components/chat/injected-memory-lookup.test.ts b/src/renderer/src/components/chat/injected-memory-lookup.test.ts new file mode 100644 index 000000000..083fe9d2e --- /dev/null +++ b/src/renderer/src/components/chat/injected-memory-lookup.test.ts @@ -0,0 +1,29 @@ +import { describe, expect, it } from 'vitest' +import { resolveInjectedMemoryTooltipLines } from './injected-memory-lookup' + +describe('resolveInjectedMemoryTooltipLines', () => { + it('prefers turn metadata summaries over live lookup', () => { + const lines = resolveInjectedMemoryTooltipLines( + { + injectedMemorySummaries: [{ id: 'mem_1', content: 'User prefers dark mode.' }] + }, + ['mem_1'], + new Map([['mem_1', 'Stale lookup value']]) + ) + + expect(lines).toEqual(['User prefers dark mode.']) + }) + + it('falls back to live lookup and numbers multiple memories', () => { + const lines = resolveInjectedMemoryTooltipLines( + undefined, + ['mem_1', 'mem_2'], + new Map([ + ['mem_1', 'First memory'], + ['mem_2', 'Second memory'] + ]) + ) + + expect(lines).toEqual(['1. First memory', '2. 
Second memory']) + }) +}) diff --git a/src/renderer/src/components/chat/injected-memory-lookup.tsx b/src/renderer/src/components/chat/injected-memory-lookup.tsx new file mode 100644 index 000000000..ad1edc707 --- /dev/null +++ b/src/renderer/src/components/chat/injected-memory-lookup.tsx @@ -0,0 +1,86 @@ +import { createContext, useContext, useEffect, useMemo, useState, type ReactElement, type ReactNode } from 'react' +import { getProvider } from '../../agent/registry' +import { memoryPreview } from '../../lib/memory-preview' + +const InjectedMemoryLookupContext = createContext>(new Map()) + +export function InjectedMemoryLookupProvider({ + workspaceRoot, + children +}: { + workspaceRoot?: string + children: ReactNode +}): ReactElement { + const [lookup, setLookup] = useState>(() => new Map()) + + useEffect(() => { + const provider = getProvider() + if (typeof provider.listMemories !== 'function') { + setLookup(new Map()) + return + } + let cancelled = false + void provider + .listMemories({ workspace: workspaceRoot, includeDeleted: true }) + .then((records) => { + if (cancelled) return + setLookup(new Map(records.map((record) => [record.id, memoryPreview(record.content)]))) + }) + .catch(() => { + if (!cancelled) setLookup(new Map()) + }) + return () => { + cancelled = true + } + }, [workspaceRoot]) + + return ( + + {children} + + ) +} + +export function useInjectedMemoryLookup(): Map { + return useContext(InjectedMemoryLookupContext) +} + +export function metaInjectedMemorySummaries( + meta: Record | undefined +): Array<{ id: string; content: string }> { + const value = meta?.injectedMemorySummaries + if (!Array.isArray(value)) return [] + return value + .map((entry) => { + if (!entry || typeof entry !== 'object') return null + const raw = entry as Record + const id = typeof raw.id === 'string' && raw.id.trim() ? raw.id.trim() : '' + const content = typeof raw.content === 'string' && raw.content.trim() ? raw.content.trim() : '' + return id && content ? 
{ id, content } : null + }) + .filter((entry): entry is { id: string; content: string } => entry !== null) +} + +export function resolveInjectedMemoryTooltipLines( + meta: Record | undefined, + memoryIds: string[], + lookup: Map +): string[] { + const summariesById = new Map(metaInjectedMemorySummaries(meta).map((entry) => [entry.id, entry.content])) + return memoryIds.map((id, index) => { + const content = summariesById.get(id) ?? lookup.get(id) + if (!content) return memoryIds.length > 1 ? `${index + 1}. ${id}` : id + return memoryIds.length > 1 ? `${index + 1}. ${content}` : content + }) +} + +export function useInjectedMemoryTooltipText( + meta: Record | undefined, + memoryIds: string[] +): string { + const lookup = useInjectedMemoryLookup() + return useMemo( + () => resolveInjectedMemoryTooltipLines(meta, memoryIds, lookup).join('\n\n'), + [lookup, memoryIds, meta] + ) +} diff --git a/src/renderer/src/components/chat/injected-memory-meta-chip.tsx b/src/renderer/src/components/chat/injected-memory-meta-chip.tsx new file mode 100644 index 000000000..2b1e73b66 --- /dev/null +++ b/src/renderer/src/components/chat/injected-memory-meta-chip.tsx @@ -0,0 +1,84 @@ +import type { ReactElement } from 'react' +import { createPortal } from 'react-dom' +import { useCallback, useRef, useState } from 'react' +import { useTranslation } from 'react-i18next' +import { useInjectedMemoryTooltipText } from './injected-memory-lookup' + +type TooltipState = { + text: string + x: number + y: number +} + +function chipTooltipPosition(clientX: number, anchorRect: DOMRect): { x: number; y: number } { + const maxWidth = Math.min(320, window.innerWidth - 24) + const x = Math.max(12, Math.min(clientX - maxWidth / 2, window.innerWidth - maxWidth - 12)) + const y = Math.max(12, anchorRect.top - 8) + return { x, y } +} + +export function InjectedMemoryMetaChip({ + meta, + memoryIds, + chipClass +}: { + meta?: Record + memoryIds: string[] + chipClass: string +}): ReactElement | null { + const { 
t } = useTranslation('common') + const anchorRef = useRef(null) + const [tooltip, setTooltip] = useState(null) + const tooltipText = useInjectedMemoryTooltipText(meta, memoryIds) + + const showTooltip = useCallback( + (clientX: number): void => { + if (!tooltipText.trim()) return + const anchorRect = anchorRef.current?.getBoundingClientRect() + if (!anchorRect) return + setTooltip({ text: tooltipText, ...chipTooltipPosition(clientX, anchorRect) }) + }, + [tooltipText] + ) + + const moveTooltip = useCallback((clientX: number): void => { + setTooltip((current) => { + if (!current) return current + const anchorRect = anchorRef.current?.getBoundingClientRect() + if (!anchorRect) return current + return { ...current, ...chipTooltipPosition(clientX, anchorRect) } + }) + }, []) + + const hideTooltip = useCallback((): void => { + setTooltip(null) + }, []) + + if (memoryIds.length === 0) return null + + return ( + <> + showTooltip(event.clientX)} + onPointerMove={(event) => moveTooltip(event.clientX)} + onPointerLeave={hideTooltip} + onPointerCancel={hideTooltip} + > + {t('toolInjectedMemories')} {memoryIds.length} + + {tooltip + ? createPortal( +
+ {tooltip.text} +
, + document.body + ) + : null} + + ) +} diff --git a/src/renderer/src/components/chat/message-timeline-bubbles.tsx b/src/renderer/src/components/chat/message-timeline-bubbles.tsx index d5a663ef2..9804ef05f 100644 --- a/src/renderer/src/components/chat/message-timeline-bubbles.tsx +++ b/src/renderer/src/components/chat/message-timeline-bubbles.tsx @@ -17,6 +17,7 @@ import { ImagePreviewLightbox } from './ImagePreviewLightbox' import { ModelMetaTag, WritePromptMetaDisclosure } from './message-timeline-cards' import { readNumber, formatDuration, formatToolTitle } from './message-timeline-tools' import { answersByQuestionId, shouldShowQuestionHeader } from './user-input-panel-logic' +import { InjectedMemoryMetaChip } from './injected-memory-meta-chip' const COPY_FEEDBACK_RESET_MS = 1600 @@ -893,9 +894,7 @@ function RuntimeMetaChips({ ) : null} {injectedMemoryIds.length > 0 ? ( - - {t('toolInjectedMemories')} {injectedMemoryIds.length} - + ) : null} {childLabel ? ( diff --git a/src/renderer/src/components/chat/message-timeline-process.tsx b/src/renderer/src/components/chat/message-timeline-process.tsx index 2c5f8a5fd..653587cc8 100644 --- a/src/renderer/src/components/chat/message-timeline-process.tsx +++ b/src/renderer/src/components/chat/message-timeline-process.tsx @@ -28,6 +28,7 @@ import { MessageBubble } from './message-timeline-bubbles' import { blockHasPendingRuntimeWork, splitThink } from './message-timeline-turns' import { formatDuration, formatToolTitle } from './message-timeline-tools' import { SubagentGroup } from './SubagentCallCard' +import { InjectedMemoryMetaChip } from './injected-memory-meta-chip' export type ProcessSection = { id: string @@ -983,9 +984,7 @@ function RuntimeMetaBadges({ ) : null} {injectedMemoryIds.length > 0 ? ( - - {t('toolInjectedMemories')} {injectedMemoryIds.length} - + ) : null} {attachmentIds.length > 0 ? 
( diff --git a/src/renderer/src/lib/memory-preview.ts b/src/renderer/src/lib/memory-preview.ts new file mode 100644 index 000000000..aa5e866ad --- /dev/null +++ b/src/renderer/src/lib/memory-preview.ts @@ -0,0 +1,5 @@ +export function memoryPreview(content: string, maxLength = 200): string { + const compact = content.replace(/\s+/g, ' ').trim() + if (compact.length <= maxLength) return compact + return `${compact.slice(0, maxLength).trimEnd()}...` +} From f3d7606e793d90357e2ef670ee40d1b20b3070ba Mon Sep 17 00:00:00 2001 From: musnows Date: Sun, 28 Jun 2026 20:53:12 +0800 Subject: [PATCH 07/18] fix(ui): remove left border and dim thinking process text - Remove left border line from thinking process and tool call sections - Change thinking process text color to text-ds-faint (darker) to match tool calls --- src/renderer/src/components/chat/message-timeline-process.tsx | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/renderer/src/components/chat/message-timeline-process.tsx b/src/renderer/src/components/chat/message-timeline-process.tsx index 2c5f8a5fd..9529c42a3 100644 --- a/src/renderer/src/components/chat/message-timeline-process.tsx +++ b/src/renderer/src/components/chat/message-timeline-process.tsx @@ -312,12 +312,12 @@ export function ProcessSectionRow({ {expanded ? (
{shouldRenderDetail ? ( section.kind === 'reasoning' ? ( -
+
) : ( From 81639abe05e3aad7f77d2e6a7500f93dc764cf20 Mon Sep 17 00:00:00 2001 From: musnows Date: Sun, 28 Jun 2026 21:06:50 +0800 Subject: [PATCH 08/18] fix(renderer): polish background shell callback UI and tool summaries Improve idle callback presentation with BellRing headers, parameterized background_shell tool labels, and client-side notice detection that no longer overwrites the active user prompt on steered completion events. Co-authored-by: Cursor --- .../adapters/tool/background-shell-tool.ts | 1 - kun/src/adapters/tool/builtin-bash-tool.ts | 19 +- kun/src/loop/token-economy.ts | 4 +- kun/tests/builtin-tools.test.ts | 4 +- src/renderer/src/agent/kun-contract.ts | 1 - src/renderer/src/agent/kun-mapper.ts | 12 +- src/renderer/src/agent/types.ts | 2 +- src/renderer/src/components/Workbench.tsx | 2 - .../chat/BackgroundShellOverlay.tsx | 4 +- .../src/components/chat/FloatingComposer.tsx | 10 +- .../chat/MessageTimeline.tool-summary.test.ts | 41 +++- .../chat/message-timeline-bubbles.tsx | 87 ++++--- .../chat/message-timeline-process.tsx | 43 +++- .../components/chat/message-timeline-tools.ts | 72 +++++- .../chat/message-timeline-turns.test.ts | 23 +- .../components/chat/message-timeline-turns.ts | 4 +- src/renderer/src/locales/en/common.json | 11 + src/renderer/src/locales/zh/common.json | 11 + .../store/chat-store-runtime-helpers.test.ts | 229 +++++------------- .../src/store/chat-store-runtime-helpers.ts | 24 +- src/renderer/src/store/chat-store-runtime.ts | 44 ++-- src/shared/background-shell-notice.ts | 24 ++ 22 files changed, 399 insertions(+), 273 deletions(-) diff --git a/kun/src/adapters/tool/background-shell-tool.ts b/kun/src/adapters/tool/background-shell-tool.ts index 7804e5de4..1fa8dfffd 100644 --- a/kun/src/adapters/tool/background-shell-tool.ts +++ b/kun/src/adapters/tool/background-shell-tool.ts @@ -70,7 +70,6 @@ export function createBackgroundShellTool(options: BackgroundShellToolOptions = ...(session.finishedAt ? 
{ finished_at: session.finishedAt } : {}), exit_code: session.exitCode, output: session.output, - output_preview: session.output, ...(session.outputTruncated ? { output_truncated: true } : {}), ...(session.outputFilePath ? { output_file: session.outputFilePath } : {}), detached: session.detached diff --git a/kun/src/adapters/tool/builtin-bash-tool.ts b/kun/src/adapters/tool/builtin-bash-tool.ts index e83ebe30a..3a040d6ed 100644 --- a/kun/src/adapters/tool/builtin-bash-tool.ts +++ b/kun/src/adapters/tool/builtin-bash-tool.ts @@ -56,8 +56,8 @@ type BashPayload = { shell: string exit_code: number | null output: string - full_output_path: string | null - truncation: null | { + full_output_path?: string | null + truncation?: null | { total_lines: number output_lines: number total_bytes: number @@ -74,8 +74,6 @@ type BashPayload = { stop_sent?: boolean error?: string output_file?: string - output_truncated?: boolean - output_total_chars?: number } const bashSessions = new Map() @@ -341,20 +339,7 @@ async function backgroundSessionPayload( shell: session.shell, exit_code: session.exitCode, output: fields.output, - full_output_path: fields.output_file || null, - truncation: fields.output_truncated - ? 
{ - total_lines: 0, - output_lines: 0, - total_bytes: fields.output_total_chars, - output_bytes: Buffer.byteLength(fields.output, 'utf8'), - truncated_by: 'bytes', - last_line_partial: false - } - : null, output_file: fields.output_file, - output_truncated: fields.output_truncated, - output_total_chars: fields.output_total_chars, session_id: session.id, status: session.status, started_at: session.startedAt, diff --git a/kun/src/loop/token-economy.ts b/kun/src/loop/token-economy.ts index 21c0f4877..fc3272ae8 100644 --- a/kun/src/loop/token-economy.ts +++ b/kun/src/loop/token-economy.ts @@ -406,10 +406,12 @@ function compactToolOutput(toolName: string, output: unknown): unknown { } function compactBashOutput(output: JsonRecord): JsonRecord { + const hasExternalOutput = + Boolean(output.full_output_path) || Boolean(output.output_file) return { ...output, output: typeof output.output === 'string' - ? compactCommandOutput(output.output, Boolean(output.full_output_path)) + ? compactCommandOutput(output.output, hasExternalOutput) : output.output } } diff --git a/kun/tests/builtin-tools.test.ts b/kun/tests/builtin-tools.test.ts index 6a8678e12..e8dbaba14 100644 --- a/kun/tests/builtin-tools.test.ts +++ b/kun/tests/builtin-tools.test.ts @@ -766,9 +766,11 @@ describe('Kun built-in tools', () => { action: 'read', session_id: String(payload.session_id) }) - expect(read.output_truncated).toBe(true) expect(String(read.output)).toContain('[background shell output truncated') expect(read.output_file).toBe(outputFile) + expect(read.full_output_path).toBeUndefined() + expect(read.truncation).toBeUndefined() + expect(read.output_truncated).toBeUndefined() }) it('hides finished background shell sessions from list unless include_finished=true', async () => { diff --git a/src/renderer/src/agent/kun-contract.ts b/src/renderer/src/agent/kun-contract.ts index 3e6eba17e..e6e36d183 100644 --- a/src/renderer/src/agent/kun-contract.ts +++ b/src/renderer/src/agent/kun-contract.ts @@ -378,7 
+378,6 @@ export type CoreTurnItemJson = { kind: string text?: string displayText?: string - messageSource?: 'background_shell' toolName?: string callId?: string toolKind?: 'tool_call' | 'command_execution' | 'file_change' diff --git a/src/renderer/src/agent/kun-mapper.ts b/src/renderer/src/agent/kun-mapper.ts index 08ab7aef7..f4668d2e4 100644 --- a/src/renderer/src/agent/kun-mapper.ts +++ b/src/renderer/src/agent/kun-mapper.ts @@ -21,6 +21,7 @@ import type { UserInputQuestion } from './types' import { redactSecrets, redactSecretText } from '@shared/secret-redaction' +import { applyClientUserMessageSourceMeta } from '@shared/background-shell-notice' import type { CoreChildRuntimeMetadataJson, CoreRuntimeEventJson, @@ -296,9 +297,7 @@ function applyRuntimeDisclosureMeta( if (displayText && displayText !== item.text?.trim()) { meta.displayText = displayText } - if (item.messageSource === 'background_shell') { - meta.messageSource = 'background_shell' - } + applyClientUserMessageSourceMeta(meta, item.text ?? 
'') if (attachmentIds) meta.attachmentIds = attachmentIds if (fileReferences) meta.fileReferences = fileReferences if (activeSkillIds) meta.activeSkillIds = activeSkillIds @@ -525,8 +524,13 @@ function toolBlockFromItem(item: CoreTurnItemJson, child?: CoreChildRuntimeMetad const generatedFiles = extractToolGeneratedFiles(item) if (generatedFiles) meta.generatedFiles = generatedFiles const presentation = inferToolPresentation(item) + const payload = payloadFor(item) if (presentation.command) meta.command = presentation.command - if (presentation.toolKind === 'command_execution') applyCommandResultMeta(meta, item) + if (presentation.toolKind === 'command_execution' || item.toolName === 'background_shell') { + applyCommandResultMeta(meta, item) + } + const action = readStructuredString(payload, 'action') + if (action) meta.action = action if (isPlan) { const plan = extractPlanMetadata(item) if (plan) meta.plan = plan diff --git a/src/renderer/src/agent/types.ts b/src/renderer/src/agent/types.ts index 86a3a4029..caacd9559 100644 --- a/src/renderer/src/agent/types.ts +++ b/src/renderer/src/agent/types.ts @@ -72,7 +72,7 @@ export type WebCitationSource = { export type RuntimeDisclosureMetadata = { displayText?: string - messageSource?: 'background_shell' + messageSource?: 'background_shell' // client-only rendering hint; never sent to the runtime turnId?: string workspaceCheckpointId?: string attachmentIds?: string[] diff --git a/src/renderer/src/components/Workbench.tsx b/src/renderer/src/components/Workbench.tsx index 89eb3ca32..e489e9de9 100644 --- a/src/renderer/src/components/Workbench.tsx +++ b/src/renderer/src/components/Workbench.tsx @@ -49,7 +49,6 @@ import { type ComposerExecutionSettings, type ComposerFileReference } from './chat/FloatingComposer' -import { BackgroundShellOverlay } from './chat/BackgroundShellOverlay' import { ChatFileTreePanel, type ChatFileTreeReference } from './chat/ChatFileTreePanel' import { composerReasoningEffortRequestValue, @@ 
-2766,7 +2765,6 @@ export function Workbench(): ReactElement { {!focusModeEnabled ? : null}
- {activeThreadRelation === 'side' && activeThreadParentId ? ( +
{open ? (
diff --git a/src/renderer/src/components/chat/FloatingComposer.tsx b/src/renderer/src/components/chat/FloatingComposer.tsx index 577175c51..ea20981ad 100644 --- a/src/renderer/src/components/chat/FloatingComposer.tsx +++ b/src/renderer/src/components/chat/FloatingComposer.tsx @@ -95,6 +95,7 @@ import { } from './FloatingComposerModelPicker' import { FloatingComposerAgentPicker } from './FloatingComposerAgentPicker' import { FloatingComposerUserInputPanel } from './FloatingComposerUserInputPanel' +import { BackgroundShellOverlay } from './BackgroundShellOverlay' import { useComposerUserInput, type PendingUserInputBlock } from './use-composer-user-input' import { selectLivePendingUserInput } from './user-input-panel-logic' import { @@ -1616,8 +1617,9 @@ export function FloatingComposer({ />
- {showGoalFloater && activeThreadGoal && !pendingUserInputBlock ? ( -
+
+ {runtimeReady ? : null} + {showGoalFloater && activeThreadGoal && !pendingUserInputBlock ? (
@@ -1672,8 +1674,8 @@ export function FloatingComposer({
-
- ) : null} + ) : null} +
{composerMenuOpen && slashQuery == null ? (
= { toolBuiltinGrep: 'Search', toolBuiltinFind: 'Find', toolBuiltinLs: 'List', - toolBuiltinBash: 'Bash' + toolBuiltinBash: 'Bash', + toolBuiltinBackgroundShell: 'Background shell', + toolActionBackgroundShellRead: 'Read background shell', + toolActionBackgroundShellList: 'List background shells' } const t = (key: string) => labels[key] ?? (key === 'toolActionCommand' ? 'Ran command' : key) @@ -120,6 +123,34 @@ describe('MessageTimeline tool summaries', () => { ) ).toBe('Ran command npm test') }) + + it('summarizes background_shell with action, session id, and command', () => { + expect( + summarizeToolBlock( + toolBlock({ + summary: 'background_shell', + meta: { + toolName: 'background_shell', + action: 'read', + session_id: '2mcorxhe', + command: 'sleep 15 && echo "Hello from background!"' + }, + detail: JSON.stringify( + { + action: 'read', + session_id: '2mcorxhe', + command: 'sleep 15 && echo "Hello from background!"', + exit_code: 0, + status: 'completed' + }, + null, + 2 + ) + }), + t + ) + ).toBe('Read background shell 2mcorxhe sleep 15 && echo "Hello from background!"') + }) }) describe('MessageTimeline Kun runtime metadata smoke', () => { @@ -296,7 +327,7 @@ describe('MessageTimeline Kun runtime metadata smoke', () => { expect(html).not.toContain('bg-red-500/10') }) - it('renders the same runtime metadata on process timeline rows', () => { + it('renders tool-specific runtime metadata on process timeline rows', () => { const block: ChatBlock = toolBlock({ summary: 'delegate: research', meta: { @@ -325,9 +356,9 @@ describe('MessageTimeline Kun runtime metadata smoke', () => { }) ) - expect(html).toContain('Attachments 1') - expect(html).toContain('Skills 1') - expect(html).toContain('Memories 1') + expect(html).not.toContain('Attachments 1') + expect(html).not.toContain('Skills 1') + expect(html).not.toContain('Memories 1') expect(html).toContain('Child agent') expect(html).toContain('research') expect(html).toContain('Sources 1') diff --git 
a/src/renderer/src/components/chat/message-timeline-bubbles.tsx b/src/renderer/src/components/chat/message-timeline-bubbles.tsx index 14be7c082..9c9d9a3b3 100644 --- a/src/renderer/src/components/chat/message-timeline-bubbles.tsx +++ b/src/renderer/src/components/chat/message-timeline-bubbles.tsx @@ -3,7 +3,7 @@ import { memo, useEffect, useMemo, useRef, useState } from 'react' import ReactMarkdown from 'react-markdown' import remarkGfm from 'remark-gfm' import { useTranslation } from 'react-i18next' -import { ArrowDown, Check, ChevronDown, ChevronRight, Copy, Download, File, FileEdit, GitFork, ImageIcon, Loader2, MessageSquareQuote, PencilLine, RotateCcw, SquareTerminal, Terminal, Video, Wrench } from 'lucide-react' +import { ArrowDown, Check, ChevronDown, ChevronRight, Copy, Download, File, FileEdit, GitFork, ImageIcon, Loader2, MessageSquareQuote, PencilLine, RotateCcw, Terminal, Video, Wrench } from 'lucide-react' import type { AttachmentReference, ChatBlock, GeneratedFileReference, RuntimeDisclosureMetadata, ToolBlock, UserFileReference, UserInputAnswer } from '../../agent/types' import { extractUnifiedDiffText } from '../../lib/diff-stats' import { useChatStore } from '../../store/chat-store' @@ -17,7 +17,7 @@ import { DiffView } from '../DiffView' import { AssistantMarkdown } from './AssistantMarkdown' import { ImagePreviewLightbox } from './ImagePreviewLightbox' import { ModelMetaTag, WritePromptMetaDisclosure } from './message-timeline-cards' -import { readNumber, formatDuration, formatToolTitle } from './message-timeline-tools' +import { readNumber, formatDuration, formatToolTitle, summarizeBackgroundShellToolBlock } from './message-timeline-tools' import { answersByQuestionId, shouldShowQuestionHeader } from './user-input-panel-logic' const COPY_FEEDBACK_RESET_MS = 1600 @@ -37,34 +37,50 @@ function BackgroundShellNoticeBubble({ t('backgroundShellNotice.title', { defaultValue: 'Background shell completed' }) const outputPreview = parsed?.outputPreview ?? 
'' const canExpandOutput = outputPreview.length > 180 + const exitCodeTone = + parsed && parsed.exitCode === 0 + ? 'border-emerald-500/25 bg-emerald-500/10 text-emerald-700 dark:text-emerald-300' + : 'border-orange-400/30 bg-orange-500/10 text-orange-800 dark:text-orange-200' return ( -
-
-
- -
+
+
+
+ + {t('backgroundShellNotice.kindLabel', { defaultValue: 'Background callback' })} + + {parsed ? ( + <> + + + {t('backgroundShellNotice.sessionId', { defaultValue: 'Session' })} + + {parsed.sessionId} + + + + {t('backgroundShellNotice.exitCode', { defaultValue: 'Exit code' })} + + {parsed.exitCode} + + + ) : null} +
+

{title}

{parsed ? (
-
-
- {t('backgroundShellNotice.sessionId', { defaultValue: 'Session' })} -
-
{parsed.sessionId}
-
{t('backgroundShellNotice.command', { defaultValue: 'Command' })}
{parsed.command}
-
-
- {t('backgroundShellNotice.exitCode', { defaultValue: 'Exit code' })} -
-
{parsed.exitCode}
-
) : null} {outputPreview ? ( @@ -102,7 +118,6 @@ function BackgroundShellNoticeBubble({ {t('backgroundShellNotice.outputFile', { defaultValue: 'Full output file' })}: {parsed.outputFile}

) : null} -
@@ -939,16 +954,18 @@ function metaSources(meta: Record | undefined): Array<{ title?: function RuntimeMetaChips({ meta, align = 'left', - hideAttachments = false + hideAttachments = false, + hideTurnDisclosure = false }: { meta?: Record align?: 'left' | 'right' hideAttachments?: boolean + hideTurnDisclosure?: boolean }): ReactElement | null { const { t } = useTranslation('common') - const attachmentIds = metaStringArray(meta, 'attachmentIds') - const activeSkillIds = metaStringArray(meta, 'activeSkillIds') - const injectedMemoryIds = metaStringArray(meta, 'injectedMemoryIds') + const attachmentIds = hideTurnDisclosure || hideAttachments ? [] : metaStringArray(meta, 'attachmentIds') + const activeSkillIds = hideTurnDisclosure ? [] : metaStringArray(meta, 'activeSkillIds') + const injectedMemoryIds = hideTurnDisclosure ? [] : metaStringArray(meta, 'injectedMemoryIds') const sources = metaSources(meta) const child = meta?.child && typeof meta.child === 'object' ? meta.child as Record : null const childLabel = @@ -1527,13 +1544,21 @@ function ToolEntry({ block, nested = false }: { block: ToolBlock; nested?: boole ? 'border-amber-300/80 bg-amber-500/10 text-amber-950 dark:border-amber-800/50 dark:bg-amber-950/30 dark:text-amber-100' : 'border-ds-border bg-ds-subtle text-ds-ink' + const toolName = typeof block.meta?.toolName === 'string' ? block.meta.toolName.trim() : '' + const displaySummary = + toolName === 'background_shell' + ? summarizeBackgroundShellToolBlock(block, t) + : block.summary + const Icon = block.toolKind === 'file_change' ? FileEdit : block.toolKind === 'command_execution' ? Terminal : Wrench const kindLabel = - block.toolKind === 'file_change' - ? t('toolKindFile') - : block.toolKind === 'command_execution' - ? t('toolKindCommand') - : t('toolKindTool') + toolName === 'background_shell' + ? t('toolBuiltinBackgroundShell', { defaultValue: 'Background shell' }) + : block.toolKind === 'file_change' + ? 
t('toolKindFile') + : block.toolKind === 'command_execution' + ? t('toolKindCommand') + : t('toolKindTool') const exitCode = readNumber(block.meta, 'exit_code') const durationMs = readNumber(block.meta, 'duration_ms') @@ -1593,9 +1618,9 @@ function ToolEntry({ block, nested = false }: { block: ToolBlock; nested?: boole {block.filePath ? ( {block.filePath} — ) : null} - {block.summary} + {displaySummary}
- +
{canExpand ? ( effectiveOpen ? ( diff --git a/src/renderer/src/components/chat/message-timeline-process.tsx b/src/renderer/src/components/chat/message-timeline-process.tsx index ffa9146a1..d783b454b 100644 --- a/src/renderer/src/components/chat/message-timeline-process.tsx +++ b/src/renderer/src/components/chat/message-timeline-process.tsx @@ -12,6 +12,7 @@ import { MessageSquareQuote, Minimize2, PencilLine, + BellRing, Search, Terminal, Wrench @@ -26,7 +27,12 @@ import { DiffView } from '../DiffView' import { AssistantMarkdown } from './AssistantMarkdown' import { MessageBubble } from './message-timeline-bubbles' import { blockHasPendingRuntimeWork, isBackgroundShellNoticeBlock, splitThink } from './message-timeline-turns' -import { formatDuration, formatToolTitle } from './message-timeline-tools' +import { + formatDuration, + formatToolTitle, + isBackgroundShellCommandBlock, + summarizeBackgroundShellToolBlock +} from './message-timeline-tools' import { SubagentGroup } from './SubagentCallCard' export type ProcessSection = { @@ -641,6 +647,7 @@ function summarizeExecutionSection( ): string { let fileCount = 0 let commandCount = 0 + let backgroundCommandCount = 0 let toolCount = 0 let approvalCount = 0 @@ -653,7 +660,11 @@ function summarizeExecutionSection( if (block.toolKind === 'file_change') { fileCount += 1 } else if (block.toolKind === 'command_execution') { - commandCount += 1 + if (isBackgroundShellCommandBlock(block)) { + backgroundCommandCount += 1 + } else { + commandCount += 1 + } } else { toolCount += 1 } @@ -665,6 +676,13 @@ function summarizeExecutionSection( fileCount === 1 ? t('groupEditedFile') : t('groupEditedFiles', { count: fileCount }) ) } + if (backgroundCommandCount > 0) { + parts.push( + backgroundCommandCount === 1 + ? 
t('groupRanBackgroundCommand') + : t('groupRanBackgroundCommands', { count: backgroundCommandCount }) + ) + } if (commandCount > 0) { parts.push( commandCount === 1 @@ -703,6 +721,7 @@ function processBlockIcon(block: ChatBlock): LucideIcon | null { if (block.kind === 'compaction') return Minimize2 if (block.kind === 'approval') return Wrench if (block.kind === 'user_input') return MessageSquareQuote + if (isBackgroundShellNoticeBlock(block)) return BellRing if (block.kind !== 'tool') return null return toolBlockIcon(block) } @@ -889,6 +908,8 @@ function builtInToolLabel( case 'bash': case 'shell': return t('toolBuiltinBash') + case 'background_shell': + return t('toolBuiltinBackgroundShell', { defaultValue: 'Background shell' }) case 'delegate_task': // Routed to SubagentCallCard before the generic row; labeled here as a // defensive fallback so an ungrouped delegate block never reads as raw JSON. @@ -947,10 +968,11 @@ function RuntimeMetaBadges({ }): ReactElement | null { const meta = block.kind === 'tool' || block.kind === 'approval' || block.kind === 'user' ? block.meta : undefined if (!meta) return null + const showTurnDisclosure = block.kind !== 'tool' const sources = readMetaSources(meta) - const attachmentIds = readMetaStringArray(meta, 'attachmentIds') - const activeSkillIds = readMetaStringArray(meta, 'activeSkillIds') - const injectedMemoryIds = readMetaStringArray(meta, 'injectedMemoryIds') + const attachmentIds = showTurnDisclosure ? readMetaStringArray(meta, 'attachmentIds') : [] + const activeSkillIds = showTurnDisclosure ? readMetaStringArray(meta, 'activeSkillIds') : [] + const injectedMemoryIds = showTurnDisclosure ? readMetaStringArray(meta, 'injectedMemoryIds') : [] const child = meta.child && typeof meta.child === 'object' ? 
meta.child as Record : null const childLabel = typeof child?.childLabel === 'string' && child.childLabel.trim() @@ -1031,6 +1053,10 @@ export function summarizeToolBlock( readMetaString(block.meta, 'pattern') const command = readMetaString(block.meta, 'command') + if (toolName === 'background_shell') { + return summarizeBackgroundShellToolBlock(block, t) + } + if ((toolName === 'read_file' || toolName === 'read') && filePath) { return `${label} ${filePath}` } @@ -1044,7 +1070,10 @@ export function summarizeToolBlock( return `${label} ${filePath}` } if (command && block.toolKind === 'command_execution') { - return `${formatToolTitle(block, t)} ${summarizeProcessText(command, 72)}` + const action = isBackgroundShellCommandBlock(block) + ? t('toolActionBackgroundCommand') + : formatToolTitle(block, t) + return `${action} ${summarizeProcessText(command, 72)}` } if (filePath) { return `${label} ${filePath}` @@ -1192,7 +1221,7 @@ function describeProcessBlock( if (block.kind === 'tool') { return summarizeToolBlock(block, t) } - if (isBackgroundShellNoticeBlock(block)) { + if (block.kind === 'user' && isBackgroundShellNoticeBlock(block)) { return block.meta?.displayText?.trim() || t('backgroundShellNotice.title', { defaultValue: 'Background shell completed' }) } if (block.kind === 'compaction') { diff --git a/src/renderer/src/components/chat/message-timeline-tools.ts b/src/renderer/src/components/chat/message-timeline-tools.ts index 87d73a60a..c5d1c9687 100644 --- a/src/renderer/src/components/chat/message-timeline-tools.ts +++ b/src/renderer/src/components/chat/message-timeline-tools.ts @@ -1,14 +1,84 @@ import type { ToolBlock } from '../../agent/types' +const BACKGROUND_SHELL_ACTION_LABEL_KEYS: Record = { + list: 'toolActionBackgroundShellList', + read: 'toolActionBackgroundShellRead', + poll: 'toolActionBackgroundShellPoll', + write: 'toolActionBackgroundShellWrite', + stop: 'toolActionBackgroundShellStop' +} + +function truncateSummaryText(text: string, max = 72): 
string { + const oneLine = text.replace(/\s+/g, ' ').trim() + if (!oneLine) return '' + if (oneLine.length <= max) return oneLine + return `${oneLine.slice(0, max - 1).trimEnd()}…` +} + +function readPayloadString(record: Record, key: string): string | undefined { + const value = record[key] + return typeof value === 'string' && value.trim() ? value.trim() : undefined +} + +export function parseToolBlockPayload(block: ToolBlock): Record { + const merged: Record = { ...(block.meta ?? {}) } + const detail = block.detail?.trim() + if (!detail?.startsWith('{')) return merged + try { + const parsed = JSON.parse(detail) as unknown + if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) return merged + for (const [key, value] of Object.entries(parsed as Record)) { + if (!(key in merged)) merged[key] = value + } + } catch { + // Ignore invalid JSON tool payloads in the timeline summary. + } + return merged +} + +export function summarizeBackgroundShellToolBlock( + block: ToolBlock, + t: (key: string, opts?: Record) => string +): string { + const payload = parseToolBlockPayload(block) + const action = readPayloadString(payload, 'action') ?? '' + const sessionId = readPayloadString(payload, 'session_id') ?? '' + const command = readPayloadString(payload, 'command') ?? '' + const actionLabelKey = BACKGROUND_SHELL_ACTION_LABEL_KEYS[action] + const actionLabel = actionLabelKey ? 
t(actionLabelKey) : action + + if (action === 'list') { + const parts = [actionLabel || t('toolBuiltinBackgroundShell', { defaultValue: 'Background shell' })] + if (payload.include_finished === true) { + parts.push(t('toolActionBackgroundShellIncludeFinished', { defaultValue: 'include finished' })) + } + return parts.join(' · ') + } + + const parts: string[] = [] + if (actionLabel) parts.push(actionLabel) + if (sessionId) parts.push(sessionId) + if (command) parts.push(truncateSummaryText(command)) + if (parts.length > 0) return parts.join(' ') + return t('toolBuiltinBackgroundShell', { defaultValue: 'Background shell' }) +} + export function readNumber(meta: Record | undefined, key: string): number | undefined { if (!meta) return undefined const v = meta[key] return typeof v === 'number' && Number.isFinite(v) ? v : undefined } +export function isBackgroundShellCommandBlock(block: ToolBlock): boolean { + const sessionId = block.meta?.session_id + return typeof sessionId === 'string' && /^[a-z0-9]{8}$/i.test(sessionId.trim()) +} + export function formatToolTitle(block: ToolBlock, t: (key: string) => string): string { if (block.toolKind === 'file_change') return t('toolActionFile') - if (block.toolKind === 'command_execution') return t('toolActionCommand') + if (block.toolKind === 'command_execution') { + return isBackgroundShellCommandBlock(block) ? 
t('toolActionBackgroundCommand') : t('toolActionCommand') + } return t('toolActionTool') } diff --git a/src/renderer/src/components/chat/message-timeline-turns.test.ts b/src/renderer/src/components/chat/message-timeline-turns.test.ts index 45117d708..0d1ba8eda 100644 --- a/src/renderer/src/components/chat/message-timeline-turns.test.ts +++ b/src/renderer/src/components/chat/message-timeline-turns.test.ts @@ -33,8 +33,8 @@ describe('message timeline turns', () => { const notice: ChatBlock = { kind: 'user', id: 'notice_1', - text: 'abcd1234', - meta: { messageSource: 'background_shell', displayText: 'Background shell abcd1234 completed' } + text: 'abcd1234npm run build0okread output', + meta: { displayText: 'Background shell abcd1234 completed', messageSource: 'background_shell' } } const blocks: ChatBlock[] = [ { kind: 'user', id: 'user_1', text: 'Run build in background' }, @@ -49,4 +49,23 @@ describe('message timeline turns', () => { expect(turns[0]?.user?.id).toBe('user_1') expect(turns[0]?.blocks.map((block) => block.id)).toEqual(['assistant_1', 'notice_1', 'assistant_2']) }) + + it('detects background shell notices from client-inferred xml text', () => { + const notice: ChatBlock = { + kind: 'user', + id: 'notice_2', + text: 'abcd1234npm run build0okread output' + } + const blocks: ChatBlock[] = [ + { kind: 'user', id: 'user_1', text: 'Run build in background' }, + notice + ] + + const turns = groupTurns(blocks) + + expect(turns).toHaveLength(1) + expect(turns[0]?.user?.text).toBe('Run build in background') + expect(turns[0]?.blocks).toHaveLength(1) + expect(turns[0]?.blocks[0]?.id).toBe('notice_2') + }) }) diff --git a/src/renderer/src/components/chat/message-timeline-turns.ts b/src/renderer/src/components/chat/message-timeline-turns.ts index 7afeb033b..a4ce592bc 100644 --- a/src/renderer/src/components/chat/message-timeline-turns.ts +++ b/src/renderer/src/components/chat/message-timeline-turns.ts @@ -1,5 +1,5 @@ import type { ChatBlock } from 
'../../agent/types' -import { isBackgroundShellNoticeSource } from '@shared/background-shell-notice' +import { isBackgroundShellNoticeUserMessage } from '@shared/background-shell-notice' export type Turn = { user?: Extract @@ -7,7 +7,7 @@ export type Turn = { } export function isBackgroundShellNoticeBlock(block: ChatBlock): boolean { - return block.kind === 'user' && isBackgroundShellNoticeSource(block.meta?.messageSource) + return block.kind === 'user' && isBackgroundShellNoticeUserMessage(block) } export function groupTurns(blocks: ChatBlock[]): Turn[] { diff --git a/src/renderer/src/locales/en/common.json b/src/renderer/src/locales/en/common.json index 62e43bc41..e74afab3e 100644 --- a/src/renderer/src/locales/en/common.json +++ b/src/renderer/src/locales/en/common.json @@ -1999,6 +1999,7 @@ "compactionRunning": "Compacting context", "compactionManualCompleted": "Compacted context", "backgroundShellNotice.title": "Background shell completed", + "backgroundShellNotice.kindLabel": "Background callback", "backgroundShellNotice.sessionId": "Session", "backgroundShellNotice.command": "Command", "backgroundShellNotice.exitCode": "Exit code", @@ -2024,6 +2025,8 @@ "turnChangeFilesMany": "Edited {{count}} files", "groupRanCommands": "Ran {{count}} commands", "groupRanCommand": "Ran 1 command", + "groupRanBackgroundCommands": "Ran {{count}} background commands", + "groupRanBackgroundCommand": "Ran 1 background command", "groupUsedTools": "Used {{count}} tools", "groupUsedTool": "Used 1 tool", "groupApprovals": "{{count}} approvals", @@ -2039,6 +2042,7 @@ "rewindBusyError": "Wait for the current turn to finish before rewinding.", "toolActionFile": "Edited file", "toolActionCommand": "Ran command", + "toolActionBackgroundCommand": "Ran background command", "toolActionTool": "Called tool", "toolKindFile": "File change", "toolKindCommand": "Command", @@ -2061,6 +2065,13 @@ "toolBuiltinFind": "Find", "toolBuiltinLs": "List", "toolBuiltinBash": "Bash", + 
"toolBuiltinBackgroundShell": "Background shell", + "toolActionBackgroundShellList": "List background shells", + "toolActionBackgroundShellRead": "Read background shell", + "toolActionBackgroundShellPoll": "Poll background shell", + "toolActionBackgroundShellWrite": "Write to background shell", + "toolActionBackgroundShellStop": "Stop background shell", + "toolActionBackgroundShellIncludeFinished": "include finished", "toolBuiltinDelegate": "Delegate task", "subagentDefaultName": "Subagent", "subagentStatusQueued": "Queued", diff --git a/src/renderer/src/locales/zh/common.json b/src/renderer/src/locales/zh/common.json index 3c8105c5f..74186ac71 100644 --- a/src/renderer/src/locales/zh/common.json +++ b/src/renderer/src/locales/zh/common.json @@ -1999,6 +1999,7 @@ "compactionRunning": "正在压缩上下文", "compactionManualCompleted": "已压缩上下文", "backgroundShellNotice.title": "后台 shell 已完成", + "backgroundShellNotice.kindLabel": "后台回调", "backgroundShellNotice.sessionId": "会话", "backgroundShellNotice.command": "命令", "backgroundShellNotice.exitCode": "退出码", @@ -2024,6 +2025,8 @@ "turnChangeFilesMany": "已编辑 {{count}} 个文件", "groupRanCommands": "运行了 {{count}} 条命令", "groupRanCommand": "运行了 1 条命令", + "groupRanBackgroundCommands": "运行了 {{count}} 条后台命令", + "groupRanBackgroundCommand": "运行了 1 条后台命令", "groupUsedTools": "调用了 {{count}} 个工具", "groupUsedTool": "调用了 1 个工具", "groupApprovals": "{{count}} 个审批", @@ -2039,6 +2042,7 @@ "rewindBusyError": "当前回合还在进行,等结束后再回退。", "toolActionFile": "修改文件", "toolActionCommand": "运行命令", + "toolActionBackgroundCommand": "运行后台命令", "toolActionTool": "调用工具", "toolKindFile": "文件变更", "toolKindCommand": "命令", @@ -2061,6 +2065,13 @@ "toolBuiltinFind": "查找", "toolBuiltinLs": "列出", "toolBuiltinBash": "命令", + "toolBuiltinBackgroundShell": "后台 shell", + "toolActionBackgroundShellList": "列出后台 shell", + "toolActionBackgroundShellRead": "读取后台 shell", + "toolActionBackgroundShellPoll": "轮询后台 shell", + "toolActionBackgroundShellWrite": "写入后台 shell", + 
"toolActionBackgroundShellStop": "停止后台 shell", + "toolActionBackgroundShellIncludeFinished": "含已结束", "toolBuiltinDelegate": "委派任务", "subagentDefaultName": "子代理", "subagentStatusQueued": "排队中", diff --git a/src/renderer/src/store/chat-store-runtime-helpers.test.ts b/src/renderer/src/store/chat-store-runtime-helpers.test.ts index c3fad2087..cba8d1858 100644 --- a/src/renderer/src/store/chat-store-runtime-helpers.test.ts +++ b/src/renderer/src/store/chat-store-runtime-helpers.test.ts @@ -1,185 +1,70 @@ import { describe, expect, it } from 'vitest' +import { + inferClientUserMessageSource, + isBackgroundShellNoticeUserMessage +} from '@shared/background-shell-notice' import type { ChatBlock } from '../agent/types' -import type { NormalizedThread } from '../agent/types' -import type { ChatState } from './chat-store-types' import { - findReusableEmptyThreadId, - hasPendingRuntimeWork, - settlePendingRuntimeWorkAfterInterrupt, - threadHasPendingRuntimeWork, - threadSnapshotLooksRunning + isOptimisticUserBlockId, + reconcileOptimisticUserBlock, + upsertUserBlock } from './chat-store-runtime-helpers' -describe('chat-store-runtime-helpers compaction state', () => { - it('keeps the thread busy while a compaction item is running', () => { - const runningCompaction: ChatBlock = { - kind: 'compaction', - id: 'compact-running', - summary: 'Compacting context', - status: 'running' - } - const completedCompaction: ChatBlock = { - kind: 'compaction', - id: 'compact-completed', - summary: 'Compacted context', - status: 'success' - } - - expect(hasPendingRuntimeWork(runningCompaction)).toBe(true) - expect(hasPendingRuntimeWork(completedCompaction)).toBe(false) - expect(threadSnapshotLooksRunning([runningCompaction])).toBe(true) - expect(threadSnapshotLooksRunning([completedCompaction])).toBe(false) - }) - - it('trusts an explicit idle thread status over stale pending blocks', () => { - const staleTool: ChatBlock = { - kind: 'tool', - id: 'tool-stale', - summary: 'Old tool', - status: 
'running', - toolKind: 'tool_call' - } - - expect(threadSnapshotLooksRunning([staleTool], 'idle')).toBe(false) - expect(threadSnapshotLooksRunning([staleTool], 'aborted')).toBe(false) - expect(threadSnapshotLooksRunning([staleTool], 'running')).toBe(true) - expect(threadSnapshotLooksRunning([staleTool])).toBe(true) - }) - - it('ignores stale pending work once the same turn has visible assistant content', () => { - const blocks: ChatBlock[] = [ - { kind: 'user', id: 'user-1', text: 'Run the task' }, - { - kind: 'tool', - id: 'tool-stale', - summary: 'Old tool', - status: 'running', - toolKind: 'tool_call' - }, - { kind: 'assistant', id: 'answer-1', text: 'The task is complete.' } - ] - - expect(threadHasPendingRuntimeWork(blocks)).toBe(false) - expect(threadSnapshotLooksRunning(blocks)).toBe(false) - }) - - it('keeps the thread busy when pending work has no later assistant answer', () => { - const blocks: ChatBlock[] = [ - { kind: 'user', id: 'user-1', text: 'Run the task' }, - { kind: 'assistant', id: 'partial-1', text: 'I will check that.' }, - { - kind: 'tool', - id: 'tool-running', - summary: 'Still running', - status: 'running', - toolKind: 'tool_call' - } - ] - - expect(threadHasPendingRuntimeWork(blocks)).toBe(true) - expect(threadSnapshotLooksRunning(blocks)).toBe(true) +describe('chat store runtime helpers', () => { + it('detects optimistic user block ids', () => { + expect(isOptimisticUserBlockId('u-123')).toBe(true) + expect(isOptimisticUserBlockId('item_turn_abc_user')).toBe(false) }) - it('does not let stale pending work from an older turn block new input', () => { - const blocks: ChatBlock[] = [ - { kind: 'user', id: 'user-1', text: 'First task' }, - { - kind: 'tool', - id: 'tool-stale', - summary: 'Old tool', - status: 'running', - toolKind: 'tool_call' - }, - { kind: 'user', id: 'user-2', text: 'Second task' }, - { kind: 'assistant', id: 'answer-2', text: 'Second answer.' 
} - ] - - expect(threadHasPendingRuntimeWork(blocks)).toBe(false) - expect(threadSnapshotLooksRunning(blocks)).toBe(false) + it('tags background shell notices locally from xml text without server metadata', () => { + const noticeText = + 'abcd1234npm run build0okread output' + expect(inferClientUserMessageSource(noticeText)).toBe('background_shell') + expect( + isBackgroundShellNoticeUserMessage({ + text: noticeText + }) + ).toBe(true) }) - it('settles local pending work after a successful interrupt', () => { - const blocks: ChatBlock[] = [ - { - kind: 'tool', - id: 'tool-running', - summary: 'Running tool', - status: 'running', - toolKind: 'tool_call' - }, - { - kind: 'approval', - id: 'approval-pending', - approvalId: 'approval-1', - summary: 'Needs approval', - status: 'pending' - }, - { - kind: 'user_input', - id: 'input-pending', - requestId: 'input-1', - questions: [], - status: 'pending' - }, - { - kind: 'tool', - id: 'tool-success', - summary: 'Done', - status: 'success', - toolKind: 'tool_call' + it('preserves the original user prompt when a background shell notice arrives', () => { + const originalUser: ChatBlock = { + kind: 'user', + id: 'item_turn_abc_user', + text: 'Run build in background' + } + const blocks: ChatBlock[] = [originalUser] + const notice = { + itemId: 'item_steered_notice', + turnId: 'turn_abc', + text: 'abcd1234npm run build0okread output', + meta: { + displayText: 'Background shell abcd1234 completed' } - ] - - const settled = settlePendingRuntimeWorkAfterInterrupt(blocks) - - expect(settled.map((block) => ('status' in block ? 
block.status : ''))).toEqual([ - 'error', - 'error', - 'cancelled', - 'success' - ]) - expect(settled.some(hasPendingRuntimeWork)).toBe(false) - }) -}) - -describe('findReusableEmptyThreadId', () => { - const workspace = '/work/project' - const makeThread = (overrides: Partial): NormalizedThread => ({ - id: 'thread', - title: '新会话', - updatedAt: '2026-06-14T00:00:00.000Z', - model: 'deepseek', - mode: 'agent', - workspace, - ...overrides - }) - const stateWith = (threads: NormalizedThread[]): ChatState => - ({ activeThreadId: null, threads, blocks: [] } as unknown as ChatState) - const emptyProvider = { getThreadDetail: async () => ({ blocks: [] }) } - - it('reuses an empty thread that still carries the default placeholder title', async () => { - const state = stateWith([makeThread({ id: 'blank', title: '新会话' })]) - const reused = await findReusableEmptyThreadId(state, emptyProvider, workspace) - expect(reused).toBe('blank') - }) - - it('does not reuse an empty thread that carries a meaningful title (e.g. a released requirement)', async () => { - // Regression: a freshly released SDD requirement thread is empty but keeps - // its requirement title. Reusing it would make the next "new conversation" - // inherit "旅游旅行社区网页" instead of starting fresh. 
- const state = stateWith([makeThread({ id: 'requirement', title: '旅游旅行社区网页' })]) - const reused = await findReusableEmptyThreadId(state, emptyProvider, workspace) - expect(reused).toBeNull() - }) + } - it('does not reuse the active thread when it carries a meaningful title', async () => { - const titled = makeThread({ id: 'requirement', title: '旅游旅行社区网页' }) - const state = { - activeThreadId: 'requirement', - threads: [titled], - blocks: [] - } as unknown as ChatState - const reused = await findReusableEmptyThreadId(state, emptyProvider, workspace) - expect(reused).toBeNull() + const canReconcileOptimisticUser = + !isBackgroundShellNoticeUserMessage(notice) && + 'item_turn_abc_user' !== notice.itemId && + isOptimisticUserBlockId('item_turn_abc_user') + + expect(canReconcileOptimisticUser).toBe(false) + + const reconciledBlocks = canReconcileOptimisticUser + ? reconcileOptimisticUserBlock(blocks, 'item_turn_abc_user', notice.itemId, notice.text) + : blocks + const nextBlocks = upsertUserBlock(reconciledBlocks, notice) + + expect(nextBlocks).toHaveLength(2) + expect(nextBlocks[0]).toMatchObject({ + kind: 'user', + id: 'item_turn_abc_user', + text: 'Run build in background' + }) + expect(nextBlocks[1]).toMatchObject({ + kind: 'user', + id: 'item_steered_notice', + meta: { messageSource: 'background_shell' } + }) }) }) diff --git a/src/renderer/src/store/chat-store-runtime-helpers.ts b/src/renderer/src/store/chat-store-runtime-helpers.ts index bd592f88f..3c90a57c1 100644 --- a/src/renderer/src/store/chat-store-runtime-helpers.ts +++ b/src/renderer/src/store/chat-store-runtime-helpers.ts @@ -4,7 +4,10 @@ import type { RuntimeDisclosureMetadata, UserMessageEventPayload } from '../agent/types' -import { isBackgroundShellNoticeSource } from '@shared/background-shell-notice' +import { + applyClientUserMessageSourceMeta, + isBackgroundShellNoticeUserMessage +} from '@shared/background-shell-notice' import { normalizeWorkspaceRoot } from '../lib/workspace-path' import { 
shouldAutoTitleThread } from '../lib/thread-title' import type { ChatState } from './chat-store-types' @@ -41,7 +44,7 @@ export function threadHasPendingRuntimeWork(blocks: ChatBlock[]): boolean { for (const block of blocks) { if (block.kind === 'user') { - if (isBackgroundShellNoticeSource(block.meta?.messageSource)) continue + if (isBackgroundShellNoticeUserMessage(block)) continue pendingInCurrentTurn = false continue } @@ -101,6 +104,8 @@ export function findLatestUserBlockId(blocks: ChatBlock[]): string | null { } export function upsertUserBlock(blocks: ChatBlock[], ev: UserMessageEventPayload): ChatBlock[] { + const clientMeta: RuntimeDisclosureMetadata = { ...(ev.meta ?? {}) } + applyClientUserMessageSourceMeta(clientMeta as Record, ev.text) const nextBlock: ChatBlock = { kind: 'user', id: ev.itemId, @@ -109,12 +114,12 @@ export function upsertUserBlock(blocks: ChatBlock[], ev: UserMessageEventPayload text: ev.text, ...(ev.modelLabel ? { modelLabel: ev.modelLabel } : {}), ...(ev.managedBy ? { managedBy: ev.managedBy } : {}), - ...(ev.meta ? { meta: ev.meta } : {}) + ...(Object.keys(clientMeta).length > 0 ? { meta: clientMeta } : {}) } const existingIndex = blocks.findIndex((block) => block.kind === 'user' && block.id === ev.itemId) if (existingIndex < 0) return [...blocks, nextBlock] const current = blocks[existingIndex] - const meta = mergeRuntimeDisclosureMeta( + const mergedMeta = mergeRuntimeDisclosureMeta( current.kind === 'user' ? current.meta : undefined, nextBlock.kind === 'user' ? nextBlock.meta : undefined ) @@ -122,7 +127,12 @@ export function upsertUserBlock(blocks: ChatBlock[], ev: UserMessageEventPayload ...current, ...nextBlock, createdAt: current.createdAt ?? nextBlock.createdAt, - ...(meta ? { meta } : {}) + ...(mergedMeta ? { meta: mergedMeta } : {}) + } + if (merged.kind === 'user') { + const metaRecord = { ...(merged.meta ?? 
{}) } as Record + applyClientUserMessageSourceMeta(metaRecord, merged.text) + merged.meta = Object.keys(metaRecord).length > 0 ? (metaRecord as RuntimeDisclosureMetadata) : undefined } const next = [...blocks] next[existingIndex] = merged @@ -140,6 +150,10 @@ function mergeRuntimeDisclosureMeta( } } +export function isOptimisticUserBlockId(id: string): boolean { + return id.startsWith('u-') +} + export function reconcileOptimisticUserBlock( blocks: ChatBlock[], optimisticId: string, diff --git a/src/renderer/src/store/chat-store-runtime.ts b/src/renderer/src/store/chat-store-runtime.ts index e1899adb6..f84861472 100644 --- a/src/renderer/src/store/chat-store-runtime.ts +++ b/src/renderer/src/store/chat-store-runtime.ts @@ -16,10 +16,12 @@ import i18n from '../i18n' import { describeRuntimeError, formatRuntimeError, getRuntimeErrorCode } from '../lib/format-runtime-error' import { isClawWorkspacePath, isInternalTemporaryWorkspace, normalizeWorkspaceRoot } from '../lib/workspace-path' import type { ClawImChannelV1 } from '@shared/app-settings' +import { isBackgroundShellNoticeUserMessage } from '@shared/background-shell-notice' import type { ChatState } from './chat-store-types' import { isClawThread } from './chat-store-helpers' import { collectAssistantTextForTurn, + isOptimisticUserBlockId, reconcileOptimisticUserBlock, settlePendingRuntimeWorkAfterInterrupt, threadSnapshotLooksRunning, @@ -634,31 +636,45 @@ export function buildThreadEventSink( const flushed = flushLiveBlocks(s) const baseBlocks = flushed.blocks ?? 
s.blocks const optimisticCurrentUserId = s.currentTurnUserId - const reconciledBlocks = + const isBackgroundShellNotice = isBackgroundShellNoticeUserMessage({ + text: ev.text, + meta: ev.meta + }) + const canReconcileOptimisticUser = + !isBackgroundShellNotice && optimisticCurrentUserId && optimisticCurrentUserId !== ev.itemId && + isOptimisticUserBlockId(optimisticCurrentUserId) && baseBlocks.some((block) => block.kind === 'user' && block.id === optimisticCurrentUserId) - ? reconcileOptimisticUserBlock( - baseBlocks, - optimisticCurrentUserId, - ev.itemId, - ev.text, - ev.modelLabel - ) - : baseBlocks + const reconciledBlocks = canReconcileOptimisticUser + ? reconcileOptimisticUserBlock( + baseBlocks, + optimisticCurrentUserId, + ev.itemId, + ev.text, + ev.modelLabel + ) + : baseBlocks const nextBlocks = upsertUserBlock(reconciledBlocks, ev) const startedAt = runtimeEventStartedAt(ev.createdAt) armBusyWatchdog(set, get) + const nextCurrentTurnUserId = isBackgroundShellNotice + ? optimisticCurrentUserId + : canReconcileOptimisticUser || !optimisticCurrentUserId + ? ev.itemId + : optimisticCurrentUserId return { ...flushed, blocks: nextBlocks, busy: true, currentTurnId: ev.turnId ?? s.currentTurnId, - currentTurnUserId: ev.itemId, - turnStartedAtByUserId: { - ...s.turnStartedAtByUserId, - [ev.itemId]: s.turnStartedAtByUserId[ev.itemId] ?? startedAt - }, + currentTurnUserId: nextCurrentTurnUserId, + turnStartedAtByUserId: isBackgroundShellNotice + ? s.turnStartedAtByUserId + : { + ...s.turnStartedAtByUserId, + [ev.itemId]: s.turnStartedAtByUserId[ev.itemId] ?? 
startedAt + }, error: clearRuntimeStreamRecoveringError(s.error) } }), diff --git a/src/shared/background-shell-notice.ts b/src/shared/background-shell-notice.ts index 720542c9e..c0c705a2d 100644 --- a/src/shared/background-shell-notice.ts +++ b/src/shared/background-shell-notice.ts @@ -41,3 +41,27 @@ export function isBackgroundShellNoticeSource( ): messageSource is 'background_shell' { return messageSource === 'background_shell' } + +export type ClientUserMessageSource = 'background_shell' + +/** Client-only hint derived from persisted user_message text, not from server metadata. */ +export function inferClientUserMessageSource(text: string): ClientUserMessageSource | undefined { + return parseBackgroundShellCompletionNotice(text) ? 'background_shell' : undefined +} + +export function applyClientUserMessageSourceMeta( + meta: Record, + text: string +): void { + const messageSource = inferClientUserMessageSource(text) + if (messageSource) meta.messageSource = messageSource + else delete meta.messageSource +} + +export function isBackgroundShellNoticeUserMessage(input: { + text: string + meta?: Record | null +}): boolean { + if (isBackgroundShellNoticeSource(input.meta?.messageSource)) return true + return inferClientUserMessageSource(input.text) === 'background_shell' +} From a3aaba0845737653109c5205cec8cee784eef1fe Mon Sep 17 00:00:00 2001 From: XingYu-Zhong <1736101137@qq.com> Date: Sun, 28 Jun 2026 21:43:05 +0800 Subject: [PATCH 09/18] fix(test): satisfy types/mocks for batched PRs #635 #636 - load-kun-diagnostics.test.ts: cast null runtime/tool mocks so the provider satisfies DiagnosticsProvider (fixes #635 typecheck failure) - chat-store-navigation-actions.test.ts: add applyChatContentMaxWidth to the apply-theme mock so settings-apply doesn't throw (fixes #636 nav test) Co-Authored-By: Claude Opus 4.8 --- src/renderer/src/lib/load-kun-diagnostics.test.ts | 8 ++++---- .../src/store/chat-store-navigation-actions.test.ts | 1 + 2 files changed, 5 insertions(+), 4 
deletions(-) diff --git a/src/renderer/src/lib/load-kun-diagnostics.test.ts b/src/renderer/src/lib/load-kun-diagnostics.test.ts index 69982bf9b..119442b93 100644 --- a/src/renderer/src/lib/load-kun-diagnostics.test.ts +++ b/src/renderer/src/lib/load-kun-diagnostics.test.ts @@ -26,8 +26,8 @@ describe('loadKunDiagnostics', () => { it('loads all memories by default for global settings diagnostics', async () => { const memoryRecords = [{ id: 'mem_1', content: 'remember this' }] as any const provider = { - getRuntimeInfo: async () => null, - getToolDiagnostics: async () => null, + getRuntimeInfo: async () => null as any, + getToolDiagnostics: async () => null as any, listMemories: async (options?: { all?: boolean }) => { expect(options).toEqual({ all: true, includeDeleted: false }) return memoryRecords @@ -43,8 +43,8 @@ describe('loadKunDiagnostics', () => { it('can scope memory loading to the current workspace when explicitly requested', async () => { const memoryRecords = [{ id: 'mem_ws', content: 'workspace only' }] as any const provider = { - getRuntimeInfo: async () => null, - getToolDiagnostics: async () => null, + getRuntimeInfo: async () => null as any, + getToolDiagnostics: async () => null as any, listMemories: async (options?: { all?: boolean }) => { expect(options).toEqual({ includeDeleted: false }) return memoryRecords diff --git a/src/renderer/src/store/chat-store-navigation-actions.test.ts b/src/renderer/src/store/chat-store-navigation-actions.test.ts index 7713ec613..d4f7cc36b 100644 --- a/src/renderer/src/store/chat-store-navigation-actions.test.ts +++ b/src/renderer/src/store/chat-store-navigation-actions.test.ts @@ -16,6 +16,7 @@ const applyThemeLibMock = vi.hoisted(() => ({ applyCursorSpotlightColor: vi.fn(), applyTheme: vi.fn(), applyUiFontScale: vi.fn(), + applyChatContentMaxWidth: vi.fn(), applyDocumentLocale: vi.fn() })) From f7d5767cb38205f2935409377cb5a2e8fe2db871 Mon Sep 17 00:00:00 2001 From: musnows Date: Sun, 28 Jun 2026 21:48:11 +0800 
Subject: [PATCH 10/18] fix(test): address PR review typecheck and timeline chip regressions Merge latest develop and narrow background-shell runtime test mocks, update steering queue usage in loop.test, and align tool bubble chip assertions with hideTurnDisclosure behavior. Co-authored-by: Cursor --- kun/tests/background-shell-runtime.test.ts | 15 +++++++++------ kun/tests/loop.test.ts | 2 +- .../chat/MessageTimeline.tool-summary.test.ts | 8 ++++---- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/kun/tests/background-shell-runtime.test.ts b/kun/tests/background-shell-runtime.test.ts index 54bc59a2b..2a45e6357 100644 --- a/kun/tests/background-shell-runtime.test.ts +++ b/kun/tests/background-shell-runtime.test.ts @@ -1,5 +1,8 @@ import { describe, expect, it, vi } from 'vitest' +import type { ThreadStore } from '../src/ports/thread-store.js' +import type { RuntimeEventRecorder } from '../src/services/runtime-event-recorder.js' import { BackgroundShellRuntime } from '../src/services/background-shell-runtime.js' +import type { TurnService } from '../src/services/turn-service.js' describe('BackgroundShellRuntime', () => { it('steers a running turn when a detached shell completes successfully', async () => { @@ -7,15 +10,15 @@ describe('BackgroundShellRuntime', () => { const startTurn = vi.fn(async () => ({ threadId: 'thr_1', turnId: 'turn_new', userMessageItemId: 'item_1' })) const runTurn = vi.fn(async () => undefined) const runtime = new BackgroundShellRuntime({ - events: { record: vi.fn(async () => undefined) }, + events: { record: vi.fn(async () => undefined) } as unknown as RuntimeEventRecorder, threadStore: { get: vi.fn(async () => ({ id: 'thr_1', status: 'running', turns: [{ id: 'turn_1', status: 'running' }] })) - }, - turns: { steerTurn, startTurn }, + } as unknown as ThreadStore, + turns: { steerTurn, startTurn } as unknown as TurnService, nowIso: () => '2026-01-01T00:00:00.000Z' }) runtime.bindAgentLoop({ runTurn }) @@ -49,15 +52,15 @@ 
describe('BackgroundShellRuntime', () => { const startTurn = vi.fn(async () => ({ threadId: 'thr_1', turnId: 'turn_new', userMessageItemId: 'item_1' })) const runTurn = vi.fn(async () => undefined) const runtime = new BackgroundShellRuntime({ - events: { record: vi.fn(async () => undefined) }, + events: { record: vi.fn(async () => undefined) } as unknown as RuntimeEventRecorder, threadStore: { get: vi.fn(async () => ({ id: 'thr_1', status: 'idle', turns: [{ id: 'turn_1', status: 'completed' }] })) - }, - turns: { steerTurn, startTurn }, + } as unknown as ThreadStore, + turns: { steerTurn, startTurn } as unknown as TurnService, nowIso: () => '2026-01-01T00:00:00.000Z' }) runtime.bindAgentLoop({ runTurn }) diff --git a/kun/tests/loop.test.ts b/kun/tests/loop.test.ts index e04f96cf0..ffae9b5aa 100644 --- a/kun/tests/loop.test.ts +++ b/kun/tests/loop.test.ts @@ -2027,7 +2027,7 @@ describe('AgentLoop', () => { it('steers the turn and injects user messages', async () => { const h = makeHarness(makeSilentModel()) await bootstrapThread(h) - h.steering.enqueue(h.turnId, 'follow up') + h.steering.enqueue(h.turnId, { text: 'follow up' }) await h.loop.runTurn(h.threadId, h.turnId) const items = await h.sessionStore.loadItems(h.threadId) const user = items.find((item) => item.kind === 'user_message' && item.text === 'follow up') diff --git a/src/renderer/src/components/chat/MessageTimeline.tool-summary.test.ts b/src/renderer/src/components/chat/MessageTimeline.tool-summary.test.ts index 236ede217..00e500edf 100644 --- a/src/renderer/src/components/chat/MessageTimeline.tool-summary.test.ts +++ b/src/renderer/src/components/chat/MessageTimeline.tool-summary.test.ts @@ -279,7 +279,7 @@ describe('MessageTimeline Kun runtime metadata smoke', () => { expect(html).not.toContain('Feishu / Lark inbound message') }) - it('renders attachment, Skill, memory, web source, and child-agent chips in bubbles', () => { + it('renders tool-specific metadata chips in tool bubbles', () => { const 
block: ToolBlock = toolBlock({ summary: 'web_search: docs', meta: { @@ -301,9 +301,9 @@ describe('MessageTimeline Kun runtime metadata smoke', () => { const html = renderToStaticMarkup(createElement(MessageBubble, { block })) - expect(html).toContain('Attachments 1') - expect(html).toContain('Skills 1') - expect(html).toContain('Memories 1') + expect(html).not.toContain('Attachments 1') + expect(html).not.toContain('Skills 1') + expect(html).not.toContain('Memories 1') expect(html).toContain('Child agent') expect(html).toContain('research') expect(html).toContain('Sources 1') From d98c3fc73404331938ef39f867d36acbbf8ae7c8 Mon Sep 17 00:00:00 2001 From: XingYu-Zhong <1736101137@qq.com> Date: Mon, 29 Jun 2026 00:10:42 +0800 Subject: [PATCH 11/18] fix(read-tracker): allow cross-turn edits when oldText still matches (#640) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The read-before-edit guard rejected an `edit` whenever the preceding `read` happened in an earlier turn. Agent responses routinely span multiple turns (long replies, or tool results arriving as separate turn items), so a read-in-turn-A then edit-in-turn-B sequence is legitimate, yet the turnId guard blocked it — forcing a fallback to sed/bash that mangles whitespace and indentation. Remove the turnId guard. Freshness is still enforced: requireOldTextInRead checks the oldText fragments against the cached read content, and the edit's own fuzzy matching runs against the current bytes on disk, so a stale SEARCH string fails there with a clear error instead of corrupting the file. Adds read-tracker.test.ts to lock in the behavior. 
Co-Authored-By: Claude Opus 4.8 --- kun/src/adapters/tool/read-tracker.test.ts | 99 ++++++++++++++++++++++ kun/src/adapters/tool/read-tracker.ts | 16 ++-- 2 files changed, 107 insertions(+), 8 deletions(-) create mode 100644 kun/src/adapters/tool/read-tracker.test.ts diff --git a/kun/src/adapters/tool/read-tracker.test.ts b/kun/src/adapters/tool/read-tracker.test.ts new file mode 100644 index 000000000..e25b908df --- /dev/null +++ b/kun/src/adapters/tool/read-tracker.test.ts @@ -0,0 +1,99 @@ +import { describe, expect, it } from 'vitest' +import { ReadTracker, normalizeReadTrackerOptions } from './read-tracker.js' +import type { ToolHostContext, ToolCallLike } from '../../ports/tool-host.js' + +function context(turnId: string, overrides: Partial<ToolHostContext> = {}): ToolHostContext { + return { + threadId: 'thread_1', + turnId, + workspace: '/ws', + approvalPolicy: 'never', + abortSignal: new AbortController().signal, + awaitApproval: async () => 'allow' as const, + ...overrides + } +} + +function readResult(turnId: string, path: string, content: string): { + context: ToolHostContext + call: ToolCallLike + output: unknown +} { + return { + context: context(turnId), + call: { callId: `read_${turnId}`, toolName: 'read', arguments: { path } }, + output: { path, relative_path: path, content, truncated: false } + } +} + +function editCall(path: string, oldText: string): ToolCallLike { + return { callId: 'edit_1', toolName: 'edit', arguments: { path, oldText, newText: 'x' } } +} + +describe('ReadTracker cross-turn edits (#640)', () => { + it('allows an edit in a later turn when the oldText is still present in the cached read', () => { + const tracker = new ReadTracker(normalizeReadTrackerOptions(true)) + tracker.observeToolResult(readResult('turn_a', 'file.ts', 'const value = 42\n')) + + // Edit arrives in a *different* turn than the read — the common case the + // turnId guard used to reject, forcing a fallback to sed/bash.
+ const verdict = tracker.validateBeforeTool({ + context: context('turn_b'), + call: editCall('file.ts', 'const value = 42') + }) + + expect(verdict).toEqual({ ok: true }) + }) + + it('still blocks an edit for a file that was never read', () => { + const tracker = new ReadTracker(normalizeReadTrackerOptions(true)) + const verdict = tracker.validateBeforeTool({ + context: context('turn_b'), + call: editCall('file.ts', 'const value = 42') + }) + + expect(verdict.ok).toBe(false) + if (!verdict.ok) expect(verdict.message).toContain('Read the current file contents') + }) + + it('still blocks a cross-turn edit when the oldText is not in the cached read', () => { + const tracker = new ReadTracker(normalizeReadTrackerOptions(true)) + tracker.observeToolResult(readResult('turn_a', 'file.ts', 'const value = 42\n')) + + const verdict = tracker.validateBeforeTool({ + context: context('turn_b'), + call: editCall('file.ts', 'const other = 99') + }) + + expect(verdict.ok).toBe(false) + if (!verdict.ok) expect(verdict.message).toContain('was not present in the latest read output') + }) + + it('allows a cross-turn multi-edit when every oldText fragment is present', () => { + const tracker = new ReadTracker(normalizeReadTrackerOptions(true)) + tracker.observeToolResult(readResult('turn_a', 'file.ts', 'alpha\nbeta\ngamma\n')) + + const verdict = tracker.validateBeforeTool({ + context: context('turn_b'), + call: { + callId: 'edit_2', + toolName: 'edit', + arguments: { path: 'file.ts', edits: [{ oldText: 'alpha' }, { oldText: 'gamma' }] } + } + }) + + expect(verdict).toEqual({ ok: true }) + }) + + it('allows a cross-turn edit on a prior read when content checking is disabled', () => { + const tracker = new ReadTracker(normalizeReadTrackerOptions({ enabled: true, requireOldTextInRead: false })) + tracker.observeToolResult(readResult('turn_a', 'file.ts', 'const value = 42\n')) + + const verdict = tracker.validateBeforeTool({ + context: context('turn_b'), + call: editCall('file.ts', 
'anything at all') + }) + + expect(verdict).toEqual({ ok: true }) + }) +}) diff --git a/kun/src/adapters/tool/read-tracker.ts b/kun/src/adapters/tool/read-tracker.ts index 883db32d9..8b6846e09 100644 --- a/kun/src/adapters/tool/read-tracker.ts +++ b/kun/src/adapters/tool/read-tracker.ts @@ -57,14 +57,14 @@ export class ReadTracker { 'Read the current file contents in this turn before editing so SEARCH text is based on fresh bytes.' } } - if (record.turnId !== input.context.turnId) { - return { - ok: false, - message: - `read-before-edit guard blocked edit for ${displayPath(rawPath, input.context.workspace)}. ` + - 'The previous read is from an earlier turn; read the file again before editing.' - } - } + // A read from an earlier turn still counts: agent responses routinely span + // multiple turns (a long reply, or tool results arriving as separate turn + // items), so read-in-turn-A then edit-in-turn-B is a legitimate sequence. + // Hard-blocking it forced a fallback to sed/bash, which mangles code (#640). + // Freshness is still enforced below — `requireOldTextInRead` checks the + // oldText fragments against the cached read content, and the edit's own + // fuzzy matching runs against the current bytes on disk, so a stale SEARCH + // string fails there with a clear error instead of corrupting the file. if (!this.options.requireOldTextInRead) return { ok: true } const missing = oldTextFragments(input.call.arguments).filter((fragment) => { if (!fragment.trim()) return false From 2c04df8c360e508a893e618604252d39a30eae3a Mon Sep 17 00:00:00 2001 From: XingYu-Zhong <1736101137@qq.com> Date: Mon, 29 Jun 2026 00:10:50 +0800 Subject: [PATCH 12/18] fix(mcp): keep runtime alive when a streamable-http MCP server disconnects (#639) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit An unexpected MCP disconnect (e.g. the server's network drops) could restart the whole runtime and blank the GUI. 
Serve mode treated every unhandledRejection as fatal and called process.exit, so a stray background rejection from MCP transport churn took the runtime down and the GUI supervisor then restarted it. - Split the serve crash handlers: uncaughtException still does a bounded graceful close + non-zero exit (the process state is genuinely unsafe), but unhandledRejection is now logged and non-fatal — Node keeps running. Extracted into serve-crash-handlers.ts so it is unit-testable without triggering serve-entry's top-level main(). - Attach an onerror handler when creating the SDK MCP client so a dropped SSE stream / exhausted reconnects are logged (and surfaced as unavailable) instead of silently swallowed or escaping as unhandled. The per-call reconnect in callMcpToolWithReconnect still recovers the connection on the next tool use. Co-Authored-By: Claude Opus 4.8 --- kun/src/adapters/tool/mcp-tool-provider.ts | 10 +++ kun/src/cli/serve-crash-handlers.test.ts | 77 ++++++++++++++++++++++ kun/src/cli/serve-crash-handlers.ts | 45 +++++++++++++ kun/src/cli/serve-entry.ts | 31 +-------- 4 files changed, 133 insertions(+), 30 deletions(-) create mode 100644 kun/src/cli/serve-crash-handlers.test.ts create mode 100644 kun/src/cli/serve-crash-handlers.ts diff --git a/kun/src/adapters/tool/mcp-tool-provider.ts b/kun/src/adapters/tool/mcp-tool-provider.ts index 046fb869b..978d85342 100644 --- a/kun/src/adapters/tool/mcp-tool-provider.ts +++ b/kun/src/adapters/tool/mcp-tool-provider.ts @@ -459,6 +459,16 @@ function workspaceMatchesRoots(workspace: string, roots: readonly string[]): boo async function createSdkMcpClient(serverId: string, server: McpServerConfig): Promise { const client = new Client({ name: `kun-${serverId}`, version: '0.1.0' }) + // Observe transport-level failures explicitly. 
The SDK routes a dropped SSE + // stream and exhausted background reconnects to `onerror`; with no handler + // they are silently swallowed, which hides real outages from the logs and (on + // some SDK/runtime versions) lets the rejection escape as unhandled. Handling + // it here keeps a streamable-http disconnect from destabilizing the runtime + // (#639) — the per-call reconnect in callMcpToolWithReconnect still recovers + // the connection on the next tool use. + client.onerror = (error) => { + process.stderr.write(`kun mcp[${serverId}]: transport error: ${redactSecretText(errorMessage(error))}\n`) + } const transport = createTransport(server) await client.connect(transport, { timeout: server.timeoutMs }) return { diff --git a/kun/src/cli/serve-crash-handlers.test.ts b/kun/src/cli/serve-crash-handlers.test.ts new file mode 100644 index 000000000..619569038 --- /dev/null +++ b/kun/src/cli/serve-crash-handlers.test.ts @@ -0,0 +1,77 @@ +import { afterEach, describe, expect, it, vi } from 'vitest' +import process from 'node:process' +import { installServeCrashHandlers } from './serve-crash-handlers.js' + +type Listener = (...args: unknown[]) => void + +/** + * installServeCrashHandlers registers process-wide listeners. Snapshot the + * existing ones so we can invoke (and later remove) only the pair this call + * added, without disturbing vitest's own handlers. 
+ */ +function install(getHandle: () => null = () => null): { + unhandled: Listener[] + uncaught: Listener[] + cleanup: () => void +} { + const beforeRejection = new Set(process.listeners('unhandledRejection')) + const beforeException = new Set(process.listeners('uncaughtException')) + installServeCrashHandlers(getHandle) + const unhandled = process + .listeners('unhandledRejection') + .filter((l) => !beforeRejection.has(l)) as unknown as Listener[] + const uncaught = process + .listeners('uncaughtException') + .filter((l) => !beforeException.has(l)) as unknown as Listener[] + return { + unhandled, + uncaught, + cleanup: () => { + for (const l of unhandled) process.removeListener('unhandledRejection', l as never) + for (const l of uncaught) process.removeListener('uncaughtException', l as never) + } + } +} + +afterEach(() => { + vi.restoreAllMocks() +}) + +describe('serve crash handlers (#639)', () => { + it('keeps the runtime alive on an unhandledRejection (e.g. an MCP transport drop)', () => { + const exitSpy = vi.spyOn(process, 'exit').mockImplementation((() => undefined) as never) + const writes: string[] = [] + vi.spyOn(process.stderr, 'write').mockImplementation(((chunk: unknown) => { + writes.push(String(chunk)) + return true + }) as never) + + const handlers = install() + try { + expect(handlers.unhandled).toHaveLength(1) + handlers.unhandled[0](new Error('SSE stream disconnected: socket hang up')) + + expect(exitSpy).not.toHaveBeenCalled() + expect(writes.join('')).toContain('unhandledRejection (non-fatal, runtime stays up)') + expect(writes.join('')).toContain('socket hang up') + } finally { + handlers.cleanup() + } + }) + + it('still exits non-zero on an uncaughtException so the supervisor restarts a clean process', () => { + const exitSpy = vi.spyOn(process, 'exit').mockImplementation((() => undefined) as never) + vi.spyOn(process.stderr, 'write').mockImplementation((() => true) as never) + + const handlers = install() + try { + 
expect(handlers.uncaught).toHaveLength(1) + handlers.uncaught[0](new Error('boom')) + + // ServeExitCode.runtime === 70 + expect(exitSpy).toHaveBeenCalledWith(70) + } finally { + handlers.cleanup() + } + }) +}) diff --git a/kun/src/cli/serve-crash-handlers.ts b/kun/src/cli/serve-crash-handlers.ts new file mode 100644 index 000000000..510999a94 --- /dev/null +++ b/kun/src/cli/serve-crash-handlers.ts @@ -0,0 +1,45 @@ +import process from 'node:process' +import { ServeExitCode } from './serve.js' +import type { KunServeHandle } from '../server/runtime-factory.js' + +/** + * Serve mode runs unattended under the GUI. The two failure modes are + * deliberately handled differently: + * + * - `uncaughtException` left the stack unwound mid-operation, so the process + * state is genuinely unsafe. Report it on stderr (the GUI captures the tail), + * attempt a bounded graceful close, then exit non-zero so the GUI supervisor + * can restart a fresh process. + * - `unhandledRejection` does NOT corrupt the process — Node keeps running. A + * stray background rejection (e.g. a streamable-http MCP server dropping its + * connection while a reconnect promise is in flight) is fully recoverable, so + * tearing the whole runtime down and blanking the GUI for it is the wrong + * trade (#639). Log it for the stderr tail and stay up; the MCP layer already + * reports the server unavailable and reconnects on the next request. + */ +export function installServeCrashHandlers(getHandle: () => KunServeHandle | null): void { + let crashing = false + const crash = (kind: string, error: unknown): void => { + if (crashing) return + crashing = true + const detail = error instanceof Error ? (error.stack ?? 
error.message) : String(error) + process.stderr.write(`kun serve: ${kind}: ${detail}\n`) + const finish = (): void => process.exit(ServeExitCode.runtime) + const handle = getHandle() + if (!handle) { + finish() + return + } + const deadline = setTimeout(finish, 3000) + deadline.unref() + void handle + .close() + .catch(() => undefined) + .finally(finish) + } + process.on('uncaughtException', (error) => crash('uncaughtException', error)) + process.on('unhandledRejection', (reason) => { + const detail = reason instanceof Error ? (reason.stack ?? reason.message) : String(reason) + process.stderr.write(`kun serve: unhandledRejection (non-fatal, runtime stays up): ${detail}\n`) + }) +} diff --git a/kun/src/cli/serve-entry.ts b/kun/src/cli/serve-entry.ts index 05a7469d3..e44b2ae5c 100644 --- a/kun/src/cli/serve-entry.ts +++ b/kun/src/cli/serve-entry.ts @@ -11,39 +11,10 @@ import { resolveEventLoopStallThresholdMs, startEventLoopMonitor } from '../server/event-loop-monitor.js' +import { installServeCrashHandlers } from './serve-crash-handlers.js' export const KUN_READY_PREFIX = 'KUN_READY ' -/** - * Serve mode runs unattended under the GUI. An uncaught error must not - * leave a half-dead process: report it on stderr (the GUI captures the - * tail), attempt a bounded graceful close, then exit non-zero so the - * GUI supervisor can restart us. - */ -function installServeCrashHandlers(getHandle: () => KunServeHandle | null): void { - let crashing = false - const crash = (kind: string, error: unknown): void => { - if (crashing) return - crashing = true - const detail = error instanceof Error ? (error.stack ?? 
error.message) : String(error) - process.stderr.write(`kun serve: ${kind}: ${detail}\n`) - const finish = (): void => process.exit(ServeExitCode.runtime) - const handle = getHandle() - if (!handle) { - finish() - return - } - const deadline = setTimeout(finish, 3000) - deadline.unref() - void handle - .close() - .catch(() => undefined) - .finally(finish) - } - process.on('uncaughtException', (error) => crash('uncaughtException', error)) - process.on('unhandledRejection', (reason) => crash('unhandledRejection', reason)) -} - /** * Serve-mode command. Kept separate from the dispatcher so GUI startup * still has the exact same KUN_READY handshake behavior. From 97b35ae9ebab5644e787f3323585727cf69d6e9f Mon Sep 17 00:00:00 2001 From: luoye520ww <100058663+luoye520ww@users.noreply.github.com> Date: Mon, 29 Jun 2026 00:50:37 +0800 Subject: [PATCH 13/18] fix(mcp): harden runtime reconnect lifecycle --- .../adapters/tool/mcp-tool-provider.test.ts | 143 +++++++++++ kun/src/adapters/tool/mcp-tool-provider.ts | 232 +++++++++++++++++- 2 files changed, 362 insertions(+), 13 deletions(-) create mode 100644 kun/src/adapters/tool/mcp-tool-provider.test.ts diff --git a/kun/src/adapters/tool/mcp-tool-provider.test.ts b/kun/src/adapters/tool/mcp-tool-provider.test.ts new file mode 100644 index 000000000..a30aecc35 --- /dev/null +++ b/kun/src/adapters/tool/mcp-tool-provider.test.ts @@ -0,0 +1,143 @@ +import { describe, expect, it, vi } from 'vitest' +import { McpCapabilityConfig, type McpServerConfig } from '../../contracts/capabilities.js' +import type { ToolHostContext } from '../../ports/tool-host.js' +import { + buildMcpToolProviders, + type McpClientLifecycleHandlers, + type McpClientLike, + type McpToolDescriptor +} from './mcp-tool-provider.js' + +class MockMcpClient implements McpClientLike { + lifecycle: McpClientLifecycleHandlers = {} + close = vi.fn(async () => undefined) + + constructor( + private readonly tools: McpToolDescriptor[], + readonly callTool: 
McpClientLike['callTool'] + ) {} + + async listTools(): Promise<{ tools: McpToolDescriptor[] }> { + return { tools: this.tools } + } + + setLifecycleHandlers(handlers: McpClientLifecycleHandlers): void { + this.lifecycle = handlers + } +} + +const server: McpServerConfig = { + enabled: true, + transport: 'streamable-http', + url: 'http://127.0.0.1:39999/mcp', + headers: {}, + args: [], + env: {}, + workspaceRoots: [], + trustScope: 'user', + trustedWorkspaceRoots: [], + timeoutMs: 1_000 +} + +const config = McpCapabilityConfig.parse({ + enabled: true, + servers: { docs: server }, + search: { enabled: false } +}) + +const context: ToolHostContext = { + threadId: 'thread_test', + turnId: 'turn_test', + workspace: '/workspace', + approvalPolicy: 'auto', + abortSignal: new AbortController().signal, + awaitApproval: vi.fn() +} + +const descriptor: McpToolDescriptor = { + name: 'lookup', + description: 'Lookup docs', + inputSchema: { type: 'object', properties: {} }, + annotations: { readOnlyHint: true } +} + +describe('mcp tool provider reliability', () => { + it('shares one reconnect across concurrent tool calls after a transport failure', async () => { + const first = new MockMcpClient([descriptor], vi.fn(async () => { + throw new Error('socket connection reset') + })) + const second = new MockMcpClient([descriptor], vi.fn(async () => ({ ok: true }))) + const clientFactory = vi.fn() + .mockResolvedValueOnce(first) + .mockResolvedValueOnce(second) + + const built = await buildMcpToolProviders(config, { + clientFactory, + nowIso: () => '2026-06-29T00:00:00.000Z' + }) + const tool = built.providers[0]?.tools[0] + expect(tool?.name).toBe('mcp_docs_lookup') + + const [one, two] = await Promise.all([ + tool!.execute({}, context), + tool!.execute({}, context) + ]) + + expect(clientFactory).toHaveBeenCalledTimes(2) + expect(first.close).toHaveBeenCalledTimes(1) + expect(second.callTool).toHaveBeenCalledTimes(2) + expect(one).toMatchObject({ output: { result: { ok: true } } }) 
+ expect(two).toMatchObject({ output: { result: { ok: true } } }) + expect(built.diagnostics[0]).toMatchObject({ + id: 'docs', + status: 'connected', + available: true, + reconnectAttempts: 1 + }) + }) + + it('marks lifecycle transport close as offline and reconnects on the next call', async () => { + const first = new MockMcpClient([descriptor], vi.fn(async () => ({ stale: true }))) + const second = new MockMcpClient([descriptor], vi.fn(async () => ({ fresh: true }))) + const clientFactory = vi.fn() + .mockResolvedValueOnce(first) + .mockResolvedValueOnce(second) + + const built = await buildMcpToolProviders(config, { clientFactory }) + first.lifecycle.onClose?.() + expect(built.diagnostics[0]).toMatchObject({ + status: 'error', + available: false, + lastError: 'MCP transport closed' + }) + + const tool = built.providers[0]!.tools[0]! + const result = await tool.execute({}, context) + + expect(result).toMatchObject({ output: { result: { fresh: true } } }) + expect(clientFactory).toHaveBeenCalledTimes(2) + expect(built.diagnostics[0]).toMatchObject({ + status: 'connected', + available: true, + reconnectAttempts: 1 + }) + }) + + it('does not mark deterministic tool errors as offline', async () => { + const client = new MockMcpClient([descriptor], vi.fn(async () => { + throw new Error('Invalid arguments: query is required') + })) + const built = await buildMcpToolProviders(config, { + clientFactory: vi.fn(async () => client) + }) + const tool = built.providers[0]!.tools[0]! 
+ + await expect(tool.execute({}, context)).rejects.toThrow('Invalid arguments') + + expect(built.diagnostics[0]).toMatchObject({ + status: 'connected', + available: true, + lastError: 'Invalid arguments: query is required' + }) + }) +}) diff --git a/kun/src/adapters/tool/mcp-tool-provider.ts b/kun/src/adapters/tool/mcp-tool-provider.ts index 046fb869b..264575580 100644 --- a/kun/src/adapters/tool/mcp-tool-provider.ts +++ b/kun/src/adapters/tool/mcp-tool-provider.ts @@ -50,6 +50,12 @@ export type McpClientLike = { options?: { signal?: AbortSignal; timeout?: number } ): Promise close(): Promise + setLifecycleHandlers?(handlers: McpClientLifecycleHandlers): void +} + +export type McpClientLifecycleHandlers = { + onError?: (error: Error) => void + onClose?: () => void } export type McpServerDiagnostic = { @@ -58,11 +64,15 @@ export type McpServerDiagnostic = { transport: McpServerConfig['transport'] trustScope: McpServerConfig['trustScope'] available: boolean - status: 'disabled' | 'connected' | 'error' + status: 'disabled' | 'connected' | 'reconnecting' | 'error' toolCount: number catalogFingerprint?: string catalogDrift?: boolean lastConnectedAt?: string + lastDisconnectedAt?: string + lastReconnectAt?: string + nextReconnectAt?: string + reconnectAttempts?: number lastError?: string } @@ -131,7 +141,16 @@ type McpConnectionState = { catalogFingerprint?: string catalogDrift?: boolean lastConnectedAt?: string + lastDisconnectedAt?: string + lastReconnectAt?: string + nextReconnectAt?: string + reconnectAttempts: number + reconnectBackoffMs: number + reconnectPromise?: Promise lastError?: string + status: 'connected' | 'reconnecting' | 'error' + diagnostic?: McpServerDiagnostic + intentionallyClosing?: boolean } export async function buildMcpToolProviders( @@ -199,8 +218,12 @@ export async function buildMcpToolProviders( client, clientFactory, nowIso, + reconnectAttempts: 0, + reconnectBackoffMs: DEFAULT_MCP_RECONNECT_BASE_DELAY_MS, + status: 'connected', 
lastConnectedAt: nowIso() } + attachMcpClientLifecycle(state) const listed = await refreshMcpConnectionCatalog(state) return { state, listed } })() @@ -240,7 +263,7 @@ export async function buildMcpToolProviders( available: true, tools }) - diagnostics.push(serverDiagnostic(state, 'connected', tools.length)) + diagnostics.push(syncMcpDiagnostic(state, 'connected', tools.length)) } const connectedServers = diagnostics.filter((diagnostic) => diagnostic.status === 'connected').length @@ -317,7 +340,7 @@ export async function buildMcpToolProviders( }, close: async () => { reconnectAborted = true - await Promise.all(connected.map((state) => state.client.close().catch(() => undefined))) + await Promise.all(connected.map((state) => closeMcpClient(state))) } } } @@ -366,6 +389,7 @@ async function reconnectFailedMcpServer( ): Promise { for (let attempt = 1; attempt <= maxAttempts; attempt += 1) { if (params.isAborted()) return + updateFailedServerDiagnostic(params.diagnostics, failed, 'reconnecting', attempt) await params.delay(Math.min(maxDelayMs, baseDelayMs * 2 ** (attempt - 1))) if (params.isAborted()) return try { @@ -376,17 +400,27 @@ async function reconnectFailedMcpServer( client, clientFactory: params.clientFactory, nowIso: params.nowIso, + reconnectAttempts: 0, + reconnectBackoffMs: DEFAULT_MCP_RECONNECT_BASE_DELAY_MS, + status: 'connected', lastConnectedAt: params.nowIso() } + attachMcpClientLifecycle(state) const listed = await refreshMcpConnectionCatalog(state) if (params.isAborted()) { - await client.close().catch(() => undefined) + await closeMcpClient(state) return } registerLateMcpConnection(params, state, listed) return - } catch { - // Leave the diagnostic as "error" and try again until attempts run out. 
+ } catch (error) { + updateFailedServerDiagnostic( + params.diagnostics, + failed, + 'error', + attempt, + formatMcpConnectionError(error, failed.server) + ) } } } @@ -416,12 +450,33 @@ function registerLateMcpConnection( // flips to connected below so the UI stops showing the server as failed. } } - const diagnostic = serverDiagnostic(state, 'connected', tools.length) + const diagnostic = syncMcpDiagnostic(state, 'connected', tools.length) const index = params.diagnostics.findIndex((entry) => entry.id === state.serverId) if (index >= 0) params.diagnostics[index] = diagnostic else params.diagnostics.push(diagnostic) } +function updateFailedServerDiagnostic( + diagnostics: McpServerDiagnostic[], + failed: FailedMcpServer, + status: Extract<McpServerDiagnostic['status'], 'reconnecting' | 'error'>, + attempt: number, + lastError?: string +): void { + const index = diagnostics.findIndex((entry) => entry.id === failed.serverId) + const previous = index >= 0 ? diagnostics[index] : undefined + const next: McpServerDiagnostic = { + ...(previous ?? serverDiagnostic({ serverId: failed.serverId, server: failed.server }, 'error', 0)), + status, + available: false, + reconnectAttempts: attempt, + lastReconnectAt: new Date().toISOString(), + ...(lastError ?
{ lastError: redactSecretText(lastError) } : {}) + } + if (index >= 0) diagnostics[index] = next + else diagnostics.push(next) +} + function defaultMcpReconnectDelay(ms: number): Promise { return new Promise((resolve) => { const timer = setTimeout(resolve, ms) @@ -470,7 +525,11 @@ async function createSdkMcpClient(serverId: string, server: McpServerConfig): Pr }) }, callTool: (input, options) => client.callTool(input, undefined, options), - close: () => client.close() + close: () => client.close(), + setLifecycleHandlers: (handlers) => { + client.onerror = handlers.onError + client.onclose = handlers.onClose + } } } @@ -539,6 +598,17 @@ function createMcpLocalTool( isError: true } } + if (state.status === 'error' && !canAttemptMcpReconnect(state)) { + return { + output: { + error: mcpReconnectCooldownMessage(state), + serverId: state.serverId, + status: state.status, + nextReconnectAt: state.nextReconnectAt + }, + isError: true + } + } const result = await callMcpToolWithReconnect( state, { name: descriptor.name, arguments: args }, @@ -591,6 +661,7 @@ async function refreshMcpConnectionCatalog(state: McpConnectionState): Promise { try { + await ensureMcpConnectionForCall(state, signal) return await state.client.callTool(input, { signal, timeout }) } catch (error) { - state.lastError = redactSecretText(errorMessage(error)) if (signal?.aborted) throw error // Deterministic server-side failures (validation errors, bad // arguments) come back identically on a fresh connection; tearing // down a healthy session for them just loses server state. Only // transport-looking failures earn a reconnect + retry. 
- if (!looksLikeMcpTransportError(error)) throw error - const client = await reconnectMcpConnection(state) + if (!looksLikeMcpTransportError(error)) { + state.lastError = redactSecretText(errorMessage(error)) + syncMcpDiagnostic(state) + throw error + } + markMcpConnectionError(state, error) + const client = await reconnectMcpConnection(state, signal) return client.callTool(input, { signal, timeout }) } } @@ -657,15 +733,112 @@ async function raceStartupTimeout( } } -async function reconnectMcpConnection(state: McpConnectionState): Promise { - await state.client.close().catch(() => undefined) +async function ensureMcpConnectionForCall( + state: McpConnectionState, + signal: AbortSignal | undefined +): Promise { + if (state.status === 'connected') return + await reconnectMcpConnection(state, signal) +} + +async function reconnectMcpConnection( + state: McpConnectionState, + signal?: AbortSignal +): Promise { + if (state.reconnectPromise) return state.reconnectPromise + if (!canAttemptMcpReconnect(state)) { + throw new Error(mcpReconnectCooldownMessage(state)) + } + state.status = 'reconnecting' + state.reconnectAttempts += 1 + state.lastReconnectAt = state.nowIso() + syncMcpDiagnostic(state, 'reconnecting') + state.reconnectPromise = reconnectMcpConnectionOnce(state, signal) + .catch((error) => { + markMcpReconnectFailed(state, error) + throw error + }) + .finally(() => { + state.reconnectPromise = undefined + }) + return state.reconnectPromise +} + +async function reconnectMcpConnectionOnce( + state: McpConnectionState, + signal?: AbortSignal +): Promise { + if (signal?.aborted) throw new Error('MCP reconnect aborted') + await closeMcpClient(state) + if (signal?.aborted) throw new Error('MCP reconnect aborted') const client = await state.clientFactory(state.serverId, state.server) state.client = client + state.status = 'connected' state.lastConnectedAt = state.nowIso() state.lastError = undefined + state.nextReconnectAt = undefined + state.reconnectBackoffMs = 
DEFAULT_MCP_RECONNECT_BASE_DELAY_MS + attachMcpClientLifecycle(state) + await refreshMcpConnectionCatalog(state) + syncMcpDiagnostic(state, 'connected') return client } +async function closeMcpClient(state: McpConnectionState): Promise { + state.intentionallyClosing = true + try { + await state.client.close().catch(() => undefined) + } finally { + state.intentionallyClosing = false + } +} + +function attachMcpClientLifecycle(state: McpConnectionState): void { + state.client.setLifecycleHandlers?.({ + onError: (error) => { + if (looksLikeMcpTransportError(error)) { + markMcpConnectionError(state, error) + } else { + state.lastError = redactSecretText(errorMessage(error)) + syncMcpDiagnostic(state) + } + }, + onClose: () => { + if (state.intentionallyClosing) return + markMcpConnectionError(state, new Error('MCP transport closed')) + } + }) +} + +function markMcpConnectionError(state: McpConnectionState, error: unknown): void { + if (state.intentionallyClosing) return + state.status = 'error' + state.lastError = redactSecretText(errorMessage(error)) + state.lastDisconnectedAt = state.nowIso() + syncMcpDiagnostic(state, 'error') +} + +function markMcpReconnectFailed(state: McpConnectionState, error: unknown): void { + state.status = 'error' + state.lastError = redactSecretText(errorMessage(error)) + state.lastDisconnectedAt = state.nowIso() + const nextDelay = state.reconnectBackoffMs + state.reconnectBackoffMs = Math.min(DEFAULT_MCP_RECONNECT_MAX_DELAY_MS, nextDelay * 2) + state.nextReconnectAt = new Date(Date.now() + nextDelay).toISOString() + syncMcpDiagnostic(state, 'error') +} + +function canAttemptMcpReconnect(state: McpConnectionState): boolean { + if (!state.nextReconnectAt) return true + return Date.now() >= Date.parse(state.nextReconnectAt) +} + +function mcpReconnectCooldownMessage(state: McpConnectionState): string { + return state.nextReconnectAt + ? `MCP server ${state.serverId} is offline; reconnect is cooling down until ${state.nextReconnectAt}. 
Last error: ${state.lastError ?? 'unknown error'}` + : `MCP server ${state.serverId} is offline. Last error: ${state.lastError ?? 'unknown error'}` +} + function shouldUseMcpSearch(config: NonNullable, toolCount: number): boolean { if (!config.enabled) return false if (config.mode === 'direct') return false @@ -701,6 +874,39 @@ function serverDiagnostic( } } +function syncMcpDiagnostic( + state: McpConnectionState, + status: McpServerDiagnostic['status'] = state.status, + toolCount = state.diagnostic?.toolCount ?? 0 +): McpServerDiagnostic { + const diagnostic: McpServerDiagnostic = { + id: state.serverId, + enabled: state.server.enabled, + transport: state.server.transport, + trustScope: state.server.trustScope, + available: status === 'connected', + status, + toolCount, + ...(state.catalogFingerprint ? { catalogFingerprint: state.catalogFingerprint } : {}), + ...(state.catalogDrift !== undefined ? { catalogDrift: state.catalogDrift } : {}), + ...(state.lastConnectedAt ? { lastConnectedAt: state.lastConnectedAt } : {}), + ...(state.lastDisconnectedAt ? { lastDisconnectedAt: state.lastDisconnectedAt } : {}), + ...(state.lastReconnectAt ? { lastReconnectAt: state.lastReconnectAt } : {}), + ...(state.nextReconnectAt ? { nextReconnectAt: state.nextReconnectAt } : {}), + ...(state.reconnectAttempts > 0 ? { reconnectAttempts: state.reconnectAttempts } : {}), + ...(state.lastError ? 
{ lastError: redactSecretText(state.lastError) } : {}) + } + if (!state.diagnostic) { + state.diagnostic = diagnostic + return diagnostic + } + for (const key of Object.keys(state.diagnostic) as Array) { + delete (state.diagnostic as Record)[key] + } + Object.assign(state.diagnostic, diagnostic) + return state.diagnostic +} + function catalogFingerprint(values: readonly string[]): string { return createHash('sha256') .update(JSON.stringify([...values].sort())) From 24d961a2f6f54fffbf7e774baecf2a5848bb7478 Mon Sep 17 00:00:00 2001 From: luoye520ww <100058663+luoye520ww@users.noreply.github.com> Date: Mon, 29 Jun 2026 14:02:18 +0800 Subject: [PATCH 14/18] feat(perf): add agent replay benchmark --- kun/README.md | 21 + kun/benchmarks/agent-core.json | 110 +++ kun/package.json | 1 + kun/src/benchmark/replay-benchmark.test.ts | 230 +++++++ kun/src/benchmark/replay-benchmark.ts | 734 +++++++++++++++++++++ kun/src/cli/replay-entry.ts | 184 ++++++ kun/src/contracts/runtime-info.ts | 7 + kun/src/server/runtime-factory.ts | 41 +- src/renderer/src/agent/kun-contract.ts | 7 + 9 files changed, 1320 insertions(+), 15 deletions(-) create mode 100644 kun/benchmarks/agent-core.json create mode 100644 kun/src/benchmark/replay-benchmark.test.ts create mode 100644 kun/src/benchmark/replay-benchmark.ts create mode 100644 kun/src/cli/replay-entry.ts diff --git a/kun/README.md b/kun/README.md index f63d06110..bbe80ce53 100644 --- a/kun/README.md +++ b/kun/README.md @@ -51,6 +51,27 @@ Run from the `kun/` directory. - `npm run serve` – start the runtime after a build. - `npm run dev` – rebuild in watch mode. +- `npm run benchmark:replay -- --suite ` - run a read-only HTTP/SSE agent replay suite. 
+ +### Agent replay benchmark + +Start a Kun runtime, set `KUN_RUNTIME_URL` and `KUN_RUNTIME_TOKEN`, then run the five-task smoke set: + +```bash +npm run benchmark:replay -- --suite benchmarks/agent-core.json --tag smoke --output replay-smoke.json +``` + +Run all 20 tasks twice and compare with an earlier report: + +```bash +npm run benchmark:replay -- --suite benchmarks/agent-core.json --repeat 2 \ + --baseline replay-baseline.json --output replay-current.json --fail-on-regression +``` + +Replay threads always use the `read-only` sandbox and disable interactive input. Reports include success rate, +TTFT, full latency, tool time, SSE delivery delay, token/cache/cost counters, and Kun process peak RSS. The runtime +token is accepted only through `KUN_RUNTIME_TOKEN`, so it does not leak through process arguments. + ## CLI `kun serve` accepts the following flags: diff --git a/kun/benchmarks/agent-core.json b/kun/benchmarks/agent-core.json new file mode 100644 index 000000000..f4a78ff7c --- /dev/null +++ b/kun/benchmarks/agent-core.json @@ -0,0 +1,110 @@ +{ + "version": 1, + "name": "kun-agent-core", + "defaults": { + "reasoningEffort": "off", + "timeoutMs": 300000 + }, + "tasks": [ + { + "id": "architecture-summary", + "tags": ["smoke", "architecture"], + "prompt": "Read the repository and explain the active Renderer -> preload -> main -> Kun runtime data path. Cite the most relevant file paths. Do not modify files." + }, + { + "id": "runtime-entrypoint", + "tags": ["smoke", "runtime"], + "prompt": "Find the Kun serve-mode composition root and summarize how stores, model clients, tools, and the agent loop are assembled. Cite exact file paths. Do not modify files." + }, + { + "id": "renderer-send-flow", + "tags": ["smoke", "frontend"], + "prompt": "Trace a chat message from the renderer composer through the preload/main bridge to the Kun turn endpoint. Return a concise ordered call path with files. Do not modify files." 
+ }, + { + "id": "sse-replay", + "tags": ["smoke", "runtime"], + "prompt": "Explain how Kun SSE event replay avoids duplicates and cursor rewind after reconnect or restart. Cite the implementation and tests. Do not modify files." + }, + { + "id": "mcp-lifecycle", + "tags": ["smoke", "mcp"], + "prompt": "Inspect MCP startup, tool discovery, execution, and reconnect behavior. Identify the main reliability boundaries and cite the implementation files. Do not modify files." + }, + { + "id": "cache-prefix", + "tags": ["cache"], + "prompt": "Explain what makes Kun's immutable prompt prefix stable and list dynamic data that must remain outside it. Cite code and documentation. Do not modify files." + }, + { + "id": "provider-url-contract", + "tags": ["provider"], + "prompt": "Trace how baseUrl and endpointFormat affect provider URL construction and request bodies across chat and auxiliary model calls. Cite all important consumers. Do not modify files." + }, + { + "id": "attachment-flow", + "tags": ["attachments"], + "prompt": "Trace an image or local file attachment from renderer selection to model input or fallback. Identify the cross-layer contract fields and failure points. Do not modify files." + }, + { + "id": "approval-flow", + "tags": ["runtime", "security"], + "prompt": "Trace a tool approval request from agent loop creation through SSE/UI resolution back to tool execution. Cite routes, gates, and renderer handlers. Do not modify files." + }, + { + "id": "goal-resume", + "tags": ["runtime", "goal"], + "prompt": "Explain how active goals survive runtime restart, how orphaned turns are reconciled, and where auto-resume is triggered. Cite tests if present. Do not modify files." + }, + { + "id": "subagent-permissions", + "tags": ["subagent", "security"], + "prompt": "Explain how subagent tool policies inherit or restrict built-in tools, MCP servers, and skills without escalating the parent permissions. Cite enforcement points. Do not modify files." 
+ }, + { + "id": "settings-persistence", + "tags": ["settings"], + "prompt": "Trace a Kun settings change from renderer state through validation/persistence to managed runtime restart. Highlight rollback behavior. Do not modify files." + }, + { + "id": "test-selection", + "tags": ["quality"], + "prompt": "Identify how the verify_changes tool selects and runs validation after edits. Explain its safety limits and output contract. Do not modify files." + }, + { + "id": "build-pipeline", + "tags": ["build"], + "prompt": "Summarize the development, typecheck, test, build, and packaging pipeline for Kun. Cite package scripts and packaging configuration. Do not modify files." + }, + { + "id": "security-boundaries", + "tags": ["security"], + "prompt": "Map the main trust boundaries for renderer IPC, filesystem tools, command execution, MCP, and secrets. Cite concrete enforcement files. Do not modify files." + }, + { + "id": "runtime-hotspots", + "tags": ["performance"], + "prompt": "Inspect runtime event persistence, SSE replay, tool execution, and context assembly. Identify three evidence-based performance or memory hotspots with file references. Do not modify files." + }, + { + "id": "thread-persistence", + "tags": ["storage"], + "prompt": "Explain how thread/session data is persisted and indexed across file and hybrid SQLite stores, including usage carryover. Cite implementation files. Do not modify files." + }, + { + "id": "model-capabilities", + "tags": ["provider"], + "prompt": "Explain how model capabilities control image input, tool calling, reasoning effort, endpoint format, and context limits. Cite schemas and request construction. Do not modify files." + }, + { + "id": "frontend-chunking", + "tags": ["frontend", "performance"], + "prompt": "Inspect renderer lazy loading and identify which Workbench surfaces are split into separate chunks and which heavy chat dependencies still load eagerly. Do not modify files." 
+ }, + { + "id": "failure-recovery", + "tags": ["runtime", "reliability"], + "prompt": "Map how the desktop app detects an unhealthy Kun child, budgets restarts, distinguishes settings restarts from crashes, and reports status to the renderer. Do not modify files." + } + ] +} diff --git a/kun/package.json b/kun/package.json index d5de81c3b..f69259cd1 100644 --- a/kun/package.json +++ b/kun/package.json @@ -61,6 +61,7 @@ "test": "vitest run", "test:watch": "vitest", "transcript:diff": "node ./scripts/transcript-diff.mjs", + "benchmark:replay": "npm run build && node ./dist/cli/replay-entry.js", "serve": "node ./dist/cli/serve-entry.js", "dev": "tsc -p tsconfig.build.json --watch" }, diff --git a/kun/src/benchmark/replay-benchmark.test.ts b/kun/src/benchmark/replay-benchmark.test.ts new file mode 100644 index 000000000..81ae4071a --- /dev/null +++ b/kun/src/benchmark/replay-benchmark.test.ts @@ -0,0 +1,230 @@ +import { describe, expect, it } from 'vitest' +import type { RuntimeEvent } from '../contracts/events.js' +import { + compareReplayReports, + ReplaySuiteSchema, + SseMessageDecoder, + summarizeReplayEvents, + summarizeReplayRuns, + type ObservedReplayEvent, + type ReplayReport, + type ReplayRunResult +} from './replay-benchmark.js' + +const baseTimestamp = Date.parse('2026-06-29T00:00:00.000Z') + +function observed(event: RuntimeEvent, elapsedMs: number, delayMs = 10): ObservedReplayEvent { + return { + event, + elapsedMs, + receivedAtMs: Date.parse(event.timestamp) + delayMs + } +} + +function itemBase(kind: string) { + return { + kind, + id: `item_${kind}`, + turnId: 'turn_1', + threadId: 'thread_1', + role: kind === 'tool_result' ? 
'tool' : 'assistant', + status: 'completed', + createdAt: '2026-06-29T00:00:00.000Z' + } +} + +describe('replay benchmark', () => { + it('decodes SSE messages across arbitrary chunks', () => { + const decoder = new SseMessageDecoder() + const payload = [ + 'id: 4', + 'event: turn_completed', + `data: ${JSON.stringify({ + kind: 'turn_completed', + seq: 4, + timestamp: '2026-06-29T00:00:00.000Z', + threadId: 'thread_1', + turnId: 'turn_1', + status: 'completed' + })}`, + '', + '' + ].join('\n') + + expect(decoder.push(payload.slice(0, 31))).toEqual([]) + expect(decoder.push(payload.slice(31))).toEqual([ + expect.objectContaining({ id: '4', event: 'turn_completed' }) + ]) + }) + + it('computes TTFT, tool, SSE, usage, and memory metrics from runtime events', () => { + const timestamp = (offset: number) => new Date(baseTimestamp + offset).toISOString() + const events: ObservedReplayEvent[] = [ + observed({ + kind: 'assistant_text_delta', + seq: 1, + timestamp: timestamp(100), + threadId: 'thread_1', + turnId: 'turn_1', + item: { ...itemBase('assistant_text'), kind: 'assistant_text', text: 'hello' } + } as RuntimeEvent, 120, 20), + observed({ + kind: 'tool_call_started', + seq: 2, + timestamp: timestamp(180), + threadId: 'thread_1', + turnId: 'turn_1', + item: { + ...itemBase('tool_call'), + kind: 'tool_call', + toolName: 'read', + callId: 'call_1', + toolKind: 'tool_call', + arguments: { path: 'README.md' } + } + } as RuntimeEvent, 200, 20), + observed({ + kind: 'tool_call_finished', + seq: 3, + timestamp: timestamp(430), + threadId: 'thread_1', + turnId: 'turn_1', + item: { + ...itemBase('tool_result'), + kind: 'tool_result', + toolName: 'read', + callId: 'call_1', + toolKind: 'tool_call', + output: { ok: true }, + isError: false + } + } as RuntimeEvent, 450, 20), + observed({ + kind: 'usage', + seq: 4, + timestamp: timestamp(500), + threadId: 'thread_1', + turnId: 'turn_1', + usage: { + promptTokens: 100, + completionTokens: 20, + totalTokens: 120, + cacheHitTokens: 
60, + cacheMissTokens: 40, + cacheHitRate: 0.6, + cacheableTokenHitRate: 0.75, + totalInputTokenHitRate: 0.6, + turns: 1, + costUsd: 0.001 + } + }, 520, 20), + observed({ + kind: 'turn_completed', + seq: 5, + timestamp: timestamp(580), + threadId: 'thread_1', + turnId: 'turn_1', + status: 'completed' + }, 600, 20) + ] + + expect(summarizeReplayEvents(events, 600, 256 * 1024 * 1024)).toEqual({ + ttftMs: 120, + totalMs: 600, + assistantChars: 5, + eventCount: 5, + errorEvents: 0, + toolCalls: 1, + toolDurationMs: 250, + toolDurationP95Ms: 250, + sseDelayP50Ms: 20, + sseDelayP95Ms: 20, + promptTokens: 100, + completionTokens: 20, + totalTokens: 120, + cacheHitTokens: 60, + cacheMissTokens: 40, + cacheHitRate: 0.6, + cacheableTokenHitRate: 0.75, + totalInputTokenHitRate: 0.6, + costUsd: 0.001, + peakRssBytes: 256 * 1024 * 1024 + }) + }) + + it('aggregates reports and identifies material regressions', () => { + const baselineRun = replayRun('passed', 100, 1_000, 0.8) + const currentRun = replayRun('passed', 200, 1_800, 0.6) + const baseline = report([baselineRun], '2026-06-28T00:00:00.000Z') + const current = report([currentRun], '2026-06-29T00:00:00.000Z') + const comparison = compareReplayReports(current, baseline) + + expect(comparison.ttftP95MsDelta).toBe(100) + expect(comparison.totalP95MsDelta).toBe(800) + expect(comparison.cacheHitRateDelta).toBeCloseTo(-0.2) + expect(comparison.regressions).toEqual(expect.arrayContaining([ + expect.stringContaining('total latency'), + expect.stringContaining('cache hit rate') + ])) + }) + + it('rejects duplicate task ids before spending model tokens', () => { + expect(() => ReplaySuiteSchema.parse({ + version: 1, + name: 'duplicate-suite', + tasks: [ + { id: 'same', prompt: 'one' }, + { id: 'same', prompt: 'two' } + ] + })).toThrow('duplicate replay task id') + }) +}) + +function replayRun( + status: ReplayRunResult['status'], + ttftMs: number, + totalMs: number, + cacheHitRate: number +): ReplayRunResult { + return { + id: 
'task#1', + taskId: 'task', + iteration: 1, + tags: [], + status, + failureReasons: [], + metrics: { + ttftMs, + totalMs, + assistantChars: 10, + eventCount: 5, + errorEvents: 0, + toolCalls: 0, + toolDurationMs: 0, + toolDurationP95Ms: null, + sseDelayP50Ms: 10, + sseDelayP95Ms: 20, + promptTokens: 100, + completionTokens: 20, + totalTokens: 120, + cacheHitTokens: cacheHitRate * 100, + cacheMissTokens: (1 - cacheHitRate) * 100, + cacheHitRate, + cacheableTokenHitRate: cacheHitRate, + totalInputTokenHitRate: cacheHitRate, + costUsd: 0.001, + peakRssBytes: 100 + } + } +} + +function report(runs: ReplayRunResult[], generatedAt: string): ReplayReport { + return { + version: 1, + generatedAt, + suite: { name: 'test', taskCount: runs.length, repeat: 1 }, + runtime: { baseUrl: 'http://127.0.0.1', startedAt: generatedAt }, + summary: summarizeReplayRuns(runs), + runs + } +} diff --git a/kun/src/benchmark/replay-benchmark.ts b/kun/src/benchmark/replay-benchmark.ts new file mode 100644 index 000000000..d8130f874 --- /dev/null +++ b/kun/src/benchmark/replay-benchmark.ts @@ -0,0 +1,734 @@ +import { resolve } from 'node:path' +import { z } from 'zod' +import { RuntimeEvent, type RuntimeEvent as RuntimeEventValue } from '../contracts/events.js' +import { RuntimeInfoResponse, type RuntimeInfoResponse as RuntimeInfoValue } from '../contracts/runtime-info.js' +import { TurnReasoningEffortSchema } from '../contracts/turns.js' +import type { UsageSnapshot } from '../contracts/usage.js' + +const ReplayExpectationSchema = z.object({ + minAssistantChars: z.number().int().nonnegative().default(1), + requiredTools: z.array(z.string().min(1)).default([]), + maxErrorEvents: z.number().int().nonnegative().default(0), + maxTotalMs: z.number().int().positive().optional() +}).strict() + +const ReplayTaskSchema = z.object({ + id: z.string().regex(/^[a-z0-9][a-z0-9_-]*$/), + prompt: z.string().min(1), + tags: z.array(z.string().min(1)).default([]), + workspace: z.string().min(1).optional(), + 
model: z.string().min(1).optional(), + providerId: z.string().min(1).optional(), + reasoningEffort: TurnReasoningEffortSchema.optional(), + timeoutMs: z.number().int().positive().optional(), + expect: ReplayExpectationSchema.default(() => ReplayExpectationSchema.parse({})) +}).strict() + +export const ReplaySuiteSchema = z.object({ + version: z.literal(1), + name: z.string().min(1), + defaults: z.object({ + model: z.string().min(1).optional(), + providerId: z.string().min(1).optional(), + reasoningEffort: TurnReasoningEffortSchema.optional(), + timeoutMs: z.number().int().positive().default(300_000) + }).strict().default(() => ({ timeoutMs: 300_000 })), + tasks: z.array(ReplayTaskSchema).min(1).max(100) +}).strict().superRefine((suite, context) => { + const ids = new Set() + suite.tasks.forEach((task, index) => { + if (ids.has(task.id)) { + context.addIssue({ + code: 'custom', + path: ['tasks', index, 'id'], + message: `duplicate replay task id: ${task.id}` + }) + } + ids.add(task.id) + }) +}) + +export type ReplaySuite = z.infer +export type ReplayTask = z.infer + +export type ObservedReplayEvent = { + event: RuntimeEventValue + receivedAtMs: number + elapsedMs: number +} + +export type ReplayRunMetrics = { + ttftMs: number | null + totalMs: number + assistantChars: number + eventCount: number + errorEvents: number + toolCalls: number + toolDurationMs: number + toolDurationP95Ms: number | null + sseDelayP50Ms: number | null + sseDelayP95Ms: number | null + promptTokens: number + completionTokens: number + totalTokens: number + cacheHitTokens: number | null + cacheMissTokens: number | null + cacheHitRate: number | null + cacheableTokenHitRate: number | null + totalInputTokenHitRate: number | null + costUsd: number + peakRssBytes: number | null +} + +export type ReplayRunResult = { + id: string + taskId: string + iteration: number + tags: string[] + threadId?: string + turnId?: string + status: 'passed' | 'failed' | 'timeout' | 'error' + failureReasons: string[] + 
metrics: ReplayRunMetrics + error?: string +} + +export type ReplayReportSummary = { + runCount: number + passed: number + failed: number + timedOut: number + errors: number + successRate: number + ttftP50Ms: number | null + ttftP95Ms: number | null + totalP50Ms: number | null + totalP95Ms: number | null + toolDurationP95Ms: number | null + sseDelayP95Ms: number | null + promptTokens: number + completionTokens: number + totalTokens: number + cacheHitRate: number | null + cacheableTokenHitRate: number | null + totalInputTokenHitRate: number | null + costUsd: number + peakRssBytes: number | null +} + +export type ReplayComparison = { + baselineGeneratedAt: string + successRateDelta: number + ttftP95MsDelta: number | null + totalP95MsDelta: number | null + promptTokensDelta: number + cacheHitRateDelta: number | null + costUsdDelta: number + peakRssBytesDelta: number | null + regressions: string[] +} + +export type ReplayReport = { + version: 1 + generatedAt: string + suite: { name: string; taskCount: number; repeat: number; tag?: string } + runtime: { + baseUrl: string + model?: string + startedAt: string + pid?: number + } + summary: ReplayReportSummary + runs: ReplayRunResult[] + comparison?: ReplayComparison +} + +export type RunReplaySuiteOptions = { + baseUrl: string + token?: string + workspace: string + repeat?: number + concurrency?: number + tag?: string + keepThreads?: boolean + fetchImpl?: typeof fetch + onProgress?: (completed: number, total: number, run: ReplayRunResult) => void +} + +type ReplayHttpClient = { + getRuntimeInfo(): Promise + createThread(body: Record): Promise<{ id: string }> + startTurn(threadId: string, body: Record): Promise<{ turnId: string }> + openEvents(threadId: string, signal: AbortSignal): Promise + deleteThread(threadId: string): Promise +} + +export async function runReplaySuite( + suiteInput: unknown, + options: RunReplaySuiteOptions +): Promise { + const suite = ReplaySuiteSchema.parse(suiteInput) + const repeat = 
clampInteger(options.repeat ?? 1, 1, 20) + const concurrency = clampInteger(options.concurrency ?? 1, 1, 8) + const baseUrl = options.baseUrl.replace(/\/$/, '') + const client = createReplayHttpClient(baseUrl, options.token, options.fetchImpl ?? fetch) + const runtime = await client.getRuntimeInfo() + const selectedTasks = options.tag + ? suite.tasks.filter((task) => task.tags.includes(options.tag!)) + : suite.tasks + if (selectedTasks.length === 0) { + throw new Error(`replay suite has no tasks tagged "${options.tag}"`) + } + const jobs = selectedTasks.flatMap((task) => + Array.from({ length: repeat }, (_, index) => ({ task, iteration: index + 1 })) + ) + const runs = new Array(jobs.length) + let cursor = 0 + let completed = 0 + const worker = async (): Promise => { + while (true) { + const jobIndex = cursor + cursor += 1 + const job = jobs[jobIndex] + if (!job) return + const run = await runReplayTask({ + suite, + task: job.task, + iteration: job.iteration, + runtime, + client, + workspace: options.workspace, + keepThread: options.keepThreads === true + }) + runs[jobIndex] = run + completed += 1 + options.onProgress?.(completed, jobs.length, run) + } + } + await Promise.all(Array.from({ length: Math.min(concurrency, jobs.length) }, () => worker())) + const report: ReplayReport = { + version: 1, + generatedAt: new Date().toISOString(), + suite: { + name: suite.name, + taskCount: selectedTasks.length, + repeat, + ...(options.tag ? { tag: options.tag } : {}) + }, + runtime: { + baseUrl, + ...(runtime.model ? { model: runtime.model } : {}), + startedAt: runtime.startedAt, + ...(runtime.pid ? 
{ pid: runtime.pid } : {}) + }, + summary: summarizeReplayRuns(runs), + runs + } + return report +} + +async function runReplayTask(input: { + suite: ReplaySuite + task: ReplayTask + iteration: number + runtime: RuntimeInfoValue + client: ReplayHttpClient + workspace: string + keepThread: boolean +}): Promise { + const { suite, task, iteration, runtime, client } = input + const runId = `${task.id}#${iteration}` + const model = task.model ?? suite.defaults.model ?? runtime.model + if (!model) return errorReplayRun(runId, task, iteration, 'runtime did not report a default model') + const workspace = resolve(input.workspace, task.workspace ?? '.') + let threadId: string | undefined + try { + const thread = await client.createThread({ + title: `[replay] ${runId}`, + titleAuto: false, + workspace, + model, + ...(task.providerId ?? suite.defaults.providerId + ? { providerId: task.providerId ?? suite.defaults.providerId } + : {}), + mode: 'agent', + approvalPolicy: 'auto', + sandboxMode: 'read-only' + }) + threadId = thread.id + const startedAt = performance.now() + const turn = await client.startTurn(threadId, { + prompt: task.prompt, + reasoningEffort: task.reasoningEffort ?? suite.defaults.reasoningEffort ?? 'off', + approvalPolicy: 'auto', + sandboxMode: 'read-only', + disableUserInput: true + }) + const timeoutMs = task.timeoutMs ?? suite.defaults.timeoutMs + const collected = await collectReplayEvents({ + client, + threadId, + turnId: turn.turnId, + startedAt, + timeoutMs + }) + const after = await client.getRuntimeInfo().catch(() => runtime) + const metrics = summarizeReplayEvents( + collected.events, + collected.elapsedMs, + after.memoryUsage?.peakRssBytes + ) + const failureReasons = replayExpectationFailures(task, collected.timedOut, metrics, collected.events) + return { + id: runId, + taskId: task.id, + iteration, + tags: task.tags, + threadId, + turnId: turn.turnId, + status: collected.timedOut ? 'timeout' : failureReasons.length > 0 ? 
'failed' : 'passed', + failureReasons, + metrics + } + } catch (error) { + return { + ...errorReplayRun(runId, task, iteration, errorMessage(error)), + ...(threadId ? { threadId } : {}) + } + } finally { + if (threadId && !input.keepThread) { + await client.deleteThread(threadId).catch(() => undefined) + } + } +} + +async function collectReplayEvents(input: { + client: ReplayHttpClient + threadId: string + turnId: string + startedAt: number + timeoutMs: number +}): Promise<{ events: ObservedReplayEvent[]; elapsedMs: number; timedOut: boolean }> { + const controller = new AbortController() + let timedOut = false + const timer = setTimeout(() => { + timedOut = true + controller.abort() + }, input.timeoutMs) + timer.unref?.() + const observed: ObservedReplayEvent[] = [] + try { + const response = await input.client.openEvents(input.threadId, controller.signal) + if (!response.body) throw new Error('runtime SSE response has no body') + const reader = response.body.getReader() + const decoder = new TextDecoder() + const sse = new SseMessageDecoder() + while (true) { + const chunk = await reader.read() + if (chunk.done) break + for (const message of sse.push(decoder.decode(chunk.value, { stream: true }))) { + const parsed = parseRuntimeSseMessage(message) + if (!parsed) continue + const receivedAtMs = Date.now() + observed.push({ + event: parsed, + receivedAtMs, + elapsedMs: Math.max(0, performance.now() - input.startedAt) + }) + if (parsed.turnId === input.turnId && isTerminalTurnEvent(parsed.kind)) { + controller.abort() + return { + events: observed, + elapsedMs: Math.max(0, performance.now() - input.startedAt), + timedOut: false + } + } + } + } + return { + events: observed, + elapsedMs: Math.max(0, performance.now() - input.startedAt), + timedOut + } + } catch (error) { + if (!timedOut && !controller.signal.aborted) throw error + return { + events: observed, + elapsedMs: Math.max(0, performance.now() - input.startedAt), + timedOut + } + } finally { + 
clearTimeout(timer) + controller.abort() + } +} + +export function summarizeReplayEvents( + observed: ObservedReplayEvent[], + elapsedMs: number, + peakRssBytes?: number +): ReplayRunMetrics { + const firstText = observed.find(({ event }) => + event.kind === 'assistant_text_delta' && event.item.kind === 'assistant_text' && event.item.text.length > 0 + ) ?? observed.find(({ event }) => + (event.kind === 'item_created' || event.kind === 'item_completed') && + event.item.kind === 'assistant_text' && + event.item.text.length > 0 + ) + const assistantTextByItem = new Map() + const toolStarted = new Map() + const toolDurations: number[] = [] + const toolCallIds = new Set() + const sseDelays: number[] = [] + let errorEvents = 0 + let usage: UsageSnapshot | undefined + for (const record of observed) { + const eventTime = Date.parse(record.event.timestamp) + if (Number.isFinite(eventTime)) sseDelays.push(Math.max(0, record.receivedAtMs - eventTime)) + if (record.event.kind === 'error' || record.event.kind === 'turn_failed') errorEvents += 1 + if (record.event.kind === 'usage') usage = record.event.usage + if ('item' in record.event && record.event.item.kind === 'assistant_text') { + const itemId = record.event.item.id + if (record.event.kind === 'assistant_text_delta') { + assistantTextByItem.set(itemId, `${assistantTextByItem.get(itemId) ?? 
''}${record.event.item.text}`) + } else { + assistantTextByItem.set(itemId, record.event.item.text) + } + } + if (record.event.kind === 'tool_call_started' && 'item' in record.event && 'callId' in record.event.item) { + toolStarted.set(record.event.item.callId, record.elapsedMs) + toolCallIds.add(record.event.item.callId) + } + if (record.event.kind === 'tool_call_finished' && 'item' in record.event && 'callId' in record.event.item) { + const started = toolStarted.get(record.event.item.callId) + if (started !== undefined) toolDurations.push(Math.max(0, record.elapsedMs - started)) + toolCallIds.add(record.event.item.callId) + } + } + const assistantChars = [...assistantTextByItem.values()].reduce((total, text) => total + text.length, 0) + const hit = usage?.cacheHitTokens + const miss = usage?.cacheMissTokens + const cacheTotal = (hit ?? 0) + (miss ?? 0) + return { + ttftMs: firstText ? roundMetric(firstText.elapsedMs) : null, + totalMs: roundMetric(elapsedMs), + assistantChars, + eventCount: observed.length, + errorEvents, + toolCalls: toolCallIds.size, + toolDurationMs: roundMetric(toolDurations.reduce((total, value) => total + value, 0)), + toolDurationP95Ms: percentile(toolDurations, 0.95), + sseDelayP50Ms: percentile(sseDelays, 0.5), + sseDelayP95Ms: percentile(sseDelays, 0.95), + promptTokens: usage?.promptTokens ?? 0, + completionTokens: usage?.completionTokens ?? 0, + totalTokens: usage?.totalTokens ?? 0, + cacheHitTokens: hit ?? null, + cacheMissTokens: miss ?? null, + cacheHitRate: usage?.cacheHitRate ?? (cacheTotal > 0 ? (hit ?? 0) / cacheTotal : null), + cacheableTokenHitRate: usage?.cacheableTokenHitRate ?? null, + totalInputTokenHitRate: usage?.totalInputTokenHitRate ?? null, + costUsd: usage?.costUsd ?? 0, + peakRssBytes: peakRssBytes ?? 
null + } +} + +export function summarizeReplayRuns(runs: ReplayRunResult[]): ReplayReportSummary { + const ttft = compactNumbers(runs.map((run) => run.metrics.ttftMs)) + const total = runs.map((run) => run.metrics.totalMs) + const toolP95 = compactNumbers(runs.map((run) => run.metrics.toolDurationP95Ms)) + const sseP95 = compactNumbers(runs.map((run) => run.metrics.sseDelayP95Ms)) + const hitTokens = compactNumbers(runs.map((run) => run.metrics.cacheHitTokens)).reduce(sum, 0) + const missTokens = compactNumbers(runs.map((run) => run.metrics.cacheMissTokens)).reduce(sum, 0) + const cacheableRates = compactNumbers(runs.map((run) => run.metrics.cacheableTokenHitRate)) + const totalInputRates = compactNumbers(runs.map((run) => run.metrics.totalInputTokenHitRate)) + const passed = runs.filter((run) => run.status === 'passed').length + return { + runCount: runs.length, + passed, + failed: runs.filter((run) => run.status === 'failed').length, + timedOut: runs.filter((run) => run.status === 'timeout').length, + errors: runs.filter((run) => run.status === 'error').length, + successRate: runs.length > 0 ? passed / runs.length : 0, + ttftP50Ms: percentile(ttft, 0.5), + ttftP95Ms: percentile(ttft, 0.95), + totalP50Ms: percentile(total, 0.5), + totalP95Ms: percentile(total, 0.95), + toolDurationP95Ms: percentile(toolP95, 0.95), + sseDelayP95Ms: percentile(sseP95, 0.95), + promptTokens: runs.reduce((totalValue, run) => totalValue + run.metrics.promptTokens, 0), + completionTokens: runs.reduce((totalValue, run) => totalValue + run.metrics.completionTokens, 0), + totalTokens: runs.reduce((totalValue, run) => totalValue + run.metrics.totalTokens, 0), + cacheHitRate: hitTokens + missTokens > 0 ? 
hitTokens / (hitTokens + missTokens) : null, + cacheableTokenHitRate: average(cacheableRates), + totalInputTokenHitRate: average(totalInputRates), + costUsd: runs.reduce((totalValue, run) => totalValue + run.metrics.costUsd, 0), + peakRssBytes: maxNullable(compactNumbers(runs.map((run) => run.metrics.peakRssBytes))) + } +} + +export function compareReplayReports(current: ReplayReport, baseline: ReplayReport): ReplayComparison { + const successRateDelta = current.summary.successRate - baseline.summary.successRate + const ttftP95MsDelta = nullableDelta(current.summary.ttftP95Ms, baseline.summary.ttftP95Ms) + const totalP95MsDelta = nullableDelta(current.summary.totalP95Ms, baseline.summary.totalP95Ms) + const cacheHitRateDelta = nullableDelta(current.summary.cacheHitRate, baseline.summary.cacheHitRate) + const peakRssBytesDelta = nullableDelta(current.summary.peakRssBytes, baseline.summary.peakRssBytes) + const regressions: string[] = [] + if (successRateDelta < 0) regressions.push(`success rate dropped by ${formatPercent(-successRateDelta)}`) + if (isRelativeRegression(current.summary.ttftP95Ms, baseline.summary.ttftP95Ms, 0.2, 300)) { + regressions.push(`TTFT p95 increased by ${ttftP95MsDelta}ms`) + } + if (isRelativeRegression(current.summary.totalP95Ms, baseline.summary.totalP95Ms, 0.2, 500)) { + regressions.push(`total latency p95 increased by ${totalP95MsDelta}ms`) + } + if (cacheHitRateDelta !== null && cacheHitRateDelta < -0.05) { + regressions.push(`cache hit rate dropped by ${formatPercent(-cacheHitRateDelta)}`) + } + if (baseline.summary.costUsd > 0 && current.summary.costUsd > baseline.summary.costUsd * 1.1) { + regressions.push(`cost increased by $${(current.summary.costUsd - baseline.summary.costUsd).toFixed(6)}`) + } + return { + baselineGeneratedAt: baseline.generatedAt, + successRateDelta, + ttftP95MsDelta, + totalP95MsDelta, + promptTokensDelta: current.summary.promptTokens - baseline.summary.promptTokens, + cacheHitRateDelta, + costUsdDelta: 
current.summary.costUsd - baseline.summary.costUsd, + peakRssBytesDelta, + regressions + } +} + +export type SseMessage = { event?: string; id?: string; data: string } + +export class SseMessageDecoder { + private buffer = '' + + push(chunk: string): SseMessage[] { + this.buffer += chunk.replace(/\r\n/g, '\n') + const messages: SseMessage[] = [] + let boundary = this.buffer.indexOf('\n\n') + while (boundary >= 0) { + const block = this.buffer.slice(0, boundary) + this.buffer = this.buffer.slice(boundary + 2) + const message = parseSseBlock(block) + if (message) messages.push(message) + boundary = this.buffer.indexOf('\n\n') + } + return messages + } +} + +function createReplayHttpClient( + baseUrl: string, + token: string | undefined, + fetchImpl: typeof fetch +): ReplayHttpClient { + const headers = (): Headers => { + const value = new Headers({ accept: 'application/json' }) + if (token) value.set('authorization', `Bearer ${token}`) + return value + } + const requestJson = async (path: string, init: RequestInit = {}): Promise => { + const requestHeaders = headers() + if (init.body) requestHeaders.set('content-type', 'application/json') + new Headers(init.headers).forEach((value, key) => requestHeaders.set(key, value)) + const response = await fetchImpl(`${baseUrl}${path}`, { + ...init, + headers: requestHeaders + }) + if (!response.ok) { + const body = (await response.text().catch(() => '')).slice(0, 1_000) + throw new Error(`${init.method ?? 
'GET'} ${path} failed (${response.status}): ${body}`) + } + return await response.json() as T + } + return { + async getRuntimeInfo() { + return RuntimeInfoResponse.parse(await requestJson('/v1/runtime/info')) + }, + createThread: (body) => requestJson('/v1/threads', { method: 'POST', body: JSON.stringify(body) }), + startTurn: (threadId, body) => requestJson(`/v1/threads/${encodeURIComponent(threadId)}/turns`, { + method: 'POST', + body: JSON.stringify(body) + }), + async openEvents(threadId, signal) { + const requestHeaders = headers() + requestHeaders.set('accept', 'text/event-stream') + const response = await fetchImpl(`${baseUrl}/v1/threads/${encodeURIComponent(threadId)}/events?since_seq=0`, { + headers: requestHeaders, + signal + }) + if (!response.ok) { + const body = (await response.text().catch(() => '')).slice(0, 1_000) + throw new Error(`GET events failed (${response.status}): ${body}`) + } + return response + }, + async deleteThread(threadId) { + await requestJson(`/v1/threads/${encodeURIComponent(threadId)}`, { method: 'DELETE' }) + } + } +} + +function parseSseBlock(block: string): SseMessage | null { + if (!block.trim()) return null + let event: string | undefined + let id: string | undefined + const data: string[] = [] + for (const line of block.split('\n')) { + if (!line || line.startsWith(':')) continue + const separator = line.indexOf(':') + const field = separator >= 0 ? line.slice(0, separator) : line + const value = separator >= 0 ? line.slice(separator + 1).replace(/^ /, '') : '' + if (field === 'event') event = value + else if (field === 'id') id = value + else if (field === 'data') data.push(value) + } + if (data.length === 0) return null + return { ...(event ? { event } : {}), ...(id ? 
{ id } : {}), data: data.join('\n') } +} + +function parseRuntimeSseMessage(message: SseMessage): RuntimeEventValue | null { + let value: unknown + try { + value = JSON.parse(message.data) + } catch { + return null + } + const parsed = RuntimeEvent.safeParse(value) + if (parsed.success) return parsed.data + if (message.event === 'error') { + const detail = value && typeof value === 'object' && 'message' in value + ? String((value as { message?: unknown }).message ?? 'unknown SSE error') + : 'unknown SSE error' + throw new Error(`runtime SSE error: ${detail}`) + } + return null +} + +function replayExpectationFailures( + task: ReplayTask, + timedOut: boolean, + metrics: ReplayRunMetrics, + events: ObservedReplayEvent[] +): string[] { + const failures: string[] = [] + if (timedOut) failures.push('turn timed out') + const terminal = events.find(({ event }) => event.kind === 'turn_completed' || event.kind === 'turn_failed' || event.kind === 'turn_aborted') + if (!terminal) failures.push('no terminal turn event') + else if (terminal.event.kind !== 'turn_completed') failures.push(`turn ended with ${terminal.event.kind}`) + if (metrics.assistantChars < task.expect.minAssistantChars) { + failures.push(`assistant output ${metrics.assistantChars} chars is below ${task.expect.minAssistantChars}`) + } + if (metrics.errorEvents > task.expect.maxErrorEvents) { + failures.push(`error event count ${metrics.errorEvents} exceeds ${task.expect.maxErrorEvents}`) + } + if (task.expect.maxTotalMs && metrics.totalMs > task.expect.maxTotalMs) { + failures.push(`total latency ${metrics.totalMs}ms exceeds ${task.expect.maxTotalMs}ms`) + } + const usedTools = new Set(events.flatMap(({ event }) => { + if (!('item' in event) || !('toolName' in event.item)) return [] + return [event.item.toolName] + })) + for (const tool of task.expect.requiredTools) { + if (!usedTools.has(tool)) failures.push(`required tool was not used: ${tool}`) + } + return failures +} + +function errorReplayRun(id: string, 
task: ReplayTask, iteration: number, error: string): ReplayRunResult { + return { + id, + taskId: task.id, + iteration, + tags: task.tags, + status: 'error', + failureReasons: [error], + metrics: emptyReplayMetrics(), + error + } +} + +function emptyReplayMetrics(): ReplayRunMetrics { + return { + ttftMs: null, + totalMs: 0, + assistantChars: 0, + eventCount: 0, + errorEvents: 0, + toolCalls: 0, + toolDurationMs: 0, + toolDurationP95Ms: null, + sseDelayP50Ms: null, + sseDelayP95Ms: null, + promptTokens: 0, + completionTokens: 0, + totalTokens: 0, + cacheHitTokens: null, + cacheMissTokens: null, + cacheHitRate: null, + cacheableTokenHitRate: null, + totalInputTokenHitRate: null, + costUsd: 0, + peakRssBytes: null + } +} + +function isTerminalTurnEvent(kind: RuntimeEventValue['kind']): boolean { + return kind === 'turn_completed' || kind === 'turn_failed' || kind === 'turn_aborted' +} + +function percentile(values: number[], quantile: number): number | null { + if (values.length === 0) return null + const sorted = [...values].sort((left, right) => left - right) + const index = Math.min(sorted.length - 1, Math.max(0, Math.ceil(quantile * sorted.length) - 1)) + return roundMetric(sorted[index] ?? 0) +} + +function average(values: number[]): number | null { + return values.length > 0 ? values.reduce(sum, 0) / values.length : null +} + +function maxNullable(values: number[]): number | null { + return values.length > 0 ? Math.max(...values) : null +} + +function compactNumbers(values: Array): number[] { + return values.filter((value): value is number => typeof value === 'number' && Number.isFinite(value)) +} + +function nullableDelta(current: number | null, baseline: number | null): number | null { + return current === null || baseline === null ? 
null : current - baseline +} + +function isRelativeRegression( + current: number | null, + baseline: number | null, + ratio: number, + minimumDelta: number +): boolean { + if (current === null || baseline === null || baseline <= 0) return false + return current - baseline >= minimumDelta && current > baseline * (1 + ratio) +} + +function roundMetric(value: number): number { + return Math.round(value * 100) / 100 +} + +function clampInteger(value: number, min: number, max: number): number { + return Math.max(min, Math.min(max, Math.floor(value))) +} + +function sum(left: number, right: number): number { + return left + right +} + +function formatPercent(value: number): string { + return `${(value * 100).toFixed(2)}%` +} + +function errorMessage(error: unknown): string { + return error instanceof Error ? error.message : String(error) +} diff --git a/kun/src/cli/replay-entry.ts b/kun/src/cli/replay-entry.ts new file mode 100644 index 000000000..2cf0238ec --- /dev/null +++ b/kun/src/cli/replay-entry.ts @@ -0,0 +1,184 @@ +#!/usr/bin/env node +import { mkdir, readFile, writeFile } from 'node:fs/promises' +import { dirname, resolve } from 'node:path' +import { + compareReplayReports, + runReplaySuite, + type ReplayReport +} from '../benchmark/replay-benchmark.js' + +type CliOptions = { + suitePath?: string + baseUrl: string + workspace: string + outputPath?: string + baselinePath?: string + repeat: number + concurrency: number + tag?: string + keepThreads: boolean + failOnRegression: boolean + help: boolean +} + +const options = parseArgs(process.argv.slice(2)) +if (options.help) { + printUsage() + process.exit(0) +} +if (!options.suitePath) { + printUsage() + process.exit(2) +} + +const suitePath = resolve(options.suitePath) +const suite = JSON.parse(await readFile(suitePath, 'utf8')) as unknown +const report = await runReplaySuite(suite, { + baseUrl: options.baseUrl, + token: process.env.KUN_RUNTIME_TOKEN, + workspace: options.workspace, + repeat: options.repeat, + 
concurrency: options.concurrency, + ...(options.tag ? { tag: options.tag } : {}), + keepThreads: options.keepThreads, + onProgress: (completed, total, run) => { + const ttft = run.metrics.ttftMs === null ? 'n/a' : `${Math.round(run.metrics.ttftMs)}ms` + console.error( + `[${completed}/${total}] ${run.id} ${run.status} ` + + `ttft=${ttft} total=${Math.round(run.metrics.totalMs)}ms tokens=${run.metrics.totalTokens}` + ) + } +}) + +if (options.baselinePath) { + const baseline = JSON.parse(await readFile(resolve(options.baselinePath), 'utf8')) as ReplayReport + report.comparison = compareReplayReports(report, baseline) +} + +printSummary(report) +if (options.outputPath) { + const outputPath = resolve(options.outputPath) + await mkdir(dirname(outputPath), { recursive: true }) + await writeFile(outputPath, `${JSON.stringify(report, null, 2)}\n`, 'utf8') + console.error(`Replay report written to ${outputPath}`) +} else { + console.log(JSON.stringify(report, null, 2)) +} + +if (options.failOnRegression && report.comparison?.regressions.length) { + process.exitCode = 1 +} + +function parseArgs(args: string[]): CliOptions { + const options: CliOptions = { + baseUrl: process.env.KUN_RUNTIME_URL ?? 'http://127.0.0.1:18788', + workspace: resolve(process.env.INIT_CWD ?? 
process.cwd()), + repeat: 1, + concurrency: 1, + keepThreads: false, + failOnRegression: false, + help: false + } + for (let index = 0; index < args.length; index += 1) { + const arg = args[index] + switch (arg) { + case '--suite': + options.suitePath = requiredValue(args, ++index, arg) + break + case '--base-url': + options.baseUrl = requiredValue(args, ++index, arg) + break + case '--workspace': + options.workspace = resolve(requiredValue(args, ++index, arg)) + break + case '--output': + options.outputPath = requiredValue(args, ++index, arg) + break + case '--baseline': + options.baselinePath = requiredValue(args, ++index, arg) + break + case '--tag': + options.tag = requiredValue(args, ++index, arg) + break + case '--repeat': + options.repeat = positiveInteger(requiredValue(args, ++index, arg), arg) + break + case '--concurrency': + options.concurrency = positiveInteger(requiredValue(args, ++index, arg), arg) + break + case '--keep-threads': + options.keepThreads = true + break + case '--fail-on-regression': + options.failOnRegression = true + break + case '--help': + case '-h': + options.help = true + break + default: + throw new Error(`unknown replay option: ${arg}`) + } + } + return options +} + +function requiredValue(args: string[], index: number, flag: string): string { + const value = args[index] + if (!value || value.startsWith('--')) throw new Error(`${flag} requires a value`) + return value +} + +function positiveInteger(value: string, flag: string): number { + const parsed = Number(value) + if (!Number.isInteger(parsed) || parsed <= 0) throw new Error(`${flag} must be a positive integer`) + return parsed +} + +function printUsage(): void { + console.log('Usage:') + console.log(' npm --prefix kun run benchmark:replay -- --suite [options]') + console.log('') + console.log('Options:') + console.log(' --base-url Kun runtime URL (or KUN_RUNTIME_URL)') + console.log(' --workspace Workspace for replay tasks') + console.log(' --tag Run only tasks with this 
tag') + console.log(' --repeat Repeat each selected task (default 1)') + console.log(' --concurrency Parallel tasks, capped at 8 (default 1)') + console.log(' --baseline Compare against an earlier report') + console.log(' --output Write the full machine-readable report') + console.log(' --keep-threads Keep generated replay threads') + console.log(' --fail-on-regression Exit 1 when comparison thresholds regress') + console.log('') + console.log('Authentication: set KUN_RUNTIME_TOKEN; it is intentionally not accepted as a CLI flag.') +} + +function printSummary(report: ReplayReport): void { + const summary = report.summary + console.error('') + console.error(`Replay suite: ${report.suite.name}`) + console.error(`Success: ${summary.passed}/${summary.runCount} (${formatRate(summary.successRate)})`) + console.error(`TTFT p50/p95: ${formatMs(summary.ttftP50Ms)} / ${formatMs(summary.ttftP95Ms)}`) + console.error(`Total p50/p95: ${formatMs(summary.totalP50Ms)} / ${formatMs(summary.totalP95Ms)}`) + console.error(`SSE delay p95: ${formatMs(summary.sseDelayP95Ms)}`) + console.error(`Tokens: ${summary.promptTokens} input + ${summary.completionTokens} output`) + console.error(`Cache hit: ${formatRate(summary.cacheHitRate)}`) + console.error(`Cost: $${summary.costUsd.toFixed(6)}`) + console.error(`Peak RSS: ${summary.peakRssBytes === null ? 'n/a' : formatBytes(summary.peakRssBytes)}`) + if (report.comparison) { + console.error(`Regressions: ${report.comparison.regressions.length}`) + for (const regression of report.comparison.regressions) console.error(` - ${regression}`) + } +} + +function formatMs(value: number | null): string { + return value === null ? 'n/a' : `${Math.round(value)}ms` +} + +function formatRate(value: number | null): string { + return value === null ? 
'n/a' : `${(value * 100).toFixed(2)}%` +} + +function formatBytes(value: number): string { + return `${(value / 1024 / 1024).toFixed(1)} MiB` +} diff --git a/kun/src/contracts/runtime-info.ts b/kun/src/contracts/runtime-info.ts index dd154e9a1..57ce055ae 100644 --- a/kun/src/contracts/runtime-info.ts +++ b/kun/src/contracts/runtime-info.ts @@ -17,6 +17,13 @@ export const RuntimeInfoResponse = z insecure: z.boolean().optional(), startedAt: z.string(), pid: z.number().int().positive().optional(), + memoryUsage: z.object({ + rssBytes: z.number().int().nonnegative(), + peakRssBytes: z.number().int().nonnegative(), + heapUsedBytes: z.number().int().nonnegative(), + heapTotalBytes: z.number().int().nonnegative(), + externalBytes: z.number().int().nonnegative() + }).strict().optional(), capabilities: RuntimeCapabilityManifest }) .strict() diff --git a/kun/src/server/runtime-factory.ts b/kun/src/server/runtime-factory.ts index 81a7292f4..761116428 100644 --- a/kun/src/server/runtime-factory.ts +++ b/kun/src/server/runtime-factory.ts @@ -570,21 +570,32 @@ export async function createKunServeRuntime( insecure: options.insecure, allocateSeq, nowIso, - info: () => ({ - host: options.host, - port: options.port, - configPath: options.configPath, - dataDir: options.dataDir, - model: options.model, - endpointFormat: options.endpointFormat ?? DEFAULT_MODEL_ENDPOINT_FORMAT, - approvalPolicy: options.approvalPolicy, - sandboxMode: options.sandboxMode, - tokenEconomyMode: options.tokenEconomyMode, - insecure: options.insecure, - startedAt, - pid: process.pid, - capabilities - }), + info: () => { + const memory = process.memoryUsage() + const peakRssBytes = Math.max(memory.rss, process.resourceUsage().maxRSS * 1024) + return { + host: options.host, + port: options.port, + configPath: options.configPath, + dataDir: options.dataDir, + model: options.model, + endpointFormat: options.endpointFormat ?? 
DEFAULT_MODEL_ENDPOINT_FORMAT, + approvalPolicy: options.approvalPolicy, + sandboxMode: options.sandboxMode, + tokenEconomyMode: options.tokenEconomyMode, + insecure: options.insecure, + startedAt, + pid: process.pid, + memoryUsage: { + rssBytes: memory.rss, + peakRssBytes, + heapUsedBytes: memory.heapUsed, + heapTotalBytes: memory.heapTotal, + externalBytes: memory.external + }, + capabilities + } + }, toolDiagnostics: async () => ({ providers: registry.diagnostics(), mcpServers: mcpProviders.diagnostics, diff --git a/src/renderer/src/agent/kun-contract.ts b/src/renderer/src/agent/kun-contract.ts index 54794c4f2..cb73c6fbc 100644 --- a/src/renderer/src/agent/kun-contract.ts +++ b/src/renderer/src/agent/kun-contract.ts @@ -261,6 +261,13 @@ export type CoreRuntimeInfoJson = { insecure?: boolean startedAt: string pid?: number + memoryUsage?: { + rssBytes: number + peakRssBytes: number + heapUsedBytes: number + heapTotalBytes: number + externalBytes: number + } capabilities: CoreRuntimeCapabilityManifestJson } From a57069f591e691066f78047f002ccd55e2c07a37 Mon Sep 17 00:00:00 2001 From: luoye520ww <100058663+luoye520ww@users.noreply.github.com> Date: Mon, 29 Jun 2026 14:23:11 +0800 Subject: [PATCH 15/18] perf(renderer): split settings and timeline bundles --- src/renderer/src/components/SettingsView.tsx | 149 ++++++++++++------ src/renderer/src/components/Workbench.tsx | 54 ++++--- .../chat/message-timeline-bubbles.tsx | 4 +- 3 files changed, 133 insertions(+), 74 deletions(-) diff --git a/src/renderer/src/components/SettingsView.tsx b/src/renderer/src/components/SettingsView.tsx index 81bad9fc9..265410511 100644 --- a/src/renderer/src/components/SettingsView.tsx +++ b/src/renderer/src/components/SettingsView.tsx @@ -1,5 +1,5 @@ -import type { ReactElement } from 'react' -import { useCallback, useEffect, useMemo, useRef, useState } from 'react' +import type { ComponentProps, ReactElement } from 'react' +import { lazy, Suspense, useCallback, useEffect, useMemo, useRef, 
useState } from 'react' import { useTranslation } from 'react-i18next' import { DEFAULT_WRITE_INLINE_COMPLETION_BASE_URL, @@ -44,7 +44,6 @@ import { } from '../lib/settings-home-paths' import { useChatStore, type SettingsRouteSection } from '../store/chat-store' import { SettingsSidebar } from './SettingsSidebar' -import { WriteDebugLogModal } from './settings-debug-log' import { useSettingsGuiUpdate } from './use-settings-gui-update' import { DEFAULT_WORKSPACE_ROOT, @@ -56,23 +55,72 @@ import { } from './settings-utils' import { loadKunDiagnostics } from '../lib/load-kun-diagnostics' import { SETTINGS_CHANGED_EVENT, emitRendererSettingsChanged } from '../lib/keyboard-shortcut-settings' -import { - AgentsSettingsSection, - ArchivedThreadsSettingsSection, - ClawSettingsSection, - EasterEggSettingsSection, - GeneralSettingsSection, - KeyboardShortcutsSettingsSection, - LlmDebugSettingsSection, - WorktreeSettingsSection, - MediaGenerationSettingsSection, - MemorySettingsSection, - ProvidersSettingsSection, - SpeechToTextSettingsSection, - UpdatesSettingsSection, - WriteSettingsSection, - TerminalSettingsSection -} from './settings-sections' +import { GeneralSettingsSection } from './settings-section-general' + +const ProvidersSettingsSection = lazy(() => + import('./settings-section-providers').then((module) => ({ default: module.ProvidersSettingsSection })) +) +const WriteSettingsSection = lazy(() => + import('./settings-section-write').then((module) => ({ default: module.WriteSettingsSection })) +) +const MediaGenerationSettingsSection = lazy(() => + import('./settings-section-media-generation').then((module) => ({ default: module.MediaGenerationSettingsSection })) +) +const SpeechToTextSettingsSection = lazy(() => + import('./settings-section-speech-to-text').then((module) => ({ default: module.SpeechToTextSettingsSection })) +) +const AgentsSettingsSection = lazy(() => + import('./settings-section-agents').then((module) => ({ default: module.AgentsSettingsSection 
})) +) +const ArchivedThreadsSettingsSection = lazy(() => + import('./settings-section-archives').then((module) => ({ default: module.ArchivedThreadsSettingsSection })) +) +const WorktreeSettingsSection = lazy(() => + import('./settings-section-worktree').then((module) => ({ default: module.WorktreeSettingsSection })) +) +const MemorySettingsSection = lazy(() => + import('./settings-section-memory').then((module) => ({ default: module.MemorySettingsSection })) +) +const KeyboardShortcutsSettingsSection = lazy(() => + import('./settings-section-shortcuts').then((module) => ({ default: module.KeyboardShortcutsSettingsSection })) +) +const EasterEggSettingsSection = lazy(() => + import('./settings-section-easter-egg').then((module) => ({ default: module.EasterEggSettingsSection })) +) +const ClawSettingsSection = lazy(() => + import('./settings-section-claw').then((module) => ({ default: module.ClawSettingsSection })) +) +const UpdatesSettingsSection = lazy(() => + import('./settings-section-updates').then((module) => ({ default: module.UpdatesSettingsSection })) +) +const TerminalSettingsSection = lazy(() => + import('./settings-section-terminal').then((module) => ({ default: module.TerminalSettingsSection })) +) +const LlmDebugSettingsSection = lazy(() => + import('./settings-section-llm-debug').then((module) => ({ default: module.LlmDebugSettingsSection })) +) +const WriteDebugLogModal = lazy(() => + import('./settings-debug-log').then((module) => ({ default: module.WriteDebugLogModal })) +) + +function LoadedAgentsSettingsSection({ + onReady, + ...props +}: ComponentProps & { onReady: () => void }): ReactElement { + useEffect(() => { + onReady() + }, [onReady]) + return +} + +function SettingsSectionFallback(): ReactElement { + return ( +
+
+
+
+ ) +} type SettingsCategory = 'general' | 'providers' | 'write' | 'mediaGeneration' | 'speechToText' | 'agents' | 'archives' | 'permissions' | 'worktree' | 'memory' | 'shortcuts' | 'easterEgg' | 'claw' | 'updates' | 'debug' | 'terminal' type SaveStatus = 'idle' | 'saving' | 'saved' | 'error' @@ -132,6 +180,7 @@ export function SettingsView(): ReactElement { const [memoryDiagnostics, setMemoryDiagnostics] = useState(null) const [runtimeDiagnosticsBusy, setRuntimeDiagnosticsBusy] = useState(false) const [runtimeDiagnosticsNotice, setRuntimeDiagnosticsNotice] = useState(null) + const [agentsSectionReady, setAgentsSectionReady] = useState(false) const [writeDebugModalOpen, setWriteDebugModalOpen] = useState(false) const [writeCompletionDebugEntries, setWriteCompletionDebugEntries] = useState([]) const [writeCompletionDebugSelectedId, setWriteCompletionDebugSelectedId] = useState(null) @@ -159,6 +208,7 @@ export function SettingsView(): ReactElement { const formGuiUpdateChannel = form?.guiUpdate?.channel const formCursorSpotlight = form?.cursorSpotlight const formCursorSpotlightColor = form?.cursorSpotlightColor + const markAgentsSectionReady = useCallback(() => setAgentsSectionReady(true), []) const settingsPlatform = typeof window !== 'undefined' ? window.kunGui?.platform ?? '' : '' const settingsHomeDir = typeof window !== 'undefined' ? window.kunGui?.homeDir ?? 
'' : '' const compactHomePath = useCallback((value: string): string => @@ -355,6 +405,7 @@ export function SettingsView(): ReactElement { ) { return } + if (!agentsSectionReady) return const refs: Record< Exclude, HTMLDivElement | null @@ -369,7 +420,7 @@ export function SettingsView(): ReactElement { window.requestAnimationFrame(() => { target.scrollIntoView({ behavior: 'smooth', block: 'start' }) }) - }, [category, form, settingsSection]) + }, [agentsSectionReady, category, form, settingsSection]) useEffect(() => { return () => { @@ -1107,20 +1158,24 @@ export function SettingsView(): ReactElement { ) : null} {category === 'general' ? : null} - {category === 'providers' ? : null} - {category === 'write' ? : null} - {category === 'mediaGeneration' ? : null} - {category === 'speechToText' ? : null} - {category === 'agents' ? : null} - {category === 'archives' ? : null} - {category === 'worktree' ? : null} - {category === 'memory' ? : null} - {category === 'shortcuts' ? : null} - {category === 'easterEgg' ? : null} - {category === 'claw' ? : null} - {category === 'updates' ? : null} - {category === 'terminal' ? : null} - {category === 'debug' ? : null} + }> + {category === 'providers' ? : null} + {category === 'write' ? : null} + {category === 'mediaGeneration' ? : null} + {category === 'speechToText' ? : null} + {category === 'agents' ? ( + + ) : null} + {category === 'archives' ? : null} + {category === 'worktree' ? : null} + {category === 'memory' ? : null} + {category === 'shortcuts' ? : null} + {category === 'easterEgg' ? : null} + {category === 'claw' ? : null} + {category === 'updates' ? : null} + {category === 'terminal' ? : null} + {category === 'debug' ? : null} +
{saveStatus === 'error' && saveError ? ( @@ -1145,17 +1200,19 @@ export function SettingsView(): ReactElement {
) : null} {writeDebugModalOpen ? ( - void loadWriteDebugEntries()} - onClear={() => void clearWriteDebugEntries()} - onClose={() => setWriteDebugModalOpen(false)} - t={t} - /> + + void loadWriteDebugEntries()} + onClear={() => void clearWriteDebugEntries()} + onClose={() => setWriteDebugModalOpen(false)} + t={t} + /> + ) : null}
) diff --git a/src/renderer/src/components/Workbench.tsx b/src/renderer/src/components/Workbench.tsx index e489e9de9..d733ab94a 100644 --- a/src/renderer/src/components/Workbench.tsx +++ b/src/renderer/src/components/Workbench.tsx @@ -41,7 +41,6 @@ import { } from '../lib/dev-preview-detection' import { Sidebar } from './chat/Sidebar' import { WorkbenchTopBar, type RightPanelMode } from './chat/WorkbenchTopBar' -import { MessageTimeline } from './chat/MessageTimeline' import { SubagentReturnBar } from './chat/message-timeline-empty' import { IkunCameoLayer, KunCelebrationLayer } from './chat/AnimatedWorkLogo' import { @@ -112,6 +111,9 @@ import { shouldSuppressRuntimeErrorBanner } from '../lib/runtime-banner-visibili const ChangeInspector = lazy(() => import('./ChangeInspector').then((module) => ({ default: module.ChangeInspector })) ) +const MessageTimeline = lazy(() => + import('./chat/MessageTimeline').then((module) => ({ default: module.MessageTimeline })) +) const DevBrowserPanel = lazy(() => import('./DevBrowserPanel').then((module) => ({ default: module.DevBrowserPanel })) ) @@ -2737,30 +2739,32 @@ export function Workbench(): ReactElement {
- void probeRuntime('user', { restart: true })} - onOpenSettings={() => openSettings('agents')} - onSelectSuggestion={(text) => setInput(text)} - focusModeEnabled={focusModeEnabled} - planActionsBusy={busy} - onBuildPlan={() => void buildGuiPlan()} - onOpenPlan={openGuiPlanPanel} - devPreviewCard={ - showDevPreviewCard ? ( - - ) : null - } - /> + }> + void probeRuntime('user', { restart: true })} + onOpenSettings={() => openSettings('agents')} + onSelectSuggestion={(text) => setInput(text)} + focusModeEnabled={focusModeEnabled} + planActionsBusy={busy} + onBuildPlan={() => void buildGuiPlan()} + onOpenPlan={openGuiPlanPanel} + devPreviewCard={ + showDevPreviewCard ? ( + + ) : null + } + /> + {uiModeCameosEnabled && !focusModeEnabled ? : null} {!focusModeEnabled ? : null}
diff --git a/src/renderer/src/components/chat/message-timeline-bubbles.tsx b/src/renderer/src/components/chat/message-timeline-bubbles.tsx index bbf64501f..1f3eeb8ee 100644 --- a/src/renderer/src/components/chat/message-timeline-bubbles.tsx +++ b/src/renderer/src/components/chat/message-timeline-bubbles.tsx @@ -1,7 +1,5 @@ import type { ReactElement } from 'react' import { memo, useEffect, useMemo, useRef, useState } from 'react' -import ReactMarkdown from 'react-markdown' -import remarkGfm from 'remark-gfm' import { useTranslation } from 'react-i18next' import { ArrowDown, Check, ChevronDown, ChevronRight, Copy, Download, File, FileEdit, GitFork, ImageIcon, Loader2, MessageSquareQuote, PencilLine, RotateCcw, Terminal, Video, Wrench } from 'lucide-react' import type { AttachmentReference, ChatBlock, GeneratedFileReference, RuntimeDisclosureMetadata, ToolBlock, UserFileReference, UserInputAnswer } from '../../agent/types' @@ -1415,7 +1413,7 @@ function MessageBubbleImpl({ return (
- {block.text} +
) From 16f43ca801174a09f204cf8dee92263254e72c01 Mon Sep 17 00:00:00 2001 From: XingYu-Zhong <1736101137@qq.com> Date: Mon, 29 Jun 2026 22:19:52 +0800 Subject: [PATCH 16/18] fix(benchmark): harden replay cleanup and expectations --- kun/benchmarks/agent-core.json | 60 +++++--- kun/src/benchmark/replay-benchmark.test.ts | 169 +++++++++++++++++++++ kun/src/benchmark/replay-benchmark.ts | 29 +++- kun/src/cli/replay-entry.ts | 7 +- 4 files changed, 240 insertions(+), 25 deletions(-) diff --git a/kun/benchmarks/agent-core.json b/kun/benchmarks/agent-core.json index f4a78ff7c..66d078d5c 100644 --- a/kun/benchmarks/agent-core.json +++ b/kun/benchmarks/agent-core.json @@ -9,102 +9,122 @@ { "id": "architecture-summary", "tags": ["smoke", "architecture"], - "prompt": "Read the repository and explain the active Renderer -> preload -> main -> Kun runtime data path. Cite the most relevant file paths. Do not modify files." + "prompt": "Read the repository and explain the active Renderer -> preload -> main -> Kun runtime data path. Cite the most relevant file paths. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } }, { "id": "runtime-entrypoint", "tags": ["smoke", "runtime"], - "prompt": "Find the Kun serve-mode composition root and summarize how stores, model clients, tools, and the agent loop are assembled. Cite exact file paths. Do not modify files." + "prompt": "Find the Kun serve-mode composition root and summarize how stores, model clients, tools, and the agent loop are assembled. Cite exact file paths. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } }, { "id": "renderer-send-flow", "tags": ["smoke", "frontend"], - "prompt": "Trace a chat message from the renderer composer through the preload/main bridge to the Kun turn endpoint. Return a concise ordered call path with files. Do not modify files." 
+ "prompt": "Trace a chat message from the renderer composer through the preload/main bridge to the Kun turn endpoint. Return a concise ordered call path with files. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } }, { "id": "sse-replay", "tags": ["smoke", "runtime"], - "prompt": "Explain how Kun SSE event replay avoids duplicates and cursor rewind after reconnect or restart. Cite the implementation and tests. Do not modify files." + "prompt": "Explain how Kun SSE event replay avoids duplicates and cursor rewind after reconnect or restart. Cite the implementation and tests. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } }, { "id": "mcp-lifecycle", "tags": ["smoke", "mcp"], - "prompt": "Inspect MCP startup, tool discovery, execution, and reconnect behavior. Identify the main reliability boundaries and cite the implementation files. Do not modify files." + "prompt": "Inspect MCP startup, tool discovery, execution, and reconnect behavior. Identify the main reliability boundaries and cite the implementation files. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } }, { "id": "cache-prefix", "tags": ["cache"], - "prompt": "Explain what makes Kun's immutable prompt prefix stable and list dynamic data that must remain outside it. Cite code and documentation. Do not modify files." + "prompt": "Explain what makes Kun's immutable prompt prefix stable and list dynamic data that must remain outside it. Cite code and documentation. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } }, { "id": "provider-url-contract", "tags": ["provider"], - "prompt": "Trace how baseUrl and endpointFormat affect provider URL construction and request bodies across chat and auxiliary model calls. Cite all important consumers. Do not modify files." 
+ "prompt": "Trace how baseUrl and endpointFormat affect provider URL construction and request bodies across chat and auxiliary model calls. Cite all important consumers. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } }, { "id": "attachment-flow", "tags": ["attachments"], - "prompt": "Trace an image or local file attachment from renderer selection to model input or fallback. Identify the cross-layer contract fields and failure points. Do not modify files." + "prompt": "Trace an image or local file attachment from renderer selection to model input or fallback. Identify the cross-layer contract fields and failure points. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } }, { "id": "approval-flow", "tags": ["runtime", "security"], - "prompt": "Trace a tool approval request from agent loop creation through SSE/UI resolution back to tool execution. Cite routes, gates, and renderer handlers. Do not modify files." + "prompt": "Trace a tool approval request from agent loop creation through SSE/UI resolution back to tool execution. Cite routes, gates, and renderer handlers. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } }, { "id": "goal-resume", "tags": ["runtime", "goal"], - "prompt": "Explain how active goals survive runtime restart, how orphaned turns are reconciled, and where auto-resume is triggered. Cite tests if present. Do not modify files." + "prompt": "Explain how active goals survive runtime restart, how orphaned turns are reconciled, and where auto-resume is triggered. Cite tests if present. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } }, { "id": "subagent-permissions", "tags": ["subagent", "security"], - "prompt": "Explain how subagent tool policies inherit or restrict built-in tools, MCP servers, and skills without escalating the parent permissions. Cite enforcement points. 
Do not modify files." + "prompt": "Explain how subagent tool policies inherit or restrict built-in tools, MCP servers, and skills without escalating the parent permissions. Cite enforcement points. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } }, { "id": "settings-persistence", "tags": ["settings"], - "prompt": "Trace a Kun settings change from renderer state through validation/persistence to managed runtime restart. Highlight rollback behavior. Do not modify files." + "prompt": "Trace a Kun settings change from renderer state through validation/persistence to managed runtime restart. Highlight rollback behavior. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } }, { "id": "test-selection", "tags": ["quality"], - "prompt": "Identify how the verify_changes tool selects and runs validation after edits. Explain its safety limits and output contract. Do not modify files." + "prompt": "Identify how the verify_changes tool selects and runs validation after edits. Explain its safety limits and output contract. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } }, { "id": "build-pipeline", "tags": ["build"], - "prompt": "Summarize the development, typecheck, test, build, and packaging pipeline for Kun. Cite package scripts and packaging configuration. Do not modify files." + "prompt": "Summarize the development, typecheck, test, build, and packaging pipeline for Kun. Cite package scripts and packaging configuration. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } }, { "id": "security-boundaries", "tags": ["security"], - "prompt": "Map the main trust boundaries for renderer IPC, filesystem tools, command execution, MCP, and secrets. Cite concrete enforcement files. Do not modify files." + "prompt": "Map the main trust boundaries for renderer IPC, filesystem tools, command execution, MCP, and secrets. 
Cite concrete enforcement files. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } }, { "id": "runtime-hotspots", "tags": ["performance"], - "prompt": "Inspect runtime event persistence, SSE replay, tool execution, and context assembly. Identify three evidence-based performance or memory hotspots with file references. Do not modify files." + "prompt": "Inspect runtime event persistence, SSE replay, tool execution, and context assembly. Identify three evidence-based performance or memory hotspots with file references. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } }, { "id": "thread-persistence", "tags": ["storage"], - "prompt": "Explain how thread/session data is persisted and indexed across file and hybrid SQLite stores, including usage carryover. Cite implementation files. Do not modify files." + "prompt": "Explain how thread/session data is persisted and indexed across file and hybrid SQLite stores, including usage carryover. Cite implementation files. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } }, { "id": "model-capabilities", "tags": ["provider"], - "prompt": "Explain how model capabilities control image input, tool calling, reasoning effort, endpoint format, and context limits. Cite schemas and request construction. Do not modify files." + "prompt": "Explain how model capabilities control image input, tool calling, reasoning effort, endpoint format, and context limits. Cite schemas and request construction. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } }, { "id": "frontend-chunking", "tags": ["frontend", "performance"], - "prompt": "Inspect renderer lazy loading and identify which Workbench surfaces are split into separate chunks and which heavy chat dependencies still load eagerly. Do not modify files." 
+ "prompt": "Inspect renderer lazy loading and identify which Workbench surfaces are split into separate chunks and which heavy chat dependencies still load eagerly. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } }, { "id": "failure-recovery", "tags": ["runtime", "reliability"], - "prompt": "Map how the desktop app detects an unhealthy Kun child, budgets restarts, distinguishes settings restarts from crashes, and reports status to the renderer. Do not modify files." + "prompt": "Map how the desktop app detects an unhealthy Kun child, budgets restarts, distinguishes settings restarts from crashes, and reports status to the renderer. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } } ] } diff --git a/kun/src/benchmark/replay-benchmark.test.ts b/kun/src/benchmark/replay-benchmark.test.ts index 81ae4071a..9bf69d261 100644 --- a/kun/src/benchmark/replay-benchmark.test.ts +++ b/kun/src/benchmark/replay-benchmark.test.ts @@ -3,6 +3,7 @@ import type { RuntimeEvent } from '../contracts/events.js' import { compareReplayReports, ReplaySuiteSchema, + runReplaySuite, SseMessageDecoder, summarizeReplayEvents, summarizeReplayRuns, @@ -10,6 +11,7 @@ import { type ReplayReport, type ReplayRunResult } from './replay-benchmark.js' +import { buildRuntimeCapabilityManifest } from '../contracts/capabilities.js' const baseTimestamp = Date.parse('2026-06-29T00:00:00.000Z') @@ -178,8 +180,175 @@ describe('replay benchmark', () => { ] })).toThrow('duplicate replay task id') }) + + it('fails runs that do not use any required investigation tool', async () => { + const fetchImpl: typeof fetch = async (input, init = {}) => { + const url = new URL(String(input)) + if (url.pathname === '/v1/runtime/info') return jsonResponse(testRuntimeInfo()) + if (url.pathname === '/v1/threads' && init.method === 'POST') return jsonResponse({ id: 'thr_1' }, 201) + if (url.pathname === '/v1/threads/thr_1/turns' && init.method === 
'POST') { + return jsonResponse({ threadId: 'thr_1', turnId: 'turn_1', userMessageItemId: 'item_user' }, 202) + } + if (url.pathname === '/v1/threads/thr_1/events') { + return sseResponse([ + { + kind: 'assistant_text_delta', + seq: 1, + timestamp: '2026-06-29T00:00:00.000Z', + threadId: 'thr_1', + turnId: 'turn_1', + item: { ...itemBase('assistant_text'), id: 'item_text', threadId: 'thr_1', turnId: 'turn_1', text: 'hello' } + } as RuntimeEvent, + { + kind: 'turn_completed', + seq: 2, + timestamp: '2026-06-29T00:00:00.010Z', + threadId: 'thr_1', + turnId: 'turn_1', + status: 'completed' + } + ]) + } + if (url.pathname === '/v1/threads/thr_1' && init.method === 'DELETE') { + return jsonResponse({ id: 'thr_1', deleted: true }) + } + return jsonResponse({ message: `unexpected ${init.method ?? 'GET'} ${url.pathname}` }, 404) + } + + const report = await runReplaySuite({ + version: 1, + name: 'tool-required-suite', + tasks: [{ + id: 'no-tool', + prompt: 'answer from memory', + expect: { requiredAnyTools: ['read', 'grep', 'find', 'ls'] } + }] + }, { + baseUrl: 'http://127.0.0.1:18899', + token: 'token', + workspace: '/tmp/workspace', + fetchImpl + }) + + expect(report.runs[0]?.status).toBe('failed') + expect(report.runs[0]?.failureReasons).toContain('none of the required tools were used: read, grep, find, ls') + }) + + it('interrupts timed-out turns before deleting replay threads', async () => { + const calls: Array<{ method: string; path: string }> = [] + const fetchImpl: typeof fetch = async (input, init = {}) => { + const url = new URL(String(input)) + calls.push({ method: init.method ?? 
'GET', path: `${url.pathname}${url.search}` }) + if (url.pathname === '/v1/runtime/info') return jsonResponse(testRuntimeInfo()) + if (url.pathname === '/v1/threads' && init.method === 'POST') return jsonResponse({ id: 'thr_1' }, 201) + if (url.pathname === '/v1/threads/thr_1/turns' && init.method === 'POST') { + return jsonResponse({ threadId: 'thr_1', turnId: 'turn_1', userMessageItemId: 'item_user' }, 202) + } + if (url.pathname === '/v1/threads/thr_1/events') return neverTerminalSse(init.signal) + if (url.pathname === '/v1/threads/thr_1/turns/turn_1/interrupt' && init.method === 'POST') { + return jsonResponse({ threadId: 'thr_1', turnId: 'turn_1', status: 'aborted' }) + } + if (url.pathname === '/v1/threads/thr_1' && init.method === 'DELETE') { + return jsonResponse({ id: 'thr_1', deleted: true }) + } + return jsonResponse({ message: `unexpected ${init.method ?? 'GET'} ${url.pathname}` }, 404) + } + + const report = await runReplaySuite({ + version: 1, + name: 'timeout-suite', + defaults: { timeoutMs: 20 }, + tasks: [{ id: 'slow', prompt: 'wait for a terminal event', expect: { minAssistantChars: 0 } }] + }, { + baseUrl: 'http://127.0.0.1:18899', + token: 'token', + workspace: '/tmp/workspace', + fetchImpl + }) + + expect(report.runs[0]?.status).toBe('timeout') + const interruptIndex = calls.findIndex((call) => call.path === '/v1/threads/thr_1/turns/turn_1/interrupt') + const deleteIndex = calls.findIndex((call) => call.path === '/v1/threads/thr_1') + expect(interruptIndex).toBeGreaterThan(-1) + expect(deleteIndex).toBeGreaterThan(interruptIndex) + }) }) +function jsonResponse(value: unknown, status = 200): Response { + return new Response(JSON.stringify(value), { + status, + headers: { 'content-type': 'application/json' } + }) +} + +function testRuntimeInfo() { + return { + host: '127.0.0.1', + port: 18899, + dataDir: '/tmp/kun-replay', + model: 'deepseek-chat', + startedAt: '2026-06-29T00:00:00.000Z', + capabilities: buildRuntimeCapabilityManifest({ + model: 
{ + id: 'deepseek-chat', + inputModalities: ['text'], + outputModalities: ['text'], + supportsToolCalling: true, + messageParts: ['text'] + } + }) + } +} + +function sseResponse(events: RuntimeEvent[]): Response { + const encoder = new TextEncoder() + const stream = new ReadableStream({ + start(controller) { + for (const event of events) { + controller.enqueue( + encoder.encode(`id: ${event.seq}\nevent: ${event.kind}\ndata: ${JSON.stringify(event)}\n\n`) + ) + } + controller.close() + } + }) + return new Response(stream, { + status: 200, + headers: { 'content-type': 'text/event-stream; charset=utf-8' } + }) +} + +function neverTerminalSse(signal?: AbortSignal | null): Response { + const encoder = new TextEncoder() + const stream = new ReadableStream({ + start(controller) { + const heartbeat = [ + 'id: 1', + 'event: heartbeat', + `data: ${JSON.stringify({ + kind: 'heartbeat', + seq: 1, + timestamp: '2026-06-29T00:00:00.000Z', + threadId: 'thr_1' + })}`, + '', + '' + ].join('\n') + const push = () => controller.enqueue(encoder.encode(heartbeat)) + const timer = setInterval(push, 1) + push() + signal?.addEventListener('abort', () => { + clearInterval(timer) + controller.error(new DOMException('aborted', 'AbortError')) + }, { once: true }) + } + }) + return new Response(stream, { + status: 200, + headers: { 'content-type': 'text/event-stream; charset=utf-8' } + }) +} + function replayRun( status: ReplayRunResult['status'], ttftMs: number, diff --git a/kun/src/benchmark/replay-benchmark.ts b/kun/src/benchmark/replay-benchmark.ts index d8130f874..f16bb809b 100644 --- a/kun/src/benchmark/replay-benchmark.ts +++ b/kun/src/benchmark/replay-benchmark.ts @@ -8,6 +8,7 @@ import type { UsageSnapshot } from '../contracts/usage.js' const ReplayExpectationSchema = z.object({ minAssistantChars: z.number().int().nonnegative().default(1), requiredTools: z.array(z.string().min(1)).default([]), + requiredAnyTools: z.array(z.string().min(1)).default([]), maxErrorEvents: 
z.number().int().nonnegative().default(0), maxTotalMs: z.number().int().positive().optional() }).strict() @@ -160,6 +161,7 @@ type ReplayHttpClient = { createThread(body: Record): Promise<{ id: string }> startTurn(threadId: string, body: Record): Promise<{ turnId: string }> openEvents(threadId: string, signal: AbortSignal): Promise + interruptTurn(threadId: string, turnId: string): Promise deleteThread(threadId: string): Promise } @@ -242,6 +244,8 @@ async function runReplayTask(input: { if (!model) return errorReplayRun(runId, task, iteration, 'runtime did not report a default model') const workspace = resolve(input.workspace, task.workspace ?? '.') let threadId: string | undefined + let turnId: string | undefined + let shouldInterrupt = false try { const thread = await client.createThread({ title: `[replay] ${runId}`, @@ -264,14 +268,16 @@ async function runReplayTask(input: { sandboxMode: 'read-only', disableUserInput: true }) + turnId = turn.turnId const timeoutMs = task.timeoutMs ?? suite.defaults.timeoutMs const collected = await collectReplayEvents({ client, threadId, - turnId: turn.turnId, + turnId, startedAt, timeoutMs }) + shouldInterrupt = collected.timedOut || !hasTerminalTurnEvent(collected.events, turnId) const after = await client.getRuntimeInfo().catch(() => runtime) const metrics = summarizeReplayEvents( collected.events, @@ -285,17 +291,22 @@ async function runReplayTask(input: { iteration, tags: task.tags, threadId, - turnId: turn.turnId, + turnId, status: collected.timedOut ? 'timeout' : failureReasons.length > 0 ? 'failed' : 'passed', failureReasons, metrics } } catch (error) { + shouldInterrupt = turnId !== undefined return { ...errorReplayRun(runId, task, iteration, errorMessage(error)), - ...(threadId ? { threadId } : {}) + ...(threadId ? { threadId } : {}), + ...(turnId ? 
{ turnId } : {}) } } finally { + if (threadId && turnId && shouldInterrupt) { + await client.interruptTurn(threadId, turnId).catch(() => undefined) + } if (threadId && !input.keepThread) { await client.deleteThread(threadId).catch(() => undefined) } @@ -566,6 +577,11 @@ function createReplayHttpClient( } return response }, + async interruptTurn(threadId, turnId) { + await requestJson(`/v1/threads/${encodeURIComponent(threadId)}/turns/${encodeURIComponent(turnId)}/interrupt`, { + method: 'POST' + }) + }, async deleteThread(threadId) { await requestJson(`/v1/threads/${encodeURIComponent(threadId)}`, { method: 'DELETE' }) } @@ -635,6 +651,9 @@ function replayExpectationFailures( for (const tool of task.expect.requiredTools) { if (!usedTools.has(tool)) failures.push(`required tool was not used: ${tool}`) } + if (task.expect.requiredAnyTools.length > 0 && !task.expect.requiredAnyTools.some((tool) => usedTools.has(tool))) { + failures.push(`none of the required tools were used: ${task.expect.requiredAnyTools.join(', ')}`) + } return failures } @@ -680,6 +699,10 @@ function isTerminalTurnEvent(kind: RuntimeEventValue['kind']): boolean { return kind === 'turn_completed' || kind === 'turn_failed' || kind === 'turn_aborted' } +function hasTerminalTurnEvent(events: ObservedReplayEvent[], turnId: string): boolean { + return events.some(({ event }) => event.turnId === turnId && isTerminalTurnEvent(event.kind)) +} + function percentile(values: number[], quantile: number): number | null { if (values.length === 0) return null const sorted = [...values].sort((left, right) => left - right) diff --git a/kun/src/cli/replay-entry.ts b/kun/src/cli/replay-entry.ts index 2cf0238ec..745efab8c 100644 --- a/kun/src/cli/replay-entry.ts +++ b/kun/src/cli/replay-entry.ts @@ -6,6 +6,9 @@ import { runReplaySuite, type ReplayReport } from '../benchmark/replay-benchmark.js' +import { DEFAULT_SERVE_PORT } from './cli-options.js' + +const DEFAULT_RUNTIME_URL = `http://127.0.0.1:${DEFAULT_SERVE_PORT}` 
type CliOptions = { suitePath?: string @@ -71,7 +74,7 @@ if (options.failOnRegression && report.comparison?.regressions.length) { function parseArgs(args: string[]): CliOptions { const options: CliOptions = { - baseUrl: process.env.KUN_RUNTIME_URL ?? 'http://127.0.0.1:18788', + baseUrl: process.env.KUN_RUNTIME_URL ?? DEFAULT_RUNTIME_URL, workspace: resolve(process.env.INIT_CWD ?? process.cwd()), repeat: 1, concurrency: 1, @@ -140,7 +143,7 @@ function printUsage(): void { console.log(' npm --prefix kun run benchmark:replay -- --suite [options]') console.log('') console.log('Options:') - console.log(' --base-url Kun runtime URL (or KUN_RUNTIME_URL)') + console.log(` --base-url Kun runtime URL (or KUN_RUNTIME_URL, default ${DEFAULT_RUNTIME_URL})`) console.log(' --workspace Workspace for replay tasks') console.log(' --tag Run only tasks with this tag') console.log(' --repeat Repeat each selected task (default 1)') From 7588cf1de9c6df9c2ee4ab799b21fb8bf969ddd1 Mon Sep 17 00:00:00 2001 From: XingYu-Zhong <1736101137@qq.com> Date: Mon, 29 Jun 2026 22:24:34 +0800 Subject: [PATCH 17/18] refactor(agent-loop): remove unused MAX_TURN_MODEL_STEPS and related logic --- kun/src/loop/agent-loop.ts | 25 ------------------------- 1 file changed, 25 deletions(-) diff --git a/kun/src/loop/agent-loop.ts b/kun/src/loop/agent-loop.ts index def31ac8f..40ed79672 100644 --- a/kun/src/loop/agent-loop.ts +++ b/kun/src/loop/agent-loop.ts @@ -104,7 +104,6 @@ const MAX_PARALLEL_TOOL_CALLS = 3 // request. Older ones collapse to a text note (Anthropic-style "keep last // N images"), bounding context growth for long computer-use sessions. 
const MAX_FORWARDED_TOOL_IMAGES = 3 -const MAX_TURN_MODEL_STEPS = 64 /** * Tools that, on their own, do not count as "progress" toward a goal when @@ -1293,30 +1292,6 @@ export class AgentLoop { ): Promise<'completed' | 'failed' | 'aborted'> { for (let step = 0; ; step += 1) { if (signal.aborted) return 'aborted' - if (step >= MAX_TURN_MODEL_STEPS) { - const message = - `Turn stopped after ${MAX_TURN_MODEL_STEPS} model steps without reaching a final response.` - await this.opts.events.record({ - kind: 'error', - threadId, - turnId, - message, - code: 'turn_step_limit_exceeded', - severity: 'error' - }) - await this.opts.turns.applyItem( - threadId, - makeErrorItem({ - id: this.opts.ids.next('item_error'), - turnId, - threadId, - message, - code: 'turn_step_limit_exceeded', - severity: 'error' - }) - ) - return 'failed' - } await this.drainSteering(threadId, turnId, signal) const stepResult = await this.modelStep(threadId, turnId, signal, step) if (stepResult === 'stop') return 'completed' From b5ce4b69917095c42d1d1b3f640c403bd0f3efd6 Mon Sep 17 00:00:00 2001 From: XingYu-Zhong <1736101137@qq.com> Date: Mon, 29 Jun 2026 22:38:51 +0800 Subject: [PATCH 18/18] feat(release): add release notes for v0.2.19 with Claude Pro/Max subscription integration and various enhancements --- release/release-v0.2.16.md | 52 +++++++++++++++++++++++++++++++++ release/release-v0.2.17.md | 54 ++++++++++++++++++++++++++++++++++ release/release-v0.2.18.md | 29 ++++++++++++++++++ release/release-v0.2.19.md | 60 ++++++++++++++++++++++++++++++++++++++ release/release-v0.2.20.md | 48 ++++++++++++++++++++++++++++++ 5 files changed, 243 insertions(+) create mode 100644 release/release-v0.2.16.md create mode 100644 release/release-v0.2.17.md create mode 100644 release/release-v0.2.18.md create mode 100644 release/release-v0.2.19.md create mode 100644 release/release-v0.2.20.md diff --git a/release/release-v0.2.16.md b/release/release-v0.2.16.md new file mode 100644 index 000000000..c54cc85ba --- 
/dev/null +++ b/release/release-v0.2.16.md @@ -0,0 +1,52 @@ +# Kun v0.2.16 + +这一版是在 v0.2.15 的「创建 Loop」大版本之后做的一轮能力补齐与稳定性修复。主线是本地 Whisper 语音转写、Agent 回滚与计划交互、LSP 诊断、供应商扩展,以及一批围绕 Windows 升级、Kun 端口、SSE 重连、权限与导入安全的加固。 + +### 本地 Whisper 语音转写 + +- 新增本地 Whisper 转写能力,语音可以在本机完成转文字,不必完全依赖远端 ASR。 +- Whisper 模型支持下载、状态展示、取消和删除,并加入 Hugging Face CDN 镜像源,下载完成后就绪状态也会正确刷新。 +- 补齐 Linux 与 Linux arm64 runner,并处理 macOS 跨架构打包时的 native CPU flags 问题。 +- 打包时会裁剪 Whisper 资源,减少发布包里无用文件的堆积。 + +### Agent 计划、回滚与诊断 + +- 聊天中新增助手回复级 Git 回滚动作,可以从回复处触发对应的回滚流程。 +- 回滚流程会明确提示破坏性,并暴露 rescue id;成功后隐藏内部 checkpoint refs 和多余横幅,减少界面噪音。 +- Plan mode 改为按线程保存,fork 对话不再错误继承原线程的 plan 状态。 +- 模糊请求会先进入澄清问题,而不是过早转成计划;计划回合也可以通过结构化 `user_input` 工具询问用户。 +- 新增 LSP 诊断能力与常见语言服务器预设,代码问题可以更自然地进入 Agent 上下文。 +- 新增缓存诊断与压缩记忆详情弹窗,便于排查长会话、缓存和记忆注入效果。 + +### 供应商、权限与 MCP + +- 新增 LongCat 与 Vercel AI Gateway 供应商预设。 +- 设置中统一工具权限控制,长模型 id、`modelHint` IPC 字段和供应商布局都做了兼容性打磨。 +- 外部工具(MCP)服务器新增表单式编辑器,远程 MCP URL 校验要求 HTTPS,并补充 OAuth 文档入口。 +- GitHub Skill 导入加入 SSRF 与路径穿越防护,远端导入更安全。 +- 缓存 telemetry、usage diagnostics、计划队列并发上限等内部契约得到加固。 + +### 界面、主题与终端 + +- 新增 Retroma parchment 浅色主题、侧栏底部日月主题切换按钮,以及可配置的交互效果颜色。 +- 终端新增黑白模式、自定义颜色,并修复 CJK 乱码;后续又恢复了原生彩色模式。 +- 工具失败提示改用警告色,文件树按钮图标、上下文容量数字、跳转栏编号、设置区块和深色 UI 插件 token 都做了细节修正。 +- Write 编辑器修复选区高亮宽度、Markdown 重命名可见性,并恢复 DOCX 导出。 + +### 运行时、安装与发布 + +- Kun 本地端口上移到 10000 以上,降低和系统/其他开发服务冲突的概率。 +- 运行时只注入一次初始上下文,端口回收时避免误杀仍在使用的 Kun 子进程。 +- 桌面端会节流 IPC 上的 SSE 事件,并正确处理重连。 +- Windows 升级前会停止打包进来的后台进程,并修复覆盖安装卸载失败的问题。 +- 新增每日 dev prerelease 工作流,更新完成通知标题也统一为 Kun。 + +### 升级说明 + +- 从 `v0.2.15` 升级可直接通过 GUI 更新,本地数据和配置会沿用。 +- 如果要使用本地 Whisper,首次使用需要下载模型;Linux 用户升级后可直接使用对应平台 runner。 +- 如果你之前遇到 Windows 覆盖安装、端口占用或 SSE 重连异常,这一版包含对应修复。 + +### 总结 + +v0.2.16 是一次“把大功能落稳”的版本:本地语音转写、回滚、计划交互、LSP 和供应商能力继续往前走,同时把安装、端口、权限、安全和界面细节一起补牢。 diff --git a/release/release-v0.2.17.md b/release/release-v0.2.17.md new file mode 100644 index 000000000..eaf1f151f --- /dev/null +++ b/release/release-v0.2.17.md @@ -0,0 +1,54 @@ +# Kun v0.2.17 + +这一版的主角是子代理系统。Kun 
开始支持更完整的子代理 profile、按 profile 路由模型、独立子会话、实时子任务展示,以及 `.kun/agents/*.md` 这样的项目级代理定义。同时,Skill/MCP、文档附件、Agents 设置和上下文压缩也完成了一轮系统升级。 + +### 子代理系统成型 + +- 子代理支持按 profile 配置模型供应商、模式、系统提示词、允许工具、名称、描述和颜色。 +- 设置中新增 Subagents 管理视图,可以创建、编辑、删除子代理,并把配置桥接到运行时。 +- 线程支持 `agentId` 与 persona,composer 新增代理选择器,子代理运行状态可以实时显示。 +- 支持 `.kun/agents/*.md` 覆盖项目代理定义,并提供 AI 草稿、detach 与 abort 能力。 +- 子代理可以配置独立压缩模型,并内置 General / Explore 等预设。 +- 委派链路补齐被禁止工具、被禁止服务器和 profile 合并逻辑,子代理权限边界更清晰。 + +### Skill、MCP 与附件 + +- 聊天支持作用域 Skill 和文档附件,Skill 可以按项目/全局来源加载并标记来源。 +- Tool runtime 支持全局 Skill 加载、深路径文件搜索,以及从设置传入全局 Skill roots。 +- 已停用 Skill 会在运行时生效,Codex 插件缓存目录也可以开关。 +- MCP stdio server 支持配置 `cwd`,远程 MCP 继续强化 HTTPS 约束。 +- 新增进程类工具图标,纯文本代码块也会显示更清楚的 chrome。 + +### Agents 设置与模型行为 + +- Agents 配置界面重写,整合模型配置、完整管理、动画 Kun 与 i18n。 +- 内置 endpoint format 会标注供应商协议类型,例如 OpenAI 与 Anthropic。 +- 上下文压缩摘要改为更接近 opencode 的 compaction 模式,长会话阅读和续跑更自然。 +- Anthropic 并行 `tool_result` 会合并为一个 user message,避免协议不兼容。 +- 视觉模型到文本模型的锁定只在确实有图片时触发。 + +### 会话、侧栏与启动体验 + +- 侧栏会话操作更完整,长分支名、完整工作区路径和聊天跳转栏都做了展示修复。 +- 新用户引导中加入默认 Agent 权限配置。 +- 设置里新增 Git checkpoint 清理间隔,并保持 opt-in。 +- ask-user 提示面板上移到输入框上方,减少长对话中被忽略的概率。 +- Loop 编辑器在 Windows 标题栏下方正确偏移。 + +### 稳定性与安全修复 + +- Git checkpoint restore 会读取 `thread.status` busy guard,并加强路径穿越防护。 +- IM 权限透传、workspace symlink escape、MCP HTTPS 等安全边界继续收紧。 +- 运行时启动必须通过 health probe 后才宣告 ready。 +- 上游模型列表 `fetch_failed` 与本地 runtime failure 会区分展示,排障信息更准确。 +- 修复流式模型客户端丢失/截断 tool calls 的问题。 + +### 升级说明 + +- 从 `v0.2.16` 升级可直接通过 GUI 更新。 +- 如果你想使用子代理,可以在 Agents 设置中配置 profile;项目内也可以用 `.kun/agents/*.md` 保存团队共享代理定义。 +- 如果之前手动维护过 Skill 目录,升级后建议检查全局和项目 Skill roots 的开关状态。 + +### 总结 + +v0.2.17 让 Kun 的“多代理协作”正式成型:子代理不再只是一次工具调用,而是有 profile、有模型、有权限、有 UI、有项目定义的独立工作单元。配合全局 Skill、MCP `cwd`、文档附件和设置重写,这一版把 Agent 生态往可管理、可复用的方向推进了一大步。 diff --git a/release/release-v0.2.18.md b/release/release-v0.2.18.md new file mode 100644 index 000000000..bd0ffa737 --- /dev/null +++ b/release/release-v0.2.18.md @@ -0,0 +1,29 @@ +# Kun v0.2.18 + +这一版是 v0.2.17 
之后的小型打磨版本,重点补在子代理委派、分支/worktree 操作、线程事件订阅和设置布局上。它不是大功能发布,但让刚成型的子代理与 Git 工作流更顺手。 + +### 子代理委派与展示 + +- 强化 delegation tool provider 与运行时的子代理支持,子代理委派链路更完整。 +- 子代理调用卡片默认折叠,避免多个子任务同时运行时把时间线撑得过长。 +- 优化子代理相关字体缩放与展示细节,配合 v0.2.17 的 profile 系统更稳定。 + +### 分支和 Worktree 操作 + +- Git 分支创建、切换文案补齐本地化,用户更容易理解当前操作。 +- 分支管理增强 worktree 支持,和会话级隔离工作树的配合更自然。 +- 相关标签与状态文案也做了同步调整。 + +### 设置和事件流 + +- 设置页把代理配置移动到供应商配置下方,并细化字体缩放控制。 +- 优化线程事件订阅,减少会话流转中的重复订阅和状态抖动。 + +### 升级说明 + +- 从 `v0.2.17` 升级可直接通过 GUI 更新,无需额外操作。 +- 如果你已经在使用子代理或 worktree 分支管理,这一版主要是体验和稳定性提升。 + +### 总结 + +v0.2.18 是一版精修补丁:让子代理调用更安静,分支/worktree 操作更清楚,设置和事件订阅更稳。它把 v0.2.17 的大改动继续磨平了一层。 diff --git a/release/release-v0.2.19.md b/release/release-v0.2.19.md new file mode 100644 index 000000000..a48880569 --- /dev/null +++ b/release/release-v0.2.19.md @@ -0,0 +1,60 @@ +# Kun v0.2.19 + +这一版的核心是 Claude Pro/Max 订阅接入:Kun 通过内置 Claude Agent SDK 路径,把完整回合委托给订阅模型,同时继续注入 Kun 的历史、Skill、模式上下文、权限和工具桥。除此之外,这一版还加入了 Conversations 标签、对话本地附件、自动验收、目标续跑增强,并修复了一批运行时与设置稳定性问题。 + +### Claude Pro/Max 订阅接入 + +- 新增 `Claude (Pro/Max 订阅)` 供应商预设,通过 Claude Agent SDK 使用订阅额度,而不是普通 HTTP API 计费路径。 +- Kun 会把完整回合路由到 SDK runtime,并把 Kun 专属工具桥接为 in-process MCP,让订阅路径也能使用 Kun 的工具、权限和上下文。 +- 新增 Claude 订阅登录 UI,可以检测本机 Claude Code 登录,也支持通过 SDK 获取可用模型。 +- Claude Code binary 改为按需下载,并提供后台下载进度;不再要求用户提前单独安装 CLI。 +- 支持从 `supportedModels()` 获取模型并自动填充,模型 id、视觉能力和上下文长度会跟随 SDK 返回值校准。 +- 图片附件会转发给 SDK,交互式输入会走 Kun 的 `user_input` 面板,plan 回合也会向 SDK 暴露 `create_plan`。 + +### 对话工作区、附件与 Composer + +- 侧栏新增 Conversations 标签,并可自动创建带时间戳的对话工作区。 +- 对话支持添加本地文件附件,composer 也新增文件和文件夹入口。 +- 项目选择器会排除 conversation workspace,避免临时对话空间干扰真实项目列表。 +- 会话置顶切换、线程预览锚点、会话动作弹窗遮罩和 timeline 置底行为都做了修复。 +- 重新打开已结束线程时,不会再重复弹出过期的 `user_input` 提示。 + +### 运行时可靠性与自动验收 + +- 运行时会重试 stale managed endpoints,并加强 endpoint health recovery。 +- 子代理运行时卡住时可以恢复,并新增 event-loop stall 日志用于诊断 runtime hang。 +- 新增自动验收验证,代码模式下 `verify_changes` 变为可选建议,减少对非代码任务的干扰。 +- 目标续跑逻辑增强,可以更好处理未完成目标对应的回合。 +- Write 与 SDD 工作台改为懒加载,降低首屏渲染和设置切换成本。 + +### 设置、MCP 与权限 + +- 
设置里可以看到 MCP 与 Skill 权限来源,权限预览也不再直接回显原始 MCP 解析错误。 +- MCP 服务器按 workspace roots 作用域管理,并移除不安全的 repo-local `.kun/mcp.json` 自动导入。 +- 修复模型请求代理 URL 输入时被清空的问题,设置卸载时会 flush 待保存的供应商编辑。 +- Provider stale proxy 诊断更明确,连接测试的长错误消息也会正确换行。 + +### 平台与工作区体验 + +- Linux 下 Wayland IME flags 会按平台门控,减少输入法相关副作用。 +- Windows shell 通过绝对路径启动,并正确尊重 `danger-full-access` 文件工具权限。 +- 原生右键菜单和若干标签文案更清楚。 +- Worktree 支持重复 checkout 同一分支,并隐藏内部 worktree projects;undefined workspace 与 `.kun/worktrees` 锚点检测也做了防护。 +- 内部拆分了 main 桌面行为、路径 helper、聊天 store 初始状态与线程 action,为后续维护降低复杂度。 + +### 本版合入的修复 + +- 修复 Claude SDK 路径下的文本/推理重复流式渲染、非 Anthropic 线程模型回落、`canUseTool` 输入更新和订阅状态识别。 +- 修复设置代理、侧栏预览、composer 背景、目标面板背景、纯文本 chip 背景等 UI 问题。 +- 修复构建中缺失的 `@emnapi` lockfile 记录。 +- 补充 worktree 分组测试覆盖,并增加 runtime stall 诊断日志。 + +### 升级说明 + +- 从 `v0.2.18` 升级可直接通过 GUI 更新。 +- 如果要使用 Claude Pro/Max 订阅路径,可以在供应商中选择 `Claude (Pro/Max 订阅)`,按提示登录或下载所需 SDK binary。 +- 如果你依赖 repo-local `.kun/mcp.json` 自动导入,升级后需要改为在设置中显式配置 MCP,以避免不安全的隐式导入。 + +### 总结 + +v0.2.19 把 Kun 接到 Claude 订阅模型路径上,同时没有放弃 Kun 自己的工具、Skill、权限和上下文治理。这让订阅模型可以真正参与 Kun 的本地工作流;配合 Conversations、附件、自动验收、目标续跑和运行时修复,它也是 v0.2 后段非常关键的一次整合版本。 diff --git a/release/release-v0.2.20.md b/release/release-v0.2.20.md new file mode 100644 index 000000000..05229cb8c --- /dev/null +++ b/release/release-v0.2.20.md @@ -0,0 +1,48 @@ +# Kun v0.2.20 + +这一版是 v0.2.19 之后的一次稳定性与性能补强。主线是新增后台 shell 会话,让长时间命令可以脱离当前回合继续运行并持久化输出;同时修复 MCP streamable-http 断线导致运行时不稳、跨回合编辑误拦截、前端首屏包体偏大等问题,并补上 Agent replay benchmark,方便后续用回放方式观察运行时表现。 + +### 后台 Shell 会话 + +- 新增 `background_shell` 能力,长时间运行的 shell 命令可以在后台继续执行,不必阻塞当前对话回合。 +- 后台命令输出会按线程持久化到 runtime data 目录,并保留输出摘要与完整日志路径,便于后续查看和审计。 +- 运行结束后会向 Agent 发送完成通知,GUI 会把这类通知识别为后台 shell 事件,避免覆盖原始用户提示。 +- 后台 shell 支持列出会话、查看详情、停止运行中的会话,并默认隐藏已结束会话以减少噪音。 +- 修复后台输出目录使用错误,统一改走 `options.dataDir`,保证开发与打包环境都能找到正确位置。 + +### MCP 与运行时可靠性 + +- 修复 streamable-http MCP server 断开连接时可能把 Kun runtime 一起带崩的问题(#639)。 +- 加固 MCP runtime reconnect 生命周期:断线后按需重连,多个并发工具调用共享同一次重连,生命周期关闭会正确标记为离线。 +- 运行时 crash 
handler 会把可恢复的 MCP 后台拒绝视作可恢复错误,避免因为外部 MCP 抖动导致本地会话中断。 +- 修复 stale reconnect、诊断状态和重试时机相关问题,让外部工具服务恢复后能继续调用。 + +### 文件编辑与对话体验 + +- 修复跨回合编辑时 read tracker 过度保守的问题:只要旧文本仍在最近读取内容中,就允许后续回合继续编辑(#640)。 +- 记忆注入 chip hover 时显示记忆摘要,便于确认当前对话使用了哪些长期记忆。 +- 设置中新增对话文字宽度配置,可以调整消息正文和输入框正文的显示宽度。 +- 思考过程文字做了降噪处理,界面左侧边框也进一步简化。 + +### 性能与可维护性 + +- 设置页和时间线相关代码拆分为独立 bundle,减少主工作台首屏负担。 +- 新增 Agent replay benchmark,可用只读 HTTP/SSE 回放套件重复跑核心场景,方便比较运行时性能和稳定性。 +- replay benchmark 的清理逻辑与测试期望得到加固,减少基准测试自身的误报。 +- 清理 agent loop 中不再使用的 `MAX_TURN_MODEL_STEPS` 及相关逻辑。 + +### 测试与回归修复 + +- 修复后台 shell 回调 UI、工具摘要、runtime-client import 等 renderer 问题。 +- 修复批量 PR 合入后的类型、mock、timeline chip 回归。 +- 补充后台 shell、MCP reconnect、read tracker 和 replay benchmark 相关测试覆盖。 + +### 升级说明 + +- 从 `v0.2.19` 升级可直接通过 GUI 更新。 +- 后台 shell 输出会写入 Kun runtime data 目录;如果你在只读沙箱中查看输出,Kun 会允许读取这些后台日志文件。 +- 如果你依赖远程 MCP server,这一版会明显改善断线和恢复时的稳定性。 + +### 总结 + +v0.2.20 把 Kun 的长命令执行和外部工具恢复能力往前推进了一步:后台 shell 让耗时任务不再绑死对话回合,MCP reconnect 修复让 runtime 更抗抖,前端拆包和 replay benchmark 则让性能优化有了更清晰的落点。