diff --git a/kun/README.md b/kun/README.md index f63d06110..bbe80ce53 100644 --- a/kun/README.md +++ b/kun/README.md @@ -51,6 +51,27 @@ Run from the `kun/` directory. - `npm run serve` – start the runtime after a build. - `npm run dev` – rebuild in watch mode. +- `npm run benchmark:replay -- --suite ` - run a read-only HTTP/SSE agent replay suite. + +### Agent replay benchmark + +Start a Kun runtime, set `KUN_RUNTIME_URL` and `KUN_RUNTIME_TOKEN`, then run the five-task smoke set: + +```bash +npm run benchmark:replay -- --suite benchmarks/agent-core.json --tag smoke --output replay-smoke.json +``` + +Run all 20 tasks twice and compare with an earlier report: + +```bash +npm run benchmark:replay -- --suite benchmarks/agent-core.json --repeat 2 \ + --baseline replay-baseline.json --output replay-current.json --fail-on-regression +``` + +Replay threads always use the `read-only` sandbox and disable interactive input. Reports include success rate, +TTFT, full latency, tool time, SSE delivery delay, token/cache/cost counters, and Kun process peak RSS. The runtime +token is accepted only through `KUN_RUNTIME_TOKEN`, so it does not leak through process arguments. + ## CLI `kun serve` accepts the following flags: diff --git a/kun/benchmarks/agent-core.json b/kun/benchmarks/agent-core.json new file mode 100644 index 000000000..66d078d5c --- /dev/null +++ b/kun/benchmarks/agent-core.json @@ -0,0 +1,130 @@ +{ + "version": 1, + "name": "kun-agent-core", + "defaults": { + "reasoningEffort": "off", + "timeoutMs": 300000 + }, + "tasks": [ + { + "id": "architecture-summary", + "tags": ["smoke", "architecture"], + "prompt": "Read the repository and explain the active Renderer -> preload -> main -> Kun runtime data path. Cite the most relevant file paths. 
Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } + }, + { + "id": "runtime-entrypoint", + "tags": ["smoke", "runtime"], + "prompt": "Find the Kun serve-mode composition root and summarize how stores, model clients, tools, and the agent loop are assembled. Cite exact file paths. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } + }, + { + "id": "renderer-send-flow", + "tags": ["smoke", "frontend"], + "prompt": "Trace a chat message from the renderer composer through the preload/main bridge to the Kun turn endpoint. Return a concise ordered call path with files. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } + }, + { + "id": "sse-replay", + "tags": ["smoke", "runtime"], + "prompt": "Explain how Kun SSE event replay avoids duplicates and cursor rewind after reconnect or restart. Cite the implementation and tests. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } + }, + { + "id": "mcp-lifecycle", + "tags": ["smoke", "mcp"], + "prompt": "Inspect MCP startup, tool discovery, execution, and reconnect behavior. Identify the main reliability boundaries and cite the implementation files. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } + }, + { + "id": "cache-prefix", + "tags": ["cache"], + "prompt": "Explain what makes Kun's immutable prompt prefix stable and list dynamic data that must remain outside it. Cite code and documentation. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } + }, + { + "id": "provider-url-contract", + "tags": ["provider"], + "prompt": "Trace how baseUrl and endpointFormat affect provider URL construction and request bodies across chat and auxiliary model calls. Cite all important consumers. 
Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } + }, + { + "id": "attachment-flow", + "tags": ["attachments"], + "prompt": "Trace an image or local file attachment from renderer selection to model input or fallback. Identify the cross-layer contract fields and failure points. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } + }, + { + "id": "approval-flow", + "tags": ["runtime", "security"], + "prompt": "Trace a tool approval request from agent loop creation through SSE/UI resolution back to tool execution. Cite routes, gates, and renderer handlers. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } + }, + { + "id": "goal-resume", + "tags": ["runtime", "goal"], + "prompt": "Explain how active goals survive runtime restart, how orphaned turns are reconciled, and where auto-resume is triggered. Cite tests if present. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } + }, + { + "id": "subagent-permissions", + "tags": ["subagent", "security"], + "prompt": "Explain how subagent tool policies inherit or restrict built-in tools, MCP servers, and skills without escalating the parent permissions. Cite enforcement points. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } + }, + { + "id": "settings-persistence", + "tags": ["settings"], + "prompt": "Trace a Kun settings change from renderer state through validation/persistence to managed runtime restart. Highlight rollback behavior. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } + }, + { + "id": "test-selection", + "tags": ["quality"], + "prompt": "Identify how the verify_changes tool selects and runs validation after edits. Explain its safety limits and output contract. 
Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } + }, + { + "id": "build-pipeline", + "tags": ["build"], + "prompt": "Summarize the development, typecheck, test, build, and packaging pipeline for Kun. Cite package scripts and packaging configuration. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } + }, + { + "id": "security-boundaries", + "tags": ["security"], + "prompt": "Map the main trust boundaries for renderer IPC, filesystem tools, command execution, MCP, and secrets. Cite concrete enforcement files. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } + }, + { + "id": "runtime-hotspots", + "tags": ["performance"], + "prompt": "Inspect runtime event persistence, SSE replay, tool execution, and context assembly. Identify three evidence-based performance or memory hotspots with file references. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } + }, + { + "id": "thread-persistence", + "tags": ["storage"], + "prompt": "Explain how thread/session data is persisted and indexed across file and hybrid SQLite stores, including usage carryover. Cite implementation files. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } + }, + { + "id": "model-capabilities", + "tags": ["provider"], + "prompt": "Explain how model capabilities control image input, tool calling, reasoning effort, endpoint format, and context limits. Cite schemas and request construction. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } + }, + { + "id": "frontend-chunking", + "tags": ["frontend", "performance"], + "prompt": "Inspect renderer lazy loading and identify which Workbench surfaces are split into separate chunks and which heavy chat dependencies still load eagerly. 
Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } + }, + { + "id": "failure-recovery", + "tags": ["runtime", "reliability"], + "prompt": "Map how the desktop app detects an unhealthy Kun child, budgets restarts, distinguishes settings restarts from crashes, and reports status to the renderer. Do not modify files.", + "expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] } + } + ] +} diff --git a/kun/package.json b/kun/package.json index d5de81c3b..f69259cd1 100644 --- a/kun/package.json +++ b/kun/package.json @@ -61,6 +61,7 @@ "test": "vitest run", "test:watch": "vitest", "transcript:diff": "node ./scripts/transcript-diff.mjs", + "benchmark:replay": "npm run build && node ./dist/cli/replay-entry.js", "serve": "node ./dist/cli/serve-entry.js", "dev": "tsc -p tsconfig.build.json --watch" }, diff --git a/kun/src/adapters/hybrid/hybrid-thread-store.ts b/kun/src/adapters/hybrid/hybrid-thread-store.ts index e9578db0a..4fc5873e9 100644 --- a/kun/src/adapters/hybrid/hybrid-thread-store.ts +++ b/kun/src/adapters/hybrid/hybrid-thread-store.ts @@ -932,6 +932,10 @@ function mergeTurnMetadata(previous: Turn, next: Turn): Turn { attachmentIds: mergeStringArrays(previous.attachmentIds, next.attachmentIds), activeSkillIds: mergeStringArrays(previous.activeSkillIds, next.activeSkillIds), injectedMemoryIds: mergeStringArrays(previous.injectedMemoryIds, next.injectedMemoryIds), + injectedMemorySummaries: + next.injectedMemorySummaries.length > 0 + ? next.injectedMemorySummaries + : previous.injectedMemorySummaries, items: mergeTurnItems(previous.items, next.items) } } @@ -971,6 +975,7 @@ function turnFromItems(threadId: string, turnId: string, items: TurnItem[], fall attachmentIds: attachmentIdsFromItems(items), activeSkillIds: [], injectedMemoryIds: [], + injectedMemorySummaries: [], createdAt, finishedAt: hasOpenItem ? undefined : items[items.length - 1]?.finishedAt ?? 
fallbackTime, items diff --git a/kun/src/adapters/tool/background-shell-tool.ts b/kun/src/adapters/tool/background-shell-tool.ts new file mode 100644 index 000000000..1fa8dfffd --- /dev/null +++ b/kun/src/adapters/tool/background-shell-tool.ts @@ -0,0 +1,142 @@ +import { LocalToolHost, type LocalTool } from './local-tool-host.js' +import { withToolBoundary } from './builtin-tool-utils.js' +import type { BackgroundShellRecordInput } from './builtin-tool-types.js' +import { + isBashSessionId, + listBashSessionRecords, + pollBashSession, + readBashSessionPayload, + stopBashSessionById, + writeBashSessionStdin +} from './builtin-bash-tool.js' + +export type BackgroundShellToolOptions = { + listBackgroundSessions?: (threadId?: string) => readonly BackgroundShellRecordInput[] +} + + +function normalizeYieldSeconds(value: unknown): number { + const raw = typeof value === 'number' && Number.isFinite(value) ? Math.floor(value) : 10 + return Math.max(1, Math.min(60, raw)) +} + +export function createBackgroundShellTool(options: BackgroundShellToolOptions = {}): LocalTool { + return LocalToolHost.defineTool({ + name: 'background_shell', + description: + 'Manage shell sessions started with bash background=true. The bash tool assigns an 8-character session_id when starting a background command; use that id here. action="list" lists running sessions by default (set include_finished=true to also show completed/stopped/failed sessions; optional thread_only). action="read" returns a non-blocking output snapshot. action="poll" waits up to yield_seconds for more output or exit. action="write" sends stdin via input. action="stop" terminates a running session.', + inputSchema: { + type: 'object', + properties: { + action: { + type: 'string', + enum: ['list', 'read', 'poll', 'write', 'stop'] + }, + session_id: { + type: 'string', + description: 'Required for read, poll, write, and stop. The 8-character id returned by bash when background=true.' 
+ }, + yield_seconds: { type: 'number' }, + include_finished: { type: 'boolean', default: false }, + thread_only: { type: 'boolean', default: true }, + input: { type: 'string' } + }, + required: ['action'], + additionalProperties: false + }, + policy: 'auto', + toolKind: 'tool_call', + execute: async (args, context) => + withToolBoundary(async () => { + const action = typeof args.action === 'string' ? args.action.trim() : '' + if (action === 'list') { + const threadOnly = args.thread_only !== false + const threadId = threadOnly ? context.threadId : undefined + let sessions = options.listBackgroundSessions + ? [...options.listBackgroundSessions(threadId)] + : await listBashSessionRecords(threadId) + if (args.include_finished !== true) { + sessions = sessions.filter((session) => session.status === 'running') + } + return { + output: { + sessions: sessions.map((session) => ({ + session_id: session.id, + command: session.command, + cwd: session.cwd, + shell: session.shell, + status: session.status, + started_at: session.startedAt, + ...(session.finishedAt ? { finished_at: session.finishedAt } : {}), + exit_code: session.exitCode, + output: session.output, + ...(session.outputTruncated ? { output_truncated: true } : {}), + ...(session.outputFilePath ? { output_file: session.outputFilePath } : {}), + detached: session.detached + })), + running: sessions.filter((session) => session.status === 'running').length + } + } + } + + const sessionId = typeof args.session_id === 'string' ? 
args.session_id.trim() : '' + if (!sessionId) { + return { output: { error: 'session_id is required' }, isError: true } + } + if (!isBashSessionId(sessionId)) { + return { + output: { + error: 'session_id must be the 8-character id returned by bash when background=true', + session_id: sessionId + }, + isError: true + } + } + + if (action === 'read') { + const payload = await readBashSessionPayload(sessionId) + if (!payload) { + return { output: { error: 'background shell session not found', session_id: sessionId }, isError: true } + } + return { output: payload, isError: payload.status === 'failed' } + } + + if (action === 'stop') { + const stopped = await stopBashSessionById(sessionId) + const payload = await readBashSessionPayload(sessionId) + if (!payload) { + return { + output: { error: 'background shell session not found', session_id: sessionId, stopped }, + isError: true + } + } + return { + output: { ...payload, stop_sent: stopped }, + isError: payload.status === 'running' || payload.status === 'failed' + } + } + + if (action === 'write') { + const payload = await writeBashSessionStdin( + sessionId, + typeof args.input === 'string' ? 
args.input : '', + normalizeYieldSeconds(args.yield_seconds) + ) + if (!payload) { + return { output: { error: 'background shell session not found', session_id: sessionId }, isError: true } + } + return { output: payload, isError: payload.status === 'failed' } + } + + if (action === 'poll') { + const payload = await pollBashSession(sessionId, normalizeYieldSeconds(args.yield_seconds)) + if (!payload) { + return { output: { error: 'background shell session not found', session_id: sessionId }, isError: true } + } + return { output: payload, isError: payload.status === 'failed' } + } + + return { output: { error: `unsupported background_shell action: ${action}` }, isError: true } + }) + }) +} diff --git a/kun/src/adapters/tool/builtin-bash-tool.ts b/kun/src/adapters/tool/builtin-bash-tool.ts index 1024d34c9..3a040d6ed 100644 --- a/kun/src/adapters/tool/builtin-bash-tool.ts +++ b/kun/src/adapters/tool/builtin-bash-tool.ts @@ -1,11 +1,14 @@ import { mkdir } from 'node:fs/promises' -import { randomUUID } from 'node:crypto' +import { randomBytes } from 'node:crypto' import { spawn, type ChildProcessWithoutNullStreams } from 'node:child_process' import { LocalToolHost, type LocalTool } from './local-tool-host.js' import { OutputAccumulator } from './output-accumulator.js' import { DEFAULT_MAX_BYTES, DEFAULT_MAX_LINES, formatSize } from './truncate.js' -import type { BashLocalToolOptions, TextSlice, TruncateMode } from './builtin-tool-types.js' +import type { BashLocalToolOptions, TextSlice, TruncateMode, BackgroundShellRecordInput } from './builtin-tool-types.js' import { DEFAULT_BASH_TIMEOUT_SECONDS } from './builtin-tool-types.js' +import { + BackgroundShellOutputWriter +} from '../../services/background-shell-output.js' import { describeKind, normalizePositiveInteger, @@ -28,6 +31,8 @@ type BashSessionStatus = 'running' | 'completed' | 'stopped' | 'failed' type BashSession = { id: string + threadId?: string + turnId?: string command: string cwd: string shell: string @@ 
-40,7 +45,9 @@ type BashSession = { error?: string stopRequested: boolean finalized: boolean + detached: boolean exitWaiters: Set<() => void> + outputWriter?: BackgroundShellOutputWriter } type BashPayload = { @@ -49,8 +56,8 @@ type BashPayload = { shell: string exit_code: number | null output: string - full_output_path: string | null - truncation: null | { + full_output_path?: string | null + truncation?: null | { total_lines: number output_lines: number total_bytes: number @@ -66,6 +73,7 @@ type BashPayload = { partial?: boolean stop_sent?: boolean error?: string + output_file?: string } const bashSessions = new Map() @@ -241,8 +249,24 @@ function sleep(ms: number): Promise { return new Promise((resolve) => setTimeout(resolve, ms)) } +const SESSION_ID_ALPHABET = 'abcdefghijklmnopqrstuvwxyz0123456789' +const SESSION_ID_LENGTH = 8 +const SESSION_ID_PATTERN = /^[a-z0-9]{8}$/ + function nextSessionId(): string { - return `bash_${randomUUID().replace(/-/g, '').slice(0, 12)}` + for (let attempt = 0; attempt < 64; attempt++) { + const bytes = randomBytes(SESSION_ID_LENGTH) + let id = '' + for (let i = 0; i < SESSION_ID_LENGTH; i++) { + id += SESSION_ID_ALPHABET[bytes[i]! 
% SESSION_ID_ALPHABET.length] + } + if (!bashSessions.has(id)) return id + } + throw new Error('failed to allocate unique bash session id') +} + +export function isBashSessionId(value: unknown): value is string { + return typeof value === 'string' && SESSION_ID_PATTERN.test(value) } function textSliceFromSnapshot(snapshot: ReturnType): TextSlice { @@ -297,13 +321,43 @@ async function finalizeSessionOutput(session: BashSession): Promise { await sleep(SESSION_EXIT_FLUSH_MS) session.output.finish() await session.output.closeTempFile() + await session.outputWriter?.close() session.finalized = true } +async function backgroundSessionPayload( + session: BashSession, + options: { stopSent?: boolean } = {} +): Promise { + if (session.status !== 'running') { + await finalizeSessionOutput(session) + } + const fields = await backgroundShellOutputFields(session) + return { + command: session.command, + cwd: session.cwd, + shell: session.shell, + exit_code: session.exitCode, + output: fields.output, + output_file: fields.output_file, + session_id: session.id, + status: session.status, + started_at: session.startedAt, + ...(session.finishedAt ? { finished_at: session.finishedAt } : {}), + ...(typeof session.child.pid === 'number' ? { pid: session.child.pid } : {}), + ...(session.status === 'running' ? { partial: true } : {}), + ...(options.stopSent ? { stop_sent: true } : {}), + ...(session.error ? 
{ error: session.error } : {}) + } +} + async function sessionPayload( session: BashSession, options: { stopSent?: boolean } = {} ): Promise { + if (session.outputWriter) { + return backgroundSessionPayload(session, options) + } if (session.status !== 'running') { await finalizeSessionOutput(session) } @@ -377,21 +431,132 @@ function normalizeYieldSeconds(value: unknown): number { return Math.max(1, Math.min(MAX_BASH_YIELD_SECONDS, raw)) } +function recordFromSession( + session: BashSession, + output: string, + truncated?: boolean, + detached = false, + outputFilePath?: string +): BackgroundShellRecordInput { + return { + id: session.id, + threadId: session.threadId ?? '', + turnId: session.turnId ?? '', + command: session.command, + cwd: session.cwd, + shell: session.shell, + status: session.status, + startedAt: session.startedAt, + ...(session.finishedAt ? { finishedAt: session.finishedAt } : {}), + exitCode: session.exitCode, + output, + ...(truncated ? { outputTruncated: true } : {}), + ...(outputFilePath ? { outputFilePath } : {}), + ...(session.error ? 
{ error: session.error } : {}), + detached + } +} + +async function backgroundShellOutputFields(session: BashSession): Promise<{ + output: string + output_truncated: boolean + output_total_chars: number + output_file: string +}> { + const writer = session.outputWriter + if (!writer) { + return { + output: '', + output_truncated: false, + output_total_chars: 0, + output_file: '' + } + } + const fields = await writer.buildReturnFields() + return { + output: fields.summary, + output_truncated: fields.truncated, + output_total_chars: fields.totalChars, + output_file: fields.output_file + } +} + +async function recordFromBackgroundSession(session: BashSession, detached: boolean): Promise { + const fields = await backgroundShellOutputFields(session) + return recordFromSession( + session, + fields.output, + fields.output_truncated, + detached, + fields.output_file + ) +} + function sessionById(sessionId: unknown): BashSession | null { const id = typeof sessionId === 'string' ? sessionId.trim() : '' return id ? bashSessions.get(id) ?? 
null : null } -async function startBashSession( +export async function stopBashSessionById(sessionId: string): Promise { + const session = sessionById(sessionId) + if (!session || session.status !== 'running') return false + stopSession(session) + await waitForSessionExitOrDelay(session, STOP_GRACE_MS) + return session.status !== 'running' +} + +export async function readBashSessionPayload(sessionId: string): Promise { + const session = sessionById(sessionId) + if (!session) return null + return sessionPayload(session) +} + +export async function listBashSessionRecords(threadId?: string): Promise { + const records: BackgroundShellRecordInput[] = [] + for (const session of bashSessions.values()) { + if (threadId && session.threadId !== threadId) continue + records.push(await recordFromBackgroundSession(session, session.detached)) + } + return records.sort((a, b) => b.startedAt.localeCompare(a.startedAt)) +} + +export async function pollBashSession(sessionId: string, yieldSeconds: number): Promise { + const session = sessionById(sessionId) + if (!session) return null + await waitForSessionExitOrDelay(session, normalizeYieldSeconds(yieldSeconds) * 1000) + return sessionPayload(session) +} + +export async function writeBashSessionStdin( + sessionId: string, + input: string, + yieldSeconds: number +): Promise { + const session = sessionById(sessionId) + if (!session) return null + if (session.status !== 'running') return sessionPayload(session) + session.child.stdin.write(input) + await waitForSessionExitOrDelay(session, normalizeYieldSeconds(yieldSeconds) * 1000) + return sessionPayload(session) +} + +async function startBackgroundBashSession( input: { command: string cwd: string + threadId: string + turnId: string signal: AbortSignal timeoutSeconds: number - yieldSeconds: number + detached: boolean + dataDir?: string }, + hooks: BashLocalToolOptions['backgroundShell'], onUpdate?: (update: { output: unknown; isError?: boolean }) => Promise | void ): Promise<{ payload: 
BashPayload; isError?: boolean }> { + if (!input.dataDir?.trim()) { + throw new Error('background shell sessions require runtime dataDir') + } await mkdir(input.cwd, { recursive: true }) const shellRuntime = shellRuntimeInfo() const child = spawn(shellRuntime.shell, shellCommandArgs(shellRuntime, input.command), { @@ -401,22 +566,38 @@ async function startBashSession( stdio: ['pipe', 'pipe', 'pipe'], windowsHide: true }) + const sessionId = nextSessionId() + const outputWriter = new BackgroundShellOutputWriter(input.dataDir, input.threadId, sessionId) + await outputWriter.open() const session: BashSession = { - id: nextSessionId(), + id: sessionId, + threadId: input.threadId, + turnId: input.turnId, command: input.command, cwd: input.cwd, shell: shellRuntime.name, child, output: createOutputAccumulator(), + outputWriter, startedAt: new Date().toISOString(), exitCode: null, status: 'running', stopRequested: false, finalized: false, + detached: input.detached, exitWaiters: new Set() } bashSessions.set(session.id, session) + const notifyUpdated = async () => { + if (!hooks) return + await hooks.onSessionUpdated?.(await recordFromBackgroundSession(session, input.detached)) + } + const notifySettled = async () => { + if (!hooks) return + await hooks.onSessionSettled?.(await recordFromBackgroundSession(session, input.detached)) + } + let updateDirty = false let updateTimer: NodeJS.Timeout | undefined let lastUpdateAt = 0 @@ -425,7 +606,9 @@ async function startBashSession( if (!liveUpdates || !onUpdate || !updateDirty) return updateDirty = false lastUpdateAt = Date.now() - await onUpdate({ output: await sessionPayload(session) }) + const payload = await sessionPayload(session) + await onUpdate({ output: payload }) + void notifyUpdated() } const scheduleUpdate = () => { if (!liveUpdates || !onUpdate) return @@ -443,49 +626,38 @@ async function startBashSession( } const handleData = (chunk: Buffer | string) => { if (session.finalized) return - 
session.output.append(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk)) + const buffer = Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk) + session.output.append(buffer) + session.outputWriter?.append(buffer) scheduleUpdate() } child.stdout.on('data', handleData) child.stderr.on('data', handleData) child.once('error', (error) => { settleSession(session, 'failed', null, error.message) + void notifySettled() }) child.once('exit', (code) => { settleSession(session, session.stopRequested ? 'stopped' : 'completed', code) + void notifySettled() }) - const onAbort = () => stopSession(session) - input.signal.addEventListener('abort', onAbort, { once: true }) - const timeoutMs = input.timeoutSeconds * 1000 - const yieldMs = Math.min(input.yieldSeconds * 1000, timeoutMs) - const exited = await waitForSessionExitOrDelay(session, yieldMs) - input.signal.removeEventListener('abort', onAbort) - if (updateTimer) clearTimeout(updateTimer) - - if (input.signal.aborted) { - liveUpdates = false - stopSession(session) - throw new Error('command aborted') - } - if (!exited && timeoutMs <= yieldMs) { - liveUpdates = false - stopSession(session) - await waitForSessionExitOrDelay(session, STOP_GRACE_MS) - throw new Error(`command timed out after ${input.timeoutSeconds} seconds`) - } - - if (exited) { - await emitUpdate() - liveUpdates = false - const payload = await sessionPayload(session) - if (session.status === 'failed') return { payload, isError: true } - return { payload, isError: session.exitCode !== null && session.exitCode !== 0 } + const initialPayload = await sessionPayload(session) + await hooks?.onSessionStarted?.(await recordFromBackgroundSession(session, input.detached)) + + if (input.detached) { + const timeoutMs = input.timeoutSeconds * 1000 + const timeoutTimer = setTimeout(() => { + if (session.status !== 'running') return + stopSession(session) + }, timeoutMs) + timeoutTimer.unref?.() + child.once('exit', () => clearTimeout(timeoutTimer)) + child.once('error', () 
=> clearTimeout(timeoutTimer)) + return { payload: initialPayload } } - await emitUpdate() - liveUpdates = false - return { payload: await sessionPayload(session) } + throw new Error('startBackgroundBashSession requires detached=true') } function appendTruncationNotice(text: string, truncated: TextSlice, mode: TruncateMode): string { @@ -499,22 +671,18 @@ function appendTruncationNotice(text: string, truncated: TextSlice, mode: Trunca export function createBashLocalTool(options: BashLocalToolOptions = {}): LocalTool { const bashOps = options.operations + const shellHooks = options.backgroundShell + const backgroundShellDataDir = options.backgroundShellDataDir const shellRuntime = shellRuntimeInfo() return LocalToolHost.defineTool({ name: 'bash', - description: `Execute a shell command in the workspace using the host platform shell. Current shell: ${shellRuntime.name}. Use ${shellRuntime.syntax} syntax. Return combined stdout and stderr. Long-running commands return a session_id; use action="poll" to block up to yield_seconds (default ${DEFAULT_BASH_YIELD_SECONDS}s, max ${MAX_BASH_YIELD_SECONDS}s) waiting for more output or process exit, action="write" with input to send stdin, or action="stop" to terminate the session.`, + description: `Execute a shell command in the workspace using the host platform shell. Current shell: ${shellRuntime.name}. Use ${shellRuntime.syntax} syntax. Return combined stdout and stderr. Runs synchronously by default (background defaults to false). Set background=true to start a detached session that keeps running after the turn ends; the tool assigns an 8-character session_id in the response. 
Use the background_shell tool to list, read, poll, write, or stop background sessions.`, inputSchema: { type: 'object', properties: { command: { type: 'string' }, timeout: { type: 'number' }, - yield_seconds: { type: 'number' }, - action: { - type: 'string', - enum: ['run', 'poll', 'write', 'stop'] - }, - session_id: { type: 'string' }, - input: { type: 'string' } + background: { type: 'boolean', default: false } }, required: [], additionalProperties: false @@ -522,53 +690,34 @@ export function createBashLocalTool(options: BashLocalToolOptions = {}): LocalTo policy: 'on-request', toolKind: 'command_execution', execute: async (args, context, onUpdate) => withToolBoundary(async () => { - const action = typeof args.action === 'string' ? args.action.trim() : '' - if (action && action !== 'run') { - if (action !== 'poll' && action !== 'write' && action !== 'stop') { - return { output: { error: `unsupported bash session action: ${action}` }, isError: true } - } - const session = sessionById(args.session_id) - if (!session) { - return { output: { error: 'bash session not found', session_id: args.session_id ?? null }, isError: true } - } - if (action === 'write') { - if (session.status !== 'running') { - return { output: await sessionPayload(session), isError: true } - } - const input = typeof args.input === 'string' ? 
args.input : '' - session.child.stdin.write(input) - await waitForSessionExitOrDelay(session, normalizeYieldSeconds(args.yield_seconds) * 1000) - const payload = await sessionPayload(session) - return { output: payload, isError: payload.status === 'failed' } - } - if (action === 'stop') { - stopSession(session) - await waitForSessionExitOrDelay(session, STOP_GRACE_MS) - const payload = await sessionPayload(session, { stopSent: true }) - return { output: payload, isError: session.status === 'running' || session.status === 'failed' } - } - await waitForSessionExitOrDelay(session, normalizeYieldSeconds(args.yield_seconds) * 1000) - return { output: await sessionPayload(session), isError: session.status === 'failed' } - } - const command = typeof args.command === 'string' ? args.command : '' if (!command.trim()) return { output: { error: 'command is required' }, isError: true } const timeout = normalizePositiveInteger( args.timeout, options.defaultTimeoutSeconds ?? DEFAULT_BASH_TIMEOUT_SECONDS ) - const yieldSeconds = normalizeYieldSeconds(args.yield_seconds) + const background = args.background === true const cwd = workspaceRoot(context.workspace) try { - if (!bashOps?.exec) { - const result = await startBashSession( + if (background) { + if (bashOps?.exec) { + return { + output: { error: 'background sessions are not supported with custom bash exec operations' }, + isError: true + } + } + const result = await startBackgroundBashSession( { command, cwd, + threadId: context.threadId, + turnId: context.turnId, signal: context.abortSignal, timeoutSeconds: timeout, - yieldSeconds + detached: true, + dataDir: backgroundShellDataDir }, + shellHooks, onUpdate ) return { @@ -582,7 +731,7 @@ export function createBashLocalTool(options: BashLocalToolOptions = {}): LocalTo context.abortSignal, timeout, onUpdate, - bashOps.exec + bashOps?.exec ) const payload = resultPayload({ command, diff --git a/kun/src/adapters/tool/builtin-tool-types.ts 
b/kun/src/adapters/tool/builtin-tool-types.ts index 6436d1405..24f0f1873 100644 --- a/kun/src/adapters/tool/builtin-tool-types.ts +++ b/kun/src/adapters/tool/builtin-tool-types.ts @@ -120,9 +120,36 @@ export type ReadLocalToolOptions = { operations?: ReadLocalToolOperations } +export type BackgroundShellRecordInput = { + id: string + threadId: string + turnId: string + command: string + cwd: string + shell: string + status: 'running' | 'completed' | 'stopped' | 'failed' + startedAt: string + finishedAt?: string + exitCode: number | null + output: string + outputTruncated?: boolean + outputFilePath?: string + error?: string + detached: boolean +} + +export type BackgroundShellHooks = { + onSessionStarted?: (record: BackgroundShellRecordInput) => void | Promise + onSessionUpdated?: (record: BackgroundShellRecordInput) => void | Promise + onSessionSettled?: (record: BackgroundShellRecordInput) => void | Promise + isDetachedSession?: (sessionId: string) => boolean +} + export type BashLocalToolOptions = { defaultTimeoutSeconds?: number operations?: BashLocalToolOperations + backgroundShell?: BackgroundShellHooks + backgroundShellDataDir?: string } export type WriteLocalToolOptions = { diff --git a/kun/src/adapters/tool/builtin-tool-utils.symlink.test.ts b/kun/src/adapters/tool/builtin-tool-utils.symlink.test.ts index e2fd5ff01..0a360b6c7 100644 --- a/kun/src/adapters/tool/builtin-tool-utils.symlink.test.ts +++ b/kun/src/adapters/tool/builtin-tool-utils.symlink.test.ts @@ -4,6 +4,7 @@ import { join } from 'node:path' import { afterEach, beforeEach, describe, expect, it } from 'vitest' import type { ToolHostContext } from '../../ports/tool-host.js' import { resolveWorkspacePath } from './builtin-tool-utils.js' +import { resolveBackgroundShellOutputPaths } from '../../services/background-shell-output.js' function context(workspace: string): ToolHostContext { return { @@ -114,6 +115,20 @@ describe('resolveWorkspacePath sandbox mode', () => { ).rejects.toThrow(/escapes the 
workspace root/) }) + it('allows background shell output files outside the workspace in read-only sandbox', async () => { + const runtimeDataDir = join(base, 'runtime-data') + const { outputFilePath } = resolveBackgroundShellOutputPaths(runtimeDataDir, 'thr_1', 'abcd1234') + await mkdir(join(runtimeDataDir, 'threads', 'thr_1', 'background-shells'), { recursive: true }) + await writeFile(outputFilePath, 'full log') + const resolved = await resolveWorkspacePath(outputFilePath, { + ...context(workspace), + sandboxMode: 'read-only', + runtimeDataDir, + threadId: 'thr_1' + }) + expect(resolved.absolutePath).toBe(outputFilePath) + }) + it('does not require the workspace root to exist under danger-full-access', async () => { const missingWs = join(base, 'does-not-exist') const target = join(outside, 'sys.txt') diff --git a/kun/src/adapters/tool/builtin-tool-utils.ts b/kun/src/adapters/tool/builtin-tool-utils.ts index e60445ab0..c2d63f6a1 100644 --- a/kun/src/adapters/tool/builtin-tool-utils.ts +++ b/kun/src/adapters/tool/builtin-tool-utils.ts @@ -4,6 +4,7 @@ import { spawn, spawnSync, type ChildProcess } from 'node:child_process' import { basename, dirname, isAbsolute, join, relative, resolve, sep, win32 } from 'node:path' import type { ToolHostContext } from '../../ports/tool-host.js' import { effectiveSandboxMode } from './sandbox-policy.js' +import { isBackgroundShellOutputPath } from '../../services/background-shell-output.js' import type { EditInstruction, FsStats, @@ -65,6 +66,18 @@ export async function resolveWorkspacePath(inputPath: string, context: ToolHostC }> { const root = workspaceRoot(context.workspace) const lexicalAbsolutePath = isAbsolute(inputPath) ? 
resolve(inputPath) : resolve(root, inputPath) + if ( + isBackgroundShellOutputPath(lexicalAbsolutePath, { + runtimeDataDir: context.runtimeDataDir, + threadId: context.threadId + }) + ) { + return { + workspaceRoot: root, + absolutePath: resolve(lexicalAbsolutePath), + relativePath: relative(root, resolve(lexicalAbsolutePath)) || '.' + } + } // In full-access mode the workspace boundary is not enforced: the user has // explicitly opted into reaching paths outside the workspace. This mirrors // canWritePath(), which already permits writes anywhere under diff --git a/kun/src/adapters/tool/mcp-tool-provider.test.ts b/kun/src/adapters/tool/mcp-tool-provider.test.ts new file mode 100644 index 000000000..a30aecc35 --- /dev/null +++ b/kun/src/adapters/tool/mcp-tool-provider.test.ts @@ -0,0 +1,143 @@ +import { describe, expect, it, vi } from 'vitest' +import { McpCapabilityConfig, type McpServerConfig } from '../../contracts/capabilities.js' +import type { ToolHostContext } from '../../ports/tool-host.js' +import { + buildMcpToolProviders, + type McpClientLifecycleHandlers, + type McpClientLike, + type McpToolDescriptor +} from './mcp-tool-provider.js' + +class MockMcpClient implements McpClientLike { + lifecycle: McpClientLifecycleHandlers = {} + close = vi.fn(async () => undefined) + + constructor( + private readonly tools: McpToolDescriptor[], + readonly callTool: McpClientLike['callTool'] + ) {} + + async listTools(): Promise<{ tools: McpToolDescriptor[] }> { + return { tools: this.tools } + } + + setLifecycleHandlers(handlers: McpClientLifecycleHandlers): void { + this.lifecycle = handlers + } +} + +const server: McpServerConfig = { + enabled: true, + transport: 'streamable-http', + url: 'http://127.0.0.1:39999/mcp', + headers: {}, + args: [], + env: {}, + workspaceRoots: [], + trustScope: 'user', + trustedWorkspaceRoots: [], + timeoutMs: 1_000 +} + +const config = McpCapabilityConfig.parse({ + enabled: true, + servers: { docs: server }, + search: { enabled: false } 
+}) + +const context: ToolHostContext = { + threadId: 'thread_test', + turnId: 'turn_test', + workspace: '/workspace', + approvalPolicy: 'auto', + abortSignal: new AbortController().signal, + awaitApproval: vi.fn() +} + +const descriptor: McpToolDescriptor = { + name: 'lookup', + description: 'Lookup docs', + inputSchema: { type: 'object', properties: {} }, + annotations: { readOnlyHint: true } +} + +describe('mcp tool provider reliability', () => { + it('shares one reconnect across concurrent tool calls after a transport failure', async () => { + const first = new MockMcpClient([descriptor], vi.fn(async () => { + throw new Error('socket connection reset') + })) + const second = new MockMcpClient([descriptor], vi.fn(async () => ({ ok: true }))) + const clientFactory = vi.fn() + .mockResolvedValueOnce(first) + .mockResolvedValueOnce(second) + + const built = await buildMcpToolProviders(config, { + clientFactory, + nowIso: () => '2026-06-29T00:00:00.000Z' + }) + const tool = built.providers[0]?.tools[0] + expect(tool?.name).toBe('mcp_docs_lookup') + + const [one, two] = await Promise.all([ + tool!.execute({}, context), + tool!.execute({}, context) + ]) + + expect(clientFactory).toHaveBeenCalledTimes(2) + expect(first.close).toHaveBeenCalledTimes(1) + expect(second.callTool).toHaveBeenCalledTimes(2) + expect(one).toMatchObject({ output: { result: { ok: true } } }) + expect(two).toMatchObject({ output: { result: { ok: true } } }) + expect(built.diagnostics[0]).toMatchObject({ + id: 'docs', + status: 'connected', + available: true, + reconnectAttempts: 1 + }) + }) + + it('marks lifecycle transport close as offline and reconnects on the next call', async () => { + const first = new MockMcpClient([descriptor], vi.fn(async () => ({ stale: true }))) + const second = new MockMcpClient([descriptor], vi.fn(async () => ({ fresh: true }))) + const clientFactory = vi.fn() + .mockResolvedValueOnce(first) + .mockResolvedValueOnce(second) + + const built = await 
buildMcpToolProviders(config, { clientFactory }) + first.lifecycle.onClose?.() + expect(built.diagnostics[0]).toMatchObject({ + status: 'error', + available: false, + lastError: 'MCP transport closed' + }) + + const tool = built.providers[0]!.tools[0]! + const result = await tool.execute({}, context) + + expect(result).toMatchObject({ output: { result: { fresh: true } } }) + expect(clientFactory).toHaveBeenCalledTimes(2) + expect(built.diagnostics[0]).toMatchObject({ + status: 'connected', + available: true, + reconnectAttempts: 1 + }) + }) + + it('does not mark deterministic tool errors as offline', async () => { + const client = new MockMcpClient([descriptor], vi.fn(async () => { + throw new Error('Invalid arguments: query is required') + })) + const built = await buildMcpToolProviders(config, { + clientFactory: vi.fn(async () => client) + }) + const tool = built.providers[0]!.tools[0]! + + await expect(tool.execute({}, context)).rejects.toThrow('Invalid arguments') + + expect(built.diagnostics[0]).toMatchObject({ + status: 'connected', + available: true, + lastError: 'Invalid arguments: query is required' + }) + }) +}) diff --git a/kun/src/adapters/tool/mcp-tool-provider.ts b/kun/src/adapters/tool/mcp-tool-provider.ts index 046fb869b..96cc53569 100644 --- a/kun/src/adapters/tool/mcp-tool-provider.ts +++ b/kun/src/adapters/tool/mcp-tool-provider.ts @@ -50,6 +50,12 @@ export type McpClientLike = { options?: { signal?: AbortSignal; timeout?: number } ): Promise close(): Promise + setLifecycleHandlers?(handlers: McpClientLifecycleHandlers): void +} + +export type McpClientLifecycleHandlers = { + onError?: (error: Error) => void + onClose?: () => void } export type McpServerDiagnostic = { @@ -58,11 +64,15 @@ export type McpServerDiagnostic = { transport: McpServerConfig['transport'] trustScope: McpServerConfig['trustScope'] available: boolean - status: 'disabled' | 'connected' | 'error' + status: 'disabled' | 'connected' | 'reconnecting' | 'error' toolCount: number 
catalogFingerprint?: string catalogDrift?: boolean lastConnectedAt?: string + lastDisconnectedAt?: string + lastReconnectAt?: string + nextReconnectAt?: string + reconnectAttempts?: number lastError?: string } @@ -131,7 +141,16 @@ type McpConnectionState = { catalogFingerprint?: string catalogDrift?: boolean lastConnectedAt?: string + lastDisconnectedAt?: string + lastReconnectAt?: string + nextReconnectAt?: string + reconnectAttempts: number + reconnectBackoffMs: number + reconnectPromise?: Promise lastError?: string + status: 'connected' | 'reconnecting' | 'error' + diagnostic?: McpServerDiagnostic + intentionallyClosing?: boolean } export async function buildMcpToolProviders( @@ -199,8 +218,12 @@ export async function buildMcpToolProviders( client, clientFactory, nowIso, + reconnectAttempts: 0, + reconnectBackoffMs: DEFAULT_MCP_RECONNECT_BASE_DELAY_MS, + status: 'connected', lastConnectedAt: nowIso() } + attachMcpClientLifecycle(state) const listed = await refreshMcpConnectionCatalog(state) return { state, listed } })() @@ -240,7 +263,7 @@ export async function buildMcpToolProviders( available: true, tools }) - diagnostics.push(serverDiagnostic(state, 'connected', tools.length)) + diagnostics.push(syncMcpDiagnostic(state, 'connected', tools.length)) } const connectedServers = diagnostics.filter((diagnostic) => diagnostic.status === 'connected').length @@ -317,7 +340,7 @@ export async function buildMcpToolProviders( }, close: async () => { reconnectAborted = true - await Promise.all(connected.map((state) => state.client.close().catch(() => undefined))) + await Promise.all(connected.map((state) => closeMcpClient(state))) } } } @@ -366,6 +389,7 @@ async function reconnectFailedMcpServer( ): Promise { for (let attempt = 1; attempt <= maxAttempts; attempt += 1) { if (params.isAborted()) return + updateFailedServerDiagnostic(params.diagnostics, failed, 'reconnecting', attempt) await params.delay(Math.min(maxDelayMs, baseDelayMs * 2 ** (attempt - 1))) if 
(params.isAborted()) return try { @@ -376,17 +400,27 @@ async function reconnectFailedMcpServer( client, clientFactory: params.clientFactory, nowIso: params.nowIso, + reconnectAttempts: 0, + reconnectBackoffMs: DEFAULT_MCP_RECONNECT_BASE_DELAY_MS, + status: 'connected', lastConnectedAt: params.nowIso() } + attachMcpClientLifecycle(state) const listed = await refreshMcpConnectionCatalog(state) if (params.isAborted()) { - await client.close().catch(() => undefined) + await closeMcpClient(state) return } registerLateMcpConnection(params, state, listed) return - } catch { - // Leave the diagnostic as "error" and try again until attempts run out. + } catch (error) { + updateFailedServerDiagnostic( + params.diagnostics, + failed, + 'error', + attempt, + formatMcpConnectionError(error, failed.server) + ) } } } @@ -416,12 +450,33 @@ function registerLateMcpConnection( // flips to connected below so the UI stops showing the server as failed. } } - const diagnostic = serverDiagnostic(state, 'connected', tools.length) + const diagnostic = syncMcpDiagnostic(state, 'connected', tools.length) const index = params.diagnostics.findIndex((entry) => entry.id === state.serverId) if (index >= 0) params.diagnostics[index] = diagnostic else params.diagnostics.push(diagnostic) } +function updateFailedServerDiagnostic( + diagnostics: McpServerDiagnostic[], + failed: FailedMcpServer, + status: Extract, + attempt: number, + lastError?: string +): void { + const index = diagnostics.findIndex((entry) => entry.id === failed.serverId) + const previous = index >= 0 ? diagnostics[index] : undefined + const next: McpServerDiagnostic = { + ...(previous ?? serverDiagnostic({ serverId: failed.serverId, server: failed.server }, 'error', 0)), + status, + available: false, + reconnectAttempts: attempt, + lastReconnectAt: new Date().toISOString(), + ...(lastError ? 
{ lastError: redactSecretText(lastError) } : {}) + } + if (index >= 0) diagnostics[index] = next + else diagnostics.push(next) +} + function defaultMcpReconnectDelay(ms: number): Promise { return new Promise((resolve) => { const timer = setTimeout(resolve, ms) @@ -459,6 +514,16 @@ function workspaceMatchesRoots(workspace: string, roots: readonly string[]): boo async function createSdkMcpClient(serverId: string, server: McpServerConfig): Promise { const client = new Client({ name: `kun-${serverId}`, version: '0.1.0' }) + // Observe transport-level failures explicitly. The SDK routes a dropped SSE + // stream and exhausted background reconnects to `onerror`; with no handler + // they are silently swallowed, which hides real outages from the logs and (on + // some SDK/runtime versions) lets the rejection escape as unhandled. Handling + // it here keeps a streamable-http disconnect from destabilizing the runtime + // (#639) — the per-call reconnect in callMcpToolWithReconnect still recovers + // the connection on the next tool use. 
+ client.onerror = (error) => { + process.stderr.write(`kun mcp[${serverId}]: transport error: ${redactSecretText(errorMessage(error))}\n`) + } const transport = createTransport(server) await client.connect(transport, { timeout: server.timeoutMs }) return { @@ -470,7 +535,11 @@ async function createSdkMcpClient(serverId: string, server: McpServerConfig): Pr }) }, callTool: (input, options) => client.callTool(input, undefined, options), - close: () => client.close() + close: () => client.close(), + setLifecycleHandlers: (handlers) => { + client.onerror = handlers.onError + client.onclose = handlers.onClose + } } } @@ -539,6 +608,17 @@ function createMcpLocalTool( isError: true } } + if (state.status === 'error' && !canAttemptMcpReconnect(state)) { + return { + output: { + error: mcpReconnectCooldownMessage(state), + serverId: state.serverId, + status: state.status, + nextReconnectAt: state.nextReconnectAt + }, + isError: true + } + } const result = await callMcpToolWithReconnect( state, { name: descriptor.name, arguments: args }, @@ -591,6 +671,7 @@ async function refreshMcpConnectionCatalog(state: McpConnectionState): Promise { try { + await ensureMcpConnectionForCall(state, signal) return await state.client.callTool(input, { signal, timeout }) } catch (error) { - state.lastError = redactSecretText(errorMessage(error)) if (signal?.aborted) throw error // Deterministic server-side failures (validation errors, bad // arguments) come back identically on a fresh connection; tearing // down a healthy session for them just loses server state. Only // transport-looking failures earn a reconnect + retry. 
- if (!looksLikeMcpTransportError(error)) throw error - const client = await reconnectMcpConnection(state) + if (!looksLikeMcpTransportError(error)) { + state.lastError = redactSecretText(errorMessage(error)) + syncMcpDiagnostic(state) + throw error + } + markMcpConnectionError(state, error) + const client = await reconnectMcpConnection(state, signal) return client.callTool(input, { signal, timeout }) } } @@ -657,15 +743,112 @@ async function raceStartupTimeout( } } -async function reconnectMcpConnection(state: McpConnectionState): Promise { - await state.client.close().catch(() => undefined) +async function ensureMcpConnectionForCall( + state: McpConnectionState, + signal: AbortSignal | undefined +): Promise { + if (state.status === 'connected') return + await reconnectMcpConnection(state, signal) +} + +async function reconnectMcpConnection( + state: McpConnectionState, + signal?: AbortSignal +): Promise { + if (state.reconnectPromise) return state.reconnectPromise + if (!canAttemptMcpReconnect(state)) { + throw new Error(mcpReconnectCooldownMessage(state)) + } + state.status = 'reconnecting' + state.reconnectAttempts += 1 + state.lastReconnectAt = state.nowIso() + syncMcpDiagnostic(state, 'reconnecting') + state.reconnectPromise = reconnectMcpConnectionOnce(state, signal) + .catch((error) => { + markMcpReconnectFailed(state, error) + throw error + }) + .finally(() => { + state.reconnectPromise = undefined + }) + return state.reconnectPromise +} + +async function reconnectMcpConnectionOnce( + state: McpConnectionState, + signal?: AbortSignal +): Promise { + if (signal?.aborted) throw new Error('MCP reconnect aborted') + await closeMcpClient(state) + if (signal?.aborted) throw new Error('MCP reconnect aborted') const client = await state.clientFactory(state.serverId, state.server) state.client = client + state.status = 'connected' state.lastConnectedAt = state.nowIso() state.lastError = undefined + state.nextReconnectAt = undefined + state.reconnectBackoffMs = 
DEFAULT_MCP_RECONNECT_BASE_DELAY_MS + attachMcpClientLifecycle(state) + await refreshMcpConnectionCatalog(state) + syncMcpDiagnostic(state, 'connected') return client } +async function closeMcpClient(state: McpConnectionState): Promise { + state.intentionallyClosing = true + try { + await state.client.close().catch(() => undefined) + } finally { + state.intentionallyClosing = false + } +} + +function attachMcpClientLifecycle(state: McpConnectionState): void { + state.client.setLifecycleHandlers?.({ + onError: (error) => { + if (looksLikeMcpTransportError(error)) { + markMcpConnectionError(state, error) + } else { + state.lastError = redactSecretText(errorMessage(error)) + syncMcpDiagnostic(state) + } + }, + onClose: () => { + if (state.intentionallyClosing) return + markMcpConnectionError(state, new Error('MCP transport closed')) + } + }) +} + +function markMcpConnectionError(state: McpConnectionState, error: unknown): void { + if (state.intentionallyClosing) return + state.status = 'error' + state.lastError = redactSecretText(errorMessage(error)) + state.lastDisconnectedAt = state.nowIso() + syncMcpDiagnostic(state, 'error') +} + +function markMcpReconnectFailed(state: McpConnectionState, error: unknown): void { + state.status = 'error' + state.lastError = redactSecretText(errorMessage(error)) + state.lastDisconnectedAt = state.nowIso() + const nextDelay = state.reconnectBackoffMs + state.reconnectBackoffMs = Math.min(DEFAULT_MCP_RECONNECT_MAX_DELAY_MS, nextDelay * 2) + state.nextReconnectAt = new Date(Date.now() + nextDelay).toISOString() + syncMcpDiagnostic(state, 'error') +} + +function canAttemptMcpReconnect(state: McpConnectionState): boolean { + if (!state.nextReconnectAt) return true + return Date.now() >= Date.parse(state.nextReconnectAt) +} + +function mcpReconnectCooldownMessage(state: McpConnectionState): string { + return state.nextReconnectAt + ? `MCP server ${state.serverId} is offline; reconnect is cooling down until ${state.nextReconnectAt}. 
Last error: ${state.lastError ?? 'unknown error'}` + : `MCP server ${state.serverId} is offline. Last error: ${state.lastError ?? 'unknown error'}` +} + function shouldUseMcpSearch(config: NonNullable, toolCount: number): boolean { if (!config.enabled) return false if (config.mode === 'direct') return false @@ -701,6 +884,39 @@ function serverDiagnostic( } } +function syncMcpDiagnostic( + state: McpConnectionState, + status: McpServerDiagnostic['status'] = state.status, + toolCount = state.diagnostic?.toolCount ?? 0 +): McpServerDiagnostic { + const diagnostic: McpServerDiagnostic = { + id: state.serverId, + enabled: state.server.enabled, + transport: state.server.transport, + trustScope: state.server.trustScope, + available: status === 'connected', + status, + toolCount, + ...(state.catalogFingerprint ? { catalogFingerprint: state.catalogFingerprint } : {}), + ...(state.catalogDrift !== undefined ? { catalogDrift: state.catalogDrift } : {}), + ...(state.lastConnectedAt ? { lastConnectedAt: state.lastConnectedAt } : {}), + ...(state.lastDisconnectedAt ? { lastDisconnectedAt: state.lastDisconnectedAt } : {}), + ...(state.lastReconnectAt ? { lastReconnectAt: state.lastReconnectAt } : {}), + ...(state.nextReconnectAt ? { nextReconnectAt: state.nextReconnectAt } : {}), + ...(state.reconnectAttempts > 0 ? { reconnectAttempts: state.reconnectAttempts } : {}), + ...(state.lastError ? 
{ lastError: redactSecretText(state.lastError) } : {}) + } + if (!state.diagnostic) { + state.diagnostic = diagnostic + return diagnostic + } + for (const key of Object.keys(state.diagnostic) as Array) { + delete (state.diagnostic as Record)[key] + } + Object.assign(state.diagnostic, diagnostic) + return state.diagnostic +} + function catalogFingerprint(values: readonly string[]): string { return createHash('sha256') .update(JSON.stringify([...values].sort())) diff --git a/kun/src/adapters/tool/read-tracker.test.ts b/kun/src/adapters/tool/read-tracker.test.ts new file mode 100644 index 000000000..e25b908df --- /dev/null +++ b/kun/src/adapters/tool/read-tracker.test.ts @@ -0,0 +1,99 @@ +import { describe, expect, it } from 'vitest' +import { ReadTracker, normalizeReadTrackerOptions } from './read-tracker.js' +import type { ToolHostContext, ToolCallLike } from '../../ports/tool-host.js' + +function context(turnId: string, overrides: Partial = {}): ToolHostContext { + return { + threadId: 'thread_1', + turnId, + workspace: '/ws', + approvalPolicy: 'never', + abortSignal: new AbortController().signal, + awaitApproval: async () => 'allow' as const, + ...overrides + } +} + +function readResult(turnId: string, path: string, content: string): { + context: ToolHostContext + call: ToolCallLike + output: unknown +} { + return { + context: context(turnId), + call: { callId: `read_${turnId}`, toolName: 'read', arguments: { path } }, + output: { path, relative_path: path, content, truncated: false } + } +} + +function editCall(path: string, oldText: string): ToolCallLike { + return { callId: 'edit_1', toolName: 'edit', arguments: { path, oldText, newText: 'x' } } +} + +describe('ReadTracker cross-turn edits (#640)', () => { + it('allows an edit in a later turn when the oldText is still present in the cached read', () => { + const tracker = new ReadTracker(normalizeReadTrackerOptions(true)) + tracker.observeToolResult(readResult('turn_a', 'file.ts', 'const value = 42\n')) + + // 
Edit arrives in a *different* turn than the read — the common case the + // turnId guard used to reject, forcing a fallback to sed/bash. + const verdict = tracker.validateBeforeTool({ + context: context('turn_b'), + call: editCall('file.ts', 'const value = 42') + }) + + expect(verdict).toEqual({ ok: true }) + }) + + it('still blocks an edit for a file that was never read', () => { + const tracker = new ReadTracker(normalizeReadTrackerOptions(true)) + const verdict = tracker.validateBeforeTool({ + context: context('turn_b'), + call: editCall('file.ts', 'const value = 42') + }) + + expect(verdict.ok).toBe(false) + if (!verdict.ok) expect(verdict.message).toContain('Read the current file contents') + }) + + it('still blocks a cross-turn edit when the oldText is not in the cached read', () => { + const tracker = new ReadTracker(normalizeReadTrackerOptions(true)) + tracker.observeToolResult(readResult('turn_a', 'file.ts', 'const value = 42\n')) + + const verdict = tracker.validateBeforeTool({ + context: context('turn_b'), + call: editCall('file.ts', 'const other = 99') + }) + + expect(verdict.ok).toBe(false) + if (!verdict.ok) expect(verdict.message).toContain('was not present in the latest read output') + }) + + it('allows a cross-turn multi-edit when every oldText fragment is present', () => { + const tracker = new ReadTracker(normalizeReadTrackerOptions(true)) + tracker.observeToolResult(readResult('turn_a', 'file.ts', 'alpha\nbeta\ngamma\n')) + + const verdict = tracker.validateBeforeTool({ + context: context('turn_b'), + call: { + callId: 'edit_2', + toolName: 'edit', + arguments: { path: 'file.ts', edits: [{ oldText: 'alpha' }, { oldText: 'gamma' }] } + } + }) + + expect(verdict).toEqual({ ok: true }) + }) + + it('allows a cross-turn edit on a prior read when content checking is disabled', () => { + const tracker = new ReadTracker(normalizeReadTrackerOptions({ enabled: true, requireOldTextInRead: false })) + tracker.observeToolResult(readResult('turn_a', 
'file.ts', 'const value = 42\n')) + + const verdict = tracker.validateBeforeTool({ + context: context('turn_b'), + call: editCall('file.ts', 'anything at all') + }) + + expect(verdict).toEqual({ ok: true }) + }) +}) diff --git a/kun/src/adapters/tool/read-tracker.ts b/kun/src/adapters/tool/read-tracker.ts index 883db32d9..8b6846e09 100644 --- a/kun/src/adapters/tool/read-tracker.ts +++ b/kun/src/adapters/tool/read-tracker.ts @@ -57,14 +57,14 @@ export class ReadTracker { 'Read the current file contents in this turn before editing so SEARCH text is based on fresh bytes.' } } - if (record.turnId !== input.context.turnId) { - return { - ok: false, - message: - `read-before-edit guard blocked edit for ${displayPath(rawPath, input.context.workspace)}. ` + - 'The previous read is from an earlier turn; read the file again before editing.' - } - } + // A read from an earlier turn still counts: agent responses routinely span + // multiple turns (a long reply, or tool results arriving as separate turn + // items), so read-in-turn-A then edit-in-turn-B is a legitimate sequence. + // Hard-blocking it forced a fallback to sed/bash, which mangles code (#640). + // Freshness is still enforced below — `requireOldTextInRead` checks the + // oldText fragments against the cached read content, and the edit's own + // fuzzy matching runs against the current bytes on disk, so a stale SEARCH + // string fails there with a clear error instead of corrupting the file. 
if (!this.options.requireOldTextInRead) return { ok: true } const missing = oldTextFragments(input.call.arguments).filter((fragment) => { if (!fragment.trim()) return false diff --git a/kun/src/benchmark/replay-benchmark.test.ts b/kun/src/benchmark/replay-benchmark.test.ts new file mode 100644 index 000000000..9bf69d261 --- /dev/null +++ b/kun/src/benchmark/replay-benchmark.test.ts @@ -0,0 +1,399 @@ +import { describe, expect, it } from 'vitest' +import type { RuntimeEvent } from '../contracts/events.js' +import { + compareReplayReports, + ReplaySuiteSchema, + runReplaySuite, + SseMessageDecoder, + summarizeReplayEvents, + summarizeReplayRuns, + type ObservedReplayEvent, + type ReplayReport, + type ReplayRunResult +} from './replay-benchmark.js' +import { buildRuntimeCapabilityManifest } from '../contracts/capabilities.js' + +const baseTimestamp = Date.parse('2026-06-29T00:00:00.000Z') + +function observed(event: RuntimeEvent, elapsedMs: number, delayMs = 10): ObservedReplayEvent { + return { + event, + elapsedMs, + receivedAtMs: Date.parse(event.timestamp) + delayMs + } +} + +function itemBase(kind: string) { + return { + kind, + id: `item_${kind}`, + turnId: 'turn_1', + threadId: 'thread_1', + role: kind === 'tool_result' ? 
'tool' : 'assistant', + status: 'completed', + createdAt: '2026-06-29T00:00:00.000Z' + } +} + +describe('replay benchmark', () => { + it('decodes SSE messages across arbitrary chunks', () => { + const decoder = new SseMessageDecoder() + const payload = [ + 'id: 4', + 'event: turn_completed', + `data: ${JSON.stringify({ + kind: 'turn_completed', + seq: 4, + timestamp: '2026-06-29T00:00:00.000Z', + threadId: 'thread_1', + turnId: 'turn_1', + status: 'completed' + })}`, + '', + '' + ].join('\n') + + expect(decoder.push(payload.slice(0, 31))).toEqual([]) + expect(decoder.push(payload.slice(31))).toEqual([ + expect.objectContaining({ id: '4', event: 'turn_completed' }) + ]) + }) + + it('computes TTFT, tool, SSE, usage, and memory metrics from runtime events', () => { + const timestamp = (offset: number) => new Date(baseTimestamp + offset).toISOString() + const events: ObservedReplayEvent[] = [ + observed({ + kind: 'assistant_text_delta', + seq: 1, + timestamp: timestamp(100), + threadId: 'thread_1', + turnId: 'turn_1', + item: { ...itemBase('assistant_text'), kind: 'assistant_text', text: 'hello' } + } as RuntimeEvent, 120, 20), + observed({ + kind: 'tool_call_started', + seq: 2, + timestamp: timestamp(180), + threadId: 'thread_1', + turnId: 'turn_1', + item: { + ...itemBase('tool_call'), + kind: 'tool_call', + toolName: 'read', + callId: 'call_1', + toolKind: 'tool_call', + arguments: { path: 'README.md' } + } + } as RuntimeEvent, 200, 20), + observed({ + kind: 'tool_call_finished', + seq: 3, + timestamp: timestamp(430), + threadId: 'thread_1', + turnId: 'turn_1', + item: { + ...itemBase('tool_result'), + kind: 'tool_result', + toolName: 'read', + callId: 'call_1', + toolKind: 'tool_call', + output: { ok: true }, + isError: false + } + } as RuntimeEvent, 450, 20), + observed({ + kind: 'usage', + seq: 4, + timestamp: timestamp(500), + threadId: 'thread_1', + turnId: 'turn_1', + usage: { + promptTokens: 100, + completionTokens: 20, + totalTokens: 120, + cacheHitTokens: 
60, + cacheMissTokens: 40, + cacheHitRate: 0.6, + cacheableTokenHitRate: 0.75, + totalInputTokenHitRate: 0.6, + turns: 1, + costUsd: 0.001 + } + }, 520, 20), + observed({ + kind: 'turn_completed', + seq: 5, + timestamp: timestamp(580), + threadId: 'thread_1', + turnId: 'turn_1', + status: 'completed' + }, 600, 20) + ] + + expect(summarizeReplayEvents(events, 600, 256 * 1024 * 1024)).toEqual({ + ttftMs: 120, + totalMs: 600, + assistantChars: 5, + eventCount: 5, + errorEvents: 0, + toolCalls: 1, + toolDurationMs: 250, + toolDurationP95Ms: 250, + sseDelayP50Ms: 20, + sseDelayP95Ms: 20, + promptTokens: 100, + completionTokens: 20, + totalTokens: 120, + cacheHitTokens: 60, + cacheMissTokens: 40, + cacheHitRate: 0.6, + cacheableTokenHitRate: 0.75, + totalInputTokenHitRate: 0.6, + costUsd: 0.001, + peakRssBytes: 256 * 1024 * 1024 + }) + }) + + it('aggregates reports and identifies material regressions', () => { + const baselineRun = replayRun('passed', 100, 1_000, 0.8) + const currentRun = replayRun('passed', 200, 1_800, 0.6) + const baseline = report([baselineRun], '2026-06-28T00:00:00.000Z') + const current = report([currentRun], '2026-06-29T00:00:00.000Z') + const comparison = compareReplayReports(current, baseline) + + expect(comparison.ttftP95MsDelta).toBe(100) + expect(comparison.totalP95MsDelta).toBe(800) + expect(comparison.cacheHitRateDelta).toBeCloseTo(-0.2) + expect(comparison.regressions).toEqual(expect.arrayContaining([ + expect.stringContaining('total latency'), + expect.stringContaining('cache hit rate') + ])) + }) + + it('rejects duplicate task ids before spending model tokens', () => { + expect(() => ReplaySuiteSchema.parse({ + version: 1, + name: 'duplicate-suite', + tasks: [ + { id: 'same', prompt: 'one' }, + { id: 'same', prompt: 'two' } + ] + })).toThrow('duplicate replay task id') + }) + + it('fails runs that do not use any required investigation tool', async () => { + const fetchImpl: typeof fetch = async (input, init = {}) => { + const url = new 
URL(String(input)) + if (url.pathname === '/v1/runtime/info') return jsonResponse(testRuntimeInfo()) + if (url.pathname === '/v1/threads' && init.method === 'POST') return jsonResponse({ id: 'thr_1' }, 201) + if (url.pathname === '/v1/threads/thr_1/turns' && init.method === 'POST') { + return jsonResponse({ threadId: 'thr_1', turnId: 'turn_1', userMessageItemId: 'item_user' }, 202) + } + if (url.pathname === '/v1/threads/thr_1/events') { + return sseResponse([ + { + kind: 'assistant_text_delta', + seq: 1, + timestamp: '2026-06-29T00:00:00.000Z', + threadId: 'thr_1', + turnId: 'turn_1', + item: { ...itemBase('assistant_text'), id: 'item_text', threadId: 'thr_1', turnId: 'turn_1', text: 'hello' } + } as RuntimeEvent, + { + kind: 'turn_completed', + seq: 2, + timestamp: '2026-06-29T00:00:00.010Z', + threadId: 'thr_1', + turnId: 'turn_1', + status: 'completed' + } + ]) + } + if (url.pathname === '/v1/threads/thr_1' && init.method === 'DELETE') { + return jsonResponse({ id: 'thr_1', deleted: true }) + } + return jsonResponse({ message: `unexpected ${init.method ?? 'GET'} ${url.pathname}` }, 404) + } + + const report = await runReplaySuite({ + version: 1, + name: 'tool-required-suite', + tasks: [{ + id: 'no-tool', + prompt: 'answer from memory', + expect: { requiredAnyTools: ['read', 'grep', 'find', 'ls'] } + }] + }, { + baseUrl: 'http://127.0.0.1:18899', + token: 'token', + workspace: '/tmp/workspace', + fetchImpl + }) + + expect(report.runs[0]?.status).toBe('failed') + expect(report.runs[0]?.failureReasons).toContain('none of the required tools were used: read, grep, find, ls') + }) + + it('interrupts timed-out turns before deleting replay threads', async () => { + const calls: Array<{ method: string; path: string }> = [] + const fetchImpl: typeof fetch = async (input, init = {}) => { + const url = new URL(String(input)) + calls.push({ method: init.method ?? 
'GET', path: `${url.pathname}${url.search}` }) + if (url.pathname === '/v1/runtime/info') return jsonResponse(testRuntimeInfo()) + if (url.pathname === '/v1/threads' && init.method === 'POST') return jsonResponse({ id: 'thr_1' }, 201) + if (url.pathname === '/v1/threads/thr_1/turns' && init.method === 'POST') { + return jsonResponse({ threadId: 'thr_1', turnId: 'turn_1', userMessageItemId: 'item_user' }, 202) + } + if (url.pathname === '/v1/threads/thr_1/events') return neverTerminalSse(init.signal) + if (url.pathname === '/v1/threads/thr_1/turns/turn_1/interrupt' && init.method === 'POST') { + return jsonResponse({ threadId: 'thr_1', turnId: 'turn_1', status: 'aborted' }) + } + if (url.pathname === '/v1/threads/thr_1' && init.method === 'DELETE') { + return jsonResponse({ id: 'thr_1', deleted: true }) + } + return jsonResponse({ message: `unexpected ${init.method ?? 'GET'} ${url.pathname}` }, 404) + } + + const report = await runReplaySuite({ + version: 1, + name: 'timeout-suite', + defaults: { timeoutMs: 20 }, + tasks: [{ id: 'slow', prompt: 'wait for a terminal event', expect: { minAssistantChars: 0 } }] + }, { + baseUrl: 'http://127.0.0.1:18899', + token: 'token', + workspace: '/tmp/workspace', + fetchImpl + }) + + expect(report.runs[0]?.status).toBe('timeout') + const interruptIndex = calls.findIndex((call) => call.path === '/v1/threads/thr_1/turns/turn_1/interrupt') + const deleteIndex = calls.findIndex((call) => call.path === '/v1/threads/thr_1') + expect(interruptIndex).toBeGreaterThan(-1) + expect(deleteIndex).toBeGreaterThan(interruptIndex) + }) +}) + +function jsonResponse(value: unknown, status = 200): Response { + return new Response(JSON.stringify(value), { + status, + headers: { 'content-type': 'application/json' } + }) +} + +function testRuntimeInfo() { + return { + host: '127.0.0.1', + port: 18899, + dataDir: '/tmp/kun-replay', + model: 'deepseek-chat', + startedAt: '2026-06-29T00:00:00.000Z', + capabilities: buildRuntimeCapabilityManifest({ + 
/**
 * Builds an SSE response that emits heartbeat frames indefinitely and never
 * sends a terminal turn event, so callers can exercise their timeout path.
 *
 * One heartbeat is enqueued immediately and another every millisecond. The
 * interval is stopped when the signal aborts, when the signal is already
 * aborted at construction time, or when the consumer cancels the body, so the
 * timer can neither outlive the stream nor enqueue into a closed controller.
 *
 * @param signal Optional abort signal; once aborted the body errors with an
 *   `AbortError` DOMException.
 * @returns A 200 `text/event-stream` response backed by the heartbeat stream.
 */
function neverTerminalSse(signal?: AbortSignal | null): Response {
  const encoder = new TextEncoder()
  const heartbeat = [
    'id: 1',
    'event: heartbeat',
    `data: ${JSON.stringify({
      kind: 'heartbeat',
      seq: 1,
      timestamp: '2026-06-29T00:00:00.000Z',
      threadId: 'thr_1'
    })}`,
    '',
    ''
  ].join('\n')
  let timer: ReturnType<typeof setInterval> | undefined
  const stopHeartbeat = (): void => {
    if (timer === undefined) return
    clearInterval(timer)
    timer = undefined
  }
  const stream = new ReadableStream<Uint8Array>({
    start(controller) {
      const abort = (): void => {
        stopHeartbeat()
        controller.error(new DOMException('aborted', 'AbortError'))
      }
      // An already-aborted signal never dispatches another 'abort' event, so
      // fail the stream right away instead of starting an unstoppable timer.
      if (signal?.aborted) {
        abort()
        return
      }
      const push = (): void => controller.enqueue(encoder.encode(heartbeat))
      timer = setInterval(push, 1)
      push()
      signal?.addEventListener('abort', abort, { once: true })
    },
    cancel() {
      // The consumer stopped reading; enqueueing after this point would throw.
      stopHeartbeat()
    }
  })
  return new Response(stream, {
    status: 200,
    headers: { 'content-type': 'text/event-stream; charset=utf-8' }
  })
}
cacheHitRate, + cacheableTokenHitRate: cacheHitRate, + totalInputTokenHitRate: cacheHitRate, + costUsd: 0.001, + peakRssBytes: 100 + } + } +} + +function report(runs: ReplayRunResult[], generatedAt: string): ReplayReport { + return { + version: 1, + generatedAt, + suite: { name: 'test', taskCount: runs.length, repeat: 1 }, + runtime: { baseUrl: 'http://127.0.0.1', startedAt: generatedAt }, + summary: summarizeReplayRuns(runs), + runs + } +} diff --git a/kun/src/benchmark/replay-benchmark.ts b/kun/src/benchmark/replay-benchmark.ts new file mode 100644 index 000000000..f16bb809b --- /dev/null +++ b/kun/src/benchmark/replay-benchmark.ts @@ -0,0 +1,757 @@ +import { resolve } from 'node:path' +import { z } from 'zod' +import { RuntimeEvent, type RuntimeEvent as RuntimeEventValue } from '../contracts/events.js' +import { RuntimeInfoResponse, type RuntimeInfoResponse as RuntimeInfoValue } from '../contracts/runtime-info.js' +import { TurnReasoningEffortSchema } from '../contracts/turns.js' +import type { UsageSnapshot } from '../contracts/usage.js' + +const ReplayExpectationSchema = z.object({ + minAssistantChars: z.number().int().nonnegative().default(1), + requiredTools: z.array(z.string().min(1)).default([]), + requiredAnyTools: z.array(z.string().min(1)).default([]), + maxErrorEvents: z.number().int().nonnegative().default(0), + maxTotalMs: z.number().int().positive().optional() +}).strict() + +const ReplayTaskSchema = z.object({ + id: z.string().regex(/^[a-z0-9][a-z0-9_-]*$/), + prompt: z.string().min(1), + tags: z.array(z.string().min(1)).default([]), + workspace: z.string().min(1).optional(), + model: z.string().min(1).optional(), + providerId: z.string().min(1).optional(), + reasoningEffort: TurnReasoningEffortSchema.optional(), + timeoutMs: z.number().int().positive().optional(), + expect: ReplayExpectationSchema.default(() => ReplayExpectationSchema.parse({})) +}).strict() + +export const ReplaySuiteSchema = z.object({ + version: z.literal(1), + name: 
z.string().min(1), + defaults: z.object({ + model: z.string().min(1).optional(), + providerId: z.string().min(1).optional(), + reasoningEffort: TurnReasoningEffortSchema.optional(), + timeoutMs: z.number().int().positive().default(300_000) + }).strict().default(() => ({ timeoutMs: 300_000 })), + tasks: z.array(ReplayTaskSchema).min(1).max(100) +}).strict().superRefine((suite, context) => { + const ids = new Set() + suite.tasks.forEach((task, index) => { + if (ids.has(task.id)) { + context.addIssue({ + code: 'custom', + path: ['tasks', index, 'id'], + message: `duplicate replay task id: ${task.id}` + }) + } + ids.add(task.id) + }) +}) + +export type ReplaySuite = z.infer +export type ReplayTask = z.infer + +export type ObservedReplayEvent = { + event: RuntimeEventValue + receivedAtMs: number + elapsedMs: number +} + +export type ReplayRunMetrics = { + ttftMs: number | null + totalMs: number + assistantChars: number + eventCount: number + errorEvents: number + toolCalls: number + toolDurationMs: number + toolDurationP95Ms: number | null + sseDelayP50Ms: number | null + sseDelayP95Ms: number | null + promptTokens: number + completionTokens: number + totalTokens: number + cacheHitTokens: number | null + cacheMissTokens: number | null + cacheHitRate: number | null + cacheableTokenHitRate: number | null + totalInputTokenHitRate: number | null + costUsd: number + peakRssBytes: number | null +} + +export type ReplayRunResult = { + id: string + taskId: string + iteration: number + tags: string[] + threadId?: string + turnId?: string + status: 'passed' | 'failed' | 'timeout' | 'error' + failureReasons: string[] + metrics: ReplayRunMetrics + error?: string +} + +export type ReplayReportSummary = { + runCount: number + passed: number + failed: number + timedOut: number + errors: number + successRate: number + ttftP50Ms: number | null + ttftP95Ms: number | null + totalP50Ms: number | null + totalP95Ms: number | null + toolDurationP95Ms: number | null + sseDelayP95Ms: number | 
/**
 * Validates a replay suite, runs every selected task against a Kun runtime and
 * aggregates the outcomes into a report.
 *
 * `repeat` is clamped to 1..20 and `concurrency` to 1..8. Jobs are ordered by
 * task and then iteration; results are stored at the job's own index, so the
 * order of `runs` does not depend on which job finishes first.
 *
 * @param suiteInput Untrusted suite definition; parsed with `ReplaySuiteSchema`.
 * @param options Runtime location, workspace, and execution knobs.
 * @returns The report containing per-run results and their summary.
 * @throws When the suite fails schema validation, the runtime info request
 *   fails, or no task carries the requested tag.
 */
export async function runReplaySuite(
  suiteInput: unknown,
  options: RunReplaySuiteOptions
): Promise<ReplayReport> {
  const suite = ReplaySuiteSchema.parse(suiteInput)
  const repeat = clampInteger(options.repeat ?? 1, 1, 20)
  const concurrency = clampInteger(options.concurrency ?? 1, 1, 8)
  const baseUrl = options.baseUrl.replace(/\/$/, '')
  const client = createReplayHttpClient(baseUrl, options.token, options.fetchImpl ?? fetch)
  const runtime = await client.getRuntimeInfo()

  // Narrow the tag once so the filter callback needs no non-null assertion.
  const { tag } = options
  const selectedTasks = tag ? suite.tasks.filter((task) => task.tags.includes(tag)) : suite.tasks
  if (selectedTasks.length === 0) {
    throw new Error(`replay suite has no tasks tagged "${tag}"`)
  }

  const jobs: Array<{ task: ReplayTask; iteration: number }> = []
  for (const task of selectedTasks) {
    for (let iteration = 1; iteration <= repeat; iteration += 1) {
      jobs.push({ task, iteration })
    }
  }

  const runs = new Array<ReplayRunResult>(jobs.length)
  let nextJobIndex = 0
  let finished = 0
  // Each worker claims the next unclaimed index synchronously, so no job is
  // executed twice even though the workers interleave at their awaits.
  const drainJobs = async (): Promise<void> => {
    for (let jobIndex = nextJobIndex++; ; jobIndex = nextJobIndex++) {
      const job = jobs[jobIndex]
      if (!job) return
      const run = await runReplayTask({
        suite,
        task: job.task,
        iteration: job.iteration,
        runtime,
        client,
        workspace: options.workspace,
        keepThread: options.keepThreads === true
      })
      runs[jobIndex] = run
      finished += 1
      options.onProgress?.(finished, jobs.length, run)
    }
  }

  const workers: Array<Promise<void>> = []
  const workerCount = Math.min(concurrency, jobs.length)
  for (let slot = 0; slot < workerCount; slot += 1) workers.push(drainJobs())
  await Promise.all(workers)

  const tagField = options.tag ? { tag: options.tag } : {}
  const modelField = runtime.model ? { model: runtime.model } : {}
  const pidField = runtime.pid ? { pid: runtime.pid } : {}
  return {
    version: 1,
    generatedAt: new Date().toISOString(),
    suite: { name: suite.name, taskCount: selectedTasks.length, repeat, ...tagField },
    runtime: { baseUrl, ...modelField, startedAt: runtime.startedAt, ...pidField },
    summary: summarizeReplayRuns(runs),
    runs
  }
}
runtime.model + if (!model) return errorReplayRun(runId, task, iteration, 'runtime did not report a default model') + const workspace = resolve(input.workspace, task.workspace ?? '.') + let threadId: string | undefined + let turnId: string | undefined + let shouldInterrupt = false + try { + const thread = await client.createThread({ + title: `[replay] ${runId}`, + titleAuto: false, + workspace, + model, + ...(task.providerId ?? suite.defaults.providerId + ? { providerId: task.providerId ?? suite.defaults.providerId } + : {}), + mode: 'agent', + approvalPolicy: 'auto', + sandboxMode: 'read-only' + }) + threadId = thread.id + const startedAt = performance.now() + const turn = await client.startTurn(threadId, { + prompt: task.prompt, + reasoningEffort: task.reasoningEffort ?? suite.defaults.reasoningEffort ?? 'off', + approvalPolicy: 'auto', + sandboxMode: 'read-only', + disableUserInput: true + }) + turnId = turn.turnId + const timeoutMs = task.timeoutMs ?? suite.defaults.timeoutMs + const collected = await collectReplayEvents({ + client, + threadId, + turnId, + startedAt, + timeoutMs + }) + shouldInterrupt = collected.timedOut || !hasTerminalTurnEvent(collected.events, turnId) + const after = await client.getRuntimeInfo().catch(() => runtime) + const metrics = summarizeReplayEvents( + collected.events, + collected.elapsedMs, + after.memoryUsage?.peakRssBytes + ) + const failureReasons = replayExpectationFailures(task, collected.timedOut, metrics, collected.events) + return { + id: runId, + taskId: task.id, + iteration, + tags: task.tags, + threadId, + turnId, + status: collected.timedOut ? 'timeout' : failureReasons.length > 0 ? 'failed' : 'passed', + failureReasons, + metrics + } + } catch (error) { + shouldInterrupt = turnId !== undefined + return { + ...errorReplayRun(runId, task, iteration, errorMessage(error)), + ...(threadId ? { threadId } : {}), + ...(turnId ? 
/**
 * Reads a thread's SSE stream until the given turn reaches a terminal event,
 * the stream ends, or the timeout fires.
 *
 * Every message that parses as a runtime event is recorded together with the
 * wall-clock receive time (`Date.now()`) and the milliseconds elapsed since
 * `input.startedAt` (measured with `performance.now()`).
 *
 * @param input.client HTTP client used to open the event stream.
 * @param input.threadId Thread whose events are streamed.
 * @param input.turnId Turn whose terminal event ends the collection.
 * @param input.startedAt `performance.now()` reading that elapsed times are relative to.
 * @param input.timeoutMs Milliseconds after which the stream is aborted.
 * @returns The observed events, the total elapsed time, and whether the
 *   timeout fired. `timedOut` is always `false` when a terminal event was seen.
 * @throws Any error raised while the timeout has not fired and the controller
 *   has not been aborted (for example a missing response body).
 */
async function collectReplayEvents(input: {
  client: ReplayHttpClient
  threadId: string
  turnId: string
  startedAt: number
  timeoutMs: number
}): Promise<{ events: ObservedReplayEvent[]; elapsedMs: number; timedOut: boolean }> {
  const controller = new AbortController()
  let timedOut = false
  // The flag is set before aborting so the catch block below can tell a
  // timeout-induced abort apart from a genuine failure.
  const timer = setTimeout(() => {
    timedOut = true
    controller.abort()
  }, input.timeoutMs)
  // Optional call: only invoked when the timer handle exposes `unref`.
  timer.unref?.()
  const observed: ObservedReplayEvent[] = []
  try {
    const response = await input.client.openEvents(input.threadId, controller.signal)
    if (!response.body) throw new Error('runtime SSE response has no body')
    const reader = response.body.getReader()
    const decoder = new TextDecoder()
    const sse = new SseMessageDecoder()
    while (true) {
      const chunk = await reader.read()
      if (chunk.done) break
      // `stream: true` keeps multi-byte characters split across chunks intact.
      for (const message of sse.push(decoder.decode(chunk.value, { stream: true }))) {
        const parsed = parseRuntimeSseMessage(message)
        // Messages that do not parse as runtime events are skipped.
        if (!parsed) continue
        const receivedAtMs = Date.now()
        observed.push({
          event: parsed,
          receivedAtMs,
          elapsedMs: Math.max(0, performance.now() - input.startedAt)
        })
        // Only a terminal event for the requested turn stops the collection.
        if (parsed.turnId === input.turnId && isTerminalTurnEvent(parsed.kind)) {
          controller.abort()
          return {
            events: observed,
            elapsedMs: Math.max(0, performance.now() - input.startedAt),
            timedOut: false
          }
        }
      }
    }
    // The stream ended without a terminal event for the turn.
    return {
      events: observed,
      elapsedMs: Math.max(0, performance.now() - input.startedAt),
      timedOut
    }
  } catch (error) {
    // Errors after a timeout or abort are treated as a normal end of collection;
    // anything else is propagated to the caller.
    if (!timedOut && !controller.signal.aborted) throw error
    return {
      events: observed,
      elapsedMs: Math.max(0, performance.now() - input.startedAt),
      timedOut
    }
  } finally {
    // Always stop the timer and abort the request on the way out.
    clearTimeout(timer)
    controller.abort()
  }
}
function summarizeReplayEvents( + observed: ObservedReplayEvent[], + elapsedMs: number, + peakRssBytes?: number +): ReplayRunMetrics { + const firstText = observed.find(({ event }) => + event.kind === 'assistant_text_delta' && event.item.kind === 'assistant_text' && event.item.text.length > 0 + ) ?? observed.find(({ event }) => + (event.kind === 'item_created' || event.kind === 'item_completed') && + event.item.kind === 'assistant_text' && + event.item.text.length > 0 + ) + const assistantTextByItem = new Map() + const toolStarted = new Map() + const toolDurations: number[] = [] + const toolCallIds = new Set() + const sseDelays: number[] = [] + let errorEvents = 0 + let usage: UsageSnapshot | undefined + for (const record of observed) { + const eventTime = Date.parse(record.event.timestamp) + if (Number.isFinite(eventTime)) sseDelays.push(Math.max(0, record.receivedAtMs - eventTime)) + if (record.event.kind === 'error' || record.event.kind === 'turn_failed') errorEvents += 1 + if (record.event.kind === 'usage') usage = record.event.usage + if ('item' in record.event && record.event.item.kind === 'assistant_text') { + const itemId = record.event.item.id + if (record.event.kind === 'assistant_text_delta') { + assistantTextByItem.set(itemId, `${assistantTextByItem.get(itemId) ?? 
''}${record.event.item.text}`) + } else { + assistantTextByItem.set(itemId, record.event.item.text) + } + } + if (record.event.kind === 'tool_call_started' && 'item' in record.event && 'callId' in record.event.item) { + toolStarted.set(record.event.item.callId, record.elapsedMs) + toolCallIds.add(record.event.item.callId) + } + if (record.event.kind === 'tool_call_finished' && 'item' in record.event && 'callId' in record.event.item) { + const started = toolStarted.get(record.event.item.callId) + if (started !== undefined) toolDurations.push(Math.max(0, record.elapsedMs - started)) + toolCallIds.add(record.event.item.callId) + } + } + const assistantChars = [...assistantTextByItem.values()].reduce((total, text) => total + text.length, 0) + const hit = usage?.cacheHitTokens + const miss = usage?.cacheMissTokens + const cacheTotal = (hit ?? 0) + (miss ?? 0) + return { + ttftMs: firstText ? roundMetric(firstText.elapsedMs) : null, + totalMs: roundMetric(elapsedMs), + assistantChars, + eventCount: observed.length, + errorEvents, + toolCalls: toolCallIds.size, + toolDurationMs: roundMetric(toolDurations.reduce((total, value) => total + value, 0)), + toolDurationP95Ms: percentile(toolDurations, 0.95), + sseDelayP50Ms: percentile(sseDelays, 0.5), + sseDelayP95Ms: percentile(sseDelays, 0.95), + promptTokens: usage?.promptTokens ?? 0, + completionTokens: usage?.completionTokens ?? 0, + totalTokens: usage?.totalTokens ?? 0, + cacheHitTokens: hit ?? null, + cacheMissTokens: miss ?? null, + cacheHitRate: usage?.cacheHitRate ?? (cacheTotal > 0 ? (hit ?? 0) / cacheTotal : null), + cacheableTokenHitRate: usage?.cacheableTokenHitRate ?? null, + totalInputTokenHitRate: usage?.totalInputTokenHitRate ?? null, + costUsd: usage?.costUsd ?? 0, + peakRssBytes: peakRssBytes ?? 
/**
 * Aggregates per-run metrics into the report summary.
 *
 * Latency percentiles use the nearest-rank `percentile` helper. Nullable
 * metrics are dropped before aggregation. Runs with status `error` are built
 * from `emptyReplayMetrics()`, whose `totalMs` is a placeholder `0`, so they
 * are excluded from the total-latency percentiles; including them would drag
 * the percentiles toward zero and could hide a latency regression.
 *
 * @param runs Results of every executed replay run.
 * @returns Counts per status, success rate, latency percentiles, token and
 *   cost totals, cache hit rates, and the highest observed peak RSS.
 */
export function summarizeReplayRuns(runs: ReplayRunResult[]): ReplayReportSummary {
  const countWithStatus = (status: ReplayRunResult['status']): number =>
    runs.filter((run) => run.status === status).length
  const ttft = compactNumbers(runs.map((run) => run.metrics.ttftMs))
  // Errored runs never measured a latency; their totalMs is a placeholder 0.
  const total = runs.filter((run) => run.status !== 'error').map((run) => run.metrics.totalMs)
  const toolP95 = compactNumbers(runs.map((run) => run.metrics.toolDurationP95Ms))
  const sseP95 = compactNumbers(runs.map((run) => run.metrics.sseDelayP95Ms))
  const hitTokens = compactNumbers(runs.map((run) => run.metrics.cacheHitTokens)).reduce(sum, 0)
  const missTokens = compactNumbers(runs.map((run) => run.metrics.cacheMissTokens)).reduce(sum, 0)
  const cacheableRates = compactNumbers(runs.map((run) => run.metrics.cacheableTokenHitRate))
  const totalInputRates = compactNumbers(runs.map((run) => run.metrics.totalInputTokenHitRate))
  const passed = countWithStatus('passed')
  return {
    runCount: runs.length,
    passed,
    failed: countWithStatus('failed'),
    timedOut: countWithStatus('timeout'),
    errors: countWithStatus('error'),
    successRate: runs.length > 0 ? passed / runs.length : 0,
    ttftP50Ms: percentile(ttft, 0.5),
    ttftP95Ms: percentile(ttft, 0.95),
    totalP50Ms: percentile(total, 0.5),
    totalP95Ms: percentile(total, 0.95),
    toolDurationP95Ms: percentile(toolP95, 0.95),
    sseDelayP95Ms: percentile(sseP95, 0.95),
    promptTokens: runs.reduce((totalValue, run) => totalValue + run.metrics.promptTokens, 0),
    completionTokens: runs.reduce((totalValue, run) => totalValue + run.metrics.completionTokens, 0),
    totalTokens: runs.reduce((totalValue, run) => totalValue + run.metrics.totalTokens, 0),
    // Token-weighted across runs rather than an average of per-run rates.
    cacheHitRate: hitTokens + missTokens > 0 ? hitTokens / (hitTokens + missTokens) : null,
    cacheableTokenHitRate: average(cacheableRates),
    totalInputTokenHitRate: average(totalInputRates),
    costUsd: runs.reduce((totalValue, run) => totalValue + run.metrics.costUsd, 0),
    peakRssBytes: maxNullable(compactNumbers(runs.map((run) => run.metrics.peakRssBytes)))
  }
}
/**
 * Incremental decoder that turns text chunks of a `text/event-stream` body
 * into complete SSE messages.
 *
 * Text is buffered until a blank line (`\n\n`) terminates a message. Blocks
 * that `parseSseBlock` rejects (comment-only or without `data`) are dropped.
 */
export class SseMessageDecoder {
  /** Text received so far that does not yet form a complete message. */
  private buffer = ''

  /**
   * Appends a chunk and returns every message completed by it.
   *
   * @param chunk Decoded text as it arrived from the network.
   * @returns Completed messages in stream order; empty when none completed.
   */
  push(chunk: string): SseMessage[] {
    // Normalize after appending rather than per chunk: a CRLF split across two
    // chunks ("...\r" then "\n...") only becomes visible once both halves are
    // joined. A trailing lone '\r' stays in the buffer until its '\n' arrives.
    this.buffer = `${this.buffer}${chunk}`.replace(/\r\n/g, '\n')
    const messages: SseMessage[] = []
    let boundary = this.buffer.indexOf('\n\n')
    while (boundary >= 0) {
      const block = this.buffer.slice(0, boundary)
      this.buffer = this.buffer.slice(boundary + 2)
      const message = parseSseBlock(block)
      if (message) messages.push(message)
      boundary = this.buffer.indexOf('\n\n')
    }
    return messages
  }
}
/**
 * Parses one SSE block (the text between two blank lines) into a message.
 *
 * Only the `event`, `id` and `data` fields are kept. Empty lines and comment
 * lines (starting with `:`) are skipped, a single leading space is removed
 * from each value, and a line without a colon is a field with an empty value.
 * Repeated `data` lines are joined with `\n`.
 *
 * @param block Block text with LF line endings.
 * @returns The message, or `null` for a blank block or one without `data`.
 */
function parseSseBlock(block: string): SseMessage | null {
  if (block.trim().length === 0) return null

  let eventName: string | undefined
  let lastId: string | undefined
  const dataLines: string[] = []

  for (const line of block.split('\n')) {
    if (line === '' || line.startsWith(':')) continue

    const colon = line.indexOf(':')
    let field = line
    let value = ''
    if (colon >= 0) {
      field = line.slice(0, colon)
      const rest = line.slice(colon + 1)
      value = rest.startsWith(' ') ? rest.slice(1) : rest
    }

    switch (field) {
      case 'event':
        eventName = value
        break
      case 'id':
        lastId = value
        break
      case 'data':
        dataLines.push(value)
        break
      default:
        // Other fields are ignored.
        break
    }
  }

  if (dataLines.length === 0) return null
  const eventPart = eventName ? { event: eventName } : {}
  const idPart = lastId ? { id: lastId } : {}
  return { ...eventPart, ...idPart, data: dataLines.join('\n') }
}
/**
 * Nearest-rank percentile of a list of numbers, rounded with `roundMetric`.
 *
 * The input array is not modified; a sorted copy is used instead.
 *
 * @param values Samples in any order.
 * @param quantile Fraction of the distribution, e.g. `0.95` for p95.
 * @returns The selected sample, or `null` when there are no samples.
 */
function percentile(values: number[], quantile: number): number | null {
  const count = values.length
  if (count === 0) return null

  const ascending = values.slice().sort((a, b) => a - b)
  // Nearest rank is 1-based; convert to a 0-based index and keep it in range.
  const rank = Math.ceil(quantile * count) - 1
  const position = Math.min(count - 1, Math.max(0, rank))
  const selected = ascending[position] ?? 0
  return roundMetric(selected)
}
/**
 * Floors a number and clamps it into the inclusive range `[min, max]`.
 *
 * `NaN` falls back to `min`. `Math.min` and `Math.max` both propagate `NaN`,
 * so without the guard a `NaN` option would flow into `Array.from({ length })`
 * calls and silently produce zero repeats or zero workers.
 *
 * @param value Requested value; may be fractional, infinite or `NaN`.
 * @param min Smallest allowed result.
 * @param max Largest allowed result.
 * @returns An integer within `[min, max]`.
 */
function clampInteger(value: number, min: number, max: number): number {
  if (Number.isNaN(value)) return min
  return Math.max(min, Math.min(max, Math.floor(value)))
}
error.message : String(error) +} diff --git a/kun/src/cli/replay-entry.ts b/kun/src/cli/replay-entry.ts new file mode 100644 index 000000000..745efab8c --- /dev/null +++ b/kun/src/cli/replay-entry.ts @@ -0,0 +1,187 @@ +#!/usr/bin/env node +import { mkdir, readFile, writeFile } from 'node:fs/promises' +import { dirname, resolve } from 'node:path' +import { + compareReplayReports, + runReplaySuite, + type ReplayReport +} from '../benchmark/replay-benchmark.js' +import { DEFAULT_SERVE_PORT } from './cli-options.js' + +const DEFAULT_RUNTIME_URL = `http://127.0.0.1:${DEFAULT_SERVE_PORT}` + +type CliOptions = { + suitePath?: string + baseUrl: string + workspace: string + outputPath?: string + baselinePath?: string + repeat: number + concurrency: number + tag?: string + keepThreads: boolean + failOnRegression: boolean + help: boolean +} + +const options = parseArgs(process.argv.slice(2)) +if (options.help) { + printUsage() + process.exit(0) +} +if (!options.suitePath) { + printUsage() + process.exit(2) +} + +const suitePath = resolve(options.suitePath) +const suite = JSON.parse(await readFile(suitePath, 'utf8')) as unknown +const report = await runReplaySuite(suite, { + baseUrl: options.baseUrl, + token: process.env.KUN_RUNTIME_TOKEN, + workspace: options.workspace, + repeat: options.repeat, + concurrency: options.concurrency, + ...(options.tag ? { tag: options.tag } : {}), + keepThreads: options.keepThreads, + onProgress: (completed, total, run) => { + const ttft = run.metrics.ttftMs === null ? 
'n/a' : `${Math.round(run.metrics.ttftMs)}ms` + console.error( + `[${completed}/${total}] ${run.id} ${run.status} ` + + `ttft=${ttft} total=${Math.round(run.metrics.totalMs)}ms tokens=${run.metrics.totalTokens}` + ) + } +}) + +if (options.baselinePath) { + const baseline = JSON.parse(await readFile(resolve(options.baselinePath), 'utf8')) as ReplayReport + report.comparison = compareReplayReports(report, baseline) +} + +printSummary(report) +if (options.outputPath) { + const outputPath = resolve(options.outputPath) + await mkdir(dirname(outputPath), { recursive: true }) + await writeFile(outputPath, `${JSON.stringify(report, null, 2)}\n`, 'utf8') + console.error(`Replay report written to ${outputPath}`) +} else { + console.log(JSON.stringify(report, null, 2)) +} + +if (options.failOnRegression && report.comparison?.regressions.length) { + process.exitCode = 1 +} + +function parseArgs(args: string[]): CliOptions { + const options: CliOptions = { + baseUrl: process.env.KUN_RUNTIME_URL ?? DEFAULT_RUNTIME_URL, + workspace: resolve(process.env.INIT_CWD ?? 
process.cwd()), + repeat: 1, + concurrency: 1, + keepThreads: false, + failOnRegression: false, + help: false + } + for (let index = 0; index < args.length; index += 1) { + const arg = args[index] + switch (arg) { + case '--suite': + options.suitePath = requiredValue(args, ++index, arg) + break + case '--base-url': + options.baseUrl = requiredValue(args, ++index, arg) + break + case '--workspace': + options.workspace = resolve(requiredValue(args, ++index, arg)) + break + case '--output': + options.outputPath = requiredValue(args, ++index, arg) + break + case '--baseline': + options.baselinePath = requiredValue(args, ++index, arg) + break + case '--tag': + options.tag = requiredValue(args, ++index, arg) + break + case '--repeat': + options.repeat = positiveInteger(requiredValue(args, ++index, arg), arg) + break + case '--concurrency': + options.concurrency = positiveInteger(requiredValue(args, ++index, arg), arg) + break + case '--keep-threads': + options.keepThreads = true + break + case '--fail-on-regression': + options.failOnRegression = true + break + case '--help': + case '-h': + options.help = true + break + default: + throw new Error(`unknown replay option: ${arg}`) + } + } + return options +} + +function requiredValue(args: string[], index: number, flag: string): string { + const value = args[index] + if (!value || value.startsWith('--')) throw new Error(`${flag} requires a value`) + return value +} + +function positiveInteger(value: string, flag: string): number { + const parsed = Number(value) + if (!Number.isInteger(parsed) || parsed <= 0) throw new Error(`${flag} must be a positive integer`) + return parsed +} + +function printUsage(): void { + console.log('Usage:') + console.log(' npm --prefix kun run benchmark:replay -- --suite [options]') + console.log('') + console.log('Options:') + console.log(` --base-url Kun runtime URL (or KUN_RUNTIME_URL, default ${DEFAULT_RUNTIME_URL})`) + console.log(' --workspace Workspace for replay tasks') + console.log(' 
--tag Run only tasks with this tag') + console.log(' --repeat Repeat each selected task (default 1)') + console.log(' --concurrency Parallel tasks, capped at 8 (default 1)') + console.log(' --baseline Compare against an earlier report') + console.log(' --output Write the full machine-readable report') + console.log(' --keep-threads Keep generated replay threads') + console.log(' --fail-on-regression Exit 1 when comparison thresholds regress') + console.log('') + console.log('Authentication: set KUN_RUNTIME_TOKEN; it is intentionally not accepted as a CLI flag.') +} + +function printSummary(report: ReplayReport): void { + const summary = report.summary + console.error('') + console.error(`Replay suite: ${report.suite.name}`) + console.error(`Success: ${summary.passed}/${summary.runCount} (${formatRate(summary.successRate)})`) + console.error(`TTFT p50/p95: ${formatMs(summary.ttftP50Ms)} / ${formatMs(summary.ttftP95Ms)}`) + console.error(`Total p50/p95: ${formatMs(summary.totalP50Ms)} / ${formatMs(summary.totalP95Ms)}`) + console.error(`SSE delay p95: ${formatMs(summary.sseDelayP95Ms)}`) + console.error(`Tokens: ${summary.promptTokens} input + ${summary.completionTokens} output`) + console.error(`Cache hit: ${formatRate(summary.cacheHitRate)}`) + console.error(`Cost: $${summary.costUsd.toFixed(6)}`) + console.error(`Peak RSS: ${summary.peakRssBytes === null ? 'n/a' : formatBytes(summary.peakRssBytes)}`) + if (report.comparison) { + console.error(`Regressions: ${report.comparison.regressions.length}`) + for (const regression of report.comparison.regressions) console.error(` - ${regression}`) + } +} + +function formatMs(value: number | null): string { + return value === null ? 'n/a' : `${Math.round(value)}ms` +} + +function formatRate(value: number | null): string { + return value === null ? 
'n/a' : `${(value * 100).toFixed(2)}%` +} + +function formatBytes(value: number): string { + return `${(value / 1024 / 1024).toFixed(1)} MiB` +} diff --git a/kun/src/cli/serve-crash-handlers.test.ts b/kun/src/cli/serve-crash-handlers.test.ts new file mode 100644 index 000000000..619569038 --- /dev/null +++ b/kun/src/cli/serve-crash-handlers.test.ts @@ -0,0 +1,77 @@ +import { afterEach, describe, expect, it, vi } from 'vitest' +import process from 'node:process' +import { installServeCrashHandlers } from './serve-crash-handlers.js' + +type Listener = (...args: unknown[]) => void + +/** + * installServeCrashHandlers registers process-wide listeners. Snapshot the + * existing ones so we can invoke (and later remove) only the pair this call + * added, without disturbing vitest's own handlers. + */ +function install(getHandle: () => null = () => null): { + unhandled: Listener[] + uncaught: Listener[] + cleanup: () => void +} { + const beforeRejection = new Set(process.listeners('unhandledRejection')) + const beforeException = new Set(process.listeners('uncaughtException')) + installServeCrashHandlers(getHandle) + const unhandled = process + .listeners('unhandledRejection') + .filter((l) => !beforeRejection.has(l)) as unknown as Listener[] + const uncaught = process + .listeners('uncaughtException') + .filter((l) => !beforeException.has(l)) as unknown as Listener[] + return { + unhandled, + uncaught, + cleanup: () => { + for (const l of unhandled) process.removeListener('unhandledRejection', l as never) + for (const l of uncaught) process.removeListener('uncaughtException', l as never) + } + } +} + +afterEach(() => { + vi.restoreAllMocks() +}) + +describe('serve crash handlers (#639)', () => { + it('keeps the runtime alive on an unhandledRejection (e.g. 
an MCP transport drop)', () => { + const exitSpy = vi.spyOn(process, 'exit').mockImplementation((() => undefined) as never) + const writes: string[] = [] + vi.spyOn(process.stderr, 'write').mockImplementation(((chunk: unknown) => { + writes.push(String(chunk)) + return true + }) as never) + + const handlers = install() + try { + expect(handlers.unhandled).toHaveLength(1) + handlers.unhandled[0](new Error('SSE stream disconnected: socket hang up')) + + expect(exitSpy).not.toHaveBeenCalled() + expect(writes.join('')).toContain('unhandledRejection (non-fatal, runtime stays up)') + expect(writes.join('')).toContain('socket hang up') + } finally { + handlers.cleanup() + } + }) + + it('still exits non-zero on an uncaughtException so the supervisor restarts a clean process', () => { + const exitSpy = vi.spyOn(process, 'exit').mockImplementation((() => undefined) as never) + vi.spyOn(process.stderr, 'write').mockImplementation((() => true) as never) + + const handlers = install() + try { + expect(handlers.uncaught).toHaveLength(1) + handlers.uncaught[0](new Error('boom')) + + // ServeExitCode.runtime === 70 + expect(exitSpy).toHaveBeenCalledWith(70) + } finally { + handlers.cleanup() + } + }) +}) diff --git a/kun/src/cli/serve-crash-handlers.ts b/kun/src/cli/serve-crash-handlers.ts new file mode 100644 index 000000000..510999a94 --- /dev/null +++ b/kun/src/cli/serve-crash-handlers.ts @@ -0,0 +1,45 @@ +import process from 'node:process' +import { ServeExitCode } from './serve.js' +import type { KunServeHandle } from '../server/runtime-factory.js' + +/** + * Serve mode runs unattended under the GUI. The two failure modes are + * deliberately handled differently: + * + * - `uncaughtException` left the stack unwound mid-operation, so the process + * state is genuinely unsafe. Report it on stderr (the GUI captures the tail), + * attempt a bounded graceful close, then exit non-zero so the GUI supervisor + * can restart a fresh process. 
+ * - `unhandledRejection` does NOT corrupt the process — Node keeps running. A + * stray background rejection (e.g. a streamable-http MCP server dropping its + * connection while a reconnect promise is in flight) is fully recoverable, so + * tearing the whole runtime down and blanking the GUI for it is the wrong + * trade (#639). Log it for the stderr tail and stay up; the MCP layer already + * reports the server unavailable and reconnects on the next request. + */ +export function installServeCrashHandlers(getHandle: () => KunServeHandle | null): void { + let crashing = false + const crash = (kind: string, error: unknown): void => { + if (crashing) return + crashing = true + const detail = error instanceof Error ? (error.stack ?? error.message) : String(error) + process.stderr.write(`kun serve: ${kind}: ${detail}\n`) + const finish = (): void => process.exit(ServeExitCode.runtime) + const handle = getHandle() + if (!handle) { + finish() + return + } + const deadline = setTimeout(finish, 3000) + deadline.unref() + void handle + .close() + .catch(() => undefined) + .finally(finish) + } + process.on('uncaughtException', (error) => crash('uncaughtException', error)) + process.on('unhandledRejection', (reason) => { + const detail = reason instanceof Error ? (reason.stack ?? reason.message) : String(reason) + process.stderr.write(`kun serve: unhandledRejection (non-fatal, runtime stays up): ${detail}\n`) + }) +} diff --git a/kun/src/cli/serve-entry.ts b/kun/src/cli/serve-entry.ts index 05a7469d3..e44b2ae5c 100644 --- a/kun/src/cli/serve-entry.ts +++ b/kun/src/cli/serve-entry.ts @@ -11,39 +11,10 @@ import { resolveEventLoopStallThresholdMs, startEventLoopMonitor } from '../server/event-loop-monitor.js' +import { installServeCrashHandlers } from './serve-crash-handlers.js' export const KUN_READY_PREFIX = 'KUN_READY ' -/** - * Serve mode runs unattended under the GUI. 
An uncaught error must not - * leave a half-dead process: report it on stderr (the GUI captures the - * tail), attempt a bounded graceful close, then exit non-zero so the - * GUI supervisor can restart us. - */ -function installServeCrashHandlers(getHandle: () => KunServeHandle | null): void { - let crashing = false - const crash = (kind: string, error: unknown): void => { - if (crashing) return - crashing = true - const detail = error instanceof Error ? (error.stack ?? error.message) : String(error) - process.stderr.write(`kun serve: ${kind}: ${detail}\n`) - const finish = (): void => process.exit(ServeExitCode.runtime) - const handle = getHandle() - if (!handle) { - finish() - return - } - const deadline = setTimeout(finish, 3000) - deadline.unref() - void handle - .close() - .catch(() => undefined) - .finally(finish) - } - process.on('uncaughtException', (error) => crash('uncaughtException', error)) - process.on('unhandledRejection', (reason) => crash('unhandledRejection', reason)) -} - /** * Serve-mode command. Kept separate from the dispatcher so GUI startup * still has the exact same KUN_READY handshake behavior. 
diff --git a/kun/src/contracts/background-shell.ts b/kun/src/contracts/background-shell.ts new file mode 100644 index 000000000..3d7f06758 --- /dev/null +++ b/kun/src/contracts/background-shell.ts @@ -0,0 +1,35 @@ +import { z } from 'zod' + +export const BackgroundShellStatus = z.enum(['running', 'completed', 'stopped', 'failed']) +export type BackgroundShellStatus = z.infer + +export const BackgroundShellRecord = z.object({ + id: z.string().min(1), + threadId: z.string().min(1), + turnId: z.string().min(1), + command: z.string(), + cwd: z.string(), + shell: z.string(), + status: BackgroundShellStatus, + startedAt: z.string(), + finishedAt: z.string().optional(), + exitCode: z.number().int().nullable(), + output: z.string(), + outputTruncated: z.boolean().optional(), + outputFilePath: z.string().optional(), + error: z.string().optional(), + detached: z.boolean() +}).strict() +export type BackgroundShellRecord = z.infer + +export const BackgroundShellListResponse = z.object({ + sessions: z.array(BackgroundShellRecord), + running: z.number().int().nonnegative() +}).strict() +export type BackgroundShellListResponse = z.infer + +export const BackgroundShellStopResponse = z.object({ + sessionId: z.string().min(1), + stopped: z.boolean() +}).strict() +export type BackgroundShellStopResponse = z.infer diff --git a/kun/src/contracts/events.ts b/kun/src/contracts/events.ts index 45e0c4797..05d07c98c 100644 --- a/kun/src/contracts/events.ts +++ b/kun/src/contracts/events.ts @@ -1,5 +1,5 @@ import { z } from 'zod' -import { TurnItem } from './items.js' +import { TurnItem, UserMessageSource } from './items.js' import { ThreadGoalSchema, ThreadTodoListSchema } from './threads.js' import { UsageSnapshotSchema } from './usage.js' import { RuntimeErrorSeverity } from './errors.js' @@ -39,6 +39,9 @@ export const RuntimeEventKind = z.enum([ 'goal_cleared', 'todos_updated', 'todos_cleared', + 'bash_session_started', + 'bash_session_updated', + 'bash_session_completed', 
'pipeline_stage', 'usage', 'error', @@ -125,6 +128,8 @@ export const TurnLifecycleEvent = RuntimeEventBase.extend({ ]), status: z.string().optional(), text: z.string().optional(), + displayText: z.string().optional(), + messageSource: UserMessageSource.optional(), message: z.string().optional(), code: z.string().optional(), details: z.unknown().optional(), @@ -226,6 +231,24 @@ export const TodoEvent = RuntimeEventBase.extend({ }) export type TodoEvent = z.infer +export const BashSessionEvent = RuntimeEventBase.extend({ + kind: z.enum(['bash_session_started', 'bash_session_updated', 'bash_session_completed']), + sessionId: z.string().min(1), + command: z.string(), + cwd: z.string(), + shell: z.string(), + status: z.enum(['running', 'completed', 'stopped', 'failed']), + startedAt: z.string(), + finishedAt: z.string().optional(), + exitCode: z.number().int().nullable().optional(), + detached: z.boolean(), + output: z.string().default(''), + outputTruncated: z.boolean().optional(), + outputFilePath: z.string().optional(), + error: z.string().optional() +}) +export type BashSessionEvent = z.infer + export const UsageEvent = RuntimeEventBase.extend({ kind: z.literal('usage'), model: z.string().optional(), @@ -268,6 +291,7 @@ export const RuntimeEvent = z.discriminatedUnion('kind', [ CompactionEvent, GoalEvent, TodoEvent, + BashSessionEvent, PipelineStageEvent, UsageEvent, ErrorEvent, diff --git a/kun/src/contracts/items.ts b/kun/src/contracts/items.ts index d75e927d2..622608384 100644 --- a/kun/src/contracts/items.ts +++ b/kun/src/contracts/items.ts @@ -51,10 +51,14 @@ export const UserFileReferenceSchema = z.object({ }) export type UserFileReference = z.infer +export const UserMessageSource = z.enum(['background_shell']) +export type UserMessageSource = z.infer + export const UserTurnItem = TurnItemBase.extend({ kind: z.literal('user_message'), text: z.string(), displayText: z.string().optional(), + messageSource: UserMessageSource.optional(), attachmentIds: 
z.array(z.string().min(1)).optional(), fileReferences: z.array(UserFileReferenceSchema).optional(), workspaceCheckpointId: z.string().min(1).optional() diff --git a/kun/src/contracts/runtime-info.ts b/kun/src/contracts/runtime-info.ts index dd154e9a1..57ce055ae 100644 --- a/kun/src/contracts/runtime-info.ts +++ b/kun/src/contracts/runtime-info.ts @@ -17,6 +17,13 @@ export const RuntimeInfoResponse = z insecure: z.boolean().optional(), startedAt: z.string(), pid: z.number().int().positive().optional(), + memoryUsage: z.object({ + rssBytes: z.number().int().nonnegative(), + peakRssBytes: z.number().int().nonnegative(), + heapUsedBytes: z.number().int().nonnegative(), + heapTotalBytes: z.number().int().nonnegative(), + externalBytes: z.number().int().nonnegative() + }).strict().optional(), capabilities: RuntimeCapabilityManifest }) .strict() diff --git a/kun/src/contracts/turns.ts b/kun/src/contracts/turns.ts index c908fb4c6..d58c3aed7 100644 --- a/kun/src/contracts/turns.ts +++ b/kun/src/contracts/turns.ts @@ -1,5 +1,5 @@ import { z } from 'zod' -import { TurnItem, UserFileReferenceSchema } from './items.js' +import { TurnItem, UserFileReferenceSchema, UserMessageSource } from './items.js' import { isGuiPlanRelativePath } from '../shared/gui-plan.js' import { ApprovalPolicySchema, SandboxModeSchema } from './policy.js' @@ -50,6 +50,12 @@ export const TurnStatus = z.enum([ ]) export type TurnStatus = z.infer +export const InjectedMemorySummarySchema = z.object({ + id: z.string().min(1), + content: z.string() +}) +export type InjectedMemorySummary = z.infer + export const TurnSchema = z.object({ id: z.string().min(1), threadId: z.string().min(1), @@ -66,6 +72,7 @@ export const TurnSchema = z.object({ attachmentIds: z.array(z.string().min(1)).default([]), activeSkillIds: z.array(z.string().min(1)).default([]), injectedMemoryIds: z.array(z.string().min(1)).default([]), + injectedMemorySummaries: z.array(InjectedMemorySummarySchema).default([]), skillInjectionBytes: 
z.number().int().nonnegative().optional(), workspaceCheckpointId: z.string().min(1).optional(), toolCatalogFingerprint: z.string().optional(), @@ -91,6 +98,7 @@ export type Turn = z.infer export const StartTurnRequest = z.object({ prompt: z.string().min(1), displayText: z.string().optional(), + messageSource: UserMessageSource.optional(), model: z.string().optional(), reasoningEffort: TurnReasoningEffortSchema.optional(), approvalPolicy: ApprovalPolicySchema.optional(), @@ -135,7 +143,9 @@ export const StartTurnResponse = z.object({ export type StartTurnResponse = z.infer export const SteerTurnRequest = z.object({ - text: z.string().min(1) + text: z.string().min(1), + displayText: z.string().optional(), + messageSource: UserMessageSource.optional() }) export type SteerTurnRequest = z.infer diff --git a/kun/src/domain/item.ts b/kun/src/domain/item.ts index 35f12ffbe..3263d8813 100644 --- a/kun/src/domain/item.ts +++ b/kun/src/domain/item.ts @@ -9,6 +9,7 @@ export function makeUserItem(input: { threadId: string text: string displayText?: string + messageSource?: 'background_shell' attachmentIds?: string[] fileReferences?: Array<{ path: string; relativePath: string; name: string; kind?: 'file' | 'directory' }> workspaceCheckpointId?: string @@ -34,6 +35,7 @@ export function makeUserItem(input: { kind: 'user_message', text: input.text, ...(displayText && displayText !== input.text ? { displayText } : {}), + ...(input.messageSource ? { messageSource: input.messageSource } : {}), ...(attachmentIds?.length ? { attachmentIds } : {}), ...(fileReferences?.length ? { fileReferences } : {}), ...(input.workspaceCheckpointId ? { workspaceCheckpointId: input.workspaceCheckpointId } : {}) diff --git a/kun/src/domain/turn.ts b/kun/src/domain/turn.ts index 0560bb87e..88dbec125 100644 --- a/kun/src/domain/turn.ts +++ b/kun/src/domain/turn.ts @@ -30,6 +30,7 @@ export function createTurnRecord(input: { attachmentIds: [...(input.attachmentIds ?? 
[])], activeSkillIds: [], injectedMemoryIds: [], + injectedMemorySummaries: [], ...(model ? { model } : {}), ...(reasoningEffort ? { reasoningEffort } : {}), ...(input.guiPlan ? { guiPlan: input.guiPlan } : {}), diff --git a/kun/src/loop/agent-loop.ts b/kun/src/loop/agent-loop.ts index acd38a82b..40ed79672 100644 --- a/kun/src/loop/agent-loop.ts +++ b/kun/src/loop/agent-loop.ts @@ -54,6 +54,7 @@ import { makeErrorItem } from '../domain/item.js' import { touchThread } from '../domain/thread.js' +import { memoryPreview } from '../shared/memory-preview.js' import { repairModelHistoryItems } from '../domain/model-history-repair.js' import type { TurnItem } from '../contracts/items.js' import type { ThreadGoal, ThreadTodoList } from '../contracts/threads.js' @@ -103,7 +104,6 @@ const MAX_PARALLEL_TOOL_CALLS = 3 // request. Older ones collapse to a text note (Anthropic-style "keep last // N images"), bounding context growth for long computer-use sessions. const MAX_FORWARDED_TOOL_IMAGES = 3 -const MAX_TURN_MODEL_STEPS = 64 /** * Tools that, on their own, do not count as "progress" toward a goal when @@ -672,6 +672,8 @@ export type AgentLoopOptions = { skillRuntime?: SkillRuntime attachmentStore?: AttachmentStore memoryStore?: MemoryStore + /** Kun runtime data root for sandbox-safe background shell output reads. */ + runtimeDataDir?: string tokenEconomy?: TokenEconomyConfig contextCompaction?: ContextCompactionConfig /** Internal-LLM role model routing (smallModel slot + title/summary/codeReview overrides). 
*/ @@ -1269,18 +1271,15 @@ export class AgentLoop { private async drainSteering(threadId: string, turnId: string, signal: AbortSignal): Promise { const pending = this.opts.steering.drain() if (pending.length === 0) return - for (const text of pending) { - const item: TurnItem = { + for (const entry of pending) { + const item = makeUserItem({ id: this.opts.ids.next('item_steered'), turnId, threadId, - role: 'user', - status: 'completed', - createdAt: this.opts.nowIso(), - finishedAt: this.opts.nowIso(), - kind: 'user_message', - text - } + text: entry.text, + ...(entry.displayText ? { displayText: entry.displayText } : {}), + ...(entry.messageSource ? { messageSource: entry.messageSource } : {}) + }) await this.opts.turns.applyItem(threadId, item) } void signal @@ -1293,30 +1292,6 @@ export class AgentLoop { ): Promise<'completed' | 'failed' | 'aborted'> { for (let step = 0; ; step += 1) { if (signal.aborted) return 'aborted' - if (step >= MAX_TURN_MODEL_STEPS) { - const message = - `Turn stopped after ${MAX_TURN_MODEL_STEPS} model steps without reaching a final response.` - await this.opts.events.record({ - kind: 'error', - threadId, - turnId, - message, - code: 'turn_step_limit_exceeded', - severity: 'error' - }) - await this.opts.turns.applyItem( - threadId, - makeErrorItem({ - id: this.opts.ids.next('item_error'), - turnId, - threadId, - message, - code: 'turn_step_limit_exceeded', - severity: 'error' - }) - ) - return 'failed' - } await this.drainSteering(threadId, turnId, signal) const stepResult = await this.modelStep(threadId, turnId, signal, step) if (stepResult === 'stop') return 'completed' @@ -1466,6 +1441,7 @@ export class AgentLoop { ...(this.opts.blockedSkillIds ? { blockedSkillIds: this.opts.blockedSkillIds } : {}), approvalPolicy, sandboxMode, + ...(this.opts.runtimeDataDir ? 
{ runtimeDataDir: this.opts.runtimeDataDir } : {}), abortSignal: signal, awaitApproval: async () => 'allow', ...(userInputDisabled @@ -1509,6 +1485,10 @@ export class AgentLoop { activeSkillIds: skillResolution.activeSkillIds, skillInjectionBytes: skillResolution.injectedBytes, injectedMemoryIds: memories.map((memory) => memory.id), + injectedMemorySummaries: memories.map((memory) => ({ + id: memory.id, + content: memoryPreview(memory.content) + })), toolCatalogFingerprint: toolCatalog.fingerprint, toolCatalogToolCount: toolCatalog.toolCount, toolCatalogDrift: toolCatalogDrift.kind !== 'none' @@ -2244,6 +2224,7 @@ export class AgentLoop { ...(this.opts.blockedSkillIds ? { blockedSkillIds: this.opts.blockedSkillIds } : {}), approvalPolicy: input.approvalPolicy, sandboxMode: input.sandboxMode, + ...(this.opts.runtimeDataDir ? { runtimeDataDir: this.opts.runtimeDataDir } : {}), abortSignal: input.signal, awaitApproval: async (approval) => { await this.opts.events.record({ diff --git a/kun/src/loop/steering-queue.ts b/kun/src/loop/steering-queue.ts index da1959e72..928ba7626 100644 --- a/kun/src/loop/steering-queue.ts +++ b/kun/src/loop/steering-queue.ts @@ -4,8 +4,14 @@ * as user inputs at the next safe loop boundary. The queue is cleared * on turn completion or interruption. 
*/ +export type SteeringEntry = { + text: string + displayText?: string + messageSource?: 'background_shell' +} + export class SteeringQueue { - private readonly buffer: string[] = [] + private readonly buffer: SteeringEntry[] = [] private turnId: string | null = null setTurn(turnId: string | null): void { @@ -15,14 +21,18 @@ export class SteeringQueue { this.turnId = turnId } - enqueue(turnId: string, text: string): void { + enqueue(turnId: string, entry: SteeringEntry): void { if (this.turnId !== turnId) { this.buffer.length = 0 this.turnId = turnId } - const trimmed = text.trim() - if (!trimmed) return - this.buffer.push(trimmed) + const text = entry.text.trim() + if (!text) return + this.buffer.push({ + text, + ...(entry.displayText?.trim() ? { displayText: entry.displayText.trim() } : {}), + ...(entry.messageSource ? { messageSource: entry.messageSource } : {}) + }) } /** @@ -30,7 +40,7 @@ export class SteeringQueue { * this at safe boundaries (after a model response, before the next * model request). Returns an empty array when nothing is pending. */ - drain(): string[] { + drain(): SteeringEntry[] { if (this.buffer.length === 0) return [] const out = [...this.buffer] this.buffer.length = 0 @@ -41,7 +51,7 @@ export class SteeringQueue { * Peek at the queued text without removing it. Used by the UI to * show pending steering in a "pending injection" indicator. */ - peek(): string[] { + peek(): SteeringEntry[] { return [...this.buffer] } diff --git a/kun/src/loop/token-economy.ts b/kun/src/loop/token-economy.ts index 21c0f4877..fc3272ae8 100644 --- a/kun/src/loop/token-economy.ts +++ b/kun/src/loop/token-economy.ts @@ -406,10 +406,12 @@ function compactToolOutput(toolName: string, output: unknown): unknown { } function compactBashOutput(output: JsonRecord): JsonRecord { + const hasExternalOutput = + Boolean(output.full_output_path) || Boolean(output.output_file) return { ...output, output: typeof output.output === 'string' - ? 
compactCommandOutput(output.output, Boolean(output.full_output_path)) + ? compactCommandOutput(output.output, hasExternalOutput) : output.output } } diff --git a/kun/src/memory/memory-store.ts b/kun/src/memory/memory-store.ts index 04f95c8c5..1134ab92b 100644 --- a/kun/src/memory/memory-store.ts +++ b/kun/src/memory/memory-store.ts @@ -13,7 +13,7 @@ export interface MemoryStore { create(input: MemoryCreateRequest): Promise update(id: string, patch: MemoryUpdateRequest, access?: MemoryAccess): Promise delete(id: string, access?: MemoryAccess): Promise - list(filter?: { workspace?: string; includeDeleted?: boolean }): Promise + list(filter?: { workspace?: string; includeDeleted?: boolean; all?: boolean }): Promise retrieve(input: { query: string; workspace?: string; limit: number }): Promise diagnostics(): Promise setLastInjected(ids: string[]): void @@ -84,11 +84,11 @@ export class FileMemoryStore implements MemoryStore { return next } - async list(filter: { workspace?: string; includeDeleted?: boolean } = {}): Promise { + async list(filter: { workspace?: string; includeDeleted?: boolean; all?: boolean } = {}): Promise { const records = await this.readAll() return records .filter((record) => filter.includeDeleted || !record.deletedAt) - .filter((record) => inScope(record, filter.workspace)) + .filter((record) => filter.all || inScope(record, filter.workspace)) .sort((a, b) => b.updatedAt.localeCompare(a.updatedAt)) } diff --git a/kun/src/ports/tool-host.ts b/kun/src/ports/tool-host.ts index 2dd1669be..95765cc56 100644 --- a/kun/src/ports/tool-host.ts +++ b/kun/src/ports/tool-host.ts @@ -91,6 +91,8 @@ export type ToolHostContext = { approvalPolicy: ApprovalPolicy /** Filesystem/command sandbox selected for this turn. Defaults at execution time for old callers. */ sandboxMode?: SandboxMode + /** Kun runtime data root; used to allow sandbox-safe reads of background shell output files. 
*/ + runtimeDataDir?: string abortSignal: AbortSignal /** Resolves a pending approval with the user's decision. */ awaitApproval: (approval: ApprovalRequest) => Promise<'allow' | 'deny'> diff --git a/kun/src/server/routes/background-shells.ts b/kun/src/server/routes/background-shells.ts new file mode 100644 index 000000000..0c519f357 --- /dev/null +++ b/kun/src/server/routes/background-shells.ts @@ -0,0 +1,42 @@ +import type { BackgroundShellRuntime } from '../../services/background-shell-runtime.js' +import { jsonResponse, type JsonResponse } from '../response.js' +import { ERRORS } from './runtime-error.js' + +export async function backgroundShellList( + runtime: BackgroundShellRuntime | undefined, + request: Request +): Promise { + if (!runtime) { + return jsonResponse({ sessions: [], running: 0 }) + } + const url = new URL(request.url) + const threadId = url.searchParams.get('thread_id') ?? undefined + const sessions = runtime.listSessions(threadId) + return jsonResponse({ + sessions, + running: sessions.filter((session) => session.status === 'running').length + }) +} + +export async function backgroundShellGet( + runtime: BackgroundShellRuntime | undefined, + sessionId: string +): Promise { + if (!runtime) return ERRORS.unavailable('background shell runtime is unavailable') + if (!sessionId.trim()) return ERRORS.validation('sessionId is required', []) + const session = runtime.getSession(sessionId) + if (!session) return ERRORS.notFound(`background shell not found: ${sessionId}`) + return jsonResponse(session) +} + +export async function backgroundShellStop( + runtime: BackgroundShellRuntime | undefined, + sessionId: string +): Promise { + if (!runtime) return ERRORS.unavailable('background shell runtime is unavailable') + if (!sessionId.trim()) return ERRORS.validation('sessionId is required', []) + const stopped = await runtime.stopSession(sessionId) + return jsonResponse({ sessionId, stopped }) +} + +export { ERRORS as BackgroundShellErrors } diff --git 
a/kun/src/server/routes/index.ts b/kun/src/server/routes/index.ts index 11cf12a17..51d59671e 100644 --- a/kun/src/server/routes/index.ts +++ b/kun/src/server/routes/index.ts @@ -51,6 +51,11 @@ import { delegationDiagnostics, delegationProfiles } from './delegation.js' +import { + backgroundShellGet, + backgroundShellList, + backgroundShellStop +} from './background-shells.js' import { isAuthorized, bearerToken } from '../auth.js' import { ERRORS } from './runtime-error.js' import type { ServerRuntime } from './server-runtime.js' @@ -150,6 +155,18 @@ export function buildRouter(runtime: ServerRuntime): Router { if (!authorize(request, runtime)) return ERRORS.unauthorized() return delegationAbort(runtime.delegationRuntime, ctx.params.childId) }) + router.add('GET', '/v1/background-shells', async (request) => { + if (!authorize(request, runtime)) return ERRORS.unauthorized() + return backgroundShellList(runtime.backgroundShellRuntime, request) + }) + router.add('GET', '/v1/background-shells/:sessionId', async (request, ctx) => { + if (!authorize(request, runtime)) return ERRORS.unauthorized() + return backgroundShellGet(runtime.backgroundShellRuntime, ctx.params.sessionId) + }) + router.add('POST', '/v1/background-shells/:sessionId/stop', async (request, ctx) => { + if (!authorize(request, runtime)) return ERRORS.unauthorized() + return backgroundShellStop(runtime.backgroundShellRuntime, ctx.params.sessionId) + }) router.add('GET', '/v1/workspace/status', async (request) => { if (!authorize(request, runtime)) return ERRORS.unauthorized() const url = new URL(request.url) diff --git a/kun/src/server/routes/memory.ts b/kun/src/server/routes/memory.ts index 2ca8132dc..2d4a7cee0 100644 --- a/kun/src/server/routes/memory.ts +++ b/kun/src/server/routes/memory.ts @@ -10,7 +10,8 @@ export async function listMemories(store: MemoryStore | undefined, request: Requ return jsonResponse({ memories: await store.list({ workspace: url.searchParams.get('workspace') ?? 
undefined, - includeDeleted: url.searchParams.get('include_deleted') === 'true' + includeDeleted: url.searchParams.get('include_deleted') === 'true', + all: url.searchParams.get('all') === 'true' }) }) } diff --git a/kun/src/server/routes/server-runtime.ts b/kun/src/server/routes/server-runtime.ts index 74ca31329..b7f905f96 100644 --- a/kun/src/server/routes/server-runtime.ts +++ b/kun/src/server/routes/server-runtime.ts @@ -27,6 +27,7 @@ import type { MemoryDiagnostics } from '../../contracts/memory.js' import type { MemoryStore } from '../../memory/memory-store.js' import type { ReviewTarget } from '../../contracts/review.js' import type { DelegationRuntime } from '../../delegation/delegation-runtime.js' +import type { BackgroundShellRuntime } from '../../services/background-shell-runtime.js' import type { ModelClient } from '../../ports/model-client.js' import type { RolesConfig } from '../../config/kun-config.js' import type { ImmutablePrefix } from '../../cache/immutable-prefix.js' @@ -71,6 +72,7 @@ export type ServerRuntime = { * listing. Optional so test scaffolds can omit it. */ delegationRuntime?: DelegationRuntime + backgroundShellRuntime?: BackgroundShellRuntime /** * Default ModelClient + model id for one-shot completions outside the * agent loop (e.g. AI-generated subagent profiles). Optional so test diff --git a/kun/src/server/routes/turns.ts b/kun/src/server/routes/turns.ts index 7b438f829..af113d5e4 100644 --- a/kun/src/server/routes/turns.ts +++ b/kun/src/server/routes/turns.ts @@ -53,7 +53,13 @@ export async function steerTurn( if (!parsed.success) { return ERRORS.validation('invalid steer turn body', parsed.error.issues) } - await turns.steerTurn({ threadId, turnId, text: parsed.data.text }) + await turns.steerTurn({ + threadId, + turnId, + text: parsed.data.text, + ...(parsed.data.displayText ? { displayText: parsed.data.displayText } : {}), + ...(parsed.data.messageSource ? 
{ messageSource: parsed.data.messageSource } : {}) + }) return jsonResponse({ ok: true }) } diff --git a/kun/src/server/runtime-factory.ts b/kun/src/server/runtime-factory.ts index a493fdfb7..761116428 100644 --- a/kun/src/server/runtime-factory.ts +++ b/kun/src/server/runtime-factory.ts @@ -78,6 +78,10 @@ import { resolveConfiguredHooks, type HooksConfig } from '../hooks/hook-config.j import { FileMemoryStore } from '../memory/memory-store.js' import { DelegationRuntime, FileDelegationStore } from '../delegation/delegation-runtime.js' import { createChildAgentExecutor } from '../delegation/child-agent-executor.js' +import { BackgroundShellRuntime } from '../services/background-shell-runtime.js' +import { stopBashSessionById, createBashLocalTool } from '../adapters/tool/builtin-bash-tool.js' +import { createBackgroundShellTool } from '../adapters/tool/background-shell-tool.js' +import type { LocalTool } from '../adapters/tool/local-tool-host.js' export type KunServeRuntimeOptions = { host: string @@ -237,6 +241,28 @@ export async function createKunServeRuntime( ids, nowIso }) + const backgroundShellRuntime = new BackgroundShellRuntime({ + events, + threadStore, + turns: turnService, + nowIso + }) + backgroundShellRuntime.bindStopHandler(stopBashSessionById) + const backgroundShellTool = createBackgroundShellTool({ + listBackgroundSessions: (threadId) => backgroundShellRuntime.listSessions(threadId) + }) + const withBackgroundShellTools = (tools: LocalTool[]): LocalTool[] => { + const mapped = tools.map((tool) => + tool.name === 'bash' + ? 
createBashLocalTool({ + backgroundShell: backgroundShellRuntime.bashHooks(), + backgroundShellDataDir: options.dataDir + }) + : tool + ) + const withoutBackgroundShell = mapped.filter((tool) => tool.name !== 'background_shell') + return [...withoutBackgroundShell, backgroundShellTool] + } const reviewService = new ReviewService({ threadStore, turns: turnService, @@ -281,7 +307,7 @@ export async function createKunServeRuntime( kind: 'built-in' as const, enabled: true, available: true, - tools: buildDefaultLocalTools() + tools: withBackgroundShellTools(buildDefaultLocalTools()) }, ...mcpProviders.providers, ...webProviders.providers, @@ -496,6 +522,7 @@ export async function createKunServeRuntime( ...(resolvedHooks.length ? { hooks: resolvedHooks } : {}), ...(attachmentStore ? { attachmentStore } : {}), ...(memoryStore ? { memoryStore } : {}), + runtimeDataDir: options.dataDir, onPlanWritten: async ({ threadId, planId, relativePath, markdown }) => { await threadService.syncTodosFromPlan(threadId, { planId, @@ -505,6 +532,9 @@ export async function createKunServeRuntime( }) } }) + backgroundShellRuntime.bindAgentLoop({ + runTurn: (threadId, turnId) => loop.runTurn(threadId, turnId) + }) const startedAt = options.startedAt ?? nowIso() return { threadService, @@ -522,6 +552,7 @@ export async function createKunServeRuntime( ...(attachmentStore ? { attachmentStore } : {}), ...(memoryStore ? { memoryStore } : {}), ...(delegationRuntime ? { delegationRuntime } : {}), + backgroundShellRuntime, modelClient, defaultModel: options.model, ...(options.roles ? { roles: options.roles } : {}), @@ -539,21 +570,32 @@ export async function createKunServeRuntime( insecure: options.insecure, allocateSeq, nowIso, - info: () => ({ - host: options.host, - port: options.port, - configPath: options.configPath, - dataDir: options.dataDir, - model: options.model, - endpointFormat: options.endpointFormat ?? 
DEFAULT_MODEL_ENDPOINT_FORMAT, - approvalPolicy: options.approvalPolicy, - sandboxMode: options.sandboxMode, - tokenEconomyMode: options.tokenEconomyMode, - insecure: options.insecure, - startedAt, - pid: process.pid, - capabilities - }), + info: () => { + const memory = process.memoryUsage() + const peakRssBytes = Math.max(memory.rss, process.resourceUsage().maxRSS * 1024) + return { + host: options.host, + port: options.port, + configPath: options.configPath, + dataDir: options.dataDir, + model: options.model, + endpointFormat: options.endpointFormat ?? DEFAULT_MODEL_ENDPOINT_FORMAT, + approvalPolicy: options.approvalPolicy, + sandboxMode: options.sandboxMode, + tokenEconomyMode: options.tokenEconomyMode, + insecure: options.insecure, + startedAt, + pid: process.pid, + memoryUsage: { + rssBytes: memory.rss, + peakRssBytes, + heapUsedBytes: memory.heapUsed, + heapTotalBytes: memory.heapTotal, + externalBytes: memory.external + }, + capabilities + } + }, toolDiagnostics: async () => ({ providers: registry.diagnostics(), mcpServers: mcpProviders.diagnostics, diff --git a/kun/src/services/background-shell-notice.ts b/kun/src/services/background-shell-notice.ts new file mode 100644 index 000000000..aabdb3728 --- /dev/null +++ b/kun/src/services/background-shell-notice.ts @@ -0,0 +1,74 @@ +import type { BackgroundShellRecord } from '../contracts/background-shell.js' + +export type BackgroundShellCompletionNotice = { + sessionId: string + command: string + exitCode: number + outputPreview: string + hint: string +} + +function escapeXml(text: string): string { + return text + .replace(/&/g, '&') + .replace(//g, '>') + .replace(/"/g, '"') +} + +function unescapeXml(text: string): string { + return text + .replace(/"/g, '"') + .replace(/>/g, '>') + .replace(/</g, '<') + .replace(/&/g, '&') +} + +function summarizeOutput(output: string, max = 400): string { + const trimmed = output.trim() + if (trimmed.length <= max) return trimmed + return `${trimmed.slice(0, max)}…` +} + 
+function readXmlTag(xml: string, tag: string): string | null { + const match = xml.match(new RegExp(`<${tag}>([\\s\\S]*?)`)) + if (!match) return null + return unescapeXml(match[1].trim()) +} + +export function formatBackgroundShellCompletionNotice(record: BackgroundShellRecord): string { + const sessionId = record.id + const outputPreview = summarizeOutput(record.output) || '(empty)' + const hint = record.outputFilePath + ? `Full output is saved at ${record.outputFilePath}. Use background_shell action="read" with session_id="${sessionId}" for a fresh summary.` + : `Use background_shell action="read" with session_id="${sessionId}" to inspect the full output.` + const lines = [ + '', + `${escapeXml(sessionId)}`, + `${escapeXml(record.command)}`, + `${record.exitCode ?? 0}`, + `${escapeXml(outputPreview)}`, + ...(record.outputFilePath ? [`${escapeXml(record.outputFilePath)}`] : []), + `${escapeXml(hint)}`, + '' + ] + return lines.join('\n') +} + +export function parseBackgroundShellCompletionNotice(text: string): BackgroundShellCompletionNotice | null { + const trimmed = text.trim() + if (!trimmed.includes('')) return null + const sessionId = readXmlTag(trimmed, 'session_id') + const command = readXmlTag(trimmed, 'command') + const exitCodeRaw = readXmlTag(trimmed, 'exit_code') + const outputPreview = readXmlTag(trimmed, 'output_preview') + const hint = readXmlTag(trimmed, 'hint') + if (!sessionId || !command || exitCodeRaw === null || outputPreview === null || !hint) return null + const exitCode = Number.parseInt(exitCodeRaw, 10) + if (!Number.isFinite(exitCode)) return null + return { sessionId, command, exitCode, outputPreview, hint } +} + +export function backgroundShellNoticeDisplayText(sessionId: string): string { + return `Background shell ${sessionId} completed` +} diff --git a/kun/src/services/background-shell-output.ts b/kun/src/services/background-shell-output.ts new file mode 100644 index 000000000..66fd6a0cd --- /dev/null +++ 
b/kun/src/services/background-shell-output.ts @@ -0,0 +1,137 @@ +import { mkdir, readFile, writeFile } from 'node:fs/promises' +import { createWriteStream, type WriteStream } from 'node:fs' +import { isAbsolute, join, relative, resolve, sep } from 'node:path' + +/** Shared per-thread folder for all background shell logs (alongside messages.jsonl). */ +export const BACKGROUND_SHELL_OUTPUT_SUBDIR = 'background-shells' +export const DEFAULT_BACKGROUND_SHELL_OUTPUT_SUMMARY_MAX_CHARS = 10_000 +export const BACKGROUND_SHELL_OUTPUT_TRUNCATION_NOTICE = + '\n[background shell output truncated; use output_file for the full log]' + +export type BackgroundShellOutputPaths = { + outputDir: string + outputFilePath: string +} + +export type BackgroundShellOutputSummary = { + summary: string + truncated: boolean + totalChars: number +} + +export function resolveBackgroundShellOutputDir(dataDir: string, threadId: string): string { + return join(resolve(dataDir, 'threads', threadId), BACKGROUND_SHELL_OUTPUT_SUBDIR) +} + +export function resolveBackgroundShellOutputPaths( + dataDir: string, + threadId: string, + sessionId: string +): BackgroundShellOutputPaths { + const outputDir = resolveBackgroundShellOutputDir(dataDir, threadId) + const outputFilePath = resolve(outputDir, `${sessionId}.output`) + return { outputDir, outputFilePath } +} + +export function isBackgroundShellOutputPath( + absolutePath: string, + options: { runtimeDataDir?: string; threadId?: string } +): boolean { + const dataDir = options.runtimeDataDir?.trim() + if (!dataDir) return false + const normalized = resolve(absolutePath) + const threadId = options.threadId?.trim() + if (threadId) { + const dir = resolveBackgroundShellOutputDir(dataDir, threadId) + if (!normalized.startsWith(`${dir}${sep}`) && normalized !== dir) return false + return normalized.endsWith('.output') + } + const threadsRoot = resolve(dataDir, 'threads') + const rel = relative(threadsRoot, normalized) + if (!rel || rel.startsWith('..') || 
isAbsolute(rel)) return false + const parts = rel.split(sep) + return parts.length === 3 && parts[1] === BACKGROUND_SHELL_OUTPUT_SUBDIR && parts[2]?.endsWith('.output') === true +} + +export function summarizeBackgroundShellOutput( + fullOutput: string, + maxChars = DEFAULT_BACKGROUND_SHELL_OUTPUT_SUMMARY_MAX_CHARS +): BackgroundShellOutputSummary { + const chars = [...fullOutput] + const totalChars = chars.length + if (totalChars <= maxChars) { + return { summary: fullOutput, truncated: false, totalChars } + } + const noticeChars = [...BACKGROUND_SHELL_OUTPUT_TRUNCATION_NOTICE].length + const bodyBudget = Math.max(1, maxChars - noticeChars) + const body = chars.slice(-bodyBudget).join('') + return { + summary: `${body}${BACKGROUND_SHELL_OUTPUT_TRUNCATION_NOTICE}`, + truncated: true, + totalChars + } +} + +export async function readBackgroundShellOutputSummary( + outputFilePath: string, + maxChars = DEFAULT_BACKGROUND_SHELL_OUTPUT_SUMMARY_MAX_CHARS +): Promise { + try { + const full = await readFile(outputFilePath, 'utf-8') + return summarizeBackgroundShellOutput(full, maxChars) + } catch { + return { summary: '', truncated: false, totalChars: 0 } + } +} + +export class BackgroundShellOutputWriter { + private stream: WriteStream | undefined + private closed = false + + readonly paths: BackgroundShellOutputPaths + + constructor(dataDir: string, threadId: string, sessionId: string) { + this.paths = resolveBackgroundShellOutputPaths(dataDir, threadId, sessionId) + } + + async open(): Promise { + await mkdir(this.paths.outputDir, { recursive: true }) + await writeFile(this.paths.outputFilePath, '', 'utf-8') + this.stream = createWriteStream(this.paths.outputFilePath, { flags: 'a' }) + } + + append(chunk: Buffer | string): void { + if (this.closed) return + if (!this.stream) { + throw new Error('background shell output writer is not open') + } + this.stream.write(chunk) + } + + async close(): Promise { + if (this.closed) return + this.closed = true + if (!this.stream) { 
+ await mkdir(this.paths.outputDir, { recursive: true }) + await writeFile(this.paths.outputFilePath, '', 'utf-8') + return + } + const stream = this.stream + this.stream = undefined + await new Promise((resolvePromise, reject) => { + stream.once('finish', resolvePromise) + stream.once('error', reject) + stream.end() + }) + } + + async buildReturnFields( + maxChars = DEFAULT_BACKGROUND_SHELL_OUTPUT_SUMMARY_MAX_CHARS + ): Promise { + const summary = await readBackgroundShellOutputSummary(this.paths.outputFilePath, maxChars) + return { + ...summary, + output_file: this.paths.outputFilePath + } + } +} diff --git a/kun/src/services/background-shell-runtime.ts b/kun/src/services/background-shell-runtime.ts new file mode 100644 index 000000000..63ad93eab --- /dev/null +++ b/kun/src/services/background-shell-runtime.ts @@ -0,0 +1,221 @@ +import type { BackgroundShellRecord, BackgroundShellStatus } from '../contracts/background-shell.js' +import type { RuntimeEventRecorder } from './runtime-event-recorder.js' +import type { ThreadStore } from '../ports/thread-store.js' +import type { TurnService } from './turn-service.js' +import type { BackgroundShellHooks } from '../adapters/tool/builtin-tool-types.js' +import { + backgroundShellNoticeDisplayText, + formatBackgroundShellCompletionNotice +} from './background-shell-notice.js' + +export type BackgroundShellRuntimeDeps = { + events: RuntimeEventRecorder + threadStore: ThreadStore + turns: TurnService + nowIso: () => string +} + +type RunTurnFn = (threadId: string, turnId: string) => Promise + +export class BackgroundShellRuntime { + private readonly sessions = new Map() + private readonly detachedIds = new Set() + private runTurn: RunTurnFn | null = null + + constructor(private readonly deps: BackgroundShellRuntimeDeps) {} + + bindAgentLoop(input: { runTurn: RunTurnFn }): void { + this.runTurn = input.runTurn + } + + bashHooks(): BackgroundShellHooks { + return { + onSessionStarted: (record) => 
this.handleSessionStarted(record), + onSessionUpdated: (record) => this.handleSessionUpdated(record), + onSessionSettled: (record) => this.handleSessionSettled(record), + isDetachedSession: (sessionId) => this.detachedIds.has(sessionId) + } + } + + listSessions(threadId?: string): BackgroundShellRecord[] { + const all = [...this.sessions.values()] + const filtered = threadId ? all.filter((session) => session.threadId === threadId) : all + return filtered.sort((a, b) => b.startedAt.localeCompare(a.startedAt)) + } + + getSession(sessionId: string): BackgroundShellRecord | null { + return this.sessions.get(sessionId) ?? null + } + + private stopHandler: ((sessionId: string) => Promise) | null = null + + bindStopHandler(handler: (sessionId: string) => Promise): void { + this.stopHandler = handler + } + + async stopSession(sessionId: string): Promise { + if (!this.stopHandler) return false + return this.stopHandler(sessionId) + } + + markDetached(sessionId: string): void { + this.detachedIds.add(sessionId) + } + + unmarkDetached(sessionId: string): void { + this.detachedIds.delete(sessionId) + } + + upsertSession(record: BackgroundShellRecord): void { + this.sessions.set(record.id, record) + } + + removeSession(sessionId: string): void { + this.sessions.delete(sessionId) + this.detachedIds.delete(sessionId) + } + + private sessionEventOutput(record: BackgroundShellRecord): { + output: string + outputTruncated?: true + outputFilePath?: string + } { + return { + output: record.output, + ...(record.outputTruncated ? { outputTruncated: true as const } : {}), + ...(record.outputFilePath ? 
{ outputFilePath: record.outputFilePath } : {}) + } + } + + private async handleSessionStarted(record: BackgroundShellRecord): Promise { + this.sessions.set(record.id, record) + if (record.detached) this.detachedIds.add(record.id) + await this.deps.events.record({ + kind: 'bash_session_started', + threadId: record.threadId, + turnId: record.turnId, + sessionId: record.id, + command: record.command, + cwd: record.cwd, + shell: record.shell, + status: record.status, + startedAt: record.startedAt, + detached: record.detached, + ...this.sessionEventOutput(record) + }) + } + + private async handleSessionUpdated(record: BackgroundShellRecord): Promise { + this.sessions.set(record.id, record) + await this.deps.events.record({ + kind: 'bash_session_updated', + threadId: record.threadId, + turnId: record.turnId, + sessionId: record.id, + command: record.command, + cwd: record.cwd, + shell: record.shell, + status: record.status, + startedAt: record.startedAt, + ...(record.finishedAt ? { finishedAt: record.finishedAt } : {}), + exitCode: record.exitCode, + detached: record.detached, + ...this.sessionEventOutput(record), + ...(record.error ? { error: record.error } : {}) + }) + } + + private async handleSessionSettled(record: BackgroundShellRecord): Promise { + this.sessions.set(record.id, record) + await this.deps.events.record({ + kind: 'bash_session_completed', + threadId: record.threadId, + turnId: record.turnId, + sessionId: record.id, + command: record.command, + cwd: record.cwd, + shell: record.shell, + status: record.status, + startedAt: record.startedAt, + ...(record.finishedAt ? { finishedAt: record.finishedAt } : {}), + exitCode: record.exitCode, + detached: record.detached, + ...this.sessionEventOutput(record), + ...(record.error ? 
{ error: record.error } : {}) + }) + if (record.detached && record.status === 'completed' && record.exitCode === 0) { + await this.notifyAgent(record) + } + if (record.status !== 'running') { + this.detachedIds.delete(record.id) + } + } + + private async notifyAgent(record: BackgroundShellRecord): Promise { + const thread = await this.deps.threadStore.get(record.threadId) + if (!thread) return + const notice = formatBackgroundShellCompletionNotice(record) + const displayText = backgroundShellNoticeDisplayText(record.id) + const noticeMeta = { + displayText, + messageSource: 'background_shell' as const + } + if (thread.status === 'running') { + const runningTurn = [...thread.turns].reverse().find((turn) => turn.status === 'running') + if (runningTurn) { + await this.deps.turns.steerTurn({ + threadId: record.threadId, + turnId: runningTurn.id, + text: notice, + ...noticeMeta + }) + return + } + } + if (!this.runTurn) return + const started = await this.deps.turns.startTurn({ + threadId: record.threadId, + request: { + prompt: notice, + ...noticeMeta + } + }) + void this.runTurn(record.threadId, started.turnId) + } +} + +export function toBackgroundShellRecord(input: { + id: string + threadId: string + turnId: string + command: string + cwd: string + shell: string + status: BackgroundShellStatus + startedAt: string + finishedAt?: string + exitCode: number | null + output: string + outputTruncated?: boolean + outputFilePath?: string + error?: string + detached: boolean +}): BackgroundShellRecord { + return { + id: input.id, + threadId: input.threadId, + turnId: input.turnId, + command: input.command, + cwd: input.cwd, + shell: input.shell, + status: input.status, + startedAt: input.startedAt, + ...(input.finishedAt ? { finishedAt: input.finishedAt } : {}), + exitCode: input.exitCode, + output: input.output, + ...(input.outputTruncated ? { outputTruncated: true } : {}), + ...(input.outputFilePath ? { outputFilePath: input.outputFilePath } : {}), + ...(input.error ? 
{ error: input.error } : {}), + detached: input.detached + } +} diff --git a/kun/src/services/thread-service.ts b/kun/src/services/thread-service.ts index 796c50620..245282936 100644 --- a/kun/src/services/thread-service.ts +++ b/kun/src/services/thread-service.ts @@ -749,6 +749,7 @@ function rebuildTurnsFromItems(input: { attachmentIds: [], activeSkillIds: [], injectedMemoryIds: [], + injectedMemorySummaries: [], createdAt: input.now, finishedAt: input.now, items: [] @@ -767,6 +768,7 @@ function rebuildTurnsFromItems(input: { attachmentIds: attachmentIdsFromItems(items), activeSkillIds: [], injectedMemoryIds: [], + injectedMemorySummaries: [], createdAt: items[0]?.createdAt ?? input.now, finishedAt: input.now, items diff --git a/kun/src/services/turn-service.ts b/kun/src/services/turn-service.ts index 8d9c1eed6..36c750efa 100644 --- a/kun/src/services/turn-service.ts +++ b/kun/src/services/turn-service.ts @@ -88,6 +88,7 @@ export class TurnService { threadId: input.threadId, text: input.request.prompt, displayText: input.request.displayText, + messageSource: input.request.messageSource, attachmentIds: input.request.attachmentIds ?? [], fileReferences: input.request.fileReferences ?? [], workspaceCheckpointId: input.request.workspaceCheckpointId @@ -156,13 +157,25 @@ export class TurnService { } } - async steerTurn(input: { threadId: string; turnId: string; text: string }): Promise { - this.deps.steering.enqueue(input.turnId, input.text) + async steerTurn(input: { + threadId: string + turnId: string + text: string + displayText?: string + messageSource?: 'background_shell' + }): Promise { + this.deps.steering.enqueue(input.turnId, { + text: input.text, + ...(input.displayText ? { displayText: input.displayText } : {}), + ...(input.messageSource ? { messageSource: input.messageSource } : {}) + }) await this.deps.events.record({ kind: 'turn_steered', threadId: input.threadId, turnId: input.turnId, - text: input.text + text: input.text, + ...(input.displayText ? 
{ displayText: input.displayText } : {}), + ...(input.messageSource ? { messageSource: input.messageSource } : {}) }) } @@ -446,6 +459,7 @@ export class TurnService { Partial, | 'activeSkillIds' | 'injectedMemoryIds' + | 'injectedMemorySummaries' | 'skillInjectionBytes' | 'toolCatalogFingerprint' | 'toolCatalogToolCount' @@ -460,6 +474,9 @@ export class TurnService { ...turn, ...(patch.activeSkillIds ? { activeSkillIds: [...patch.activeSkillIds] } : {}), ...(patch.injectedMemoryIds ? { injectedMemoryIds: [...patch.injectedMemoryIds] } : {}), + ...(patch.injectedMemorySummaries + ? { injectedMemorySummaries: [...patch.injectedMemorySummaries] } + : {}), ...(patch.skillInjectionBytes !== undefined ? { skillInjectionBytes: patch.skillInjectionBytes } : {}), ...(patch.toolCatalogFingerprint ? { toolCatalogFingerprint: patch.toolCatalogFingerprint } : {}), ...(patch.toolCatalogToolCount !== undefined ? { toolCatalogToolCount: patch.toolCatalogToolCount } : {}), diff --git a/kun/src/shared/memory-preview.ts b/kun/src/shared/memory-preview.ts new file mode 100644 index 000000000..aa5e866ad --- /dev/null +++ b/kun/src/shared/memory-preview.ts @@ -0,0 +1,5 @@ +export function memoryPreview(content: string, maxLength = 200): string { + const compact = content.replace(/\s+/g, ' ').trim() + if (compact.length <= maxLength) return compact + return `${compact.slice(0, maxLength).trimEnd()}...` +} diff --git a/kun/tests/background-shell-notice.test.ts b/kun/tests/background-shell-notice.test.ts new file mode 100644 index 000000000..32b3d7c5e --- /dev/null +++ b/kun/tests/background-shell-notice.test.ts @@ -0,0 +1,77 @@ +import { describe, expect, it } from 'vitest' +import { + backgroundShellNoticeDisplayText, + formatBackgroundShellCompletionNotice, + parseBackgroundShellCompletionNotice +} from '../src/services/background-shell-notice.js' + +describe('background-shell-notice', () => { + it('formats and parses completion notices as xml', () => { + const xml = 
formatBackgroundShellCompletionNotice({ + id: 'abcd1234', + threadId: 'thr_1', + turnId: 'turn_1', + command: 'npm run build', + cwd: '/tmp', + shell: 'bash', + status: 'completed', + startedAt: '2026-01-01T00:00:00.000Z', + finishedAt: '2026-01-01T00:00:05.000Z', + exitCode: 0, + output: 'build ok', + detached: true + }) + expect(xml).toContain('') + expect(xml).toContain('abcd1234') + expect(xml).toContain('npm run build') + expect(parseBackgroundShellCompletionNotice(xml)).toEqual({ + sessionId: 'abcd1234', + command: 'npm run build', + exitCode: 0, + outputPreview: 'build ok', + hint: expect.stringContaining('background_shell action="read"') + }) + }) + + it('escapes xml characters in command and output preview', () => { + const xml = formatBackgroundShellCompletionNotice({ + id: 'sess1', + threadId: 'thr_1', + turnId: 'turn_1', + command: 'echo "&"', + cwd: '/tmp', + shell: 'bash', + status: 'completed', + startedAt: '2026-01-01T00:00:00.000Z', + exitCode: 0, + output: '', + detached: true + }) + expect(xml).toContain('echo "<tag>&"') + expect(parseBackgroundShellCompletionNotice(xml)?.command).toBe('echo "&"') + expect(parseBackgroundShellCompletionNotice(xml)?.outputPreview).toBe('') + }) + + it('includes the output file path in completion notices when available', () => { + const xml = formatBackgroundShellCompletionNotice({ + id: 'abcd1234', + threadId: 'thr_1', + turnId: 'turn_1', + command: 'npm run build', + cwd: '/tmp', + shell: 'bash', + status: 'completed', + startedAt: '2026-01-01T00:00:00.000Z', + exitCode: 0, + output: 'ok', + outputFilePath: '/data/threads/thr_1/background-shells/abcd1234.output', + detached: true + }) + expect(xml).toContain('/data/threads/thr_1/background-shells/abcd1234.output') + expect(xml).not.toContain('') + }) + + it('builds a short display label for the renderer', () => { + expect(backgroundShellNoticeDisplayText('abcd1234')).toBe('Background shell abcd1234 completed') + }) +}) diff --git 
a/kun/tests/background-shell-output.test.ts b/kun/tests/background-shell-output.test.ts new file mode 100644 index 000000000..30019cd9b --- /dev/null +++ b/kun/tests/background-shell-output.test.ts @@ -0,0 +1,79 @@ +import { mkdtemp, readFile, rm } from 'node:fs/promises' +import { tmpdir } from 'node:os' +import { join, resolve } from 'node:path' +import { afterEach, describe, expect, it } from 'vitest' +import { + BACKGROUND_SHELL_OUTPUT_SUBDIR, + BACKGROUND_SHELL_OUTPUT_TRUNCATION_NOTICE, + BackgroundShellOutputWriter, + DEFAULT_BACKGROUND_SHELL_OUTPUT_SUMMARY_MAX_CHARS, + isBackgroundShellOutputPath, + readBackgroundShellOutputSummary, + resolveBackgroundShellOutputPaths, + summarizeBackgroundShellOutput +} from '../src/services/background-shell-output.js' + +describe('background-shell-output', () => { + let tempDir = '' + + afterEach(async () => { + if (tempDir) await rm(tempDir, { recursive: true, force: true }) + }) + + it('stores all session logs under one thread-scoped folder', async () => { + tempDir = await mkdtemp(join(tmpdir(), 'kun-bg-shell-output-')) + const first = resolveBackgroundShellOutputPaths(tempDir, 'thr_1', 'aaaa1111') + const second = resolveBackgroundShellOutputPaths(tempDir, 'thr_1', 'bbbb2222') + expect(first.outputDir).toBe(second.outputDir) + expect(first.outputDir).toContain(`${BACKGROUND_SHELL_OUTPUT_SUBDIR}`) + expect(first.outputFilePath.endsWith('aaaa1111.output')).toBe(true) + expect(second.outputFilePath.endsWith('bbbb2222.output')).toBe(true) + expect(resolve(first.outputFilePath)).toBe(first.outputFilePath) + }) + + it('appends a truncation notice to summarized output', () => { + const full = 'x'.repeat(DEFAULT_BACKGROUND_SHELL_OUTPUT_SUMMARY_MAX_CHARS + 50) + const summary = summarizeBackgroundShellOutput(full) + expect(summary.truncated).toBe(true) + expect(summary.summary.endsWith(BACKGROUND_SHELL_OUTPUT_TRUNCATION_NOTICE)).toBe(true) + 
expect([...summary.summary].length).toBeLessThanOrEqual(DEFAULT_BACKGROUND_SHELL_OUTPUT_SUMMARY_MAX_CHARS) + }) + + it('always creates an output file and summarizes from disk', async () => { + tempDir = await mkdtemp(join(tmpdir(), 'kun-bg-shell-output-')) + const writer = new BackgroundShellOutputWriter(tempDir, 'thr_1', 'sess1234') + await writer.open() + writer.append('hello\n') + writer.append('x'.repeat(DEFAULT_BACKGROUND_SHELL_OUTPUT_SUMMARY_MAX_CHARS + 50)) + const live = await writer.buildReturnFields() + expect(live.output_file).toContain('sess1234.output') + expect(live.truncated).toBe(true) + expect(live.summary).toContain(BACKGROUND_SHELL_OUTPUT_TRUNCATION_NOTICE) + await writer.close() + const persisted = await readFile(live.output_file, 'utf-8') + expect(persisted.startsWith('hello\n')).toBe(true) + const summary = await readBackgroundShellOutputSummary(live.output_file) + expect(summary.truncated).toBe(true) + }) + + it('creates an empty output file even when no bytes were written', async () => { + tempDir = await mkdtemp(join(tmpdir(), 'kun-bg-shell-output-')) + const writer = new BackgroundShellOutputWriter(tempDir, 'thr_1', 'empty01') + await writer.open() + await writer.close() + const fields = await writer.buildReturnFields() + expect(await readFile(fields.output_file, 'utf-8')).toBe('') + expect(summarizeBackgroundShellOutput('').truncated).toBe(false) + }) + + it('recognizes background shell output paths for sandbox read bypass', async () => { + tempDir = await mkdtemp(join(tmpdir(), 'kun-bg-shell-output-')) + const { outputFilePath } = resolveBackgroundShellOutputPaths(tempDir, 'thr_1', 'sess1234') + expect( + isBackgroundShellOutputPath(outputFilePath, { runtimeDataDir: tempDir, threadId: 'thr_1' }) + ).toBe(true) + expect(isBackgroundShellOutputPath('/tmp/other.log', { runtimeDataDir: tempDir, threadId: 'thr_1' })).toBe( + false + ) + }) +}) diff --git a/kun/tests/background-shell-runtime.test.ts b/kun/tests/background-shell-runtime.test.ts 
new file mode 100644 index 000000000..2a45e6357 --- /dev/null +++ b/kun/tests/background-shell-runtime.test.ts @@ -0,0 +1,92 @@ +import { describe, expect, it, vi } from 'vitest' +import type { ThreadStore } from '../src/ports/thread-store.js' +import type { RuntimeEventRecorder } from '../src/services/runtime-event-recorder.js' +import { BackgroundShellRuntime } from '../src/services/background-shell-runtime.js' +import type { TurnService } from '../src/services/turn-service.js' + +describe('BackgroundShellRuntime', () => { + it('steers a running turn when a detached shell completes successfully', async () => { + const steerTurn = vi.fn(async () => undefined) + const startTurn = vi.fn(async () => ({ threadId: 'thr_1', turnId: 'turn_new', userMessageItemId: 'item_1' })) + const runTurn = vi.fn(async () => undefined) + const runtime = new BackgroundShellRuntime({ + events: { record: vi.fn(async () => undefined) } as unknown as RuntimeEventRecorder, + threadStore: { + get: vi.fn(async () => ({ + id: 'thr_1', + status: 'running', + turns: [{ id: 'turn_1', status: 'running' }] + })) + } as unknown as ThreadStore, + turns: { steerTurn, startTurn } as unknown as TurnService, + nowIso: () => '2026-01-01T00:00:00.000Z' + }) + runtime.bindAgentLoop({ runTurn }) + await runtime.bashHooks().onSessionSettled?.({ + id: 'abcd1234', + threadId: 'thr_1', + turnId: 'turn_1', + command: 'npm test', + cwd: '/tmp', + shell: 'bash', + status: 'completed', + startedAt: '2026-01-01T00:00:00.000Z', + finishedAt: '2026-01-01T00:00:05.000Z', + exitCode: 0, + output: 'ok', + detached: true + }) + expect(steerTurn).toHaveBeenCalledWith({ + threadId: 'thr_1', + turnId: 'turn_1', + text: expect.stringContaining('abcd1234'), + displayText: 'Background shell abcd1234 completed', + messageSource: 'background_shell' + }) + expect(startTurn).not.toHaveBeenCalled() + expect(runTurn).not.toHaveBeenCalled() + }) + + it('starts a new turn with messageSource when the thread is idle', async () => { + 
const steerTurn = vi.fn(async () => undefined) + const startTurn = vi.fn(async () => ({ threadId: 'thr_1', turnId: 'turn_new', userMessageItemId: 'item_1' })) + const runTurn = vi.fn(async () => undefined) + const runtime = new BackgroundShellRuntime({ + events: { record: vi.fn(async () => undefined) } as unknown as RuntimeEventRecorder, + threadStore: { + get: vi.fn(async () => ({ + id: 'thr_1', + status: 'idle', + turns: [{ id: 'turn_1', status: 'completed' }] + })) + } as unknown as ThreadStore, + turns: { steerTurn, startTurn } as unknown as TurnService, + nowIso: () => '2026-01-01T00:00:00.000Z' + }) + runtime.bindAgentLoop({ runTurn }) + await runtime.bashHooks().onSessionSettled?.({ + id: 'abcd1234', + threadId: 'thr_1', + turnId: 'turn_1', + command: 'npm test', + cwd: '/tmp', + shell: 'bash', + status: 'completed', + startedAt: '2026-01-01T00:00:00.000Z', + finishedAt: '2026-01-01T00:00:05.000Z', + exitCode: 0, + output: 'ok', + detached: true + }) + expect(startTurn).toHaveBeenCalledWith({ + threadId: 'thr_1', + request: { + prompt: expect.stringContaining(''), + displayText: 'Background shell abcd1234 completed', + messageSource: 'background_shell' + } + }) + expect(runTurn).toHaveBeenCalledWith('thr_1', 'turn_new') + expect(steerTurn).not.toHaveBeenCalled() + }) +}) diff --git a/kun/tests/builtin-tools.test.ts b/kun/tests/builtin-tools.test.ts index a5d3a5716..e8dbaba14 100644 --- a/kun/tests/builtin-tools.test.ts +++ b/kun/tests/builtin-tools.test.ts @@ -43,6 +43,7 @@ import { createLsTool, createLsToolDefinition } from '../src/adapters/tool/builtin-tools.js' +import { createBackgroundShellTool } from '../src/adapters/tool/background-shell-tool.js' import { createReadTool as createReadToolFromModule } from '../src/adapters/tool/read.js' import { createBashTool as createBashToolFromModule } from '../src/adapters/tool/bash.js' import { createEditTool as createEditToolFromModule } from '../src/adapters/tool/edit.js' @@ -92,15 +93,27 @@ async function 
executeTool( describe('Kun built-in tools', () => { let workspace: string + let backgroundShellDataDir: string let host: LocalToolHost + function createBackgroundBashLocalTool( + options: Parameters[0] = {} + ): ReturnType { + return createBashLocalTool({ + ...options, + backgroundShellDataDir + }) + } + beforeEach(async () => { workspace = await mkdtemp(join(tmpdir(), 'kun-tools-')) + backgroundShellDataDir = await mkdtemp(join(tmpdir(), 'kun-bg-shell-data-')) host = new LocalToolHost({ tools: defaultLocalTools }) }) afterEach(async () => { await rm(workspace, { recursive: true, force: true }) + await rm(backgroundShellDataDir, { recursive: true, force: true }) }) it('advertises the pi-style built-in tool family by default', async () => { @@ -561,89 +574,258 @@ describe('Kun built-in tools', () => { expect(Date.now() - startedAt).toBeLessThan(1500) }) - it('returns a pollable bash session for foreground long-running commands', async () => { + it('blocks foreground bash commands until the process exits', async () => { const startedAt = Date.now() const output = await executeTool(host, workspace, 'bash', { - command: 'echo ready; sleep 5', - yield_seconds: 1, + command: 'echo ready; sleep 2; echo done', timeout: 10 }) - expect(output.exit_code).toBe(null) - expect(output.status).toBe('running') - expect(typeof output.session_id).toBe('string') + expect(output.exit_code).toBe(0) expect(String(output.output)).toContain('ready') - expect(Date.now() - startedAt).toBeLessThan(2500) + expect(String(output.output)).toContain('done') + expect(output.session_id).toBeUndefined() + expect(Date.now() - startedAt).toBeGreaterThanOrEqual(1800) + }) - const stopped = await executeTool(host, workspace, 'bash', { - action: 'stop', - session_id: String(output.session_id) + it('returns immediately for background bash sessions and keeps running after abort', async () => { + const hooks = { + started: [] as string[], + settled: [] as string[] + } + const backgroundHost = new 
LocalToolHost({ + tools: [ + createBackgroundBashLocalTool({ + backgroundShell: { + onSessionStarted: async (record) => { + hooks.started.push(record.id) + }, + onSessionSettled: async (record) => { + hooks.settled.push(record.id) + }, + isDetachedSession: (sessionId) => hooks.started.includes(sessionId) + } + }), + createBackgroundShellTool() + ] }) - expect(stopped.status).toBe('stopped') - expect(stopped.stop_sent).toBe(true) + const abortController = new AbortController() + const startedAt = Date.now() + const output = await backgroundHost.execute( + { + callId: 'call_bash_background', + toolName: 'bash', + arguments: { + command: 'echo bg-ready; sleep 5; echo bg-done', + background: true, + timeout: 10 + } + }, + buildContext(workspace, { abortSignal: abortController.signal }) + ) + expect(output.item.kind).toBe('tool_result') + if (output.item.kind !== 'tool_result') throw new Error('expected tool_result') + const payload = output.item.output as Record + expect(payload.status).toBe('running') + expect(typeof payload.session_id).toBe('string') + expect(String(payload.session_id)).toMatch(/^[a-z0-9]{8}$/) + expect(typeof payload.output_file).toBe('string') + expect(String(payload.output_file)).toMatch(/\.output$/) + expect(Date.now() - startedAt).toBeLessThan(500) + expect(hooks.started).toHaveLength(1) + + abortController.abort() + await new Promise((resolve) => setTimeout(resolve, 2500)) + const read = await backgroundHost.execute( + { + callId: 'call_bash_background_read', + toolName: 'background_shell', + arguments: { + action: 'read', + session_id: String(payload.session_id) + } + }, + buildContext(workspace) + ) + expect(read.item.kind).toBe('tool_result') + if (read.item.kind !== 'tool_result') throw new Error('expected tool_result') + const readPayload = read.item.output as Record + expect(readPayload.status).toBe('running') + + await backgroundHost.execute( + { + callId: 'call_bash_background_stop', + toolName: 'background_shell', + arguments: { + 
action: 'stop', + session_id: String(payload.session_id) + } + }, + buildContext(workspace) + ) + expect(hooks.settled.length).toBeGreaterThanOrEqual(1) }) - it('polls completed bash sessions for final output', async () => { - const output = await executeTool(host, workspace, 'bash', { - command: 'echo ready; sleep 2; echo done', - yield_seconds: 1, - timeout: 10 + it('polls completed background shell sessions via background_shell', async () => { + const backgroundHost = new LocalToolHost({ + tools: [createBackgroundBashLocalTool(), createBackgroundShellTool()] }) - - expect(output.status).toBe('running') + const started = await backgroundHost.execute( + { + callId: 'call_bash_bg_poll', + toolName: 'bash', + arguments: { + command: 'echo ready; sleep 2; echo done', + background: true, + timeout: 10 + } + }, + buildContext(workspace) + ) + expect(started.item.kind).toBe('tool_result') + if (started.item.kind !== 'tool_result') throw new Error('expected tool_result') + const sessionId = String((started.item.output as { session_id?: string }).session_id) await new Promise((resolve) => setTimeout(resolve, 2500)) - const polled = await executeTool(host, workspace, 'bash', { + const polled = await executeTool(backgroundHost, workspace, 'background_shell', { action: 'poll', - session_id: String(output.session_id) + session_id: sessionId, + yield_seconds: 1 }) expect(polled.status).toBe('completed') expect(polled.exit_code).toBe(0) expect(String(polled.output)).toContain('done') + expect(typeof polled.output_file).toBe('string') }) - it('blocks the poll action for at least yield_seconds while the session keeps running', async () => { - const output = await executeTool(host, workspace, 'bash', { - command: 'echo ready; sleep 5; echo done', - yield_seconds: 1, - timeout: 10 + it('lists background shell sessions via background_shell', async () => { + const backgroundHost = new LocalToolHost({ + tools: [ + createBackgroundBashLocalTool(), + createBackgroundShellTool({ + 
listBackgroundSessions: () => [ + { + id: 'abcd1234', + threadId: 'thr_1', + turnId: 'turn_1', + command: 'sleep 10', + cwd: workspace, + shell: 'bash', + status: 'running', + startedAt: '2026-01-01T00:00:00.000Z', + exitCode: null, + output: 'running', + detached: true + } + ] + }) + ] }) - expect(output.status).toBe('running') - - const startedAt = Date.now() - const polled = await executeTool(host, workspace, 'bash', { - action: 'poll', - session_id: String(output.session_id), - yield_seconds: 2 + await backgroundHost.execute( + { + callId: 'call_bash_bg', + toolName: 'bash', + arguments: { command: 'echo hi', background: true, timeout: 10 } + }, + buildContext(workspace) + ) + const listed = await executeTool(backgroundHost, workspace, 'background_shell', { + action: 'list', + thread_only: false }) - const elapsed = Date.now() - startedAt - expect(elapsed).toBeGreaterThanOrEqual(1800) - expect(polled.status).toBe('running') + expect(listed.running).toBe(1) + expect((listed.sessions as Array<{ session_id?: string }>)?.[0]?.session_id).toBe('abcd1234') + }) - await executeTool(host, workspace, 'bash', { - action: 'stop', - session_id: String(output.session_id) + it('persists full background shell output to the thread record directory', async () => { + const backgroundHost = new LocalToolHost({ + tools: [createBackgroundBashLocalTool(), createBackgroundShellTool()] }) + const started = await backgroundHost.execute( + { + callId: 'call_bash_bg_output_file', + toolName: 'bash', + arguments: { + command: "node -e \"process.stdout.write('line-one\\n'); process.stdout.write('x'.repeat(10050))\"", + background: true, + timeout: 10 + } + }, + buildContext(workspace) + ) + expect(started.item.kind).toBe('tool_result') + if (started.item.kind !== 'tool_result') throw new Error('expected tool_result') + const payload = started.item.output as Record + const outputFile = String(payload.output_file) + expect(outputFile).toContain('background-shells') + 
expect(outputFile.endsWith(`${String(payload.session_id)}.output`)).toBe(true) + await new Promise((resolve) => setTimeout(resolve, 500)) + const full = await readFile(outputFile, 'utf-8') + expect(full.startsWith('line-one\n')).toBe(true) + expect([...full].length).toBeGreaterThan(10_000) + const read = await executeTool(backgroundHost, workspace, 'background_shell', { + action: 'read', + session_id: String(payload.session_id) + }) + expect(String(read.output)).toContain('[background shell output truncated') + expect(read.output_file).toBe(outputFile) + expect(read.full_output_path).toBeUndefined() + expect(read.truncation).toBeUndefined() + expect(read.output_truncated).toBeUndefined() }) - it('returns from poll early once the session exits before yield_seconds', async () => { - const output = await executeTool(host, workspace, 'bash', { - command: 'echo ready; sleep 1; echo done', - yield_seconds: 1, - timeout: 10 + it('hides finished background shell sessions from list unless include_finished=true', async () => { + const backgroundHost = new LocalToolHost({ + tools: [ + createBackgroundShellTool({ + listBackgroundSessions: () => [ + { + id: 'runng001', + threadId: 'thr_1', + turnId: 'turn_1', + command: 'sleep 10', + cwd: workspace, + shell: 'bash', + status: 'running', + startedAt: '2026-01-01T00:00:00.000Z', + exitCode: null, + output: 'running', + detached: true + }, + { + id: 'done0001', + threadId: 'thr_1', + turnId: 'turn_1', + command: 'echo done', + cwd: workspace, + shell: 'bash', + status: 'completed', + startedAt: '2026-01-01T00:00:00.000Z', + finishedAt: '2026-01-01T00:00:05.000Z', + exitCode: 0, + output: 'done', + detached: true + } + ] + }) + ] + }) + const runningOnly = await executeTool(backgroundHost, workspace, 'background_shell', { + action: 'list', + thread_only: false }) - expect(output.status).toBe('running') + expect(runningOnly.running).toBe(1) + expect((runningOnly.sessions as Array<{ session_id?: string }>).map((s) => 
s.session_id)).toEqual(['runng001']) - const startedAt = Date.now() - const polled = await executeTool(host, workspace, 'bash', { - action: 'poll', - session_id: String(output.session_id), - yield_seconds: 10 + const withFinished = await executeTool(backgroundHost, workspace, 'background_shell', { + action: 'list', + thread_only: false, + include_finished: true }) - const elapsed = Date.now() - startedAt - expect(elapsed).toBeLessThan(3000) - expect(polled.status).toBe('completed') - expect(polled.exit_code).toBe(0) - expect(String(polled.output)).toContain('done') + expect(withFinished.running).toBe(1) + expect((withFinished.sessions as Array<{ session_id?: string }>).map((s) => s.session_id)).toEqual([ + 'runng001', + 'done0001' + ]) }) it('includes the active shell in bash partial updates', async () => { diff --git a/kun/tests/loop.test.ts b/kun/tests/loop.test.ts index e04f96cf0..ffae9b5aa 100644 --- a/kun/tests/loop.test.ts +++ b/kun/tests/loop.test.ts @@ -2027,7 +2027,7 @@ describe('AgentLoop', () => { it('steers the turn and injects user messages', async () => { const h = makeHarness(makeSilentModel()) await bootstrapThread(h) - h.steering.enqueue(h.turnId, 'follow up') + h.steering.enqueue(h.turnId, { text: 'follow up' }) await h.loop.runTurn(h.threadId, h.turnId) const items = await h.sessionStore.loadItems(h.threadId) const user = items.find((item) => item.kind === 'user_message' && item.text === 'follow up') diff --git a/kun/tests/memory-store.test.ts b/kun/tests/memory-store.test.ts index e8aed9046..9a2526212 100644 --- a/kun/tests/memory-store.test.ts +++ b/kun/tests/memory-store.test.ts @@ -236,6 +236,27 @@ describe('Memory store and recall', () => { expect(hits).toEqual([]) }) + it('lists every memory for settings management when all=true', async () => { + const store = createStore() + await store.create({ + content: 'Project Alpha deploys with pnpm', + scope: 'project', + workspace: '/tmp/project-alpha' + }) + await store.create({ + content: 'Other 
workspace preference', + scope: 'workspace', + workspace: '/tmp/other' + }) + await store.create({ + content: 'User prefers concise answers', + scope: 'user' + }) + + expect(await store.list({ workspace: '/tmp/project-alpha' })).toHaveLength(2) + expect(await store.list({ all: true })).toHaveLength(3) + }) + it('isolates project memories and scope-protects mutations', async () => { const store = createStore() const memory = await store.create({ diff --git a/release/release-v0.2.16.md b/release/release-v0.2.16.md new file mode 100644 index 000000000..c54cc85ba --- /dev/null +++ b/release/release-v0.2.16.md @@ -0,0 +1,52 @@ +# Kun v0.2.16 + +这一版是在 v0.2.15 的「创建 Loop」大版本之后做的一轮能力补齐与稳定性修复。主线是本地 Whisper 语音转写、Agent 回滚与计划交互、LSP 诊断、供应商扩展,以及一批围绕 Windows 升级、Kun 端口、SSE 重连、权限与导入安全的加固。 + +### 本地 Whisper 语音转写 + +- 新增本地 Whisper 转写能力,语音可以在本机完成转文字,不必完全依赖远端 ASR。 +- Whisper 模型支持下载、状态展示、取消和删除,并加入 Hugging Face CDN 镜像源,下载完成后就绪状态也会正确刷新。 +- 补齐 Linux 与 Linux arm64 runner,并处理 macOS 跨架构打包时的 native CPU flags 问题。 +- 打包时会裁剪 Whisper 资源,减少发布包里无用文件的堆积。 + +### Agent 计划、回滚与诊断 + +- 聊天中新增助手回复级 Git 回滚动作,可以从回复处触发对应的回滚流程。 +- 回滚流程会明确提示破坏性,并暴露 rescue id;成功后隐藏内部 checkpoint refs 和多余横幅,减少界面噪音。 +- Plan mode 改为按线程保存,fork 对话不再错误继承原线程的 plan 状态。 +- 模糊请求会先进入澄清问题,而不是过早转成计划;计划回合也可以通过结构化 `user_input` 工具询问用户。 +- 新增 LSP 诊断能力与常见语言服务器预设,代码问题可以更自然地进入 Agent 上下文。 +- 新增缓存诊断与压缩记忆详情弹窗,便于排查长会话、缓存和记忆注入效果。 + +### 供应商、权限与 MCP + +- 新增 LongCat 与 Vercel AI Gateway 供应商预设。 +- 设置中统一工具权限控制,长模型 id、`modelHint` IPC 字段和供应商布局都做了兼容性打磨。 +- 外部工具(MCP)服务器新增表单式编辑器,远程 MCP URL 校验要求 HTTPS,并补充 OAuth 文档入口。 +- GitHub Skill 导入加入 SSRF 与路径穿越防护,远端导入更安全。 +- 缓存 telemetry、usage diagnostics、计划队列并发上限等内部契约得到加固。 + +### 界面、主题与终端 + +- 新增 Retroma parchment 浅色主题、侧栏底部日月主题切换按钮,以及可配置的交互效果颜色。 +- 终端新增黑白模式、自定义颜色,并修复 CJK 乱码;后续又恢复了原生彩色模式。 +- 工具失败提示改用警告色,文件树按钮图标、上下文容量数字、跳转栏编号、设置区块和深色 UI 插件 token 都做了细节修正。 +- Write 编辑器修复选区高亮宽度、Markdown 重命名可见性,并恢复 DOCX 导出。 + +### 运行时、安装与发布 + +- Kun 本地端口上移到 10000 以上,降低和系统/其他开发服务冲突的概率。 +- 运行时只注入一次初始上下文,端口回收时避免误杀仍在使用的 Kun 子进程。 +- 桌面端会节流 IPC 上的 SSE 
事件,并正确处理重连。 +- Windows 升级前会停止打包进来的后台进程,并修复覆盖安装卸载失败的问题。 +- 新增每日 dev prerelease 工作流,更新完成通知标题也统一为 Kun。 + +### 升级说明 + +- 从 `v0.2.15` 升级可直接通过 GUI 更新,本地数据和配置会沿用。 +- 如果要使用本地 Whisper,首次使用需要下载模型;Linux 用户升级后可直接使用对应平台 runner。 +- 如果你之前遇到 Windows 覆盖安装、端口占用或 SSE 重连异常,这一版包含对应修复。 + +### 总结 + +v0.2.16 是一次“把大功能落稳”的版本:本地语音转写、回滚、计划交互、LSP 和供应商能力继续往前走,同时把安装、端口、权限、安全和界面细节一起补牢。 diff --git a/release/release-v0.2.17.md b/release/release-v0.2.17.md new file mode 100644 index 000000000..eaf1f151f --- /dev/null +++ b/release/release-v0.2.17.md @@ -0,0 +1,54 @@ +# Kun v0.2.17 + +这一版的主角是子代理系统。Kun 开始支持更完整的子代理 profile、按 profile 路由模型、独立子会话、实时子任务展示,以及 `.kun/agents/*.md` 这样的项目级代理定义。同时,Skill/MCP、文档附件、Agents 设置和上下文压缩也完成了一轮系统升级。 + +### 子代理系统成型 + +- 子代理支持按 profile 配置模型供应商、模式、系统提示词、允许工具、名称、描述和颜色。 +- 设置中新增 Subagents 管理视图,可以创建、编辑、删除子代理,并把配置桥接到运行时。 +- 线程支持 `agentId` 与 persona,composer 新增代理选择器,子代理运行状态可以实时显示。 +- 支持 `.kun/agents/*.md` 覆盖项目代理定义,并提供 AI 草稿、detach 与 abort 能力。 +- 子代理可以配置独立压缩模型,并内置 General / Explore 等预设。 +- 委派链路补齐被禁止工具、被禁止服务器和 profile 合并逻辑,子代理权限边界更清晰。 + +### Skill、MCP 与附件 + +- 聊天支持作用域 Skill 和文档附件,Skill 可以按项目/全局来源加载并标记来源。 +- Tool runtime 支持全局 Skill 加载、深路径文件搜索,以及从设置传入全局 Skill roots。 +- 已停用 Skill 会在运行时生效,Codex 插件缓存目录也可以开关。 +- MCP stdio server 支持配置 `cwd`,远程 MCP 继续强化 HTTPS 约束。 +- 新增进程类工具图标,纯文本代码块也会显示更清楚的 chrome。 + +### Agents 设置与模型行为 + +- Agents 配置界面重写,整合模型配置、完整管理、动画 Kun 与 i18n。 +- 内置 endpoint format 会标注供应商协议类型,例如 OpenAI 与 Anthropic。 +- 上下文压缩摘要改为更接近 opencode 的 compaction 模式,长会话阅读和续跑更自然。 +- Anthropic 并行 `tool_result` 会合并为一个 user message,避免协议不兼容。 +- 视觉模型到文本模型的锁定只在确实有图片时触发。 + +### 会话、侧栏与启动体验 + +- 侧栏会话操作更完整,长分支名、完整工作区路径和聊天跳转栏都做了展示修复。 +- 新用户引导中加入默认 Agent 权限配置。 +- 设置里新增 Git checkpoint 清理间隔,并保持 opt-in。 +- ask-user 提示面板上移到输入框上方,减少长对话中被忽略的概率。 +- Loop 编辑器在 Windows 标题栏下方正确偏移。 + +### 稳定性与安全修复 + +- Git checkpoint restore 会读取 `thread.status` busy guard,并加强路径穿越防护。 +- IM 权限透传、workspace symlink escape、MCP HTTPS 等安全边界继续收紧。 +- 运行时启动必须通过 health probe 后才宣告 ready。 +- 上游模型列表 `fetch_failed` 与本地 runtime failure 会区分展示,排障信息更准确。 +- 
修复流式模型客户端丢失/截断 tool calls 的问题。 + +### 升级说明 + +- 从 `v0.2.16` 升级可直接通过 GUI 更新。 +- 如果你想使用子代理,可以在 Agents 设置中配置 profile;项目内也可以用 `.kun/agents/*.md` 保存团队共享代理定义。 +- 如果之前手动维护过 Skill 目录,升级后建议检查全局和项目 Skill roots 的开关状态。 + +### 总结 + +v0.2.17 让 Kun 的“多代理协作”正式成型:子代理不再只是一次工具调用,而是有 profile、有模型、有权限、有 UI、有项目定义的独立工作单元。配合全局 Skill、MCP `cwd`、文档附件和设置重写,这一版把 Agent 生态往可管理、可复用的方向推进了一大步。 diff --git a/release/release-v0.2.18.md b/release/release-v0.2.18.md new file mode 100644 index 000000000..bd0ffa737 --- /dev/null +++ b/release/release-v0.2.18.md @@ -0,0 +1,29 @@ +# Kun v0.2.18 + +这一版是 v0.2.17 之后的小型打磨版本,重点补在子代理委派、分支/worktree 操作、线程事件订阅和设置布局上。它不是大功能发布,但让刚成型的子代理与 Git 工作流更顺手。 + +### 子代理委派与展示 + +- 强化 delegation tool provider 与运行时的子代理支持,子代理委派链路更完整。 +- 子代理调用卡片默认折叠,避免多个子任务同时运行时把时间线撑得过长。 +- 优化子代理相关字体缩放与展示细节,配合 v0.2.17 的 profile 系统更稳定。 + +### 分支和 Worktree 操作 + +- Git 分支创建、切换文案补齐本地化,用户更容易理解当前操作。 +- 分支管理增强 worktree 支持,和会话级隔离工作树的配合更自然。 +- 相关标签与状态文案也做了同步调整。 + +### 设置和事件流 + +- 设置页把代理配置移动到供应商配置下方,并细化字体缩放控制。 +- 优化线程事件订阅,减少会话流转中的重复订阅和状态抖动。 + +### 升级说明 + +- 从 `v0.2.17` 升级可直接通过 GUI 更新,无需额外操作。 +- 如果你已经在使用子代理或 worktree 分支管理,这一版主要是体验和稳定性提升。 + +### 总结 + +v0.2.18 是一版精修补丁:让子代理调用更安静,分支/worktree 操作更清楚,设置和事件订阅更稳。它把 v0.2.17 的大改动继续磨平了一层。 diff --git a/release/release-v0.2.19.md b/release/release-v0.2.19.md new file mode 100644 index 000000000..a48880569 --- /dev/null +++ b/release/release-v0.2.19.md @@ -0,0 +1,60 @@ +# Kun v0.2.19 + +这一版的核心是 Claude Pro/Max 订阅接入:Kun 通过内置 Claude Agent SDK 路径,把完整回合委托给订阅模型,同时继续注入 Kun 的历史、Skill、模式上下文、权限和工具桥。除此之外,这一版还加入了 Conversations 标签、对话本地附件、自动验收、目标续跑增强,并修复了一批运行时与设置稳定性问题。 + +### Claude Pro/Max 订阅接入 + +- 新增 `Claude (Pro/Max 订阅)` 供应商预设,通过 Claude Agent SDK 使用订阅额度,而不是普通 HTTP API 计费路径。 +- Kun 会把完整回合路由到 SDK runtime,并把 Kun 专属工具桥接为 in-process MCP,让订阅路径也能使用 Kun 的工具、权限和上下文。 +- 新增 Claude 订阅登录 UI,可以检测本机 Claude Code 登录,也支持通过 SDK 获取可用模型。 +- Claude Code binary 改为按需下载,并提供后台下载进度;不再要求用户提前单独安装 CLI。 +- 支持从 `supportedModels()` 获取模型并自动填充,模型 id、视觉能力和上下文长度会跟随 SDK 返回值校准。 +- 图片附件会转发给 SDK,交互式输入会走 Kun 的 `user_input` 
面板,plan 回合也会向 SDK 暴露 `create_plan`。 + +### 对话工作区、附件与 Composer + +- 侧栏新增 Conversations 标签,并可自动创建带时间戳的对话工作区。 +- 对话支持添加本地文件附件,composer 也新增文件和文件夹入口。 +- 项目选择器会排除 conversation workspace,避免临时对话空间干扰真实项目列表。 +- 会话置顶切换、线程预览锚点、会话动作弹窗遮罩和 timeline 置底行为都做了修复。 +- 重新打开已结束线程时,不会再重复弹出过期的 `user_input` 提示。 + +### 运行时可靠性与自动验收 + +- 运行时会重试 stale managed endpoints,并加强 endpoint health recovery。 +- 子代理运行时卡住时可以恢复,并新增 event-loop stall 日志用于诊断 runtime hang。 +- 新增自动验收验证,代码模式下 `verify_changes` 变为可选建议,减少对非代码任务的干扰。 +- 目标续跑逻辑增强,可以更好处理未完成目标对应的回合。 +- Write 与 SDD 工作台改为懒加载,降低首屏渲染和设置切换成本。 + +### 设置、MCP 与权限 + +- 设置里可以看到 MCP 与 Skill 权限来源,权限预览也不再直接回显原始 MCP 解析错误。 +- MCP 服务器按 workspace roots 作用域管理,并移除不安全的 repo-local `.kun/mcp.json` 自动导入。 +- 修复模型请求代理 URL 输入时被清空的问题,设置卸载时会 flush 待保存的供应商编辑。 +- Provider stale proxy 诊断更明确,连接测试的长错误消息也会正确换行。 + +### 平台与工作区体验 + +- Linux 下 Wayland IME flags 会按平台门控,减少输入法相关副作用。 +- Windows shell 通过绝对路径启动,并正确尊重 `danger-full-access` 文件工具权限。 +- 原生右键菜单和若干标签文案更清楚。 +- Worktree 支持重复 checkout 同一分支,并隐藏内部 worktree projects;undefined workspace 与 `.kun/worktrees` 锚点检测也做了防护。 +- 内部拆分了 main 桌面行为、路径 helper、聊天 store 初始状态与线程 action,为后续维护降低复杂度。 + +### 本版合入的修复 + +- 修复 Claude SDK 路径下的文本/推理重复流式渲染、非 Anthropic 线程模型回落、`canUseTool` 输入更新和订阅状态识别。 +- 修复设置代理、侧栏预览、composer 背景、目标面板背景、纯文本 chip 背景等 UI 问题。 +- 修复构建中缺失的 `@emnapi` lockfile 记录。 +- 补充 worktree 分组测试覆盖,并增加 runtime stall 诊断日志。 + +### 升级说明 + +- 从 `v0.2.18` 升级可直接通过 GUI 更新。 +- 如果要使用 Claude Pro/Max 订阅路径,可以在供应商中选择 `Claude (Pro/Max 订阅)`,按提示登录或下载所需 SDK binary。 +- 如果你依赖 repo-local `.kun/mcp.json` 自动导入,升级后需要改为在设置中显式配置 MCP,以避免不安全的隐式导入。 + +### 总结 + +v0.2.19 把 Kun 接到 Claude 订阅模型路径上,同时没有放弃 Kun 自己的工具、Skill、权限和上下文治理。这让订阅模型可以真正参与 Kun 的本地工作流;配合 Conversations、附件、自动验收、目标续跑和运行时修复,它也是 v0.2 后段非常关键的一次整合版本。 diff --git a/release/release-v0.2.20.md b/release/release-v0.2.20.md new file mode 100644 index 000000000..05229cb8c --- /dev/null +++ b/release/release-v0.2.20.md @@ -0,0 +1,48 @@ +# Kun v0.2.20 + +这一版是 v0.2.19 之后的一次稳定性与性能补强。主线是新增后台 shell 会话,让长时间命令可以脱离当前回合继续运行并持久化输出;同时修复 MCP 
streamable-http 断线导致运行时不稳、跨回合编辑误拦截、前端首屏包体偏大等问题,并补上 Agent replay benchmark,方便后续用回放方式观察运行时表现。 + +### 后台 Shell 会话 + +- 新增 `background_shell` 能力,长时间运行的 shell 命令可以在后台继续执行,不必阻塞当前对话回合。 +- 后台命令输出会按线程持久化到 runtime data 目录,并保留输出摘要与完整日志路径,便于后续查看和审计。 +- 运行结束后会向 Agent 发送完成通知,GUI 会把这类通知识别为后台 shell 事件,避免覆盖原始用户提示。 +- 后台 shell 支持列出会话、查看详情、停止运行中的会话,并默认隐藏已结束会话以减少噪音。 +- 修复后台输出目录使用错误,统一改走 `options.dataDir`,保证开发与打包环境都能找到正确位置。 + +### MCP 与运行时可靠性 + +- 修复 streamable-http MCP server 断开连接时可能把 Kun runtime 一起带崩的问题(#639)。 +- 加固 MCP runtime reconnect 生命周期:断线后按需重连,多个并发工具调用共享同一次重连,生命周期关闭会正确标记为离线。 +- 运行时 crash handler 会把可恢复的 MCP 后台拒绝视作可恢复错误,避免因为外部 MCP 抖动导致本地会话中断。 +- 修复 stale reconnect、诊断状态和重试时机相关问题,让外部工具服务恢复后能继续调用。 + +### 文件编辑与对话体验 + +- 修复跨回合编辑时 read tracker 过度保守的问题:只要旧文本仍在最近读取内容中,就允许后续回合继续编辑(#640)。 +- 记忆注入 chip hover 时显示记忆摘要,便于确认当前对话使用了哪些长期记忆。 +- 设置中新增对话文字宽度配置,可以调整消息正文和输入框正文的显示宽度。 +- 思考过程文字做了降噪处理,界面左侧边框也进一步简化。 + +### 性能与可维护性 + +- 设置页和时间线相关代码拆分为独立 bundle,减少主工作台首屏负担。 +- 新增 Agent replay benchmark,可用只读 HTTP/SSE 回放套件重复跑核心场景,方便比较运行时性能和稳定性。 +- replay benchmark 的清理逻辑与测试期望得到加固,减少基准测试自身的误报。 +- 清理 agent loop 中不再使用的 `MAX_TURN_MODEL_STEPS` 及相关逻辑。 + +### 测试与回归修复 + +- 修复后台 shell 回调 UI、工具摘要、runtime-client import 等 renderer 问题。 +- 修复批量 PR 合入后的类型、mock、timeline chip 回归。 +- 补充后台 shell、MCP reconnect、read tracker 和 replay benchmark 相关测试覆盖。 + +### 升级说明 + +- 从 `v0.2.19` 升级可直接通过 GUI 更新。 +- 后台 shell 输出会写入 Kun runtime data 目录;如果你在只读沙箱中查看输出,Kun 会允许读取这些后台日志文件。 +- 如果你依赖远程 MCP server,这一版会明显改善断线和恢复时的稳定性。 + +### 总结 + +v0.2.20 把 Kun 的长命令执行和外部工具恢复能力往前推进了一步:后台 shell 让耗时任务不再绑死对话回合,MCP reconnect 修复让 runtime 更抗抖,前端拆包和 replay benchmark 则让性能优化有了更清晰的落点。 diff --git a/src/main/claw-runtime.test.ts b/src/main/claw-runtime.test.ts index d543d54a2..7a1932563 100644 --- a/src/main/claw-runtime.test.ts +++ b/src/main/claw-runtime.test.ts @@ -25,6 +25,7 @@ function buildSettings(): AppSettingsV1 { locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider: defaultModelProviderSettings(), agents: { kun: 
defaultKunRuntimeSettings() diff --git a/src/main/claw-schedule-mcp-config.test.ts b/src/main/claw-schedule-mcp-config.test.ts index dc0bbe9eb..d65d469a7 100644 --- a/src/main/claw-schedule-mcp-config.test.ts +++ b/src/main/claw-schedule-mcp-config.test.ts @@ -34,6 +34,7 @@ function createSettings(patch: Partial = locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider: defaultModelProviderSettings(), agents: { kun: defaultKunRuntimeSettings() diff --git a/src/main/claw-scheduled-task-detector.test.ts b/src/main/claw-scheduled-task-detector.test.ts index 82b717694..0d37b585d 100644 --- a/src/main/claw-scheduled-task-detector.test.ts +++ b/src/main/claw-scheduled-task-detector.test.ts @@ -28,6 +28,7 @@ function settings(endpointFormat: ModelEndpointFormat): AppSettingsV1 { locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider, agents: { kun: defaultKunRuntimeSettings() diff --git a/src/main/ipc/app-ipc-schemas.ts b/src/main/ipc/app-ipc-schemas.ts index 756af1af5..e1407d4cf 100644 --- a/src/main/ipc/app-ipc-schemas.ts +++ b/src/main/ipc/app-ipc-schemas.ts @@ -26,7 +26,9 @@ import { KUN_THREAD_TEMPLATE, KUN_USER_INPUT_TEMPLATE, KUN_USAGE_TEMPLATE, - KUN_DEBUG_LLM_ROUNDS_TEMPLATE + KUN_DEBUG_LLM_ROUNDS_TEMPLATE, + KUN_BACKGROUND_SHELLS_TEMPLATE, + KUN_BACKGROUND_SHELL_TEMPLATE } from '../../shared/kun-endpoints' import { IMAGE_GENERATION_PROTOCOLS, @@ -46,7 +48,7 @@ import { } from '../../shared/app-settings' import { DESKTOP_COMMANDS } from '../../shared/kun-gui-api' import { GUI_UPDATE_CHANNELS } from '../../shared/gui-update' -import { WINDOW_CLOSE_ACTIONS, UI_FONT_SCALE_MIN, UI_FONT_SCALE_MAX } from '../../shared/app-settings' +import { WINDOW_CLOSE_ACTIONS, CHAT_CONTENT_MAX_WIDTH_MIN, CHAT_CONTENT_MAX_WIDTH_MAX, UI_FONT_SCALE_MIN, UI_FONT_SCALE_MAX } from '../../shared/app-settings' import { KEYBOARD_SHORTCUT_COMMANDS } from '../../shared/keyboard-shortcuts' import { WRITE_EXPORT_FORMATS } from 
'../../shared/write-export' import { WRITE_INFOGRAPHIC_MAX_TEXT_CHARS } from '../../shared/write-infographic' @@ -173,7 +175,10 @@ const ENDPOINTS: readonly EndpointTemplate[] = [ compileEndpoint(KUN_USER_INPUT_TEMPLATE, ['POST']), compileEndpoint(KUN_SESSION_RESUME_TEMPLATE, ['POST']), compileEndpoint(KUN_USAGE_TEMPLATE, ['GET']), - compileEndpoint(KUN_DEBUG_LLM_ROUNDS_TEMPLATE, ['GET']) + compileEndpoint(KUN_DEBUG_LLM_ROUNDS_TEMPLATE, ['GET']), + compileEndpoint(KUN_BACKGROUND_SHELLS_TEMPLATE, ['GET']), + compileEndpoint(KUN_BACKGROUND_SHELL_TEMPLATE, ['GET']), + compileEndpoint(`${KUN_BACKGROUND_SHELL_TEMPLATE}/stop`, ['POST']) ] function isAllowedRuntimeRequest(value: { path: string; method?: string }): boolean { @@ -211,6 +216,7 @@ const uiFontScaleSchema = z.union([ z.number().min(UI_FONT_SCALE_MIN).max(UI_FONT_SCALE_MAX), z.enum(['small', 'medium', 'large']) ]) +const chatContentMaxWidthSchema = z.number().min(CHAT_CONTENT_MAX_WIDTH_MIN).max(CHAT_CONTENT_MAX_WIDTH_MAX) const hexColorSchema = z.string().trim().regex(/^#[0-9a-fA-F]{6}$/) const approvalPolicySchema = z.enum(['always', 'on-request', 'untrusted', 'never', 'auto', 'suggest']) const sandboxModeSchema = z.enum(['read-only', 'workspace-write', 'danger-full-access', 'external-sandbox']) @@ -1347,6 +1353,7 @@ const settingsPatchObjectSchema = z.object({ locale: localeSchema.optional(), theme: themeSchema.optional(), uiFontScale: uiFontScaleSchema.optional(), + chatContentMaxWidthPx: chatContentMaxWidthSchema.optional(), cursorSpotlight: z.boolean().optional(), cursorSpotlightColor: hexColorSchema.optional(), provider: modelProviderPatchSchema.optional(), diff --git a/src/main/ipc/register-app-ipc-handlers.test.ts b/src/main/ipc/register-app-ipc-handlers.test.ts index bc6ae84bb..73128b2e5 100644 --- a/src/main/ipc/register-app-ipc-handlers.test.ts +++ b/src/main/ipc/register-app-ipc-handlers.test.ts @@ -37,6 +37,7 @@ function settings(): AppSettingsV1 { locale: 'en', theme: 'system', uiFontScale: 0.82, 
+ chatContentMaxWidthPx: 896, provider: defaultModelProviderSettings(), agents: { kun: defaultKunRuntimeSettings() diff --git a/src/main/kun-process.test.ts b/src/main/kun-process.test.ts index 079d3509f..b902c4b45 100644 --- a/src/main/kun-process.test.ts +++ b/src/main/kun-process.test.ts @@ -34,6 +34,7 @@ function createSettings(binaryPath: string): AppSettingsV1 { locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider: defaultModelProviderSettings(), agents: { kun: { diff --git a/src/main/kun-regression.test.ts b/src/main/kun-regression.test.ts index b3344d56c..764247151 100644 --- a/src/main/kun-regression.test.ts +++ b/src/main/kun-regression.test.ts @@ -113,6 +113,7 @@ describe('Kun single-agent regression', () => { locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider: defaultModelProviderSettings(), agents: { kun: defaultKunRuntimeSettings(19000) diff --git a/src/main/runtime/kun-adapter.test.ts b/src/main/runtime/kun-adapter.test.ts index ccc5b6a89..f61a70a36 100644 --- a/src/main/runtime/kun-adapter.test.ts +++ b/src/main/runtime/kun-adapter.test.ts @@ -22,6 +22,7 @@ function settingsForPort(port: number): AppSettingsV1 { locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider: defaultModelProviderSettings(), agents: { kun: { diff --git a/src/main/runtime/managed-runtime-idle.test.ts b/src/main/runtime/managed-runtime-idle.test.ts index 9d6852413..ba3fd9a77 100644 --- a/src/main/runtime/managed-runtime-idle.test.ts +++ b/src/main/runtime/managed-runtime-idle.test.ts @@ -21,6 +21,7 @@ const settings: AppSettingsV1 = { locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider: defaultModelProviderSettings(), agents: { kun: defaultKunRuntimeSettings() }, workspaceRoot: '/tmp/workspace', diff --git a/src/main/schedule-runtime.test.ts b/src/main/schedule-runtime.test.ts index 75dbcc131..180b86cab 100644 --- 
a/src/main/schedule-runtime.test.ts +++ b/src/main/schedule-runtime.test.ts @@ -84,6 +84,7 @@ function settingsWith( locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider: defaultModelProviderSettings(), agents: { kun: { diff --git a/src/main/services/skill-service.test.ts b/src/main/services/skill-service.test.ts index e076578de..767f840f1 100644 --- a/src/main/services/skill-service.test.ts +++ b/src/main/services/skill-service.test.ts @@ -290,6 +290,7 @@ describe('skill-service', () => { locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider: defaultModelProviderSettings(), agents: { kun: defaultKunRuntimeSettings() }, workspaceRoot, diff --git a/src/main/services/write-inline-completion-service.test.ts b/src/main/services/write-inline-completion-service.test.ts index 3bf961c58..224c34f5b 100644 --- a/src/main/services/write-inline-completion-service.test.ts +++ b/src/main/services/write-inline-completion-service.test.ts @@ -30,6 +30,7 @@ function createSettings(patch: Partial ({ locale: 'en', theme: 'system', uiFontScale: DEFAULT_UI_FONT_SCALE, + chatContentMaxWidthPx: DEFAULT_CHAT_CONTENT_MAX_WIDTH_PX, cursorSpotlight: true, cursorSpotlightColor: DEFAULT_CURSOR_SPOTLIGHT_COLOR, provider: defaultModelProviderSettings(), diff --git a/src/main/upstream-models.test.ts b/src/main/upstream-models.test.ts index 77da1dba9..c6764c0ab 100644 --- a/src/main/upstream-models.test.ts +++ b/src/main/upstream-models.test.ts @@ -23,6 +23,7 @@ function settings(dataDir: string, model = 'settings-model'): AppSettingsV1 { locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider: { ...provider, providers: [ diff --git a/src/main/workflow-runtime.nodes.test.ts b/src/main/workflow-runtime.nodes.test.ts index 9c6aa2a88..9cb4b06d1 100644 --- a/src/main/workflow-runtime.nodes.test.ts +++ b/src/main/workflow-runtime.nodes.test.ts @@ -106,6 +106,7 @@ function buildSettings( locale: 'en', 
theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider: defaultModelProviderSettings(), agents: { kun: { ...defaultKunRuntimeSettings(), model: 'test-model', apiKey: 'test-key' } }, workspaceRoot: '/tmp/workflow-workspace', diff --git a/src/main/workflow-runtime.run.test.ts b/src/main/workflow-runtime.run.test.ts index 665f5459f..3188206a6 100644 --- a/src/main/workflow-runtime.run.test.ts +++ b/src/main/workflow-runtime.run.test.ts @@ -47,6 +47,7 @@ function settingsWithWorkflows(workflows: WorkflowV1[], modules: WorkflowCustomM locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider: defaultModelProviderSettings(), agents: { kun: { ...defaultKunRuntimeSettings(), model: 'test-model', apiKey: 'test-key' } }, workspaceRoot: '/tmp/workflow-workspace', diff --git a/src/renderer/src/agent/kun-contract.ts b/src/renderer/src/agent/kun-contract.ts index e6e36d183..cb73c6fbc 100644 --- a/src/renderer/src/agent/kun-contract.ts +++ b/src/renderer/src/agent/kun-contract.ts @@ -261,6 +261,13 @@ export type CoreRuntimeInfoJson = { insecure?: boolean startedAt: string pid?: number + memoryUsage?: { + rssBytes: number + peakRssBytes: number + heapUsedBytes: number + heapTotalBytes: number + externalBytes: number + } capabilities: CoreRuntimeCapabilityManifestJson } @@ -362,6 +369,7 @@ export type CoreTurnJson = { attachmentIds?: string[] activeSkillIds?: string[] injectedMemoryIds?: string[] + injectedMemorySummaries?: Array<{ id: string; content: string }> skillInjectionBytes?: number workspaceCheckpointId?: string error?: string @@ -409,6 +417,7 @@ export type CoreTurnItemJson = { workspaceCheckpointId?: string activeSkillIds?: string[] injectedMemoryIds?: string[] + injectedMemorySummaries?: Array<{ id: string; content: string }> skillInjectionBytes?: number target?: CoreReviewTargetJson title?: string diff --git a/src/renderer/src/agent/kun-mapper.ts b/src/renderer/src/agent/kun-mapper.ts index a6f321e5f..aa8628341 100644 
--- a/src/renderer/src/agent/kun-mapper.ts +++ b/src/renderer/src/agent/kun-mapper.ts @@ -21,6 +21,7 @@ import type { UserInputQuestion } from './types' import { redactSecrets, redactSecretText } from '@shared/secret-redaction' +import { applyClientUserMessageSourceMeta } from '@shared/background-shell-notice' import type { CoreChildRuntimeMetadataJson, CoreRuntimeEventJson, @@ -278,6 +279,22 @@ function normalizeUserFileReferences(value: unknown): Array<{ return references.length > 0 ? references : undefined } +function normalizeInjectedMemorySummaries( + value: unknown +): Array<{ id: string; content: string }> | undefined { + if (!Array.isArray(value)) return undefined + const summaries = value + .map((entry) => { + if (!entry || typeof entry !== 'object') return null + const raw = entry as Record + const id = typeof raw.id === 'string' && raw.id.trim() ? raw.id.trim() : '' + const content = typeof raw.content === 'string' && raw.content.trim() ? raw.content.trim() : '' + return id && content ? { id, content } : null + }) + .filter((entry): entry is { id: string; content: string } => entry !== null) + return summaries.length > 0 ? summaries : undefined +} + function applyRuntimeDisclosureMeta( meta: Record, item: CoreTurnItemJson, @@ -290,16 +307,19 @@ function applyRuntimeDisclosureMeta( const attachmentIds = stringArray(item.attachmentIds) const activeSkillIds = stringArray(item.activeSkillIds) const injectedMemoryIds = stringArray(item.injectedMemoryIds) + const injectedMemorySummaries = normalizeInjectedMemorySummaries(item.injectedMemorySummaries) const fileReferences = normalizeUserFileReferences(item.fileReferences) const normalizedChild = normalizeChildMetadata(child) const displayText = typeof item.displayText === 'string' ? item.displayText.trim() : '' if (displayText && displayText !== item.text?.trim()) { meta.displayText = displayText } + applyClientUserMessageSourceMeta(meta, item.text ?? 
'') if (attachmentIds) meta.attachmentIds = attachmentIds if (fileReferences) meta.fileReferences = fileReferences if (activeSkillIds) meta.activeSkillIds = activeSkillIds if (injectedMemoryIds) meta.injectedMemoryIds = injectedMemoryIds + if (injectedMemorySummaries) meta.injectedMemorySummaries = injectedMemorySummaries if (typeof item.skillInjectionBytes === 'number') { meta.skillInjectionBytes = item.skillInjectionBytes } @@ -522,8 +542,13 @@ function toolBlockFromItem(item: CoreTurnItemJson, child?: CoreChildRuntimeMetad const generatedFiles = extractToolGeneratedFiles(item) if (generatedFiles) meta.generatedFiles = generatedFiles const presentation = inferToolPresentation(item) + const payload = payloadFor(item) if (presentation.command) meta.command = presentation.command - if (presentation.toolKind === 'command_execution') applyCommandResultMeta(meta, item) + if (presentation.toolKind === 'command_execution' || item.toolName === 'background_shell') { + applyCommandResultMeta(meta, item) + } + const action = readStructuredString(payload, 'action') + if (action) meta.action = action if (isPlan) { const plan = extractPlanMetadata(item) if (plan) meta.plan = plan diff --git a/src/renderer/src/agent/kun-runtime.test.ts b/src/renderer/src/agent/kun-runtime.test.ts index 9b44387c2..e07fbf946 100644 --- a/src/renderer/src/agent/kun-runtime.test.ts +++ b/src/renderer/src/agent/kun-runtime.test.ts @@ -21,6 +21,7 @@ function settings(): AppSettingsV1 { locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider: defaultModelProviderSettings(), agents: { kun: defaultKunRuntimeSettings() diff --git a/src/renderer/src/agent/kun-runtime.ts b/src/renderer/src/agent/kun-runtime.ts index 79cba80c7..f521a70b3 100644 --- a/src/renderer/src/agent/kun-runtime.ts +++ b/src/renderer/src/agent/kun-runtime.ts @@ -217,6 +217,7 @@ export class KunRuntimeProvider implements AgentProvider { attachmentIds: turn.attachmentIds, activeSkillIds: 
turn.activeSkillIds, injectedMemoryIds: turn.injectedMemoryIds, + injectedMemorySummaries: turn.injectedMemorySummaries, skillInjectionBytes: turn.skillInjectionBytes, workspaceCheckpointId: item.workspaceCheckpointId ?? turn.workspaceCheckpointId })) @@ -705,10 +706,11 @@ export class KunRuntimeProvider implements AgentProvider { ) } - async listMemories(options: { workspace?: string; includeDeleted?: boolean } = {}): Promise { + async listMemories(options: { workspace?: string; includeDeleted?: boolean; all?: boolean } = {}): Promise { const query = buildQuery({ workspace: options.workspace, - include_deleted: options.includeDeleted + include_deleted: options.includeDeleted, + all: options.all }) const response = await rendererRuntimeClient.runtimeRequest(`${KUN_MEMORY_PATH}${query}`, 'GET') if (!response.ok) { diff --git a/src/renderer/src/agent/runtime-client.test.ts b/src/renderer/src/agent/runtime-client.test.ts index 7980674c9..96c9404b0 100644 --- a/src/renderer/src/agent/runtime-client.test.ts +++ b/src/renderer/src/agent/runtime-client.test.ts @@ -18,6 +18,7 @@ function settings(apiKey: string): AppSettingsV1 { locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider: defaultModelProviderSettings(), agents: { kun: { diff --git a/src/renderer/src/agent/types.ts b/src/renderer/src/agent/types.ts index 7a6bbac5d..bd65b4670 100644 --- a/src/renderer/src/agent/types.ts +++ b/src/renderer/src/agent/types.ts @@ -72,6 +72,7 @@ export type WebCitationSource = { export type RuntimeDisclosureMetadata = { displayText?: string + messageSource?: 'background_shell' // client-only rendering hint; never sent to the runtime turnId?: string workspaceCheckpointId?: string attachmentIds?: string[] @@ -80,6 +81,7 @@ export type RuntimeDisclosureMetadata = { generatedFiles?: GeneratedFileReference[] activeSkillIds?: string[] injectedMemoryIds?: string[] + injectedMemorySummaries?: Array<{ id: string; content: string }> skillInjectionBytes?: 
number child?: RuntimeChildMetadata sources?: WebCitationSource[] @@ -512,7 +514,7 @@ export interface AgentProvider { attachmentId: string, options?: { threadId?: string; workspace?: string } ): Promise - listMemories?(options?: { workspace?: string; includeDeleted?: boolean }): Promise + listMemories?(options?: { workspace?: string; includeDeleted?: boolean; all?: boolean }): Promise createMemory?(input: { content: string scope?: 'user' | 'workspace' | 'project' diff --git a/src/renderer/src/components/SettingsView.tsx b/src/renderer/src/components/SettingsView.tsx index 03c3c6c1a..265410511 100644 --- a/src/renderer/src/components/SettingsView.tsx +++ b/src/renderer/src/components/SettingsView.tsx @@ -1,5 +1,5 @@ -import type { ReactElement } from 'react' -import { useCallback, useEffect, useMemo, useRef, useState } from 'react' +import type { ComponentProps, ReactElement } from 'react' +import { lazy, Suspense, useCallback, useEffect, useMemo, useRef, useState } from 'react' import { useTranslation } from 'react-i18next' import { DEFAULT_WRITE_INLINE_COMPLETION_BASE_URL, @@ -25,6 +25,7 @@ import type { } from '../agent/kun-contract' import type { WriteInlineCompletionDebugEntry } from '@shared/write-inline-completion' import { + applyChatContentMaxWidth, applyCursorSpotlight, applyCursorSpotlightColor, applyTheme, @@ -43,7 +44,6 @@ import { } from '../lib/settings-home-paths' import { useChatStore, type SettingsRouteSection } from '../store/chat-store' import { SettingsSidebar } from './SettingsSidebar' -import { WriteDebugLogModal } from './settings-debug-log' import { useSettingsGuiUpdate } from './use-settings-gui-update' import { DEFAULT_WORKSPACE_ROOT, @@ -55,23 +55,72 @@ import { } from './settings-utils' import { loadKunDiagnostics } from '../lib/load-kun-diagnostics' import { SETTINGS_CHANGED_EVENT, emitRendererSettingsChanged } from '../lib/keyboard-shortcut-settings' -import { - AgentsSettingsSection, - ArchivedThreadsSettingsSection, - 
ClawSettingsSection, - EasterEggSettingsSection, - GeneralSettingsSection, - KeyboardShortcutsSettingsSection, - LlmDebugSettingsSection, - WorktreeSettingsSection, - MediaGenerationSettingsSection, - MemorySettingsSection, - ProvidersSettingsSection, - SpeechToTextSettingsSection, - UpdatesSettingsSection, - WriteSettingsSection, - TerminalSettingsSection -} from './settings-sections' +import { GeneralSettingsSection } from './settings-section-general' + +const ProvidersSettingsSection = lazy(() => + import('./settings-section-providers').then((module) => ({ default: module.ProvidersSettingsSection })) +) +const WriteSettingsSection = lazy(() => + import('./settings-section-write').then((module) => ({ default: module.WriteSettingsSection })) +) +const MediaGenerationSettingsSection = lazy(() => + import('./settings-section-media-generation').then((module) => ({ default: module.MediaGenerationSettingsSection })) +) +const SpeechToTextSettingsSection = lazy(() => + import('./settings-section-speech-to-text').then((module) => ({ default: module.SpeechToTextSettingsSection })) +) +const AgentsSettingsSection = lazy(() => + import('./settings-section-agents').then((module) => ({ default: module.AgentsSettingsSection })) +) +const ArchivedThreadsSettingsSection = lazy(() => + import('./settings-section-archives').then((module) => ({ default: module.ArchivedThreadsSettingsSection })) +) +const WorktreeSettingsSection = lazy(() => + import('./settings-section-worktree').then((module) => ({ default: module.WorktreeSettingsSection })) +) +const MemorySettingsSection = lazy(() => + import('./settings-section-memory').then((module) => ({ default: module.MemorySettingsSection })) +) +const KeyboardShortcutsSettingsSection = lazy(() => + import('./settings-section-shortcuts').then((module) => ({ default: module.KeyboardShortcutsSettingsSection })) +) +const EasterEggSettingsSection = lazy(() => + import('./settings-section-easter-egg').then((module) => ({ default: 
module.EasterEggSettingsSection })) +) +const ClawSettingsSection = lazy(() => + import('./settings-section-claw').then((module) => ({ default: module.ClawSettingsSection })) +) +const UpdatesSettingsSection = lazy(() => + import('./settings-section-updates').then((module) => ({ default: module.UpdatesSettingsSection })) +) +const TerminalSettingsSection = lazy(() => + import('./settings-section-terminal').then((module) => ({ default: module.TerminalSettingsSection })) +) +const LlmDebugSettingsSection = lazy(() => + import('./settings-section-llm-debug').then((module) => ({ default: module.LlmDebugSettingsSection })) +) +const WriteDebugLogModal = lazy(() => + import('./settings-debug-log').then((module) => ({ default: module.WriteDebugLogModal })) +) + +function LoadedAgentsSettingsSection({ + onReady, + ...props +}: ComponentProps & { onReady: () => void }): ReactElement { + useEffect(() => { + onReady() + }, [onReady]) + return +} + +function SettingsSectionFallback(): ReactElement { + return ( +
+
+
+
+ ) +} type SettingsCategory = 'general' | 'providers' | 'write' | 'mediaGeneration' | 'speechToText' | 'agents' | 'archives' | 'permissions' | 'worktree' | 'memory' | 'shortcuts' | 'easterEgg' | 'claw' | 'updates' | 'debug' | 'terminal' type SaveStatus = 'idle' | 'saving' | 'saved' | 'error' @@ -131,6 +180,7 @@ export function SettingsView(): ReactElement { const [memoryDiagnostics, setMemoryDiagnostics] = useState(null) const [runtimeDiagnosticsBusy, setRuntimeDiagnosticsBusy] = useState(false) const [runtimeDiagnosticsNotice, setRuntimeDiagnosticsNotice] = useState(null) + const [agentsSectionReady, setAgentsSectionReady] = useState(false) const [writeDebugModalOpen, setWriteDebugModalOpen] = useState(false) const [writeCompletionDebugEntries, setWriteCompletionDebugEntries] = useState([]) const [writeCompletionDebugSelectedId, setWriteCompletionDebugSelectedId] = useState(null) @@ -151,13 +201,14 @@ export function SettingsView(): ReactElement { const permissionsSectionRef = useRef(null) const formTheme = form?.theme const formUiFontScale = form?.uiFontScale + const formChatContentMaxWidthPx = form?.chatContentMaxWidthPx const writeTypography = form?.write?.typography - const formWorkspaceRoot = form?.workspaceRoot const formKun = form ? getKunRuntimeSettings(form) : null const formPort = formKun?.port const formGuiUpdateChannel = form?.guiUpdate?.channel const formCursorSpotlight = form?.cursorSpotlight const formCursorSpotlightColor = form?.cursorSpotlightColor + const markAgentsSectionReady = useCallback(() => setAgentsSectionReady(true), []) const settingsPlatform = typeof window !== 'undefined' ? window.kunGui?.platform ?? '' : '' const settingsHomeDir = typeof window !== 'undefined' ? window.kunGui?.homeDir ?? 
'' : '' const compactHomePath = useCallback((value: string): string => @@ -207,10 +258,11 @@ export function SettingsView(): ReactElement { }, []) useEffect(() => { - if (!formTheme || !formUiFontScale) return + if (!formTheme || formUiFontScale == null || formChatContentMaxWidthPx == null) return applyTheme(formTheme) applyUiFontScale(formUiFontScale) - }, [formTheme, formUiFontScale]) + applyChatContentMaxWidth(formChatContentMaxWidthPx) + }, [formTheme, formUiFontScale, formChatContentMaxWidthPx]) useEffect(() => { if (typeof formCursorSpotlight === 'boolean') { @@ -353,6 +405,7 @@ export function SettingsView(): ReactElement { ) { return } + if (!agentsSectionReady) return const refs: Record< Exclude, HTMLDivElement | null @@ -367,7 +420,7 @@ export function SettingsView(): ReactElement { window.requestAnimationFrame(() => { target.scrollIntoView({ behavior: 'smooth', block: 'start' }) }) - }, [category, form, settingsSection]) + }, [agentsSectionReady, category, form, settingsSection]) useEffect(() => { return () => { @@ -388,15 +441,16 @@ export function SettingsView(): ReactElement { if (typeof window.kunGui?.listSkillRoots !== 'function') return setSkillRootsLoading(true) try { - const workspaceRoot = normalizeWorkspaceRoot(expandHomePath(formWorkspaceRoot ?? '')) - const result = await window.kunGui.listSkillRoots(workspaceRoot || undefined) + // Settings is global: list every configured skill root from persisted + // settings, not the sidebar's currently selected project workspace. 
+ const result = await window.kunGui.listSkillRoots() if (result.ok) setSkillRoots(result.roots) } catch { /* listing skill roots is best-effort; keep the last known list */ } finally { setSkillRootsLoading(false) } - }, [expandHomePath, formWorkspaceRoot]) + }, []) useEffect(() => { if (category !== 'agents') return @@ -492,9 +546,7 @@ export function SettingsView(): ReactElement { setRuntimeDiagnosticsBusy(true) setRuntimeDiagnosticsNotice(null) try { - const loaded = await loadKunDiagnostics(provider, { - workspace: normalizeWorkspaceRoot(expandHomePath(formWorkspaceRoot ?? '')) - }) + const loaded = await loadKunDiagnostics(provider, { listAllMemories: true }) if (loaded.runtimeInfo !== undefined) setRuntimeInfo(loaded.runtimeInfo) if (loaded.toolDiagnostics !== undefined) setToolDiagnostics(loaded.toolDiagnostics) if (loaded.memoryRecords !== undefined) setMemoryRecords(loaded.memoryRecords) @@ -512,7 +564,7 @@ export function SettingsView(): ReactElement { } finally { setRuntimeDiagnosticsBusy(false) } - }, [expandHomePath, formWorkspaceRoot]) + }, []) useEffect(() => { if (category !== 'agents' && category !== 'memory') return @@ -535,18 +587,31 @@ export function SettingsView(): ReactElement { void refreshMemoryDiagnostics() }, [category, memoryRecords]) + const memoryMutationWorkspace = useCallback((memoryId: string): string | undefined => { + const record = memoryRecords.find((item) => item.id === memoryId) + if (!record || record.scope === 'user') return undefined + if (record.scope === 'project') { + return record.project ?? 
record.workspace + } + return record.workspace + }, [memoryRecords]) + const createMemoryRecord = async (input: { content: string scope?: 'user' | 'workspace' | 'project' + targetPath?: string tags?: string[] confidence?: number }): Promise => { const provider = getProvider() if (typeof provider.createMemory !== 'function') return false try { - const workspace = normalizeWorkspaceRoot(formWorkspaceRoot) + const workspace = normalizeWorkspaceRoot(expandHomePath(input.targetPath ?? '')) const memory = await provider.createMemory({ - ...input, + content: input.content, + scope: input.scope, + tags: input.tags, + confidence: input.confidence, ...(input.scope === 'user' ? {} : { workspace }), ...(input.scope === 'project' ? { project: workspace } : {}) }) @@ -569,7 +634,7 @@ export function SettingsView(): ReactElement { if (typeof provider.updateMemory !== 'function') return false try { const memory = await provider.updateMemory(memoryId, patch, { - workspace: normalizeWorkspaceRoot(formWorkspaceRoot) + workspace: memoryMutationWorkspace(memoryId) }) setMemoryRecords((records) => records.map((record) => (record.id === memoryId ? memory : record))) return true @@ -587,7 +652,7 @@ export function SettingsView(): ReactElement { if (typeof provider.updateMemory !== 'function') return try { const memory = await provider.updateMemory(memoryId, { disabled: true }, { - workspace: normalizeWorkspaceRoot(formWorkspaceRoot) + workspace: memoryMutationWorkspace(memoryId) }) setMemoryRecords((records) => records.map((record) => record.id === memoryId ? 
memory : record)) } catch (error) { @@ -603,7 +668,7 @@ export function SettingsView(): ReactElement { if (typeof provider.deleteMemory !== 'function') return try { await provider.deleteMemory(memoryId, { - workspace: normalizeWorkspaceRoot(formWorkspaceRoot) + workspace: memoryMutationWorkspace(memoryId) }) setMemoryRecords((records) => records.filter((record) => record.id !== memoryId)) } catch (error) { @@ -1093,20 +1158,24 @@ export function SettingsView(): ReactElement { ) : null} {category === 'general' ? : null} - {category === 'providers' ? : null} - {category === 'write' ? : null} - {category === 'mediaGeneration' ? : null} - {category === 'speechToText' ? : null} - {category === 'agents' ? : null} - {category === 'archives' ? : null} - {category === 'worktree' ? : null} - {category === 'memory' ? : null} - {category === 'shortcuts' ? : null} - {category === 'easterEgg' ? : null} - {category === 'claw' ? : null} - {category === 'updates' ? : null} - {category === 'terminal' ? : null} - {category === 'debug' ? : null} + }> + {category === 'providers' ? : null} + {category === 'write' ? : null} + {category === 'mediaGeneration' ? : null} + {category === 'speechToText' ? : null} + {category === 'agents' ? ( + + ) : null} + {category === 'archives' ? : null} + {category === 'worktree' ? : null} + {category === 'memory' ? : null} + {category === 'shortcuts' ? : null} + {category === 'easterEgg' ? : null} + {category === 'claw' ? : null} + {category === 'updates' ? : null} + {category === 'terminal' ? : null} + {category === 'debug' ? : null} +
{saveStatus === 'error' && saveError ? ( @@ -1131,17 +1200,19 @@ export function SettingsView(): ReactElement { ) : null} {writeDebugModalOpen ? ( - void loadWriteDebugEntries()} - onClear={() => void clearWriteDebugEntries()} - onClose={() => setWriteDebugModalOpen(false)} - t={t} - /> + + void loadWriteDebugEntries()} + onClear={() => void clearWriteDebugEntries()} + onClose={() => setWriteDebugModalOpen(false)} + t={t} + /> + ) : null} ) diff --git a/src/renderer/src/components/Workbench.tsx b/src/renderer/src/components/Workbench.tsx index 3f4890327..d733ab94a 100644 --- a/src/renderer/src/components/Workbench.tsx +++ b/src/renderer/src/components/Workbench.tsx @@ -41,7 +41,6 @@ import { } from '../lib/dev-preview-detection' import { Sidebar } from './chat/Sidebar' import { WorkbenchTopBar, type RightPanelMode } from './chat/WorkbenchTopBar' -import { MessageTimeline } from './chat/MessageTimeline' import { SubagentReturnBar } from './chat/message-timeline-empty' import { IkunCameoLayer, KunCelebrationLayer } from './chat/AnimatedWorkLogo' import { @@ -112,6 +111,9 @@ import { shouldSuppressRuntimeErrorBanner } from '../lib/runtime-banner-visibili const ChangeInspector = lazy(() => import('./ChangeInspector').then((module) => ({ default: module.ChangeInspector })) ) +const MessageTimeline = lazy(() => + import('./chat/MessageTimeline').then((module) => ({ default: module.MessageTimeline })) +) const DevBrowserPanel = lazy(() => import('./DevBrowserPanel').then((module) => ({ default: module.DevBrowserPanel })) ) @@ -2737,34 +2739,36 @@ export function Workbench(): ReactElement {
- void probeRuntime('user', { restart: true })} - onOpenSettings={() => openSettings('agents')} - onSelectSuggestion={(text) => setInput(text)} - focusModeEnabled={focusModeEnabled} - planActionsBusy={busy} - onBuildPlan={() => void buildGuiPlan()} - onOpenPlan={openGuiPlanPanel} - devPreviewCard={ - showDevPreviewCard ? ( - - ) : null - } - /> + }> + void probeRuntime('user', { restart: true })} + onOpenSettings={() => openSettings('agents')} + onSelectSuggestion={(text) => setInput(text)} + focusModeEnabled={focusModeEnabled} + planActionsBusy={busy} + onBuildPlan={() => void buildGuiPlan()} + onOpenPlan={openGuiPlanPanel} + devPreviewCard={ + showDevPreviewCard ? ( + + ) : null + } + /> + {uiModeCameosEnabled && !focusModeEnabled ? : null} {!focusModeEnabled ? : null}
-
+
{activeThreadRelation === 'side' && activeThreadParentId ? ( { + const query = threadId ? `?thread_id=${encodeURIComponent(threadId)}` : '' + const result = await rendererRuntimeClient.runtimeRequest(`${KUN_BACKGROUND_SHELLS_PATH}${query}`) + if (!result.ok) return { sessions: [], running: 0 } + try { + return JSON.parse(result.body) as BackgroundShellListResponse + } catch { + return { sessions: [], running: 0 } + } +} + +async function stopBackgroundShell(sessionId: string): Promise { + await rendererRuntimeClient.runtimeRequest(kunBackgroundShellStopPath(sessionId), 'POST') +} + +type BackgroundShellOverlayProps = { + runtimeReady?: boolean +} + +export function BackgroundShellOverlay({ + runtimeReady = true +}: BackgroundShellOverlayProps): ReactElement | null { + const { t } = useTranslation('chat') + const [open, setOpen] = useState(false) + const [sessions, setSessions] = useState([]) + const [selectedId, setSelectedId] = useState(null) + + const refresh = useCallback(async () => { + if (!runtimeReady) return + const data = await fetchBackgroundShells() + setSessions(data.sessions) + }, [runtimeReady]) + + useEffect(() => { + void refresh() + if (!runtimeReady) return + const timer = window.setInterval(() => { + void refresh() + }, 2000) + return () => window.clearInterval(timer) + }, [refresh, runtimeReady]) + + const runningCount = useMemo( + () => sessions.filter((session) => session.status === 'running').length, + [sessions] + ) + const selected = useMemo( + () => sessions.find((session) => session.id === selectedId) ?? sessions[0] ?? null, + [selectedId, sessions] + ) + + if (runningCount <= 0 && !open) return null + + const handleStop = async (sessionId: string) => { + await stopBackgroundShell(sessionId) + await refresh() + } + + return ( +
+ {open ? ( +
+
+
+

+ {t('backgroundShells.title', { defaultValue: 'Background shells' })} +

+

+ {t('backgroundShells.runningCount', { + defaultValue: '{{count}} running', + count: runningCount + })} +

+
+ +
+
+ {sessions.length === 0 ? ( +

+ {t('backgroundShells.empty', { defaultValue: 'No background shells.' })} +

+ ) : ( + sessions.map((session) => { + const active = selected?.id === session.id + return ( + + ) + }) + )} +
+ {selected ? ( +
+
+

{selected.command}

+ {selected.status === 'running' ? ( + + ) : null} +
+
+                {selected.output.trim() || t('backgroundShells.noOutput', { defaultValue: '(no output yet)' })}
+              
+ {selected.outputFilePath ? ( +

+ {t('backgroundShells.outputFile', { defaultValue: 'Full output' })}: {selected.outputFilePath} + {selected.outputTruncated + ? ` · ${t('backgroundShells.outputTruncated', { defaultValue: 'preview truncated' })}` + : ''} +

+ ) : null} +
+ ) : null} +
+ ) : null} + +
+ ) +} diff --git a/src/renderer/src/components/chat/ChatStarterGrid.tsx b/src/renderer/src/components/chat/ChatStarterGrid.tsx index 75c73c295..4ece60ba1 100644 --- a/src/renderer/src/components/chat/ChatStarterGrid.tsx +++ b/src/renderer/src/components/chat/ChatStarterGrid.tsx @@ -49,7 +49,7 @@ export function ChatStarterGrid({ }): ReactElement { const { t } = useTranslation('common') return ( -
+
{CHAT_STARTERS.map((starter) => (
-
- ) : null} + ) : null} +
{composerMenuOpen && slashQuery == null ? (
-
+
{!hideHero ? ( = { toolBuiltinGrep: 'Search', toolBuiltinFind: 'Find', toolBuiltinLs: 'List', - toolBuiltinBash: 'Bash' + toolBuiltinBash: 'Bash', + toolBuiltinBackgroundShell: 'Background shell', + toolActionBackgroundShellRead: 'Read background shell', + toolActionBackgroundShellList: 'List background shells' } const t = (key: string) => labels[key] ?? (key === 'toolActionCommand' ? 'Ran command' : key) @@ -120,6 +123,34 @@ describe('MessageTimeline tool summaries', () => { ) ).toBe('Ran command npm test') }) + + it('summarizes background_shell with action, session id, and command', () => { + expect( + summarizeToolBlock( + toolBlock({ + summary: 'background_shell', + meta: { + toolName: 'background_shell', + action: 'read', + session_id: '2mcorxhe', + command: 'sleep 15 && echo "Hello from background!"' + }, + detail: JSON.stringify( + { + action: 'read', + session_id: '2mcorxhe', + command: 'sleep 15 && echo "Hello from background!"', + exit_code: 0, + status: 'completed' + }, + null, + 2 + ) + }), + t + ) + ).toBe('Read background shell 2mcorxhe sleep 15 && echo "Hello from background!"') + }) }) describe('MessageTimeline Kun runtime metadata smoke', () => { @@ -248,7 +279,7 @@ describe('MessageTimeline Kun runtime metadata smoke', () => { expect(html).not.toContain('Feishu / Lark inbound message') }) - it('renders attachment, Skill, memory, web source, and child-agent chips in bubbles', () => { + it('renders tool-specific metadata chips in tool bubbles', () => { const block: ToolBlock = toolBlock({ summary: 'web_search: docs', meta: { @@ -270,9 +301,9 @@ describe('MessageTimeline Kun runtime metadata smoke', () => { const html = renderToStaticMarkup(createElement(MessageBubble, { block })) - expect(html).toContain('Attachments 1') - expect(html).toContain('Skills 1') - expect(html).toContain('Memories 1') + expect(html).not.toContain('Attachments 1') + expect(html).not.toContain('Skills 1') + expect(html).not.toContain('Memories 1') 
expect(html).toContain('Child agent') expect(html).toContain('research') expect(html).toContain('Sources 1') @@ -296,7 +327,7 @@ describe('MessageTimeline Kun runtime metadata smoke', () => { expect(html).not.toContain('bg-red-500/10') }) - it('renders the same runtime metadata on process timeline rows', () => { + it('renders tool-specific runtime metadata on process timeline rows', () => { const block: ChatBlock = toolBlock({ summary: 'delegate: research', meta: { @@ -325,9 +356,9 @@ describe('MessageTimeline Kun runtime metadata smoke', () => { }) ) - expect(html).toContain('Attachments 1') - expect(html).toContain('Skills 1') - expect(html).toContain('Memories 1') + expect(html).not.toContain('Attachments 1') + expect(html).not.toContain('Skills 1') + expect(html).not.toContain('Memories 1') expect(html).toContain('Child agent') expect(html).toContain('research') expect(html).toContain('Sources 1') diff --git a/src/renderer/src/components/chat/MessageTimeline.tsx b/src/renderer/src/components/chat/MessageTimeline.tsx index 402ae0847..791542d81 100644 --- a/src/renderer/src/components/chat/MessageTimeline.tsx +++ b/src/renderer/src/components/chat/MessageTimeline.tsx @@ -22,12 +22,14 @@ import type { UiPluginLabelKey } from '@shared/ui-plugin' import { useUiPluginWorkLabel } from '../../store/ui-plugin-store' import { groupTurns, + isBackgroundShellNoticeBlock, sameTurnContent, splitThink, stableTurnKey, type Turn } from './message-timeline-turns' import { extractPlanMetadataFromBlock } from '../../plan/plan-tool' +import { InjectedMemoryLookupProvider } from './injected-memory-lookup' import { planDisplayNameFromRelativePath } from '../../plan/plan-path' export { summarizeToolBlock } from './message-timeline-process' @@ -90,6 +92,12 @@ function blockScrollStamp(block: ChatBlock | undefined): string { } function turnPreview(turn: Turn, fallback: string): string { + if (turn.user && isBackgroundShellNoticeBlock(turn.user)) { + const display = 
turn.user.meta?.displayText?.trim() + if (display) { + return display.length > 48 ? `${display.slice(0, 47).trimEnd()}...` : display + } + } const text = turn.user?.text.trim() ?? '' if (!text) return fallback const oneLine = text.replace(/\s+/g, ' ') @@ -255,6 +263,7 @@ export function MessageTimeline({ } return ( +
{visibleTurnAnchors.length > 2 ? (
- +
{canExpand ? ( effectiveOpen ? ( diff --git a/src/renderer/src/components/chat/message-timeline-empty.tsx b/src/renderer/src/components/chat/message-timeline-empty.tsx index e32368b15..5dc3c13c8 100644 --- a/src/renderer/src/components/chat/message-timeline-empty.tsx +++ b/src/renderer/src/components/chat/message-timeline-empty.tsx @@ -44,7 +44,7 @@ function ClawEmptyHero({ return (
-
+
@@ -222,7 +222,7 @@ export function SubagentReturnBar({ +
+ setChatContentMaxWidthPx(Number(e.target.value))} + /> + px +
+ +
+
+
+ } + /> { it('returns false in view mode regardless of draft', () => { const record = sampleRecord() const dialog: MemoryDialogState = { mode: 'view', memory: record } - const draft: MemoryDraft = { content: 'totally different', scope: 'user', tags: 'x', confidence: 0 } + const draft: MemoryDraft = { content: 'totally different', scope: 'user', targetPath: '', tags: 'x', confidence: 0 } expect(isMemoryDraftDirty(dialog, draft)).toBe(false) }) @@ -197,6 +197,7 @@ describe('isMemoryDraftDirty', () => { const draft: MemoryDraft = { content: record.content, scope: record.scope, + targetPath: record.workspace ?? '', tags: 'summary, kook-bot', confidence: record.confidence ?? 1 } @@ -209,6 +210,7 @@ describe('isMemoryDraftDirty', () => { const baseline: MemoryDraft = { content: record.content, scope: record.scope, + targetPath: record.workspace ?? '', tags: 'summary', confidence: 1 } @@ -219,15 +221,15 @@ describe('isMemoryDraftDirty', () => { it('returns false in create mode for an empty draft on the default scope', () => { const dialog: MemoryDialogState = { mode: 'create' } - const draft: MemoryDraft = { content: ' ', scope: 'workspace', tags: ' ', confidence: 1 } + const draft: MemoryDraft = { content: ' ', scope: 'workspace', targetPath: '', tags: ' ', confidence: 1 } expect(isMemoryDraftDirty(dialog, draft)).toBe(false) }) it('returns true in create mode when any field changes from the empty default', () => { const dialog: MemoryDialogState = { mode: 'create' } - expect(isMemoryDraftDirty(dialog, { content: 'hello', scope: 'workspace', tags: '', confidence: 1 })).toBe(true) - expect(isMemoryDraftDirty(dialog, { content: '', scope: 'workspace', tags: 'tag', confidence: 1 })).toBe(true) - expect(isMemoryDraftDirty(dialog, { content: '', scope: 'user', tags: '', confidence: 1 })).toBe(true) + expect(isMemoryDraftDirty(dialog, { content: 'hello', scope: 'workspace', targetPath: '', tags: '', confidence: 1 })).toBe(true) + expect(isMemoryDraftDirty(dialog, { content: 
'', scope: 'workspace', targetPath: '', tags: 'tag', confidence: 1 })).toBe(true) + expect(isMemoryDraftDirty(dialog, { content: '', scope: 'user', targetPath: '', tags: '', confidence: 1 })).toBe(true) }) }) @@ -238,6 +240,7 @@ describe('attemptCloseMemoryDialog', () => { const draft: MemoryDraft = { content: record.content, scope: record.scope, + targetPath: record.workspace ?? '', tags: 'summary', confidence: 1 } @@ -254,7 +257,7 @@ describe('attemptCloseMemoryDialog', () => { const close = vi.fn() const result = await attemptCloseMemoryDialog({ dialog: null, - draft: { content: 'anything', scope: 'workspace', tags: '', confidence: 1 }, + draft: { content: 'anything', scope: 'workspace', targetPath: '', tags: '', confidence: 1 }, confirm, close }) @@ -269,6 +272,7 @@ describe('attemptCloseMemoryDialog', () => { const draft: MemoryDraft = { content: 'EDITED content', scope: record.scope, + targetPath: record.workspace ?? '', tags: 'summary', confidence: 1 } @@ -285,6 +289,7 @@ describe('attemptCloseMemoryDialog', () => { const draft: MemoryDraft = { content: 'half-typed thought', scope: 'workspace', + targetPath: '', tags: '', confidence: 1 } diff --git a/src/renderer/src/components/settings-section-memory.tsx b/src/renderer/src/components/settings-section-memory.tsx index fa839cc31..694c4d48b 100644 --- a/src/renderer/src/components/settings-section-memory.tsx +++ b/src/renderer/src/components/settings-section-memory.tsx @@ -10,6 +10,7 @@ type MemoryScope = 'user' | 'workspace' | 'project' export type MemoryDraft = { content: string scope: MemoryScope + targetPath: string tags: string confidence: number } @@ -22,6 +23,7 @@ export type MemoryDialogState = const EMPTY_DRAFT: MemoryDraft = { content: '', scope: 'workspace', + targetPath: '', tags: '', confidence: 1 } @@ -64,6 +66,7 @@ export function isMemoryDraftDirty( return ( draft.content.trim() !== '' || draft.tags.trim() !== '' || + draft.targetPath.trim() !== '' || draft.scope !== DEFAULT_DRAFT_SCOPE ) } @@ 
-135,6 +138,7 @@ export function MemorySettingsSection({ ctx }: { ctx: Record }): Re setDraft({ content: record.content, scope: record.scope, + targetPath: projectForMemory(record) ?? '', tags: (record.tags ?? []).join(', '), confidence: record.confidence ?? 1 }) @@ -164,11 +168,14 @@ export function MemorySettingsSection({ ctx }: { ctx: Record }): Re const saveDraft = async (): Promise => { const trimmed = draft.content.trim() if (!trimmed) return + const targetPath = draft.targetPath.trim() + if (dialog?.mode === 'create' && draft.scope !== 'user' && !targetPath) return let ok = false if (dialog?.mode === 'create') { ok = await createMemoryRecord({ content: trimmed, scope: draft.scope, + ...(draft.scope === 'user' ? {} : { targetPath }), tags: parseTags(draft.tags), confidence: draft.confidence }) @@ -465,6 +472,15 @@ function MemoryRecordDialog({ ) : null} + {dialog.mode === 'create' && draft.scope !== 'user' ? ( + onDraftChange((prev) => ({ ...prev, targetPath: e.target.value }))} + placeholder={t('memoryTargetPathPlaceholder')} + className="min-w-[200px] flex-1 rounded-lg border border-ds-border-muted bg-ds-surface-subtle px-2 py-1 text-[12px] text-ds-ink outline-none" + /> + ) : null} {t('memorySave')} diff --git a/src/renderer/src/components/settings-utils.ts b/src/renderer/src/components/settings-utils.ts index 0a944d7b9..be334cc5f 100644 --- a/src/renderer/src/components/settings-utils.ts +++ b/src/renderer/src/components/settings-utils.ts @@ -26,6 +26,7 @@ import { normalizeWorkflowSettings, normalizeWriteSettings, normalizeTerminalSettings, + normalizeChatContentMaxWidth, normalizeUiFontScale, type AppSettingsPatch, type AppSettingsV1 @@ -98,11 +99,13 @@ export function coerceRendererSettings(settings: AppSettingsV1): AppSettingsV1 { ? raw.theme : 'system' const uiFontScale = normalizeUiFontScale(raw.uiFontScale) + const chatContentMaxWidthPx = normalizeChatContentMaxWidth(raw.chatContentMaxWidthPx) return { version: 1, locale: raw.locale === 'zh' ? 
'zh' : 'en', theme, uiFontScale, + chatContentMaxWidthPx, cursorSpotlight: raw.cursorSpotlight !== false, cursorSpotlightColor: normalizeCursorSpotlightColor(raw.cursorSpotlightColor), provider: normalizeModelProviderSettings(raw.provider), diff --git a/src/renderer/src/lib/apply-theme.ts b/src/renderer/src/lib/apply-theme.ts index 0ab3ff89a..e24dbf0da 100644 --- a/src/renderer/src/lib/apply-theme.ts +++ b/src/renderer/src/lib/apply-theme.ts @@ -1,13 +1,15 @@ import { DEFAULT_CURSOR_SPOTLIGHT_COLOR, + normalizeChatContentMaxWidth, normalizeUiFontScale, writeFontStackFor, + type ChatContentMaxWidthPx, type UiFontScale, type WriteTypographySettingsV1 } from '@shared/app-settings' export type ThemePreference = 'system' | 'light' | 'dark' -export type { UiFontScale } +export type { ChatContentMaxWidthPx, UiFontScale } let removeSystemListener: (() => void) | null = null @@ -49,6 +51,11 @@ export function applyUiFontScale(scale: UiFontScale): void { root.style.setProperty('--ds-ui-scale', String(normalizeUiFontScale(scale))) } +export function applyChatContentMaxWidth(widthPx: ChatContentMaxWidthPx): void { + const root = document.documentElement + root.style.setProperty('--ds-chat-content-max-width', `${normalizeChatContentMaxWidth(widthPx)}px`) +} + export function applyCursorSpotlight(enabled: boolean): void { document.documentElement.dataset.cursorSpotlight = enabled ? 
'on' : 'off' } diff --git a/src/renderer/src/lib/claw-model-options.test.ts b/src/renderer/src/lib/claw-model-options.test.ts index 07eac8069..e3295aa35 100644 --- a/src/renderer/src/lib/claw-model-options.test.ts +++ b/src/renderer/src/lib/claw-model-options.test.ts @@ -26,6 +26,7 @@ function buildSettings(models: string[]): AppSettingsV1 { locale: 'en', theme: 'system', uiFontScale: 0.88, + chatContentMaxWidthPx: 896, provider, agents: { kun: defaultKunRuntimeSettings() }, workspaceRoot: '/tmp/workspace', diff --git a/src/renderer/src/lib/load-kun-diagnostics.test.ts b/src/renderer/src/lib/load-kun-diagnostics.test.ts index f89708ff7..119442b93 100644 --- a/src/renderer/src/lib/load-kun-diagnostics.test.ts +++ b/src/renderer/src/lib/load-kun-diagnostics.test.ts @@ -9,10 +9,13 @@ describe('loadKunDiagnostics', () => { const provider = { getRuntimeInfo: async () => runtimeInfo, getToolDiagnostics: async () => toolDiagnostics, - listMemories: async () => memoryRecords + listMemories: async (options?: { all?: boolean; includeDeleted?: boolean }) => { + expect(options).toEqual({ all: true, includeDeleted: false }) + return memoryRecords + } } - const loaded = await loadKunDiagnostics(provider, { workspace: '/tmp/project' }) + const loaded = await loadKunDiagnostics(provider) expect(loaded.runtimeInfo).toBe(runtimeInfo) expect(loaded.toolDiagnostics).toBe(toolDiagnostics) @@ -20,6 +23,40 @@ describe('loadKunDiagnostics', () => { expect(loaded.errors).toEqual([]) }) + it('loads all memories by default for global settings diagnostics', async () => { + const memoryRecords = [{ id: 'mem_1', content: 'remember this' }] as any + const provider = { + getRuntimeInfo: async () => null as any, + getToolDiagnostics: async () => null as any, + listMemories: async (options?: { all?: boolean }) => { + expect(options).toEqual({ all: true, includeDeleted: false }) + return memoryRecords + } + } + + const loaded = await loadKunDiagnostics(provider) + + 
expect(loaded.memoryRecords).toBe(memoryRecords) + expect(loaded.errors).toEqual([]) + }) + + it('can scope memory loading to the current workspace when explicitly requested', async () => { + const memoryRecords = [{ id: 'mem_ws', content: 'workspace only' }] as any + const provider = { + getRuntimeInfo: async () => null as any, + getToolDiagnostics: async () => null as any, + listMemories: async (options?: { all?: boolean }) => { + expect(options).toEqual({ includeDeleted: false }) + return memoryRecords + } + } + + const loaded = await loadKunDiagnostics(provider, { listAllMemories: false }) + + expect(loaded.memoryRecords).toBe(memoryRecords) + expect(loaded.errors).toEqual([]) + }) + it('keeps successful diagnostics when memory loading fails', async () => { const runtimeInfo = { pid: 42 } as any const toolDiagnostics = { providers: [{ id: 'builtin' }], mcpServers: [] } as any @@ -31,7 +68,7 @@ describe('loadKunDiagnostics', () => { } } - const loaded = await loadKunDiagnostics(provider, { workspace: '/tmp/project' }) + const loaded = await loadKunDiagnostics(provider) expect(loaded.runtimeInfo).toBe(runtimeInfo) expect(loaded.toolDiagnostics).toBe(toolDiagnostics) diff --git a/src/renderer/src/lib/load-kun-diagnostics.ts b/src/renderer/src/lib/load-kun-diagnostics.ts index c3bd683f1..a6d6f0761 100644 --- a/src/renderer/src/lib/load-kun-diagnostics.ts +++ b/src/renderer/src/lib/load-kun-diagnostics.ts @@ -17,13 +17,18 @@ export type LoadedKunDiagnostics = { export async function loadKunDiagnostics( provider: DiagnosticsProvider, - options: { workspace?: string } = {} + options: { listAllMemories?: boolean } = {} ): Promise { + const listAllMemories = options.listAllMemories !== false const [runtimeInfo, toolDiagnostics, memoryRecords] = await Promise.allSettled([ provider.getRuntimeInfo ? provider.getRuntimeInfo() : Promise.resolve(null), provider.getToolDiagnostics ? provider.getToolDiagnostics() : Promise.resolve(null), provider.listMemories - ? 
provider.listMemories({ workspace: options.workspace, includeDeleted: false }) + ? provider.listMemories( + listAllMemories + ? { all: true, includeDeleted: false } + : { includeDeleted: false } + ) : Promise.resolve([]) ]) diff --git a/src/renderer/src/lib/memory-preview.ts b/src/renderer/src/lib/memory-preview.ts new file mode 100644 index 000000000..aa5e866ad --- /dev/null +++ b/src/renderer/src/lib/memory-preview.ts @@ -0,0 +1,5 @@ +export function memoryPreview(content: string, maxLength = 200): string { + const compact = content.replace(/\s+/g, ' ').trim() + if (compact.length <= maxLength) return compact + return `${compact.slice(0, maxLength).trimEnd()}...` +} diff --git a/src/renderer/src/lib/settings-home-paths.test.ts b/src/renderer/src/lib/settings-home-paths.test.ts index 240aad019..69c2707c6 100644 --- a/src/renderer/src/lib/settings-home-paths.test.ts +++ b/src/renderer/src/lib/settings-home-paths.test.ts @@ -93,6 +93,7 @@ function settings(): AppSettingsV1 { locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, cursorSpotlight: true, provider: defaultModelProviderSettings(), agents: { diff --git a/src/renderer/src/locales/en/common.json b/src/renderer/src/locales/en/common.json index c3b4fb1bf..e74afab3e 100644 --- a/src/renderer/src/locales/en/common.json +++ b/src/renderer/src/locales/en/common.json @@ -1998,6 +1998,13 @@ "threadForkPointFrom": "Branch from {{title}} starts here", "compactionRunning": "Compacting context", "compactionManualCompleted": "Compacted context", + "backgroundShellNotice.title": "Background shell completed", + "backgroundShellNotice.kindLabel": "Background callback", + "backgroundShellNotice.sessionId": "Session", + "backgroundShellNotice.command": "Command", + "backgroundShellNotice.exitCode": "Exit code", + "backgroundShellNotice.outputPreview": "Output preview", + "backgroundShellNotice.outputFile": "Full output file", "compactionManualCompletedWithTokens": "Compacted context · ~{{tokens}} 
tokens freed", "compactionAutoCompleted": "Auto-compacted context", "compactionAutoCompletedWithTokens": "Auto-compacted context · ~{{tokens}} tokens freed", @@ -2018,6 +2025,8 @@ "turnChangeFilesMany": "Edited {{count}} files", "groupRanCommands": "Ran {{count}} commands", "groupRanCommand": "Ran 1 command", + "groupRanBackgroundCommands": "Ran {{count}} background commands", + "groupRanBackgroundCommand": "Ran 1 background command", "groupUsedTools": "Used {{count}} tools", "groupUsedTool": "Used 1 tool", "groupApprovals": "{{count}} approvals", @@ -2033,6 +2042,7 @@ "rewindBusyError": "Wait for the current turn to finish before rewinding.", "toolActionFile": "Edited file", "toolActionCommand": "Ran command", + "toolActionBackgroundCommand": "Ran background command", "toolActionTool": "Called tool", "toolKindFile": "File change", "toolKindCommand": "Command", @@ -2055,6 +2065,13 @@ "toolBuiltinFind": "Find", "toolBuiltinLs": "List", "toolBuiltinBash": "Bash", + "toolBuiltinBackgroundShell": "Background shell", + "toolActionBackgroundShellList": "List background shells", + "toolActionBackgroundShellRead": "Read background shell", + "toolActionBackgroundShellPoll": "Poll background shell", + "toolActionBackgroundShellWrite": "Write to background shell", + "toolActionBackgroundShellStop": "Stop background shell", + "toolActionBackgroundShellIncludeFinished": "include finished", "toolBuiltinDelegate": "Delegate task", "subagentDefaultName": "Subagent", "subagentStatusQueued": "Queued", diff --git a/src/renderer/src/locales/en/settings.json b/src/renderer/src/locales/en/settings.json index 52717ceec..e47d488e2 100644 --- a/src/renderer/src/locales/en/settings.json +++ b/src/renderer/src/locales/en/settings.json @@ -188,6 +188,12 @@ "fontScaleMedium": "Medium", "fontScaleLarge": "Large", "fontScaleCurrent": "Current: {{value}}", + "chatContentMaxWidth": "Conversation text width", + "chatContentMaxWidthDesc": "Adjust how wide chat messages and the composer are 
displayed.", + "chatContentMaxWidthNarrow": "Narrow", + "chatContentMaxWidthWide": "Wide", + "chatContentMaxWidthDecrease": "Decrease conversation text width", + "chatContentMaxWidthIncrease": "Increase conversation text width", "cursorSpotlight": "Interactive effects", "cursorSpotlightDesc": "Show a soft cursor-follow spotlight on the title bar and sidebar.", "cursorSpotlightColor": "Interaction effect color", @@ -1231,6 +1237,7 @@ "memoryCancel": "Cancel", "memoryEmpty": "No memory records yet. The assistant will create them automatically as it learns your preferences, or add one manually.", "memoryContentPlaceholder": "What should the assistant remember? e.g. \"Prefer TypeScript with 2-space indentation.\"", + "memoryTargetPathPlaceholder": "Absolute path to the workspace or project directory", "memoryTagsPlaceholder": "Tags, comma-separated", "memoryConfidence": "Confidence", "memoryProject": "Project", diff --git a/src/renderer/src/locales/zh/common.json b/src/renderer/src/locales/zh/common.json index 831726da9..74186ac71 100644 --- a/src/renderer/src/locales/zh/common.json +++ b/src/renderer/src/locales/zh/common.json @@ -1998,6 +1998,13 @@ "threadForkPointFrom": "从「{{title}}」分叉,后续从这里开始", "compactionRunning": "正在压缩上下文", "compactionManualCompleted": "已压缩上下文", + "backgroundShellNotice.title": "后台 shell 已完成", + "backgroundShellNotice.kindLabel": "后台回调", + "backgroundShellNotice.sessionId": "会话", + "backgroundShellNotice.command": "命令", + "backgroundShellNotice.exitCode": "退出码", + "backgroundShellNotice.outputPreview": "输出预览", + "backgroundShellNotice.outputFile": "完整输出文件", "compactionManualCompletedWithTokens": "已压缩上下文 · 释放约 {{tokens}} tokens", "compactionAutoCompleted": "已自动压缩上下文", "compactionAutoCompletedWithTokens": "已自动压缩上下文 · 释放约 {{tokens}} tokens", @@ -2018,6 +2025,8 @@ "turnChangeFilesMany": "已编辑 {{count}} 个文件", "groupRanCommands": "运行了 {{count}} 条命令", "groupRanCommand": "运行了 1 条命令", + "groupRanBackgroundCommands": "运行了 {{count}} 条后台命令", + 
"groupRanBackgroundCommand": "运行了 1 条后台命令", "groupUsedTools": "调用了 {{count}} 个工具", "groupUsedTool": "调用了 1 个工具", "groupApprovals": "{{count}} 个审批", @@ -2033,6 +2042,7 @@ "rewindBusyError": "当前回合还在进行,等结束后再回退。", "toolActionFile": "修改文件", "toolActionCommand": "运行命令", + "toolActionBackgroundCommand": "运行后台命令", "toolActionTool": "调用工具", "toolKindFile": "文件变更", "toolKindCommand": "命令", @@ -2055,6 +2065,13 @@ "toolBuiltinFind": "查找", "toolBuiltinLs": "列出", "toolBuiltinBash": "命令", + "toolBuiltinBackgroundShell": "后台 shell", + "toolActionBackgroundShellList": "列出后台 shell", + "toolActionBackgroundShellRead": "读取后台 shell", + "toolActionBackgroundShellPoll": "轮询后台 shell", + "toolActionBackgroundShellWrite": "写入后台 shell", + "toolActionBackgroundShellStop": "停止后台 shell", + "toolActionBackgroundShellIncludeFinished": "含已结束", "toolBuiltinDelegate": "委派任务", "subagentDefaultName": "子代理", "subagentStatusQueued": "排队中", diff --git a/src/renderer/src/locales/zh/settings.json b/src/renderer/src/locales/zh/settings.json index 00cfdbe8f..982d2c4dd 100644 --- a/src/renderer/src/locales/zh/settings.json +++ b/src/renderer/src/locales/zh/settings.json @@ -188,6 +188,12 @@ "fontScaleMedium": "中", "fontScaleLarge": "大", "fontScaleCurrent": "当前:{{value}}", + "chatContentMaxWidth": "对话文字宽度", + "chatContentMaxWidthDesc": "调整对话消息与输入框的正文显示宽度。", + "chatContentMaxWidthNarrow": "窄", + "chatContentMaxWidthWide": "宽", + "chatContentMaxWidthDecrease": "减小对话文字宽度", + "chatContentMaxWidthIncrease": "增大对话文字宽度", "cursorSpotlight": "交互特效", "cursorSpotlightDesc": "在置顶栏和侧边栏显示跟随鼠标的柔和高光。", "cursorSpotlightColor": "交互特效颜色", @@ -1231,6 +1237,7 @@ "memoryCancel": "取消", "memoryEmpty": "暂无记忆记录。助手会在了解你的偏好后自动创建,也可以手动添加。", "memoryContentPlaceholder": "想让助手记住什么?例如:「偏好 TypeScript,2 空格缩进」", + "memoryTargetPathPlaceholder": "工作区或项目目录的绝对路径", "memoryTagsPlaceholder": "标签,逗号分隔", "memoryConfidence": "置信度", "memoryProject": "所属项目", diff --git a/src/renderer/src/store/chat-store-app-actions.test.ts 
b/src/renderer/src/store/chat-store-app-actions.test.ts index a4a21aadb..2eaf89db4 100644 --- a/src/renderer/src/store/chat-store-app-actions.test.ts +++ b/src/renderer/src/store/chat-store-app-actions.test.ts @@ -84,6 +84,7 @@ function buildHarness(fetchModelsResult: FetchModelsResult): { }, applyTheme: () => undefined, applyUiFontScale: () => undefined, + applyChatContentMaxWidth: () => undefined, applyCursorSpotlight: () => undefined, applyCursorSpotlightColor: () => undefined, applyWriteTypography: () => undefined, diff --git a/src/renderer/src/store/chat-store-app-actions.ts b/src/renderer/src/store/chat-store-app-actions.ts index 8f61fcbb9..7829fd91f 100644 --- a/src/renderer/src/store/chat-store-app-actions.ts +++ b/src/renderer/src/store/chat-store-app-actions.ts @@ -33,6 +33,7 @@ type CreateAppActionsOptions = { setComposerModelLoadPromise: (promise: Promise | null) => void applyTheme: (theme: AppSettingsV1['theme']) => void applyUiFontScale: (scale: AppSettingsV1['uiFontScale']) => void + applyChatContentMaxWidth: (widthPx: AppSettingsV1['chatContentMaxWidthPx']) => void applyCursorSpotlight: (enabled: boolean) => void applyCursorSpotlightColor: (color: AppSettingsV1['cursorSpotlightColor']) => void applyWriteTypography: (typography: AppSettingsV1['write']['typography']) => void @@ -75,6 +76,7 @@ export function createAppActions(options: CreateAppActionsOptions): Pick< setComposerModelLoadPromise, applyTheme, applyUiFontScale, + applyChatContentMaxWidth, applyCursorSpotlight, applyCursorSpotlightColor, applyWriteTypography, @@ -239,6 +241,7 @@ export function createAppActions(options: CreateAppActionsOptions): Pick< const workspaceRoot = normalizeWorkspaceRoot(settings.workspaceRoot) applyTheme(settings.theme) applyUiFontScale(settings.uiFontScale) + applyChatContentMaxWidth(settings.chatContentMaxWidthPx) applyCursorSpotlight(settings.cursorSpotlight !== false) applyCursorSpotlightColor(settings.cursorSpotlightColor) if (settings.write?.typography) 
applyWriteTypography(settings.write.typography) diff --git a/src/renderer/src/store/chat-store-navigation-actions.test.ts b/src/renderer/src/store/chat-store-navigation-actions.test.ts index 430b98d6a..d4f7cc36b 100644 --- a/src/renderer/src/store/chat-store-navigation-actions.test.ts +++ b/src/renderer/src/store/chat-store-navigation-actions.test.ts @@ -16,6 +16,7 @@ const applyThemeLibMock = vi.hoisted(() => ({ applyCursorSpotlightColor: vi.fn(), applyTheme: vi.fn(), applyUiFontScale: vi.fn(), + applyChatContentMaxWidth: vi.fn(), applyDocumentLocale: vi.fn() })) @@ -230,6 +231,7 @@ describe('onClawChannelActivity routes through subscribeThreadEventsLive (not se }, theme: 'dark', uiFontScale: 1, + chatContentMaxWidthPx: 896, locale: 'en', agents: { kun: { apiKey: 'test-key', model: 'deepseek-v4-pro', baseUrl: '' } }, disabledSkillIds: [] diff --git a/src/renderer/src/store/chat-store-navigation-actions.ts b/src/renderer/src/store/chat-store-navigation-actions.ts index 12df5a486..24f283701 100644 --- a/src/renderer/src/store/chat-store-navigation-actions.ts +++ b/src/renderer/src/store/chat-store-navigation-actions.ts @@ -3,6 +3,7 @@ import { getProvider } from '../agent/registry' import { rendererRuntimeClient } from '../agent/runtime-client' import i18n from '../i18n' import { + applyChatContentMaxWidth, applyCursorSpotlight, applyCursorSpotlightColor, applyTheme, @@ -376,6 +377,7 @@ export function createNavigationActions( const needsInitialSetup = !getActiveAgentApiKey(settings).trim() applyTheme(settings.theme) applyUiFontScale(settings.uiFontScale) + applyChatContentMaxWidth(settings.chatContentMaxWidthPx) applyCursorSpotlight(settings.cursorSpotlight !== false) applyCursorSpotlightColor(settings.cursorSpotlightColor) if (settings.write?.typography) applyWriteTypography(settings.write.typography) diff --git a/src/renderer/src/store/chat-store-runtime-helpers.test.ts b/src/renderer/src/store/chat-store-runtime-helpers.test.ts index c3fad2087..cba8d1858 100644 
--- a/src/renderer/src/store/chat-store-runtime-helpers.test.ts +++ b/src/renderer/src/store/chat-store-runtime-helpers.test.ts @@ -1,185 +1,70 @@ import { describe, expect, it } from 'vitest' +import { + inferClientUserMessageSource, + isBackgroundShellNoticeUserMessage +} from '@shared/background-shell-notice' import type { ChatBlock } from '../agent/types' -import type { NormalizedThread } from '../agent/types' -import type { ChatState } from './chat-store-types' import { - findReusableEmptyThreadId, - hasPendingRuntimeWork, - settlePendingRuntimeWorkAfterInterrupt, - threadHasPendingRuntimeWork, - threadSnapshotLooksRunning + isOptimisticUserBlockId, + reconcileOptimisticUserBlock, + upsertUserBlock } from './chat-store-runtime-helpers' -describe('chat-store-runtime-helpers compaction state', () => { - it('keeps the thread busy while a compaction item is running', () => { - const runningCompaction: ChatBlock = { - kind: 'compaction', - id: 'compact-running', - summary: 'Compacting context', - status: 'running' - } - const completedCompaction: ChatBlock = { - kind: 'compaction', - id: 'compact-completed', - summary: 'Compacted context', - status: 'success' - } - - expect(hasPendingRuntimeWork(runningCompaction)).toBe(true) - expect(hasPendingRuntimeWork(completedCompaction)).toBe(false) - expect(threadSnapshotLooksRunning([runningCompaction])).toBe(true) - expect(threadSnapshotLooksRunning([completedCompaction])).toBe(false) - }) - - it('trusts an explicit idle thread status over stale pending blocks', () => { - const staleTool: ChatBlock = { - kind: 'tool', - id: 'tool-stale', - summary: 'Old tool', - status: 'running', - toolKind: 'tool_call' - } - - expect(threadSnapshotLooksRunning([staleTool], 'idle')).toBe(false) - expect(threadSnapshotLooksRunning([staleTool], 'aborted')).toBe(false) - expect(threadSnapshotLooksRunning([staleTool], 'running')).toBe(true) - expect(threadSnapshotLooksRunning([staleTool])).toBe(true) - }) - - it('ignores stale pending work 
once the same turn has visible assistant content', () => { - const blocks: ChatBlock[] = [ - { kind: 'user', id: 'user-1', text: 'Run the task' }, - { - kind: 'tool', - id: 'tool-stale', - summary: 'Old tool', - status: 'running', - toolKind: 'tool_call' - }, - { kind: 'assistant', id: 'answer-1', text: 'The task is complete.' } - ] - - expect(threadHasPendingRuntimeWork(blocks)).toBe(false) - expect(threadSnapshotLooksRunning(blocks)).toBe(false) - }) - - it('keeps the thread busy when pending work has no later assistant answer', () => { - const blocks: ChatBlock[] = [ - { kind: 'user', id: 'user-1', text: 'Run the task' }, - { kind: 'assistant', id: 'partial-1', text: 'I will check that.' }, - { - kind: 'tool', - id: 'tool-running', - summary: 'Still running', - status: 'running', - toolKind: 'tool_call' - } - ] - - expect(threadHasPendingRuntimeWork(blocks)).toBe(true) - expect(threadSnapshotLooksRunning(blocks)).toBe(true) +describe('chat store runtime helpers', () => { + it('detects optimistic user block ids', () => { + expect(isOptimisticUserBlockId('u-123')).toBe(true) + expect(isOptimisticUserBlockId('item_turn_abc_user')).toBe(false) }) - it('does not let stale pending work from an older turn block new input', () => { - const blocks: ChatBlock[] = [ - { kind: 'user', id: 'user-1', text: 'First task' }, - { - kind: 'tool', - id: 'tool-stale', - summary: 'Old tool', - status: 'running', - toolKind: 'tool_call' - }, - { kind: 'user', id: 'user-2', text: 'Second task' }, - { kind: 'assistant', id: 'answer-2', text: 'Second answer.' 
} - ] - - expect(threadHasPendingRuntimeWork(blocks)).toBe(false) - expect(threadSnapshotLooksRunning(blocks)).toBe(false) + it('tags background shell notices locally from xml text without server metadata', () => { + const noticeText = + 'abcd1234npm run build0okread output' + expect(inferClientUserMessageSource(noticeText)).toBe('background_shell') + expect( + isBackgroundShellNoticeUserMessage({ + text: noticeText + }) + ).toBe(true) }) - it('settles local pending work after a successful interrupt', () => { - const blocks: ChatBlock[] = [ - { - kind: 'tool', - id: 'tool-running', - summary: 'Running tool', - status: 'running', - toolKind: 'tool_call' - }, - { - kind: 'approval', - id: 'approval-pending', - approvalId: 'approval-1', - summary: 'Needs approval', - status: 'pending' - }, - { - kind: 'user_input', - id: 'input-pending', - requestId: 'input-1', - questions: [], - status: 'pending' - }, - { - kind: 'tool', - id: 'tool-success', - summary: 'Done', - status: 'success', - toolKind: 'tool_call' + it('preserves the original user prompt when a background shell notice arrives', () => { + const originalUser: ChatBlock = { + kind: 'user', + id: 'item_turn_abc_user', + text: 'Run build in background' + } + const blocks: ChatBlock[] = [originalUser] + const notice = { + itemId: 'item_steered_notice', + turnId: 'turn_abc', + text: 'abcd1234npm run build0okread output', + meta: { + displayText: 'Background shell abcd1234 completed' } - ] - - const settled = settlePendingRuntimeWorkAfterInterrupt(blocks) - - expect(settled.map((block) => ('status' in block ? 
block.status : ''))).toEqual([ - 'error', - 'error', - 'cancelled', - 'success' - ]) - expect(settled.some(hasPendingRuntimeWork)).toBe(false) - }) -}) - -describe('findReusableEmptyThreadId', () => { - const workspace = '/work/project' - const makeThread = (overrides: Partial): NormalizedThread => ({ - id: 'thread', - title: '新会话', - updatedAt: '2026-06-14T00:00:00.000Z', - model: 'deepseek', - mode: 'agent', - workspace, - ...overrides - }) - const stateWith = (threads: NormalizedThread[]): ChatState => - ({ activeThreadId: null, threads, blocks: [] } as unknown as ChatState) - const emptyProvider = { getThreadDetail: async () => ({ blocks: [] }) } - - it('reuses an empty thread that still carries the default placeholder title', async () => { - const state = stateWith([makeThread({ id: 'blank', title: '新会话' })]) - const reused = await findReusableEmptyThreadId(state, emptyProvider, workspace) - expect(reused).toBe('blank') - }) - - it('does not reuse an empty thread that carries a meaningful title (e.g. a released requirement)', async () => { - // Regression: a freshly released SDD requirement thread is empty but keeps - // its requirement title. Reusing it would make the next "new conversation" - // inherit "旅游旅行社区网页" instead of starting fresh. 
- const state = stateWith([makeThread({ id: 'requirement', title: '旅游旅行社区网页' })]) - const reused = await findReusableEmptyThreadId(state, emptyProvider, workspace) - expect(reused).toBeNull() - }) + } - it('does not reuse the active thread when it carries a meaningful title', async () => { - const titled = makeThread({ id: 'requirement', title: '旅游旅行社区网页' }) - const state = { - activeThreadId: 'requirement', - threads: [titled], - blocks: [] - } as unknown as ChatState - const reused = await findReusableEmptyThreadId(state, emptyProvider, workspace) - expect(reused).toBeNull() + const canReconcileOptimisticUser = + !isBackgroundShellNoticeUserMessage(notice) && + 'item_turn_abc_user' !== notice.itemId && + isOptimisticUserBlockId('item_turn_abc_user') + + expect(canReconcileOptimisticUser).toBe(false) + + const reconciledBlocks = canReconcileOptimisticUser + ? reconcileOptimisticUserBlock(blocks, 'item_turn_abc_user', notice.itemId, notice.text) + : blocks + const nextBlocks = upsertUserBlock(reconciledBlocks, notice) + + expect(nextBlocks).toHaveLength(2) + expect(nextBlocks[0]).toMatchObject({ + kind: 'user', + id: 'item_turn_abc_user', + text: 'Run build in background' + }) + expect(nextBlocks[1]).toMatchObject({ + kind: 'user', + id: 'item_steered_notice', + meta: { messageSource: 'background_shell' } + }) }) }) diff --git a/src/renderer/src/store/chat-store-runtime-helpers.ts b/src/renderer/src/store/chat-store-runtime-helpers.ts index 375dae170..3c90a57c1 100644 --- a/src/renderer/src/store/chat-store-runtime-helpers.ts +++ b/src/renderer/src/store/chat-store-runtime-helpers.ts @@ -4,6 +4,10 @@ import type { RuntimeDisclosureMetadata, UserMessageEventPayload } from '../agent/types' +import { + applyClientUserMessageSourceMeta, + isBackgroundShellNoticeUserMessage +} from '@shared/background-shell-notice' import { normalizeWorkspaceRoot } from '../lib/workspace-path' import { shouldAutoTitleThread } from '../lib/thread-title' import type { ChatState } from 
'./chat-store-types' @@ -40,6 +44,7 @@ export function threadHasPendingRuntimeWork(blocks: ChatBlock[]): boolean { for (const block of blocks) { if (block.kind === 'user') { + if (isBackgroundShellNoticeUserMessage(block)) continue pendingInCurrentTurn = false continue } @@ -99,6 +104,8 @@ export function findLatestUserBlockId(blocks: ChatBlock[]): string | null { } export function upsertUserBlock(blocks: ChatBlock[], ev: UserMessageEventPayload): ChatBlock[] { + const clientMeta: RuntimeDisclosureMetadata = { ...(ev.meta ?? {}) } + applyClientUserMessageSourceMeta(clientMeta as Record, ev.text) const nextBlock: ChatBlock = { kind: 'user', id: ev.itemId, @@ -107,12 +114,12 @@ export function upsertUserBlock(blocks: ChatBlock[], ev: UserMessageEventPayload text: ev.text, ...(ev.modelLabel ? { modelLabel: ev.modelLabel } : {}), ...(ev.managedBy ? { managedBy: ev.managedBy } : {}), - ...(ev.meta ? { meta: ev.meta } : {}) + ...(Object.keys(clientMeta).length > 0 ? { meta: clientMeta } : {}) } const existingIndex = blocks.findIndex((block) => block.kind === 'user' && block.id === ev.itemId) if (existingIndex < 0) return [...blocks, nextBlock] const current = blocks[existingIndex] - const meta = mergeRuntimeDisclosureMeta( + const mergedMeta = mergeRuntimeDisclosureMeta( current.kind === 'user' ? current.meta : undefined, nextBlock.kind === 'user' ? nextBlock.meta : undefined ) @@ -120,7 +127,12 @@ export function upsertUserBlock(blocks: ChatBlock[], ev: UserMessageEventPayload ...current, ...nextBlock, createdAt: current.createdAt ?? nextBlock.createdAt, - ...(meta ? { meta } : {}) + ...(mergedMeta ? { meta: mergedMeta } : {}) + } + if (merged.kind === 'user') { + const metaRecord = { ...(merged.meta ?? {}) } as Record + applyClientUserMessageSourceMeta(metaRecord, merged.text) + merged.meta = Object.keys(metaRecord).length > 0 ? 
(metaRecord as RuntimeDisclosureMetadata) : undefined } const next = [...blocks] next[existingIndex] = merged @@ -138,6 +150,10 @@ function mergeRuntimeDisclosureMeta( } } +export function isOptimisticUserBlockId(id: string): boolean { + return id.startsWith('u-') +} + export function reconcileOptimisticUserBlock( blocks: ChatBlock[], optimisticId: string, diff --git a/src/renderer/src/store/chat-store-runtime.ts b/src/renderer/src/store/chat-store-runtime.ts index e1899adb6..f84861472 100644 --- a/src/renderer/src/store/chat-store-runtime.ts +++ b/src/renderer/src/store/chat-store-runtime.ts @@ -16,10 +16,12 @@ import i18n from '../i18n' import { describeRuntimeError, formatRuntimeError, getRuntimeErrorCode } from '../lib/format-runtime-error' import { isClawWorkspacePath, isInternalTemporaryWorkspace, normalizeWorkspaceRoot } from '../lib/workspace-path' import type { ClawImChannelV1 } from '@shared/app-settings' +import { isBackgroundShellNoticeUserMessage } from '@shared/background-shell-notice' import type { ChatState } from './chat-store-types' import { isClawThread } from './chat-store-helpers' import { collectAssistantTextForTurn, + isOptimisticUserBlockId, reconcileOptimisticUserBlock, settlePendingRuntimeWorkAfterInterrupt, threadSnapshotLooksRunning, @@ -634,31 +636,45 @@ export function buildThreadEventSink( const flushed = flushLiveBlocks(s) const baseBlocks = flushed.blocks ?? s.blocks const optimisticCurrentUserId = s.currentTurnUserId - const reconciledBlocks = + const isBackgroundShellNotice = isBackgroundShellNoticeUserMessage({ + text: ev.text, + meta: ev.meta + }) + const canReconcileOptimisticUser = + !isBackgroundShellNotice && optimisticCurrentUserId && optimisticCurrentUserId !== ev.itemId && + isOptimisticUserBlockId(optimisticCurrentUserId) && baseBlocks.some((block) => block.kind === 'user' && block.id === optimisticCurrentUserId) - ? 
reconcileOptimisticUserBlock( - baseBlocks, - optimisticCurrentUserId, - ev.itemId, - ev.text, - ev.modelLabel - ) - : baseBlocks + const reconciledBlocks = canReconcileOptimisticUser + ? reconcileOptimisticUserBlock( + baseBlocks, + optimisticCurrentUserId, + ev.itemId, + ev.text, + ev.modelLabel + ) + : baseBlocks const nextBlocks = upsertUserBlock(reconciledBlocks, ev) const startedAt = runtimeEventStartedAt(ev.createdAt) armBusyWatchdog(set, get) + const nextCurrentTurnUserId = isBackgroundShellNotice + ? optimisticCurrentUserId + : canReconcileOptimisticUser || !optimisticCurrentUserId + ? ev.itemId + : optimisticCurrentUserId return { ...flushed, blocks: nextBlocks, busy: true, currentTurnId: ev.turnId ?? s.currentTurnId, - currentTurnUserId: ev.itemId, - turnStartedAtByUserId: { - ...s.turnStartedAtByUserId, - [ev.itemId]: s.turnStartedAtByUserId[ev.itemId] ?? startedAt - }, + currentTurnUserId: nextCurrentTurnUserId, + turnStartedAtByUserId: isBackgroundShellNotice + ? s.turnStartedAtByUserId + : { + ...s.turnStartedAtByUserId, + [ev.itemId]: s.turnStartedAtByUserId[ev.itemId] ?? 
startedAt + }, error: clearRuntimeStreamRecoveringError(s.error) } }), diff --git a/src/renderer/src/store/chat-store-thread-actions.test.ts b/src/renderer/src/store/chat-store-thread-actions.test.ts index b4527ffa9..2c873f355 100644 --- a/src/renderer/src/store/chat-store-thread-actions.test.ts +++ b/src/renderer/src/store/chat-store-thread-actions.test.ts @@ -475,6 +475,7 @@ describe('chat-store-thread-actions createThread conversation mode', () => { locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider: { providers: [], apiKey: '', baseUrl: '', proxy: { enabled: false } }, agents: { kun: { model: 'deepseek-v4-pro', apiKey: 'k', baseUrl: '' } }, workspaceRoot: '/tmp/workspace', diff --git a/src/renderer/src/store/chat-store.ts b/src/renderer/src/store/chat-store.ts index 6ca9589e4..4bf30978c 100644 --- a/src/renderer/src/store/chat-store.ts +++ b/src/renderer/src/store/chat-store.ts @@ -4,6 +4,7 @@ import { getProvider } from '../agent/registry' import { rendererRuntimeClient } from '../agent/runtime-client' import i18n from '../i18n' import { + applyChatContentMaxWidth, applyCursorSpotlight, applyCursorSpotlightColor, applyDocumentLocale, @@ -162,6 +163,7 @@ export const useChatStore = create((set, get) => ({ }, applyTheme, applyUiFontScale, + applyChatContentMaxWidth, applyCursorSpotlight, applyCursorSpotlightColor, applyWriteTypography, diff --git a/src/renderer/src/styles/base-shell.css b/src/renderer/src/styles/base-shell.css index 37e00fd91..7f4262245 100644 --- a/src/renderer/src/styles/base-shell.css +++ b/src/renderer/src/styles/base-shell.css @@ -2849,7 +2849,11 @@ pre { } .ds-chat-column-inset { - padding-inline: clamp(0.75rem, calc((100% - 48rem) / 2 + 1rem), 2rem); + padding-inline: clamp(0.75rem, calc((100% - var(--ds-chat-content-max-width, 56rem)) / 2 + 1rem), 2rem); +} + +.ds-chat-content-max-width { + max-width: var(--ds-chat-content-max-width, 56rem); } .ds-chat-stage { diff --git 
a/src/shared/app-settings-normalize.ts b/src/shared/app-settings-normalize.ts index f98773a00..99f6f55ca 100644 --- a/src/shared/app-settings-normalize.ts +++ b/src/shared/app-settings-normalize.ts @@ -6,6 +6,7 @@ import { DEFAULT_CURSOR_SPOTLIGHT_COLOR, DEFAULT_LOG_RETENTION_DAYS, normalizeGuiUpdateChannel, + normalizeChatContentMaxWidth, normalizeUiFontScale, type AppBehaviorConfigV1, type AppSettingsV1, @@ -78,6 +79,7 @@ export function normalizeAppSettings(settings: AppSettingsV1): AppSettingsV1 { ? maybeSettings.theme : 'system', uiFontScale: normalizeUiFontScale(maybeSettings.uiFontScale), + chatContentMaxWidthPx: normalizeChatContentMaxWidth(maybeSettings.chatContentMaxWidthPx), cursorSpotlight: maybeSettings.cursorSpotlight !== false, cursorSpotlightColor: normalizeCursorSpotlightColor(maybeSettings.cursorSpotlightColor), provider: providerSettings, diff --git a/src/shared/app-settings-provider.test.ts b/src/shared/app-settings-provider.test.ts index 5f9a92223..3519d34bf 100644 --- a/src/shared/app-settings-provider.test.ts +++ b/src/shared/app-settings-provider.test.ts @@ -44,6 +44,7 @@ function settings(): AppSettingsV1 { locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider: { ...defaultModelProviderSettings(), providers: [ diff --git a/src/shared/app-settings-types.ts b/src/shared/app-settings-types.ts index f61164b86..07b18d2a4 100644 --- a/src/shared/app-settings-types.ts +++ b/src/shared/app-settings-types.ts @@ -43,6 +43,19 @@ export function normalizeUiFontScale(value: unknown): UiFontScale { if (!Number.isFinite(num)) return DEFAULT_UI_FONT_SCALE return Math.min(UI_FONT_SCALE_MAX, Math.max(UI_FONT_SCALE_MIN, Math.round(num * 100) / 100)) } +/** Max width of the main chat message column, in CSS pixels. 
*/ +export type ChatContentMaxWidthPx = number +export const CHAT_CONTENT_MAX_WIDTH_MIN = 640 +export const CHAT_CONTENT_MAX_WIDTH_MAX = 1200 +export const DEFAULT_CHAT_CONTENT_MAX_WIDTH_PX = 896 +export function normalizeChatContentMaxWidth(value: unknown): ChatContentMaxWidthPx { + const num = typeof value === 'number' ? value : Number(value) + if (!Number.isFinite(num)) return DEFAULT_CHAT_CONTENT_MAX_WIDTH_PX + return Math.min( + CHAT_CONTENT_MAX_WIDTH_MAX, + Math.max(CHAT_CONTENT_MAX_WIDTH_MIN, Math.round(num / 8) * 8) + ) +} export type ScheduleRunMode = 'agent' | 'plan' export type ScheduleKind = 'manual' | 'interval' | 'daily' | 'at' export type ScheduleTaskStatus = 'idle' | 'queued' | 'running' | 'success' | 'error' @@ -1748,6 +1761,7 @@ export type AppSettingsV1 = { locale: 'en' | 'zh' theme: 'system' | 'light' | 'dark' uiFontScale: UiFontScale + chatContentMaxWidthPx: ChatContentMaxWidthPx cursorSpotlight?: boolean cursorSpotlightColor?: string provider: ModelProviderSettingsV1 diff --git a/src/shared/app-settings.test.ts b/src/shared/app-settings.test.ts index 2b7478b46..569d4177c 100644 --- a/src/shared/app-settings.test.ts +++ b/src/shared/app-settings.test.ts @@ -32,6 +32,7 @@ import { isKunRuntimeInsecure, migrateLegacyAppSettings, normalizeAppSettings, + normalizeChatContentMaxWidth, parseClawUserPromptForDisplay, inferModelEndpointFormatFromUrl, kunToolPermissionModeFromSettings, @@ -52,6 +53,7 @@ function settings(): AppSettingsV1 { locale: 'en', theme: 'system', uiFontScale: 0.82, + chatContentMaxWidthPx: 896, provider: defaultModelProviderSettings(), agents: { kun: defaultKunRuntimeSettings() @@ -74,6 +76,20 @@ function settings(): AppSettingsV1 { } } +describe('chat content max width', () => { + it('defaults invalid values to 896px', () => { + expect(normalizeChatContentMaxWidth(undefined)).toBe(896) + expect(normalizeChatContentMaxWidth('bad')).toBe(896) + }) + + it('clamps and rounds to 8px steps', () => { + 
expect(normalizeChatContentMaxWidth(500)).toBe(640) + expect(normalizeChatContentMaxWidth(896)).toBe(896) + expect(normalizeChatContentMaxWidth(1300)).toBe(1200) + expect(normalizeChatContentMaxWidth(905)).toBe(904) + }) +}) + describe('model endpoint format inference', () => { it('treats /completions custom endpoints as Chat Completions-shaped', () => { expect(inferModelEndpointFormatFromUrl('https://api.example.com/custom/completions')).toBe('chat_completions') diff --git a/src/shared/background-shell-notice.ts b/src/shared/background-shell-notice.ts new file mode 100644 index 000000000..c0c705a2d --- /dev/null +++ b/src/shared/background-shell-notice.ts @@ -0,0 +1,67 @@ +export type BackgroundShellCompletionNotice = { + sessionId: string + command: string + exitCode: number + outputPreview: string + outputFile?: string + hint: string +} + +function unescapeXml(text: string): string { + return text + .replace(/"/g, '"') + .replace(/>/g, '>') + .replace(/</g, '<') + .replace(/&/g, '&') +} + +function readXmlTag(xml: string, tag: string): string | null { + const match = xml.match(new RegExp(`<${tag}>([\\s\\S]*?)`)) + if (!match) return null + return unescapeXml(match[1].trim()) +} + +export function parseBackgroundShellCompletionNotice(text: string): BackgroundShellCompletionNotice | null { + const trimmed = text.trim() + if (!trimmed.includes('')) return null + const sessionId = readXmlTag(trimmed, 'session_id') + const command = readXmlTag(trimmed, 'command') + const exitCodeRaw = readXmlTag(trimmed, 'exit_code') + const outputPreview = readXmlTag(trimmed, 'output_preview') + const outputFile = readXmlTag(trimmed, 'output_file') ?? undefined + const hint = readXmlTag(trimmed, 'hint') + if (!sessionId || !command || exitCodeRaw === null || outputPreview === null || !hint) return null + const exitCode = Number.parseInt(exitCodeRaw, 10) + if (!Number.isFinite(exitCode)) return null + return { sessionId, command, exitCode, outputPreview, ...(outputFile ? 
{ outputFile } : {}), hint } +} + +export function isBackgroundShellNoticeSource( + messageSource: unknown +): messageSource is 'background_shell' { + return messageSource === 'background_shell' +} + +export type ClientUserMessageSource = 'background_shell' + +/** Client-only hint derived from persisted user_message text, not from server metadata. */ +export function inferClientUserMessageSource(text: string): ClientUserMessageSource | undefined { + return parseBackgroundShellCompletionNotice(text) ? 'background_shell' : undefined +} + +export function applyClientUserMessageSourceMeta( + meta: Record, + text: string +): void { + const messageSource = inferClientUserMessageSource(text) + if (messageSource) meta.messageSource = messageSource + else delete meta.messageSource +} + +export function isBackgroundShellNoticeUserMessage(input: { + text: string + meta?: Record | null +}): boolean { + if (isBackgroundShellNoticeSource(input.meta?.messageSource)) return true + return inferClientUserMessageSource(input.text) === 'background_shell' +} diff --git a/src/shared/kun-endpoints.ts b/src/shared/kun-endpoints.ts index 677659edb..865262c0b 100644 --- a/src/shared/kun-endpoints.ts +++ b/src/shared/kun-endpoints.ts @@ -121,6 +121,16 @@ export const KUN_USAGE_TEMPLATE = '/v1/usage' export const KUN_DEBUG_LLM_ROUNDS_PATH = '/v1/debug/llm-rounds' export const KUN_DEBUG_LLM_ROUNDS_TEMPLATE = '/v1/debug/llm-rounds' +export const KUN_BACKGROUND_SHELLS_PATH = '/v1/background-shells' +export const KUN_BACKGROUND_SHELLS_TEMPLATE = '/v1/background-shells' +export const KUN_BACKGROUND_SHELL_TEMPLATE = '/v1/background-shells/{sessionId}' +export function kunBackgroundShellPath(sessionId: string): string { + return `/v1/background-shells/${encodeURIComponent(sessionId)}` +} +export function kunBackgroundShellStopPath(sessionId: string): string { + return `${kunBackgroundShellPath(sessionId)}/stop` +} + /** Thread mode shared with the Kun contract. 
*/ export type KunThreadMode = 'agent' | 'plan'