diff --git a/.github/workflows/daily-dev-prerelease.yml b/.github/workflows/daily-dev-prerelease.yml index 8765b6d96..7a28765db 100644 --- a/.github/workflows/daily-dev-prerelease.yml +++ b/.github/workflows/daily-dev-prerelease.yml @@ -230,9 +230,6 @@ jobs: "Kun-${DEV_VERSION}-mac-x64.zip" "Kun-${DEV_VERSION}-win-x64.exe" "Kun-${DEV_VERSION}-linux-x86_64.AppImage" - "latest-mac.yml" - "latest.yml" - "latest-linux.yml" ) for file in "${required[@]}"; do diff --git a/kun/README.md b/kun/README.md index c1c2748cf..f63d06110 100644 --- a/kun/README.md +++ b/kun/README.md @@ -187,6 +187,7 @@ Shape: "enabled": true, "transport": "stdio", "command": "npx", + "cwd": "/path/to/workspace", "args": ["-y", "@modelcontextprotocol/server-github"], "env": { "GITHUB_TOKEN": "" }, "trustScope": "workspace", @@ -453,7 +454,8 @@ stay local to one thread, leave it as a pinned constraint. server `enabled` flag, transport-specific fields (`command` for `stdio`, `url` for HTTP/SSE), `trustedWorkspaceRoots` for workspace-scoped servers, and `/v1/runtime/tools` for redacted - `lastError` diagnostics. + `lastError` diagnostics. Stdio servers can set `cwd`; if omitted, + workspace-scoped servers start in the first trusted workspace root. - Web tools are missing: `capabilities.web.enabled` must be true and at least one of `fetchEnabled` / `searchEnabled` must be true. 
Built-in fetch handles HTTP(S) pages; search may still be diff --git a/kun/README.zh-CN.md b/kun/README.zh-CN.md index 12e296e59..7458fd6a3 100644 --- a/kun/README.zh-CN.md +++ b/kun/README.zh-CN.md @@ -176,6 +176,7 @@ Kun 使用 JSON 配置文件管理运行时行为,避免重建后重配或硬 "enabled": true, "transport": "stdio", "command": "npx", + "cwd": "/path/to/workspace", "args": ["-y", "@modelcontextprotocol/server-github"], "env": { "GITHUB_TOKEN": "" }, "trustScope": "workspace", @@ -391,7 +392,7 @@ SSE 使用 `id: `、`event: ` 与 `data:`。新连接可通过 `since ## 故障排查 -- MCP 不出现:检查 `capabilities.mcp.enabled`、服务器的 `enabled` 开关、`transport` 字段(`stdio` 需检查 `command`,HTTP/SSE 需检查 `url`)、workspace 级服务器的 `trustedWorkspaceRoots`,以及 `/v1/runtime/tools` 的 `lastError`。 +- MCP 不出现:检查 `capabilities.mcp.enabled`、服务器的 `enabled` 开关、`transport` 字段(`stdio` 需检查 `command`,HTTP/SSE 需检查 `url`)、workspace 级服务器的 `trustedWorkspaceRoots`,以及 `/v1/runtime/tools` 的 `lastError`。stdio 服务器可配置 `cwd`;未配置时,workspace 级服务器会在第一个受信任工作区根目录中启动。 - Web 工具不可用:检查 `capabilities.web.enabled`,并确保 `fetchEnabled` / `searchEnabled` 至少一项为 true。内置 provider 负责抓取 HTTP(S) 页面,搜索可能因未实现 provider 而不可用。 - 图片上传失败:检查 `maxImageBytes`、`maxImageDimension`、`allowedMimeTypes` 与文本 fallback 的大小限制。纯文本模型需要足够小的 base64 文本 fallback。 - 记忆未注入:确认 `capabilities.memory` 为 true,`/v1/memory/diagnostics` 显示正常,作用域与工作区匹配且未被禁用;再看 `lastInjectedIds`。 diff --git a/kun/src/adapters/in-memory-approval-gate.ts b/kun/src/adapters/in-memory-approval-gate.ts index 5614511ca..39a0f40f6 100644 --- a/kun/src/adapters/in-memory-approval-gate.ts +++ b/kun/src/adapters/in-memory-approval-gate.ts @@ -26,6 +26,7 @@ export class InMemoryApprovalGate implements ApprovalGate { decide(approvalId: string, decision: 'allow' | 'deny', reason?: string): boolean { const approval = this.approvals.get(approvalId) if (!approval) return false + if (approval.status !== 'pending') return false const resolved = resolveApprovalRequest(approval, decision, reason) this.approvals.set(approvalId, resolved) const resolver = 
this.resolvers.get(approvalId) diff --git a/kun/src/adapters/model/compat-model-client.streaming-tool-calls.test.ts b/kun/src/adapters/model/compat-model-client.streaming-tool-calls.test.ts new file mode 100644 index 000000000..1f846adae --- /dev/null +++ b/kun/src/adapters/model/compat-model-client.streaming-tool-calls.test.ts @@ -0,0 +1,243 @@ +import { describe, expect, it } from 'vitest' +import { CompatModelClient } from './compat-model-client.js' +import type { ModelCapabilityMetadata } from '../../contracts/capabilities.js' +import type { ModelRequest, ModelStreamChunk } from '../../ports/model-client.js' + +// These tests exercise the REAL streaming SSE path (`streamSse` / +// `consumeStreamPayload`), which had no coverage before. They lock in the fix +// for the silent tool-call drop: the chat_completions branch only finalized on +// `finish_reason === 'tool_calls'`, so a provider ending with 'stop', 'length', +// or a bare `[DONE]` while a tool call was pending dropped it entirely. 
/**
 * Builds a 200 `text/event-stream` Response whose body emits each entry of
 * `frames` as one UTF-8 encoded chunk, in order, and then closes.
 */
function sseResponse(frames: string[]): Response {
  const encoder = new TextEncoder()
  const stream = new ReadableStream<Uint8Array>({
    start(controller) {
      for (const frame of frames) controller.enqueue(encoder.encode(frame))
      controller.close()
    }
  })
  return new Response(stream, {
    status: 200,
    headers: { 'content-type': 'text/event-stream' }
  })
}

/** Serializes `payload` as a single SSE `data:` frame terminated by a blank line. */
function frame(payload: unknown): string {
  return `data: ${JSON.stringify(payload)}\n\n`
}

/**
 * Builds one chat_completions streaming frame that carries a single tool-call
 * delta at `choices[0].delta.tool_calls[0]`.
 *
 * Only the fields actually supplied are written: `name` and `args` land on the
 * nested `function` object (as `name` / `arguments`), `id` on the call itself.
 * Omitting `id` and `name` therefore produces a continuation delta that is
 * identified by `index` alone.
 */
function chatToolDelta(d: { index: number; id?: string; name?: string; args?: string }): string {
  const fn: Record<string, unknown> = {}
  if (d.name !== undefined) fn.name = d.name
  if (d.args !== undefined) fn.arguments = d.args
  const call: Record<string, unknown> = { index: d.index, function: fn }
  if (d.id !== undefined) call.id = d.id
  return frame({ choices: [{ index: 0, delta: { tool_calls: [call] } }] })
}

/** Builds the terminating chat_completions frame: an empty delta plus `finish_reason`. */
function chatFinish(reason: string): string {
  return frame({ choices: [{ index: 0, delta: {}, finish_reason: reason }] })
}

// A two-part chat_completions tool-call stream. The args split across deltas so
// the test also covers index-based continuation accumulation.
function chatToolCallDeltas(): string[] {
  return [
    chatToolDelta({ index: 0, id: 'call_1', name: 'edit', args: '{"path":' }),
    chatToolDelta({ index: 0, args: '"a.txt"}' })
  ]
}
+ const frames = [...chatToolCallDeltas(), chatFinish('stop'), 'data: [DONE]\n\n'] + const chunks = await drain(makeClient(streamingFetch(frames)).stream(request())) + const calls = toolCallCompletes(chunks) + expect(calls).toHaveLength(1) + expect(calls[0].arguments).toEqual({ path: 'a.txt' }) + // A recovered call means it was really a tool-call turn. + expect(completed(chunks).stopReason).toBe('tool_calls') + }) + + it('recovers a tool call when the stream ends with a bare [DONE] and no finish_reason', async () => { + const frames = [...chatToolCallDeltas(), 'data: [DONE]\n\n'] + const chunks = await drain(makeClient(streamingFetch(frames)).stream(request())) + expect(toolCallCompletes(chunks)).toHaveLength(1) + expect(completed(chunks).stopReason).toBe('tool_calls') + }) + + it('surfaces truncated arguments as __raw (instead of dropping) on finish_reason "length"', async () => { + // Only the first (incomplete) delta arrives, then the model hits its cap. + const frames = [ + chatToolDelta({ index: 0, id: 'call_1', name: 'edit', args: '{"path":' }), + chatFinish('length'), + 'data: [DONE]\n\n' + ] + const chunks = await drain(makeClient(streamingFetch(frames)).stream(request())) + const calls = toolCallCompletes(chunks) + expect(calls).toHaveLength(1) + expect(calls[0].arguments).toHaveProperty('__raw', '{"path":') + // Truncation stays visible as 'length' so the loop can warn the user. 
+ expect(completed(chunks).stopReason).toBe('length') + }) + + it('does not emit a tool call when no tool deltas were streamed', async () => { + const frames = [ + frame({ choices: [{ index: 0, delta: { content: 'hello' } }] }), + chatFinish('stop'), + 'data: [DONE]\n\n' + ] + const chunks = await drain(makeClient(streamingFetch(frames)).stream(request())) + expect(toolCallCompletes(chunks)).toHaveLength(0) + expect(completed(chunks).stopReason).toBe('stop') + }) + + it('recovers an Anthropic Messages tool_use block cut off before content_block_stop', async () => { + const frames = [ + frame({ type: 'message_start', message: { usage: { input_tokens: 10 } } }), + frame({ type: 'content_block_start', index: 0, content_block: { type: 'tool_use', id: 'toolu_1', name: 'edit' } }), + frame({ type: 'content_block_delta', index: 0, delta: { type: 'input_json_delta', partial_json: '{"path":"a.txt"}' } }), + // No content_block_stop — stream is cut off, then the message ends. + frame({ type: 'message_delta', delta: { stop_reason: 'max_tokens' } }), + frame({ type: 'message_stop' }) + ] + const client = new CompatModelClient({ + baseUrl: 'https://provider.example/anthropic', + apiKey: 'sk-test', + model: 'test-model', + endpointFormat: 'messages', + fetchImpl: streamingFetch(frames) + }) + const chunks = await drain(client.stream(request())) + const calls = toolCallCompletes(chunks) + expect(calls).toHaveLength(1) + expect(calls[0].toolName).toBe('edit') + expect(calls[0].arguments).toEqual({ path: 'a.txt' }) + }) +}) + +describe('CompatModelClient output-token cap', () => { + function captureMessagesBody( + cap: (model: string) => ModelCapabilityMetadata, + req: Partial = {} + ): Promise> { + const calls: CapturedCall[] = [] + const frames = [frame({ type: 'message_start', message: { usage: {} } }), frame({ type: 'message_stop' })] + const client = new CompatModelClient({ + baseUrl: 'https://provider.example/anthropic', + apiKey: 'sk-test', + model: 'test-model', + 
endpointFormat: 'messages', + fetchImpl: streamingFetch(frames, calls), + modelCapabilities: cap + }) + return drain(client.stream(request(req))).then(() => calls[0].body) + } + + it('gives reasoning (anthropic-thinking) models a large messages max_tokens default', async () => { + const body = await captureMessagesBody( + capability({ reasoning: { supportedEfforts: ['auto', 'off'], defaultEffort: 'auto', requestProtocol: 'anthropic-thinking' } }), + { reasoningEffort: 'auto' } + ) + expect(body.max_tokens).toBe(32_768) + }) + + it('uses the smaller messages default for non-reasoning models', async () => { + const body = await captureMessagesBody(capability()) + expect(body.max_tokens).toBe(8_192) + }) + + it('lets a per-model maxOutputTokens capability override the default', async () => { + const body = await captureMessagesBody( + capability({ + maxOutputTokens: 5_000, + reasoning: { supportedEfforts: ['auto', 'off'], defaultEffort: 'auto', requestProtocol: 'anthropic-thinking' } + }), + { reasoningEffort: 'auto' } + ) + expect(body.max_tokens).toBe(5_000) + }) + + it('lets an explicit request.maxTokens win over everything', async () => { + const body = await captureMessagesBody(capability({ maxOutputTokens: 5_000 }), { maxTokens: 1_234 }) + expect(body.max_tokens).toBe(1_234) + }) +}) diff --git a/kun/src/adapters/model/compat-model-client.tool-images.test.ts b/kun/src/adapters/model/compat-model-client.tool-images.test.ts index 5ed5c193b..07cebd33f 100644 --- a/kun/src/adapters/model/compat-model-client.tool-images.test.ts +++ b/kun/src/adapters/model/compat-model-client.tool-images.test.ts @@ -62,6 +62,56 @@ function screenshotHistory(): TurnItem[] { return [toolCall, toolResult] } +// Two parallel tool calls in a single assistant turn, each with its own +// tool_result — the shape that triggered issue #574 (each tool_result must +// land in the SAME user message under the Anthropic Messages protocol). 
+function parallelToolHistory(): TurnItem[] { + const base = { turnId: 'u2', threadId: 't1', status: 'completed' as const, createdAt: '2026-01-01T00:00:00.000Z' } + const callA: TurnItem = { + ...base, + id: 'pc1', + role: 'assistant', + kind: 'tool_call', + toolName: 'read_file', + callId: 'call_a', + toolKind: 'command_execution', + arguments: { path: 'a.txt' } + } + const callB: TurnItem = { + ...base, + id: 'pc2', + role: 'assistant', + kind: 'tool_call', + toolName: 'read_file', + callId: 'call_b', + toolKind: 'command_execution', + arguments: { path: 'b.txt' } + } + const resultA: TurnItem = { + ...base, + id: 'pr1', + role: 'tool', + kind: 'tool_result', + toolName: 'read_file', + callId: 'call_a', + toolKind: 'command_execution', + isError: false, + output: { kind: 'text', text: 'contents-a' } + } + const resultB: TurnItem = { + ...base, + id: 'pr2', + role: 'tool', + kind: 'tool_result', + toolName: 'read_file', + callId: 'call_b', + toolKind: 'command_execution', + isError: false, + output: { kind: 'text', text: 'contents-b' } + } + return [callA, callB, resultA, resultB] +} + function request(model: string): ModelRequest { return { threadId: 't1', @@ -75,6 +125,13 @@ function request(model: string): ModelRequest { } } +function parallelRequest(model: string): ModelRequest { + return { + ...request(model), + history: parallelToolHistory() + } +} + async function drain(iterable: AsyncIterable): Promise { for await (const _ of iterable) void _ } @@ -160,4 +217,41 @@ describe('CompatModelClient tool-result image forwarding', () => { // Metadata (the screen size) still reaches the model as text. 
expect(body).toContain('computer_screenshot') }) + + it('merges parallel tool_results into one user message (anthropic messages)', async () => { + // Regression for issue #574: parallel tool calls must answer with a + // single user message holding both tool_result blocks, not two separate + // user messages (which trips Anthropic's tool_result-immediately-after + // rule and yields HTTP 400 on compat providers). + const calls: CapturedCall[] = [] + const client = new CompatModelClient({ + baseUrl: 'https://api.example.com/v1', + apiKey: 'sk', + model: 'claude-parallel', + endpointFormat: 'messages', + nonStreaming: true, + fetchImpl: fakeFetch(calls), + modelCapabilities: caps(false, 'messages') + }) + await drain(client.stream(parallelRequest('claude-parallel'))) + expect(calls[0].url).toMatch(/\/messages$/) + const messages = calls[0].body.messages as Array<{ + role: string + content: Array<{ type: string; tool_use_id?: string }> + }> + const toolResultUserMessages = messages.filter( + (m) => + m.role === 'user' && + Array.isArray(m.content) && + m.content.some((b) => b.type === 'tool_result') + ) + // Exactly ONE user message carries the tool_result blocks. + expect(toolResultUserMessages).toHaveLength(1) + const merged = toolResultUserMessages[0]! + const toolResultIds = merged.content + .filter((b) => b.type === 'tool_result') + .map((b) => b.tool_use_id) + .sort() + expect(toolResultIds).toEqual(['call_a', 'call_b']) + }) }) diff --git a/kun/src/adapters/model/compat-model-client.ts b/kun/src/adapters/model/compat-model-client.ts index 611f8b72a..79b718a40 100644 --- a/kun/src/adapters/model/compat-model-client.ts +++ b/kun/src/adapters/model/compat-model-client.ts @@ -143,7 +143,13 @@ type StreamReadResult = | { kind: 'error'; message: string } const DEFAULT_STREAM_IDLE_TIMEOUT_MS = 45_000 -const DEFAULT_MESSAGES_MAX_TOKENS = 4096 +// Anthropic Messages requires an explicit `max_tokens`. 
The old 4096 default +// was far too small for reasoning models: their thinking tokens are drawn from +// the SAME output budget, so a long think left almost nothing for the tool +// call, truncating its arguments into invalid JSON. Give thinking models much +// more headroom; a per-model `maxOutputTokens` capability still overrides both. +const DEFAULT_MESSAGES_MAX_TOKENS = 8192 +const DEFAULT_MESSAGES_REASONING_MAX_TOKENS = 32_768 /** * Multi-provider HTTP model client. @@ -368,6 +374,23 @@ export class CompatModelClient implements ModelClient { return this.config.modelCapabilities?.(model).reasoning } + /** Per-model output-token cap from capability metadata, if declared. */ + private maxOutputTokensFor(model: string): number | undefined { + return this.config.modelCapabilities?.(model).maxOutputTokens + } + + /** + * Resolves the output-token cap for a request: an explicit request value + * wins, then the per-model capability override, then the supplied default. + */ + private resolveMaxTokens( + request: ModelRequest, + model: string, + fallback?: number + ): number | undefined { + return request.maxTokens ?? this.maxOutputTokensFor(model) ?? 
fallback + } + private async postChatCompletion( url: string, headers: Record, @@ -480,8 +503,9 @@ export class CompatModelClient implements ModelClient { stream, messages: splitToolImageMessagesForOpenAi(messages) } - if (request.maxTokens !== undefined) { - body.max_tokens = request.maxTokens + const maxTokens = this.resolveMaxTokens(request, model) + if (maxTokens !== undefined) { + body.max_tokens = maxTokens } if (request.temperature !== undefined) { body.temperature = request.temperature @@ -536,8 +560,9 @@ export class CompatModelClient implements ModelClient { stream, input: messagesToResponsesInput(splitToolImageMessagesForOpenAi(messages)) } - if (request.maxTokens !== undefined) { - body.max_output_tokens = request.maxTokens + const maxTokens = this.resolveMaxTokens(request, model) + if (maxTokens !== undefined) { + body.max_output_tokens = maxTokens } if (request.temperature !== undefined) { body.temperature = request.temperature @@ -576,10 +601,24 @@ export class CompatModelClient implements ModelClient { this.modelReasoningFor(model)?.requestProtocol === 'anthropic-thinking' ) applyAnthropicCacheControl(converted.messages) + // Thinking tokens are billed against the same output budget, so reasoning + // models need a much larger default cap or their tool-call arguments get + // truncated. A per-model `maxOutputTokens` (or an explicit request value) + // still wins over these defaults. + const reasoning = this.modelReasoningFor(model) + const resolvedEffort = + reasoning?.requestProtocol === 'anthropic-thinking' + ? resolveReasoningEffort(request.reasoningEffort, reasoning) + : undefined + const thinkingEnabled = resolvedEffort !== undefined && resolvedEffort !== 'off' const body: Record = { model, stream, - max_tokens: request.maxTokens ?? DEFAULT_MESSAGES_MAX_TOKENS, + max_tokens: this.resolveMaxTokens( + request, + model, + thinkingEnabled ? 
DEFAULT_MESSAGES_REASONING_MAX_TOKENS : DEFAULT_MESSAGES_MAX_TOKENS + ), messages: converted.messages } const systemText = request.responseFormat === 'json_object' @@ -937,6 +976,27 @@ export class CompatModelClient implements ModelClient { yield { kind: 'error', message: 'request was aborted' } return } + // Safety net: finalize any tool call whose arguments finished streaming but + // was never emitted because the stream ended without a per-call "done" + // signal. The chat_completions branch only finalizes on + // `finish_reason === 'tool_calls'`, so a provider that ends with 'stop', + // 'length', or a bare `[DONE]` while a tool call is still pending would + // otherwise DROP the call silently. Truncated arguments surface here as + // `{ __raw }` (a tool error the model can react to) instead of vanishing. + let flushedPendingToolCall = false + for (const [callId, pending] of pendingArguments) { + if (!pending.name) continue + if (completedToolCalls.has(callId)) continue + flushedPendingToolCall = true + completedToolCalls.add(callId) + yield { + kind: 'tool_call_complete', + callId, + toolName: pending.name, + arguments: this.parseToolArguments(pending.arguments || '{}') + } + } + pendingArguments.clear() if (usage) yield { kind: 'usage', usage } stopReason = ((): ModelStopReason => { switch (finishReason) { @@ -947,7 +1007,9 @@ export class CompatModelClient implements ModelClient { case 'error': return 'error' default: - return 'stop' + // A recovered tool call means this was really a tool-call turn the + // provider mislabeled (e.g. finish_reason 'stop' or bare `[DONE]`). + return flushedPendingToolCall ? 'tool_calls' : 'stop' } })() yield { kind: 'completed', stopReason } @@ -1614,7 +1676,24 @@ function messagesToAnthropic( if (image) blocks.push({ type: 'image', source: image }) } } - out.push({ role: 'user', content: blocks }) + // Parallel tool calls arrive as N consecutive `role: 'tool'` messages. 
+ // Anthropic requires every tool_use from a single assistant turn to be + // answered by tool_result blocks inside ONE user message — emitting N + // separate user messages trips "tool_use ids were found without + // tool_result blocks immediately after" on compat providers. Real user + // turns never carry a tool_result block, so its presence marks the run + // we are still folding into. + const last = out[out.length - 1] + if ( + last && + last.role === 'user' && + Array.isArray(last.content) && + (last.content as AnthropicContentBlock[]).some((b) => b.type === 'tool_result') + ) { + last.content.push(...blocks) + } else { + out.push({ role: 'user', content: blocks }) + } continue } const content = chatContentToAnthropicContent(message.content) diff --git a/kun/src/adapters/tool/builtin-file-tools.ts b/kun/src/adapters/tool/builtin-file-tools.ts index 07a14585a..2b9a89dcb 100644 --- a/kun/src/adapters/tool/builtin-file-tools.ts +++ b/kun/src/adapters/tool/builtin-file-tools.ts @@ -60,7 +60,7 @@ export function createWriteLocalTool(_options: WriteLocalToolOptions = {}): Loca if (!rawPath.trim() || content == null) { return { output: { error: 'path and content are required' }, isError: true } } - const { absolutePath, relativePath } = resolveWorkspacePath(rawPath, context) + const { absolutePath, relativePath } = await resolveWorkspacePath(rawPath, context) assertCanWritePath(absolutePath, context) return withFileMutationQueue(absolutePath, async () => { await mkdirOp(dirname(absolutePath)) @@ -118,7 +118,7 @@ export function createEditLocalTool(_options: EditLocalToolOptions = {}): LocalT if (!rawPath.trim() || edits.length === 0) { return { output: { error: 'path and at least one edit are required' }, isError: true } } - const { absolutePath, relativePath } = resolveWorkspacePath(rawPath, context) + const { absolutePath, relativePath } = await resolveWorkspacePath(rawPath, context) assertCanWritePath(absolutePath, context) return 
withFileMutationQueue(absolutePath, async () => { const rawSource = await readFileOp(absolutePath) diff --git a/kun/src/adapters/tool/builtin-lsp-tool.ts b/kun/src/adapters/tool/builtin-lsp-tool.ts index 39c2bdf58..9205ffbb3 100644 --- a/kun/src/adapters/tool/builtin-lsp-tool.ts +++ b/kun/src/adapters/tool/builtin-lsp-tool.ts @@ -111,7 +111,7 @@ export function createLspLocalTool(): LocalTool { return { output: { error: `Unknown operation: ${operation}` }, isError: true } } - const { absolutePath, workspaceRoot } = resolveWorkspacePath(rawPath, context) + const { absolutePath, workspaceRoot } = await resolveWorkspacePath(rawPath, context) const server = findLanguageServerForFile(absolutePath) if (!server) { const supported = listLanguageServers() diff --git a/kun/src/adapters/tool/builtin-read-tool.ts b/kun/src/adapters/tool/builtin-read-tool.ts index 125c30f98..d3c5c8817 100644 --- a/kun/src/adapters/tool/builtin-read-tool.ts +++ b/kun/src/adapters/tool/builtin-read-tool.ts @@ -49,7 +49,7 @@ export function createReadLocalTool(options: ReadLocalToolOptions = {}): LocalTo isError: true } } - const { absolutePath, relativePath } = resolveWorkspacePath(rawPath, context) + const { absolutePath, relativePath } = await resolveWorkspacePath(rawPath, context) await statOp(absolutePath) const fileBuffer = await readFileOp(absolutePath) const classification = getReadClassification(absolutePath, context.workspace) diff --git a/kun/src/adapters/tool/builtin-search-tools.ts b/kun/src/adapters/tool/builtin-search-tools.ts index 9cf2fa70d..a07f36bab 100644 --- a/kun/src/adapters/tool/builtin-search-tools.ts +++ b/kun/src/adapters/tool/builtin-search-tools.ts @@ -43,7 +43,7 @@ export function createLsLocalTool(options: LsLocalToolOptions = {}): LocalTool { execute: async (args, context) => withToolBoundary(async () => { const rawPath = typeof args.path === 'string' && args.path.trim() ? args.path : '.' const limit = normalizePositiveInteger(args.limit, options.defaultLimit ?? 
DEFAULT_LIST_LIMIT) - const { workspaceRoot: root, absolutePath, relativePath } = resolveWorkspacePath(rawPath, context) + const { workspaceRoot: root, absolutePath, relativePath } = await resolveWorkspacePath(rawPath, context) const targetStat = await statOp(absolutePath) if (!targetStat.isDirectory()) { return { @@ -95,7 +95,7 @@ export function createFindLocalTool(options: FindLocalToolOptions = {}): LocalTo if (!pattern) return { output: { error: 'pattern is required' }, isError: true } const rawPath = typeof args.path === 'string' && args.path.trim() ? args.path : '.' const limit = normalizePositiveInteger(args.limit, options.defaultLimit ?? DEFAULT_FIND_LIMIT) - const { workspaceRoot: root, absolutePath, relativePath } = resolveWorkspacePath(rawPath, context) + const { workspaceRoot: root, absolutePath, relativePath } = await resolveWorkspacePath(rawPath, context) const matcher = globToRegExp(pattern.includes('/') ? pattern : `**/${pattern}`) if (options.operations?.glob) { const matches = await options.operations.glob({ pattern, path: absolutePath, limit }) @@ -213,7 +213,7 @@ export function createGrepLocalTool(options: GrepLocalToolOptions = {}): LocalTo ? new RegExp(pattern.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'), flags) : new RegExp(pattern, flags) const globMatcher = glob ? globToRegExp(glob.includes('/') ? 
glob : `**/${glob}`) : null - const { workspaceRoot: root, absolutePath, relativePath } = resolveWorkspacePath(rawPath, context) + const { workspaceRoot: root, absolutePath, relativePath } = await resolveWorkspacePath(rawPath, context) if (options.operations?.search) { const matches = await options.operations.search({ pattern, diff --git a/kun/src/adapters/tool/builtin-tool-utils.symlink.test.ts b/kun/src/adapters/tool/builtin-tool-utils.symlink.test.ts new file mode 100644 index 000000000..a3270f08d --- /dev/null +++ b/kun/src/adapters/tool/builtin-tool-utils.symlink.test.ts @@ -0,0 +1,76 @@ +import { mkdtemp, mkdir, rm, symlink, writeFile } from 'node:fs/promises' +import { tmpdir } from 'node:os' +import { join } from 'node:path' +import { afterEach, beforeEach, describe, expect, it } from 'vitest' +import type { ToolHostContext } from '../../ports/tool-host.js' +import { resolveWorkspacePath } from './builtin-tool-utils.js' + +function context(workspace: string): ToolHostContext { + return { + threadId: 'thread_symlink', + turnId: 'turn_symlink', + workspace, + approvalPolicy: 'always', + sandboxMode: 'workspace-write', + abortSignal: new AbortController().signal, + awaitApproval: async () => 'allow' + } +} + +describe('resolveWorkspacePath symlink escape', () => { + let base: string + let workspace: string + let outside: string + + beforeEach(async () => { + base = await mkdtemp(join(tmpdir(), 'kun-symlink-')) + workspace = join(base, 'ws') + outside = join(base, 'outside') + await mkdir(workspace, { recursive: true }) + }) + + afterEach(async () => { + await rm(base, { recursive: true, force: true }) + }) + + it('rejects a DANGLING symlink whose target is outside the workspace (write/create case)', async () => { + // `outside` deliberately does NOT exist — realpath() reports ENOENT for the + // link exactly as for a missing file. This is the hole the fix closes. 
+ await symlink(outside, join(workspace, 'evil')) + await expect(resolveWorkspacePath('evil', context(workspace))).rejects.toThrow(/escapes the workspace root/) + }) + + it('rejects a path that traverses through a dangling symlink to an outside dir', async () => { + await symlink(outside, join(workspace, 'dlink')) + await expect(resolveWorkspacePath('dlink/sub/new.txt', context(workspace))).rejects.toThrow( + /escapes the workspace root/ + ) + }) + + it('rejects an EXISTING symlink that points outside the workspace', async () => { + await mkdir(outside, { recursive: true }) + await symlink(outside, join(workspace, 'link')) + await expect(resolveWorkspacePath('link/file.txt', context(workspace))).rejects.toThrow( + /escapes the workspace root/ + ) + }) + + it('allows a dangling symlink that stays inside the workspace', async () => { + // Link target is absent but in-workspace — a legitimate write/create target. + await symlink(join(workspace, 'data', 'note.txt'), join(workspace, 'good')) + const resolved = await resolveWorkspacePath('good', context(workspace)) + expect(resolved.absolutePath).toBe(join(workspace, 'good')) + }) + + it('allows creating a new nested file with no symlinks involved', async () => { + const resolved = await resolveWorkspacePath('sub/dir/new.txt', context(workspace)) + expect(resolved.absolutePath).toBe(join(workspace, 'sub', 'dir', 'new.txt')) + }) + + it('allows reading an existing in-workspace file', async () => { + await writeFile(join(workspace, 'real.txt'), 'hi') + const resolved = await resolveWorkspacePath('real.txt', context(workspace)) + expect(resolved.absolutePath).toBe(join(workspace, 'real.txt')) + expect(resolved.relativePath).toBe('real.txt') + }) +}) diff --git a/kun/src/adapters/tool/builtin-tool-utils.ts b/kun/src/adapters/tool/builtin-tool-utils.ts index 4099c155f..4d8907e47 100644 --- a/kun/src/adapters/tool/builtin-tool-utils.ts +++ b/kun/src/adapters/tool/builtin-tool-utils.ts @@ -1,5 +1,5 @@ import { existsSync } from 
'node:fs' -import { readFile, readdir, stat } from 'node:fs/promises' +import { lstat, readFile, readdir, readlink, realpath, stat } from 'node:fs/promises' import { spawn, spawnSync, type ChildProcess } from 'node:child_process' import { basename, dirname, isAbsolute, join, relative, resolve, sep } from 'node:path' import type { ToolHostContext } from '../../ports/tool-host.js' @@ -57,24 +57,98 @@ export function workspaceRoot(workspace: string): string { return isAbsolute(workspace) ? resolve(workspace) : resolve(process.cwd(), workspace) } -export function resolveWorkspacePath(inputPath: string, context: ToolHostContext): { +export async function resolveWorkspacePath(inputPath: string, context: ToolHostContext): Promise<{ workspaceRoot: string absolutePath: string relativePath: string -} { +}> { const root = workspaceRoot(context.workspace) - const absolutePath = isAbsolute(inputPath) ? resolve(inputPath) : resolve(root, inputPath) - const relativePath = relative(root, absolutePath) - if (relativePath === '..' || relativePath.startsWith(`..${sep}`) || isAbsolute(relativePath)) { + const lexicalAbsolutePath = isAbsolute(inputPath) ? resolve(inputPath) : resolve(root, inputPath) + const resolvedRoot = await safeRealpath(root) + if (resolvedRoot === null) { + // Workspace root itself does not exist; nothing to anchor the escape + // check against. This is distinct from an actual escape (handled below). + throw new Error(`workspace root does not exist: ${root}`) + } + const resolvedAbsolute = await resolveSymlinkSafe(lexicalAbsolutePath) + const resolvedRelative = relative(resolvedRoot, resolvedAbsolute) + if (resolvedRelative === '..' || resolvedRelative.startsWith(`..${sep}`) || isAbsolute(resolvedRelative)) { throw new Error(`path escapes the workspace root: ${inputPath}`) } + // Return LEXICAL paths to callers. 
The realpath-resolved pair is only used + // for the escape check above; downstream code (subprocess cwd, display + // paths, language-server init) expects the user-facing workspace path, + // which on symlinked roots (e.g. macOS `/tmp` -> `/private/tmp`) would + // otherwise diverge from what the user typed and break display layers. return { workspaceRoot: root, - absolutePath, - relativePath: relativePath || '.' + absolutePath: lexicalAbsolutePath, + relativePath: relative(root, lexicalAbsolutePath) || '.' } } +async function safeRealpath(target: string): Promise { + try { + return await realpath(target) + } catch (error) { + const code = (error as NodeJS.ErrnoException).code + if (code === 'ENOENT') return null + if (code === 'EACCES' || code === 'ELOOP' || code === 'ENOTDIR') return null + throw error + } +} + +// Whether `target` is itself a symbolic link, without following it. Returns +// false when the entry is absent or cannot be stat-ed (treated as "not a link" +// — the escape check still anchors against the nearest existing ancestor). +async function isSymlink(target: string): Promise { + try { + return (await lstat(target)).isSymbolicLink() + } catch { + return false + } +} + +async function resolveSymlinkSafe(lexicalPath: string, depth = 0): Promise { + // Guard against symlink loops (dangling link A -> B -> A never resolves). + if (depth > 40) { + throw new Error(`too many symbolic links resolving: ${lexicalPath}`) + } + const direct = await safeRealpath(lexicalPath) + if (direct !== null) return direct + // Target doesn't fully resolve — either a genuinely missing path (write/create + // case) or a *dangling* symlink whose target is absent. `realpath` reports + // both as ENOENT, so we must walk the components ourselves: anchor on the + // nearest existing ancestor, but if a non-resolving component is actually a + // symlink, follow it explicitly so the redirection is reflected in the escape + // check. 
Re-anchoring a dangling symlink lexically (the old behavior) let a + // planted link like `/evil -> /etc/passwd` (target absent) pass as an + // in-workspace path and escape on the subsequent write. + const segments: string[] = [] + let current = lexicalPath + // Guard against pathological component counts. + for (let i = 0; i < 128 && current !== dirname(current); i += 1) { + const resolved = await safeRealpath(current) + if (resolved !== null) { + return segments.length > 0 ? resolve(resolved, ...segments) : resolved + } + if (await isSymlink(current)) { + // Dangling (or otherwise non-resolving) symlink: follow its target so the + // redirection is reflected, then re-resolve the target plus the suffix + // collected below this component. + const linkTarget = await readlink(current) + const resolvedParent = (await safeRealpath(dirname(current))) ?? dirname(current) + const followed = isAbsolute(linkTarget) ? resolve(linkTarget) : resolve(resolvedParent, linkTarget) + const rejoined = segments.length > 0 ? resolve(followed, ...segments) : followed + return resolveSymlinkSafe(rejoined, depth + 1) + } + segments.unshift(basename(current)) + current = dirname(current) + } + // Nothing on the path exists; treat as escape. 
+ throw new Error(`path escapes the workspace root: ${lexicalPath}`) +} + export function isBinaryBuffer(buffer: Buffer): boolean { const sample = buffer.subarray(0, Math.min(buffer.length, 4096)) for (const byte of sample) { diff --git a/kun/src/adapters/tool/capability-registry.ts b/kun/src/adapters/tool/capability-registry.ts index 65552bb6f..ecabc78a4 100644 --- a/kun/src/adapters/tool/capability-registry.ts +++ b/kun/src/adapters/tool/capability-registry.ts @@ -116,6 +116,7 @@ export class CapabilityRegistry { private canUseProvider(provider: ToolProviderPolicy, context?: ToolHostContext): boolean { if (!provider.enabled || !provider.available) return false + if (context?.blockedProviderIds?.includes(provider.id)) return false const allowed = context?.allowedProviderIds if (allowed && !allowed.includes(provider.id)) return false return true @@ -125,6 +126,7 @@ export class CapabilityRegistry { if (isPlanModeContext(context) && !PLAN_MODE_ALLOWED_TOOL_NAMES.has(toolName)) { return false } + if (context?.blockedToolNames?.includes(toolName)) return false const allowed = context?.allowedToolNames return !allowed || allowed.includes(toolName) } diff --git a/kun/src/adapters/tool/delegation-tool-provider.ts b/kun/src/adapters/tool/delegation-tool-provider.ts index 5e91553db..f4000ed3b 100644 --- a/kun/src/adapters/tool/delegation-tool-provider.ts +++ b/kun/src/adapters/tool/delegation-tool-provider.ts @@ -4,7 +4,9 @@ import { LocalToolHost } from './local-tool-host.js' export function buildDelegationToolProviders(runtime: DelegationRuntime | undefined): CapabilityToolProvider[] { if (!runtime) return [] - const profiles = runtime.listProfiles() + // Only subagent/all roles are delegation targets; primary-only personas + // are for starting a session, not for delegate_task. 
+ const profiles = runtime.listProfiles().filter((profile) => profile.mode !== 'primary') const profileNames = profiles.map((profile) => profile.name) return [{ id: 'delegation', @@ -18,13 +20,17 @@ export function buildDelegationToolProviders(runtime: DelegationRuntime | undefi inputSchema: { type: 'object', properties: { - label: { type: 'string', description: 'Short label for this subagent run.' }, + label: { type: 'string', description: 'A 2-4 word name for this subagent, shown in the UI as its title (e.g. "审查登录流程", "fix failing test", "greet user"). ALWAYS provide it so the user can tell subagents apart, especially when delegating several in parallel. Prefer a distinct label per call.' }, prompt: { type: 'string', description: 'The task for the child agent.' }, workspace: { type: 'string' }, model: { type: 'string', description: 'Override the child model. Defaults to the profile model or server default.' }, profile: profileNames.length ? { type: 'string', enum: profileNames, description: 'Subagent role to apply (model, preamble, tool policy).' } - : { type: 'string', description: 'Subagent role to apply (model, preamble, tool policy).' } + : { type: 'string', description: 'Subagent role to apply (model, preamble, tool policy).' }, + detach: { + type: 'boolean', + description: 'Fire-and-forget. The call returns immediately with a queued/running record; the child keeps executing in the background and can be checked via diagnostics or aborted from the GUI.' + } }, required: ['prompt'], additionalProperties: false @@ -41,6 +47,7 @@ export function buildDelegationToolProviders(runtime: DelegationRuntime | undefi workspace: typeof args.workspace === 'string' ? args.workspace : context.workspace, ...(typeof args.model === 'string' ? { model: args.model } : {}), ...(typeof args.profile === 'string' ? { profile: args.profile } : {}), + ...(args.detach === true ? 
{ detach: true } : {}), signal: context.abortSignal }) return { @@ -66,7 +73,7 @@ export function buildDelegationToolProviders(runtime: DelegationRuntime | undefi function buildDelegateTaskDescription( runtime: DelegationRuntime, - profiles: { name: string; toolPolicy: string; model?: string }[] + profiles: { name: string; mode: string; toolPolicy: string; model?: string; providerId?: string; description?: string }[] ): string { const lines = [ 'Run a bounded child agent task and return its summary.', @@ -75,7 +82,7 @@ function buildDelegateTaskDescription( ] if (profiles.length) { const summary = profiles - .map((profile) => `${profile.name} (${profile.toolPolicy}${profile.model ? `, ${profile.model}` : ''})`) + .map((profile) => `${profile.name} (${profile.toolPolicy}${profile.model ? `, ${profile.model}` : ''}${profile.providerId ? ` @${profile.providerId}` : ''})${profile.description ? ` — ${profile.description}` : ''}`) .join('; ') lines.push(`Available profiles: ${summary}.`) } diff --git a/kun/src/adapters/tool/mcp-tool-provider.ts b/kun/src/adapters/tool/mcp-tool-provider.ts index d02a2a32e..99220f734 100644 --- a/kun/src/adapters/tool/mcp-tool-provider.ts +++ b/kun/src/adapters/tool/mcp-tool-provider.ts @@ -463,13 +463,16 @@ async function createSdkMcpClient(serverId: string, server: McpServerConfig): Pr function createTransport(server: McpServerConfig): Transport { switch (server.transport) { - case 'stdio': + case 'stdio': { + const cwd = resolveMcpServerCwd(server) return new StdioClientTransport({ command: server.command ?? '', args: server.args, env: buildMcpStdioEnvironment(server.env), + ...(cwd ? { cwd } : {}), stderr: 'pipe' }) + } case 'streamable-http': return new StreamableHTTPClientTransport(new URL(server.url ?? 
''), { requestInit: { headers: server.headers } @@ -482,6 +485,14 @@ function createTransport(server: McpServerConfig): Transport { } } +export function resolveMcpServerCwd(server: McpServerConfig): string | undefined { + if (server.transport !== 'stdio') return undefined + const configured = server.cwd?.trim() + if (configured) return configured + if (server.trustScope !== 'workspace') return undefined + return server.trustedWorkspaceRoots.map((root) => root.trim()).find(Boolean) +} + function fetchWithHeaders(headers: Record): typeof fetch { return (input, init) => { const mergedHeaders = new Headers(init?.headers) diff --git a/kun/src/adapters/tool/mcp-tool-search.ts b/kun/src/adapters/tool/mcp-tool-search.ts index 255396601..20989f486 100644 --- a/kun/src/adapters/tool/mcp-tool-search.ts +++ b/kun/src/adapters/tool/mcp-tool-search.ts @@ -319,7 +319,16 @@ function createMcpSearchTools(options: McpSearchProviderOptions): LocalTool[] { } function trustedRecords(options: McpSearchProviderOptions, context: ToolHostContext): McpSearchCatalogRecord[] { - return options.state.records.filter((record) => options.isServerTrusted(record.server, context.workspace)) + const blocked = context.blockedProviderIds + return options.state.records.filter((record) => + options.isServerTrusted(record.server, context.workspace) + // Honor the per-turn provider deny-list (e.g. a subagent's blockedMcpServers). + // In search mode the per-server `mcp:` provider is never registered, so + // CapabilityRegistry.canUseProvider can't gate it — this is the single + // chokepoint shared by mcp_search/mcp_describe/mcp_call, so filtering here + // blocks enumeration, schema disclosure, AND execution of a blocked server. 
+ && !blocked?.includes(`mcp:${record.serverId}`) + ) } function resolveTrustedRecord( diff --git a/kun/src/adapters/tool/skill-tool-provider.ts b/kun/src/adapters/tool/skill-tool-provider.ts index b24b265bd..b76c44b0d 100644 --- a/kun/src/adapters/tool/skill-tool-provider.ts +++ b/kun/src/adapters/tool/skill-tool-provider.ts @@ -15,7 +15,7 @@ import type { SkillRuntime } from '../../skills/skill-runtime.js' export function buildSkillToolProviders( skillRuntime: SkillRuntime | undefined ): CapabilityToolProvider[] { - if (!skillRuntime || skillRuntime.count() === 0) return [] + if (!skillRuntime || !skillRuntime.enabled()) return [] return [{ id: 'skill', kind: 'skill', @@ -43,10 +43,10 @@ export function buildSkillToolProviders( additionalProperties: false }, policy: 'auto', - execute: async (args) => { + execute: async (args, context) => { const skillId = typeof args.skill_id === 'string' ? args.skill_id : '' if (!skillId.trim()) return { output: { error: 'skill_id is required' }, isError: true } - const result = skillRuntime.loadSkillById(skillId) + const result = await skillRuntime.loadSkillById(skillId, context.workspace, context.blockedSkillIds) if ('error' in result) return { output: result, isError: true } return { output: result } } diff --git a/kun/src/cli/cli-options.ts b/kun/src/cli/cli-options.ts index 3bce14730..33f9b993a 100644 --- a/kun/src/cli/cli-options.ts +++ b/kun/src/cli/cli-options.ts @@ -11,6 +11,7 @@ import { DEFAULT_STORAGE_CONFIG, ModelConfigSchema, QualityConfigSchema, + RolesConfigSchema, RuntimeTuningConfigSchema, ServeProviderConfigSchema, StorageConfigSchema, @@ -59,6 +60,7 @@ export const ServeOptionsSchema = z.object({ models: ModelConfigSchema.optional(), contextCompaction: ContextCompactionConfigSchema.optional(), runtime: RuntimeTuningConfigSchema.optional(), + roles: RolesConfigSchema.optional(), capabilities: KunCapabilitiesConfig.default(DEFAULT_KUN_CAPABILITIES_CONFIG), hooks: HooksConfigSchema.optional(), quality: 
QualityConfigSchema.optional() diff --git a/kun/src/cli/serve-entry.ts b/kun/src/cli/serve-entry.ts index 6944d03ff..7467c43a5 100644 --- a/kun/src/cli/serve-entry.ts +++ b/kun/src/cli/serve-entry.ts @@ -61,6 +61,7 @@ async function serveMain(argv: readonly string[]): Promise { installServeCrashHandlers(() => handle) const server = await startKunServe(parsed.options) handle = server + await selfVerifyHealth(server.host, server.port) const info = server.runtime.info() const startupInfo = { service: 'kun', @@ -112,6 +113,31 @@ async function hideMacosDockIfRunningAsElectron(): Promise { } } +const SELF_VERIFY_TIMEOUT_MS = 5_000 +const SELF_VERIFY_POLL_MS = 100 + +async function selfVerifyHealth(host: string, port: number): Promise { + const url = `http://${host}:${port}/health` + const deadline = Date.now() + SELF_VERIFY_TIMEOUT_MS + while (Date.now() < deadline) { + try { + const res = await fetch(url, { + signal: AbortSignal.timeout(1_000) + }) + if (res.ok) { + const body = (await res.json()) as { status?: string } + if (body?.status === 'ok') return + } + } catch { + // retry + } + await new Promise((r) => setTimeout(r, SELF_VERIFY_POLL_MS)) + } + process.stderr.write( + `[kun] warning: self-health-probe on http://${host}:${port}/health did not pass within ${SELF_VERIFY_TIMEOUT_MS}ms — proceeding anyway\n` + ) +} + export async function main(argv: readonly string[]): Promise { await hideMacosDockIfRunningAsElectron() const command = splitKunCliCommand(argv) diff --git a/kun/src/cli/serve.ts b/kun/src/cli/serve.ts index 4cfd303df..6b80bf92c 100644 --- a/kun/src/cli/serve.ts +++ b/kun/src/cli/serve.ts @@ -143,6 +143,7 @@ export function parseServeOptions( models: loadedConfig?.config.models, contextCompaction: loadedConfig?.config.contextCompaction, runtime: loadedConfig?.config.runtime, + roles: loadedConfig?.config.roles, capabilities: loadedConfig?.config.capabilities ?? 
DEFAULT_SERVE_OPTIONS.capabilities, hooks: loadedConfig?.config.hooks, quality: loadedConfig?.config.quality diff --git a/kun/src/config/kun-config.ts b/kun/src/config/kun-config.ts index 91ccdfb47..5eb1b4621 100644 --- a/kun/src/config/kun-config.ts +++ b/kun/src/config/kun-config.ts @@ -13,7 +13,8 @@ import { KunCapabilitiesConfig, ModelInputModality, ModelMessagePartSupport, - ModelReasoningCapabilityMetadata + ModelReasoningCapabilityMetadata, + ModelReasoningEffort } from '../contracts/capabilities.js' import { DEFAULT_MODEL_ENDPOINT_FORMAT, @@ -107,6 +108,8 @@ export const ContextCompactionConfigSchema = z summaryTimeoutMs: PositiveInt.optional(), summaryMaxTokens: PositiveInt.optional(), summaryInputMaxBytes: PositiveInt.optional(), + summaryModel: z.string().min(1).optional(), + summaryProviderId: z.string().min(1).optional(), modelProfiles: z.record(z.string().min(1), ModelContextProfileConfigSchema).optional() }) .strict() @@ -248,12 +251,39 @@ export const KunServeConfigSchema = z }) .strict() +/** + * Internal-LLM role model routing. The global `smallModel` slot is the default + * for cheap internal one-shot calls (thread title, whole-session summary). Each + * role can override with its own model/provider. Empty/absent => fall back to + * smallModel, then the main conversation model. Compaction is intentionally NOT + * here: it reuses the main conversation model for prompt-cache reasons and only + * exposes its heuristic/model toggle via contextCompaction.summaryMode. + */ +export const RolesConfigSchema = z + .object({ + smallModel: z.string().min(1).optional(), + smallModelProviderId: z.string().min(1).optional(), + titleModel: z.string().min(1).optional(), + titleProviderId: z.string().min(1).optional(), + summaryModel: z.string().min(1).optional(), + summaryProviderId: z.string().min(1).optional(), + codeReviewModel: z.string().min(1).optional(), + codeReviewProviderId: z.string().min(1).optional(), + // Per-role reasoning depth. 
Default 'off' (the GUI omits it entirely). + titleReasoningEffort: ModelReasoningEffort.optional(), + summaryReasoningEffort: ModelReasoningEffort.optional(), + codeReviewReasoningEffort: ModelReasoningEffort.optional() + }) + .strict() +export type RolesConfig = z.infer + export const KunConfigSchema = z .object({ serve: KunServeConfigSchema.optional(), models: ModelConfigSchema.optional(), contextCompaction: ContextCompactionConfigSchema.optional(), runtime: RuntimeTuningConfigSchema.optional(), + roles: RolesConfigSchema.optional(), capabilities: KunCapabilitiesConfig.default(DEFAULT_KUN_CAPABILITIES_CONFIG), hooks: HooksConfigSchema.optional(), quality: QualityConfigSchema.optional() diff --git a/kun/src/contracts/approvals.ts b/kun/src/contracts/approvals.ts index e8a8b9576..b8f64cf82 100644 --- a/kun/src/contracts/approvals.ts +++ b/kun/src/contracts/approvals.ts @@ -10,6 +10,7 @@ export type ApprovalDecisionRequest = z.infer export const ApprovalDecisionResponse = z.object({ approvalId: z.string().min(1), decision: z.enum(['allow', 'deny']), - status: z.enum(['allowed', 'denied', 'expired']) + status: z.enum(['allowed', 'denied', 'expired']), + alreadyResolved: z.boolean().optional() }) export type ApprovalDecisionResponse = z.infer diff --git a/kun/src/contracts/capabilities.ts b/kun/src/contracts/capabilities.ts index 062ba3e00..c46ffde67 100644 --- a/kun/src/contracts/capabilities.ts +++ b/kun/src/contracts/capabilities.ts @@ -51,6 +51,11 @@ export const ModelCapabilityMetadata = z outputModalities: z.array(ModelInputModality).min(1), supportsToolCalling: z.boolean(), contextWindowTokens: z.number().int().positive().optional(), + // Maximum tokens the model may emit per response. When set it caps the + // request's output budget (max_tokens / max_output_tokens). Absent means + // "use the runtime default" — which is reasoning-aware for the Anthropic + // Messages format so thinking models don't truncate their tool calls. 
+ maxOutputTokens: z.number().int().positive().optional(), messageParts: z.array(ModelMessagePartSupport).min(1), reasoning: ModelReasoningCapabilityMetadata.optional(), // Per-model wire-format override. Lets one provider route some models to @@ -112,6 +117,7 @@ export const McpServerConfig = z transport: McpTransportKind, command: z.string().min(1).optional(), args: z.array(z.string()).default([]), + cwd: z.string().min(1).optional(), url: z.string().min(1).optional(), headers: StringRecord.default({}), env: StringRecord.default({}), @@ -182,6 +188,16 @@ export type WebCapabilityConfig = z.infer export const SkillsCapabilityConfig = CapabilityToggleConfig.extend({ roots: z.array(z.string().min(1)).default([]), + workspaceRoots: z.array(z.string().min(1)).default([]), + /** Global skill roots (e.g. ~/.kun/skills). Scanned after project roots. */ + globalRoots: z.array(z.string().min(1)).default([]), + /** + * Skill ids the user disabled in the GUI. Excluded everywhere a skill can + * surface (catalog, auto-match, load_skill, diagnostics) so a disabled skill + * is truly gone from the runtime, not merely hidden in the UI. Compared after + * `slug()` normalization on both sides. + */ + disabledIds: z.array(z.string().min(1)).default([]), legacySkillMd: z.boolean().default(true) }).strict() export type SkillsCapabilityConfig = z.infer @@ -189,6 +205,10 @@ export type SkillsCapabilityConfig = z.infer export const SubagentToolPolicy = z.enum(['readOnly', 'inherit']) export type SubagentToolPolicy = z.infer +/** Where an agent can be used: a delegated subagent, a primary session persona, or both. */ +export const SubagentMode = z.enum(['subagent', 'primary', 'all']) +export type SubagentMode = z.infer + /** * Tools a `readOnly` subagent may call. 
The list is enforced twice: the * child loop advertises only these names (schema filter) and the @@ -200,12 +220,43 @@ export const SUBAGENT_READ_ONLY_TOOL_NAMES = ['read', 'grep', 'find', 'ls'] as c export const SubagentProfileConfig = z .object({ + /** Display name for the GUI roster and pickers (falls back to the profile key). */ + name: z.string().min(1).optional(), + /** When-to-use description shown in the delegate_task schema and the GUI. */ + description: z.string().min(1).optional(), + /** UI accent color (hex) for the agent's chip/avatar. */ + color: z.string().min(1).optional(), + /** Where the agent can be used: delegated subagent, primary session persona, or both. */ + mode: SubagentMode.default('subagent'), /** Overrides the child model for this role (falls back to the server default). */ model: z.string().min(1).optional(), + /** Routes this role's child to a specific provider id (falls back to the runtime default provider). */ + providerId: z.string().min(1).optional(), + /** Persona/instructions appended to the base system prompt for this role (not a full replace). */ + systemPrompt: z.string().min(1).optional(), /** Short instruction prepended to the delegated task prompt. */ promptPreamble: z.string().min(1).optional(), - /** Whether the child is restricted to read-only tools or inherits the full set. */ - toolPolicy: SubagentToolPolicy.default('readOnly') + /** + * Whether the child is restricted to read-only tools or inherits the + * parent agent's full tool set + approval policy. Defaults to `inherit` + * (follow the main agent); a profile that needs read-only must say so + * explicitly (e.g. the built-in reviewers). + */ + toolPolicy: SubagentToolPolicy.default('inherit'), + /** Exact tool allow-list; overrides toolPolicy when set (e.g. ['read','grep','bash']). */ + allowedTools: z.array(z.string().min(1)).min(1).optional(), + /** Built-in tool names blocked for this profile (deny-list, layered on `inherit`; e.g. ['bash','write']). 
*/ + blockedTools: z.array(z.string().min(1)).optional(), + /** MCP server ids blocked for this profile (deny-list; the server's entire toolset is hidden from the child). */ + blockedMcpServers: z.array(z.string().min(1)).optional(), + /** Skill ids blocked for this profile (deny-list; default inherits every available skill). */ + blockedSkills: z.array(z.string().min(1)).optional(), + /** + * Reasoning depth applied to this profile's child model requests. Default + * 'off' (cheap); a profile opts into deeper thinking explicitly. Flows to + * the child agent's ModelRequest.reasoningEffort. + */ + reasoningEffort: ModelReasoningEffort.optional() }) .strict() export type SubagentProfileConfig = z.infer @@ -215,8 +266,16 @@ export const SubagentsCapabilityConfig = CapabilityToggleConfig.extend({ maxParallel: z.number().int().nonnegative().default(0), /** Hard cap on total children per parent thread. */ maxChildRuns: z.number().int().nonnegative().default(0), - /** Tool policy applied to children that do not resolve a profile. */ - defaultToolPolicy: SubagentToolPolicy.default('readOnly'), + /** + * Tool policy applied to children that do not resolve a profile. Defaults to + * `inherit` so a delegated subagent follows the MAIN agent's tools AND + * approval/permission policy (it can edit/run shell iff the parent can). + * `inherit` never escalates beyond the parent: the child loop runs under the + * parent thread's approvalPolicy/sandboxMode, so a read-only parent yields a + * read-only child. Per-profile `toolPolicy` (e.g. the built-in read-only + * reviewers) still wins over this default. + */ + defaultToolPolicy: SubagentToolPolicy.default('inherit'), /** Profile chosen when `delegate_task` omits an explicit profile. */ defaultProfile: z.string().min(1).optional(), /** Named subagent roles (e.g. researcher/reviewer/verifier). 
*/ diff --git a/kun/src/contracts/events.ts b/kun/src/contracts/events.ts index 835274873..45e0c4797 100644 --- a/kun/src/contracts/events.ts +++ b/kun/src/contracts/events.ts @@ -78,6 +78,7 @@ const RuntimeEventBase = z.object({ // the GUI can show prefix reuse, tool fan-out, timing, and cost per // subagent without a separate diagnostics fetch. childModel: z.string().optional(), + childProviderId: z.string().optional(), childProfile: z.string().optional(), childToolPolicy: SubagentToolPolicy.optional(), prefixReused: z.boolean().optional(), @@ -109,6 +110,7 @@ export type ItemEvent = z.infer export const ThreadLifecycleEvent = RuntimeEventBase.extend({ kind: z.enum(['thread_created', 'thread_updated']), title: z.string().optional(), + titleAuto: z.boolean().optional(), status: z.string().optional() }) export type ThreadLifecycleEvent = z.infer diff --git a/kun/src/contracts/threads.ts b/kun/src/contracts/threads.ts index 5eee0a1e1..4948a64d3 100644 --- a/kun/src/contracts/threads.ts +++ b/kun/src/contracts/threads.ts @@ -93,6 +93,20 @@ export type ThreadTodoList = z.infer export const ThreadSchema = z.object({ id: z.string().min(1), title: z.string(), + /** + * Whether the current title was auto-derived (client-side first-message + * heuristic or the backend LLM titler) rather than set by the user. + * - `true` → provisional/auto title; the backend LLM titler may upgrade it. + * - `false` → the user renamed it manually; never auto-overwrite. + * - absent → legacy/unknown; the backend only upgrades placeholder titles. + */ + titleAuto: z.boolean().optional(), + /** + * Optional whole-conversation summary (~1 paragraph) produced on demand by + * the Summary internal-LLM role. Surfaced as the conversation's hover / + * subtitle in the thread list. Absent until the user runs "summarize". 
+ */ + summary: z.string().optional(), workspace: z.string(), model: z.string(), /** @@ -102,10 +116,24 @@ export const ThreadSchema = z.object({ * bridges pin a non-runtime provider per thread. */ providerId: z.string().optional(), + /** + * Optional subagent profile id this thread is bound to. When set, the + * thread persona (model / providerId / systemPrompt below) is a snapshot + * of the agent at thread-create time so later agent edits don't drift + * historical conversations. + */ + agentId: z.string().optional(), + /** + * Optional thread-level systemPrompt override. When non-empty, it + * replaces the runtime's base systemPrompt in every ModelRequest on this + * thread (primary-agent persona snapshot path). + */ + systemPrompt: z.string().optional(), mode: ThreadMode, status: ThreadStatus, approvalPolicy: ApprovalPolicySchema.default(DEFAULT_APPROVAL_POLICY), sandboxMode: SandboxModeSchema.default(DEFAULT_SANDBOX_MODE), + pinned: z.boolean().optional(), costBudgetUsd: z.number().positive().optional(), costBudgetWarningSent: z.boolean().optional(), relation: ThreadRelation.default('primary'), @@ -126,13 +154,18 @@ export type ThreadRecord = z.infer export const ThreadSummarySchema = ThreadSchema.pick({ id: true, title: true, + titleAuto: true, + summary: true, workspace: true, model: true, providerId: true, + agentId: true, + systemPrompt: true, mode: true, status: true, approvalPolicy: true, sandboxMode: true, + pinned: true, costBudgetUsd: true, costBudgetWarningSent: true, relation: true, @@ -151,6 +184,8 @@ export type ThreadSummary = z.infer export const CreateThreadRequest = z.object({ title: z.string().optional(), + /** Marks the provided title as an auto/provisional title (see ThreadSchema.titleAuto). */ + titleAuto: z.boolean().optional(), workspace: z.string().min(1), model: z.string().min(1), /** @@ -160,6 +195,10 @@ export const CreateThreadRequest = z.object({ * provider's HTTP client. 
*/ providerId: z.string().optional(), + /** Optional subagent profile id to bind this thread to. */ + agentId: z.string().optional(), + /** Optional persona systemPrompt snapshot applied to every ModelRequest on this thread. */ + systemPrompt: z.string().optional(), mode: ThreadMode.default('agent'), approvalPolicy: ApprovalPolicySchema.optional(), sandboxMode: SandboxModeSchema.optional(), @@ -242,10 +281,13 @@ export type ClearThreadTodosResponse = z.infer export const UpdateThreadRequest = z .object({ title: z.string().optional(), + /** Marks the new title as auto/provisional (true) or user-set/locked (false). */ + titleAuto: z.boolean().optional(), workspace: z.string().min(1).optional(), status: ThreadStatus.optional(), approvalPolicy: ApprovalPolicySchema.optional(), sandboxMode: SandboxModeSchema.optional(), + pinned: z.boolean().optional(), costBudgetUsd: z.number().positive().nullable().optional(), costBudgetWarningSent: z.boolean().optional(), relation: ThreadRelation.optional() @@ -253,10 +295,12 @@ export const UpdateThreadRequest = z .refine( (value) => value.title !== undefined || + value.titleAuto !== undefined || value.workspace !== undefined || value.status !== undefined || value.approvalPolicy !== undefined || value.sandboxMode !== undefined || + value.pinned !== undefined || value.costBudgetUsd !== undefined || value.costBudgetWarningSent !== undefined || value.relation !== undefined, diff --git a/kun/src/delegation/builtin-profiles.ts b/kun/src/delegation/builtin-profiles.ts index ac90973c7..8d34b789e 100644 --- a/kun/src/delegation/builtin-profiles.ts +++ b/kun/src/delegation/builtin-profiles.ts @@ -18,6 +18,7 @@ import type { * is `readOnly`, enforced by the delegation runtime and tool registry). 
*/ export const DESIGN_REVIEWER_PROFILE: SubagentProfileConfig = { + mode: 'subagent', toolPolicy: 'readOnly', promptPreamble: [ '你是 Kun 内置的设计审查者,以只读方式审查前端代码与原型的视觉与交互质量。', @@ -37,6 +38,7 @@ export const DESIGN_REVIEWER_PROFILE: SubagentProfileConfig = { * a normal review pass) and never edits files (toolPolicy is `readOnly`). */ export const OVER_ENGINEERING_REVIEWER_PROFILE: SubagentProfileConfig = { + mode: 'subagent', toolPolicy: 'readOnly', promptPreamble: [ '你是 Kun 内置的「过度设计审查者」,以只读方式审查代码的过度设计与不必要的复杂度——只找“能删什么、能用标准库/平台能力替换什么”,', @@ -56,8 +58,43 @@ export const OVER_ENGINEERING_REVIEWER_PROFILE: SubagentProfileConfig = { ].join('') } +/** + * General-purpose agent: full tool access (inherits the parent's tools and + * approval policy), so it can research and carry out multi-step work including + * editing files. The default target for "do this independent unit of work" + * delegations, including several in parallel. + */ +export const GENERAL_PROFILE: SubagentProfileConfig = { + mode: 'subagent', + toolPolicy: 'inherit', + description: '通用代理:研究复杂问题、执行多步骤任务,可读写文件、运行命令,可并行。', + promptPreamble: [ + '你是 Kun 内置的「通用代理」(General)。你能研究复杂问题并执行多步骤任务,', + '拥有与主代理一致的完整工具访问权限(todo 除外),因此可以在需要时读写文件、运行命令。', + '适合被派去并行承担一个独立的工作单元。聚焦交给你的具体任务,完成后简洁汇报结果与关键改动。' + ].join('') +} + +/** + * Fast read-only explorer: finds files, greps for keywords and answers + * questions about the codebase. Never edits (toolPolicy `readOnly`). + */ +export const EXPLORE_PROFILE: SubagentProfileConfig = { + mode: 'subagent', + toolPolicy: 'readOnly', + description: '只读探索代理:快速查找文件、搜索关键字、回答关于代码库的问题,不修改任何文件。', + promptPreamble: [ + '你是 Kun 内置的「探索代理」(Explore),一个快速的只读代码库代理。', + '你只读取/搜索/列目录,绝不修改任何文件。', + '当需要按模式快速查找文件、搜索代码关键字、或回答关于代码库的问题时使用你。', + '高效定位相关位置,返回结论(文件:行 + 简要说明),不做与任务无关的展开。' + ].join('') +} + /** All builtin profiles, keyed by their `delegate_task` profile name. 
*/ export const BUILTIN_SUBAGENT_PROFILES: Readonly> = { + general: GENERAL_PROFILE, + explore: EXPLORE_PROFILE, 'design-reviewer': DESIGN_REVIEWER_PROFILE, 'over-engineering-reviewer': OVER_ENGINEERING_REVIEWER_PROFILE } @@ -66,8 +103,16 @@ export const BUILTIN_SUBAGENT_PROFILES: Readonly = { ...config.profiles } + for (const [id, builtin] of Object.entries(BUILTIN_SUBAGENT_PROFILES)) { + const override = config.profiles[id] + profiles[id] = override ? { ...builtin, ...override } : builtin } + return { ...config, profiles } } diff --git a/kun/src/delegation/child-agent-executor.ts b/kun/src/delegation/child-agent-executor.ts index a8eb0e15b..e07ffe2d9 100644 --- a/kun/src/delegation/child-agent-executor.ts +++ b/kun/src/delegation/child-agent-executor.ts @@ -3,12 +3,13 @@ import { InMemoryEventBus } from '../adapters/in-memory-event-bus.js' import { InMemorySessionStore } from '../adapters/in-memory-session-store.js' import { InMemoryThreadStore } from '../adapters/in-memory-thread-store.js' import { InMemoryUserInputGate } from '../adapters/in-memory-user-input-gate.js' -import type { ImmutablePrefix } from '../cache/immutable-prefix.js' +import { setSystemPrompt, type ImmutablePrefix } from '../cache/immutable-prefix.js' import { SUBAGENT_READ_ONLY_TOOL_NAMES, type ModelCapabilityMetadata } from '../contracts/capabilities.js' import type { TurnItem } from '../contracts/items.js' import type { ApprovalPolicy, SandboxMode } from '../contracts/policy.js' import type { RuntimeTuningConfig } from '../config/kun-config.js' import { AgentLoop } from '../loop/agent-loop.js' +import { normalizeRoleReasoningEffort } from '../loop/reasoning-effort.js' import type { ContextCompactionConfig, ModelConfig } from '../loop/model-context-profile.js' import { ContextCompactor } from '../loop/context-compactor.js' import { InflightTracker } from '../loop/inflight-tracker.js' @@ -79,12 +80,38 @@ export function createChildAgentExecutor(options: ChildAgentExecutorOptions): Ch ids, 
nowIso }) - // Read-only children advertise only investigation tools. The allow-list - // is enforced twice by the capability registry: tools outside it are - // dropped from the model's tool schema and rejected at execute time. - const forcedAllowedToolNames = input.toolPolicy === 'readOnly' - ? [...SUBAGENT_READ_ONLY_TOOL_NAMES] + // Tool gating, most-specific first: an explicit allow-list wins; else a + // read-only policy restricts to investigation tools; else (inherit) the + // child sees the parent agent's FULL tool set — no forced allow-list, so + // it can edit/run shell exactly like the parent. The capability registry + // enforces an explicit list twice (dropped from the model's tool schema + // and rejected at execute), but `inherit` leaves it undefined so nothing + // is forced. The child is not an escalation: it runs under the parent + // thread's approvalPolicy/sandboxMode (set on the thread below from + // options.approvalPolicy/sandboxMode, which the runtime factory threads + // from the parent runtime), so a read-only parent still yields a + // read-only child. + const forcedAllowedToolNames = input.allowedTools + ? [...input.allowedTools] + : input.toolPolicy === 'readOnly' + ? [...SUBAGENT_READ_ONLY_TOOL_NAMES] + : undefined + // GUI "custom" capability scope: deny-lists layered on top of inherit. + // Built-in tools block by name; MCP servers block at the provider level + // (`mcp:`, drift-proof — new tools from a blocked server stay + // hidden); skills block by id. All three only REMOVE access, so they + // compose with the parent intersection and can never escalate the child. + const blockedToolNames = input.blockedTools?.length ? [...input.blockedTools] : undefined + const blockedProviderIds = input.blockedMcpServers?.length + ? input.blockedMcpServers.map((serverId) => `mcp:${serverId}`) : undefined + const blockedSkillIds = input.blockedSkills?.length ? 
[...input.blockedSkills] : undefined + // A custom system prompt augments the base prefix (kun tool/safety + // conventions stay) on a distinct fingerprint, so same-agent calls still + // hit the prompt cache; cross-agent reuse is intentionally given up. + const childPrefix = input.systemPrompt?.trim() + ? setSystemPrompt(options.prefix, `${options.prefix.systemPrompt}\n\n${input.systemPrompt.trim()}`.trim()) + : options.prefix const loop = new AgentLoop({ threadStore, sessionStore, @@ -98,10 +125,13 @@ export function createChildAgentExecutor(options: ChildAgentExecutorOptions): Ch inflight, steering, compactor, - prefix: options.prefix, + prefix: childPrefix, ids, nowIso, ...(forcedAllowedToolNames ? { forcedAllowedToolNames } : {}), + ...(blockedToolNames ? { blockedToolNames } : {}), + ...(blockedProviderIds ? { blockedProviderIds } : {}), + ...(blockedSkillIds ? { blockedSkillIds } : {}), ...(options.modelCapabilities ? { modelCapabilities: options.modelCapabilities } : {}), ...(options.skillRuntime ? { skillRuntime: options.skillRuntime } : {}), ...(options.memoryStore ? { memoryStore: options.memoryStore } : {}), @@ -118,7 +148,11 @@ export function createChildAgentExecutor(options: ChildAgentExecutorOptions): Ch model, mode: 'agent', approvalPolicy: options.approvalPolicy ?? 'auto', - ...(options.sandboxMode ? { sandboxMode: options.sandboxMode } : {}) + ...(options.sandboxMode ? { sandboxMode: options.sandboxMode } : {}), + // Route the child to the profile's provider. ThreadService threads + // providerId into every ModelRequest, and the executor's model is the + // MultiProviderModelClient, so this single field is all routing needs. + ...(input.providerId ? 
{ providerId: input.providerId } : {}) }, { id: input.childId, title: childThreadTitle(input.childId, input.label) @@ -134,6 +168,7 @@ export function createChildAgentExecutor(options: ChildAgentExecutorOptions): Ch prompt, model, mode: 'agent', + reasoningEffort: normalizeRoleReasoningEffort(input.reasoningEffort), // Children have no GUI surface to answer structured input prompts. disableUserInput: true } diff --git a/kun/src/delegation/delegation-runtime.ts b/kun/src/delegation/delegation-runtime.ts index 306986fbc..6b218aac1 100644 --- a/kun/src/delegation/delegation-runtime.ts +++ b/kun/src/delegation/delegation-runtime.ts @@ -1,9 +1,10 @@ import { mkdir, readFile, readdir, writeFile } from 'node:fs/promises' import { join } from 'node:path' import { z } from 'zod' -import { SubagentToolPolicy, type SubagentsCapabilityConfig } from '../contracts/capabilities.js' +import { SubagentToolPolicy, type SubagentMode, type SubagentProfileConfig, type SubagentsCapabilityConfig } from '../contracts/capabilities.js' import type { RuntimeEventRecorder } from '../services/runtime-event-recorder.js' import type { UsageSnapshot } from '../contracts/usage.js' +import { loadWorkspaceAgentProfiles } from './workspace-agents.js' const ChildRunUsage = z.object({ promptTokens: z.number().int().nonnegative().default(0), @@ -31,6 +32,8 @@ export const ChildRunRecord = z.object({ prompt: z.string().min(1), workspace: z.string().optional(), model: z.string().optional(), + /** Resolved provider id the child routed through, when one was selected. */ + providerId: z.string().optional(), /** Resolved subagent profile name, when one was selected. */ profile: z.string().optional(), /** Effective tool policy applied to the child (read-only vs inherited). 
*/ @@ -64,8 +67,19 @@ export type ChildRunExecutor = (input: { prompt: string workspace?: string model?: string + providerId?: string + systemPrompt?: string + allowedTools?: string[] + /** Built-in tool names blocked for this child (deny-list layered on inherit). */ + blockedTools?: string[] + /** MCP server ids blocked for this child (deny-list; whole server toolset hidden). */ + blockedMcpServers?: string[] + /** Skill ids blocked for this child (deny-list; catalog + activation + load_skill). */ + blockedSkills?: string[] toolPolicy: SubagentToolPolicy promptPreamble?: string + /** Reasoning depth for this profile's child model requests (default 'off'). */ + reasoningEffort?: string signal: AbortSignal }) => Promise<{ summary: string @@ -132,6 +146,12 @@ export class DelegationRuntime { private readonly threadCounts = new Map() /** Cached per-thread seed reads so concurrent first-spawns don't double-count. */ private readonly threadSeeds = new Map>() + /** + * Background (detached) child runs keyed by childId, exposing an + * AbortController so the user can cancel a long-running task from the + * GUI even after the parent turn finished. + */ + private readonly detachedAborts = new Map() constructor(private readonly options: { config: SubagentsCapabilityConfig @@ -150,7 +170,15 @@ export class DelegationRuntime { prompt: string workspace?: string model?: string + providerId?: string profile?: string + /** + * When true, runChild returns the queued ChildRunRecord immediately and + * continues execution in the background. The detached run gets its own + * AbortController so the user can cancel it via `abortChild(id)` even + * after the parent turn finishes. Default: false (synchronous). + */ + detach?: boolean signal: AbortSignal }): Promise { const config = this.options.config @@ -159,13 +187,28 @@ export class DelegationRuntime { // Resolve the profile up front so model/preamble/tool-policy are // captured on the record even if the child later fails. 
const profileName = input.profile?.trim() || config.defaultProfile - const profile = profileName ? config.profiles[profileName] : undefined + // Workspace overlay: `.kun/agents/*.md` in the call's workspace wins + // over the static `config.profiles` map. Loaded fresh per call so the + // user can edit overlays without restarting the runtime. + let profile: SubagentProfileConfig | undefined = profileName ? config.profiles[profileName] : undefined + if (profileName && input.workspace) { + const overlay = await loadWorkspaceAgentProfiles(input.workspace) + const hit = overlay.find((entry) => entry.id === profileName) + if (hit) profile = hit.profile + } if (profileName && !profile) { throw new Error(`unknown subagent profile: ${profileName}`) } const toolPolicy = profile?.toolPolicy ?? config.defaultToolPolicy const resolvedModel = input.model?.trim() || profile?.model + const resolvedProviderId = input.providerId?.trim() || profile?.providerId + const resolvedSystemPrompt = profile?.systemPrompt + const resolvedAllowedTools = profile?.allowedTools + const resolvedBlockedTools = profile?.blockedTools + const resolvedBlockedMcpServers = profile?.blockedMcpServers + const resolvedBlockedSkills = profile?.blockedSkills const promptPreamble = profile?.promptPreamble + const resolvedReasoningEffort = profile?.reasoningEffort // Reserve against the per-thread budget before persisting anything. await this.ensureSeeded(input.parentThreadId) @@ -183,6 +226,7 @@ export class DelegationRuntime { prompt: input.prompt, workspace: input.workspace, model: resolvedModel, + providerId: resolvedProviderId, profile: profileName, toolPolicy, status: 'queued', @@ -192,6 +236,38 @@ export class DelegationRuntime { await this.options.store.upsert(record) await this.recordChildEvent(record) + if (input.detach) { + // Spawn an independent signal so the parent turn's signal aborting + // doesn't reach into the background run. The user can still cancel + // via abortChild(id). 
+ const detachedController = new AbortController() + this.detachedAborts.set(record.id, detachedController) + // Surface ChildRunExecutor's resolved fields via the closure shared with + // the synchronous path. The same executor block runs inside executeChild. + void this.executeChild({ + record, + queuedAt, + profileName, + toolPolicy, + resolvedModel, + resolvedProviderId, + resolvedSystemPrompt, + resolvedAllowedTools, + resolvedBlockedTools, + resolvedBlockedMcpServers, + resolvedBlockedSkills, + promptPreamble, + resolvedReasoningEffort, + workspace: input.workspace, + label: input.label, + parentThreadId: input.parentThreadId, + parentTurnId: input.parentTurnId, + prompt: input.prompt, + signal: detachedController.signal + }).finally(() => this.detachedAborts.delete(record.id)) + return record + } + try { await this.acquireSlot(input.signal) } catch (error) { @@ -222,8 +298,15 @@ export class DelegationRuntime { prompt: input.prompt, workspace: input.workspace, model: resolvedModel, + ...(resolvedProviderId ? { providerId: resolvedProviderId } : {}), + ...(resolvedSystemPrompt ? { systemPrompt: resolvedSystemPrompt } : {}), + ...(resolvedAllowedTools ? { allowedTools: resolvedAllowedTools } : {}), + ...(resolvedBlockedTools ? { blockedTools: resolvedBlockedTools } : {}), + ...(resolvedBlockedMcpServers ? { blockedMcpServers: resolvedBlockedMcpServers } : {}), + ...(resolvedBlockedSkills ? { blockedSkills: resolvedBlockedSkills } : {}), toolPolicy, ...(promptPreamble ? { promptPreamble } : {}), + ...(resolvedReasoningEffort ? { reasoningEffort: resolvedReasoningEffort } : {}), signal: input.signal }) const finishedAt = this.now() @@ -259,6 +342,120 @@ export class DelegationRuntime { } } + /** + * Run the queue-acquire + execute + result-recording block for a child + * that was already persisted with status='queued'. Shared by the + * synchronous path (via inline code in runChild) and the detached path. 
+ * Failures are recorded on the record rather than re-thrown — for + * detached runs nobody is awaiting them anyway. + */ + private async executeChild(args: { + record: ChildRunRecord + queuedAt: string + profileName: string | undefined + toolPolicy: SubagentToolPolicy + resolvedModel: string | undefined + resolvedProviderId: string | undefined + resolvedSystemPrompt: string | undefined + resolvedAllowedTools: string[] | undefined + resolvedBlockedTools: string[] | undefined + resolvedBlockedMcpServers: string[] | undefined + resolvedBlockedSkills: string[] | undefined + promptPreamble: string | undefined + resolvedReasoningEffort: string | undefined + workspace: string | undefined + label: string | undefined + parentThreadId: string + parentTurnId: string + prompt: string + signal: AbortSignal + }): Promise { + let record = args.record + try { + await this.acquireSlot(args.signal) + } catch (error) { + record = ChildRunRecord.parse({ + ...record, + status: 'aborted', + error: errorMessage(error), + updatedAt: this.now() + }) + await this.options.store.upsert(record) + await this.recordChildEvent(record) + return record + } + + const startedAt = this.now() + const queuedMs = elapsedMs(args.queuedAt, startedAt) + record = ChildRunRecord.parse({ ...record, status: 'running', startedAt, queuedMs, updatedAt: startedAt }) + await this.options.store.upsert(record) + await this.recordChildEvent(record) + try { + const executor: ChildRunExecutor = this.options.executor ?? defaultExecutor + const result = await executor({ + childId: record.id, + parentThreadId: args.parentThreadId, + parentTurnId: args.parentTurnId, + ...(args.label ? { label: args.label } : {}), + prompt: args.prompt, + workspace: args.workspace, + model: args.resolvedModel, + ...(args.resolvedProviderId ? { providerId: args.resolvedProviderId } : {}), + ...(args.resolvedSystemPrompt ? { systemPrompt: args.resolvedSystemPrompt } : {}), + ...(args.resolvedAllowedTools ? 
{ allowedTools: args.resolvedAllowedTools } : {}), + ...(args.resolvedBlockedTools ? { blockedTools: args.resolvedBlockedTools } : {}), + ...(args.resolvedBlockedMcpServers ? { blockedMcpServers: args.resolvedBlockedMcpServers } : {}), + ...(args.resolvedBlockedSkills ? { blockedSkills: args.resolvedBlockedSkills } : {}), + toolPolicy: args.toolPolicy, + ...(args.promptPreamble ? { promptPreamble: args.promptPreamble } : {}), + ...(args.resolvedReasoningEffort ? { reasoningEffort: args.resolvedReasoningEffort } : {}), + signal: args.signal + }) + const finishedAt = this.now() + record = ChildRunRecord.parse({ + ...record, + status: 'completed', + summary: result.summary, + usage: result.usage ?? record.usage, + toolInvocations: result.toolInvocations, + prefixReused: result.prefixReused, + inheritedHistoryItems: result.inheritedHistoryItems, + durationMs: elapsedMs(startedAt, finishedAt), + updatedAt: finishedAt + }) + await this.options.store.upsert(record) + await this.recordChildEvent(record) + this.recordExternalUsage(record) + return record + } catch (error) { + const finishedAt = this.now() + record = ChildRunRecord.parse({ + ...record, + status: args.signal.aborted ? 'aborted' : 'failed', + error: errorMessage(error), + durationMs: elapsedMs(startedAt, finishedAt), + updatedAt: finishedAt + }) + await this.options.store.upsert(record) + await this.recordChildEvent(record) + return record + } finally { + this.releaseSlot() + } + } + + /** + * Abort a detached child by id. Returns `true` when a running detached + * job was signalled, `false` otherwise. Synchronous (in-flight) runs + * are unaffected — the caller can abort their own parent signal instead. + */ + abortChild(childId: string): boolean { + const controller = this.detachedAborts.get(childId) + if (!controller) return false + controller.abort() + return true + } + /** Concurrency ceiling; clamps to at least 1 so an enabled runtime never deadlocks. 
*/ private get parallelLimit(): number { return Math.max(1, this.options.config.maxParallel) @@ -324,11 +521,14 @@ export class DelegationRuntime { } /** Configured profiles, surfaced to the delegate_task tool schema/UI. */ - listProfiles(): { name: string; toolPolicy: SubagentToolPolicy; model?: string }[] { + listProfiles(): { name: string; mode: SubagentMode; toolPolicy: SubagentToolPolicy; model?: string; providerId?: string; description?: string }[] { return Object.entries(this.options.config.profiles).map(([name, profile]) => ({ name, + mode: profile.mode, toolPolicy: profile.toolPolicy, - ...(profile.model ? { model: profile.model } : {}) + ...(profile.model ? { model: profile.model } : {}), + ...(profile.providerId ? { providerId: profile.providerId } : {}), + ...(profile.description ? { description: profile.description } : {}) })) } @@ -371,6 +571,7 @@ export class DelegationRuntime { childStatus: record.status, childSeq: ++this.childSeq, ...(record.model ? { childModel: record.model } : {}), + ...(record.providerId ? { childProviderId: record.providerId } : {}), ...(record.profile ? { childProfile: record.profile } : {}), ...(record.toolPolicy ? { childToolPolicy: record.toolPolicy } : {}), ...(record.prefixReused !== undefined ? { prefixReused: record.prefixReused } : {}), diff --git a/kun/src/delegation/workspace-agents.ts b/kun/src/delegation/workspace-agents.ts new file mode 100644 index 000000000..2519aa224 --- /dev/null +++ b/kun/src/delegation/workspace-agents.ts @@ -0,0 +1,151 @@ +import { readdir, readFile } from 'node:fs/promises' +import { join } from 'node:path' +import { SubagentProfileConfig, type SubagentMode, type SubagentToolPolicy } from '../contracts/capabilities.js' + +/** + * Workspace-level agent overlay. + * + * Loads `/.kun/agents/*.md` and produces a profile map that + * the delegation runtime overlays on top of (`internal < GUI < workspace`). 
+ * Frontmatter format: + * + * --- + * id: code-reviewer # optional, defaults to filename stem + * name: Code Reviewer + * description: One-line "when to use" + * mode: subagent # subagent | primary | all + * model: deepseek-chat + * providerId: deepseek + * toolPolicy: inherit # readOnly | inherit (default: inherit = follow main agent) + * allowedTools: [read, grep] + * color: "#3b82f6" + * --- + * Body becomes the systemPrompt verbatim (kun's base prompt is + * prepended unless omit_base_prompt: true). + * + * Files with invalid frontmatter or missing required fields are dropped + * silently so a single broken file doesn't take down delegation. + */ +export type WorkspaceAgentProfile = { + id: string + source: 'workspace' + filePath: string + profile: SubagentProfileConfig +} + +const FRONTMATTER_RE = /^---\r?\n([\s\S]*?)\r?\n---\r?\n?/ + +export async function loadWorkspaceAgentProfiles(workspace: string): Promise { + if (!workspace) return [] + const dir = join(workspace, '.kun', 'agents') + let entries: string[] + try { + entries = await readdir(dir) + } catch (error) { + if ((error as NodeJS.ErrnoException).code === 'ENOENT') return [] + if ((error as NodeJS.ErrnoException).code === 'ENOTDIR') return [] + throw error + } + const results: WorkspaceAgentProfile[] = [] + for (const entry of entries) { + if (!entry.endsWith('.md')) continue + const filePath = join(dir, entry) + try { + const text = await readFile(filePath, 'utf8') + const parsed = parseAgentMarkdown(text, entry.replace(/\.md$/i, '')) + if (parsed) results.push({ ...parsed, filePath, source: 'workspace' }) + } catch { + // Skip unreadable / malformed files; do not bubble — overlay should + // never break the parent delegate_task call. + } + } + return results +} + +function parseAgentMarkdown(text: string, defaultId: string): { id: string; profile: SubagentProfileConfig } | null { + const match = FRONTMATTER_RE.exec(text) + if (!match) return null + const yamlRaw = match[1] ?? 
'' + const body = text.slice(match[0].length).trim() + const fields = parseSimpleYaml(yamlRaw) + const id = fields.id?.trim() || defaultId + if (!id) return null + const omitBase = boolField(fields, 'omit_base_prompt') === true || boolField(fields, 'omitBasePrompt') === true + const systemPromptFromBody = body || undefined + const raw: Record = { + ...(fields.name ? { name: fields.name } : {}), + ...(fields.description ? { description: fields.description } : {}), + ...(fields.color ? { color: fields.color } : {}), + mode: normalizeMode(fields.mode), + ...(fields.model ? { model: fields.model } : {}), + ...(fields.providerId ? { providerId: fields.providerId } : {}), + ...(fields.systemPrompt ? { systemPrompt: fields.systemPrompt } : systemPromptFromBody ? { systemPrompt: systemPromptFromBody } : {}), + ...(fields.promptPreamble ? { promptPreamble: fields.promptPreamble } : {}), + toolPolicy: normalizeToolPolicy(fields.toolPolicy), + ...(parseListField(fields, 'allowedTools') ? { allowedTools: parseListField(fields, 'allowedTools') } : {}), + ...(parseListField(fields, 'blockedTools') ? { blockedTools: parseListField(fields, 'blockedTools') } : {}), + ...(parseListField(fields, 'blockedMcpServers') ? { blockedMcpServers: parseListField(fields, 'blockedMcpServers') } : {}), + ...(parseListField(fields, 'blockedSkills') ? { blockedSkills: parseListField(fields, 'blockedSkills') } : {}), + // Reasoning depth (off|low|medium|high|max). SubagentProfileConfig validates; + // an invalid value would fail safeParse, so only known values survive. + ...(fields.reasoningEffort ? { reasoningEffort: fields.reasoningEffort } : {}) + } + // omit_base_prompt is a hint to the augment strategy; we model it as a + // marker the runtime can check if it ever needs to. For now we just keep + // the systemPrompt as-is and let the executor's augment-base behavior + // append the base prefix. 
+ void omitBase + const parsed = SubagentProfileConfig.safeParse(raw) + if (!parsed.success) return null + return { id, profile: parsed.data } +} + +function normalizeMode(value: string | undefined): SubagentMode { + if (value === 'primary' || value === 'all') return value + return 'subagent' +} + +function normalizeToolPolicy(value: string | undefined): SubagentToolPolicy { + // Default follows the main agent (inherit); an overlay must say + // `toolPolicy: readOnly` explicitly to restrict the child. + if (value === 'readOnly') return 'readOnly' + return 'inherit' +} + +function boolField(fields: Record, key: string): boolean | undefined { + const raw = fields[key]?.trim().toLowerCase() + if (raw === 'true' || raw === 'yes') return true + if (raw === 'false' || raw === 'no') return false + return undefined +} + +function parseListField(fields: Record, key: string): string[] | undefined { + const raw = fields[key]?.trim() + if (!raw) return undefined + // Support both inline `[a, b, c]` and comma-separated `a, b, c`. + const stripped = raw.replace(/^\[/, '').replace(/\]$/, '') + const items = stripped.split(',').map((s) => s.trim().replace(/^['"]|['"]$/g, '')).filter(Boolean) + return items.length ? items : undefined +} + +/** + * Lean YAML key:value parser. Only supports flat scalars, lists, and + * double-quoted strings — sufficient for agent frontmatter without pulling + * in a YAML dependency. 
+ */ +function parseSimpleYaml(raw: string): Record { + const result: Record = {} + for (const rawLine of raw.split(/\r?\n/)) { + const line = rawLine.replace(/\s+#.*$/, '').trim() + if (!line || line.startsWith('#')) continue + const colon = line.indexOf(':') + if (colon < 0) continue + const key = line.slice(0, colon).trim() + let value = line.slice(colon + 1).trim() + if (!key) continue + if (value.startsWith('"') && value.endsWith('"')) value = value.slice(1, -1) + else if (value.startsWith("'") && value.endsWith("'")) value = value.slice(1, -1) + result[key] = value + } + return result +} diff --git a/kun/src/domain/thread.ts b/kun/src/domain/thread.ts index 8abf76be2..aa359e8fa 100644 --- a/kun/src/domain/thread.ts +++ b/kun/src/domain/thread.ts @@ -23,13 +23,17 @@ export type ThreadEntity = ThreadRecord export function createThreadRecord(input: { id: string title: string + titleAuto?: boolean workspace: string model: string providerId?: string + agentId?: string + systemPrompt?: string mode?: ThreadMode status?: ThreadStatus approvalPolicy?: ApprovalPolicy sandboxMode?: SandboxMode + pinned?: boolean costBudgetUsd?: number costBudgetWarningSent?: boolean relation?: ThreadRelation @@ -47,13 +51,17 @@ export function createThreadRecord(input: { return { id: input.id, title: input.title, + ...(input.titleAuto !== undefined ? { titleAuto: input.titleAuto } : {}), workspace: input.workspace, model: input.model, ...(input.providerId ? { providerId: input.providerId } : {}), + ...(input.agentId ? { agentId: input.agentId } : {}), + ...(input.systemPrompt ? { systemPrompt: input.systemPrompt } : {}), mode: input.mode ?? 'agent', status: input.status ?? 'idle', approvalPolicy: input.approvalPolicy ?? DEFAULT_APPROVAL_POLICY, sandboxMode: input.sandboxMode ?? DEFAULT_SANDBOX_MODE, + ...(input.pinned !== undefined ? { pinned: input.pinned } : {}), ...(input.costBudgetUsd !== undefined ? 
{ costBudgetUsd: input.costBudgetUsd } : {}), ...(input.costBudgetWarningSent !== undefined ? { costBudgetWarningSent: input.costBudgetWarningSent } : {}), relation: input.relation ?? 'primary', @@ -79,7 +87,7 @@ export function toThreadSummary( thread: ThreadEntity ): Pick< ThreadEntity, - 'id' | 'title' | 'workspace' | 'model' | 'providerId' | 'mode' | 'status' | 'approvalPolicy' | 'sandboxMode' | 'createdAt' | 'updatedAt' + 'id' | 'title' | 'titleAuto' | 'summary' | 'workspace' | 'model' | 'providerId' | 'agentId' | 'systemPrompt' | 'mode' | 'status' | 'approvalPolicy' | 'sandboxMode' | 'pinned' | 'createdAt' | 'updatedAt' | 'costBudgetUsd' | 'costBudgetWarningSent' | 'relation' | 'parentThreadId' | 'forkedFromThreadId' | 'forkedFromTitle' | 'forkedAt' | 'forkedFromMessageCount' | 'forkedFromTurnCount' @@ -88,13 +96,18 @@ export function toThreadSummary( return { id: thread.id, title: thread.title, + ...(thread.titleAuto !== undefined ? { titleAuto: thread.titleAuto } : {}), + ...(thread.summary ? { summary: thread.summary } : {}), workspace: thread.workspace, model: thread.model, ...(thread.providerId ? { providerId: thread.providerId } : {}), + ...(thread.agentId ? { agentId: thread.agentId } : {}), + ...(thread.systemPrompt ? { systemPrompt: thread.systemPrompt } : {}), mode: thread.mode, status: thread.status, approvalPolicy: thread.approvalPolicy, sandboxMode: thread.sandboxMode, + ...(thread.pinned !== undefined ? { pinned: thread.pinned } : {}), ...(thread.costBudgetUsd !== undefined ? { costBudgetUsd: thread.costBudgetUsd } : {}), ...(thread.costBudgetWarningSent !== undefined ? { costBudgetWarningSent: thread.costBudgetWarningSent } : {}), relation: thread.relation ?? 
'primary', diff --git a/kun/src/loop/agent-loop.ts b/kun/src/loop/agent-loop.ts index 77ac92b51..9d3f84b79 100644 --- a/kun/src/loop/agent-loop.ts +++ b/kun/src/loop/agent-loop.ts @@ -28,7 +28,9 @@ import { insertCompactionIntoVisibleHistory, placeCompactionsAtTurnEnd } from './compaction-history.js' -import { summarizeCompactionWithModel } from './compaction-summary.js' +import { resolveCompactionModel, summarizeCompactionWithModel } from './compaction-summary.js' +import { generateThreadTitle, resolveRoleModel } from './title-generator.js' +import type { RolesConfig } from '../config/kun-config.js' import { InflightTracker } from './inflight-tracker.js' import { SteeringQueue } from './steering-queue.js' import { @@ -133,6 +135,37 @@ const GOAL_RESUME_PROMPT = [ function goalResumeKey(threadId: string, goal: ThreadGoal): string { return `${threadId}::${goal.createdAt}::${goal.objective}` } + +/** + * Placeholder titles the GUI assigns to a fresh thread. When a thread still + * carries one of these (or an empty title), the title is considered + * auto-generatable; a user-set title never matches and is preserved. Mirrors + * the renderer's `shouldAutoTitleThread` placeholder set so backend title + * generation only fills in genuinely-default titles. + */ +const PLACEHOLDER_THREAD_TITLES = new Set(['New Thread', '新会话', 'Untitled', '未命名']) +const CODEX_PLACEHOLDER_TITLE = /^__codex_[a-z0-9_]+__$/i + +function isAutoTitleableThreadTitle(title: string | null | undefined): boolean { + const raw = title?.trim() ?? '' + if (!raw) return true + if (PLACEHOLDER_THREAD_TITLES.has(raw)) return true + if (CODEX_PLACEHOLDER_TITLE.test(raw)) return true + return false +} + +/** + * Whether the backend LLM titler may (re)generate a thread's title. + * + * - `titleAuto === false` → user renamed it manually; never overwrite. + * - `titleAuto === true` → client set a provisional first-message title; upgrade it. 
+ * - absent (legacy) → only upgrade placeholder titles, never a real one. + */ +export function canUpgradeThreadTitle(thread: { title?: string | null; titleAuto?: boolean }): boolean { + if (thread.titleAuto === false) return false + if (thread.titleAuto === true) return true + return isAutoTitleableThreadTitle(thread.title) +} const MAX_TOOL_CATALOG_SNAPSHOTS = 256 type TurnFailure = { @@ -580,6 +613,8 @@ export type AgentLoopOptions = { memoryStore?: MemoryStore tokenEconomy?: TokenEconomyConfig contextCompaction?: ContextCompactionConfig + /** Internal-LLM role model routing (smallModel slot + title/summary/codeReview overrides). */ + roles?: RolesConfig toolStorm?: ToolStormBreakerOptions & { enabled?: boolean } toolArgumentRepair?: { maxStringBytes?: number @@ -599,6 +634,23 @@ export type AgentLoopOptions = { * tools — enforced at both the schema (listTools) and execute layers. */ forcedAllowedToolNames?: readonly string[] + /** + * Provider ids hard-blocked for this loop (e.g. a subagent profile's blocked + * MCP servers, as `mcp:`). Deny-list layered on top of inherit and + * enforced at both the schema and execute layers. + */ + blockedProviderIds?: readonly string[] + /** + * Tool names hard-blocked for this loop (e.g. a subagent profile's blocked + * built-in tools). Deny-list layered on top of inherit; enforced at both layers. + */ + blockedToolNames?: readonly string[] + /** + * Skill ids hard-blocked for this loop's turns (e.g. a subagent profile's + * blockedSkills). Hidden from the catalog + auto-activation and rejected by + * `load_skill`, without mutating the shared skill runtime. + */ + blockedSkillIds?: readonly string[] /** * Lifecycle hooks (UserPromptSubmit, TurnStart, TurnEnd, PreCompact). * Tool phases are handled by the tool host; the loop ignores them. 
/**
 * Best-effort LLM titling of a thread after a completed turn.
 *
 * Flow, in order:
 *  1. Load the thread; stop if it is missing, if more than one turn has
 *     status `completed`, or if `canUpgradeThreadTitle` rejects it.
 *  2. Load the session items; the first `user_message` text is required, the
 *     first `assistant_text` is optional supporting context.
 *  3. Resolve the model via `resolveRoleModel` (title role override →
 *     small model → thread/main model) and call `generateThreadTitle`.
 *  4. Re-read the thread and re-check `canUpgradeThreadTitle`, so a rename
 *     that landed while the model call was in flight is not overwritten.
 *  5. Persist the title with `titleAuto: true` and record a
 *     `thread_updated` event.
 *
 * Errors are not caught here; the caller invokes this fire-and-forget and
 * swallows rejections.
 *
 * @param threadId Thread to title.
 * @param turnId   Turn that just completed; forwarded to the title request.
 * @param signal   Optional abort signal forwarded to the title model call.
 */
private async maybeGenerateThreadTitle(threadId: string, turnId: string, signal?: AbortSignal): Promise<void> {
  const thread = await this.opts.threadStore.get(threadId)
  if (!thread) return
  // Only on the first completed turn so we don't re-title on every reply.
  const completedTurns = thread.turns.filter((t) => t.status === 'completed').length
  if (completedTurns > 1) return
  if (!canUpgradeThreadTitle(thread)) return

  const items = await this.opts.sessionStore.loadItems(threadId)
  const userText = items.find((item) => item.kind === 'user_message')?.text ?? ''
  if (!userText.trim()) return
  const assistantText = items.find((item) => item.kind === 'assistant_text')?.text

  const resolved = resolveRoleModel({
    roleModel: this.opts.roles?.titleModel,
    roleProviderId: this.opts.roles?.titleProviderId,
    roles: this.opts.roles,
    mainModel: thread.model || this.opts.model.model,
    mainProviderId: thread.providerId
  })
  if (!resolved) return

  const title = await generateThreadTitle({
    threadId,
    turnId,
    modelClient: this.opts.model,
    model: resolved.model,
    ...(resolved.providerId ? { providerId: resolved.providerId } : {}),
    userText,
    ...(assistantText ? { assistantText } : {}),
    ...(this.opts.roles?.titleReasoningEffort
      ? { reasoningEffort: this.opts.roles.titleReasoningEffort }
      : {}),
    ...(signal ? { abortSignal: signal } : {})
  })
  if (!title) return

  // Re-check the title is still upgradeable (no user rename raced us).
  const latest = await this.opts.threadStore.get(threadId)
  if (!latest || !canUpgradeThreadTitle(latest)) return
  // Keep titleAuto:true — the LLM title is still auto-generated, so a later
  // user rename can still lock it, but we won't re-title (gated by turn count).
  const updated = touchThread({ ...latest, title, titleAuto: true }, this.opts.nowIso())
  await this.opts.threadStore.upsert(updated)
  await this.opts.events.record({
    kind: 'thread_updated',
    threadId,
    title: updated.title,
    titleAuto: true,
    status: updated.status
  })
}
{ blockedSkillIds: this.opts.blockedSkillIds } : {}) }) ?? { activeSkillIds: [], activations: [], @@ -1237,6 +1357,9 @@ export class AgentLoop { memoryPolicy: { enabled: Boolean(this.opts.memoryStore) }, delegationPolicy: { enabled: false }, ...(allowedToolNames ? { allowedToolNames } : {}), + ...(this.opts.blockedProviderIds ? { blockedProviderIds: this.opts.blockedProviderIds } : {}), + ...(this.opts.blockedToolNames ? { blockedToolNames: this.opts.blockedToolNames } : {}), + ...(this.opts.blockedSkillIds ? { blockedSkillIds: this.opts.blockedSkillIds } : {}), approvalPolicy, sandboxMode, abortSignal: signal, @@ -1340,6 +1463,7 @@ export class AgentLoop { tools: effectiveToolSpecs }), ...memoryInstructions(memories), + ...(skillResolution.catalogInstruction ? [skillResolution.catalogInstruction] : []), ...skillResolution.instructions, ...(userInputDisabled ? [userInputUnavailableInstruction()] : []), ...(effectiveToolSpecs.some((tool) => tool.name === 'bash') ? [shellRuntimeInstruction()] : []), @@ -1355,7 +1479,15 @@ export class AgentLoop { turnId, model, ...(thread?.providerId?.trim() ? { providerId: thread.providerId.trim() } : {}), - systemPrompt: this.opts.prefix.systemPrompt, + // Thread-level systemPrompt (primary-agent persona snapshot) is + // appended to the runtime base — same augment strategy as child agents + // (child-agent-executor) — so the agent keeps kun's tool/safety + // conventions and skill catalog instead of losing them to the persona. + // Empty/whitespace falls back to the immutable prefix verbatim so + // unbound threads keep the prompt-cache fingerprint. + systemPrompt: thread?.systemPrompt?.trim() + ? `${this.opts.prefix.systemPrompt}\n\n${thread.systemPrompt.trim()}` + : this.opts.prefix.systemPrompt, ...(planTurnActive ? { modeInstruction: PLAN_MODE_INSTRUCTION } : {}), ...(contextInstructions.length ? 
{ contextInstructions } : {}), prefix: this.opts.prefix.fewShots, @@ -1766,6 +1898,34 @@ export class AgentLoop { this.lastNoToolTextByTurn.set(turnId, textAccumulator.value) return 'continue' } + if (stopReason === 'length') { + // The model hit its output-token ceiling and was cut off without a tool + // call. Don't report this as a clean completion — surface a warning so + // the truncation is visible instead of looking like the model "gave up". + const message = + 'The model reached its maximum output length and the response was truncated. ' + + 'Raise the model’s max output tokens, or ask it to continue or split the work into smaller steps.' + await this.opts.events.record({ + kind: 'error', + threadId, + turnId, + message, + code: 'output_truncated', + severity: 'warning' + }) + await this.opts.turns.applyItem( + threadId, + makeErrorItem({ + id: this.opts.ids.next('item_error'), + turnId, + threadId, + message, + code: 'output_truncated', + severity: 'warning' + }) + ) + return 'stop' + } return 'stop' } // Tool calls mean the turn is making progress again; reset the no-tool @@ -1961,6 +2121,9 @@ export class AgentLoop { memoryPolicy: { enabled: Boolean(this.opts.memoryStore) }, delegationPolicy: { enabled: false }, ...(input.allowedToolNames ? { allowedToolNames: input.allowedToolNames } : {}), + ...(this.opts.blockedProviderIds ? { blockedProviderIds: this.opts.blockedProviderIds } : {}), + ...(this.opts.blockedToolNames ? { blockedToolNames: this.opts.blockedToolNames } : {}), + ...(this.opts.blockedSkillIds ? 
{ blockedSkillIds: this.opts.blockedSkillIds } : {}), approvalPolicy: input.approvalPolicy, sandboxMode: input.sandboxMode, abortSignal: input.signal, @@ -2342,10 +2505,15 @@ export class AgentLoop { keepRecent: plan.keepRecent }) if (result.replacedTokens > 0 && this.opts.contextCompaction?.summaryMode === 'model') { + const compactionModel = resolveCompactionModel({ + contextCompaction: this.opts.contextCompaction, + fallbackModel: model + }) const modelSummary = await summarizeCompactionWithModel({ threadId, turnId, - model, + model: compactionModel.model, + ...(compactionModel.providerId ? { providerId: compactionModel.providerId } : {}), modelClient: this.opts.model, prefix: this.opts.prefix, contextCompaction: this.opts.contextCompaction, @@ -2358,7 +2526,7 @@ export class AgentLoop { kind: 'usage', threadId, turnId, - model, + model: compactionModel.model, usage }) }, diff --git a/kun/src/loop/compaction-summary.ts b/kun/src/loop/compaction-summary.ts index 6e8d52136..fe7a9992e 100644 --- a/kun/src/loop/compaction-summary.ts +++ b/kun/src/loop/compaction-summary.ts @@ -2,16 +2,93 @@ import type { ImmutablePrefix } from '../cache/immutable-prefix.js' import type { TurnItem } from '../contracts/items.js' import type { UsageSnapshot } from '../contracts/usage.js' import type { ModelClient } from '../ports/model-client.js' +import { trimTrailingToolCalls } from './context-compactor.js' import type { ContextCompactionConfig } from './model-context-profile.js' export const DEFAULT_COMPACTION_SUMMARY_TIMEOUT_MS = 15_000 -export const DEFAULT_COMPACTION_SUMMARY_MAX_TOKENS = 1_200 +export const DEFAULT_COMPACTION_SUMMARY_MAX_TOKENS = 2_048 +/** @deprecated The compaction-mode path feeds real conversation messages, not a byte-capped transcript. Kept for config back-compat. */ export const DEFAULT_COMPACTION_SUMMARY_INPUT_MAX_BYTES = 96 * 1024 +/** + * System prompt for the dedicated "compaction mode" turn. 
Ported from + * opencode's compaction agent (`agent/prompt/compaction.txt`) and adapted + * for kun's coding-agent context: the model reads the actual conversation + * (fed as real messages, not a serialized transcript) and writes a free-form + * handoff summary so work can continue past the context window. + */ +export const COMPACTION_SYSTEM_PROMPT = [ + 'You are summarizing a long coding-agent conversation so the work can continue past the context window.', + '', + 'Provide a detailed but concise summary. Focus on information that would be helpful for continuing the work, including:', + '- What was requested and the overall goal', + '- What has been done and the decisions that were made (and why)', + '- Which files are being created, edited, or inspected (with their paths)', + '- Key technical findings: root causes, data values, API shapes, commands run and their results', + '- What still needs to be done next', + '- User requests, constraints, and preferences that must persist', + '', + 'Preserve concrete identifiers verbatim: file paths, function and variable names, commands, URLs, IDs, and error messages.', + 'Do not invent facts and do not add generic advice. Write in the same language as the conversation.', + 'Your summary should be comprehensive enough to provide full context but concise enough to be quickly understood.' +].join('\n') + +/** + * Free-form continuation prompt appended as the final user message of the + * compaction turn. Ported from opencode's default compaction prompt; the new + * session has no access to the conversation above, so the model is asked to + * write a self-contained handoff. Optional pinned constraints are appended so + * durable rules survive even in the free-form summary text. 
/**
 * Builds the final user message of the compaction turn: a fixed request for a
 * self-contained handoff summary, optionally followed by a bullet list of
 * pinned constraints.
 *
 * @param pinnedConstraints Optional constraint strings. Each is trimmed and
 *   blank entries are discarded; when none remain, only the base request is
 *   returned.
 * @returns The message text, lines joined with `\n`.
 */
export function buildCompactionContinuationMessage(pinnedConstraints?: readonly string[]): string {
  const request =
    'Provide a detailed summary of our conversation above, written so a new session with no access to ' +
    'this history can continue the work seamlessly. Cover what we set out to do, what has been done, ' +
    'which files and locations are involved, the key findings and decisions, and what remains to be done next. ' +
    'Preserve concrete identifiers (file paths, function/variable names, commands, URLs, IDs, error messages) verbatim.'

  const constraints: string[] = []
  for (const candidate of pinnedConstraints ?? []) {
    const trimmed = candidate.trim()
    if (trimmed.length > 0) constraints.push(trimmed)
  }
  if (constraints.length === 0) return request

  return [
    request,
    '',
    'Durable constraints that MUST be preserved in your summary:',
    ...constraints.map((constraint) => `- ${constraint}`)
  ].join('\n')
}

/**
 * Picks the model (and optional provider id) for the compaction turn.
 *
 * A non-blank `contextCompaction.summaryModel` is used as the model, paired
 * with `contextCompaction.summaryProviderId` when that is non-blank too.
 * Otherwise the result is just `fallbackModel`, with no provider id.
 *
 * @param input.contextCompaction Optional compaction config holding the override.
 * @param input.fallbackModel     Model to use when no override is configured.
 * @returns `{ model }`, plus `providerId` only when an override provider is set.
 */
export function resolveCompactionModel(input: {
  contextCompaction?: ContextCompactionConfig
  fallbackModel: string
}): { model: string; providerId?: string } {
  const config = input.contextCompaction
  const overrideModel = config?.summaryModel?.trim()
  if (!overrideModel) return { model: input.fallbackModel }

  const overrideProviderId = config?.summaryProviderId?.trim()
  return overrideProviderId
    ? { model: overrideModel, providerId: overrideProviderId }
    : { model: overrideModel }
}
The real conversation `items` are + * fed to the model as messages (mirroring opencode's compaction agent), + * followed by a free-form continuation prompt; the model returns a natural + * handoff summary. Returns `undefined` on timeout / error / empty output so + * the caller falls back to the heuristic summary. + */ export async function summarizeCompactionWithModel(input: { threadId: string turnId: string model: string + /** Optional per-provider routing id paired with `model`. */ + providerId?: string modelClient: ModelClient prefix: ImmutablePrefix contextCompaction?: ContextCompactionConfig @@ -37,8 +114,12 @@ export async function summarizeCompactionWithModel(input: { await input.recordFallback?.(message) } try { - const requestItem = { - id: `item_${input.turnId}_compaction_summary_request`, + // Feed the real conversation as model messages (compaction mode), not a + // serialized transcript. Trailing tool calls without results are dropped + // so the request stays well-formed for OpenAI-compatible providers. + const conversation = trimTrailingToolCalls(input.items) + const continuationItem: TurnItem = { + id: `item_${input.turnId}_compaction_continuation`, turnId: input.turnId, threadId: input.threadId, role: 'user' as const, @@ -46,23 +127,19 @@ export async function summarizeCompactionWithModel(input: { createdAt: new Date().toISOString(), finishedAt: new Date().toISOString(), kind: 'user_message' as const, - text: buildModelCompactionPrompt({ - items: input.items, - heuristicSummary: input.heuristicSummary, - maxBytes: input.contextCompaction?.summaryInputMaxBytes ?? DEFAULT_COMPACTION_SUMMARY_INPUT_MAX_BYTES - }) + text: buildCompactionContinuationMessage(input.prefix.pinnedConstraints) } let text = '' for await (const chunk of input.modelClient.stream({ threadId: input.threadId, turnId: input.turnId, model: input.model, - systemPrompt: input.prefix.systemPrompt, - contextInstructions: [ - 'Summarize context for a history fold. 
Preserve durable task state and omit transient chatter.' - ], - prefix: input.prefix.fewShots, - history: [requestItem], + ...(input.providerId ? { providerId: input.providerId } : {}), + // Dedicated compaction-mode system prompt; the main agent prefix and + // few-shots are intentionally dropped so this is a clean summarizer turn. + systemPrompt: COMPACTION_SYSTEM_PROMPT, + prefix: [], + history: [...conversation, continuationItem], tools: [], stream: true, maxTokens: Math.max( @@ -107,99 +184,3 @@ export async function summarizeCompactionWithModel(input: { input.signal.removeEventListener('abort', onAbort) } } - -export function buildModelCompactionPrompt(input: { - items: readonly TurnItem[] - heuristicSummary: string - maxBytes: number -}): string { - const transcript = fitTextToBytes( - input.items - .map(compactionPromptLine) - .filter((line) => line.length > 0) - .join('\n'), - Math.max(1_024, input.maxBytes) - ) - return [ - 'You are compacting a long agent conversation so work can continue past the context window.', - 'Write a dense, factual handoff summary using EXACTLY the following section headers, in this order.', - 'Keep every section; write "- (none)" when a section has no content. 
Use short bullets, not prose.', - 'Do not invent facts, do not add generic advice, and preserve concrete identifiers verbatim', - '(file paths, function/variable names, commands, URLs, IDs, error messages).', - '', - '## Goal', - "- The user's overall objective and any explicit requirements or constraints.", - '## Completed', - '- Work already done and decisions made, with the concrete outcome of each.', - '## Key findings', - '- Important facts discovered (root causes, data values, API shapes) needed to continue.', - '## Files & locations', - '- Files created/edited/inspected and the relevant paths or line ranges.', - '## Tool & command results', - '- Notable tool/command outcomes, especially errors and their resolution status.', - '## Pending', - '- Unresolved next steps and anything explicitly requested but not yet done.', - '## Constraints & pins', - '- Durable rules, user preferences, and active/pinned skills that must survive.', - '', - 'Existing heuristic summary to cross-check (may be incomplete):', - input.heuristicSummary.trim() || '(none)', - '', - 'Conversation history to fold:', - transcript || '(empty)' - ].join('\n') -} - -function compactionPromptLine(item: TurnItem): string { - switch (item.kind) { - case 'user_message': - return `[user] ${clipForPrompt(item.text, 2_000)}` - case 'assistant_text': - return `[assistant] ${clipForPrompt(item.text, 2_000)}` - case 'assistant_reasoning': - return '' - case 'tool_call': - return `[tool_call:${item.toolName}] ${clipForPrompt(item.summary || stringifyForPrompt(item.arguments), 1_200)}` - case 'tool_result': - return `[tool_result:${item.toolName}${item.isError ? ':error' : ''}] ${clipForPrompt(stringifyForPrompt(item.output), 2_000)}` - case 'approval': - return `[approval:${item.status}:${item.toolName}] ${clipForPrompt(item.summary, 800)}` - case 'user_input': - return `[user_input:${item.status}] ${clipForPrompt(item.prompt, 800)}` - case 'compaction': - return item.replacedTokens > 0 ? 
`[compaction] ${clipForPrompt(item.summary, 2_000)}` : '' - case 'review': - return `[review:${item.title}] ${clipForPrompt(item.reviewText || stringifyForPrompt(item.output), 2_000)}` - case 'error': - return `[error${item.code ? `:${item.code}` : ''}] ${clipForPrompt(item.message, 1_200)}` - } -} - -function stringifyForPrompt(value: unknown): string { - if (typeof value === 'string') return value - if (value == null) return '' - try { - return JSON.stringify(value) - } catch { - return String(value) - } -} - -function clipForPrompt(text: string, maxChars: number): string { - const compact = text.replace(/\s+/g, ' ').trim() - if (compact.length <= maxChars) return compact - return `${compact.slice(0, Math.max(0, maxChars - 3)).trim()}...` -} - -function fitTextToBytes(text: string, maxBytes: number): string { - if (Buffer.byteLength(text, 'utf8') <= maxBytes) return text - let used = 0 - let out = '' - for (const char of text) { - const bytes = Buffer.byteLength(char, 'utf8') - if (used + bytes > maxBytes) break - out += char - used += bytes - } - return `${out.trimEnd()}\n...[truncated for model compaction summary]` -} diff --git a/kun/src/loop/model-context-profile.ts b/kun/src/loop/model-context-profile.ts index fca670e38..f80a3a797 100644 --- a/kun/src/loop/model-context-profile.ts +++ b/kun/src/loop/model-context-profile.ts @@ -61,6 +61,10 @@ export type ContextCompactionConfig = { summaryTimeoutMs?: number summaryMaxTokens?: number summaryInputMaxBytes?: number + /** Optional model override for compaction summary (empty = follow main model). */ + summaryModel?: string + /** Provider id paired with summaryModel. */ + summaryProviderId?: string /** * @deprecated Model-specific context windows and compaction thresholds belong * in top-level models.profiles. This field is still read for compatibility. 
/**
 * Coerces a configured per-role reasoning-depth string into a
 * `ModelReasoningEffort` value.
 *
 * String input is trimmed before being validated with
 * `ModelReasoningEffort.safeParse`. Anything that fails validation —
 * including `undefined` — yields `'off'`.
 *
 * @param value Raw configured value, possibly missing.
 * @returns The parsed effort, or `'off'` when parsing fails.
 */
export function normalizeRoleReasoningEffort(value: string | undefined): ModelReasoningEffort {
  const candidate = typeof value === 'string' ? value.trim() : value
  const result = ModelReasoningEffort.safeParse(candidate)
  if (!result.success) return 'off'
  return result.data
}
/**
 * One-shot model call that turns a whole conversation into a single-paragraph
 * summary.
 *
 * The items are flattened into a text transcript (see `buildSessionTranscript`),
 * sent as one user message with `SESSION_SUMMARY_SYSTEM_PROMPT` as a context
 * instruction, and the streamed `assistant_text_delta` chunks are concatenated.
 * The call is bounded by `timeoutMs` and by the caller's `abortSignal`.
 *
 * @returns The whitespace-collapsed summary, or `undefined` when the signal is
 *   already aborted, the transcript is empty, the stream yields an `error`
 *   chunk, the call is aborted/timed out, an exception is thrown, or the
 *   model produced no text.
 */
export async function generateSessionSummary(input: {
  threadId: string
  modelClient: ModelClient
  /** Resolved model id for the summary role (see resolveRoleModel). */
  model: string
  /** Optional per-provider routing id. */
  providerId?: string
  systemPrompt?: string
  /** Full conversation transcript items, oldest first. */
  items: readonly TurnItem[]
  /** Reasoning depth for the summary call. Invalid/missing => 'off'. */
  reasoningEffort?: string
  timeoutMs?: number
  maxTokens?: number
  inputMaxBytes?: number
  abortSignal?: AbortSignal
}): Promise<string | undefined> {
  if (input.abortSignal?.aborted) return undefined
  const transcript = buildSessionTranscript(input.items, input.inputMaxBytes ?? DEFAULT_SESSION_SUMMARY_INPUT_MAX_BYTES)
  if (!transcript.trim()) return undefined

  const timeoutMs = Math.max(1, Math.floor(input.timeoutMs ?? DEFAULT_SESSION_SUMMARY_TIMEOUT_MS))
  // A private controller lets the timeout and the caller's signal both cancel
  // the request without mutating the caller's signal.
  const controller = new AbortController()
  const onAbort = (): void => controller.abort()
  const timeout = setTimeout(() => controller.abort(), timeoutMs)
  input.abortSignal?.addEventListener('abort', onAbort, { once: true })

  try {
    const turnId = `${input.threadId}_session_summary`
    const requestItem: TurnItem = {
      id: `item_${turnId}_request`,
      turnId,
      threadId: input.threadId,
      role: 'user',
      status: 'completed',
      createdAt: new Date().toISOString(),
      finishedAt: new Date().toISOString(),
      kind: 'user_message',
      text: ['Conversation transcript:', transcript, '', 'Write the one-paragraph summary now.'].join('\n')
    }
    const request: ModelRequest = {
      threadId: input.threadId,
      turnId,
      model: input.model,
      ...(input.providerId ? { providerId: input.providerId } : {}),
      ...(input.systemPrompt ? { systemPrompt: input.systemPrompt } : {}),
      contextInstructions: [SESSION_SUMMARY_SYSTEM_PROMPT],
      prefix: [],
      history: [requestItem],
      tools: [],
      stream: true,
      maxTokens: Math.max(1, Math.floor(input.maxTokens ?? DEFAULT_SESSION_SUMMARY_MAX_TOKENS)),
      temperature: 0,
      reasoningEffort: normalizeRoleReasoningEffort(input.reasoningEffort),
      abortSignal: controller.signal
    }
    let text = ''
    for await (const chunk of input.modelClient.stream(request)) {
      if (input.abortSignal?.aborted || controller.signal.aborted) return undefined
      if (chunk.kind === 'assistant_text_delta') text += chunk.text
      if (chunk.kind === 'error') return undefined
    }
    const summary = text.replace(/\s+/g, ' ').trim()
    return summary || undefined
  } catch {
    // Best-effort: any failure means "no summary".
    return undefined
  } finally {
    clearTimeout(timeout)
    input.abortSignal?.removeEventListener('abort', onAbort)
  }
}

/**
 * Renders items as one `[tag] text` line each (items that render to an empty
 * line are dropped) and caps the result at `maxBytes` UTF-8 bytes, with a
 * floor of 1024 bytes.
 */
export function buildSessionTranscript(items: readonly TurnItem[], maxBytes: number): string {
  const text = items
    .map(transcriptLine)
    .filter((line) => line.length > 0)
    .join('\n')
  return fitTextToBytes(text, Math.max(1_024, maxBytes))
}

/** Formats one item as a tagged, length-clipped transcript line; unknown kinds yield ''. */
function transcriptLine(item: TurnItem): string {
  switch (item.kind) {
    case 'user_message':
      return `[user] ${clip(item.text, 2_000)}`
    case 'assistant_text':
      return `[assistant] ${clip(item.text, 2_000)}`
    case 'tool_call':
      return `[tool_call:${item.toolName}] ${clip(item.summary || stringify(item.arguments), 600)}`
    case 'tool_result':
      return `[tool_result:${item.toolName}${item.isError ? ':error' : ''}] ${clip(stringify(item.output), 800)}`
    case 'compaction':
      return item.replacedTokens > 0 ? `[earlier summary] ${clip(item.summary, 2_000)}` : ''
    case 'review':
      return `[review:${item.title}] ${clip(item.reviewText || stringify(item.output), 1_200)}`
    case 'error':
      return `[error${item.code ? `:${item.code}` : ''}] ${clip(item.message, 600)}`
    default:
      return ''
  }
}

/** Strings pass through, null/undefined become '', everything else is JSON (or `String(value)` if that throws). */
function stringify(value: unknown): string {
  if (typeof value === 'string') return value
  if (value == null) return ''
  try {
    return JSON.stringify(value)
  } catch {
    return String(value)
  }
}

/**
 * Returns `text.slice(0, end)`, backing off by one code unit when the cut
 * would land between the two halves of a surrogate pair, so the result never
 * ends in a lone high surrogate.
 */
function sliceWithoutSplittingSurrogate(text: string, end: number): string {
  if (end <= 0) return ''
  if (end >= text.length) return text
  const last = text.charCodeAt(end - 1)
  const endsOnHighSurrogate = last >= 0xd800 && last <= 0xdbff
  return text.slice(0, endsOnHighSurrogate ? end - 1 : end)
}

/** Collapses whitespace runs to single spaces and shortens to `maxChars` UTF-16 units, ending in `...` when cut. */
function clip(text: string, maxChars: number): string {
  const compact = text.replace(/\s+/g, ' ').trim()
  if (compact.length <= maxChars) return compact
  return `${sliceWithoutSplittingSurrogate(compact, Math.max(0, maxChars - 3)).trim()}...`
}

/**
 * Truncates `text` so the returned string — truncation marker included — fits
 * in `maxBytes` UTF-8 bytes. Text already within the limit is returned as-is.
 *
 * The marker's bytes are reserved up front; previously the marker was appended
 * after the whole budget was spent, so truncated output overshot `maxBytes`.
 * Iteration is per code point, so multi-byte characters are never split. If
 * `maxBytes` is smaller than the marker itself, the marker alone is returned.
 */
function fitTextToBytes(text: string, maxBytes: number): string {
  if (Buffer.byteLength(text, 'utf8') <= maxBytes) return text
  const marker = '\n...[truncated]'
  const budget = Math.max(0, maxBytes - Buffer.byteLength(marker, 'utf8'))
  let used = 0
  let out = ''
  for (const char of text) {
    const bytes = Buffer.byteLength(char, 'utf8')
    if (used + bytes > budget) break
    out += char
    used += bytes
  }
  return `${out.trimEnd()}${marker}`
}
/**
 * Resolves the model and optional provider id for a one-shot internal role
 * call. Candidates are tried in order — role override, `roles.smallModel`,
 * main model — and the first one with a non-blank model wins. The provider id
 * is taken from the same candidate and omitted when blank.
 *
 * @returns The resolved pair, or `undefined` when every model is blank.
 */
export function resolveRoleModel(input: {
  roleModel?: string
  roleProviderId?: string
  roles?: RolesConfig
  mainModel?: string
  mainProviderId?: string
}): { model: string; providerId?: string } | undefined {
  const candidates: ReadonlyArray<readonly [string | undefined, string | undefined]> = [
    [input.roleModel, input.roleProviderId],
    [input.roles?.smallModel, input.roles?.smallModelProviderId],
    [input.mainModel, input.mainProviderId]
  ]
  for (const [rawModel, rawProviderId] of candidates) {
    const model = trim(rawModel)
    if (!model) continue
    const providerId = trim(rawProviderId)
    return providerId ? { model, providerId } : { model }
  }
  return undefined
}

/**
 * One-shot model call that produces a single-line thread title.
 *
 * Sends one user message built by `buildTitlePrompt`, with
 * `TITLE_SYSTEM_PROMPT` as a context instruction, concatenates the streamed
 * `assistant_text_delta` chunks and passes the result through `sanitizeTitle`.
 * The call is bounded by `timeoutMs` and by the caller's `abortSignal`.
 *
 * @returns The sanitized title, or `undefined` when `userText` is blank, the
 *   signal is already aborted, the stream yields an `error` chunk, the call is
 *   aborted/timed out, an exception is thrown, or nothing usable came back.
 */
export async function generateThreadTitle(input: {
  threadId: string
  turnId: string
  modelClient: ModelClient
  /** Resolved model id for the title role (see resolveRoleModel). */
  model: string
  /** Optional per-provider routing id. */
  providerId?: string
  systemPrompt?: string
  /** First user message text (intent). Required for a meaningful title. */
  userText: string
  /** First assistant reply text. Optional supporting context. */
  assistantText?: string
  /** Reasoning depth for the title call. Invalid/missing => 'off'. */
  reasoningEffort?: string
  timeoutMs?: number
  abortSignal?: AbortSignal
}): Promise<string | undefined> {
  const userText = trim(input.userText)
  if (!userText) return undefined
  if (input.abortSignal?.aborted) return undefined

  const timeoutMs = Math.max(1, Math.floor(input.timeoutMs ?? DEFAULT_TITLE_TIMEOUT_MS))
  // A private controller lets the timeout and the caller's signal both cancel
  // the request without mutating the caller's signal.
  const controller = new AbortController()
  const onAbort = (): void => controller.abort()
  const timeout = setTimeout(() => controller.abort(), timeoutMs)
  input.abortSignal?.addEventListener('abort', onAbort, { once: true })

  try {
    const promptText = buildTitlePrompt(userText, input.assistantText)
    // NOTE(review): the item keeps the parent turnId while the request below
    // uses a `_title`-suffixed turnId — confirm this asymmetry is intended.
    const requestItem: TurnItem = {
      id: `item_${input.turnId}_title_request`,
      turnId: input.turnId,
      threadId: input.threadId,
      role: 'user',
      status: 'completed',
      createdAt: new Date().toISOString(),
      finishedAt: new Date().toISOString(),
      kind: 'user_message',
      text: promptText
    }
    const request: ModelRequest = {
      threadId: input.threadId,
      turnId: `${input.turnId}_title`,
      model: input.model,
      ...(input.providerId ? { providerId: input.providerId } : {}),
      ...(input.systemPrompt ? { systemPrompt: input.systemPrompt } : {}),
      contextInstructions: [TITLE_SYSTEM_PROMPT],
      prefix: [],
      history: [requestItem],
      tools: [],
      stream: true,
      maxTokens: DEFAULT_TITLE_MAX_TOKENS,
      temperature: 0,
      reasoningEffort: normalizeRoleReasoningEffort(input.reasoningEffort),
      abortSignal: controller.signal
    }
    let text = ''
    for await (const chunk of input.modelClient.stream(request)) {
      if (input.abortSignal?.aborted || controller.signal.aborted) return undefined
      if (chunk.kind === 'assistant_text_delta') text += chunk.text
      if (chunk.kind === 'error') return undefined
    }
    return sanitizeTitle(text)
  } catch {
    // Best-effort: any failure means "keep the existing title".
    return undefined
  } finally {
    clearTimeout(timeout)
    input.abortSignal?.removeEventListener('abort', onAbort)
  }
}

/** Assembles the user-message text: clipped user message, optional clipped assistant reply, and the title cue. */
function buildTitlePrompt(userText: string, assistantText?: string): string {
  const lines = ['User message:', clip(userText, MAX_TITLE_INPUT_CHARS)]
  const assistant = trim(assistantText)
  if (assistant) {
    lines.push('', 'Assistant reply (for context only):', clip(assistant, 1_000))
  }
  lines.push('', `Title (single line, <= ${MAX_TITLE_CHARS} chars):`)
  return lines.join('\n')
}

/**
 * Cleans raw model output into a usable title.
 *
 * Takes the first non-blank line, then repeatedly peels a leading `Title:`
 * label (ASCII or fullwidth colon), surrounding quotes, a leading markdown
 * heading marker and surrounding `*` emphasis until nothing changes — so
 * nested wrappers such as `**"Foo"**` are fully unwrapped regardless of
 * nesting order. Whitespace is collapsed and the result is clamped to
 * `maxChars` UTF-16 code units without splitting a surrogate pair.
 *
 * @param raw      Raw model output.
 * @param maxChars Length cap in UTF-16 code units; defaults to `MAX_TITLE_CHARS`.
 * @returns The cleaned title, or `undefined` when nothing remains.
 */
export function sanitizeTitle(raw: string, maxChars: number = MAX_TITLE_CHARS): string | undefined {
  let title =
    raw
      .replace(/\r/g, '')
      .split('\n')
      .map((line) => line.trim())
      .find((line) => line.length > 0) ?? ''

  // Each pass can only shorten the string, so the loop terminates.
  let previous: string
  do {
    previous = title
    title = title
      .replace(/^title\s*[:\uFF1A]\s*/i, '')
      .replace(/^["'“”『』「」]+|["'“”『』「」]+$/g, '')
      .replace(/^#+\s*/, '')
      .replace(/^\*+|\*+$/g, '')
      .trim()
  } while (title !== previous)

  title = title.replace(/\s+/g, ' ').trim()
  if (!title) return undefined
  if (title.length > maxChars) {
    title = sliceWithoutSplittingSurrogate(title, maxChars).trim()
  }
  return title || undefined
}

/**
 * Returns `text.slice(0, end)`, backing off by one code unit when the cut
 * would land between the two halves of a surrogate pair, so the result never
 * ends in a lone high surrogate.
 */
function sliceWithoutSplittingSurrogate(text: string, end: number): string {
  if (end <= 0) return ''
  if (end >= text.length) return text
  const last = text.charCodeAt(end - 1)
  const endsOnHighSurrogate = last >= 0xd800 && last <= 0xdbff
  return text.slice(0, endsOnHighSurrogate ? end - 1 : end)
}

/** Collapses whitespace runs to single spaces and shortens to `maxChars` UTF-16 units, ending in `...` when cut. */
function clip(text: string, maxChars: number): string {
  const compact = text.replace(/\s+/g, ' ').trim()
  if (compact.length <= maxChars) return compact
  return `${sliceWithoutSplittingSurrogate(compact, Math.max(0, maxChars - 3)).trim()}...`
}

/** Trims a string; non-string input (including `undefined`) becomes ''. */
function trim(value: string | undefined): string {
  return typeof value === 'string' ? value.trim() : ''
}
'deny' + : null + if (resolvedDecision && resolvedDecision === parsed.data.decision) { + const response: ApprovalDecisionResponse = { + approvalId: input.approvalId, + decision: parsed.data.decision, + status: approval.status, + alreadyResolved: true + } + return jsonResponse(response) + } + return ERRORS.conflict(`approval already decided: ${input.approvalId}`) + } const ok = input.gate.decide(input.approvalId, parsed.data.decision, parsed.data.reason) if (!ok) { return ERRORS.conflict(`approval already decided: ${input.approvalId}`) diff --git a/kun/src/server/routes/delegation.ts b/kun/src/server/routes/delegation.ts new file mode 100644 index 000000000..ce8b0b141 --- /dev/null +++ b/kun/src/server/routes/delegation.ts @@ -0,0 +1,65 @@ +import type { DelegationRuntime } from '../../delegation/delegation-runtime.js' +import { jsonResponse, type JsonResponse } from '../response.js' +import { ERRORS } from './runtime-error.js' + +/** + * GET /v1/delegation/diagnostics + * + * Returns a snapshot of all child runs (queued/running/completed/failed/ + * aborted) tracked by the delegation runtime. Optional `parentThreadId` + * query param filters by parent thread. + * + * Used by the GUI SubagentsView to show realtime status per profile. + */ +export async function delegationDiagnostics( + runtime: DelegationRuntime | undefined, + request: Request +): Promise { + if (!runtime) { + return jsonResponse({ + enabled: false, + active: 0, + childRuns: [], + aggregates: [] + }) + } + const url = new URL(request.url) + const parent = url.searchParams.get('parent_thread_id') ?? undefined + return jsonResponse(await runtime.diagnostics(parent)) +} + +/** + * GET /v1/delegation/profiles + * + * Returns the merged profile roster (builtin + GUI + future workspace + * markdown overlay). Lighter than diagnostics — pure config snapshot. 
+ */ +export async function delegationProfiles( + runtime: DelegationRuntime | undefined +): Promise { + if (!runtime) { + return jsonResponse({ profiles: [], defaultProfile: undefined }) + } + return jsonResponse({ + profiles: runtime.listProfiles(), + defaultProfile: runtime.defaultProfileName + }) +} + +/** + * POST /v1/delegation/abort/:childId + * + * Cancel a detached (background) child run. Synchronous runs are + * unaffected — abort their parent turn instead. + */ +export async function delegationAbort( + runtime: DelegationRuntime | undefined, + childId: string +): Promise { + if (!runtime) return ERRORS.unavailable('delegation runtime is unavailable') + if (!childId.trim()) return ERRORS.validation('childId is required', []) + const aborted = runtime.abortChild(childId) + return jsonResponse({ childId, aborted }) +} + +export { ERRORS as DelegationErrors } diff --git a/kun/src/server/routes/index.ts b/kun/src/server/routes/index.ts index 9c30c5fe5..72e5cb3dc 100644 --- a/kun/src/server/routes/index.ts +++ b/kun/src/server/routes/index.ts @@ -15,6 +15,7 @@ import { setThreadTodos, updateThread } from './threads.js' +import { summarizeThread } from './threads-summarize.js' import { compactTurn, getTurn, @@ -45,6 +46,11 @@ import { memoryDiagnostics, updateMemory } from './memory.js' +import { + delegationAbort, + delegationDiagnostics, + delegationProfiles +} from './delegation.js' import { isAuthorized, bearerToken } from '../auth.js' import { ERRORS } from './runtime-error.js' import type { ServerRuntime } from './server-runtime.js' @@ -59,10 +65,13 @@ import type { ServerRuntime } from './server-runtime.js' * - `GET /v1/attachments/diagnostics` (auth) * - `GET /v1/attachments/{id}` and `{id}/content` (auth) * - `GET/POST /v1/memory`, `PATCH/DELETE /v1/memory/{id}`, diagnostics (auth) + * - `GET /v1/delegation/diagnostics` and `/v1/delegation/profiles` (auth) + * - `POST /v1/delegation/abort/{childId}` (auth) * - `GET /v1/workspace/status` (auth) * - 
`GET/POST /v1/threads` (auth) * - `GET/PATCH/DELETE /v1/threads/{id}` (auth) * - `POST /v1/threads/{id}/fork` (auth) + * - `POST /v1/threads/{id}/summarize` (auth) * - `GET/POST/DELETE /v1/threads/{id}/goal` (auth) * - `GET/POST/DELETE /v1/threads/{id}/todos` (auth) * - `POST /v1/threads/{id}/turns` (auth) @@ -129,6 +138,18 @@ export function buildRouter(runtime: ServerRuntime): Router { if (!authorize(request, runtime)) return ERRORS.unauthorized() return deleteMemory(runtime.memoryStore, ctx.params.id, request) }) + router.add('GET', '/v1/delegation/diagnostics', async (request) => { + if (!authorize(request, runtime)) return ERRORS.unauthorized() + return delegationDiagnostics(runtime.delegationRuntime, request) + }) + router.add('GET', '/v1/delegation/profiles', async (request) => { + if (!authorize(request, runtime)) return ERRORS.unauthorized() + return delegationProfiles(runtime.delegationRuntime) + }) + router.add('POST', '/v1/delegation/abort/:childId', async (request, ctx) => { + if (!authorize(request, runtime)) return ERRORS.unauthorized() + return delegationAbort(runtime.delegationRuntime, ctx.params.childId) + }) router.add('GET', '/v1/workspace/status', async (request) => { if (!authorize(request, runtime)) return ERRORS.unauthorized() const url = new URL(request.url) @@ -159,6 +180,10 @@ export function buildRouter(runtime: ServerRuntime): Router { if (!authorize(request, runtime)) return ERRORS.unauthorized() return forkThread(runtime.threadService, ctx.params.id, request) }) + router.add('POST', '/v1/threads/:id/summarize', async (request, ctx) => { + if (!authorize(request, runtime)) return ERRORS.unauthorized() + return summarizeThread(runtime, ctx.params.id, request) + }) router.add('GET', '/v1/threads/:id/goal', async (request, ctx) => { if (!authorize(request, runtime)) return ERRORS.unauthorized() return getThreadGoal(runtime.threadService, ctx.params.id) diff --git a/kun/src/server/routes/server-runtime.ts 
b/kun/src/server/routes/server-runtime.ts index cf5e212d5..74ca31329 100644 --- a/kun/src/server/routes/server-runtime.ts +++ b/kun/src/server/routes/server-runtime.ts @@ -26,6 +26,10 @@ import type { AttachmentStore } from '../../attachments/attachment-store.js' import type { MemoryDiagnostics } from '../../contracts/memory.js' import type { MemoryStore } from '../../memory/memory-store.js' import type { ReviewTarget } from '../../contracts/review.js' +import type { DelegationRuntime } from '../../delegation/delegation-runtime.js' +import type { ModelClient } from '../../ports/model-client.js' +import type { RolesConfig } from '../../config/kun-config.js' +import type { ImmutablePrefix } from '../../cache/immutable-prefix.js' export type RuntimeToolDiagnostics = { providers: ToolProviderPolicy[] @@ -62,6 +66,29 @@ export type ServerRuntime = { toolHost?: ToolHost attachmentStore?: AttachmentStore memoryStore?: MemoryStore + /** + * Active delegation runtime exposed for diagnostics + agent profile + * listing. Optional so test scaffolds can omit it. + */ + delegationRuntime?: DelegationRuntime + /** + * Default ModelClient + model id for one-shot completions outside the + * agent loop (e.g. AI-generated subagent profiles). Optional so test + * scaffolds can omit it. + */ + modelClient?: ModelClient + defaultModel?: string + /** + * Internal-LLM role model routing. Used by on-demand routes (e.g. session + * summary) to resolve the summary/title/codeReview model precedence + * (role override -> smallModel -> defaultModel). Optional for test scaffolds. + */ + roles?: RolesConfig + /** + * Immutable prefix (systemPrompt + few-shots + fingerprint). Exposed so + * one-shot internal routes can reuse the runtime's systemPrompt. Optional. 
+ */ + immutablePrefix?: ImmutablePrefix runTurn(threadId: string, turnId: string): Promise<'completed' | 'failed' | 'aborted'> | void /** * Relaunch goal continuation turns for threads whose in-flight turn was diff --git a/kun/src/server/routes/threads-summarize.ts b/kun/src/server/routes/threads-summarize.ts new file mode 100644 index 000000000..d1d6bfba0 --- /dev/null +++ b/kun/src/server/routes/threads-summarize.ts @@ -0,0 +1,89 @@ +import { z } from 'zod' +import { jsonResponse, type JsonResponse } from '../response.js' +import { readJsonBody } from '../read-json-body.js' +import { ERRORS } from './runtime-error.js' +import { generateSessionSummary } from '../../loop/session-summary.js' +import { resolveRoleModel } from '../../loop/title-generator.js' +import type { ServerRuntime } from './server-runtime.js' + +const SummarizeThreadRequest = z + .object({ + /** Optional per-request model override (falls back to summary role precedence). */ + model: z.string().min(1).optional(), + providerId: z.string().min(1).optional() + }) + .optional() + +export const SummarizeThreadResponse = z.object({ + id: z.string(), + summary: z.string() +}) +export type SummarizeThreadResponse = z.infer + +/** + * On-demand whole-session summary. Reads the full transcript, runs the Summary + * internal-LLM role (precedence: summaryModel -> smallModel -> main model), + * persists the result onto the thread (`summary` field) and returns it. NOT + * triggered automatically — the renderer calls this from a "summarize" action. + * + * Route: POST /v1/threads/:id/summarize + */ +export async function summarizeThread( + runtime: ServerRuntime, + threadId: string, + request: Request +): Promise { + const body = await readJsonBody(request) + if (!body.ok) return body.response + // An empty object body is valid (no overrides); coerce {} -> undefined. + const rawBody = body.value && typeof body.value === 'object' && Object.keys(body.value).length === 0 + ? 
undefined + : body.value + const parsed = SummarizeThreadRequest.safeParse(rawBody) + if (!parsed.success) return ERRORS.validation('invalid summarize body', parsed.error.issues) + + if (!runtime.modelClient) return ERRORS.unavailable('model client is unavailable') + + const thread = await runtime.threadService.get(threadId) + if (!thread) return ERRORS.notFound(`thread not found: ${threadId}`) + + const items = await runtime.sessionStore.loadItems(threadId) + if (!items.some((item) => item.kind === 'user_message' || item.kind === 'assistant_text')) { + return ERRORS.validation('thread has no conversation to summarize') + } + + const resolved = resolveRoleModel({ + roleModel: parsed.data?.model ?? runtime.roles?.summaryModel, + roleProviderId: parsed.data?.providerId ?? runtime.roles?.summaryProviderId, + roles: runtime.roles, + mainModel: thread.model || runtime.defaultModel, + mainProviderId: thread.providerId + }) + if (!resolved) return ERRORS.unavailable('no model is configured for session summary') + + const abortController = new AbortController() + const onAbort = (): void => abortController.abort() + request.signal?.addEventListener('abort', onAbort) + + let summary: string | undefined + try { + summary = await generateSessionSummary({ + threadId, + modelClient: runtime.modelClient, + model: resolved.model, + ...(resolved.providerId ? { providerId: resolved.providerId } : {}), + ...(runtime.immutablePrefix?.systemPrompt ? { systemPrompt: runtime.immutablePrefix.systemPrompt } : {}), + items, + ...(runtime.roles?.summaryReasoningEffort + ? 
{ reasoningEffort: runtime.roles.summaryReasoningEffort } + : {}), + abortSignal: abortController.signal + }) + } finally { + request.signal?.removeEventListener('abort', onAbort) + } + if (!summary) return ERRORS.unavailable('session summary returned no content') + + const updated = await runtime.threadService.update(threadId, { summary }) + return jsonResponse(SummarizeThreadResponse.parse({ id: updated.id, summary: updated.summary ?? summary })) +} diff --git a/kun/src/server/runtime-factory.ts b/kun/src/server/runtime-factory.ts index 8e9bc6896..a51335261 100644 --- a/kun/src/server/runtime-factory.ts +++ b/kun/src/server/runtime-factory.ts @@ -28,7 +28,7 @@ import { buildVideoGenToolProviders } from '../adapters/tool/media-gen-tool-provider.js' import { LocalWorkspaceInspector } from '../adapters/workspace/local-workspace-inspector.js' -import { createImmutablePrefix, setSystemPrompt } from '../cache/immutable-prefix.js' +import { createImmutablePrefix } from '../cache/immutable-prefix.js' import { buildRuntimeCapabilityManifest, type KunCapabilitiesConfig @@ -48,6 +48,7 @@ import { DEFAULT_STORAGE_CONFIG, expandHomePath, type QualityConfig, + type RolesConfig, type RuntimeTuningConfig, type ServeProviderConfig, type StorageConfig @@ -105,6 +106,8 @@ export type KunServeRuntimeOptions = { models?: ModelConfig contextCompaction?: ContextCompactionConfig runtime?: RuntimeTuningConfig + /** Internal-LLM role model routing (small-model slot + title/summary/codeReview overrides). */ + roles?: RolesConfig storage?: StorageConfig capabilities?: KunCapabilitiesConfig /** Command hooks from config.json; resolved and wired into tool hosts and the loop. 
*/ @@ -211,13 +214,6 @@ export async function createKunServeRuntime( SkillRuntime.create(options.capabilities?.skills), seedUsageCarryover({ threadStore, sessionStore, usageService }) ]) - // Fold the available-skills catalog into the stable prefix once per session so - // the model knows which skills exist (and where to read them) even when no - // trigger fires. Stays byte-stable across turns, preserving prompt-cache reuse. - const skillCatalog = skillRuntime.catalogInstruction() - if (skillCatalog) { - prefix = setSystemPrompt(prefix, `${KUN_SYSTEM_PROMPT}\n\n${skillCatalog}`) - } const turnService = new TurnService({ threadStore, sessionStore, @@ -243,7 +239,10 @@ export async function createKunServeRuntime( ...(options.models ? { models: options.models } : {}), ...(options.contextCompaction ? { contextCompaction: options.contextCompaction } : {}), ...(tokenEconomy ? { tokenEconomy } : {}), - ...(options.runtime ? { runtime: options.runtime } : {}) + ...(options.runtime ? { runtime: options.runtime } : {}), + ...(options.roles?.codeReviewReasoningEffort + ? { reasoningEffort: options.roles.codeReviewReasoningEffort } + : {}) }) const webProviders = buildWebToolProviders(options.capabilities?.web) const attachmentStore = options.capabilities?.attachments.enabled @@ -445,6 +444,7 @@ export async function createKunServeRuntime( skillRuntime, tokenEconomy, contextCompaction: options.contextCompaction, + ...(options.roles ? { roles: options.roles } : {}), ...(options.runtime?.toolStorm ? { toolStorm: options.runtime.toolStorm } : {}), ...(options.runtime?.toolArgumentRepair ? { toolArgumentRepair: options.runtime.toolArgumentRepair } : {}), ...(resolvedHooks.length ? { hooks: resolvedHooks } : {}), @@ -475,6 +475,11 @@ export async function createKunServeRuntime( toolHost, ...(attachmentStore ? { attachmentStore } : {}), ...(memoryStore ? { memoryStore } : {}), + ...(delegationRuntime ? 
{ delegationRuntime } : {}), + modelClient, + defaultModel: options.model, + ...(options.roles ? { roles: options.roles } : {}), + immutablePrefix: prefix, runTurn(threadId, turnId) { return loop.runTurn(threadId, turnId) }, diff --git a/kun/src/services/review-service.ts b/kun/src/services/review-service.ts index 8d8435c7e..4b6f8d187 100644 --- a/kun/src/services/review-service.ts +++ b/kun/src/services/review-service.ts @@ -6,6 +6,7 @@ import { InMemoryUserInputGate } from '../adapters/in-memory-user-input-gate.js' import { buildReadOnlyBuiltinLocalTools } from '../adapters/tool/builtin-tools.js' import { LocalToolHost } from '../adapters/tool/local-tool-host.js' import { createImmutablePrefix } from '../cache/immutable-prefix.js' +import { normalizeRoleReasoningEffort } from '../loop/reasoning-effort.js' import type { ModelCapabilityMetadata } from '../contracts/capabilities.js' import type { TurnItem } from '../contracts/items.js' import type { ReviewTarget } from '../contracts/review.js' @@ -39,6 +40,8 @@ export type ReviewServiceDeps = { tokenEconomy?: TokenEconomyConfig runtime?: RuntimeTuningConfig modelCapabilities?: (model: string) => ModelCapabilityMetadata + /** Reasoning depth for the code-review model call. Invalid/missing => 'off'. */ + reasoningEffort?: string } export class ReviewService { @@ -194,7 +197,8 @@ export class ReviewService { request: { prompt: input.prompt, model: input.model, - mode: 'agent' + mode: 'agent', + reasoningEffort: normalizeRoleReasoningEffort(this.deps.reasoningEffort) } }) const abortChild = (): void => { diff --git a/kun/src/services/thread-service.ts b/kun/src/services/thread-service.ts index a99814e41..ed2ed2c40 100644 --- a/kun/src/services/thread-service.ts +++ b/kun/src/services/thread-service.ts @@ -119,9 +119,12 @@ export class ThreadService { const thread = createThreadRecord({ id, title: options.title ?? (request.title?.trim() || 'New chat'), + ...(request.titleAuto !== undefined ? 
{ titleAuto: request.titleAuto } : {}), workspace: request.workspace, model: request.model, ...(request.providerId?.trim() ? { providerId: request.providerId.trim() } : {}), + ...(request.agentId?.trim() ? { agentId: request.agentId.trim() } : {}), + ...(request.systemPrompt?.trim() ? { systemPrompt: request.systemPrompt.trim() } : {}), mode: request.mode, approvalPolicy: request.approvalPolicy, sandboxMode: request.sandboxMode, @@ -139,10 +142,13 @@ export class ThreadService { async update(threadId: string, patch: { title?: string + titleAuto?: boolean + summary?: string workspace?: string status?: ThreadStatus approvalPolicy?: ApprovalPolicy sandboxMode?: SandboxMode + pinned?: boolean costBudgetUsd?: number | null costBudgetWarningSent?: boolean relation?: ThreadRelation @@ -171,6 +177,7 @@ export class ThreadService { kind: 'thread_updated', threadId, title: updated.title, + ...(updated.titleAuto !== undefined ? { titleAuto: updated.titleAuto } : {}), status: updated.status }) return updated diff --git a/kun/src/services/turn-service.test.ts b/kun/src/services/turn-service.test.ts index cc9f7f272..05b1316f1 100644 --- a/kun/src/services/turn-service.test.ts +++ b/kun/src/services/turn-service.test.ts @@ -7,6 +7,7 @@ import { makeAssistantTextItem, makeUserItem } from '../domain/item.js' import { createThreadRecord } from '../domain/thread.js' import { appendTurnItem, createTurnRecord, finishTurn } from '../domain/turn.js' import { ContextCompactor } from '../loop/context-compactor.js' +import { COMPACTION_SYSTEM_PROMPT } from '../loop/compaction-summary.js' import { effectiveHistoryAfterLatestCompaction } from '../loop/compaction-history.js' import { InflightTracker } from '../loop/inflight-tracker.js' import { SteeringQueue } from '../loop/steering-queue.js' @@ -124,14 +125,19 @@ describe('TurnService compact', () => { expect(model.requests).toHaveLength(1) expect(model.requests[0].model).toBe('thread-model') - 
expect(model.requests[0].systemPrompt).toBe(prefix.systemPrompt) - const summaryRequestItem = model.requests[0].history[0] - expect(summaryRequestItem?.kind).toBe('user_message') - if (!summaryRequestItem || summaryRequestItem.kind !== 'user_message') { - throw new Error('expected compaction summary request to be a user message') + // Compaction-mode turn uses the dedicated summarizer system prompt and + // feeds the real conversation as messages (not a serialized transcript). + expect(model.requests[0].systemPrompt).toBe(COMPACTION_SYSTEM_PROMPT) + expect(model.requests[0].prefix).toEqual([]) + const summaryHistory = model.requests[0].history + expect(summaryHistory[0]?.kind === 'user_message' ? summaryHistory[0].text : '') + .toContain('Initial task: fix /compact.') + const continuationItem = summaryHistory[summaryHistory.length - 1] + expect(continuationItem?.kind).toBe('user_message') + if (!continuationItem || continuationItem.kind !== 'user_message') { + throw new Error('expected compaction continuation message to be a user message') } - expect(summaryRequestItem.text).toContain('Conversation history to fold:') - expect(summaryRequestItem.text).toContain('Initial task: fix /compact.') + expect(continuationItem.text).toContain('Provide a detailed summary of our conversation above') expect(response.summary).toContain('MODEL SUMMARY kept the durable state.') expect(response.pinnedConstraints).toEqual(prefix.pinnedConstraints) diff --git a/kun/src/services/turn-service.ts b/kun/src/services/turn-service.ts index a9d60bc25..e495619b0 100644 --- a/kun/src/services/turn-service.ts +++ b/kun/src/services/turn-service.ts @@ -23,7 +23,7 @@ import { insertCompactionIntoVisibleHistory, placeCompactionsAtTurnEnd } from '../loop/compaction-history.js' -import { summarizeCompactionWithModel } from '../loop/compaction-summary.js' +import { resolveCompactionModel, summarizeCompactionWithModel } from '../loop/compaction-summary.js' import type { ContextCompactionConfig } from 
'../loop/model-context-profile.js' import { makeUserItem, makeErrorItem } from '../domain/item.js' import { appendTurnItem, createTurnRecord, finishTurn, replaceTurnItem, startTurn as startTurnRecord } from '../domain/turn.js' @@ -239,15 +239,21 @@ export class TurnService { auto: false }) if (this.deps.contextCompaction?.summaryMode === 'model' && this.deps.model) { - const model = modelForManualCompaction({ + const fallbackModel = modelForManualCompaction({ threadModel: thread.model, defaultModel: this.deps.defaultModel, clientModel: this.deps.model.model }) + const compactionModel = resolveCompactionModel({ + contextCompaction: this.deps.contextCompaction, + fallbackModel + }) + const model = compactionModel.model const modelSummary = await summarizeCompactionWithModel({ threadId: input.threadId, turnId, model, + ...(compactionModel.providerId ? { providerId: compactionModel.providerId } : {}), modelClient: this.deps.model, prefix, contextCompaction: this.deps.contextCompaction, diff --git a/kun/src/skills/skill-runtime.ts b/kun/src/skills/skill-runtime.ts index 70a04338e..2492009d1 100644 --- a/kun/src/skills/skill-runtime.ts +++ b/kun/src/skills/skill-runtime.ts @@ -1,12 +1,18 @@ import { type Dirent } from 'node:fs' import { readdir, readFile, stat } from 'node:fs/promises' -import { basename, extname, join, resolve } from 'node:path' +import { basename, extname, isAbsolute, join, relative, resolve } from 'node:path' import { z } from 'zod' import type { SkillsCapabilityConfig } from '../contracts/capabilities.js' const DEFAULT_ACTIVE_LIMIT = 3 const DEFAULT_INSTRUCTION_BUDGET_BYTES = 24_000 const DEFAULT_CATALOG_BUDGET_BYTES = 8_000 +const WORKSPACE_SKILL_RELATIVE_DIRS = [ + '.agents/skills', + '.claude/skills', + '.codex/skills', + 'skills' +] as const const SkillTriggerManifest = z.object({ commands: z.array(z.string().min(1)).default([]), @@ -40,6 +46,8 @@ export type LoadedSkill = { assets: string[] priority: number legacy: boolean + /** Source of the 
skill: 'project' (workspace) or 'global' (user-level). */ + source: 'project' | 'global' } export type SkillActivation = { @@ -51,6 +59,7 @@ export type SkillActivation = { export type SkillTurnResolution = { activeSkillIds: string[] activations: SkillActivation[] + catalogInstruction?: string instructions: string[] allowedToolNames?: string[] injectedBytes: number @@ -59,12 +68,14 @@ export type SkillTurnResolution = { export type SkillRuntimeDiagnostics = { enabled: boolean roots: string[] + globalRoots: string[] skills: Array<{ id: string name: string description?: string version: string root: string + source: 'project' | 'global' legacy: boolean triggers: LoadedSkill['triggers'] allowedTools: string[] @@ -82,13 +93,18 @@ export type SkillRuntimeDiagnostics = { export type SkillRuntimeOptions = { activeLimit?: number instructionBudgetBytes?: number - /** Byte budget for the always-on available-skills catalog folded into the prefix. */ + /** Byte budget for the per-turn available-skills catalog. */ catalogBudgetBytes?: number } export class SkillRuntime { private skills: LoadedSkill[] private validationErrors: Array<{ root: string; message: string }> + private readonly workspaceSkillCache = new Map + }>() private lastActivations: SkillActivation[] = [] private lastInjection: SkillRuntimeDiagnostics['lastInjection'] @@ -99,13 +115,18 @@ export class SkillRuntime { ) { this.skills = loaded.skills this.validationErrors = loaded.validationErrors + this.workspaceSkillCache.clear() + } + + enabled(): boolean { + return this.config.enabled } static async create( config: SkillsCapabilityConfig | undefined, options: SkillRuntimeOptions = {} ): Promise { - const normalized = config ?? { enabled: false, roots: [], legacySkillMd: true } + const normalized = config ?? { enabled: false, roots: [], workspaceRoots: [], globalRoots: [], disabledIds: [], legacySkillMd: true } const resolvedOptions = { activeLimit: options.activeLimit ?? 
DEFAULT_ACTIVE_LIMIT, instructionBudgetBytes: options.instructionBudgetBytes ?? DEFAULT_INSTRUCTION_BUDGET_BYTES, @@ -123,18 +144,25 @@ export class SkillRuntime { : { skills: [], validationErrors: [] } this.skills = loaded.skills this.validationErrors = loaded.validationErrors + this.workspaceSkillCache.clear() } - resolveTurn(input: { + async resolveTurn(input: { prompt: string workspace: string filePaths?: readonly string[] - }): SkillTurnResolution { + /** Per-call skill-id deny-list (e.g. a subagent profile's blockedSkills). Hidden from catalog + auto-activation. */ + blockedSkillIds?: readonly string[] + }): Promise { if (!this.config.enabled) return emptyResolution() - const matches = this.matchSkills(input) + const skills = filterBlockedSkills(await this.skillsForWorkspace(input.workspace), input.blockedSkillIds) + const catalogInstruction = renderCatalogInstruction(skills, this.options.catalogBudgetBytes) + const matches = this.matchSkills(input, skills) const active = matches.slice(0, this.options.activeLimit) const injection = buildInjection(active, this.options.instructionBudgetBytes) - const blockedToolNames = blockedToolsFor(this.skills, injection.allowedToolNames) + const catalogBytes = catalogInstruction ? Buffer.byteLength(catalogInstruction, 'utf8') : 0 + const injectedBytes = injection.injectedBytes + catalogBytes + const blockedToolNames = blockedToolsFor(skills, injection.allowedToolNames) this.lastActivations = active.map(({ skill, reason, score }) => ({ skillId: skill.id, reason, @@ -142,61 +170,26 @@ export class SkillRuntime { })) this.lastInjection = { activeSkillIds: injection.activeSkillIds, - injectedBytes: injection.injectedBytes, + injectedBytes, budgetBytes: this.options.instructionBudgetBytes, blockedToolNames } return { activeSkillIds: injection.activeSkillIds, activations: this.lastActivations, + ...(catalogInstruction ? { catalogInstruction } : {}), instructions: injection.instructions, ...(injection.allowedToolNames ? 
{ allowedToolNames: injection.allowedToolNames } : {}), - injectedBytes: injection.injectedBytes + injectedBytes } } /** - * Renders the always-on catalog of available skills, folded once into the - * stable prefix at session start. Unlike {@link resolveTurn} (which only - * injects a skill once its triggers fire on the user prompt), the catalog - * lets the model learn that skills exist at all and read the linked - * `SKILL.md` on demand. Mirrors codex's `render_available_skills_body`. - * Returns `undefined` when skills are disabled or none are loaded so the - * prefix stays byte-identical to the no-skills case. - */ + * Renders the global catalog for diagnostics and compatibility. Runtime turns + * use resolveTurn so workspace-local skills stay out of the immutable prefix. + */ catalogInstruction(): string | undefined { - if (!this.config.enabled || this.skills.length === 0) return undefined - const budget = this.options.catalogBudgetBytes - const header = '## Skills\n' + - 'A skill is a reusable set of instructions stored on disk. The skills below ' + - 'are available in this workspace. When a user request matches one, read its ' + - '`SKILL.md` (the file path is listed) before acting, then follow it.' - const footer = '### How to use skills\n' + - '- A skill activates automatically when the user mentions it by id ' + - '(`$id`, `@id`, or `/skill:id`) or trips one of its triggers; its full ' + - 'instructions are then injected for that turn.\n' + - '- Otherwise, if a request clearly matches a skill above, call the ' + - '`load_skill` tool with its id to pull the full instructions, then follow ' + - 'them. (You can also read the listed file directly.)' - const lines: string[] = [] - let used = Buffer.byteLength(`${header}\n\n### Available skills\n\n${footer}`, 'utf8') - let dropped = 0 - for (const skill of this.skills) { - const desc = skill.description ? 
`: ${skill.description}` : '' - const line = `- ${skill.name} (${skill.id})${desc} (file: ${skill.entryPath})` - const cost = Buffer.byteLength(`${line}\n`, 'utf8') - if (used + cost > budget) { - dropped += 1 - continue - } - lines.push(line) - used += cost - } - if (lines.length === 0) return undefined - if (dropped > 0) { - lines.push(`- …and ${dropped} more skill${dropped === 1 ? '' : 's'} omitted (catalog budget reached).`) - } - return `${header}\n\n### Available skills\n${lines.join('\n')}\n\n${footer}` + return renderCatalogInstruction(this.skills, this.options.catalogBudgetBytes) } /** @@ -206,19 +199,20 @@ export class SkillRuntime { * Returns an error payload (never throws) so the tool can surface it to the * model as a normal tool result. */ - loadSkillById(skillId: string): { + async loadSkillById(skillId: string, workspace = '', blockedIds?: readonly string[]): Promise<{ skillId: string name: string instruction: string allowedTools: string[] truncated: boolean - } | { error: string } { + } | { error: string }> { if (!this.config.enabled) return { error: 'skills are disabled' } + const skills = filterBlockedSkills(await this.skillsForWorkspace(workspace), blockedIds) const normalized = slug(skillId.trim().replace(/^[$@]/, '').replace(/^skill:/i, '')) - const skill = this.skills.find((candidate) => candidate.id === normalized) ?? - this.skills.find((candidate) => slug(candidate.name) === normalized) + const skill = skills.find((candidate) => candidate.id === normalized) ?? + skills.find((candidate) => slug(candidate.name) === normalized) if (!skill) { - const available = this.skills.map((candidate) => candidate.id).join(', ') + const available = skills.map((candidate) => candidate.id).join(', ') return { error: `unknown skill id "${skillId}". 
Available: ${available || '(none)'}` } } let instruction = formatSkillInstruction(skill, 'load_skill') @@ -242,15 +236,19 @@ export class SkillRuntime { } diagnostics(): SkillRuntimeDiagnostics { + const projectRoots = this.config.roots ?? [] + const globalRoots = this.config.globalRoots ?? [] return { enabled: this.config.enabled, - roots: [...this.config.roots], + roots: [...projectRoots], + globalRoots: [...globalRoots], skills: this.skills.map((skill) => ({ id: skill.id, name: skill.name, ...(skill.description ? { description: skill.description } : {}), version: skill.version, root: skill.root, + source: skill.source, legacy: skill.legacy, triggers: skill.triggers, allowedTools: skill.allowedTools @@ -265,16 +263,21 @@ export class SkillRuntime { return this.skills.length } + async countForWorkspace(workspace: string): Promise { + if (!this.config.enabled) return 0 + return (await this.skillsForWorkspace(workspace)).length + } + private matchSkills(input: { prompt: string workspace: string filePaths?: readonly string[] - }): Array { + }, skills: LoadedSkill[]): Array { const prompt = input.prompt const lowerPrompt = prompt.toLowerCase() const fileTypes = fileTypesFrom(input.filePaths ?? [], prompt) const matches: Array = [] - for (const skill of this.skills) { + for (const skill of skills) { const explicit = explicitSkillMention(skill, prompt) if (explicit) { matches.push({ skill, skillId: skill.id, reason: explicit, score: 1_000 + skill.priority }) @@ -297,6 +300,139 @@ export class SkillRuntime { } return matches.sort((a, b) => b.score - a.score || a.skill.id.localeCompare(b.skill.id)) } + + private async skillsForWorkspace(workspace: string): Promise { + const workspaceRoot = normalizeRoot(workspace) + const workspaceLoaded = workspaceRoot + ? await this.loadWorkspaceSkills(workspaceRoot) + : { skills: [], validationErrors: [] } + const knownWorkspaceRoots = [ + workspaceRoot, + ...(this.config.workspaceRoots ?? 
[]).map(normalizeRoot) + ].filter(Boolean) + const staticSkills = this.skills.filter((skill) => + skillVisibleForWorkspace(skill.root, workspaceRoot, knownWorkspaceRoots) + ) + const unique = new Map() + for (const skill of [...workspaceLoaded.skills, ...staticSkills]) { + if (!unique.has(skill.id)) unique.set(skill.id, skill) + } + return [...unique.values()].sort((a, b) => a.id.localeCompare(b.id)) + } + + private async loadWorkspaceSkills(workspaceRoot: string): Promise<{ + skills: LoadedSkill[] + validationErrors: Array<{ root: string; message: string }> + }> { + const discoveredRoots = await existingWorkspaceSkillRoots(workspaceRoot) + const configRoots = new Set((this.config.roots ?? []).map(normalizeRoot).filter(Boolean)) + const knownWorkspaceRoots = (this.config.workspaceRoots ?? []).map(normalizeRoot).filter(Boolean) + const isKnownWorkspace = knownWorkspaceRoots.some((candidate) => candidate === workspaceRoot) + const roots = isKnownWorkspace + ? discoveredRoots.filter((root) => configRoots.has(normalizeRoot(root))) + : discoveredRoots + const rootsKey = roots.join('\0') + const cached = this.workspaceSkillCache.get(workspaceRoot) + if (cached?.rootsKey === rootsKey) { + return { skills: cached.skills, validationErrors: cached.validationErrors } + } + const loaded = roots.length > 0 + ? await discoverSkills({ ...this.config, roots }) + : { skills: [], validationErrors: [] } + this.workspaceSkillCache.set(workspaceRoot, { rootsKey, ...loaded }) + return loaded + } +} + +function renderCatalogInstruction(skills: LoadedSkill[], budget: number): string | undefined { + if (skills.length === 0) return undefined + const header = '## Skills\n' + + 'A skill is a reusable set of instructions stored on disk. The skills below ' + + 'are available in this workspace. When a user request matches one, read its ' + + '`SKILL.md` (the file path is listed) before acting, then follow it.' 
+ const footer = '### How to use skills\n' + + '- A skill activates automatically when the user mentions it by id ' + + '(`$id`, `@id`, or `/skill:id`) or trips one of its triggers; its full ' + + 'instructions are then injected for that turn.\n' + + '- Otherwise, if a request clearly matches a skill above, call the ' + + '`load_skill` tool with its id to pull the full instructions, then follow ' + + 'them. (You can also read the listed file directly.)' + const lines: string[] = [] + let used = Buffer.byteLength(`${header}\n\n### Available skills\n\n${footer}`, 'utf8') + let dropped = 0 + for (const skill of skills) { + const desc = skill.description ? `: ${skill.description}` : '' + const line = `- ${skill.name} (${skill.id})${desc} (file: ${skill.entryPath})` + const cost = Buffer.byteLength(`${line}\n`, 'utf8') + if (used + cost > budget) { + dropped += 1 + continue + } + lines.push(line) + used += cost + } + if (lines.length === 0) return undefined + if (dropped > 0) { + lines.push(`- ...and ${dropped} more skill${dropped === 1 ? '' : 's'} omitted (catalog budget reached).`) + } + return `${header}\n\n### Available skills\n${lines.join('\n')}\n\n${footer}` +} + +function normalizeRoot(path: string | undefined): string { + const trimmed = path?.trim() + return trimmed ? 
resolve(trimmed) : '' +} + +function isSameOrInside(parent: string, target: string): boolean { + if (!parent || !target) return false + const rel = relative(parent, target) + return rel === '' || (!!rel && !rel.startsWith('..') && !isAbsolute(rel)) +} + +function skillVisibleForWorkspace( + skillRoot: string, + workspaceRoot: string, + knownWorkspaceRoots: string[] +): boolean { + const root = normalizeRoot(skillRoot) + if (workspaceRoot && isSameOrInside(workspaceRoot, root)) return true + const ownerWorkspace = knownWorkspaceRoots.find((candidate) => isSameOrInside(candidate, root)) + if (ownerWorkspace) return workspaceRoot !== '' && ownerWorkspace === workspaceRoot + if (workspaceRoot && looksLikeWorkspaceSkillRoot(root) && !isSameOrInside(workspaceRoot, root)) { + return false + } + return true +} + +function looksLikeWorkspaceSkillRoot(root: string): boolean { + const parts = root.split(/[\\/]+/) + if (parts.length < 2) return false + const tail2 = parts.slice(-2).join('/') + return tail2 === '.agents/skills' || tail2 === '.claude/skills' || tail2 === '.codex/skills' +} + +async function existingWorkspaceSkillRoots(workspaceRoot: string): Promise { + const roots: string[] = [] + for (const relativeDir of WORKSPACE_SKILL_RELATIVE_DIRS) { + const root = resolve(workspaceRoot, ...relativeDir.split('/')) + if (await exists(root)) roots.push(root) + } + return roots +} + +/** + * Per-call skill deny-list. Mirrors the global `disabledIds` discovery filter + * (slug both sides) but applies to a single resolveTurn/loadSkill call — e.g. a + * subagent profile that blocks specific skills — without mutating the shared + * runtime instance, so sibling children are unaffected. 
+ */ +function filterBlockedSkills(skills: LoadedSkill[], blockedIds: readonly string[] | undefined): LoadedSkill[] { + if (!blockedIds || blockedIds.length === 0) return skills + // Normalize like loadSkillById's lookup (strip leading $/@ and a `skill:` + // prefix before slugging) so a `skill:gmail` / `$gmail` deny entry matches + // the discovered, slugged id. + const blocked = new Set(blockedIds.map((id) => slug(id.trim().replace(/^[$@]/, '').replace(/^skill:/i, '')))) + return skills.filter((skill) => !blocked.has(skill.id)) } async function discoverSkills(config: SkillsCapabilityConfig): Promise<{ @@ -305,6 +441,13 @@ async function discoverSkills(config: SkillsCapabilityConfig): Promise<{ }> { const skills: LoadedSkill[] = [] const validationErrors: Array<{ root: string; message: string }> = [] + // Skill ids the user disabled. Slug both sides so `gmail`, `Gmail`, and + // `skill:gmail` all match the discovered `slug(manifest.id)`. A disabled + // skill is dropped here at the single discovery chokepoint, so it stays out + // of the catalog, auto-match, load_skill, diagnostics, and counts alike. + const disabledIds = new Set((config.disabledIds ?? []).map(slug)) + + // Scan project roots (priority over global — loaded first) for (const rawRoot of config.roots) { const root = resolve(rawRoot) const candidates = await packageCandidates(root).catch((error) => { @@ -312,15 +455,34 @@ async function discoverSkills(config: SkillsCapabilityConfig): Promise<{ return [] }) for (const candidate of candidates) { - const loaded = await loadSkillPackage(candidate, config.legacySkillMd).catch((error) => { + const loaded = await loadSkillPackage(candidate, config.legacySkillMd, 'project').catch((error) => { validationErrors.push({ root: candidate, message: errorMessage(error) }) return null }) if (loaded) skills.push(loaded) } } + + // Scan global roots (#149: global skill loading fix) + const globalRoots = config.globalRoots ?? 
[] + for (const rawRoot of globalRoots) { + const root = resolve(rawRoot) + const candidates = await packageCandidates(root).catch((error) => { + validationErrors.push({ root, message: errorMessage(error) }) + return [] + }) + for (const candidate of candidates) { + const loaded = await loadSkillPackage(candidate, config.legacySkillMd, 'global').catch((error) => { + validationErrors.push({ root: candidate, message: errorMessage(error) }) + return null + }) + if (loaded) skills.push(loaded) + } + } + const unique = new Map() for (const skill of skills) { + if (disabledIds.has(skill.id)) continue if (!unique.has(skill.id)) unique.set(skill.id, skill) else validationErrors.push({ root: skill.root, message: `duplicate Skill id: ${skill.id}` }) } @@ -361,7 +523,7 @@ async function entryIsDirectory(entry: Dirent, path: string): Promise { } } -async function loadSkillPackage(root: string, allowLegacy: boolean): Promise { +async function loadSkillPackage(root: string, allowLegacy: boolean, source: 'project' | 'global'): Promise { const manifestPath = join(root, 'skill.json') if (await exists(manifestPath)) { const manifest = SkillManifest.parse(JSON.parse(await readFile(manifestPath, 'utf8'))) @@ -379,7 +541,8 @@ async function loadSkillPackage(root: string, allowLegacy: boolean): Promise resolve(root, asset)), priority: manifest.priority, - legacy: false + legacy: false, + source, } } if (!allowLegacy) return null @@ -401,7 +564,8 @@ async function loadSkillPackage(root: string, allowLegacy: boolean): Promise { + it('upgrades placeholder titles when titleAuto is absent (legacy)', () => { + expect(canUpgradeThreadTitle({ title: '新会话' })).toBe(true) + expect(canUpgradeThreadTitle({ title: 'New Thread' })).toBe(true) + expect(canUpgradeThreadTitle({ title: '' })).toBe(true) + expect(canUpgradeThreadTitle({ title: undefined })).toBe(true) + }) + + it('does NOT upgrade a real legacy title with no auto flag', () => { + expect(canUpgradeThreadTitle({ title: 'Fix the login bug' 
})).toBe(false) + }) + + it('upgrades a provisional (titleAuto:true) title even when it is a real string', () => { + // The renderer writes the raw first message with titleAuto:true; the backend + // must still be allowed to replace it with a summarized LLM title. + expect(canUpgradeThreadTitle({ title: '用三个subagent更我说你好', titleAuto: true })).toBe(true) + }) + + it('never upgrades a user-locked (titleAuto:false) title', () => { + expect(canUpgradeThreadTitle({ title: 'My pinned title', titleAuto: false })).toBe(false) + // titleAuto:false wins even over a placeholder-looking string. + expect(canUpgradeThreadTitle({ title: '新会话', titleAuto: false })).toBe(false) + }) +}) diff --git a/kun/tests/builtin-profiles.test.ts b/kun/tests/builtin-profiles.test.ts new file mode 100644 index 000000000..efaf41e13 --- /dev/null +++ b/kun/tests/builtin-profiles.test.ts @@ -0,0 +1,34 @@ +import { describe, expect, it } from 'vitest' +import { mergeBuiltinSubagentProfiles } from '../src/delegation/builtin-profiles.js' +import { SubagentsCapabilityConfig } from '../src/contracts/capabilities.js' + +describe('mergeBuiltinSubagentProfiles', () => { + it('deep-merges a thin GUI override onto a builtin, preserving its persona', () => { + // The GUI persists a builtin override carrying only the edited fields (here + // a tool policy + a deny-list); the builtin's promptPreamble/description + // must survive (a shallow replace would wipe them). + const config = SubagentsCapabilityConfig.parse({ + profiles: { general: { toolPolicy: 'inherit', blockedTools: ['bash'] } } + }) + const general = mergeBuiltinSubagentProfiles(config).profiles.general! + + // User fields win. + expect(general.toolPolicy).toBe('inherit') + expect(general.blockedTools).toEqual(['bash']) + // Builtin persona/description fall back instead of being clobbered. + expect(general.promptPreamble).toContain('通用代理') + expect(general.description).toBeTruthy() + // An un-overridden builtin is untouched. 
+ expect(mergeBuiltinSubagentProfiles(config).profiles.explore!.promptPreamble).toContain('探索代理') + }) + + it('keeps user-only profiles alongside every builtin', () => { + const config = SubagentsCapabilityConfig.parse({ + profiles: { mine: { mode: 'subagent', toolPolicy: 'readOnly' } } + }) + const merged = mergeBuiltinSubagentProfiles(config) + expect(Object.keys(merged.profiles).sort()).toEqual( + ['design-reviewer', 'explore', 'general', 'mine', 'over-engineering-reviewer'].sort() + ) + }) +}) diff --git a/kun/tests/capability-registry.test.ts b/kun/tests/capability-registry.test.ts index af36dcea5..69aad95f4 100644 --- a/kun/tests/capability-registry.test.ts +++ b/kun/tests/capability-registry.test.ts @@ -138,4 +138,67 @@ describe('CapabilityRegistry', () => { ) expect(visible.map((entry) => entry.name)).toEqual(['vision_tool']) }) + + it('honors provider deny-lists (blockedProviderIds) at advertise and execute', async () => { + let executed = false + const tool = LocalToolHost.defineTool({ + name: 'mcp_github_create_issue', + description: 'create issue', + inputSchema: { type: 'object' }, + policy: 'auto', + execute: async () => { + executed = true + return { output: { ok: true } } + } + }) + const host = new LocalToolHost({ + registry: new CapabilityRegistry([ + { id: 'mcp:github', kind: 'mcp', enabled: true, available: true, tools: [tool] } + ]) + }) + + // No deny-list → the MCP tool is advertised. + expect((await host.listTools(buildContext())).map((entry) => entry.name)).toEqual(['mcp_github_create_issue']) + + // blockedProviderIds hides the whole server and rejects execution. 
+ const blocked = buildContext({ blockedProviderIds: ['mcp:github'] }) + expect(await host.listTools(blocked)).toEqual([]) + await expect( + host.execute({ callId: 'call_1', toolName: 'mcp_github_create_issue', arguments: {} }, blocked) + ).rejects.toThrow(/not advertised/) + expect(executed).toBe(false) + }) + + it('honors tool-name deny-lists (blockedToolNames) while leaving sibling tools advertised', async () => { + let executed = false + const bash = LocalToolHost.defineTool({ + name: 'bash', + description: 'run', + inputSchema: { type: 'object' }, + policy: 'auto', + execute: async () => { + executed = true + return { output: { ok: true } } + } + }) + const read = LocalToolHost.defineTool({ + name: 'read', + description: 'read', + inputSchema: { type: 'object' }, + policy: 'auto', + execute: async () => ({ output: { ok: true } }) + }) + const host = new LocalToolHost({ + registry: new CapabilityRegistry([ + { id: 'builtin', kind: 'built-in', enabled: true, available: true, tools: [bash, read] } + ]) + }) + + const blocked = buildContext({ blockedToolNames: ['bash'] }) + expect((await host.listTools(blocked)).map((entry) => entry.name)).toEqual(['read']) + await expect( + host.execute({ callId: 'call_1', toolName: 'bash', arguments: {} }, blocked) + ).rejects.toThrow(/active tool policy/) + expect(executed).toBe(false) + }) }) diff --git a/kun/tests/child-agent-executor.test.ts b/kun/tests/child-agent-executor.test.ts index 7e6ffb1a5..75c6f191e 100644 --- a/kun/tests/child-agent-executor.test.ts +++ b/kun/tests/child-agent-executor.test.ts @@ -140,4 +140,204 @@ describe('child agent executor', () => { }) expect(result).toMatchObject({ prefixReused: true, inheritedHistoryItems: 0 }) }) + + it('threads the input providerId onto the child ModelRequest for routing', async () => { + const seen: ModelRequest[] = [] + const executor = createChildAgentExecutor({ + model: model([ + { kind: 'assistant_text_delta', text: 'ok' }, + { kind: 'completed', stopReason: 'stop' } 
+ ], seen), + toolHost: new LocalToolHost({ registry: new CapabilityRegistry([]) }), + prefix: createImmutablePrefix({ systemPrompt: 'child system' }), + defaultModel: 'child-test', + nowIso: () => '2026-06-03T00:00:00.000Z' + }) + + await executor({ + childId: 'child_provider', + parentThreadId: 'thr_parent', + parentTurnId: 'turn_parent', + prompt: 'Route me', + providerId: 'minimax', + toolPolicy: 'inherit', + signal: new AbortController().signal + }) + + expect(seen[0]?.providerId).toBe('minimax') + }) + + it('gives an inherit child the parent agent full tool set (no forced read-only allowlist)', async () => { + const seen: ModelRequest[] = [] + const registry = new CapabilityRegistry([{ + id: 'builtin', + kind: 'built-in', + enabled: true, + available: true, + tools: buildDefaultLocalTools() + }]) + const executor = createChildAgentExecutor({ + model: model([ + { kind: 'assistant_text_delta', text: 'done' }, + { kind: 'completed', stopReason: 'stop' } + ], seen), + toolHost: new LocalToolHost({ registry }), + prefix: createImmutablePrefix({ systemPrompt: 'child system' }), + defaultModel: 'child-test', + nowIso: () => '2026-06-03T00:00:00.000Z' + }) + + await executor({ + childId: 'child_inherit', + parentThreadId: 'thr_parent', + parentTurnId: 'turn_parent', + prompt: 'Do the work', + toolPolicy: 'inherit', + signal: new AbortController().signal + }) + + const toolNames = (seen[0]?.tools ?? []).map((tool) => tool.name) + // The child sees write/shell tools (not just the read-only investigation + // set) because inherit applies no forced allow-list. 
+ expect(toolNames).toContain('read') + expect(toolNames.length).toBeGreaterThan(4) + const restricted = new Set(['read', 'grep', 'find', 'ls']) + expect(toolNames.some((name) => !restricted.has(name))).toBe(true) + }) + + it('honors an explicit allowedTools list over the tool policy', async () => { + const seen: ModelRequest[] = [] + const registry = new CapabilityRegistry([{ + id: 'builtin', + kind: 'built-in', + enabled: true, + available: true, + tools: buildDefaultLocalTools() + }]) + const executor = createChildAgentExecutor({ + model: model([ + { kind: 'assistant_text_delta', text: 'done' }, + { kind: 'completed', stopReason: 'stop' } + ], seen), + toolHost: new LocalToolHost({ registry }), + prefix: createImmutablePrefix({ systemPrompt: 'child system' }), + defaultModel: 'child-test', + nowIso: () => '2026-06-03T00:00:00.000Z' + }) + + await executor({ + childId: 'child_tools', + parentThreadId: 'thr_parent', + parentTurnId: 'turn_parent', + prompt: 'Investigate', + // readOnly would allow read/grep/find/ls; the explicit list narrows it. + toolPolicy: 'readOnly', + allowedTools: ['read', 'grep'], + signal: new AbortController().signal + }) + + const toolNames = (seen[0]?.tools ?? 
[]).map((tool) => tool.name).sort() + expect(toolNames).toEqual(['grep', 'read']) + }) + + it('drops blocked built-in tools (blockedTools) from an inherit child', async () => { + const seen: ModelRequest[] = [] + const registry = new CapabilityRegistry([{ + id: 'builtin', + kind: 'built-in', + enabled: true, + available: true, + tools: buildDefaultLocalTools() + }]) + const executor = createChildAgentExecutor({ + model: model([ + { kind: 'assistant_text_delta', text: 'done' }, + { kind: 'completed', stopReason: 'stop' } + ], seen), + toolHost: new LocalToolHost({ registry }), + prefix: createImmutablePrefix({ systemPrompt: 'child system' }), + defaultModel: 'child-test', + nowIso: () => '2026-06-03T00:00:00.000Z' + }) + + await executor({ + childId: 'child_blocked_tools', + parentThreadId: 'thr_parent', + parentTurnId: 'turn_parent', + prompt: 'Do the work', + toolPolicy: 'inherit', + blockedTools: ['bash', 'write'], + signal: new AbortController().signal + }) + + const toolNames = (seen[0]?.tools ?? 
[]).map((tool) => tool.name) + expect(toolNames).toContain('read') + expect(toolNames).not.toContain('bash') + expect(toolNames).not.toContain('write') + }) + + it('maps blockedMcpServers to mcp: and hides that server tools from the child', async () => { + const seen: ModelRequest[] = [] + const mcpTool = LocalToolHost.defineTool({ + name: 'mcp_github_create_issue', + description: 'create issue', + inputSchema: { type: 'object' }, + policy: 'auto', + execute: async () => ({ output: { ok: true } }) + }) + const registry = new CapabilityRegistry([ + { id: 'builtin', kind: 'built-in', enabled: true, available: true, tools: buildDefaultLocalTools() }, + { id: 'mcp:github', kind: 'mcp', enabled: true, available: true, tools: [mcpTool] } + ]) + const executor = createChildAgentExecutor({ + model: model([ + { kind: 'assistant_text_delta', text: 'done' }, + { kind: 'completed', stopReason: 'stop' } + ], seen), + toolHost: new LocalToolHost({ registry }), + prefix: createImmutablePrefix({ systemPrompt: 'child system' }), + defaultModel: 'child-test', + nowIso: () => '2026-06-03T00:00:00.000Z' + }) + + await executor({ + childId: 'child_blocked_mcp', + parentThreadId: 'thr_parent', + parentTurnId: 'turn_parent', + prompt: 'Do the work', + toolPolicy: 'inherit', + blockedMcpServers: ['github'], + signal: new AbortController().signal + }) + + const toolNames = (seen[0]?.tools ?? 
[]).map((tool) => tool.name) + expect(toolNames).toContain('read') + expect(toolNames).not.toContain('mcp_github_create_issue') + }) + + it('augments the base system prompt with the agent systemPrompt', async () => { + const seen: ModelRequest[] = [] + const executor = createChildAgentExecutor({ + model: model([ + { kind: 'assistant_text_delta', text: 'ok' }, + { kind: 'completed', stopReason: 'stop' } + ], seen), + toolHost: new LocalToolHost({ registry: new CapabilityRegistry([]) }), + prefix: createImmutablePrefix({ systemPrompt: 'BASE PROMPT' }), + defaultModel: 'child-test', + nowIso: () => '2026-06-03T00:00:00.000Z' + }) + + await executor({ + childId: 'child_sys', + parentThreadId: 'thr_parent', + parentTurnId: 'turn_parent', + prompt: 'Task', + systemPrompt: 'You are a careful reviewer.', + toolPolicy: 'inherit', + signal: new AbortController().signal + }) + + expect(seen[0]?.systemPrompt).toBe('BASE PROMPT\n\nYou are a careful reviewer.') + }) }) diff --git a/kun/tests/contracts.test.ts b/kun/tests/contracts.test.ts index 89017ba6a..0fcc07f1d 100644 --- a/kun/tests/contracts.test.ts +++ b/kun/tests/contracts.test.ts @@ -518,7 +518,7 @@ describe('cli', () => { expect('defaultStepLimit' in config.subagents).toBe(false) }) - it('parses subagent profiles and defaults the tool policy to read-only', () => { + it('parses subagent profiles and defaults the tool policy to inherit', () => { const config = KunCapabilitiesConfig.parse({ subagents: { enabled: true, @@ -527,15 +527,19 @@ describe('cli', () => { defaultProfile: 'reviewer', profiles: { reviewer: { model: 'deepseek-v4-pro', promptPreamble: 'Review for bugs.', toolPolicy: 'readOnly' }, - fixer: { toolPolicy: 'inherit' } + fixer: { toolPolicy: 'inherit' }, + helper: {} } } }) - expect(config.subagents.defaultToolPolicy).toBe('readOnly') + // Default subagent policy follows the main agent (inherit), not read-only. 
+ expect(config.subagents.defaultToolPolicy).toBe('inherit') expect(config.subagents.defaultProfile).toBe('reviewer') + // Explicit per-profile policy still wins over the inherit default. expect(config.subagents.profiles.reviewer).toMatchObject({ model: 'deepseek-v4-pro', toolPolicy: 'readOnly' }) - // Profiles default toolPolicy to readOnly when omitted. expect(config.subagents.profiles.fixer.toolPolicy).toBe('inherit') + // Profiles default toolPolicy to inherit when omitted. + expect(config.subagents.profiles.helper.toolPolicy).toBe('inherit') }) it('rejects a defaultProfile that is not defined in profiles', () => { @@ -561,7 +565,7 @@ describe('cli', () => { expect(manifest.subagents).toMatchObject({ maxParallel: 2, maxChildRuns: 6, - defaultToolPolicy: 'readOnly', + defaultToolPolicy: 'inherit', defaultProfile: 'reviewer', profiles: [{ name: 'reviewer', model: 'deepseek-v4-pro', toolPolicy: 'readOnly' }] }) diff --git a/kun/tests/delegation-runtime.test.ts b/kun/tests/delegation-runtime.test.ts index 5db6d46cc..967314195 100644 --- a/kun/tests/delegation-runtime.test.ts +++ b/kun/tests/delegation-runtime.test.ts @@ -146,15 +146,15 @@ describe('DelegationRuntime', () => { await expect(blocking).resolves.toMatchObject({ status: 'completed' }) }) - it('resolves a profile to model, preamble, and tool policy', async () => { - const seen: Array<{ model?: string; promptPreamble?: string; toolPolicy: string }> = [] + it('resolves a profile to model, provider, preamble, and tool policy', async () => { + const seen: Array<{ model?: string; providerId?: string; promptPreamble?: string; toolPolicy: string }> = [] const runtime = createRuntime({ defaultProfile: 'reviewer', profiles: { - reviewer: { model: 'deepseek-v4-pro', promptPreamble: 'Review for bugs.', toolPolicy: 'readOnly' } + reviewer: { model: 'deepseek-v4-pro', providerId: 'minimax', promptPreamble: 'Review for bugs.', toolPolicy: 'readOnly' } }, executor: async (input) => { - seen.push({ model: input.model, 
promptPreamble: input.promptPreamble, toolPolicy: input.toolPolicy }) + seen.push({ model: input.model, providerId: input.providerId, promptPreamble: input.promptPreamble, toolPolicy: input.toolPolicy }) return { summary: 'reviewed', toolInvocations: 2, prefixReused: true, inheritedHistoryItems: 0 } } }) @@ -164,17 +164,79 @@ describe('DelegationRuntime', () => { prompt: 'check the diff', signal: new AbortController().signal }) - expect(seen[0]).toMatchObject({ model: 'deepseek-v4-pro', promptPreamble: 'Review for bugs.', toolPolicy: 'readOnly' }) + expect(seen[0]).toMatchObject({ model: 'deepseek-v4-pro', providerId: 'minimax', promptPreamble: 'Review for bugs.', toolPolicy: 'readOnly' }) expect(record).toMatchObject({ profile: 'reviewer', toolPolicy: 'readOnly', model: 'deepseek-v4-pro', + providerId: 'minimax', toolInvocations: 2, prefixReused: true, inheritedHistoryItems: 0 }) }) + it('threads a profile\'s blocked tool/MCP/skill deny-lists to the child executor', async () => { + const seen: Array<{ blockedTools?: string[]; blockedMcpServers?: string[]; blockedSkills?: string[] }> = [] + const runtime = createRuntime({ + defaultProfile: 'scoped', + profiles: { + scoped: { + toolPolicy: 'inherit', + blockedTools: ['bash', 'write'], + blockedMcpServers: ['github'], + blockedSkills: ['deep-research'] + } + }, + executor: async (input) => { + seen.push({ + blockedTools: input.blockedTools, + blockedMcpServers: input.blockedMcpServers, + blockedSkills: input.blockedSkills + }) + return { summary: 'ok' } + } + }) + await runtime.runChild({ + parentThreadId: 'thr_1', + parentTurnId: 'turn_1', + prompt: 'go', + signal: new AbortController().signal + }) + expect(seen[0]).toEqual({ + blockedTools: ['bash', 'write'], + blockedMcpServers: ['github'], + blockedSkills: ['deep-research'] + }) + }) + + it('routes a child through an explicit providerId, overriding the profile, and surfaces it on the event', async () => { + const sessionStore = new InMemorySessionStore() + const 
seen: Array<{ providerId?: string }> = [] + const runtime = createRuntime({ + sessionStore, + defaultProfile: 'reviewer', + profiles: { reviewer: { providerId: 'minimax', toolPolicy: 'readOnly' } }, + executor: async (input) => { + seen.push({ providerId: input.providerId }) + return { summary: 'ok' } + } + }) + // An explicit providerId on the call wins over the profile's providerId. + const record = await runtime.runChild({ + parentThreadId: 'thr_1', + parentTurnId: 'turn_1', + prompt: 'go', + providerId: 'anthropic', + signal: new AbortController().signal + }) + expect(seen[0]?.providerId).toBe('anthropic') + expect(record.providerId).toBe('anthropic') + const events = await sessionStore.loadEventsSince('thr_1', 0) + const completed = events.find((event) => event.child?.childId === record.id && event.child.childStatus === 'completed') + expect(completed?.child?.childProviderId).toBe('anthropic') + }) + it('rejects an unknown profile name', async () => { const runtime = createRuntime({ profiles: { reviewer: { toolPolicy: 'readOnly' } } }) await expect(runtime.runChild({ @@ -186,7 +248,7 @@ describe('DelegationRuntime', () => { })).rejects.toThrow(/unknown subagent profile/) }) - it('defaults the tool policy to read-only when no profile resolves', async () => { + it('defaults the tool policy to inherit (follow the main agent) when no profile resolves', async () => { const seen: string[] = [] const runtime = createRuntime({ executor: async (input) => { @@ -200,6 +262,25 @@ describe('DelegationRuntime', () => { prompt: 'investigate', signal: new AbortController().signal }) + expect(seen[0]).toBe('inherit') + expect(record.toolPolicy).toBe('inherit') + }) + + it('still honors an explicit read-only default tool policy', async () => { + const seen: string[] = [] + const runtime = createRuntime({ + defaultToolPolicy: 'readOnly', + executor: async (input) => { + seen.push(input.toolPolicy) + return { summary: 'ok' } + } + }) + const record = await runtime.runChild({ + 
parentThreadId: 'thr_1', + parentTurnId: 'turn_1', + prompt: 'investigate', + signal: new AbortController().signal + }) expect(seen[0]).toBe('readOnly') expect(record.toolPolicy).toBe('readOnly') }) @@ -233,8 +314,71 @@ describe('DelegationRuntime', () => { prefixReused: true, totalTokens: 3, cacheHitRate: 0.5, - childToolPolicy: 'readOnly' + childToolPolicy: 'inherit' + }) + }) + + it('returns immediately when detach=true and keeps executing in the background', async () => { + const start = deferred() + const release = deferred() + const runtime = createRuntime({ + executor: async () => { + start.resolve() + await release.promise + return { summary: 'background done' } + } + }) + const queued = await runtime.runChild({ + parentThreadId: 'thr_detach', + parentTurnId: 'turn_detach', + prompt: 'long running task', + detach: true, + signal: new AbortController().signal + }) + // Immediately returns with status 'queued' — synchronous runs would + // have returned 'completed' here. + expect(queued.status).toBe('queued') + // The executor actually runs in the background. + await start.promise + let diagnostics = await runtime.diagnostics('thr_detach') + expect(diagnostics.childRuns[0]?.status).toBe('running') + // Release the executor and wait for the record to flip to completed. 
+ release.resolve() + await waitFor(async () => { + diagnostics = await runtime.diagnostics('thr_detach') + return diagnostics.childRuns[0]?.status === 'completed' + }) + expect(diagnostics.childRuns[0]?.summary).toBe('background done') + }) + + it('abortChild signals a detached run and false-returns for unknown ids', async () => { + const start = deferred() + const runtime = createRuntime({ + executor: async ({ signal }) => { + start.resolve() + await new Promise((resolve, reject) => { + signal.addEventListener('abort', () => reject(new Error('aborted'))) + }) + return { summary: 'unreachable' } + } + }) + const queued = await runtime.runChild({ + parentThreadId: 'thr_abort', + parentTurnId: 'turn_abort', + prompt: 'long task', + detach: true, + signal: new AbortController().signal + }) + await start.promise + expect(runtime.abortChild(queued.id)).toBe(true) + await waitFor(async () => { + const diagnostics = await runtime.diagnostics('thr_abort') + return diagnostics.childRuns[0]?.status === 'aborted' }) + // After the run finished the controller is cleaned up via .finally. + // Poll because the cleanup runs in a microtask after the run resolves. 
+ await waitFor(() => runtime.abortChild(queued.id) === false) + expect(runtime.abortChild('child_unknown')).toBe(false) }) it('aggregates child runs by label and model for dashboards', async () => { @@ -301,7 +445,7 @@ describe('DelegationRuntime', () => { maxChildRuns?: number defaultToolPolicy?: 'readOnly' | 'inherit' defaultProfile?: string - profiles?: Record + profiles?: Record sessionStore?: InMemorySessionStore executor?: ConstructorParameters[0]['executor'] recordExternalUsage?: ConstructorParameters[0]['recordExternalUsage'] diff --git a/kun/tests/domain.test.ts b/kun/tests/domain.test.ts index fb865273c..c8fde108e 100644 --- a/kun/tests/domain.test.ts +++ b/kun/tests/domain.test.ts @@ -64,12 +64,14 @@ describe('domain.thread', () => { workspace: '/tmp', model: 'deepseek-chat', approvalPolicy: 'on-request', - sandboxMode: 'read-only' + sandboxMode: 'read-only', + pinned: true }) const summary = toThreadSummary(thread) expect(summary).not.toHaveProperty('turns') expect(summary.approvalPolicy).toBe('on-request') expect(summary.sandboxMode).toBe('read-only') + expect(summary.pinned).toBe(true) }) }) diff --git a/kun/tests/http-server.test.ts b/kun/tests/http-server.test.ts index fb64100bf..3868a3b9d 100644 --- a/kun/tests/http-server.test.ts +++ b/kun/tests/http-server.test.ts @@ -1,7 +1,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' import { mkdtemp, rm } from 'node:fs/promises' import { tmpdir } from 'node:os' -import { join } from 'node:path' +import { join, resolve } from 'node:path' import { dispatchRequest } from '../src/server/http-server.js' import { createApprovalRequest } from '../src/domain/approval.js' import { makeAssistantTextItem, makeToolCallItem, makeToolResultItem } from '../src/domain/item.js' @@ -116,6 +116,7 @@ describe('HTTP server', () => { skills: { enabled: false, roots: [], + globalRoots: [], skills: [], validationErrors: [], lastActivations: [] @@ -174,6 +175,7 @@ describe('HTTP server', () => { 
h.runtime.skills = () => ({ enabled: true, roots: ['/tmp/skills'], + globalRoots: [], skills: [ { id: 'review', @@ -181,6 +183,7 @@ describe('HTTP server', () => { description: 'Review the current change', version: '1.0.0', root: '/tmp/skills/review', + source: 'project' as const, legacy: false, triggers: { commands: ['/review'], promptPatterns: [], fileTypes: [] }, allowedTools: ['read'] @@ -857,6 +860,30 @@ describe('HTTP server', () => { const body = (await readJson(decide)) as { decision: string } expect(body.decision).toBe('allow') await expect(pending).resolves.toBe('allow') + + const replay = await dispatchRequest( + h.router, + new Request('http://localhost/v1/approvals/appr_1', { + method: 'POST', + headers: { authorization: 'Bearer tok-1', 'content-type': 'application/json' }, + body: JSON.stringify({ decision: 'allow' }) + }) + ) + expect(replay.status).toBe(200) + expect(await readJson(replay)).toMatchObject({ + decision: 'allow', + alreadyResolved: true + }) + + const conflict = await dispatchRequest( + h.router, + new Request('http://localhost/v1/approvals/appr_1', { + method: 'POST', + headers: { authorization: 'Bearer tok-1', 'content-type': 'application/json' }, + body: JSON.stringify({ decision: 'deny' }) + }) + ) + expect(conflict.status).toBe(409) }) it('resolves GUI user input through both HTTP compatibility endpoints', async () => { @@ -1269,6 +1296,6 @@ describe('HTTP server', () => { ) expect(response.status).toBe(200) const body = (await readJson(response)) as { path: string } - expect(body.path).toBe('/tmp') + expect(body.path).toBe(resolve('/tmp')) }) }) diff --git a/kun/tests/loop.test.ts b/kun/tests/loop.test.ts index 5fc68c2ad..e04f96cf0 100644 --- a/kun/tests/loop.test.ts +++ b/kun/tests/loop.test.ts @@ -10,6 +10,7 @@ import { GET_GOAL_TOOL_NAME, UPDATE_GOAL_TOOL_NAME } from '../src/adapters/tool/ import { FileThreadStore, FileSessionStore } from '../src/adapters/file/index.js' import { RuntimeEventRecorder } from 
'../src/services/runtime-event-recorder.js' import { ContextCompactor } from '../src/loop/context-compactor.js' +import { COMPACTION_SYSTEM_PROMPT } from '../src/loop/compaction-summary.js' import { effectiveHistoryAfterLatestCompaction } from '../src/loop/compaction-history.js' import { resolveModelContextProfile } from '../src/loop/model-context-profile.js' import { isPlanClarifyingQuestion } from '../src/loop/agent-loop.js' @@ -2347,7 +2348,7 @@ describe('AgentLoop', () => { async *stream(request: ModelRequest): AsyncIterable { requests.push(request) const isSummaryRequest = request.tools.length === 0 && - request.contextInstructions?.some((text) => text.includes('history fold')) + request.systemPrompt === COMPACTION_SYSTEM_PROMPT if (isSummaryRequest) { yield { kind: 'usage', @@ -2398,23 +2399,25 @@ describe('AgentLoop', () => { const status = await h.loop.runTurn(h.threadId, h.turnId) const [summaryRequest, mainRequest] = requests if (!summaryRequest || !mainRequest) throw new Error('expected summary and main model requests') - const summaryPromptItem = summaryRequest.history[0] + const summaryContinuation = summaryRequest.history[summaryRequest.history.length - 1] const persisted = await h.sessionStore.loadItems(h.threadId) const persistedSummary = persisted.find((item) => item.kind === 'compaction') const mainSummary = mainRequest.history.find((item) => item.kind === 'compaction') expect(status).toBe('completed') expect(requests).toHaveLength(2) - expect(summaryRequest.systemPrompt).toBe('be brief') - expect(summaryRequest.prefix).toBe(h.prefix.fewShots) + // Compaction-mode turn: dedicated summarizer system prompt, no main prefix, + // and the real conversation fed as messages with a free-form continuation. 
+ expect(summaryRequest.systemPrompt).toBe(COMPACTION_SYSTEM_PROMPT) + expect(summaryRequest.prefix).toEqual([]) expect(summaryRequest.tools).toEqual([]) expect(summaryRequest.maxTokens).toBe(333) expect(summaryRequest.temperature).toBe(0) expect(summaryRequest.reasoningEffort).toBe('off') - expect(summaryRequest.contextInstructions?.join('\n')).toContain('history fold') - expect(summaryPromptItem?.kind).toBe('user_message') - expect(summaryPromptItem?.kind === 'user_message' ? summaryPromptItem.text : '') - .toContain('Conversation history to fold') + expect(summaryRequest.history.some((item) => item.id === 'model_summary_hist_0')).toBe(true) + expect(summaryContinuation?.kind).toBe('user_message') + expect(summaryContinuation?.kind === 'user_message' ? summaryContinuation.text : '') + .toContain('Provide a detailed summary of our conversation above') expect(mainSummary?.kind === 'compaction' ? mainSummary.summary : '') .toContain('Model summary: preserve alpha.txt') expect(persistedSummary?.kind === 'compaction' ? 
persistedSummary.summary : '') @@ -2430,7 +2433,7 @@ describe('AgentLoop', () => { async *stream(request: ModelRequest): AsyncIterable { requests.push(request) const isSummaryRequest = request.tools.length === 0 && - request.contextInstructions?.some((text) => text.includes('history fold')) + request.systemPrompt === COMPACTION_SYSTEM_PROMPT if (isSummaryRequest) { yield { kind: 'error', message: 'summary model unavailable', code: 'summary_down' } return diff --git a/kun/tests/mcp-config.test.ts b/kun/tests/mcp-config.test.ts index e73958aff..4d7e35780 100644 --- a/kun/tests/mcp-config.test.ts +++ b/kun/tests/mcp-config.test.ts @@ -10,6 +10,7 @@ describe('MCP config', () => { const server = McpServerConfig.parse({ transport: 'stdio', command: 'node', + cwd: '/tmp/project', args: ['server.js'], env: { API_KEY: 'secret' }, trustScope: 'workspace', @@ -18,6 +19,7 @@ describe('MCP config', () => { expect(server.enabled).toBe(true) expect(server.transport).toBe('stdio') + expect(server.cwd).toBe('/tmp/project') expect(server.timeoutMs).toBe(30_000) }) diff --git a/kun/tests/mcp-tool-provider.test.ts b/kun/tests/mcp-tool-provider.test.ts index 3fe4ad33c..c0b647171 100644 --- a/kun/tests/mcp-tool-provider.test.ts +++ b/kun/tests/mcp-tool-provider.test.ts @@ -7,6 +7,7 @@ import { formatMcpConnectionError, isMcpServerTrusted, normalizeMcpToolName, + resolveMcpServerCwd, type McpClientLike } from '../src/adapters/tool/mcp-tool-provider.js' import { REDACTED_SECRET } from '../src/config/secret-redaction.js' @@ -138,6 +139,26 @@ describe('MCP tool provider', () => { expect(isMcpServerTrusted(server, '/tmp/other')).toBe(false) }) + it('resolves stdio MCP working directories from explicit config or trusted workspace fallback', () => { + const base = { + enabled: true, + transport: 'stdio', + command: 'node', + args: [], + url: undefined, + headers: {}, + env: {}, + trustScope: 'workspace', + trustedWorkspaceRoots: ['/tmp/project'], + timeoutMs: 30_000 + } satisfies 
McpServerConfig + + expect(resolveMcpServerCwd({ ...base, cwd: '/tmp/explicit' })).toBe('/tmp/explicit') + expect(resolveMcpServerCwd(base)).toBe('/tmp/project') + expect(resolveMcpServerCwd({ ...base, trustScope: 'user', trustedWorkspaceRoots: [] })).toBeUndefined() + expect(resolveMcpServerCwd({ ...base, transport: 'streamable-http', url: 'https://mcp.example.test' })).toBeUndefined() + }) + it('builds registry providers from connected MCP clients and executes tools', async () => { const config = KunCapabilitiesConfig.parse({ mcp: { diff --git a/kun/tests/mcp-tool-search.test.ts b/kun/tests/mcp-tool-search.test.ts new file mode 100644 index 000000000..3a9c56caa --- /dev/null +++ b/kun/tests/mcp-tool-search.test.ts @@ -0,0 +1,107 @@ +import { describe, expect, it } from 'vitest' +import { CapabilityRegistry } from '../src/adapters/tool/capability-registry.js' +import { LocalToolHost } from '../src/adapters/tool/local-tool-host.js' +import { + createMcpSearchProvider, + type McpSearchCatalogRecord +} from '../src/adapters/tool/mcp-tool-search.js' +import { KunCapabilitiesConfig, McpServerConfig } from '../src/contracts/capabilities.js' +import type { ToolHostContext } from '../src/ports/tool-host.js' + +const SEARCH_CONFIG = KunCapabilitiesConfig.parse({ + mcp: { enabled: true, search: { enabled: true, mode: 'search' } } +}).mcp.search + +const SERVER = McpServerConfig.parse({ transport: 'stdio', command: 'noop', trustScope: 'user' }) + +function record(serverId: string, toolName: string, calls: string[]): McpSearchCatalogRecord { + return { + toolId: `${serverId}/${toolName}`, + serverId, + server: SERVER, + client: { + async callTool(input) { + calls.push(`${serverId}/${input.name}`) + return { ok: true } + } + }, + descriptor: { name: toolName, description: `${toolName} on ${serverId}` }, + normalizedName: `mcp_${serverId}_${toolName}`, + policy: 'auto' + } +} + +function ctx(overrides: Partial = {}): ToolHostContext { + return { + threadId: 't', + turnId: 'u', 
+ workspace: '/ws', + approvalPolicy: 'auto', + abortSignal: new AbortController().signal, + awaitApproval: async () => 'allow', + ...overrides + } +} + +describe('MCP search provider honors blockedProviderIds', () => { + it('hides a blocked server from mcp_search/mcp_describe/mcp_call but leaves others reachable', async () => { + const calls: string[] = [] + const records = [record('github', 'create_issue', calls), record('files', 'read_file', calls)] + const host = new LocalToolHost({ + registry: new CapabilityRegistry([ + createMcpSearchProvider({ + config: SEARCH_CONFIG, + state: { records }, + refreshCatalog: async () => records, + isServerTrusted: () => true + }) + ]) + }) + const blocked = ctx({ blockedProviderIds: ['mcp:github'] }) + + // mcp_search must not surface the blocked server's tool, and reports a + // reduced searched-tools count. + const search = await host.execute( + { callId: 'c0', toolName: 'mcp_search', arguments: { query: 'create issue github' } }, + blocked + ) + expect(search.item.kind).toBe('tool_result') + if (search.item.kind === 'tool_result') { + const output = search.item.output as { searchedTools: number; results: Array<{ serverId: string }> } + expect(output.searchedTools).toBe(1) + expect(output.results.every((r) => r.serverId !== 'github')).toBe(true) + } + + // mcp_describe (schema disclosure) is refused for the blocked server. + const describeBlocked = await host.execute( + { callId: 'c1', toolName: 'mcp_describe', arguments: { toolId: 'github/create_issue' } }, + blocked + ) + expect(describeBlocked.item).toMatchObject({ kind: 'tool_result', isError: true }) + + // mcp_call (execution) is refused AND the underlying client is never invoked. 
+ const callBlocked = await host.execute( + { callId: 'c2', toolName: 'mcp_call', arguments: { toolId: 'github/create_issue', arguments: {} } }, + blocked + ) + expect(callBlocked.item).toMatchObject({ kind: 'tool_result', isError: true }) + expect(calls).toEqual([]) + + // A non-blocked server still works. + const callOk = await host.execute( + { callId: 'c3', toolName: 'mcp_call', arguments: { toolId: 'files/read_file', arguments: {} } }, + blocked + ) + expect(callOk.item).toMatchObject({ kind: 'tool_result', isError: false }) + expect(calls).toEqual(['files/read_file']) + + // Without a deny-list the blocked server is reachable again (the deny is + // per-turn, not baked into the catalog). + const callDefault = await host.execute( + { callId: 'c4', toolName: 'mcp_call', arguments: { toolId: 'github/create_issue', arguments: {} } }, + ctx() + ) + expect(callDefault.item).toMatchObject({ kind: 'tool_result', isError: false }) + expect(calls).toEqual(['files/read_file', 'github/create_issue']) + }) +}) diff --git a/kun/tests/model-client.test.ts b/kun/tests/model-client.test.ts index 1ef1350e9..9da15a53c 100644 --- a/kun/tests/model-client.test.ts +++ b/kun/tests/model-client.test.ts @@ -512,7 +512,9 @@ describe('CompatModelClient', () => { }) expect(sentBodies[0]).toMatchObject({ model: 'deepseek-chat', - max_tokens: 4096, + // Non-reasoning messages default (raised from 4096 so reasoning models + // don't truncate their tool calls; this model has no reasoning metadata). 
+ max_tokens: 8192, system: [{ type: 'text', text: 'You are a helpful assistant.', diff --git a/kun/tests/skill-runtime.test.ts b/kun/tests/skill-runtime.test.ts index 110dd49e8..804dccd1f 100644 --- a/kun/tests/skill-runtime.test.ts +++ b/kun/tests/skill-runtime.test.ts @@ -109,7 +109,8 @@ describe('SkillRuntime', () => { const runtime = await createRuntime() expect(runtime.diagnostics().skills.map((skill) => skill.id)).toContain('linked') - expect(runtime.resolveTurn({ prompt: '/linked go', workspace: root }).activeSkillIds).toEqual(['linked']) + await expect(runtime.resolveTurn({ prompt: '/linked go', workspace: root })) + .resolves.toMatchObject({ activeSkillIds: ['linked'] }) } finally { await rm(realDir, { recursive: true, force: true }) } @@ -129,7 +130,7 @@ describe('SkillRuntime', () => { }, 'small instructions') const runtime = await createRuntime({ instructionBudgetBytes: 600 }) - const resolution = runtime.resolveTurn({ + const resolution = await runtime.resolveTurn({ prompt: 'Please handle TypeScript in src/app.ts', workspace: root }) @@ -157,6 +158,60 @@ describe('SkillRuntime', () => { expect(catalog).toContain('### How to use skills') }) + it('scopes the dynamic skill catalog to the current workspace', async () => { + const workspaceA = await mkdtemp(join(tmpdir(), 'kun-skill-workspace-a-')) + const workspaceB = await mkdtemp(join(tmpdir(), 'kun-skill-workspace-b-')) + try { + const rootA = join(workspaceA, '.agents', 'skills') + const rootB = join(workspaceB, '.agents', 'skills') + await writeSkillAt(rootA, 'alpha', { + id: 'alpha', + name: 'Alpha', + triggers: { commands: ['/alpha'] } + }, 'alpha instructions') + await writeSkillAt(rootB, 'beta', { + id: 'beta', + name: 'Beta', + triggers: { commands: ['/beta'] } + }, 'beta instructions') + + const config = KunCapabilitiesConfig.parse({ + skills: { + enabled: true, + roots: [rootA, rootB], + workspaceRoots: [workspaceA, workspaceB], + legacySkillMd: true + } + }) + const runtime = await 
SkillRuntime.create(config.skills) + const workspaceAResolution = await runtime.resolveTurn({ prompt: '/alpha run', workspace: workspaceA }) + const workspaceBResolution = await runtime.resolveTurn({ prompt: '/beta run', workspace: workspaceB }) + + expect(workspaceAResolution.catalogInstruction).toContain('Alpha') + expect(workspaceAResolution.catalogInstruction).not.toContain('Beta') + expect(workspaceAResolution.activeSkillIds).toEqual(['alpha']) + expect(workspaceBResolution.catalogInstruction).toContain('Beta') + expect(workspaceBResolution.catalogInstruction).not.toContain('Alpha') + expect(workspaceBResolution.activeSkillIds).toEqual(['beta']) + + const disabledConfig = KunCapabilitiesConfig.parse({ + skills: { + enabled: true, + roots: [rootA], + workspaceRoots: [workspaceA, workspaceB], + legacySkillMd: true + } + }) + const disabledRuntime = await SkillRuntime.create(disabledConfig.skills) + const disabledResolution = await disabledRuntime.resolveTurn({ prompt: '/beta run', workspace: workspaceB }) + expect(disabledResolution.catalogInstruction).toBeUndefined() + expect(disabledResolution.activeSkillIds).toEqual([]) + } finally { + await rm(workspaceA, { recursive: true, force: true }) + await rm(workspaceB, { recursive: true, force: true }) + } + }) + it('truncates the catalog when the byte budget is exceeded', async () => { await writeSkill('one', { id: 'one', name: 'One', description: 'd'.repeat(400) }, 'b') await writeSkill('two', { id: 'two', name: 'Two', description: 'd'.repeat(400) }, 'b') @@ -168,7 +223,7 @@ describe('SkillRuntime', () => { }) it('returns no catalog when skills are disabled', async () => { - const runtime = await SkillRuntime.create({ enabled: false, roots: [], legacySkillMd: true }) + const runtime = await SkillRuntime.create({ enabled: false, roots: [], workspaceRoots: [], globalRoots: [], disabledIds: [], legacySkillMd: true }) expect(runtime.catalogInstruction()).toBeUndefined() }) @@ -182,7 +237,7 @@ describe('SkillRuntime', 
() => { const runtime = await createRuntime() for (const ref of ['gamma', '$gamma', '@gamma', 'skill:gamma']) { - const result = runtime.loadSkillById(ref) + const result = await runtime.loadSkillById(ref) expect('error' in result).toBe(false) if ('error' in result) continue expect(result.skillId).toBe('gamma') @@ -197,16 +252,79 @@ describe('SkillRuntime', () => { await writeSkill('known', { id: 'known', name: 'Known' }, 'body') const runtime = await createRuntime() - const result = runtime.loadSkillById('does-not-exist') + const result = await runtime.loadSkillById('does-not-exist') expect('error' in result).toBe(true) if ('error' in result) expect(result.error).toContain('known') }) + it('excludes disabledIds from catalog, matching, load, and diagnostics', async () => { + await writeSkill('gmail', { + id: 'gmail', + name: 'Gmail', + triggers: { commands: ['/gmail'] } + }, 'gmail body') + await writeSkill('keeper', { + id: 'keeper', + name: 'Keeper', + triggers: { commands: ['/keeper'] } + }, 'keeper body') + // Mixed-case / prefixed forms must still match the slugged discovered id. 
+ const runtime = await createRuntime({}, { disabledIds: ['Gmail', 'skill:nonexistent'] }) + + // diagnostics + count + expect(runtime.diagnostics().skills.map((skill) => skill.id)).toEqual(['keeper']) + expect(runtime.count()).toBe(1) + + // global catalog + expect(runtime.catalogInstruction()).not.toContain('Gmail') + expect(runtime.catalogInstruction()).toContain('Keeper') + + // per-turn catalog + auto-match (command trigger) + const resolution = await runtime.resolveTurn({ prompt: '/gmail send', workspace: root }) + expect(resolution.catalogInstruction).not.toContain('Gmail') + expect(resolution.activeSkillIds).not.toContain('gmail') + + // load_skill + const result = await runtime.loadSkillById('gmail') + expect('error' in result).toBe(true) + }) + + it('excludes per-call blockedSkillIds from resolveTurn + load_skill without mutating siblings', async () => { + await writeSkill('gmail', { + id: 'gmail', + name: 'Gmail', + triggers: { commands: ['/gmail'] } + }, 'gmail body') + await writeSkill('keeper', { + id: 'keeper', + name: 'Keeper', + triggers: { commands: ['/keeper'] } + }, 'keeper body') + const runtime = await createRuntime() + + // A child whose profile blocks gmail: hidden from the per-turn catalog and + // auto-match. The `skill:`-prefixed + mixed-case form must still match the + // slugged discovered id (mirrors load_skill's accepted forms). + const blocked = await runtime.resolveTurn({ prompt: '/gmail send', workspace: root, blockedSkillIds: ['skill:Gmail'] }) + expect(blocked.catalogInstruction).not.toContain('Gmail') + expect(blocked.catalogInstruction).toContain('Keeper') + expect(blocked.activeSkillIds).not.toContain('gmail') + // load_skill rejects the blocked id for that child. + expect('error' in (await runtime.loadSkillById('gmail', root, ['gmail']))).toBe(true) + + // The shared runtime is NOT mutated: a sibling call with no deny-list still + // sees gmail in the catalog, auto-activates it, and can load it. 
+ const sibling = await runtime.resolveTurn({ prompt: '/gmail send', workspace: root }) + expect(sibling.catalogInstruction).toContain('Gmail') + expect(sibling.activeSkillIds).toContain('gmail') + expect('error' in (await runtime.loadSkillById('gmail', root))).toBe(false) + }) + it('truncates an oversized skill body to the instruction budget on load', async () => { await writeSkill('huge', { id: 'huge', name: 'Huge' }, 'z'.repeat(5_000)) const runtime = await createRuntime({ instructionBudgetBytes: 1_000 }) - const result = runtime.loadSkillById('huge') + const result = await runtime.loadSkillById('huge') expect('error' in result).toBe(false) if ('error' in result) return expect(result.truncated).toBe(true) @@ -228,7 +346,7 @@ describe('SkillRuntime', () => { allowedTools: ['bash'] }, 'Use bash') const runtime = await createRuntime() - const resolution = runtime.resolveTurn({ + const resolution = await runtime.resolveTurn({ prompt: '/readonly inspect', workspace: root }) @@ -283,7 +401,8 @@ describe('SkillRuntime', () => { await runtime.refresh() expect(runtime.count()).toBe(1) - expect(runtime.resolveTurn({ prompt: '/new run', workspace: root }).activeSkillIds).toEqual(['new']) + await expect(runtime.resolveTurn({ prompt: '/new run', workspace: root })) + .resolves.toMatchObject({ activeSkillIds: ['new'] }) }) it('injects active Skills into AgentLoop context and turn metadata', async () => { @@ -326,19 +445,24 @@ describe('SkillRuntime', () => { await h.loop.runTurn(h.threadId, h.turnId) - expect(seenRequest?.contextInstructions?.[0]).toContain('Always inspect the diff first.') + expect(seenRequest?.contextInstructions?.join('\n')).toContain('Always inspect the diff first.') expect(seenRequest?.tools.map((tool) => tool.name)).toEqual(['read']) const turn = await h.turns.getTurn(h.threadId, h.turnId) expect(turn?.activeSkillIds).toEqual(['review']) expect(turn?.skillInjectionBytes).toBeGreaterThan(0) }) - async function createRuntime(options: Parameters[1] = {}) { 
+ async function createRuntime( + options: Parameters[1] = {}, + skillsOverride: Record = {} + ) { const config = KunCapabilitiesConfig.parse({ skills: { enabled: true, roots: [root], - legacySkillMd: true + workspaceRoots: [], + legacySkillMd: true, + ...skillsOverride } }) return SkillRuntime.create(config.skills, options) @@ -349,7 +473,16 @@ describe('SkillRuntime', () => { manifest: Record, entry: string ): Promise { - const dir = join(root, folder) + await writeSkillAt(root, folder, manifest, entry) + } + + async function writeSkillAt( + parentRoot: string, + folder: string, + manifest: Record, + entry: string + ): Promise { + const dir = join(parentRoot, folder) await mkdir(dir, { recursive: true }) const entryName = typeof manifest.entry === 'string' ? manifest.entry : 'SKILL.md' await writeFile(join(dir, 'skill.json'), JSON.stringify(manifest), 'utf8') diff --git a/kun/tests/skill-tool-provider.test.ts b/kun/tests/skill-tool-provider.test.ts index 644ebc750..c5d2dc94a 100644 --- a/kun/tests/skill-tool-provider.test.ts +++ b/kun/tests/skill-tool-provider.test.ts @@ -29,7 +29,7 @@ describe('buildSkillToolProviders', () => { } it('returns no provider when no skills are loaded', async () => { - const runtime = await SkillRuntime.create({ enabled: false, roots: [], legacySkillMd: true }) + const runtime = await SkillRuntime.create({ enabled: false, roots: [], workspaceRoots: [], globalRoots: [], disabledIds: [], legacySkillMd: true }) expect(buildSkillToolProviders(runtime)).toEqual([]) expect(buildSkillToolProviders(undefined)).toEqual([]) }) diff --git a/kun/tests/workspace-agents.test.ts b/kun/tests/workspace-agents.test.ts new file mode 100644 index 000000000..b64d3bd29 --- /dev/null +++ b/kun/tests/workspace-agents.test.ts @@ -0,0 +1,108 @@ +import { mkdir, mkdtemp, rm, writeFile } from 'node:fs/promises' +import { tmpdir } from 'node:os' +import { join } from 'node:path' +import { afterEach, beforeEach, describe, expect, it } from 'vitest' + +import { 
loadWorkspaceAgentProfiles } from '../src/delegation/workspace-agents.js' + +describe('loadWorkspaceAgentProfiles', () => { + let workspace: string + + beforeEach(async () => { + workspace = await mkdtemp(join(tmpdir(), 'kun-ws-agents-')) + await mkdir(join(workspace, '.kun', 'agents'), { recursive: true }) + }) + + afterEach(async () => { + await rm(workspace, { recursive: true, force: true }).catch(() => undefined) + }) + + it('returns an empty list when the agents directory is missing', async () => { + const empty = await mkdtemp(join(tmpdir(), 'kun-ws-empty-')) + expect(await loadWorkspaceAgentProfiles(empty)).toEqual([]) + await rm(empty, { recursive: true, force: true }) + }) + + it('parses a minimal frontmatter agent file', async () => { + await writeFile( + join(workspace, '.kun', 'agents', 'reviewer.md'), + [ + '---', + 'name: Reviewer', + 'description: 检查代码', + 'mode: subagent', + 'toolPolicy: readOnly', + '---', + 'You are a careful reviewer.' + ].join('\n') + ) + const profiles = await loadWorkspaceAgentProfiles(workspace) + expect(profiles).toHaveLength(1) + const entry = profiles[0]! + expect(entry.id).toBe('reviewer') + expect(entry.profile.name).toBe('Reviewer') + expect(entry.profile.description).toBe('检查代码') + expect(entry.profile.mode).toBe('subagent') + expect(entry.profile.toolPolicy).toBe('readOnly') + // Body becomes the systemPrompt when no explicit field is given. + expect(entry.profile.systemPrompt).toBe('You are a careful reviewer.') + }) + + it('uses explicit id, parses allowedTools list, and falls back to subagent mode', async () => { + await writeFile( + join(workspace, '.kun', 'agents', 'security.md'), + [ + '---', + 'id: security-reviewer', + 'name: Security Reviewer', + 'allowedTools: [read, grep, ls]', + 'model: deepseek-chat', + 'providerId: deepseek', + 'color: "#10b981"', + '---' + ].join('\n') + ) + const profiles = await loadWorkspaceAgentProfiles(workspace) + expect(profiles).toHaveLength(1) + const entry = profiles[0]! 
+ expect(entry.id).toBe('security-reviewer') + expect(entry.profile.allowedTools).toEqual(['read', 'grep', 'ls']) + expect(entry.profile.providerId).toBe('deepseek') + expect(entry.profile.color).toBe('#10b981') + expect(entry.profile.mode).toBe('subagent') + }) + + it('parses blockedTools / blockedMcpServers / blockedSkills deny-lists from frontmatter', async () => { + await writeFile( + join(workspace, '.kun', 'agents', 'scoped.md'), + [ + '---', + 'id: scoped', + 'name: Scoped', + 'toolPolicy: inherit', + 'blockedTools: [bash, write]', + 'blockedMcpServers: [github]', + 'blockedSkills: [deep-research, pdf]', + '---' + ].join('\n') + ) + const profiles = await loadWorkspaceAgentProfiles(workspace) + const entry = profiles.find((p) => p.id === 'scoped')! + expect(entry.profile.blockedTools).toEqual(['bash', 'write']) + expect(entry.profile.blockedMcpServers).toEqual(['github']) + expect(entry.profile.blockedSkills).toEqual(['deep-research', 'pdf']) + }) + + it('drops files without frontmatter silently', async () => { + await writeFile( + join(workspace, '.kun', 'agents', 'no-front.md'), + 'Plain markdown without YAML frontmatter.' + ) + await writeFile( + join(workspace, '.kun', 'agents', 'real.md'), + '---\nname: Real\nmode: all\ntoolPolicy: inherit\n---\nBody.' 
+ ) + const profiles = await loadWorkspaceAgentProfiles(workspace) + expect(profiles.map((p) => p.id)).toEqual(['real']) + }) +}) diff --git a/src/asset/img/kun_cheer.png b/src/asset/img/kun_cheer.png new file mode 100644 index 000000000..ec13c5b69 Binary files /dev/null and b/src/asset/img/kun_cheer.png differ diff --git a/src/asset/img/kun_clip.png b/src/asset/img/kun_clip.png new file mode 100644 index 000000000..9829a4fe3 Binary files /dev/null and b/src/asset/img/kun_clip.png differ diff --git a/src/asset/img/kun_headset.png b/src/asset/img/kun_headset.png new file mode 100644 index 000000000..18da18b08 Binary files /dev/null and b/src/asset/img/kun_headset.png differ diff --git a/src/asset/img/kun_laptop.png b/src/asset/img/kun_laptop.png new file mode 100644 index 000000000..9c5c3f530 Binary files /dev/null and b/src/asset/img/kun_laptop.png differ diff --git a/src/asset/img/kun_magic.png b/src/asset/img/kun_magic.png new file mode 100644 index 000000000..550acee48 Binary files /dev/null and b/src/asset/img/kun_magic.png differ diff --git a/src/asset/img/kun_rest.png b/src/asset/img/kun_rest.png new file mode 100644 index 000000000..f16a9d96b Binary files /dev/null and b/src/asset/img/kun_rest.png differ diff --git a/src/asset/img/kun_search.png b/src/asset/img/kun_search.png new file mode 100644 index 000000000..48b9db30d Binary files /dev/null and b/src/asset/img/kun_search.png differ diff --git a/src/asset/img/kun_wrench.png b/src/asset/img/kun_wrench.png new file mode 100644 index 000000000..a4a4c3aa4 Binary files /dev/null and b/src/asset/img/kun_wrench.png differ diff --git a/src/main/claw-runtime.test.ts b/src/main/claw-runtime.test.ts index 5804c2a01..323d252cb 100644 --- a/src/main/claw-runtime.test.ts +++ b/src/main/claw-runtime.test.ts @@ -24,13 +24,14 @@ function buildSettings(): AppSettingsV1 { version: 1, locale: 'en', theme: 'system', - uiFontScale: 'small', + uiFontScale: 0.82, provider: defaultModelProviderSettings(), agents: { kun: 
defaultKunRuntimeSettings() }, workspaceRoot: '/tmp/workspace', log: { enabled: true, retentionDays: 7 }, + checkpointCleanup: { enabled: false, intervalDays: 3 }, notifications: { turnComplete: true }, appBehavior: { openAtLogin: false, startMinimized: false, closeToTray: false }, keyboardShortcuts: defaultKeyboardShortcuts(), @@ -359,6 +360,8 @@ describe('ClawRuntime', () => { it('accepts assistant_text items when waiting for a Claw turn result', async () => { const settings = buildSettings() + settings.agents.kun.approvalPolicy = 'on-request' + settings.agents.kun.sandboxMode = 'workspace-write' const runtimeRequest = vi.fn(async (_settings, path, init) => { if (path === '/v1/threads') { return { ok: true, status: 200, body: JSON.stringify({ id: 'thr_1' }) } @@ -422,19 +425,243 @@ describe('ClawRuntime', () => { ([, path, init]) => path === '/v1/threads' && init?.method === 'POST' ) expect(JSON.parse(String(createThreadCall?.[2]?.body ?? '{}'))).toMatchObject({ - approvalPolicy: 'auto', - sandboxMode: 'danger-full-access' + approvalPolicy: 'on-request', + sandboxMode: 'workspace-write' }) const turnCall = runtimeRequest.mock.calls.find( ([, path, init]) => path === '/v1/threads/thr_1/turns' && init?.method === 'POST' ) expect(JSON.parse(String(turnCall?.[2]?.body ?? 
'{}'))).toMatchObject({ disableUserInput: true, - approvalPolicy: 'auto', - sandboxMode: 'danger-full-access' + approvalPolicy: 'on-request', + sandboxMode: 'workspace-write' + }) + }) + + it('passes non-default agent approval/sandbox settings through to IM turns without downgrading', async () => { + const settings = buildSettings() + settings.agents.kun.approvalPolicy = 'untrusted' + settings.agents.kun.sandboxMode = 'read-only' + const runtimeRequest = vi.fn(async (_settings, path, init) => { + if (path === '/v1/threads') { + return { ok: true, status: 200, body: JSON.stringify({ id: 'thr_1' }) } + } + if (path === '/v1/threads/thr_1' && init?.method === 'PATCH') { + return { ok: true, status: 200, body: '{}' } + } + if (path === '/v1/threads/thr_1' && init?.method === 'GET') { + return { + ok: true, + status: 200, + body: JSON.stringify({ + thread: { id: 'thr_1', status: 'completed' }, + turns: [{ id: 'turn_1', status: 'completed' }], + items: [{ kind: 'assistant_text', detail: 'ok' }] + }) + } + } + if (path === '/v1/threads/thr_1/turns') { + return { ok: true, status: 202, body: JSON.stringify({ threadId: 'thr_1', turnId: 'turn_1' }) } + } + return { ok: true, status: 200, body: '{}' } + }) + const runtime = createClawRuntime({ + store: { load: vi.fn(async () => settings), patch: vi.fn(async () => settings) } as never, + runtimeRequest, + logError: () => undefined + }) + + await (runtime as unknown as { + runPrompt: ( + settingsArg: AppSettingsV1, + options: { + prompt: string + title: string + workspaceRoot: string + model: string + mode: 'agent' | 'plan' + waitForResult: boolean + responseTimeoutMs: number + source: 'task' | 'im' + } + ) => Promise<{ ok: boolean; text?: string }> + }).runPrompt(settings, { + prompt: 'hello', + title: 'demo', + workspaceRoot: '/tmp/workspace', + model: 'auto', + mode: 'agent', + waitForResult: true, + responseTimeoutMs: 10, + source: 'im' + }) + + const createThreadCall = runtimeRequest.mock.calls.find( + ([, path, init]) => 
path === '/v1/threads' && init?.method === 'POST' + ) + expect(JSON.parse(String(createThreadCall?.[2]?.body ?? '{}'))).toMatchObject({ + approvalPolicy: 'untrusted', + sandboxMode: 'read-only' + }) + const turnCall = runtimeRequest.mock.calls.find( + ([, path, init]) => path === '/v1/threads/thr_1/turns' && init?.method === 'POST' + ) + expect(JSON.parse(String(turnCall?.[2]?.body ?? '{}'))).toMatchObject({ + disableUserInput: true, + approvalPolicy: 'untrusted', + sandboxMode: 'read-only' + }) + }) + + it('passes the never approval policy through to IM turns without escalating to auto', async () => { + const settings = buildSettings() + settings.agents.kun.approvalPolicy = 'never' + settings.agents.kun.sandboxMode = 'read-only' + const runtimeRequest = vi.fn(async (_settings, path, init) => { + if (path === '/v1/threads') { + return { ok: true, status: 200, body: JSON.stringify({ id: 'thr_1' }) } + } + if (path === '/v1/threads/thr_1' && init?.method === 'PATCH') { + return { ok: true, status: 200, body: '{}' } + } + if (path === '/v1/threads/thr_1' && init?.method === 'GET') { + return { + ok: true, + status: 200, + body: JSON.stringify({ + thread: { id: 'thr_1', status: 'completed' }, + turns: [{ id: 'turn_1', status: 'completed' }], + items: [{ kind: 'assistant_text', detail: 'ok' }] + }) + } + } + if (path === '/v1/threads/thr_1/turns') { + return { ok: true, status: 202, body: JSON.stringify({ threadId: 'thr_1', turnId: 'turn_1' }) } + } + return { ok: true, status: 200, body: '{}' } + }) + const runtime = createClawRuntime({ + store: { load: vi.fn(async () => settings), patch: vi.fn(async () => settings) } as never, + runtimeRequest, + logError: () => undefined + }) + + await (runtime as unknown as { + runPrompt: ( + settingsArg: AppSettingsV1, + options: { + prompt: string + title: string + workspaceRoot: string + model: string + mode: 'agent' | 'plan' + waitForResult: boolean + responseTimeoutMs: number + source: 'task' | 'im' + } + ) => Promise<{ ok: 
boolean; text?: string }> + }).runPrompt(settings, { + prompt: 'hello', + title: 'demo', + workspaceRoot: '/tmp/workspace', + model: 'auto', + mode: 'agent', + waitForResult: true, + responseTimeoutMs: 10, + source: 'im' + }) + + const createThreadCall = runtimeRequest.mock.calls.find( + ([, path, init]) => path === '/v1/threads' && init?.method === 'POST' + ) + expect(JSON.parse(String(createThreadCall?.[2]?.body ?? '{}'))).toMatchObject({ + approvalPolicy: 'never', + sandboxMode: 'read-only' + }) + const turnCall = runtimeRequest.mock.calls.find( + ([, path, init]) => path === '/v1/threads/thr_1/turns' && init?.method === 'POST' + ) + expect(JSON.parse(String(turnCall?.[2]?.body ?? '{}'))).toMatchObject({ + disableUserInput: true, + approvalPolicy: 'never', + sandboxMode: 'read-only' }) }) + it('does not attach IM-only permission fields when source is not im', async () => { + const settings = buildSettings() + settings.agents.kun.approvalPolicy = 'untrusted' + settings.agents.kun.sandboxMode = 'read-only' + const runtimeRequest = vi.fn(async (_settings, path, init) => { + if (path === '/v1/threads') { + return { ok: true, status: 200, body: JSON.stringify({ id: 'thr_1' }) } + } + if (path === '/v1/threads/thr_1' && init?.method === 'PATCH') { + return { ok: true, status: 200, body: '{}' } + } + if (path === '/v1/threads/thr_1' && init?.method === 'GET') { + return { + ok: true, + status: 200, + body: JSON.stringify({ + thread: { id: 'thr_1', status: 'completed' }, + turns: [{ id: 'turn_1', status: 'completed' }], + items: [{ kind: 'assistant_text', detail: 'ok' }] + }) + } + } + if (path === '/v1/threads/thr_1/turns') { + return { ok: true, status: 202, body: JSON.stringify({ threadId: 'thr_1', turnId: 'turn_1' }) } + } + return { ok: true, status: 200, body: '{}' } + }) + const runtime = createClawRuntime({ + store: { load: vi.fn(async () => settings), patch: vi.fn(async () => settings) } as never, + runtimeRequest, + logError: () => undefined + }) + + await 
(runtime as unknown as { + runPrompt: ( + settingsArg: AppSettingsV1, + options: { + prompt: string + title: string + workspaceRoot: string + model: string + mode: 'agent' | 'plan' + waitForResult: boolean + responseTimeoutMs: number + source: 'task' | 'im' + } + ) => Promise<{ ok: boolean; text?: string }> + }).runPrompt(settings, { + prompt: 'hello', + title: 'demo', + workspaceRoot: '/tmp/workspace', + model: 'auto', + mode: 'agent', + waitForResult: true, + responseTimeoutMs: 10, + source: 'task' + }) + + const createThreadCall = runtimeRequest.mock.calls.find( + ([, path, init]) => path === '/v1/threads' && init?.method === 'POST' + ) + const createBody = JSON.parse(String(createThreadCall?.[2]?.body ?? '{}')) + expect(createBody).not.toHaveProperty('approvalPolicy') + expect(createBody).not.toHaveProperty('sandboxMode') + expect(createBody).not.toHaveProperty('disableUserInput') + const turnCall = runtimeRequest.mock.calls.find( + ([, path, init]) => path === '/v1/threads/thr_1/turns' && init?.method === 'POST' + ) + const turnBody = JSON.parse(String(turnCall?.[2]?.body ?? 
'{}')) + expect(turnBody).not.toHaveProperty('approvalPolicy') + expect(turnBody).not.toHaveProperty('sandboxMode') + expect(turnBody).not.toHaveProperty('disableUserInput') + }) + it('reads assistant text from the Kun thread detail shape used by the real runtime', async () => { const settings = buildSettings() const runtimeRequest = vi.fn(async (_settings, path, init) => { @@ -1214,6 +1441,105 @@ describe('ClawRuntime', () => { }) }) + it('passes non-default agent approval/sandbox settings through the WeChat webhook path without downgrading', async () => { + const settings = buildSettings() + settings.claw.im.enabled = true + settings.claw.im.responseTimeoutMs = 2_500 + settings.agents.kun.approvalPolicy = 'untrusted' + settings.agents.kun.sandboxMode = 'read-only' + settings.claw.channels = [buildChannel({ + provider: 'weixin' as const, + id: 'channel_weixin', + label: 'WeChat', + threadId: '', + conversations: [] + })] + const { store } = mutableSettingsStore(settings) + const runtimeRequest = vi.fn(async (_settings, path, init) => { + if (path === '/v1/threads' && init?.method === 'POST') { + return { ok: true, status: 201, body: JSON.stringify({ id: 'thr_weixin' }) } + } + if (path === '/v1/threads/thr_weixin' && init?.method === 'PATCH') { + return { ok: true, status: 200, body: '{}' } + } + if (path === '/v1/threads/thr_weixin/turns' && init?.method === 'POST') { + return { ok: true, status: 202, body: JSON.stringify({ turnId: 'turn_weixin' }) } + } + if (path === '/v1/threads/thr_weixin' && init?.method === 'GET') { + return { + ok: true, + status: 200, + body: JSON.stringify({ + id: 'thr_weixin', + status: 'idle', + turns: [ + { + id: 'turn_weixin', + status: 'completed', + items: [{ kind: 'assistant_text', text: 'hello from GUI' }] + } + ] + }) + } + } + throw new Error(`unexpected path ${path}`) + }) + const runtime = createClawRuntime({ + store: store as never, + runtimeRequest: runtimeRequest as never, + logError: () => undefined, + 
createScheduledTaskFromText: vi.fn(async () => ({ kind: 'noop' as const })) + }) + const body = JSON.stringify({ + text: '你好', + provider: 'weixin', + channelId: 'channel_weixin', + chatId: 'wx_user_1', + messageId: 'wx_msg_1', + senderId: 'wx_user_1', + senderName: 'Alice' + }) + const req = { + method: 'POST', + url: settings.claw.im.path, + headers: {}, + async *[Symbol.asyncIterator]() { + yield Buffer.from(body) + } + } + let status = 0 + let responseBody = '' + const res = { + writeHead: vi.fn((nextStatus: number) => { + status = nextStatus + }), + end: vi.fn((payload: string) => { + responseBody = payload + }) + } + + await (runtime as unknown as { + handleWebhook: (request: typeof req, response: typeof res) => Promise + }).handleWebhook(req, res) + + expect(status).toBe(200) + const createThreadCall = runtimeRequest.mock.calls.find( + ([, path, init]) => path === '/v1/threads' && init?.method === 'POST' + ) + expect(JSON.parse(String(createThreadCall?.[2]?.body ?? '{}'))).toMatchObject({ + approvalPolicy: 'untrusted', + sandboxMode: 'read-only' + }) + const turnCall = runtimeRequest.mock.calls.find( + ([, path, init]) => path === '/v1/threads/thr_weixin/turns' && init?.method === 'POST' + ) + expect(JSON.parse(String(turnCall?.[2]?.body ?? 
'{}'))).toMatchObject({ + disableUserInput: true, + approvalPolicy: 'untrusted', + sandboxMode: 'read-only' + }) + }) + it('backfills a WeChat conversation when an existing channel thread handles the webhook', async () => { const settings = buildSettings() settings.claw.im.enabled = true diff --git a/src/main/claw-runtime.ts b/src/main/claw-runtime.ts index ab0fb3c55..17f14aa10 100644 --- a/src/main/claw-runtime.ts +++ b/src/main/claw-runtime.ts @@ -80,8 +80,6 @@ import { FeishuStreamer } from './feishu-streamer' import type { TelegramInboundPayload } from './telegram-runtime' const MAX_IM_FILE_UPLOAD_BYTES = 50 * 1024 * 1024 -const CLAW_IM_APPROVAL_POLICY = 'auto' -const CLAW_IM_SANDBOX_MODE = 'danger-full-access' const CLAW_TELEGRAM_INBOUND_IMAGE_HEADING = '[Telegram inbound message]' type FeishuClawChannel = ClawImChannelV1 & { @@ -573,8 +571,8 @@ export class ClawRuntime { const createThread = async (): Promise => { const body: Record = { workspace, model, mode: options.mode } if (options.source === 'im') { - body.approvalPolicy = CLAW_IM_APPROVAL_POLICY - body.sandboxMode = CLAW_IM_SANDBOX_MODE + body.approvalPolicy = runtimeSettings.agents.kun.approvalPolicy + body.sandboxMode = runtimeSettings.agents.kun.sandboxMode } const create = await this.deps.runtimeRequest(runtimeSettings, '/v1/threads', { method: 'POST', @@ -604,10 +602,12 @@ export class ClawRuntime { if (model) turnBody.model = model // IM senders can only reply in their chat app; they cannot answer // GUI prompts, so the runtime must not expose user-input tools. + // Permission fields are pure passthrough from the agent settings so + // IM turns follow the same policy the user picked for the GUI. 
if (options.source === 'im') { turnBody.disableUserInput = true - turnBody.approvalPolicy = CLAW_IM_APPROVAL_POLICY - turnBody.sandboxMode = CLAW_IM_SANDBOX_MODE + turnBody.approvalPolicy = runtimeSettings.agents.kun.approvalPolicy + turnBody.sandboxMode = runtimeSettings.agents.kun.sandboxMode } let turn = await this.startRuntimeTurn(runtimeSettings, thread.id, turnBody) if (!turn.ok && existingThreadId && isMissingThreadResult(turn)) { diff --git a/src/main/claw-schedule-mcp-config.test.ts b/src/main/claw-schedule-mcp-config.test.ts index 3b140faa1..d66c35698 100644 --- a/src/main/claw-schedule-mcp-config.test.ts +++ b/src/main/claw-schedule-mcp-config.test.ts @@ -33,7 +33,7 @@ function createSettings(patch: Partial = version: 1, locale: 'en', theme: 'system', - uiFontScale: 'small', + uiFontScale: 0.82, provider: defaultModelProviderSettings(), agents: { kun: defaultKunRuntimeSettings() @@ -43,6 +43,7 @@ function createSettings(patch: Partial = enabled: true, retentionDays: 2 }, + checkpointCleanup: { enabled: false, intervalDays: 3 }, notifications: { turnComplete: true }, diff --git a/src/main/claw-scheduled-task-detector.test.ts b/src/main/claw-scheduled-task-detector.test.ts index 84ab28634..762234dfa 100644 --- a/src/main/claw-scheduled-task-detector.test.ts +++ b/src/main/claw-scheduled-task-detector.test.ts @@ -27,13 +27,14 @@ function settings(endpointFormat: ModelEndpointFormat): AppSettingsV1 { version: 1, locale: 'en', theme: 'system', - uiFontScale: 'small', + uiFontScale: 0.82, provider, agents: { kun: defaultKunRuntimeSettings() }, workspaceRoot: '/tmp/workspace', log: { enabled: false, retentionDays: 7 }, + checkpointCleanup: { enabled: false, intervalDays: 3 }, notifications: { turnComplete: true }, appBehavior: { openAtLogin: false, startMinimized: false, closeToTray: false }, keyboardShortcuts: defaultKeyboardShortcuts(), diff --git a/src/main/index.ts b/src/main/index.ts index 6eaf5187d..75def014e 100644 --- a/src/main/index.ts +++ 
b/src/main/index.ts @@ -31,6 +31,7 @@ import { MIN_KUN_LOCAL_PORT, normalizeAppSettings, normalizeAppBehaviorSettings, + normalizeCheckpointCleanupSettings, normalizeKeyboardShortcuts, resolveKunRuntimeSettings, resolveTerminalColorMode, @@ -52,9 +53,15 @@ import { runtimeRequestViaHost } from './runtime/kun-adapter' import { waitForRuntimeTurnsIdle } from './runtime/managed-runtime-idle' -import { setKunUnexpectedExitHandler, type KunUnexpectedExitInfo } from './kun-process' +import { + resolveKunDataDir, + setKunUnexpectedExitHandler, + waitForKunStartupSettled, + type KunUnexpectedExitInfo +} from './kun-process' import { RestartBudget, type KunRuntimeStatus } from './kun-runtime-supervisor' import { configureLogger, logError, logWarn, pruneOnStartup } from './logger' +import { cleanupUnusedGitCheckpointsIfDue } from './services/git-checkpoint-service' import { createClawRuntime, type ClawRuntime } from './claw-runtime' import { createScheduleRuntime, type ScheduleRuntime } from './schedule-runtime' import { createWorkflowRuntime, type WorkflowRuntime } from './workflow-runtime' @@ -216,6 +223,7 @@ let trayMenu: Menu | null = null let trayMenuOpenPromise: Promise | null = null let isQuitting = false let closeWindowPromptOpen = false +let checkpointCleanupTimer: ReturnType | null = null type GuiUpdaterModule = typeof import('./gui-updater') @@ -227,6 +235,50 @@ function emitClawChannelActivity(payload: { channelId: string; threadId: string mainWindow.webContents.send('claw:channel-activity', payload) } +function stopCheckpointCleanupTimer(): void { + if (checkpointCleanupTimer) { + clearInterval(checkpointCleanupTimer) + checkpointCleanupTimer = null + } +} + +async function runCheckpointCleanupIfDue(settings: AppSettingsV1): Promise { + if (!settings.checkpointCleanup.enabled) return + const runtime = resolveKunRuntimeSettings(settings) + const dataDir = resolveKunDataDir(runtime) + const intervalDays = settings.checkpointCleanup.intervalDays + try { + const 
cleanup = await cleanupUnusedGitCheckpointsIfDue({ dataDir, intervalDays }) + if (!cleanup.due) return + const { result } = cleanup + console.info( + `[kun-gui] git checkpoint cleanup scanned=${result.scanned} deleted=${result.deleted} kept=${result.kept} failed=${result.failed}` + ) + if (result.failed > 0) { + logWarn('git-checkpoint-cleanup', 'failed to delete some unused checkpoints', { + failed: result.failed, + failedIds: result.failedIds + }) + } + } catch (error) { + logWarn('git-checkpoint-cleanup', 'failed to clean unused checkpoints', { + message: error instanceof Error ? error.message : String(error) + }) + } +} + +function syncCheckpointCleanupTimer(settings: AppSettingsV1): void { + stopCheckpointCleanupTimer() + if (!settings.checkpointCleanup.enabled) return + const intervalMs = settings.checkpointCleanup.intervalDays * 24 * 60 * 60 * 1_000 + const run = (): void => { + void runCheckpointCleanupIfDue(settings) + } + run() + checkpointCleanupTimer = setInterval(run, intervalMs) + checkpointCleanupTimer.unref?.() +} + async function stopManagedRuntimesForQuit(): Promise { if (managedRuntimesStoppedForQuit) return await stopManagedRuntimes() @@ -632,6 +684,7 @@ async function probeThreadApi(settings: AppSettingsV1): Promise< async function waitForKunHealth(settings: AppSettingsV1, timeoutMs: number): Promise { const base = getRuntimeBaseUrlForSettings(settings) const deadline = Date.now() + timeoutMs + let lastError = '' while (Date.now() <= deadline) { try { @@ -641,12 +694,18 @@ async function waitForKunHealth(settings: AppSettingsV1, timeoutMs: number): Pro signal: AbortSignal.timeout(Math.max(250, Math.min(1_000, remaining))) }) if (res.ok && isKunHealthResponseBody(await res.text())) return true - } catch { - /* retry until the deadline */ + lastError = `unexpected status ${res.status}` + } catch (e) { + const msg = e instanceof Error ? 
e.message : String(e) + if (msg !== lastError) { + lastError = msg + logWarn('health-probe', `${base}/health: ${msg}`) + } } await sleep(150) } + logWarn('health-probe', `gave up after ${timeoutMs}ms, last error: ${lastError}`) return false } @@ -1042,6 +1101,10 @@ async function restartRuntime(settings: AppSettingsV1): Promise { async function restartRuntimeOnce(settings: AppSettingsV1): Promise { await waitForQueuedRuntimeSettingsApply() + // Don't tear down a child that is still completing its startup; wait for it + // to settle so a restart trigger that races a boot doesn't reset the clock + // (#544). Resolves immediately when nothing is launching. + await waitForKunStartupSettled() const runtime = getKunRuntimeSettings(settings) if (!resolveConfiguredApiKey(settings)) { @@ -1218,6 +1281,13 @@ async function restartManagedRuntimeForSettingsChange( ): Promise { if (!runtimeStartupConfigChanged(prev, next)) return + // Let any in-flight boot launch finish (or fail) before we read liveness + // and stop the child. Killing a kun that is still inside its startup window + // throws away the boot's progress and restarts the clock — the #544 restart + // storm. Once it settles, the child is either healthy (graceful restart + // below) or already gone (`wasRunning` is false and we return). + await waitForKunStartupSettled() + const runtime = resolveKunRuntimeSettings(next) const adapter = kunRuntimeAdapter const wasRunning = adapter.isChildRunning() @@ -1328,6 +1398,10 @@ async function rollbackRuntimeSettingsAfterFailedApply( } async function restartManagedRuntimeForMcpConfigChange(settings: AppSettingsV1): Promise { + // See restartManagedRuntimeForSettingsChange: never interrupt an in-flight + // boot launch (#544 restart storm). 
+ await waitForKunStartupSettled() + const runtime = resolveKunRuntimeSettings(settings) const adapter = kunRuntimeAdapter const wasRunning = adapter.isChildRunning() @@ -1431,6 +1505,7 @@ app.whenReady().then(async () => { retentionDays: initial.log.retentionDays }) traceStartup('logger configured') + syncCheckpointCleanupTimer(initial) scheduleRuntime = createScheduleRuntime({ store, runtimeRequest, logError, powerSaveBlocker }) scheduleRuntime.sync(initial) workflowRuntime = createWorkflowRuntime({ store, runtimeRequest, logError, powerSaveBlocker }) @@ -1480,6 +1555,10 @@ app.whenReady().then(async () => { ...restPatch, provider: mergeModelProviderSettings(prev.provider, providerPatch), log: { ...prev.log, ...(partial.log ?? {}) }, + checkpointCleanup: normalizeCheckpointCleanupSettings({ + ...prev.checkpointCleanup, + ...(partial.checkpointCleanup ?? {}) + }), notifications: { ...prev.notifications, ...(partial.notifications ?? {}) }, appBehavior: mergeAppBehaviorSettings(prev.appBehavior, partial.appBehavior), keyboardShortcuts: normalizeKeyboardShortcuts({ @@ -1522,6 +1601,7 @@ app.whenReady().then(async () => { syncWeixinBridgeRuntime(saved) syncLoginItemSettings(saved) syncTray(saved) + syncCheckpointCleanupTimer(saved) return saved } @@ -1630,6 +1710,7 @@ app.on('window-all-closed', () => { app.on('before-quit', (event) => { isQuitting = true stopRuntimeWatchdog() + stopCheckpointCleanupTimer() if (managedRuntimesStoppedForQuit) return event.preventDefault() void stopManagedRuntimesForQuit() diff --git a/src/main/ipc/app-ipc-schemas.ts b/src/main/ipc/app-ipc-schemas.ts index 54902d853..0eedada96 100644 --- a/src/main/ipc/app-ipc-schemas.ts +++ b/src/main/ipc/app-ipc-schemas.ts @@ -46,7 +46,7 @@ import { } from '../../shared/app-settings' import { DESKTOP_COMMANDS } from '../../shared/kun-gui-api' import { GUI_UPDATE_CHANNELS } from '../../shared/gui-update' -import { WINDOW_CLOSE_ACTIONS } from '../../shared/app-settings' +import { WINDOW_CLOSE_ACTIONS, 
UI_FONT_SCALE_MIN, UI_FONT_SCALE_MAX } from '../../shared/app-settings' import { KEYBOARD_SHORTCUT_COMMANDS } from '../../shared/keyboard-shortcuts' import { WRITE_EXPORT_FORMATS } from '../../shared/write-export' import { WRITE_INFOGRAPHIC_MAX_TEXT_CHARS } from '../../shared/write-infographic' @@ -207,7 +207,10 @@ export const runtimeRequestPayloadSchema = z const localeSchema = z.enum(['en', 'zh']) const themeSchema = z.enum(['system', 'light', 'dark']) -const uiFontScaleSchema = z.enum(['small', 'medium', 'large']) +const uiFontScaleSchema = z.union([ + z.number().min(UI_FONT_SCALE_MIN).max(UI_FONT_SCALE_MAX), + z.enum(['small', 'medium', 'large']) +]) const hexColorSchema = z.string().trim().regex(/^#[0-9a-fA-F]{6}$/) const approvalPolicySchema = z.enum(['always', 'on-request', 'untrusted', 'never', 'auto', 'suggest']) const sandboxModeSchema = z.enum(['read-only', 'workspace-write', 'danger-full-access', 'external-sandbox']) @@ -321,6 +324,42 @@ const modelProviderPatchSchema = z.object({ }).strict()).max(50).optional() }).strict() +// Subagent profile patch. `.passthrough()` so a field the GUI adds later is +// preserved through the strict parent instead of being dropped (which would +// silently lose a configured model/reasoning on round-trip). 
+const subagentProfilePatchSchema = z + .object({ + id: z.string().min(1).max(128), + enabled: z.boolean(), + name: z.string().max(200), + description: z.string().max(2000).optional(), + color: z.string().max(32).optional(), + mode: z.enum(['subagent', 'primary', 'all']), + model: z.string().max(256).optional(), + providerId: z.string().trim().max(64).optional(), + systemPrompt: z.string().max(MAX_BODY_BYTES).optional(), + promptPreamble: z.string().max(MAX_BODY_BYTES).optional(), + toolPolicy: z.enum(['readOnly', 'inherit']), + allowedTools: z.array(z.string().max(128)).max(200).optional(), + blockedTools: z.array(z.string().max(128)).max(200).optional(), + blockedMcpServers: z.array(z.string().max(128)).max(200).optional(), + blockedSkills: z.array(z.string().max(128)).max(200).optional(), + reasoningEffort: modelReasoningEffortSchema.optional(), + builtin: z.boolean().optional() + }) + .passthrough() + +const subagentsPatchSchema = z + .object({ + enabled: z.boolean().optional(), + maxParallel: z.number().int().nonnegative().max(64).optional(), + maxChildRuns: z.number().int().nonnegative().max(10_000).optional(), + defaultToolPolicy: z.enum(['readOnly', 'inherit']).optional(), + defaultProfile: z.string().max(128).optional(), + profiles: z.array(subagentProfilePatchSchema).max(200).optional() + }) + .passthrough() + const kunRuntimePatchSchema = z.object({ binaryPath: defaultPathSchema, port: z.number().int().min(MIN_KUN_LOCAL_PORT).max(65_535).optional(), @@ -368,7 +407,9 @@ const kunRuntimePatchSchema = z.object({ summaryMode: kunCompactionSummaryModeSchema.optional(), summaryTimeoutMs: z.number().int().positive().max(120_000).optional(), summaryMaxTokens: z.number().int().positive().max(16_000).optional(), - summaryInputMaxBytes: z.number().int().positive().max(8 * 1024 * 1024).optional() + summaryInputMaxBytes: z.number().int().positive().max(8 * 1024 * 1024).optional(), + summaryModel: optionalModelIdSchema, + summaryProviderId: 
z.string().trim().max(64).optional() }).strict().optional(), runtimeTuning: z.object({ streamIdleTimeoutMs: z.number().int().min(0).max(3_600_000).optional(), @@ -454,7 +495,22 @@ const kunRuntimePatchSchema = z.object({ modelIdSchema, modelProfilePatchSchema.nullable() ).optional(), - memoryEnabled: z.boolean().optional() + memoryEnabled: z.boolean().optional(), + // Global small-model slot + per-role internal-LLM model overrides (agents.kun.*). + // Title & Summary default to smallModel, then the main conversation model. + smallModel: optionalModelIdSchema, + smallModelProviderId: z.string().trim().max(64).optional(), + titleModel: optionalModelIdSchema, + titleProviderId: z.string().trim().max(64).optional(), + summaryModel: optionalModelIdSchema, + summaryProviderId: z.string().trim().max(64).optional(), + codeReviewModel: optionalModelIdSchema, + codeReviewProviderId: z.string().trim().max(64).optional(), + // Per-role reasoning depth. Default 'off' is omitted by the normalizer. + titleReasoningEffort: modelReasoningEffortSchema.optional(), + summaryReasoningEffort: modelReasoningEffortSchema.optional(), + codeReviewReasoningEffort: modelReasoningEffortSchema.optional(), + subagents: subagentsPatchSchema.optional() }).strict() const logPatchSchema = z.object({ @@ -462,6 +518,17 @@ const logPatchSchema = z.object({ retentionDays: z.number().int().min(1).max(365).optional() }).strict() +const checkpointCleanupPatchSchema = z.object({ + enabled: z.boolean().optional(), + intervalDays: z.union([ + z.literal(1), + z.literal(2), + z.literal(3), + z.literal(5), + z.literal(10) + ]).optional() +}).strict() + const notificationsPatchSchema = z.object({ turnComplete: z.boolean().optional() }).strict() @@ -1286,6 +1353,7 @@ const settingsPatchObjectSchema = z.object({ }).strict().optional(), workspaceRoot: defaultPathSchema, log: logPatchSchema.optional(), + checkpointCleanup: checkpointCleanupPatchSchema.optional(), notifications: notificationsPatchSchema.optional(), 
appBehavior: appBehaviorPatchSchema.optional(), keyboardShortcuts: keyboardShortcutsPatchSchema.optional(), @@ -1338,6 +1406,11 @@ export const skillListPayloadSchema = z .strict() export const rootPathSchema = trimmedString(MAX_PATH_LENGTH) +export const localPdfTextTargetPayloadSchema = z + .object({ + path: rootPathSchema + }) + .strict() export const deepseekConfigContentSchema = z.string().max(MAX_CONFIG_FILE_BYTES) export const workspaceRootSchema = trimmedString(MAX_PATH_LENGTH) diff --git a/src/main/ipc/register-app-ipc-handlers.test.ts b/src/main/ipc/register-app-ipc-handlers.test.ts index b424df67d..26c611160 100644 --- a/src/main/ipc/register-app-ipc-handlers.test.ts +++ b/src/main/ipc/register-app-ipc-handlers.test.ts @@ -36,13 +36,14 @@ function settings(): AppSettingsV1 { version: 1, locale: 'en', theme: 'system', - uiFontScale: 'small', + uiFontScale: 0.82, provider: defaultModelProviderSettings(), agents: { kun: defaultKunRuntimeSettings() }, workspaceRoot: '/tmp/workspace', log: { enabled: false, retentionDays: 7 }, + checkpointCleanup: { enabled: false, intervalDays: 3 }, notifications: { turnComplete: true }, appBehavior: { openAtLogin: false, startMinimized: false, closeToTray: false }, keyboardShortcuts: defaultKeyboardShortcuts(), @@ -124,6 +125,35 @@ describe('registerAppIpcHandlers', () => { expect(applySettingsPatch).toHaveBeenCalledWith(payload) }) + it('accepts checkpoint cleanup settings patches', async () => { + const { registerAppIpcHandlers } = await import('./register-app-ipc-handlers') + const applySettingsPatch = vi.fn(async () => settings()) + + registerAppIpcHandlers(registerOptions({ applySettingsPatch })) + + const payload = { + checkpointCleanup: { + intervalDays: 5 + } + } + const handler = handlers.get('settings:set') + await expect(handler?.({}, payload)).resolves.toEqual(settings()) + expect(applySettingsPatch).toHaveBeenCalledWith(payload) + }) + + it('rejects unsupported checkpoint cleanup intervals', async () => { + 
const { registerAppIpcHandlers } = await import('./register-app-ipc-handlers') + const applySettingsPatch = vi.fn(async () => settings()) + + registerAppIpcHandlers(registerOptions({ applySettingsPatch })) + + const handler = handlers.get('settings:set') + await expect( + handler?.({}, { checkpointCleanup: { intervalDays: 4 } }) + ).rejects.toThrow(/Invalid payload for settings:set/) + expect(applySettingsPatch).not.toHaveBeenCalled() + }) + it('accepts telegram phone connection settings patches', async () => { const { registerAppIpcHandlers } = await import('./register-app-ipc-handlers') const applySettingsPatch = vi.fn(async () => settings()) diff --git a/src/main/ipc/register-app-ipc-handlers.ts b/src/main/ipc/register-app-ipc-handlers.ts index 8d76d1e3c..fe02baf8f 100644 --- a/src/main/ipc/register-app-ipc-handlers.ts +++ b/src/main/ipc/register-app-ipc-handlers.ts @@ -46,6 +46,7 @@ import { gitCheckpointRestorePayloadSchema, gitWorktreeRemoveSchema, guiUpdateChannelSchema, + localPdfTextTargetPayloadSchema, logErrorPayloadSchema, notificationPayloadSchema, openEditorPathPayloadSchema, @@ -178,6 +179,7 @@ import { } from '../services/computer-use-permissions' import { copyWriteDocumentAsRichText, exportWriteDocument } from '../services/write-export-service' import { importGithubSkillsToRoot } from '../services/github-skill-import-service' +import { readLocalPdfText } from '../services/write-pdf-text-service' import { saveGuiSkillPackage } from '../services/skill-save-service' import { listGuiSkillRoots, listGuiSkills } from '../services/skill-service' @@ -958,7 +960,18 @@ export function registerAppIpcHandlers(options: RegisterAppIpcHandlersOptions): const request = parseIpcPayload('git:checkpoint:restore', gitCheckpointRestorePayloadSchema, payload) return restoreGitCheckpoint({ dataDir: await resolveKunThreadsDataDir(), - checkpointId: request.checkpointId + checkpointId: request.checkpointId, + // Bridge the main-process runtimeRequest into the shape 
restoreGitCheckpoint + // expects ((path, {method, body}) => {ok,status,body}). On a transport-level + // failure (runtime not up, connection refused) we return a non-ok result so + // the busy guard fails closed instead of throwing past the handler. + runtimeRequest: async (path, init) => { + try { + return await runtimeRequest(path, init?.method, init?.body) + } catch (error) { + return { ok: false, status: 0, body: error instanceof Error ? error.message : String(error) } + } + } }) }) ipcMain.handle( @@ -1087,6 +1100,22 @@ export function registerAppIpcHandlers(options: RegisterAppIpcHandlersOptions): parseIpcPayload('file:read-workspace-pdf', workspaceFileTargetPayloadSchema, payload) ) ) + ipcMain.handle('file:read-local-pdf-text', async (_, payload: unknown) => { + const result = await readLocalPdfText( + parseIpcPayload('file:read-local-pdf-text', localPdfTextTargetPayloadSchema, payload) + ) + if (!result.ok) return result + return { + ok: true, + path: result.path, + size: result.size, + mtimeMs: result.mtimeMs, + pageCount: result.pageCount, + text: result.pages.map((page) => page.text).join('\n\n'), + hasText: result.hasText, + truncated: result.truncated + } + }) ipcMain.handle('file:save-as', async (_, payload: unknown) => saveWorkspaceFileAs(payload, getMainWindow) ) diff --git a/src/main/kun-process.test.ts b/src/main/kun-process.test.ts index a7677ac18..cc21f0c4f 100644 --- a/src/main/kun-process.test.ts +++ b/src/main/kun-process.test.ts @@ -33,7 +33,7 @@ function createSettings(binaryPath: string): AppSettingsV1 { version: 1, locale: 'en', theme: 'system', - uiFontScale: 'small', + uiFontScale: 0.82, provider: defaultModelProviderSettings(), agents: { kun: { @@ -44,6 +44,7 @@ function createSettings(binaryPath: string): AppSettingsV1 { }, workspaceRoot: '/tmp/workspace', log: { enabled: false, retentionDays: 7 }, + checkpointCleanup: { enabled: false, intervalDays: 3 }, notifications: { turnComplete: true }, appBehavior: { openAtLogin: false, 
startMinimized: false, closeToTray: false }, keyboardShortcuts: defaultKeyboardShortcuts(), @@ -113,9 +114,17 @@ describe('startKunChild', () => { const script = writeScript( 'ready-child.js', [ - "setTimeout(() => {", - " process.stdout.write('KUN_READY ' + JSON.stringify({ service: 'kun', mode: 'serve', port: 18899 }) + '\\n')", - "}, 50)", + "const http = require('node:http')", + "const port = 18899", + "const server = http.createServer((req, res) => {", + " res.setHeader('content-type', 'application/json')", + " res.end(JSON.stringify({ service: 'kun', mode: 'serve', status: 'ok' }))", + "})", + "server.listen(port, '127.0.0.1', () => {", + " setTimeout(() => {", + " process.stdout.write('KUN_READY ' + JSON.stringify({ service: 'kun', mode: 'serve', port }) + '\\n')", + " }, 50)", + "})", "setInterval(() => {}, 1_000)" ].join('\n') ) @@ -128,19 +137,76 @@ describe('startKunChild', () => { expect(logText).toContain('ready marker received on port 18899') }) + it('does not settle on the ready marker until the /health endpoint responds', async () => { + if (!tempRoot) throw new Error('temp root not initialized') + const healthSignalPath = join(tempRoot, 'allow-health') + const script = writeScript( + 'marker-without-health-child.js', + [ + "const http = require('node:http')", + "const { existsSync } = require('node:fs')", + `const healthSignalPath = ${JSON.stringify(healthSignalPath)}`, + "const port = 18899", + // Emit the ready marker right away but serve no /health yet: the + // marker alone must NOT be enough to settle the launch. 
+ "process.stdout.write('KUN_READY ' + JSON.stringify({ service: 'kun', mode: 'serve', port }) + '\\n')", + 'let served = false', + 'setInterval(() => {', + ' if (served || !existsSync(healthSignalPath)) return', + ' served = true', + " const server = http.createServer((req, res) => {", + " res.setHeader('content-type', 'application/json')", + " res.end(JSON.stringify({ service: 'kun', mode: 'serve', status: 'ok' }))", + " })", + " server.listen(port, '127.0.0.1')", + '}, 10)', + 'setInterval(() => {}, 1_000)' + ].join('\n') + ) + const module = await import('./kun-process') + let resolved = false + const start = module.startKunChild(createSettings(script)).then(() => { + resolved = true + }) + + // The marker has been emitted but /health is not up yet. The child is + // spawned and alive, yet the launch must stay PENDING for the whole + // window (the startup timeout is far larger, so it cannot mask this). + await new Promise((resolve) => setTimeout(resolve, 300)) + expect(resolved).toBe(false) + + // Bring /health online; the parallel probe now settles the launch. + writeFileSync(healthSignalPath, 'ok', 'utf8') + await start + expect(resolved).toBe(true) + expect(module.isKunChildRunning()).toBe(true) + + await module.stopKunChildAndWait() + }) + it('shares the startup promise while Kun is spawned but not ready', async () => { if (!tempRoot) throw new Error('temp root not initialized') const readySignalPath = join(tempRoot, 'allow-ready') const script = writeScript( 'delayed-ready-child.js', [ + "const http = require('node:http')", "const { existsSync } = require('node:fs')", `const readySignalPath = ${JSON.stringify(readySignalPath)}`, + "const port = 18899", 'let sentReady = false', + // Only stand up the /health server once the signal exists so the + // parallel health probe cannot settle the launch before then. 
'setInterval(() => {', ' if (sentReady || !existsSync(readySignalPath)) return', ' sentReady = true', - " process.stdout.write('KUN_READY ' + JSON.stringify({ service: 'kun', mode: 'serve', port: 18899 }) + '\\n')", + " const server = http.createServer((req, res) => {", + " res.setHeader('content-type', 'application/json')", + " res.end(JSON.stringify({ service: 'kun', mode: 'serve', status: 'ok' }))", + " })", + " server.listen(port, '127.0.0.1', () => {", + " process.stdout.write('KUN_READY ' + JSON.stringify({ service: 'kun', mode: 'serve', port }) + '\\n')", + " })", '}, 10)', 'setInterval(() => {}, 1_000)' ].join('\n') @@ -188,6 +254,99 @@ describe('startKunChild', () => { }) }) +describe('resolveKunStartupTimeoutMs', () => { + it('gives Windows the larger default and other platforms a smaller one', async () => { + const { resolveKunStartupTimeoutMs } = await import('./kun-process') + expect(resolveKunStartupTimeoutMs('win32', {})).toBe(90_000) + expect(resolveKunStartupTimeoutMs('darwin', {})).toBe(60_000) + expect(resolveKunStartupTimeoutMs('linux', {})).toBe(60_000) + }) + + it('honors a valid KUN_STARTUP_TIMEOUT_MS override on every platform', async () => { + const { resolveKunStartupTimeoutMs } = await import('./kun-process') + expect(resolveKunStartupTimeoutMs('win32', { KUN_STARTUP_TIMEOUT_MS: '120000' })).toBe(120_000) + expect(resolveKunStartupTimeoutMs('linux', { KUN_STARTUP_TIMEOUT_MS: ' 30000 ' })).toBe(30_000) + }) + + it('clamps an out-of-range override to the 15s–10min bounds', async () => { + const { resolveKunStartupTimeoutMs } = await import('./kun-process') + expect(resolveKunStartupTimeoutMs('linux', { KUN_STARTUP_TIMEOUT_MS: '1000' })).toBe(15_000) + expect(resolveKunStartupTimeoutMs('linux', { KUN_STARTUP_TIMEOUT_MS: '99999999' })).toBe(600_000) + }) + + it('falls back to the platform default when the override is not a finite number', async () => { + const { resolveKunStartupTimeoutMs } = await import('./kun-process') + 
expect(resolveKunStartupTimeoutMs('win32', { KUN_STARTUP_TIMEOUT_MS: 'soon' })).toBe(90_000) + expect(resolveKunStartupTimeoutMs('darwin', { KUN_STARTUP_TIMEOUT_MS: '' })).toBe(60_000) + expect(resolveKunStartupTimeoutMs('darwin', { KUN_STARTUP_TIMEOUT_MS: ' ' })).toBe(60_000) + }) +}) + +describe('waitForKunStartupSettled', () => { + it('resolves immediately when no launch is in flight', async () => { + const module = await import('./kun-process') + let resolved = false + await Promise.race([ + module.waitForKunStartupSettled().then(() => { + resolved = true + }), + new Promise((resolve) => setTimeout(resolve, 50)) + ]) + expect(resolved).toBe(true) + }) + + it('does not resolve until an in-flight launch settles', async () => { + if (!tempRoot) throw new Error('temp root not initialized') + const readySignalPath = join(tempRoot, 'allow-ready-settled') + const script = writeScript( + 'settled-delayed-child.js', + [ + "const http = require('node:http')", + "const { existsSync } = require('node:fs')", + `const readySignalPath = ${JSON.stringify(readySignalPath)}`, + "const port = 18899", + 'let sentReady = false', + // Only stand up the /health server once the signal exists so the + // parallel health probe cannot settle the launch before then. 
+ 'setInterval(() => {', + ' if (sentReady || !existsSync(readySignalPath)) return', + ' sentReady = true', + " const server = http.createServer((req, res) => {", + " res.setHeader('content-type', 'application/json')", + " res.end(JSON.stringify({ service: 'kun', mode: 'serve', status: 'ok' }))", + " })", + " server.listen(port, '127.0.0.1', () => {", + " process.stdout.write('KUN_READY ' + JSON.stringify({ service: 'kun', mode: 'serve', port }) + '\\n')", + " })", + '}, 10)', + 'setInterval(() => {}, 1_000)' + ].join('\n') + ) + const module = await import('./kun-process') + const settings = createSettings(script) + const start = module.startKunChild(settings) + + for (let attempt = 0; attempt < 100 && !module.isKunChildRunning(); attempt += 1) { + await new Promise((resolve) => setTimeout(resolve, 10)) + } + expect(module.isKunChildRunning()).toBe(true) + + let settled = false + const settledPromise = module.waitForKunStartupSettled().then(() => { + settled = true + }) + await new Promise((resolve) => setTimeout(resolve, 50)) + expect(settled).toBe(false) + + writeFileSync(readySignalPath, 'ready', 'utf8') + await start + await settledPromise + expect(settled).toBe(true) + + await module.stopKunChildAndWait() + }) +}) + describe('reclaimKunPort', () => { it('reports a port as unavailable when another listener owns it', async () => { const server = createServer() @@ -384,6 +543,10 @@ describe('syncGuiManagedKunConfig', () => { expect(parsed.runtime.toolArgumentRepair).toMatchObject({ maxStringBytes: 524288 }) expect(parsed.capabilities.attachments).toMatchObject({ enabled: true }) expect(parsed.capabilities.memory).toMatchObject({ enabled: false }) + // Subagents have no GUI enable toggle: they default ON so delegate_task + the + // built-in profiles are always offered. maxParallel/maxChildRuns must be >=1 or + // DelegationRuntime can never run a child. This locks the default against regressions. 
+ expect(parsed.capabilities.subagents).toMatchObject({ enabled: true, maxParallel: 3, maxChildRuns: 12 }) expect(parsed.capabilities.web).toMatchObject({ enabled: true, fetchEnabled: true }) expect(parsed.capabilities.mcp.search).toMatchObject({ enabled: false, mode: 'auto' }) expect(parsed.capabilities.imageGen).toEqual({ @@ -683,6 +846,43 @@ describe('syncGuiManagedKunConfig', () => { ])) }) + it('drops stale Codex plugin cache roots but keeps hand-added manual roots', async () => { + if (!tempRoot) throw new Error('temp root not initialized') + const configPath = join(tempRoot, 'config.json') + // A version directory left behind by a plugin upgrade and a root a user + // added by hand to the Kun config file. + const staleRoot = join(homedir(), '.codex', 'plugins', 'cache', 'gmail', '0.0.0-stale', 'skills') + const manualRoot = join(tempRoot, 'manual', 'skills') + writeFileSync(configPath, JSON.stringify({ + capabilities: { skills: { enabled: true, roots: [staleRoot, manualRoot], legacySkillMd: true } } + }), 'utf8') + const module = await import('./kun-process') + + await module.syncGuiManagedKunConfig(tempRoot, defaultKunRuntimeSettings()) + + const parsed = JSON.parse(readFileSync(configPath, 'utf8')) as any + expect(parsed.capabilities.skills.roots).not.toContain(staleRoot) + expect(parsed.capabilities.skills.roots).toContain(manualRoot) + }) + + it('forwards GUI disabledSkillIds into the runtime skills capability', async () => { + if (!tempRoot) throw new Error('temp root not initialized') + const configPath = join(tempRoot, 'config.json') + const module = await import('./kun-process') + const settings = createSettings('/tmp/fake-kun-child.js') + settings.disabledSkillIds = ['gmail', 'vercel-agent'] + + await module.syncGuiManagedKunConfig(tempRoot, defaultKunRuntimeSettings(), { + scheduleMcp: { + settings, + launch: { appPath: '/tmp/deepseek-gui-test-app', execPath: '/tmp/electron', isPackaged: false } + } + }) + + const parsed = 
JSON.parse(readFileSync(configPath, 'utf8')) as any + expect(parsed.capabilities.skills.disabledIds).toEqual(['gmail', 'vercel-agent']) + }) + it('writes GUI-managed MCP search settings without removing existing servers', async () => { if (!tempRoot) throw new Error('temp root not initialized') const configPath = join(tempRoot, 'config.json') @@ -875,6 +1075,7 @@ describe('syncGuiManagedKunConfig', () => { servers: { 'stata-mcp': { command: 'uvx', + cwd: 'D:\\Workspace\\stata-project', args: ['stata-mcp'], env: { STATA_CLI: 'D:\\stata\\StataMP-64.exe' @@ -902,6 +1103,7 @@ describe('syncGuiManagedKunConfig', () => { enabled: true, transport: 'stdio', command: 'uvx', + cwd: 'D:\\Workspace\\stata-project', args: ['stata-mcp'], env: { STATA_CLI: 'D:\\stata\\StataMP-64.exe' @@ -1021,3 +1223,43 @@ describe('syncGuiManagedKunConfig', () => { }) }) }) + +describe('subagentProfilesForRuntime', () => { + it('drops blank optional fields so the runtime config still parses', async () => { + const module = await import('./kun-process') + // Built-in profiles store an empty `name` (the GUI localizes the label) and + // the user picked a model on one of them. The runtime schema marks every + // optional string `.min(1)`, so a forwarded empty string used to throw and + // strand the runtime at "无法连接到本地运行时". 
+ const config = module.subagentProfilesForRuntime({ + enabled: true, + profiles: [ + { + id: 'general', + enabled: true, + name: '', + mode: 'subagent', + toolPolicy: 'inherit', + model: 'deepseek-v4', + description: ' ' + } + ] + }) + + expect(config.profiles.general).toBeDefined() + expect('name' in config.profiles.general).toBe(false) + expect('description' in config.profiles.general).toBe(false) + expect(config.profiles.general.model).toBe('deepseek-v4') + }) + + it('keeps a non-empty name', async () => { + const module = await import('./kun-process') + const config = module.subagentProfilesForRuntime({ + enabled: true, + profiles: [ + { id: 'custom', enabled: true, name: '我的代理', mode: 'subagent', toolPolicy: 'inherit' } + ] + }) + expect(config.profiles.custom.name).toBe('我的代理') + }) +}) diff --git a/src/main/kun-process.ts b/src/main/kun-process.ts index b5f66718e..4ea134b00 100644 --- a/src/main/kun-process.ts +++ b/src/main/kun-process.ts @@ -16,6 +16,7 @@ import { type ModelProviderModelProfileV1, type ModelProviderProfileV1, type KunRuntimeSettingsV1, + type KunSubagentsSettingsV1, type AppSettingsV1 } from '../shared/app-settings' import { @@ -28,7 +29,8 @@ import { ModelConfigSchema, ContextCompactionConfigSchema, QualityConfigSchema, - RuntimeTuningConfigSchema + RuntimeTuningConfigSchema, + RolesConfigSchema } from '../../kun/src/config/kun-config.js' import { HooksConfigSchema } from '../../kun/src/hooks/hook-config.js' import { @@ -58,7 +60,9 @@ import { appendManagedLogLine } from './logger' import { comparableSkillRootPath, guiSkillManagedComparablePaths, + guiSkillWorkspaceRootsForRuntime, guiSkillRootsForRuntime, + isCodexPluginCacheRoot, normalizeSkillRootPath } from './services/skill-service' @@ -93,10 +97,45 @@ export function setKunUnexpectedExitHandler( const execFileAsync = promisify(execFile) const KUN_READY_PREFIX = 'KUN_READY ' -// Cold starts on slow disks (Windows + antivirus scans, sqlite rebuilds, -// MCP server connects) routinely 
exceed 15s; killing kun that early left -// fresh installs permanently "unable to connect" (#188). -const KUN_STARTUP_TIMEOUT_MS = 45_000 +const KUN_STARTUP_TIMEOUT_FLOOR_MS = 15_000 +const KUN_STARTUP_TIMEOUT_CEILING_MS = 600_000 + +/** + * How long to wait for a freshly spawned kun to report ready before giving + * up and killing it. kun emits its ready marker only after the HTTP server + * is actually listening, which it does only after sqlite opens, the thread + * store finishes its backfill, usage carryover replays every thread's + * events, and the MCP fast-connect race runs. On a slow disk (Windows + + * antivirus scans) with a large history this routinely exceeds 45s, leaving + * the runtime stuck in a "did not report ready within 45000ms" → SIGTERM → + * respawn loop (#188, #544). + * + * A generous ceiling is free on fast machines: the parallel /health probe + * in waitForKunStartup settles the moment the server responds, and a process + * that actually crashes rejects immediately via its exit event rather than + * waiting out the timeout. Only a slow-but-progressing boot uses the extra + * runway. Windows gets the larger default; everything is overridable via the + * KUN_STARTUP_TIMEOUT_MS env var (milliseconds, clamped to 15s–10min) for + * extreme cases without a rebuild. + */ +export function resolveKunStartupTimeoutMs( + platform: NodeJS.Platform, + env: NodeJS.ProcessEnv +): number { + const raw = env.KUN_STARTUP_TIMEOUT_MS + if (raw && raw.trim()) { + const parsed = Number(raw) + if (Number.isFinite(parsed)) { + return Math.min( + KUN_STARTUP_TIMEOUT_CEILING_MS, + Math.max(KUN_STARTUP_TIMEOUT_FLOOR_MS, Math.floor(parsed)) + ) + } + } + return platform === 'win32' ? 
90_000 : 60_000 +} + +const KUN_STARTUP_TIMEOUT_MS = resolveKunStartupTimeoutMs(process.platform, process.env) const KUN_STARTUP_HEALTH_POLL_MS = 500 const KUN_STARTUP_HEALTH_REQUEST_TIMEOUT_MS = 1_000 const KUN_STOP_GRACE_MS = 5_000 @@ -260,6 +299,21 @@ function isCurrentKunChildPid(pid: number): boolean { return Boolean(child?.pid === pid && isKunChildRunning()) } +/** + * Resolve once any in-flight kun launch has settled — whether it became + * ready or failed. The settings/MCP-apply paths use this to avoid + * SIGTERM-ing a child that is still inside its (deliberately generous) + * startup window: interrupting a slow-but-healthy boot only restarts the + * clock and is what turns one slow start into the #544 restart storm. + * + * Deadlock-safe by construction: `kunStartPromise` is only set once a launch + * has already passed the settings-apply gate, so an apply that awaits it can + * never be the thing that launch is itself waiting on. + */ +export function waitForKunStartupSettled(): Promise { + return kunStartPromise ? 
kunStartPromise.catch(() => undefined) : Promise.resolve() +} + export function startKunChild(settings: AppSettingsV1): Promise { if (kunStartPromise) return kunStartPromise const runtime = resolveKunRuntimeSettings(settings) @@ -399,6 +453,15 @@ export async function syncGuiManagedKunConfig( | 'modelProfiles' | 'memoryEnabled' | 'quality' + | 'subagents' + | 'smallModel' + | 'smallModelProviderId' + | 'titleModel' + | 'titleProviderId' + | 'summaryModel' + | 'summaryProviderId' + | 'codeReviewModel' + | 'codeReviewProviderId' >, options?: { scheduleMcp?: { @@ -458,6 +521,10 @@ export async function syncGuiManagedKunConfig( contextCompaction: contextCompactionConfigForRuntime(runtime.contextCompaction, existingContextCompaction), runtime: runtimeTuningConfigForRuntime(runtime.runtimeTuning, existingRuntimeTuning), quality: qualityConfigForRuntime(runtime.quality, existingQuality), + ...(() => { + const roles = rolesConfigForRuntime(runtime) + return Object.keys(roles).length ? { roles } : {} + })(), capabilities: { ...capabilities, attachments: { @@ -479,6 +546,7 @@ export async function syncGuiManagedKunConfig( ...memory, enabled: runtime.memoryEnabled }, + subagents: subagentProfilesForRuntime(runtime.subagents ?? { enabled: true, profiles: [] }), mcp: { ...mcp, ...(options?.scheduleMcp || mcpSearch.enabled || hasImportedEnabledMcpServer @@ -546,10 +614,17 @@ async function skillCapabilityConfigForRuntime( // Drop previously-persisted GUI-managed roots so disabling a directory in // settings actually removes it — otherwise a toggled-off root would stick // around forever via `existing.roots`. + // GUI-managed roots are dropped from the carried-over set and rebuilt fresh + // below. Besides the common/extra candidates, auto-discovered Codex plugin + // caches count as managed too — otherwise old version directories from a + // plugin upgrade stay in `roots` forever (#392). 
const managed = guiSkillManagedComparablePaths(settings) const manualExisting = stringArrayValue(existing.roots) .map(normalizeSkillRootPath) - .filter((path) => path.length > 0 && !managed.has(comparableSkillRootPath(path))) + .filter((path) => + path.length > 0 && + !managed.has(comparableSkillRootPath(path)) && + !isCodexPluginCacheRoot(path)) const roots = uniqueStrings([ ...manualExisting, ...(await guiSkillRootsForRuntime(settings)).map((root) => root.path) @@ -562,6 +637,13 @@ async function skillCapabilityConfigForRuntime( // skills. An explicit `true` still forces on even with no roots. enabled: roots.length > 0 || existing.enabled === true, roots, + workspaceRoots: guiSkillWorkspaceRootsForRuntime(settings), + // #149: Pass global skill roots from settings (e.g. ~/.kun/skills) + globalRoots: existing.globalRoots ?? [], + // Skills the user disabled in the GUI. Forwarded so the runtime drops them + // from discovery — without this they stay loadable via load_skill and keep + // appearing in the catalog despite the GUI toggle (#392). + disabledIds: settings?.disabledSkillIds ?? stringArrayValue(existing.disabledIds), legacySkillMd: existing.legacySkillMd === false ? false : true } } @@ -610,6 +692,7 @@ function mcpServersFromGuiConfig(config: Record): Record | null { const raw = objectValue(server) const command = scalarStringValue(raw.command) + const cwd = scalarStringValue(raw.cwd)?.trim() const url = scalarStringValue(raw.url) const args = stringArrayValue(raw.args) const headers = stringRecordValue(raw.headers) @@ -626,6 +709,7 @@ function normalizeGuiManagedMcpServer(server: unknown): Record enabled: raw.enabled === false || raw.disabled === true ? false : true, transport, ...(command ? { command } : {}), + ...(transport === 'stdio' && cwd ? { cwd } : {}), ...(args.length > 0 ? { args } : {}), ...(url ? { url } : {}), ...(Object.keys(headers).length > 0 ? 
{ headers } : {}), @@ -821,8 +905,52 @@ function contextCompactionConfigForRuntime( summaryMode: contextCompaction.summaryMode, summaryTimeoutMs: contextCompaction.summaryTimeoutMs, summaryMaxTokens: contextCompaction.summaryMaxTokens, - summaryInputMaxBytes: contextCompaction.summaryInputMaxBytes + summaryInputMaxBytes: contextCompaction.summaryInputMaxBytes, + ...(contextCompaction.summaryModel ? { summaryModel: contextCompaction.summaryModel } : {}), + ...(contextCompaction.summaryProviderId ? { summaryProviderId: contextCompaction.summaryProviderId } : {}) + } +} + +/** + * Build the kun `roles` config (internal-LLM model routing) from GUI settings. + * Only non-empty fields are emitted so the strict RolesConfigSchema accepts the + * result and a cleared field removes itself from config.json. + */ +function rolesConfigForRuntime( + runtime: Pick< + KunRuntimeSettingsV1, + | 'smallModel' + | 'smallModelProviderId' + | 'titleModel' + | 'titleProviderId' + | 'summaryModel' + | 'summaryProviderId' + | 'codeReviewModel' + | 'codeReviewProviderId' + | 'titleReasoningEffort' + | 'summaryReasoningEffort' + | 'codeReviewReasoningEffort' + > +): Record { + const out: Record = {} + const put = (key: string, value: string | undefined): void => { + const trimmed = typeof value === 'string' ? value.trim() : '' + if (trimmed) out[key] = trimmed } + put('smallModel', runtime.smallModel) + put('smallModelProviderId', runtime.smallModelProviderId) + put('titleModel', runtime.titleModel) + put('titleProviderId', runtime.titleProviderId) + put('summaryModel', runtime.summaryModel) + put('summaryProviderId', runtime.summaryProviderId) + put('codeReviewModel', runtime.codeReviewModel) + put('codeReviewProviderId', runtime.codeReviewProviderId) + // Per-role reasoning depth. 'off' is the default and is intentionally omitted + // by the normalizer, so only an opted-in level (low/medium/high/max) is emitted. 
+ put('titleReasoningEffort', runtime.titleReasoningEffort) + put('summaryReasoningEffort', runtime.summaryReasoningEffort) + put('codeReviewReasoningEffort', runtime.codeReviewReasoningEffort) + return out } function computerUseConfigForRuntime( @@ -979,6 +1107,75 @@ function qualityConfigForRuntime( } } +const VALID_PROFILE_REASONING = new Set(['auto', 'low', 'medium', 'high', 'max']) + +/** + * Remove optional fields the runtime schema rejects when blank: empty/whitespace + * strings (every optional string there is `.min(1)`) and empty arrays. Leaving + * them in throws on SubagentsCapabilityConfig.parse and stops the runtime from + * starting; dropping them lets the field fall back to its server default. + */ +function stripBlankProfileFields(profile: Record): Record { + const next: Record = {} + for (const [key, value] of Object.entries(profile)) { + if (typeof value === 'string' && value.trim() === '') continue + if (Array.isArray(value) && value.length === 0) continue + next[key] = value + } + return next +} + +export function subagentProfilesForRuntime(subagents: KunSubagentsSettingsV1): SubagentsCapabilityConfig { + const profiles: Record = {} + for (const profile of subagents.profiles) { + if (!profile.enabled) continue + const { id: _id, enabled: _enabled, name, reasoningEffort, ...rest } = profile + // Coerce the per-profile reasoning enum so a hand-edited invalid value can't + // throw SubagentsCapabilityConfig.parse below ('off'/invalid → omitted). + const effort = typeof reasoningEffort === 'string' && VALID_PROFILE_REASONING.has(reasoningEffort) + ? { reasoningEffort } + : {} + // Built-in profiles carry an empty `name` (the GUI localizes their display + // labels rather than storing them), and the user can blank any optional + // field in the editor. The runtime schema marks every optional string as + // `.min(1)`, so forwarding an empty string throws and the runtime never + // connects. 
Drop blank strings / empty arrays so they fall back to defaults. + profiles[profile.id] = stripBlankProfileFields({ name, ...rest, ...effort }) + } + const candidate = { + // Subagents are a first-class feature with no GUI "enable" toggle; default ON + // (only an explicit `false` disables) so delegate_task + the built-in profiles + // (design-reviewer / over-engineering-reviewer) are always offered to the model. + // maxParallel/maxChildRuns MUST be >=1 or DelegationRuntime can never run a child. + enabled: subagents.enabled !== false, + maxParallel: subagents.maxParallel && subagents.maxParallel > 0 ? subagents.maxParallel : 3, + maxChildRuns: subagents.maxChildRuns && subagents.maxChildRuns > 0 ? subagents.maxChildRuns : 12, + ...(subagents.defaultToolPolicy ? { defaultToolPolicy: subagents.defaultToolPolicy } : {}), + ...(subagents.defaultProfile ? { defaultProfile: subagents.defaultProfile } : {}), + profiles + } + // A single malformed profile must never brick the whole runtime connection. + // If the GUI somehow persisted a value the schema rejects, drop the custom + // profiles and fall back to a minimal valid block — the runtime still merges + // in the built-in reviewers, so subagents keep working. + const parsed = SubagentsCapabilityConfig.safeParse(candidate) + if (parsed.success) return parsed.data + void appendManagedLogLine( + 'kun', + formatKunLogLine( + 'lifecycle', + undefined, + `[settings] dropped invalid subagent profiles: ${JSON.stringify(parsed.error.issues)}` + ) + ) + return SubagentsCapabilityConfig.parse({ + enabled: candidate.enabled, + maxParallel: candidate.maxParallel, + maxChildRuns: candidate.maxChildRuns, + ...(subagents.defaultToolPolicy ? 
{ defaultToolPolicy: subagents.defaultToolPolicy } : {}) + }) +} + async function readJsonObjectIfExists(path: string): Promise | null> { try { const text = await readFile(path, 'utf8') @@ -1067,6 +1264,9 @@ function sanitizeKunConfigSections( runtime: parseKunConfigSection(RuntimeTuningConfigSchema, existing.runtime), quality: parseKunConfigSection(QualityConfigSchema, existing.quality), capabilities: sanitizeKunCapabilitiesConfig(existing.capabilities), + ...('roles' in existing + ? { roles: parseKunConfigSection(RolesConfigSchema, existing.roles) } + : {}), ...(hooks.length ? { hooks } : {}) } } @@ -1370,22 +1570,30 @@ async function waitForKunStartup(startedChild: ChildProcess, port?: number): Pro let stdoutBuffer = '' let stderrTail = '' let healthProbeInFlight = false + let healthConfirmed = false + let readyMarkerSeen = false const timer = setTimeout(() => { if (settled) return settled = true cleanup() - reject(new Error(describeKunStartupTimeout(stderrTail))) + reject(new Error(describeKunStartupTimeout(stderrTail, readyMarkerSeen && Boolean(port)))) }, KUN_STARTUP_TIMEOUT_MS) // The stdout ready marker can lag behind the actual server (pipe // buffering) or get lost in unusual spawn environments; the HTTP // health endpoint is the ground truth, so poll it in parallel. + // A passing health probe alone is enough to settle (it proves the + // server responds). The stdout marker alone is NOT enough — it only + // proves the process started, not that the HTTP server can serve. const healthTimer = port ? 
setInterval(() => { if (settled || healthProbeInFlight) return healthProbeInFlight = true void probeKunHealth(port) .then((healthy) => { - if (healthy) settleReady() + if (healthy) { + healthConfirmed = true + settleReady() + } }) .finally(() => { healthProbeInFlight = false @@ -1423,7 +1631,11 @@ async function waitForKunStartup(startedChild: ChildProcess, port?: number): Pro } const onStdout = (chunk: Buffer | string): void => { stdoutBuffer = appendTail(stdoutBuffer, String(chunk), STDERR_TAIL_MAX_CHARS * 2) - if (tryParseReady()) settleReady() + if (!tryParseReady()) return + readyMarkerSeen = true + if (healthConfirmed || !healthTimer) { + settleReady() + } } const onStderr = (chunk: Buffer | string): void => { stderrTail = appendTail(stderrTail, String(chunk)) @@ -1458,8 +1670,11 @@ function describeKunExit( return `Kun exited during startup${suffix}` } -function describeKunStartupTimeout(stderrTail: string): string { +function describeKunStartupTimeout(stderrTail: string, sawReadyMarker = false): string { const suffix = stderrTail.trim() ? 
`\n${stderrTail.trim()}` : '' + if (sawReadyMarker) { + return `Kun reported ready but did not pass health checks within ${KUN_STARTUP_TIMEOUT_MS}ms${suffix}` + } return `Kun did not report ready within ${KUN_STARTUP_TIMEOUT_MS}ms${suffix}` } diff --git a/src/main/kun-regression.test.ts b/src/main/kun-regression.test.ts index 65db123d8..8b6be1207 100644 --- a/src/main/kun-regression.test.ts +++ b/src/main/kun-regression.test.ts @@ -112,13 +112,14 @@ describe('Kun single-agent regression', () => { version: 1, locale: 'en', theme: 'system', - uiFontScale: 'small', + uiFontScale: 0.82, provider: defaultModelProviderSettings(), agents: { kun: defaultKunRuntimeSettings(19000) }, workspaceRoot: '/tmp', log: { enabled: true, retentionDays: 7 }, + checkpointCleanup: { enabled: false, intervalDays: 3 }, notifications: { turnComplete: true }, appBehavior: { openAtLogin: false, startMinimized: false, closeToTray: false }, keyboardShortcuts: defaultKeyboardShortcuts(), diff --git a/src/main/runtime/kun-adapter.test.ts b/src/main/runtime/kun-adapter.test.ts index 9b785a2f8..1a0af093f 100644 --- a/src/main/runtime/kun-adapter.test.ts +++ b/src/main/runtime/kun-adapter.test.ts @@ -21,7 +21,7 @@ function settingsForPort(port: number): AppSettingsV1 { version: 1, locale: 'en', theme: 'system', - uiFontScale: 'small', + uiFontScale: 0.82, provider: defaultModelProviderSettings(), agents: { kun: { @@ -31,6 +31,7 @@ function settingsForPort(port: number): AppSettingsV1 { }, workspaceRoot: '/tmp', log: { enabled: true, retentionDays: 7 }, + checkpointCleanup: { enabled: false, intervalDays: 3 }, notifications: { turnComplete: true }, appBehavior: { openAtLogin: false, startMinimized: false, closeToTray: false }, keyboardShortcuts: defaultKeyboardShortcuts(), diff --git a/src/main/runtime/managed-runtime-idle.test.ts b/src/main/runtime/managed-runtime-idle.test.ts index b18a51733..d3e19e6a3 100644 --- a/src/main/runtime/managed-runtime-idle.test.ts +++ 
b/src/main/runtime/managed-runtime-idle.test.ts @@ -20,11 +20,12 @@ const settings: AppSettingsV1 = { version: 1, locale: 'en', theme: 'system', - uiFontScale: 'small', + uiFontScale: 0.82, provider: defaultModelProviderSettings(), agents: { kun: defaultKunRuntimeSettings() }, workspaceRoot: '/tmp/workspace', log: { enabled: false, retentionDays: 7 }, + checkpointCleanup: { enabled: false, intervalDays: 3 }, notifications: { turnComplete: true }, appBehavior: { openAtLogin: false, startMinimized: false, closeToTray: false }, keyboardShortcuts: defaultKeyboardShortcuts(), diff --git a/src/main/schedule-runtime.test.ts b/src/main/schedule-runtime.test.ts index 98c8b1d40..7213fd944 100644 --- a/src/main/schedule-runtime.test.ts +++ b/src/main/schedule-runtime.test.ts @@ -83,7 +83,7 @@ function settingsWith( version: 1, locale: 'en', theme: 'system', - uiFontScale: 'small', + uiFontScale: 0.82, provider: defaultModelProviderSettings(), agents: { kun: { @@ -93,6 +93,7 @@ function settingsWith( }, workspaceRoot: '/tmp/workspace', log: { enabled: true, retentionDays: 7 }, + checkpointCleanup: { enabled: false, intervalDays: 3 }, notifications: { turnComplete: true }, appBehavior: { openAtLogin: false, startMinimized: false, closeToTray: false }, keyboardShortcuts: defaultKeyboardShortcuts(), diff --git a/src/main/services/git-checkpoint-service.test.ts b/src/main/services/git-checkpoint-service.test.ts index 77af61d70..8d60eec95 100644 --- a/src/main/services/git-checkpoint-service.test.ts +++ b/src/main/services/git-checkpoint-service.test.ts @@ -1,9 +1,15 @@ -import { afterEach, beforeEach, describe, expect, it } from 'vitest' +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest' import { execFileSync } from 'node:child_process' -import { mkdtemp, readFile, rm, stat, writeFile } from 'node:fs/promises' +import { mkdtemp, mkdir, readFile, rm, stat, symlink, utimes, writeFile } from 'node:fs/promises' import { tmpdir } from 'node:os' -import { join } 
from 'node:path' -import { createGitCheckpoint, restoreGitCheckpoint } from './git-checkpoint-service' +import { join, normalize } from 'node:path' +import { + cleanupUnusedGitCheckpoints, + cleanupUnusedGitCheckpointsIfDue, + createGitCheckpoint, + restoreGitCheckpoint, + testResolvePathWithinRepository +} from './git-checkpoint-service' let sandbox = '' let repoRoot = '' @@ -153,4 +159,261 @@ describe('git checkpoint service', () => { const refs = execFileSync('git', ['-C', repoRoot, 'show-ref'], { encoding: 'utf-8' }) expect(refs).not.toContain('refs/kun/checkpoints') }) + + it('refuses to restore when a tampered checkpoint smuggles a path-traversal untracked entry', async () => { + // Build a legitimate checkpoint, then rewrite its metadata.json so an + // untracked entry escapes the repository root (`../escape.txt`). The restore + // must reject the traversal rather than copying the file outside the repo. + const checkpoint = await createGitCheckpoint({ + dataDir, + workspaceRoot: repoRoot, + threadId: 'thr_traversal' + }) + expect(checkpoint.ok).toBe(true) + if (!checkpoint.ok) throw new Error(checkpoint.message) + + const checkpointDir = join(dataDir, 'git-checkpoints', checkpoint.checkpointId) + const metadataPath = join(checkpointDir, 'metadata.json') + const metadata = JSON.parse(await readFile(metadataPath, 'utf-8')) as { + untrackedFiles: string[] + [key: string]: unknown + } + metadata.untrackedFiles = ['../escape.txt'] + await writeFile(metadataPath, JSON.stringify(metadata, null, 2), 'utf-8') + + // Plant a payload at the smuggled source location inside the checkpoint + // untracked dir so the existence check would succeed without the guard. + const smuggledSource = join(checkpointDir, 'untracked', '..', 'escape.txt') + await writeFile(smuggledSource, 'escaped payload\n') + + // The destination the traversal would write to, OUTSIDE the repo. 
+ const escapeTarget = join(repoRoot, '..', 'escape.txt') + + const restored = await restoreGitCheckpoint({ + dataDir, + checkpointId: checkpoint.checkpointId + }) + expect(restored.ok).toBe(false) + if (restored.ok) throw new Error('expected restore to be refused') + expect(restored.reason).toBe('error') + expect(restored.message).toMatch(/escapes the repository root/) + + // Nothing must have been written outside the repository. + await expect(stat(escapeTarget)).rejects.toThrow() + }) + + it('refuses to restore when a tampered checkpoint smuggles an absolute untracked path', async () => { + const checkpoint = await createGitCheckpoint({ + dataDir, + workspaceRoot: repoRoot, + threadId: 'thr_absolute' + }) + expect(checkpoint.ok).toBe(true) + if (!checkpoint.ok) throw new Error(checkpoint.message) + + const checkpointDir = join(dataDir, 'git-checkpoints', checkpoint.checkpointId) + const metadataPath = join(checkpointDir, 'metadata.json') + const metadata = JSON.parse(await readFile(metadataPath, 'utf-8')) as { + untrackedFiles: string[] + [key: string]: unknown + } + metadata.untrackedFiles = ['/tmp/escape-absolute.txt'] + await writeFile(metadataPath, JSON.stringify(metadata, null, 2), 'utf-8') + + const restored = await restoreGitCheckpoint({ + dataDir, + checkpointId: checkpoint.checkpointId + }) + expect(restored.ok).toBe(false) + if (restored.ok) throw new Error('expected restore to be refused') + expect(restored.reason).toBe('error') + expect(restored.message).toMatch(/invalid untracked path|escapes the repository root/) + }) + + it('resolvePathWithinRepository rejects a path that escapes via an in-repo symlink', async () => { + // An in-repo symlink `repo/link -> /outside` makes the relative path + // `link/payload.txt` lexically contained (target startsWith repo+sep), but + // cp() would follow the link and write OUTSIDE the repo. The helper must + // resolve the target's real path (walking up to the existing symlink dir) + // and reject the escape. 
This is the regression for the symlink-anchored + // traversal that a lexical-only check misses. + const outsideDir = join(sandbox, 'outside') + await mkdir(outsideDir, { recursive: true }) + await symlink(outsideDir, join(repoRoot, 'link'), 'dir') + + await expect( + testResolvePathWithinRepository(repoRoot, 'link/payload.txt') + ).rejects.toThrow(/escapes the repository root/) + // And nothing should have been created outside the repo. + await expect(stat(join(outsideDir, 'payload.txt'))).rejects.toThrow() + }) + + it('resolvePathWithinRepository accepts a legitimate path inside the repo', async () => { + const { realpath } = await import('node:fs/promises') + await mkdir(join(repoRoot, 'sub'), { recursive: true }) + // The helper anchors against the realpath'd root (macOS /var -> /private/var), + // so compare against the canonical root, not the lexical repoRoot. + const repoReal = await realpath(repoRoot) + await expect( + testResolvePathWithinRepository(repoRoot, 'sub/file.txt') + ).resolves.toBe(normalize(join(repoReal, 'sub', 'file.txt'))) + }) + + it('resolvePathWithinRepository rejects traversal, absolute, and null-byte paths', async () => { + await expect(testResolvePathWithinRepository(repoRoot, '../escape.txt')).rejects.toThrow() + await expect(testResolvePathWithinRepository(repoRoot, '/tmp/escape.txt')).rejects.toThrow() + await expect(testResolvePathWithinRepository(repoRoot, 'evil\0.txt')).rejects.toThrow() + await expect(testResolvePathWithinRepository(repoRoot, '.')).rejects.toThrow() + await expect(testResolvePathWithinRepository(repoRoot, '..')).rejects.toThrow() + }) + + it('refuses to restore while a thread is running and leaves the working tree untouched', async () => { + const checkpoint = await createGitCheckpoint({ + dataDir, + workspaceRoot: repoRoot, + threadId: 'thr_busy' + }) + expect(checkpoint.ok).toBe(true) + if (!checkpoint.ok) throw new Error(checkpoint.message) + + // Mutate the working tree after the checkpoint so we can assert 
the restore + // did NOT clobber it. If the busy guard fails open, these changes vanish. + await writeFile(join(repoRoot, 'tracked.txt'), 'agent editing\n') + await writeFile(join(repoRoot, 'post-checkpoint.txt'), 'should survive\n') + const headBefore = execFileSync('git', ['-C', repoRoot, 'rev-parse', 'HEAD'], { encoding: 'utf-8' }).trim() + + const runtimeRequest = vi.fn(async () => ({ + ok: true, + status: 200, + // ThreadSummary exposes `status` (idle|running|archived|deleted), NOT + // `state`. A running thread must be reported as status === 'running'. + body: JSON.stringify({ threads: [{ id: 'thr_running', status: 'running' }] }) + })) + + const restored = await restoreGitCheckpoint({ + dataDir, + checkpointId: checkpoint.checkpointId, + runtimeRequest + }) + + expect(restored.ok).toBe(false) + if (restored.ok) throw new Error('expected restore to be refused') + expect(restored.reason).toBe('error') + expect(restored.message).toMatch(/Cannot restore checkpoint while a thread is running/) + + // The busy guard must fire BEFORE any destructive git op, so the runtime + // probe is the only call made and the working tree is byte-for-byte intact. 
+ expect(runtimeRequest).toHaveBeenCalledTimes(1) + expect(await readFile(join(repoRoot, 'tracked.txt'), 'utf-8')).toBe('agent editing\n') + expect(await readFile(join(repoRoot, 'post-checkpoint.txt'), 'utf-8')).toBe('should survive\n') + expect(execFileSync('git', ['-C', repoRoot, 'rev-parse', 'HEAD'], { encoding: 'utf-8' }).trim()).toBe(headBefore) + }) + + it('restores when the runtime reports all threads idle (runtimeRequest exercised)', async () => { + const checkpoint = await createGitCheckpoint({ + dataDir, + workspaceRoot: repoRoot, + threadId: 'thr_idle' + }) + expect(checkpoint.ok).toBe(true) + if (!checkpoint.ok) throw new Error(checkpoint.message) + + await writeFile(join(repoRoot, 'tracked.txt'), 'changed after checkpoint\n') + await writeFile(join(repoRoot, 'new-after.txt'), 'new\n') + + const runtimeRequest = vi.fn(async () => ({ + ok: true, + status: 200, + body: JSON.stringify({ threads: [{ id: 'thr_a', status: 'idle' }, { id: 'thr_b', status: 'archived' }] }) + })) + + const restored = await restoreGitCheckpoint({ + dataDir, + checkpointId: checkpoint.checkpointId, + runtimeRequest + }) + expect(restored.ok).toBe(true) + if (!restored.ok) throw new Error(restored.message) + // The guard ran and let the restore proceed. + expect(runtimeRequest).toHaveBeenCalledTimes(1) + // Restore rewound the tracked file to its checkpoint content and removed the + // post-checkpoint untracked file (git clean -fd). 
+ expect(await readFile(join(repoRoot, 'tracked.txt'), 'utf-8')).toBe('base\n') + await expect(stat(join(repoRoot, 'new-after.txt'))).rejects.toThrow() + }) + + it('deletes checkpoint directories that are not referenced by thread data', async () => { + const used = 'gcp_used' + const unused = 'gcp_unused' + await mkdir(join(dataDir, 'git-checkpoints', used), { recursive: true }) + await mkdir(join(dataDir, 'git-checkpoints', unused), { recursive: true }) + await mkdir(join(dataDir, 'threads', 'thr_1'), { recursive: true }) + await writeFile( + join(dataDir, 'threads', 'thr_1', 'items.jsonl'), + `${JSON.stringify({ id: 'item_1', workspaceCheckpointId: used })}\n`, + 'utf-8' + ) + + const result = await cleanupUnusedGitCheckpoints({ dataDir, graceMs: 0 }) + + expect(result.scanned).toBe(2) + expect(result.kept).toBe(1) + expect(result.deleted).toBe(1) + expect(result.deletedIds).toEqual([unused]) + await expect(stat(join(dataDir, 'git-checkpoints', used))).resolves.toBeTruthy() + await expect(stat(join(dataDir, 'git-checkpoints', unused))).rejects.toThrow() + }) + + it('keeps recently created checkpoints (create-vs-flush grace) and deletes old ones', async () => { + const fresh = 'gcp_fresh' + const stale = 'gcp_stale' + await mkdir(join(dataDir, 'git-checkpoints', fresh), { recursive: true }) + await mkdir(join(dataDir, 'git-checkpoints', stale), { recursive: true }) + // Backdate the stale checkpoint beyond the grace window; the fresh one keeps + // its just-now mtime and must not be deleted (its referencing item may not + // be flushed yet). 
+ const old = new Date('2020-01-01T00:00:00.000Z') + await utimes(join(dataDir, 'git-checkpoints', stale), old, old) + + const result = await cleanupUnusedGitCheckpoints({ dataDir, graceMs: 10 * 60 * 1000 }) + + expect(result.deletedIds).toEqual([stale]) + expect(result.kept).toBe(1) + await expect(stat(join(dataDir, 'git-checkpoints', fresh))).resolves.toBeTruthy() + await expect(stat(join(dataDir, 'git-checkpoints', stale))).rejects.toThrow() + }) + + it('records cleanup state and skips runs before the configured interval elapses', async () => { + await mkdir(join(dataDir, 'git-checkpoints', 'gcp_first'), { recursive: true }) + + const first = await cleanupUnusedGitCheckpointsIfDue({ + dataDir, + intervalDays: 3, + graceMs: 0, + now: new Date('2026-01-01T00:00:00.000Z') + }) + expect(first.due).toBe(true) + if (!first.due) throw new Error('expected cleanup to run') + expect(first.result.deletedIds).toEqual(['gcp_first']) + + await mkdir(join(dataDir, 'git-checkpoints', 'gcp_second'), { recursive: true }) + const skipped = await cleanupUnusedGitCheckpointsIfDue({ + dataDir, + intervalDays: 3, + graceMs: 0, + now: new Date('2026-01-03T23:59:59.000Z') + }) + expect(skipped.due).toBe(false) + await expect(stat(join(dataDir, 'git-checkpoints', 'gcp_second'))).resolves.toBeTruthy() + + const second = await cleanupUnusedGitCheckpointsIfDue({ + dataDir, + intervalDays: 3, + graceMs: 0, + now: new Date('2026-01-04T00:00:00.000Z') + }) + expect(second.due).toBe(true) + if (!second.due) throw new Error('expected cleanup to run after interval') + expect(second.result.deletedIds).toEqual(['gcp_second']) + }) }) diff --git a/src/main/services/git-checkpoint-service.ts b/src/main/services/git-checkpoint-service.ts index c96700501..562681eaa 100644 --- a/src/main/services/git-checkpoint-service.ts +++ b/src/main/services/git-checkpoint-service.ts @@ -1,5 +1,6 @@ -import { cp, mkdir, readFile, rm, stat, writeFile } from 'node:fs/promises' -import { dirname, join, resolve } from 
'node:path' +import { cp, mkdir, readFile, readdir, realpath, rm, stat, writeFile } from 'node:fs/promises' +import type { Dirent } from 'node:fs' +import { dirname, basename, extname, isAbsolute, join, normalize, resolve, sep } from 'node:path' import { randomUUID } from 'node:crypto' import { runGit, resolveGitCwd } from './git-service' import type { @@ -18,6 +19,27 @@ type GitCheckpointMetadata = { untrackedFiles: string[] } +export type GitCheckpointCleanupResult = { + scanned: number + kept: number + deleted: number + failed: number + deletedIds: string[] + failedIds: string[] +} + +export type GitCheckpointCleanupDueResult = + | { due: false, lastRunAt: string | null } + | { due: true, lastRunAt: string, result: GitCheckpointCleanupResult } + +type GitCheckpointCleanupState = { + lastRunAt?: string +} + +const DAY_MS = 24 * 60 * 60 * 1_000 +const CHECKPOINT_CLEANUP_STATE_FILE = '.cleanup.json' +const CHECKPOINT_REFERENCE_FILE_EXTENSIONS = new Set(['.json', '.jsonl']) + function checkpointFailure(error: unknown): Extract { const message = error instanceof Error ? error.message : String(error) if (/not a git repository/i.test(message)) { @@ -38,6 +60,14 @@ function checkpointDir(dataDir: string, checkpointId: string): string { return join(resolve(dataDir), 'git-checkpoints', checkpointId) } +function checkpointRootDir(dataDir: string): string { + return join(resolve(dataDir), 'git-checkpoints') +} + +function checkpointCleanupStatePath(dataDir: string): string { + return join(checkpointRootDir(dataDir), CHECKPOINT_CLEANUP_STATE_FILE) +} + function checkpointHeadBundlePath(dataDir: string, checkpointId: string): string { return join(checkpointDir(dataDir, checkpointId), 'head.bundle') } @@ -128,6 +158,286 @@ async function resolveRepositoryRoot(workspaceRoot: string): Promise { + // Reject empty / current / parent / absolute, plus null bytes and Windows + // drive-relative forms ("C:file") that bypass isAbsolute(). + if (!relativePath || relativePath === '.' 
|| relativePath === '..' || isAbsolute(relativePath)) { + throw new Error(`invalid untracked path: ${relativePath}`) + } + if (relativePath.includes('\0') || /^[a-zA-Z]:/.test(relativePath)) { + throw new Error(`invalid untracked path: ${relativePath}`) + } + + const repoReal = await realpath(repositoryRoot) + const targetNormalized = normalize(join(repoReal, relativePath)) + // startsWith with a trailing separator prevents prefix attacks where + // repoReal is a textual prefix of an unrelated dir (e.g. "/repo" vs + // "/repo-evil"). Exact equality covers the (already-rejected) root case. + if (targetNormalized !== repoReal && !targetNormalized.startsWith(repoReal + sep)) { + throw new Error(`untracked path escapes the repository root: ${relativePath}`) + } + + // The lexical check above is necessary but NOT sufficient: an in-repo + // symlink (e.g. repo/link -> /outside) makes `link/payload.txt` lexically + // contained while cp() follows the link and writes outside the repo. Resolve + // the target via realpath to defeat any symlink on the path. The target may + // not exist yet (cp creates it), so when the direct realpath fails with + // ENOENT we canonicalize the nearest existing ancestor (the parent dir) and + // re-join the remaining suffix, then re-assert containment on the resolved + // pair. Any other realpath failure (EACCES/ELOOP/ENOTDIR/…) fails closed. + const targetReal = await resolveSymlinkSafe(targetNormalized) + if (targetReal !== repoReal && !targetReal.startsWith(repoReal + sep)) { + throw new Error(`untracked path escapes the repository root: ${relativePath}`) + } + + // Return the lexical target so downstream mkdir/cp operate on the path the + // caller asked for; the escape check above already proved it cannot leave + // the repository root through any symlink on the path. + return targetNormalized +} + +/** + * Exported for tests. 
Validates an untracked-file relative path (from + * persisted metadata) stays inside `repositoryRoot`, defeating `..`, + * absolute, drive-relative, null-byte, AND in-repo-symlink escapes. + */ +export async function testResolvePathWithinRepository( + repositoryRoot: string, + relativePath: string +): Promise { + return resolvePathWithinRepository(repositoryRoot, relativePath) +} + +/** + * Canonicalizes `lexicalPath`, tolerating a not-yet-existing leaf (the + * write/create case) by realpath-ing the nearest existing ancestor and + * re-joining the non-existent suffix. Fail-closed on realpath errors other + * than ENOENT. Mirrors the approach used by the workspace tool escape check. + */ +async function resolveSymlinkSafe(lexicalPath: string): Promise { + const direct = await safeRealpath(lexicalPath) + if (direct !== null) return direct + const segments: string[] = [] + let current = lexicalPath + let ancestor: string | null = null + for (let i = 0; i < 128 && current !== dirname(current); i += 1) { + const resolved = await safeRealpath(current) + if (resolved !== null) { + ancestor = resolved + break + } + segments.unshift(basename(current)) + current = dirname(current) + } + if (ancestor === null) { + throw new Error(`cannot canonicalize path (no existing ancestor): ${lexicalPath}`) + } + return segments.length > 0 ? normalize(join(ancestor, ...segments)) : ancestor +} + +async function safeRealpath(target: string): Promise { + try { + return await realpath(target) + } catch (error) { + const code = (error as NodeJS.ErrnoException).code + if (code === 'ENOENT' || code === 'EACCES' || code === 'ELOOP' || code === 'ENOTDIR') { + return null + } + throw error + } +} + +/** + * Lexical containment check used against an already-realpath'd base (the + * checkpoint untracked dir, whose realpath may be a fallback when the dir is + * absent). 
Shares the same rejection rules as {@link resolvePathWithinRepository} + * so a traversal path cannot slip through on the source side. + */ +function isValidWithinBase(relativePath: string, baseReal: string): boolean { + if (!relativePath || relativePath === '.' || relativePath === '..' || isAbsolute(relativePath)) { + return false + } + if (relativePath.includes('\0') || /^[a-zA-Z]:/.test(relativePath)) { + return false + } + const targetNormalized = normalize(join(baseReal, relativePath)) + return targetNormalized === baseReal || targetNormalized.startsWith(baseReal + sep) +} + +function extractWorkspaceCheckpointIds(text: string): Set { + const ids = new Set() + const pattern = /"workspaceCheckpointId"\s*:\s*"([^"]+)"/g + let match: RegExpExecArray | null = null + while ((match = pattern.exec(text)) !== null) { + const id = match[1]?.trim() + if (id) ids.add(id) + } + return ids +} + +async function collectReferencedCheckpointIds(dataDir: string): Promise> { + const referenced = new Set() + const roots = [join(resolve(dataDir), 'threads')] + const visit = async (dir: string): Promise => { + let entries: Dirent[] + try { + entries = await readdir(dir, { withFileTypes: true }) + } catch (error) { + const code = (error as NodeJS.ErrnoException).code + if (code === 'ENOENT' || code === 'ENOTDIR') return + throw error + } + + for (const entry of entries) { + const path = join(dir, entry.name) + if (entry.isDirectory()) { + await visit(path) + continue + } + if (!entry.isFile() || !CHECKPOINT_REFERENCE_FILE_EXTENSIONS.has(extname(entry.name))) continue + let text = '' + try { + text = await readFile(path, 'utf-8') + } catch { + continue + } + for (const id of extractWorkspaceCheckpointIds(text)) { + referenced.add(id) + } + } + } + + for (const root of roots) { + await visit(root) + } + return referenced +} + +async function readCleanupState(dataDir: string): Promise { + try { + const raw = await readFile(checkpointCleanupStatePath(dataDir), 'utf-8') + const parsed = 
JSON.parse(raw) as GitCheckpointCleanupState + return typeof parsed === 'object' && parsed !== null ? parsed : {} + } catch { + return {} + } +} + +async function writeCleanupState(dataDir: string, state: GitCheckpointCleanupState): Promise { + const root = checkpointRootDir(dataDir) + await mkdir(root, { recursive: true }) + await writeFile(checkpointCleanupStatePath(dataDir), JSON.stringify(state, null, 2), 'utf-8') +} + +function isCheckpointCleanupDue(lastRunAt: string | undefined, intervalDays: number, now: Date): boolean { + if (!lastRunAt) return true + const lastRunMs = Date.parse(lastRunAt) + if (!Number.isFinite(lastRunMs)) return true + return now.getTime() - lastRunMs >= intervalDays * DAY_MS +} + +// A checkpoint directory is created before its referencing thread item is +// flushed to disk, so a freshly-created checkpoint can momentarily look +// unreferenced. Skip directories modified within this window so a cleanup pass +// landing in that gap can't delete a checkpoint a concurrent turn just created; +// a genuinely orphaned one is removed on a later pass. Injectable so tests can +// disable it with graceMs: 0. +const CHECKPOINT_CLEANUP_GRACE_MS = 10 * 60 * 1_000 + +export async function cleanupUnusedGitCheckpoints(params: { + dataDir: string + graceMs?: number + now?: Date +}): Promise { + const graceMs = params.graceMs ?? CHECKPOINT_CLEANUP_GRACE_MS + const nowMs = (params.now ?? 
new Date()).getTime() + const root = checkpointRootDir(params.dataDir) + const referenced = await collectReferencedCheckpointIds(params.dataDir) + const result: GitCheckpointCleanupResult = { + scanned: 0, + kept: 0, + deleted: 0, + failed: 0, + deletedIds: [], + failedIds: [] + } + + let entries: Dirent[] + try { + entries = await readdir(root, { withFileTypes: true }) + } catch (error) { + const code = (error as NodeJS.ErrnoException).code + if (code === 'ENOENT' || code === 'ENOTDIR') return result + throw error + } + + for (const entry of entries) { + if (!entry.isDirectory()) continue + const checkpointId = entry.name + result.scanned += 1 + if (referenced.has(checkpointId)) { + result.kept += 1 + continue + } + if (graceMs > 0) { + try { + const dirStat = await stat(join(root, checkpointId)) + if (nowMs - dirStat.mtimeMs < graceMs) { + // Recently touched — may be referenced by an item not yet flushed. + result.kept += 1 + continue + } + } catch { + // Cannot stat (e.g. removed concurrently); fall through to the delete. + } + } + try { + await rm(join(root, checkpointId), { recursive: true, force: true }) + result.deleted += 1 + result.deletedIds.push(checkpointId) + } catch { + result.failed += 1 + result.failedIds.push(checkpointId) + } + } + + return result +} + +export async function cleanupUnusedGitCheckpointsIfDue(params: { + dataDir: string + intervalDays: number + now?: Date + graceMs?: number +}): Promise { + const now = params.now ?? new Date() + const state = await readCleanupState(params.dataDir) + const lastRunAt = typeof state.lastRunAt === 'string' ? state.lastRunAt : undefined + if (!isCheckpointCleanupDue(lastRunAt, params.intervalDays, now)) { + return { due: false, lastRunAt: lastRunAt ?? 
null } + } + const result = await cleanupUnusedGitCheckpoints({ dataDir: params.dataDir, now, graceMs: params.graceMs }) + const nextLastRunAt = now.toISOString() + await writeCleanupState(params.dataDir, { lastRunAt: nextLastRunAt }) + return { due: true, lastRunAt: nextLastRunAt, result } +} + export async function createGitCheckpoint(params: { dataDir: string workspaceRoot: string @@ -191,6 +501,14 @@ export async function createGitCheckpoint(params: { export async function restoreGitCheckpoint(params: { dataDir: string checkpointId: string + /** + * Optional runtime bridge used to verify that no thread is mid-turn before + * running the destructive `git reset --hard` / `git clean -fd`. When omitted + * (e.g. from existing callers and unit tests) the check is skipped and the + * function behaves as before. When provided, a non-ok response or any thrown + * error fails closed: the restore is refused rather than proceeding. + */ + runtimeRequest?: (path: string, init: { method?: string; body?: string }) => Promise<{ ok: boolean; status: number; body: string }> }): Promise { const checkpointId = params.checkpointId.trim() const metadata = await readMetadata(params.dataDir, checkpointId) @@ -202,6 +520,46 @@ export async function restoreGitCheckpoint(params: { await assertNoUnmerged(repositoryRoot) const targetRef = await resolveCheckpointTarget(repositoryRoot, params.dataDir, metadata) + // Busy guard: a checkpoint restore runs `git reset --hard` + `git clean + // -fd`, which would destroy files the agent is actively editing. Before + // those destructive ops, ask the runtime whether any thread is currently + // running a turn. `GET /v1/threads` serializes ThreadSummary, whose only + // activity-relevant field is `status` with the enum + // `idle | running | archived | deleted`; a thread is busy exactly when its + // status is `running`. Fail closed if the runtime cannot be queried. 
+ // + // (An earlier version of this guard read a non-existent `thread.state` + // field and compared it against turn-level states that never appear on a + // thread summary; that made the guard a no-op and the race still fired.) + if (params.runtimeRequest) { + try { + const response = await params.runtimeRequest('/v1/threads?limit=500&include=side', { method: 'GET' }) + if (!response.ok) { + return { + ok: false, + reason: 'error', + message: 'Cannot verify runtime state before checkpoint restore. Please ensure the runtime is healthy and try again.' + } + } + const data = JSON.parse(response.body) as { threads?: Array<{ status?: string }> } + const hasRunning = data.threads?.some((thread) => thread.status === 'running') + if (hasRunning) { + return { + ok: false, + reason: 'error', + message: 'Cannot restore checkpoint while a thread is running. Please wait for the current turn to finish.' + } + } + } catch (error) { + const message = error instanceof Error ? error.message : String(error) + return { + ok: false, + reason: 'error', + message: `Cannot verify runtime state before checkpoint restore: ${message}` + } + } + } + const rescue = await createGitCheckpoint({ dataDir: params.dataDir, workspaceRoot: repositoryRoot, @@ -223,12 +581,33 @@ export async function restoreGitCheckpoint(params: { await applyPatchIfPresent(repositoryRoot, join(dir, 'staged.patch'), true) await applyPatchIfPresent(repositoryRoot, join(dir, 'unstaged.patch'), false) + const checkpointUntrackedDir = join(dir, 'untracked') + // The untracked dir is created at checkpoint time but may legitimately be + // absent on old checkpoints that had no untracked files. realpath() would + // throw ENOENT, so canonicalize tolerantly for this non-security-critical + // anchor (the per-path escape check below still runs). 
+ let checkpointUntrackedReal: string + try { + checkpointUntrackedReal = await realpath(checkpointUntrackedDir) + } catch { + checkpointUntrackedReal = normalize(checkpointUntrackedDir) + } + for (const relativePath of metadata.untrackedFiles) { - const from = join(dir, 'untracked', relativePath) - if (!(await fileExists(from))) continue - const to = join(repositoryRoot, relativePath) - await mkdir(dirname(to), { recursive: true }) - await cp(from, to, { recursive: true, force: true, errorOnExist: false }) + // `relativePath` comes from persisted, untrusted metadata. Validate it + // stays inside the repository root (rejecting `..`, absolute, drive + // forms, null bytes) and inside the checkpoint's untracked dir. Both + // checks run through realpath/normalize so symlinks cannot redirect the + // copy outside the validated roots. + const targetWithinRepo = await resolvePathWithinRepository(repositoryRoot, relativePath) + if (!isValidWithinBase(relativePath, checkpointUntrackedReal)) { + throw new Error(`untracked path escapes the checkpoint directory: ${relativePath}`) + } + const sourceWithinCheckpoint = normalize(join(checkpointUntrackedReal, relativePath)) + + if (!(await fileExists(sourceWithinCheckpoint))) continue + await mkdir(dirname(targetWithinRepo), { recursive: true }) + await cp(sourceWithinCheckpoint, targetWithinRepo, { recursive: true, force: true, errorOnExist: false }) } return { diff --git a/src/main/services/skill-service.test.ts b/src/main/services/skill-service.test.ts index d3665acd7..f41dd0072 100644 --- a/src/main/services/skill-service.test.ts +++ b/src/main/services/skill-service.test.ts @@ -13,7 +13,12 @@ import { defaultTerminalSettings, type AppSettingsV1 } from '../../shared/app-settings' -import { guiSkillRootsForRuntime, listGuiSkillRoots, listGuiSkills } from './skill-service' +import { + guiSkillRootsForRuntime, + isCodexPluginCacheRoot, + listGuiSkillRoots, + listGuiSkills +} from './skill-service' vi.mock('node:os', async 
(importOriginal) => { const actual = await importOriginal() @@ -210,6 +215,29 @@ describe('skill-service', () => { .not.toContain(comparable(pluginRoot)) }) + it('stops scanning Codex plugin caches when global-codex is disabled', async () => { + const workspaceRoot = join(tempRoot, 'ws-plugin-global') + const pluginRoot = join(tempRoot, '.codex', 'plugins', 'cache', 'gmail', '1.0', 'skills') + await mkdir(join(pluginRoot, 'gmail'), { recursive: true }) + await writeFile(join(pluginRoot, 'gmail', 'SKILL.md'), ['---', 'name: gmail', '---'].join('\n'), 'utf8') + + const settings = createSettings(workspaceRoot) + // Plugin caches are on by default... + expect((await guiSkillRootsForRuntime(settings, workspaceRoot)).map((root) => comparable(root.path))) + .toContain(comparable(pluginRoot)) + + // ...and disabling the Codex global root toggle takes them all down with it. + settings.claw.skills.disabledDirs = ['global-codex'] + expect((await guiSkillRootsForRuntime(settings, workspaceRoot)).map((root) => comparable(root.path))) + .not.toContain(comparable(pluginRoot)) + }) + + it('recognizes roots under ~/.codex/plugins/cache as Codex plugin caches', () => { + expect(isCodexPluginCacheRoot(join(tempRoot, '.codex', 'plugins', 'cache', 'vercel', '2.1', 'skills'))).toBe(true) + expect(isCodexPluginCacheRoot(join(tempRoot, '.codex', 'skills'))).toBe(false) + expect(isCodexPluginCacheRoot(join(tempRoot, '.kun', 'skills'))).toBe(false) + }) + it('rejects a skill.json whose entry escapes the package directory (path traversal)', async () => { const workspaceRoot = join(tempRoot, 'ws-traversal') const skillRoot = join(workspaceRoot, '.claude', 'skills', 'evil') @@ -261,11 +289,12 @@ describe('skill-service', () => { version: 1, locale: 'en', theme: 'system', - uiFontScale: 'small', + uiFontScale: 0.82, provider: defaultModelProviderSettings(), agents: { kun: defaultKunRuntimeSettings() }, workspaceRoot, log: { enabled: false, retentionDays: 7 }, + checkpointCleanup: { enabled: 
false, intervalDays: 3 }, notifications: { turnComplete: true }, appBehavior: { openAtLogin: false, startMinimized: false, closeToTray: false }, keyboardShortcuts: defaultKeyboardShortcuts(), diff --git a/src/main/services/skill-service.ts b/src/main/services/skill-service.ts index b5275b877..075126d18 100644 --- a/src/main/services/skill-service.ts +++ b/src/main/services/skill-service.ts @@ -84,10 +84,17 @@ export async function guiSkillRootsForRuntime( const projectCommon = candidates.filter((c) => c.source === 'common' && c.scope === 'project') const globalCommon = candidates.filter((c) => c.source === 'common' && c.scope === 'global') const extra = candidates.filter((c) => c.source === 'extra') - const pluginRoots = (await discoverCodexPluginSkillRoots()) - .filter((root) => existsSync(root)) - .filter((root) => !disabled.has(comparablePath(root))) - .map((path) => ({ path, scope: 'global' as const })) + // Codex plugin caches follow the `global-codex` toggle: disabling the Codex + // global root (~/.codex/skills) also stops scanning ~/.codex/plugins/cache/** + // (#392). The per-path filter is kept so a hand-added exact plugin path in + // `disabledDirs` still works. + const codexPluginsDisabled = disabled.has('global-codex') + const pluginRoots = codexPluginsDisabled + ? 
[] + : (await discoverCodexPluginSkillRoots()) + .filter((root) => existsSync(root)) + .filter((root) => !disabled.has(comparablePath(root))) + .map((path) => ({ path, scope: 'global' as const })) return uniqueSkillRoots([ ...projectCommon.map(toGuiSkillRoot), @@ -97,11 +104,19 @@ export async function guiSkillRootsForRuntime( ]) } +export function guiSkillWorkspaceRootsForRuntime( + settings: AppSettingsV1 | undefined, + workspaceRootOverride?: string +): string[] { + return collectWorkspaceRoots(settings, workspaceRootOverride) +} + /** * Full list of detected common skill directories + configured extra dirs for * the settings UI, including ones the user disabled or that do not exist yet, - * annotated with skill counts and enabled state. Codex plugin caches are - * always-on and intentionally excluded from this user-toggleable list. + * annotated with skill counts and enabled state. Codex plugin caches are not + * listed as a separate row; they follow the `global-codex` toggle (disabling + * the Codex global root also stops scanning ~/.codex/plugins/cache/**). */ export async function listGuiSkillRoots( settings: AppSettingsV1, @@ -297,10 +312,26 @@ export function normalizeSkillRootPath(path: string | undefined): string { async function discoverCodexPluginSkillRoots(): Promise { const roots: string[] = [] - await collectSkillRoots(join(homedir(), '.codex', 'plugins', 'cache'), roots, 0, 5) + await collectSkillRoots(codexPluginCacheBase(), roots, 0, 5) return roots } +function codexPluginCacheBase(): string { + return join(homedir(), '.codex', 'plugins', 'cache') +} + +/** + * Whether a persisted root lives under `~/.codex/plugins/cache/`. The runtime + * config builder treats these as GUI-managed (auto-discovered) so stale version + * directories left behind by a plugin upgrade are dropped instead of lingering + * in `roots` forever and emitting ENOENT validation errors (#392). 
+ */ +export function isCodexPluginCacheRoot(path: string): boolean { + const comparable = comparablePath(path) + const base = comparablePath(codexPluginCacheBase()) + return comparable === base || comparable.startsWith(`${base}/`) +} + async function collectSkillRoots(root: string, roots: string[], depth: number, maxDepth: number): Promise { if (depth > maxDepth || !existsSync(root)) return if (basename(root) === 'skills' && skillRootHasPackages(root)) { diff --git a/src/main/services/write-inline-completion-service.test.ts b/src/main/services/write-inline-completion-service.test.ts index 186d3ad1e..453c45f46 100644 --- a/src/main/services/write-inline-completion-service.test.ts +++ b/src/main/services/write-inline-completion-service.test.ts @@ -29,7 +29,7 @@ function createSettings(patch: Partial { charStart: 0 }) expect(result.pages[0].text).toContain('PDF BM25 keyword retrieval context') - }) + }, 15_000) + + it('extracts text for local PDF attachments', async () => { + const workspaceRoot = await mkdtemp(join(tmpdir(), 'ds-gui-local-pdf-text-')) + const pdfPath = join(workspaceRoot, 'fixture.pdf') + await writeFile(pdfPath, createSimpleTextPdf('Local PDF attachment text')) + + const result = await readLocalPdfText({ path: pdfPath }) + + expect(result.ok).toBe(true) + if (!result.ok) return + expect(result.pageCount).toBe(1) + expect(result.hasText).toBe(true) + expect(result.pages[0]?.text).toContain('Local PDF attachment text') + }, 15_000) }) diff --git a/src/main/services/write-pdf-text-service.ts b/src/main/services/write-pdf-text-service.ts index e0a172791..b96032494 100644 --- a/src/main/services/write-pdf-text-service.ts +++ b/src/main/services/write-pdf-text-service.ts @@ -133,27 +133,18 @@ async function extractPdfText( export async function readWritePdfText(payload: WorkspaceFileTarget): Promise { try { const targetPath = await resolveOpenTargetPath(payload.path, payload.workspaceRoot) - const fileInfo = await stat(targetPath) - if 
(fileInfo.isDirectory()) return { ok: false, message: 'Cannot read text from a directory.' } - if (fileInfo.size > MAX_PDF_TEXT_BYTES) { - return { ok: false, message: 'This PDF is too large to parse in Write mode.' } - } - if (extname(targetPath).toLowerCase() !== '.pdf') { - return { ok: false, message: 'This file is not a PDF document.' } + return readLocalPdfTextByPath(targetPath, 'This PDF is too large to parse in Write mode.') + } catch (error) { + return { + ok: false, + message: error instanceof Error ? error.message : String(error) } + } +} - const cacheKey = `${targetPath}:${fileInfo.size}:${fileInfo.mtimeMs}` - const cached = pdfTextCache.get(cacheKey) - if (cached) return cached - - const pending = extractPdfText(targetPath, fileInfo.size, fileInfo.mtimeMs).finally(() => { - if (pdfTextCache.size > 32) { - const oldest = pdfTextCache.keys().next().value - if (oldest) pdfTextCache.delete(oldest) - } - }) - pdfTextCache.set(cacheKey, pending) - return pending +export async function readLocalPdfText(payload: { path: string }): Promise { + try { + return readLocalPdfTextByPath(payload.path, 'This PDF is too large to attach.') } catch (error) { return { ok: false, @@ -162,6 +153,30 @@ export async function readWritePdfText(payload: WorkspaceFileTarget): Promise { + const fileInfo = await stat(targetPath) + if (fileInfo.isDirectory()) return { ok: false, message: 'Cannot read text from a directory.' } + if (fileInfo.size > MAX_PDF_TEXT_BYTES) { + return { ok: false, message: tooLargeMessage } + } + if (extname(targetPath).toLowerCase() !== '.pdf') { + return { ok: false, message: 'This file is not a PDF document.' 
} + } + + const cacheKey = `${targetPath}:${fileInfo.size}:${fileInfo.mtimeMs}` + const cached = pdfTextCache.get(cacheKey) + if (cached) return cached + + const pending = extractPdfText(targetPath, fileInfo.size, fileInfo.mtimeMs).finally(() => { + if (pdfTextCache.size > 32) { + const oldest = pdfTextCache.keys().next().value + if (oldest) pdfTextCache.delete(oldest) + } + }) + pdfTextCache.set(cacheKey, pending) + return pending +} + export function clearWritePdfTextCache(): void { pdfTextCache.clear() } diff --git a/src/main/settings-store.test.ts b/src/main/settings-store.test.ts index e42585148..bda167787 100644 --- a/src/main/settings-store.test.ts +++ b/src/main/settings-store.test.ts @@ -2,7 +2,12 @@ import { mkdir, mkdtemp, readFile, readdir, rm, stat, writeFile } from 'node:fs/ import { tmpdir } from 'node:os' import { join } from 'node:path' import { describe, expect, it } from 'vitest' -import { DEFAULT_APPROVAL_POLICY, defaultKunRuntimeSettings, defaultModelProviderSettings } from '../shared/app-settings' +import { + DEFAULT_APPROVAL_POLICY, + DEFAULT_CHECKPOINT_CLEANUP_INTERVAL_DAYS, + defaultKunRuntimeSettings, + defaultModelProviderSettings +} from '../shared/app-settings' import { DEFAULT_GUI_UPDATE_CHANNEL } from '../shared/gui-update' import { JsonSettingsStore } from './settings-store' @@ -15,6 +20,9 @@ describe('JsonSettingsStore', () => { expect(loaded.guiUpdate.channel).toBe(DEFAULT_GUI_UPDATE_CHANNEL) expect(loaded.agents.kun.approvalPolicy).toBe(DEFAULT_APPROVAL_POLICY) + expect(loaded.checkpointCleanup.intervalDays).toBe(DEFAULT_CHECKPOINT_CLEANUP_INTERVAL_DAYS) + // Checkpoint cleanup deletes data, so it must be opt-in (disabled by default). 
+ expect(loaded.checkpointCleanup.enabled).toBe(false) expect(loaded.appBehavior).toEqual({ openAtLogin: false, startMinimized: false, @@ -23,6 +31,19 @@ describe('JsonSettingsStore', () => { }) }) + it('patches and normalizes checkpoint cleanup settings', async () => { + const userDataDir = await mkdtemp(join(tmpdir(), 'ds-gui-settings-')) + + const store = new JsonSettingsStore(userDataDir) + const patched = await store.patch({ checkpointCleanup: { intervalDays: 5 } }) + expect(patched.checkpointCleanup.intervalDays).toBe(5) + + const clamped = await store.patch({ + checkpointCleanup: { intervalDays: 99 as unknown as typeof patched.checkpointCleanup.intervalDays } + }) + expect(clamped.checkpointCleanup.intervalDays).toBe(10) + }) + it('creates a default write workspace with welcome.md', async () => { const userDataDir = await mkdtemp(join(tmpdir(), 'ds-gui-settings-')) diff --git a/src/main/settings-store.ts b/src/main/settings-store.ts index 47bb3c326..f76e292ee 100644 --- a/src/main/settings-store.ts +++ b/src/main/settings-store.ts @@ -6,6 +6,8 @@ import { applyKunRuntimePatch, kunSettingsEnvelope, DEFAULT_GUI_UPDATE_CHANNEL, + DEFAULT_CHECKPOINT_CLEANUP_ENABLED, + DEFAULT_CHECKPOINT_CLEANUP_INTERVAL_DAYS, DEFAULT_CURSOR_SPOTLIGHT_COLOR, DEFAULT_LOG_RETENTION_DAYS, DEFAULT_WRITE_WORKSPACE_ROOT, @@ -25,7 +27,9 @@ import { mergeWriteSettings, defaultTerminalSettings, mergeTerminalSettings, + DEFAULT_UI_FONT_SCALE, normalizeAppBehaviorSettings, + normalizeCheckpointCleanupSettings, normalizeKeyboardShortcuts, migrateLegacyAppSettings, normalizeAppSettings, @@ -201,7 +205,7 @@ const defaultSettings = (): AppSettingsV1 => ({ version: 1, locale: 'en', theme: 'system', - uiFontScale: 'small', + uiFontScale: DEFAULT_UI_FONT_SCALE, cursorSpotlight: true, cursorSpotlightColor: DEFAULT_CURSOR_SPOTLIGHT_COLOR, provider: defaultModelProviderSettings(), @@ -213,6 +217,10 @@ const defaultSettings = (): AppSettingsV1 => ({ enabled: true, retentionDays: 
DEFAULT_LOG_RETENTION_DAYS }, + checkpointCleanup: { + enabled: DEFAULT_CHECKPOINT_CLEANUP_ENABLED, + intervalDays: DEFAULT_CHECKPOINT_CLEANUP_INTERVAL_DAYS + }, notifications: { turnComplete: true }, @@ -241,6 +249,10 @@ function buildMergedSettings(parsed: Partial): AppSettingsV1 { mergeKunRuntimeSettings(getKunRuntimeSettings(defaults), migrated.agents?.kun) ), log: { ...defaults.log, ...migrated.log }, + checkpointCleanup: normalizeCheckpointCleanupSettings({ + ...defaults.checkpointCleanup, + ...migrated.checkpointCleanup + }), notifications: { ...defaults.notifications, ...migrated.notifications }, appBehavior: mergeAppBehaviorSettings(defaults.appBehavior, migrated.appBehavior), keyboardShortcuts: normalizeKeyboardShortcuts(migrated.keyboardShortcuts), @@ -426,6 +438,10 @@ export class JsonSettingsStore { ...restPatch, provider: mergeModelProviderSettings(cur.provider, providerPatch), log: { ...cur.log, ...(partial.log ?? {}) }, + checkpointCleanup: normalizeCheckpointCleanupSettings({ + ...cur.checkpointCleanup, + ...(partial.checkpointCleanup ?? {}) + }), notifications: { ...cur.notifications, ...(partial.notifications ?? 
{}) }, appBehavior: mergeAppBehaviorSettings(cur.appBehavior, partial.appBehavior), keyboardShortcuts: normalizeKeyboardShortcuts({ diff --git a/src/main/upstream-models.test.ts b/src/main/upstream-models.test.ts index 05285a1a7..4db175f41 100644 --- a/src/main/upstream-models.test.ts +++ b/src/main/upstream-models.test.ts @@ -22,7 +22,7 @@ function settings(dataDir: string, model = 'settings-model'): AppSettingsV1 { version: 1, locale: 'en', theme: 'system', - uiFontScale: 'small', + uiFontScale: 0.82, provider: { ...provider, providers: [ @@ -48,6 +48,7 @@ function settings(dataDir: string, model = 'settings-model'): AppSettingsV1 { }, workspaceRoot: '/tmp/workspace', log: { enabled: false, retentionDays: 7 }, + checkpointCleanup: { enabled: false, intervalDays: 3 }, notifications: { turnComplete: true }, appBehavior: { openAtLogin: false, startMinimized: false, closeToTray: false }, keyboardShortcuts: defaultKeyboardShortcuts(), diff --git a/src/main/workflow-runtime.nodes.test.ts b/src/main/workflow-runtime.nodes.test.ts index 66503806c..d3db7367e 100644 --- a/src/main/workflow-runtime.nodes.test.ts +++ b/src/main/workflow-runtime.nodes.test.ts @@ -105,11 +105,12 @@ function buildSettings( version: 1, locale: 'en', theme: 'system', - uiFontScale: 'small', + uiFontScale: 0.82, provider: defaultModelProviderSettings(), agents: { kun: { ...defaultKunRuntimeSettings(), model: 'test-model', apiKey: 'test-key' } }, workspaceRoot: '/tmp/workflow-workspace', log: { enabled: true, retentionDays: 7 }, + checkpointCleanup: { enabled: false, intervalDays: 3 }, notifications: { turnComplete: true }, appBehavior: { openAtLogin: false, startMinimized: false, closeToTray: false }, keyboardShortcuts: defaultKeyboardShortcuts(), diff --git a/src/main/workflow-runtime.run.test.ts b/src/main/workflow-runtime.run.test.ts index 48cb827b8..9b2273c46 100644 --- a/src/main/workflow-runtime.run.test.ts +++ b/src/main/workflow-runtime.run.test.ts @@ -46,11 +46,12 @@ function 
settingsWithWorkflows(workflows: WorkflowV1[], modules: WorkflowCustomM version: 1, locale: 'en', theme: 'system', - uiFontScale: 'small', + uiFontScale: 0.82, provider: defaultModelProviderSettings(), agents: { kun: { ...defaultKunRuntimeSettings(), model: 'test-model', apiKey: 'test-key' } }, workspaceRoot: '/tmp/workflow-workspace', log: { enabled: true, retentionDays: 7 }, + checkpointCleanup: { enabled: false, intervalDays: 3 }, notifications: { turnComplete: true }, appBehavior: { openAtLogin: false, startMinimized: false, closeToTray: false }, keyboardShortcuts: defaultKeyboardShortcuts(), diff --git a/src/preload/index.ts b/src/preload/index.ts index e0cf5f235..5c5959760 100644 --- a/src/preload/index.ts +++ b/src/preload/index.ts @@ -134,6 +134,8 @@ const api = { ipcRenderer.invoke('file:read-workspace-image', options), readWorkspacePdf: (options) => ipcRenderer.invoke('file:read-workspace-pdf', options), + readLocalPdfText: (options) => + ipcRenderer.invoke('file:read-local-pdf-text', options), saveWorkspaceFileAs: (payload) => ipcRenderer.invoke('file:save-as', payload), writeWorkspaceFile: (payload) => diff --git a/src/renderer/src/agent/kun-contract.ts b/src/renderer/src/agent/kun-contract.ts index 8641ae6a4..f0e76abcd 100644 --- a/src/renderer/src/agent/kun-contract.ts +++ b/src/renderer/src/agent/kun-contract.ts @@ -16,12 +16,20 @@ export type CoreItemStatus = export type CoreThreadSummaryJson = { id: string title: string + /** Whether the title is auto/provisional (see ThreadSchema.titleAuto on the core). */ + titleAuto?: boolean + /** Optional whole-conversation summary produced by the summarize route. 
*/ + summary?: string workspace?: string model: string mode: string status: CoreThreadStatus approvalPolicy?: string sandboxMode?: string + pinned?: boolean + providerId?: string + agentId?: string + systemPrompt?: string relation?: 'primary' | 'fork' | 'side' parentThreadId?: string forkedFromThreadId?: string @@ -535,6 +543,10 @@ export type CoreRuntimeEventJson = { changeKind?: 'additive' | 'breaking' toolNames?: string[] status?: string + /** thread_created / thread_updated: the thread's (possibly upgraded) title. */ + title?: string + /** thread_created / thread_updated: whether that title is auto/provisional. */ + titleAuto?: boolean stage?: | 'setup' | 'pre_start' diff --git a/src/renderer/src/agent/kun-mapper.ts b/src/renderer/src/agent/kun-mapper.ts index 1c37ae741..dd20c84f2 100644 --- a/src/renderer/src/agent/kun-mapper.ts +++ b/src/renderer/src/agent/kun-mapper.ts @@ -48,6 +48,8 @@ export function threadFromCore(thread: CoreThreadSummaryJson): NormalizedThread return { id: thread.id, title: thread.title?.trim() || thread.id.slice(0, 8), + ...(thread.titleAuto !== undefined ? { titleAuto: thread.titleAuto } : {}), + ...(thread.summary?.trim() ? { summary: thread.summary.trim() } : {}), updatedAt: thread.updatedAt, model: thread.model, mode: thread.mode, @@ -56,6 +58,10 @@ export function threadFromCore(thread: CoreThreadSummaryJson): NormalizedThread approvalPolicy: normalizeApprovalPolicy(thread.approvalPolicy), sandboxMode: normalizeSandboxMode(thread.sandboxMode), archived: thread.status === 'archived', + pinned: thread.pinned === true, + ...(thread.providerId ? { providerId: thread.providerId } : {}), + ...(thread.agentId ? { agentId: thread.agentId } : {}), + ...(thread.systemPrompt ? 
{ systemPrompt: thread.systemPrompt } : {}), relation: thread.relation, parentThreadId: thread.parentThreadId, forkedFromThreadId: thread.forkedFromThreadId, @@ -1222,6 +1228,14 @@ export async function dispatchKunRuntimeEvent( case 'usage': if (event.usage) sink.onUsage?.(usageFromCore(event.usage)) return + case 'thread_updated': + sink.onThreadUpdated?.({ + threadId: event.threadId ?? '', + ...(event.title !== undefined ? { title: event.title } : {}), + ...(event.titleAuto !== undefined ? { titleAuto: event.titleAuto } : {}), + ...(event.status !== undefined ? { status: event.status } : {}) + }) + return case 'turn_completed': case 'turn_aborted': sink.onTurnComplete() diff --git a/src/renderer/src/agent/kun-runtime.test.ts b/src/renderer/src/agent/kun-runtime.test.ts index b1b8c19fc..101756e69 100644 --- a/src/renderer/src/agent/kun-runtime.test.ts +++ b/src/renderer/src/agent/kun-runtime.test.ts @@ -20,13 +20,14 @@ function settings(): AppSettingsV1 { version: 1, locale: 'en', theme: 'system', - uiFontScale: 'small', + uiFontScale: 0.82, provider: defaultModelProviderSettings(), agents: { kun: defaultKunRuntimeSettings() }, workspaceRoot: '/tmp/workspace', log: { enabled: false, retentionDays: 7 }, + checkpointCleanup: { enabled: false, intervalDays: 3 }, notifications: { turnComplete: true }, appBehavior: { openAtLogin: false, startMinimized: false, closeToTray: false }, keyboardShortcuts: defaultKeyboardShortcuts(), diff --git a/src/renderer/src/agent/kun-runtime.ts b/src/renderer/src/agent/kun-runtime.ts index a5363c9b0..0d63ac107 100644 --- a/src/renderer/src/agent/kun-runtime.ts +++ b/src/renderer/src/agent/kun-runtime.ts @@ -155,7 +155,12 @@ export class KunRuntimeProvider implements AgentProvider { async createThread(input: { workspace?: string title?: string + titleAuto?: boolean mode?: KunThreadMode + agentId?: string + providerId?: string + model?: string + systemPrompt?: string }): Promise { const settings = await rendererRuntimeClient.getSettings() 
const runtime = getKunRuntimeSettings(settings) @@ -165,10 +170,14 @@ export class KunRuntimeProvider implements AgentProvider { JSON.stringify({ workspace: input.workspace || settings.workspaceRoot || '~', title: input.title, - model: runtime.model, + ...(input.titleAuto !== undefined ? { titleAuto: input.titleAuto } : {}), + model: input.model?.trim() || runtime.model, mode: normalizeThreadMode(input.mode), approvalPolicy: runtime.approvalPolicy, - sandboxMode: runtime.sandboxMode + sandboxMode: runtime.sandboxMode, + ...(input.providerId?.trim() ? { providerId: input.providerId.trim() } : {}), + ...(input.agentId?.trim() ? { agentId: input.agentId.trim() } : {}), + ...(input.systemPrompt?.trim() ? { systemPrompt: input.systemPrompt.trim() } : {}) }) ) if (!response.ok) { @@ -366,11 +375,11 @@ export class KunRuntimeProvider implements AgentProvider { } } - async renameThread(threadId: string, title: string): Promise { + async renameThread(threadId: string, title: string, auto?: boolean): Promise { const response = await rendererRuntimeClient.runtimeRequest( kunThreadPath(threadId), 'PATCH', - JSON.stringify({ title }) + JSON.stringify({ title, ...(auto !== undefined ? 
{ titleAuto: auto } : {}) }) ) if (!response.ok) { throw runtimeErrorToError(readRuntimeError(response.body, 'rename thread failed')) @@ -388,6 +397,17 @@ export class KunRuntimeProvider implements AgentProvider { } } + async updateThreadPinned(threadId: string, pinned: boolean): Promise { + const response = await rendererRuntimeClient.runtimeRequest( + kunThreadPath(threadId), + 'PATCH', + JSON.stringify({ pinned }) + ) + if (!response.ok) { + throw runtimeErrorToError(readRuntimeError(response.body, 'update thread pin failed')) + } + } + async archiveThread(threadId: string, archived: boolean): Promise { const response = await window.kunGui.runtimeRequest( kunThreadPath(threadId), diff --git a/src/renderer/src/agent/runtime-client.test.ts b/src/renderer/src/agent/runtime-client.test.ts index c62f647fd..d087c7de9 100644 --- a/src/renderer/src/agent/runtime-client.test.ts +++ b/src/renderer/src/agent/runtime-client.test.ts @@ -17,7 +17,7 @@ function settings(apiKey: string): AppSettingsV1 { version: 1, locale: 'en', theme: 'system', - uiFontScale: 'small', + uiFontScale: 0.82, provider: defaultModelProviderSettings(), agents: { kun: { @@ -27,6 +27,7 @@ function settings(apiKey: string): AppSettingsV1 { }, workspaceRoot: '/tmp/workspace', log: { enabled: false, retentionDays: 7 }, + checkpointCleanup: { enabled: false, intervalDays: 3 }, notifications: { turnComplete: true }, appBehavior: { openAtLogin: false, startMinimized: false, closeToTray: false }, keyboardShortcuts: defaultKeyboardShortcuts(), diff --git a/src/renderer/src/agent/types.ts b/src/renderer/src/agent/types.ts index 425ab5c1b..943b58bfc 100644 --- a/src/renderer/src/agent/types.ts +++ b/src/renderer/src/agent/types.ts @@ -15,11 +15,16 @@ export type RuntimeErrorSeverity = 'info' | 'warning' | 'error' export type AttachmentReference = { id: string + kind?: 'image' | 'document' name?: string mimeType?: string byteSize?: number width?: number height?: number + pageCount?: number + truncated?: boolean 
+ textPreview?: string + documentText?: string previewUrl?: string } @@ -95,6 +100,8 @@ export type UserInputAnswer = { export type NormalizedThread = { id: string title: string + /** Whether the title is auto/provisional (true) vs user-set/locked (false); absent = legacy. */ + titleAuto?: boolean updatedAt: string model: string mode: string @@ -102,8 +109,17 @@ export type NormalizedThread = { status?: string approvalPolicy?: ApprovalPolicy sandboxMode?: SandboxMode + /** Optional provider id when this thread is pinned to a non-default provider. */ + providerId?: string + /** Optional subagent profile id this thread is bound to (primary-agent persona). */ + agentId?: string + /** Optional persona systemPrompt snapshot applied to every ModelRequest on this thread. */ + systemPrompt?: string archived?: boolean + pinned?: boolean preview?: string + /** Whole-conversation summary produced by the summarize route; shown as the list subtitle. */ + summary?: string latestTurnId?: string latestTurnStatus?: string relation?: 'primary' | 'fork' | 'side' @@ -265,7 +281,7 @@ export type ChatBlock = approvalId: string summary: string toolName?: string - status: 'pending' | 'allowed' | 'denied' | 'error' + status: 'pending' | 'submitting' | 'allowed' | 'denied' | 'error' errorMessage?: string meta?: RuntimeDisclosureMetadata } @@ -405,6 +421,8 @@ export type ThreadEventSink = { onRuntimeError?(ev: RuntimeErrorEventPayload): void onGoal(ev: { threadId: string; goal: ThreadGoal | null; cleared?: boolean; createdAt?: string }): void onTodos?(ev: { threadId: string; todos: ThreadTodoList | null; cleared?: boolean; createdAt?: string }): void + /** Thread metadata changed out-of-band (e.g. the backend LLM titler upgraded the title). */ + onThreadUpdated?(ev: { threadId: string; title?: string; titleAuto?: boolean; status?: string }): void onTurnComplete(): void onError(err: Error, options?: ThreadErrorOptions): void /** Optional: cumulative usage update for the thread. 
*/ @@ -423,7 +441,7 @@ export interface AgentProvider { } connect(): Promise listThreads(options?: ThreadListOptions): Promise - createThread(input: { workspace?: string; title?: string; mode?: string }): Promise + createThread(input: { workspace?: string; title?: string; titleAuto?: boolean; mode?: string; agentId?: string; providerId?: string; model?: string; systemPrompt?: string }): Promise getThreadDetail(threadId: string): Promise<{ blocks: ChatBlock[] latestSeq: number @@ -496,8 +514,14 @@ export interface AgentProvider { getMemoryDiagnostics?(): Promise steerUserMessage?(threadId: string, turnId: string, text: string): Promise interruptTurn(threadId: string, turnId: string, options?: { discard?: boolean }): Promise - renameThread(threadId: string, title: string): Promise + /** + * Rename a thread. `auto` marks the title as provisional/auto (true, e.g. the + * client first-message heuristic — the backend LLM titler may upgrade it) or + * user-set/locked (false). Omit to leave the title's auto flag unchanged. 
+ */ + renameThread(threadId: string, title: string, auto?: boolean): Promise updateThreadWorkspace?(threadId: string, workspace: string): Promise + updateThreadPinned?(threadId: string, pinned: boolean): Promise archiveThread?(threadId: string, archived: boolean): Promise deleteThread(threadId: string): Promise compactThread?(threadId: string, reason?: string): Promise<{ replacedTokens: number } | void> diff --git a/src/renderer/src/components/InitialSetupDialog.tsx b/src/renderer/src/components/InitialSetupDialog.tsx index 3df702648..0208089f6 100644 --- a/src/renderer/src/components/InitialSetupDialog.tsx +++ b/src/renderer/src/components/InitialSetupDialog.tsx @@ -2,9 +2,12 @@ import { type ReactElement, useEffect, useRef, useState } from 'react' import { useTranslation } from 'react-i18next' import { DEFAULT_MODEL_PROVIDER_ID, + KUN_TOOL_PERMISSION_MODES, + kunToolPermissionModeSettings, normalizeAppSettings, type AppSettingsPatch, type AppSettingsV1, + type KunToolPermissionMode, type ModelProviderPreset } from '@shared/app-settings' import { @@ -27,9 +30,13 @@ import { Eye, EyeOff, ExternalLink, + FolderPen, + Hand, Image as ImageIcon, + LockKeyholeOpen, MessageCircle, Mic, + ShieldQuestion, Sparkles, Sun, Moon, @@ -51,6 +58,59 @@ const themeOptions: { value: ThemePref; icon: typeof Sun; labelKey: string }[] = ] const DEEPSEEK_USAGE_URL = 'https://platform.deepseek.com/usage' +type PermissionOption = { + value: KunToolPermissionMode + labelKey: string + descriptionKey: string + Icon: typeof Hand + iconClass: string +} + +const PERMISSION_OPTIONS: PermissionOption[] = KUN_TOOL_PERMISSION_MODES.map((value) => { + switch (value) { + case 'always-ask': + return { + value, + labelKey: 'toolPermissionAlwaysAsk', + descriptionKey: 'toolPermissionAlwaysAskDesc', + Icon: Hand, + iconClass: 'border-sky-400/30 bg-sky-500/10 text-sky-700 dark:text-sky-200' + } + case 'read-only': + return { + value, + labelKey: 'toolPermissionReadOnly', + descriptionKey: 
'toolPermissionReadOnlyDesc', + Icon: Eye, + iconClass: 'border-emerald-400/30 bg-emerald-500/10 text-emerald-700 dark:text-emerald-200' + } + case 'sensitive-ask': + return { + value, + labelKey: 'toolPermissionSensitiveAsk', + descriptionKey: 'toolPermissionSensitiveAskDesc', + Icon: ShieldQuestion, + iconClass: 'border-amber-400/35 bg-amber-500/10 text-amber-700 dark:text-amber-200' + } + case 'workspace-write': + return { + value, + labelKey: 'toolPermissionWorkspaceWrite', + descriptionKey: 'toolPermissionWorkspaceWriteDesc', + Icon: FolderPen, + iconClass: 'border-indigo-400/30 bg-indigo-500/10 text-indigo-700 dark:text-indigo-200' + } + case 'bypass': + return { + value, + labelKey: 'toolPermissionBypass', + descriptionKey: 'toolPermissionBypassDesc', + Icon: LockKeyholeOpen, + iconClass: 'border-orange-400/35 bg-orange-500/10 text-orange-700 dark:text-orange-200' + } + } +}) + type SetupProviderCard = { presetId: string name: string @@ -141,7 +201,8 @@ export function InitialSetupDialog(): ReactElement { const [drafts, setDrafts] = useState(null) const [selection, setSelection] = useState({ presetId: DEFAULT_MODEL_PROVIDER_ID, - mode: 'api' + mode: 'api', + permissionMode: 'read-only' }) const [showApiKey, setShowApiKey] = useState(false) const [saving, setSaving] = useState(false) @@ -211,7 +272,7 @@ export function InitialSetupDialog(): ReactElement { const selectCard = (presetId: string): void => { setError(null) - setSelection((current) => (current.presetId === presetId ? current : { presetId, mode: 'api' })) + setSelection((current) => (current.presetId === presetId ? 
current : { ...current, presetId, mode: 'api' })) } const selectMode = (mode: InitialSetupSelection['mode']): void => { @@ -219,6 +280,22 @@ export function InitialSetupDialog(): ReactElement { setSelection((current) => ({ ...current, mode })) } + const selectPermissionMode = (permissionMode: KunToolPermissionMode): void => { + setError(null) + setSelection((current) => ({ ...current, permissionMode })) + const current = formRef.current + if (!current) return + updateForm({ + agents: { + ...current.agents, + kun: { + ...current.agents.kun, + ...kunToolPermissionModeSettings(permissionMode) + } + } + } as SetupFormPatch) + } + const cardFilled = (card: SetupProviderCard): boolean => { if (!drafts) return false if (drafts[card.presetId]?.apiKey.trim()) return true @@ -465,6 +542,39 @@ export function InitialSetupDialog(): ReactElement { )} +
+ +
+ {PERMISSION_OPTIONS.map((option) => { + const isActive = selection.permissionMode === option.value + const Icon = option.Icon + return ( + + ) + })} +
+
+ {t('firstRunPermissionFullAccessRisk')} +
+
+ {regions.length > 0 && (