Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
78a439c
feat(runtime): add background shell sessions with output persistence
musnows Jun 28, 2026
1a04c34
fix(runtime): use options.dataDir for background shell output path
musnows Jun 28, 2026
7fd7bd3
fix(renderer): correct runtime-client import in BackgroundShellOverlay
musnows Jun 28, 2026
2df9580
fix(settings): decouple memory and skill loading from current workspace
musnows Jun 28, 2026
a47c78b
feat(settings): add configurable conversation text width
musnows Jun 28, 2026
88c11a9
feat(chat): show memory summary on injected memory chip hover
musnows Jun 28, 2026
f3d7606
fix(ui): remove left border and dim thinking process text
musnows Jun 28, 2026
81639ab
fix(renderer): polish background shell callback UI and tool summaries
musnows Jun 28, 2026
046a69a
Merge branch 'pr-637' into land-pr-batch
XingYu-Zhong Jun 28, 2026
6fff6cf
Merge branch 'pr-635' into land-pr-batch
XingYu-Zhong Jun 28, 2026
9ef1c09
Merge branch 'pr-636' into land-pr-batch
XingYu-Zhong Jun 28, 2026
a3aaba0
fix(test): satisfy types/mocks for batched PRs #635 #636
XingYu-Zhong Jun 28, 2026
cc9c725
merge(upstream): sync develop into codex/background-shell
musnows Jun 28, 2026
f7d5767
fix(test): address PR review typecheck and timeline chip regressions
musnows Jun 28, 2026
7d9dcc0
Merge pull request #634 from musnows/codex/background-shell
XingYu-Zhong Jun 28, 2026
d98c3fc
fix(read-tracker): allow cross-turn edits when oldText still matches …
XingYu-Zhong Jun 28, 2026
2c04df8
fix(mcp): keep runtime alive when a streamable-http MCP server discon…
XingYu-Zhong Jun 28, 2026
97b35ae
fix(mcp): harden runtime reconnect lifecycle
luoye520ww Jun 28, 2026
24d961a
feat(perf): add agent replay benchmark
luoye520ww Jun 29, 2026
a57069f
perf(renderer): split settings and timeline bundles
luoye520ww Jun 29, 2026
a05aa54
Merge pull request #641 from KunAgent/fix/issues-639-640
XingYu-Zhong Jun 29, 2026
a9d9e22
Merge pull request #642 from luoye520ww/codex/runtime-mcp-reliability
XingYu-Zhong Jun 29, 2026
f712ded
Merge pull request #646 from luoye520ww/codex/frontend-performance-split
XingYu-Zhong Jun 29, 2026
16f43ca
fix(benchmark): harden replay cleanup and expectations
XingYu-Zhong Jun 29, 2026
8c013b7
merge(benchmark): add replay benchmark fixes
XingYu-Zhong Jun 29, 2026
7588cf1
refactor(agent-loop): remove unused MAX_TURN_MODEL_STEPS and related …
XingYu-Zhong Jun 29, 2026
b5ce4b6
feat(release): add release notes for v0.2.19 with Claude Pro/Max subs…
XingYu-Zhong Jun 29, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 21 additions & 0 deletions kun/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,27 @@ Run from the `kun/` directory.
- `npm run serve` – start the runtime after a build.
- `npm run dev` – rebuild in watch mode.

- `npm run benchmark:replay -- --suite <file>` – build, then run a read-only HTTP/SSE agent replay suite.

### Agent replay benchmark

Start a Kun runtime, set `KUN_RUNTIME_URL` and `KUN_RUNTIME_TOKEN`, then run the five-task smoke set:

```bash
npm run benchmark:replay -- --suite benchmarks/agent-core.json --tag smoke --output replay-smoke.json
```

Run all 20 tasks twice and compare with an earlier report:

```bash
npm run benchmark:replay -- --suite benchmarks/agent-core.json --repeat 2 \
--baseline replay-baseline.json --output replay-current.json --fail-on-regression
```

Replay threads always use the `read-only` sandbox and disable interactive input. Reports include success rate,
TTFT, full latency, tool time, SSE delivery delay, token/cache/cost counters, and Kun process peak RSS. The runtime
token is accepted only through `KUN_RUNTIME_TOKEN`, so it does not leak through process arguments.

## CLI

`kun serve` accepts the following flags:
Expand Down
130 changes: 130 additions & 0 deletions kun/benchmarks/agent-core.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
{
"version": 1,
"name": "kun-agent-core",
"defaults": {
"reasoningEffort": "off",
"timeoutMs": 300000
},
"tasks": [
{
"id": "architecture-summary",
"tags": ["smoke", "architecture"],
"prompt": "Read the repository and explain the active Renderer -> preload -> main -> Kun runtime data path. Cite the most relevant file paths. Do not modify files.",
"expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] }
},
{
"id": "runtime-entrypoint",
"tags": ["smoke", "runtime"],
"prompt": "Find the Kun serve-mode composition root and summarize how stores, model clients, tools, and the agent loop are assembled. Cite exact file paths. Do not modify files.",
"expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] }
},
{
"id": "renderer-send-flow",
"tags": ["smoke", "frontend"],
"prompt": "Trace a chat message from the renderer composer through the preload/main bridge to the Kun turn endpoint. Return a concise ordered call path with files. Do not modify files.",
"expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] }
},
{
"id": "sse-replay",
"tags": ["smoke", "runtime"],
"prompt": "Explain how Kun SSE event replay avoids duplicates and cursor rewind after reconnect or restart. Cite the implementation and tests. Do not modify files.",
"expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] }
},
{
"id": "mcp-lifecycle",
"tags": ["smoke", "mcp"],
"prompt": "Inspect MCP startup, tool discovery, execution, and reconnect behavior. Identify the main reliability boundaries and cite the implementation files. Do not modify files.",
"expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] }
},
{
"id": "cache-prefix",
"tags": ["cache"],
"prompt": "Explain what makes Kun's immutable prompt prefix stable and list dynamic data that must remain outside it. Cite code and documentation. Do not modify files.",
"expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] }
},
{
"id": "provider-url-contract",
"tags": ["provider"],
"prompt": "Trace how baseUrl and endpointFormat affect provider URL construction and request bodies across chat and auxiliary model calls. Cite all important consumers. Do not modify files.",
"expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] }
},
{
"id": "attachment-flow",
"tags": ["attachments"],
"prompt": "Trace an image or local file attachment from renderer selection to model input or fallback. Identify the cross-layer contract fields and failure points. Do not modify files.",
"expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] }
},
{
"id": "approval-flow",
"tags": ["runtime", "security"],
"prompt": "Trace a tool approval request from agent loop creation through SSE/UI resolution back to tool execution. Cite routes, gates, and renderer handlers. Do not modify files.",
"expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] }
},
{
"id": "goal-resume",
"tags": ["runtime", "goal"],
"prompt": "Explain how active goals survive runtime restart, how orphaned turns are reconciled, and where auto-resume is triggered. Cite tests if present. Do not modify files.",
"expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] }
},
{
"id": "subagent-permissions",
"tags": ["subagent", "security"],
"prompt": "Explain how subagent tool policies inherit or restrict built-in tools, MCP servers, and skills without escalating the parent permissions. Cite enforcement points. Do not modify files.",
"expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] }
},
{
"id": "settings-persistence",
"tags": ["settings"],
"prompt": "Trace a Kun settings change from renderer state through validation/persistence to managed runtime restart. Highlight rollback behavior. Do not modify files.",
"expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] }
},
{
"id": "test-selection",
"tags": ["quality"],
"prompt": "Identify how the verify_changes tool selects and runs validation after edits. Explain its safety limits and output contract. Do not modify files.",
"expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] }
},
{
"id": "build-pipeline",
"tags": ["build"],
"prompt": "Summarize the development, typecheck, test, build, and packaging pipeline for Kun. Cite package scripts and packaging configuration. Do not modify files.",
"expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] }
},
{
"id": "security-boundaries",
"tags": ["security"],
"prompt": "Map the main trust boundaries for renderer IPC, filesystem tools, command execution, MCP, and secrets. Cite concrete enforcement files. Do not modify files.",
"expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] }
},
{
"id": "runtime-hotspots",
"tags": ["performance"],
"prompt": "Inspect runtime event persistence, SSE replay, tool execution, and context assembly. Identify three evidence-based performance or memory hotspots with file references. Do not modify files.",
"expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] }
},
{
"id": "thread-persistence",
"tags": ["storage"],
"prompt": "Explain how thread/session data is persisted and indexed across file and hybrid SQLite stores, including usage carryover. Cite implementation files. Do not modify files.",
"expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] }
},
{
"id": "model-capabilities",
"tags": ["provider"],
"prompt": "Explain how model capabilities control image input, tool calling, reasoning effort, endpoint format, and context limits. Cite schemas and request construction. Do not modify files.",
"expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] }
},
{
"id": "frontend-chunking",
"tags": ["frontend", "performance"],
"prompt": "Inspect renderer lazy loading and identify which Workbench surfaces are split into separate chunks and which heavy chat dependencies still load eagerly. Do not modify files.",
"expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] }
},
{
"id": "failure-recovery",
"tags": ["runtime", "reliability"],
"prompt": "Map how the desktop app detects an unhealthy Kun child, budgets restarts, distinguishes settings restarts from crashes, and reports status to the renderer. Do not modify files.",
"expect": { "requiredAnyTools": ["read", "grep", "find", "ls"] }
}
]
}
1 change: 1 addition & 0 deletions kun/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@
"test": "vitest run",
"test:watch": "vitest",
"transcript:diff": "node ./scripts/transcript-diff.mjs",
"benchmark:replay": "npm run build && node ./dist/cli/replay-entry.js",
"serve": "node ./dist/cli/serve-entry.js",
"dev": "tsc -p tsconfig.build.json --watch"
},
Expand Down
5 changes: 5 additions & 0 deletions kun/src/adapters/hybrid/hybrid-thread-store.ts
Original file line number Diff line number Diff line change
Expand Up @@ -932,6 +932,10 @@ function mergeTurnMetadata(previous: Turn, next: Turn): Turn {
attachmentIds: mergeStringArrays(previous.attachmentIds, next.attachmentIds),
activeSkillIds: mergeStringArrays(previous.activeSkillIds, next.activeSkillIds),
injectedMemoryIds: mergeStringArrays(previous.injectedMemoryIds, next.injectedMemoryIds),
injectedMemorySummaries:
next.injectedMemorySummaries.length > 0
? next.injectedMemorySummaries
: previous.injectedMemorySummaries,
items: mergeTurnItems(previous.items, next.items)
}
}
Expand Down Expand Up @@ -971,6 +975,7 @@ function turnFromItems(threadId: string, turnId: string, items: TurnItem[], fall
attachmentIds: attachmentIdsFromItems(items),
activeSkillIds: [],
injectedMemoryIds: [],
injectedMemorySummaries: [],
createdAt,
finishedAt: hasOpenItem ? undefined : items[items.length - 1]?.finishedAt ?? fallbackTime,
items
Expand Down
142 changes: 142 additions & 0 deletions kun/src/adapters/tool/background-shell-tool.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,142 @@
import { LocalToolHost, type LocalTool } from './local-tool-host.js'
import { withToolBoundary } from './builtin-tool-utils.js'
import type { BackgroundShellRecordInput } from './builtin-tool-types.js'
import {
isBashSessionId,
listBashSessionRecords,
pollBashSession,
readBashSessionPayload,
stopBashSessionById,
writeBashSessionStdin
} from './builtin-bash-tool.js'

export type BackgroundShellToolOptions = {
listBackgroundSessions?: (threadId?: string) => readonly BackgroundShellRecordInput[]
}


/**
 * Coerce a caller-supplied wait duration into a whole number of seconds.
 *
 * @param value - Raw `yield_seconds` argument; may be any type.
 * @returns The value rounded down and clamped to the range 1..60. Anything that
 *   is not a finite number falls back to 10.
 */
function normalizeYieldSeconds(value: unknown): number {
  const fallbackSeconds = 10
  const lowerBound = 1
  const upperBound = 60

  let seconds = fallbackSeconds
  if (typeof value === 'number' && Number.isFinite(value)) {
    seconds = Math.floor(value)
  }

  if (seconds < lowerBound) {
    return lowerBound
  }
  if (seconds > upperBound) {
    return upperBound
  }
  return seconds
}

/**
 * Build the `background_shell` tool, which lets the agent inspect and control
 * shell sessions that the bash tool started with `background=true`.
 *
 * Supported actions:
 * - `list`  – enumerate sessions (running only unless `include_finished` is true;
 *             scoped to the calling thread unless `thread_only` is false).
 * - `read`  – return the current session payload without waiting.
 * - `poll`  – wait up to `yield_seconds` (normalized to 1..60, default 10).
 * - `write` – send `input` to the session's stdin, then wait like `poll`.
 * - `stop`  – request termination and report the resulting session payload.
 *
 * @param options - Optional overrides. When `listBackgroundSessions` is provided it
 *   is used for `list` instead of `listBashSessionRecords`.
 * @returns The tool definition produced by `LocalToolHost.defineTool`.
 */
export function createBackgroundShellTool(options: BackgroundShellToolOptions = {}): LocalTool {
  return LocalToolHost.defineTool({
    name: 'background_shell',
    description:
      'Manage shell sessions started with bash background=true. The bash tool assigns an 8-character session_id when starting a background command; use that id here. action="list" lists running sessions by default (set include_finished=true to also show completed/stopped/failed sessions; optional thread_only). action="read" returns a non-blocking output snapshot. action="poll" waits up to yield_seconds for more output or exit. action="write" sends stdin via input. action="stop" terminates a running session.',
    inputSchema: {
      type: 'object',
      properties: {
        action: {
          type: 'string',
          enum: ['list', 'read', 'poll', 'write', 'stop']
        },
        session_id: {
          type: 'string',
          description: 'Required for read, poll, write, and stop. The 8-character id returned by bash when background=true.'
        },
        yield_seconds: { type: 'number' },
        include_finished: { type: 'boolean', default: false },
        thread_only: { type: 'boolean', default: true },
        input: { type: 'string' }
      },
      required: ['action'],
      additionalProperties: false
    },
    policy: 'auto',
    toolKind: 'tool_call',
    execute: async (args, context) =>
      withToolBoundary(async () => {
        const action = typeof args.action === 'string' ? args.action.trim() : ''

        if (action === 'list') {
          // thread_only defaults to true: only an explicit `false` widens the listing.
          const threadOnly = args.thread_only !== false
          const threadId = threadOnly ? context.threadId : undefined
          let sessions = options.listBackgroundSessions
            ? [...options.listBackgroundSessions(threadId)]
            : await listBashSessionRecords(threadId)
          if (args.include_finished !== true) {
            sessions = sessions.filter((session) => session.status === 'running')
          }
          return {
            output: {
              sessions: sessions.map((session) => ({
                session_id: session.id,
                command: session.command,
                cwd: session.cwd,
                shell: session.shell,
                status: session.status,
                started_at: session.startedAt,
                // Optional fields are emitted only when they carry information.
                ...(session.finishedAt ? { finished_at: session.finishedAt } : {}),
                exit_code: session.exitCode,
                output: session.output,
                ...(session.outputTruncated ? { output_truncated: true } : {}),
                ...(session.outputFilePath ? { output_file: session.outputFilePath } : {}),
                detached: session.detached
              })),
              running: sessions.filter((session) => session.status === 'running').length
            }
          }
        }

        // Reject unknown actions before looking at session_id; otherwise a typo in
        // `action` combined with a missing id is misreported as "session_id is required".
        if (action !== 'read' && action !== 'poll' && action !== 'write' && action !== 'stop') {
          return { output: { error: `unsupported background_shell action: ${action}` }, isError: true }
        }

        const sessionId = typeof args.session_id === 'string' ? args.session_id.trim() : ''
        if (!sessionId) {
          return { output: { error: 'session_id is required' }, isError: true }
        }
        if (!isBashSessionId(sessionId)) {
          return {
            output: {
              error: 'session_id must be the 8-character id returned by bash when background=true',
              session_id: sessionId
            },
            isError: true
          }
        }

        if (action === 'read') {
          const payload = await readBashSessionPayload(sessionId)
          if (!payload) {
            return { output: { error: 'background shell session not found', session_id: sessionId }, isError: true }
          }
          return { output: payload, isError: payload.status === 'failed' }
        }

        if (action === 'stop') {
          const stopped = await stopBashSessionById(sessionId)
          // Re-read after the stop request so the reported status reflects its outcome.
          const payload = await readBashSessionPayload(sessionId)
          if (!payload) {
            return {
              output: { error: 'background shell session not found', session_id: sessionId, stopped },
              isError: true
            }
          }
          return {
            output: { ...payload, stop_sent: stopped },
            // A session that is still running after a stop request counts as an error.
            isError: payload.status === 'running' || payload.status === 'failed'
          }
        }

        if (action === 'write') {
          const payload = await writeBashSessionStdin(
            sessionId,
            typeof args.input === 'string' ? args.input : '',
            normalizeYieldSeconds(args.yield_seconds)
          )
          if (!payload) {
            return { output: { error: 'background shell session not found', session_id: sessionId }, isError: true }
          }
          return { output: payload, isError: payload.status === 'failed' }
        }

        // Only 'poll' can reach this point: every other action returned above.
        const payload = await pollBashSession(sessionId, normalizeYieldSeconds(args.yield_seconds))
        if (!payload) {
          return { output: { error: 'background shell session not found', session_id: sessionId }, isError: true }
        }
        return { output: payload, isError: payload.status === 'failed' }
      })
  })
}
Loading
Loading