diff --git a/src/commands/chat.ts b/src/commands/chat.ts index 9db316a..c7476bc 100644 --- a/src/commands/chat.ts +++ b/src/commands/chat.ts @@ -4,7 +4,7 @@ import { runShell } from "../util/shell.js"; import { readFile, writeFile, readdir } from "fs/promises"; import { dirname, join, isAbsolute, relative } from "path"; import { writeTodos, readTodos, parseSetTodoListFromOutput, type TodoItem } from "../util/todo-sync.js"; -import { writeSessionMeta, readSessionMeta, createOmkSessionEnv, createOmkSessionId } from "../util/session.js"; +import { writeSessionMeta, readSessionMeta, createOmkSessionEnv, createOmkSessionId, type SessionMeta } from "../util/session.js"; import type { OmkMode } from "../util/mode-preset.js"; export async function updateChatHeartbeat(root: string, runId: string): Promise { @@ -296,6 +296,18 @@ async function buildChatSmokeReport(options: { type ChatLayout = "auto" | "tmux" | "inline" | "plain"; type ChatBrand = "kimicat" | "minimal" | "plain"; +export function applyKimiSessionResumeArgs( + args: string[], + env: Record, + meta: Pick | null | undefined +): boolean { + const kimiSessionId = meta?.kimiSessionId?.trim(); + if (!kimiSessionId) return false; + args.push("--session", kimiSessionId); + env.KIMI_SESSION_ID = kimiSessionId; + return true; +} + function resolveLayout(requested: ChatLayout | undefined): ChatLayout { if (requested && requested !== "auto") return requested; // Already inside a tmux cockpit pane — never launch tmux again @@ -402,6 +414,7 @@ export async function chatCommand(options: { const sessionId = createOmkSessionId("chat"); const runId = options.runId; const effectiveRunId = runId ?? sessionId; + const resumeSessionMeta = runId ? await readSessionMeta(effectiveRunId).catch(() => null) : null; const layout = resolveLayout(options.layout); const brand = options.brand ?? "kimicat"; const resources = await getOmkResourceSettings(); @@ -723,6 +736,7 @@ export async function chatCommand(options: { const env = createOmkSessionEnv(root, sessionId); env.OMK_WORKERS = effectiveWorkers; + applyKimiSessionResumeArgs(args, env, resumeSessionMeta); if (options.maxStepsPerTurn) { args.push("--max-steps-per-turn", options.maxStepsPerTurn); } diff --git a/src/kimi/isolated-home.ts b/src/kimi/isolated-home.ts index 2f2a1b2..89e4c85 100644 --- a/src/kimi/isolated-home.ts +++ b/src/kimi/isolated-home.ts @@ -1,7 +1,7 @@ import { lstat, mkdtemp, mkdir, symlink, rm, writeFile, readFile } from "fs/promises"; import { dirname, isAbsolute, join } from "path"; import { tmpdir } from "os"; -import { pathExists, getProjectRoot, getUserHome, extractHooksBlocks } from "../util/fs.js"; +import { pathExists, getProjectRoot, getUserHome, extractHooksBlocks, getRunPath, sanitizeRunId } from "../util/fs.js"; type RuntimeScope = "all" | "project" | "none"; @@ -11,9 +11,23 @@ export interface IsolatedKimiHomeOptions { inheritLocalAuth?: boolean; skillsScope?: RuntimeScope; hooksScope?: RuntimeScope; + persistentHome?: boolean; + homeLabel?: string; env?: NodeJS.ProcessEnv; } +export interface IsolatedKimiHomeCleanupOptions { + preserve?: boolean; + reason?: string; +} + +export interface IsolatedKimiHomeCleanupResult { + path: string; + removed: boolean; + retained: boolean; + markerPath?: string; +} + const KIMI_BASE_INHERITED_DIRS = ["credentials", "agents", "logs"] as const; /** @@ -67,7 +81,12 @@ export async function prepareIsolatedKimiHome(options: IsolatedKimiHomeOptions = const env = options.env ?? process.env; const originalHome = options.originalHome ?? resolveOriginalHome(env); const projectRoot = options.projectRoot ?? getProjectRoot(); - const tmpHome = await mkdtemp(join(tmpdir(), "omk-home-")); + const tmpHome = await resolveIsolatedKimiHomePath({ + projectRoot, + env, + persistentHome: options.persistentHome, + homeLabel: options.homeLabel, + }); const originalKimi = join(originalHome, ".kimi"); const tmpKimi = join(tmpHome, ".kimi"); const skillsScope = normalizeRuntimeScope(options.skillsScope ?? env.OMK_SKILLS_SCOPE, "project"); @@ -86,6 +105,7 @@ export async function prepareIsolatedKimiHome(options: IsolatedKimiHomeOptions = const src = join(originalKimi, name); const dst = join(tmpKimi, name); if (await pathExists(src)) { + if (await pathExists(dst)) continue; if (name === "credentials") { try { await symlink(src, dst, "dir"); @@ -157,8 +177,13 @@ export async function prepareIsolatedKimiHome(options: IsolatedKimiHomeOptions = return tmpHome; } -export async function cleanupIsolatedKimiHome(tmpHome: string): Promise { +export async function cleanupIsolatedKimiHome(tmpHome: string, options: IsolatedKimiHomeCleanupOptions = {}): Promise { + if (options.preserve) { + const markerPath = await writeRetainedHomeMarker(tmpHome, options.reason); + return { path: tmpHome, removed: false, retained: true, markerPath }; + } await rm(tmpHome, { recursive: true, force: true }).catch(() => {}); + return { path: tmpHome, removed: true, retained: false }; } export function resolveOriginalHome(env: NodeJS.ProcessEnv = process.env): string { @@ -192,6 +217,45 @@ function normalizeRuntimeScope(value: string | undefined, fallback: RuntimeScope return fallback; } +async function resolveIsolatedKimiHomePath(options: { + projectRoot: string; + env: NodeJS.ProcessEnv; + persistentHome?: boolean; + homeLabel?: string; +}): Promise { + const runId = options.env.OMK_RUN_ID ?? options.env.OMK_SESSION_ID ?? options.env.KIMI_SESSION_ID; + const persistentEnabled = shouldUsePersistentHome(options.persistentHome, options.env); + if (!persistentEnabled || !runId) { + return await mkdtemp(join(tmpdir(), "omk-home-")); + } + + const safeRunId = sanitizeRunId(runId, "run"); + const rawLabel = options.homeLabel ?? options.env.OMK_NODE_ID ?? options.env.OMK_ROLE ?? "chat"; + const safeLabel = sanitizeRunId(rawLabel, "kimi-home"); + const persistentHome = join(getRunPath(safeRunId, undefined, options.projectRoot), "kimi-home", safeLabel); + await mkdir(persistentHome, { recursive: true }); + return persistentHome; +} + +function shouldUsePersistentHome(optionValue: boolean | undefined, env: NodeJS.ProcessEnv): boolean { + if (typeof optionValue === "boolean") return optionValue; + const value = env.OMK_PERSIST_KIMI_HOME; + if (!value) return true; + return !["0", "false", "no", "off", "tmp", "temporary"].includes(value.trim().toLowerCase()); +} + +async function writeRetainedHomeMarker(tmpHome: string, reason: string | undefined): Promise { + const markerPath = join(tmpHome, ".omk-retained-kimi-home.json"); + await mkdir(tmpHome, { recursive: true }); + await writeFile(markerPath, JSON.stringify({ + retainedAt: new Date().toISOString(), + reason: reason ?? "preserved for recovery", + home: tmpHome, + kimiSessions: join(tmpHome, ".kimi", "sessions"), + }, null, 2) + "\n", { mode: 0o600 }); + return markerPath; +} + function stripHooksBlocks(content: string): string { const result: string[] = []; let skippingHookBlock = false; diff --git a/src/kimi/runner.ts b/src/kimi/runner.ts index f555f8f..d90aa9e 100644 --- a/src/kimi/runner.ts +++ b/src/kimi/runner.ts @@ -230,6 +230,43 @@ export function formatKimiProviderFailureHint(output: string): string | null { return lines.join("\n") + "\n"; } +function retainKimiHomeSetting(env: Record): "off" | "provider" | "all" { + const value = env.OMK_RETAIN_KIMI_HOME_ON_FAILURE?.trim().toLowerCase(); + if (value && ["0", "false", "no", "off", "never"].includes(value)) return "off"; + if (value && ["1", "true", "yes", "on", "all", "always"].includes(value)) return "all"; + return "provider"; +} + +function retainedKimiHomeDecision(options: { + exitCode: number; + output: string; + env: Record; + stoppedByController?: boolean; + startupFailure?: boolean; +}): { retain: boolean; reason?: string } { + if (options.exitCode === 0 || options.stoppedByController || options.startupFailure) { + return { retain: false }; + } + const setting = retainKimiHomeSetting(options.env); + if (setting === "off") return { retain: false }; + const diagnosis = classifyKimiProviderFailure(options.output); + if (diagnosis) { + return { retain: true, reason: `${diagnosis.kind} provider failure` }; + } + if (setting === "all") { + return { retain: true, reason: `kimi exited with code ${options.exitCode}` }; + } + return { retain: false }; +} + +function formatRetainedKimiHomeHint(home: string, reason: string | undefined): string { + return [ + `[omk] Preserved Kimi HOME for recovery: ${home}`, + ` Kimi sessions/subagents: ${join(home, ".kimi", "sessions")}`, + ` Reason: ${reason ?? "provider failure"}`, + ].join("\n") + "\n"; +} + export interface KimiStartupExitDiagnosis { elapsedMs: number; thresholdMs: number; @@ -628,7 +665,7 @@ export async function runKimiInteractive( const clearWatchdogs = (): void => { if (startupTimer) clearTimeout(startupTimer); }; - const cleanupRuntime = async (): Promise => { + const cleanupRuntime = async (cleanupOptions: { preserveHome?: boolean; reason?: string } = {}): Promise => { if (cleaned) return; cleaned = true; clearWatchdogs(); @@ -644,7 +681,13 @@ export async function runKimiInteractive( restoreTerminalInputState(process.stdin, terminalInputState); } } - await cleanupIsolatedKimiHome(tmpHome); + const cleanup = await cleanupIsolatedKimiHome(tmpHome, { + preserve: cleanupOptions.preserveHome, + reason: cleanupOptions.reason, + }); + if (cleanup.retained) { + process.stderr.write(style.orange(formatRetainedKimiHomeHint(tmpHome, cleanupOptions.reason))); + } }; const resolveOnce = (code: number): void => { if (settled) return; @@ -706,13 +749,19 @@ export async function runKimiInteractive( if (bugRest) writeStdout(statusLine.process(bugRest)); const replacerRest = replacer.forceFlush(); if (replacerRest) writeStdout(statusLine.process(replacerRest)); - await cleanupRuntime().catch((err: unknown) => { - const message = err instanceof Error ? err.message : String(err); - process.stderr.write(`[omk] PTY cleanup warning: ${message}\n`); - }); const runId = options?.env?.OMK_RUN_ID; const resumeHint = runId ? ` • resume: omk chat --run-id ${runId}` : ""; const startupExit = classifyKimiStartupExit(exitCode, elapsedMs, env); + const retention = retainedKimiHomeDecision({ + exitCode, + output: recentProviderOutput, + env, + startupFailure: Boolean(startupExit), + }); + await cleanupRuntime({ preserveHome: retention.retain, reason: retention.reason }).catch((err: unknown) => { + const message = err instanceof Error ? err.message : String(err); + process.stderr.write(`[omk] PTY cleanup warning: ${message}\n`); + }); if (startupExit) { process.stderr.write(style.red(`[omk] ${startupExit.message}${resumeHint}\n`)); process.stderr.write( @@ -930,6 +979,7 @@ export function createKimiTaskRunner(options: KimiTaskRunnerOptions = {}): TaskR const kimiBin = resolveKimiBin(mergedEnv); const kimiAvailable = await checkCommand(kimiBin); if (!kimiAvailable) { + await cleanupIsolatedKimiHome(tmpHome); return { success: false, exitCode: 1, @@ -938,18 +988,33 @@ export function createKimiTaskRunner(options: KimiTaskRunnerOptions = {}): TaskR }; } let result: Awaited>; - try { - result = await runShellStreaming(kimiBin, args, { - cwd: worktree, - timeout: effectiveTimeout, - env: mergedEnv, - logPath, - input: "", - onStdout: thinkingHandler, - signal, - }); - } finally { - await cleanupIsolatedKimiHome(tmpHome); + result = await runShellStreaming(kimiBin, args, { + cwd: worktree, + timeout: effectiveTimeout, + env: mergedEnv, + logPath, + input: "", + onStdout: thinkingHandler, + signal, + }); + const stoppedByController = Boolean(signal?.aborted) || /(?:^|\n)(?:aborted|timed out after \d+ms)(?:\n|$)/i.test(result.stderr); + const retention = retainedKimiHomeDecision({ + exitCode: result.exitCode, + output: `${result.stderr}\n${result.stdout}`, + env: mergedEnv, + stoppedByController, + }); + const cleanup = await cleanupIsolatedKimiHome(tmpHome, { + preserve: retention.retain, + reason: retention.reason, + }); + if (cleanup.retained) { + result = { + ...result, + stderr: result.stderr + ? `${result.stderr}\n${formatRetainedKimiHomeHint(tmpHome, retention.reason)}` + : formatRetainedKimiHomeHint(tmpHome, retention.reason), + }; } // Debug: log runner result so we can diagnose unexpected failures diff --git a/test/chat-startup.test.mjs b/test/chat-startup.test.mjs index 67dabb9..3cf77f9 100644 --- a/test/chat-startup.test.mjs +++ b/test/chat-startup.test.mjs @@ -7,6 +7,7 @@ import { join, delimiter } from "node:path"; import { tmpdir } from "node:os"; const { ensureChatStartupArtifacts, formatChatStartupDate } = await import("../dist/util/chat-startup.js"); +const { applyKimiSessionResumeArgs } = await import("../dist/commands/chat.js"); const CLI = join(process.cwd(), "dist", "cli.js"); async function createFakeKimi(binRoot, scriptBody) { @@ -91,6 +92,16 @@ test("chat startup is idempotent and does not overwrite daily docs", async () => } }); +test("chat resume args restore recorded Kimi session id", () => { + const args = ["--agent-file", "root.yaml"]; + const env = { OMK_RUN_ID: "resume-run" }; + const applied = applyKimiSessionResumeArgs(args, env, { kimiSessionId: "kimi-session-existing-123" }); + + assert.equal(applied, true); + assert.deepEqual(args.slice(-2), ["--session", "kimi-session-existing-123"]); + assert.equal(env.KIMI_SESSION_ID, "kimi-session-existing-123"); +}); + test("chat command fails loudly when Kimi exits immediately with code 0", { skip: process.platform === "linux" ? false : "native node-pty fake-shell startup classification is covered on Linux", }, async () => { @@ -151,6 +162,69 @@ test("chat command fails loudly when Kimi exits immediately with code 0", { } }); +test("chat --run-id resumes recorded Kimi session id", { + skip: process.platform === "linux" ? false : "native node-pty fake-shell startup classification is covered on Linux", +}, async () => { + const projectRoot = await mkdtemp(join(tmpdir(), "omk-chat-resume-session-project-")); + const homeRoot = await mkdtemp(join(tmpdir(), "omk-chat-resume-session-home-")); + const binRoot = await mkdtemp(join(tmpdir(), "omk-chat-resume-session-bin-")); + const runId = "resume-kimi-session"; + const kimiSessionId = "kimi-session-existing-123"; + const argvPath = join(projectRoot, "kimi-argv.json"); + + try { + await mkdir(join(projectRoot, ".omk", "runs", runId), { recursive: true }); + await writeFile(join(projectRoot, ".omk", "runs", runId, "session.json"), JSON.stringify({ + runId, + type: "chat", + status: "failed", + startedAt: "2026-05-23T12:00:00.000Z", + updatedAt: "2026-05-23T12:00:00.000Z", + kimiSessionId, + todoCount: 0, + todoDoneCount: 0, + }, null, 2), "utf-8"); + + await mkdir(binRoot, { recursive: true }); + const kimiBin = await createFakeKimi(binRoot, [ + `require("fs").writeFileSync(${JSON.stringify(argvPath)}, JSON.stringify(process.argv.slice(2)));`, + `process.exit(0);`, + ``, + ].join("\n")); + + const result = spawnSync(process.execPath, [CLI, "chat", "--layout", "plain", "--brand", "plain", "--run-id", runId], { + cwd: projectRoot, + encoding: "utf-8", + timeout: 20000, + env: { + ...process.env, + HOME: homeRoot, + OMK_ORIGINAL_HOME: homeRoot, + OMK_PROJECT_ROOT: projectRoot, + OMK_MCP_SCOPE: "", + OMK_SKILLS_SCOPE: "", + OMK_HOOKS_SCOPE: "", + OMK_MCP_SUPPRESS_PRUNE_WARNINGS: "1", + OMK_RENDER_LOGO: "0", + OMK_STAR_PROMPT: "0", + OMK_CHAT_NO_BANNER: "1", + OMK_CHAT_FAST_EXIT_MS: "5000", + KIMI_BIN: kimiBin, + }, + }); + + assert.equal(result.status, 1, result.stderr || result.stdout); + const argv = JSON.parse(await readFile(argvPath, "utf-8")); + const sessionArgIndex = argv.indexOf("--session"); + assert.notEqual(sessionArgIndex, -1); + assert.equal(argv[sessionArgIndex + 1], kimiSessionId); + } finally { + await rm(projectRoot, { recursive: true, force: true }); + await rm(homeRoot, { recursive: true, force: true }); + await rm(binRoot, { recursive: true, force: true }); + } +}); + test("chat command startup watchdog fails a silent Kimi launch", { skip: process.platform === "linux" ? false : "native node-pty fake-shell startup classification is covered on Linux", }, async () => { diff --git a/test/isolated-home.test.mjs b/test/isolated-home.test.mjs index 49e86b3..35c0d32 100644 --- a/test/isolated-home.test.mjs +++ b/test/isolated-home.test.mjs @@ -62,6 +62,76 @@ test("isolated Kimi HOME bridges shell profiles only with trusted opt-in", async } }); +test("isolated Kimi HOME uses stable run-scoped path and can be retained for recovery", async () => { + const projectRoot = await mkdtemp(join(tmpdir(), "omk-isolated-persistent-project-")); + const originalHome = await mkdtemp(join(tmpdir(), "omk-isolated-persistent-home-")); + let kimiHome; + + try { + await mkdir(join(originalHome, ".kimi", "credentials"), { recursive: true }); + + kimiHome = await prepareIsolatedKimiHome({ + originalHome, + projectRoot, + inheritLocalAuth: false, + env: { + OMK_RUN_ID: "chat-2026-05-23T16-41-41-731Z-71375", + OMK_NODE_ID: "planner/subagent", + }, + }); + + assert.equal( + kimiHome, + join(projectRoot, ".omk", "runs", "chat-2026-05-23T16-41-41-731Z-71375", "kimi-home", "planner-subagent") + ); + + await mkdir(join(kimiHome, ".kimi", "sessions", "session-1", "run-1", "subagents", "agent-1"), { recursive: true }); + await writeFile(join(kimiHome, ".kimi", "sessions", "session-1", "run-1", "subagents", "agent-1", "meta.json"), "{}\n"); + + const cleanup = await cleanupIsolatedKimiHome(kimiHome, { preserve: true, reason: "rate-limit provider failure" }); + assert.equal(cleanup.retained, true); + assert.equal(cleanup.removed, false); + assert.equal((await lstat(join(kimiHome, ".kimi", "sessions", "session-1", "run-1", "subagents", "agent-1", "meta.json"))).isFile(), true); + + const marker = await readFile(join(kimiHome, ".omk-retained-kimi-home.json"), "utf8"); + assert.match(marker, /rate-limit provider failure/); + assert.match(marker, /\.kimi/); + } finally { + if (kimiHome) await cleanupIsolatedKimiHome(kimiHome); + await rm(projectRoot, { recursive: true, force: true }); + await rm(originalHome, { recursive: true, force: true }); + } +}); + +test("persistent isolated Kimi HOME can be prepared repeatedly for the same run", async () => { + const projectRoot = await mkdtemp(join(tmpdir(), "omk-isolated-persistent-reuse-project-")); + const originalHome = await mkdtemp(join(tmpdir(), "omk-isolated-persistent-reuse-home-")); + let firstHome; + let secondHome; + + try { + await mkdir(join(originalHome, ".kimi", "credentials"), { recursive: true }); + const options = { + originalHome, + projectRoot, + inheritLocalAuth: false, + env: { OMK_RUN_ID: "reuse-run", OMK_NODE_ID: "coder" }, + }; + + firstHome = await prepareIsolatedKimiHome(options); + secondHome = await prepareIsolatedKimiHome(options); + + assert.equal(secondHome, firstHome); + if (!IS_WINDOWS) { + assert.equal((await lstat(join(secondHome, ".kimi", "credentials"))).isSymbolicLink(), true); + } + } finally { + if (firstHome) await cleanupIsolatedKimiHome(firstHome); + await rm(projectRoot, { recursive: true, force: true }); + await rm(originalHome, { recursive: true, force: true }); + } +}); + test("isolated Kimi HOME respects project skills/hooks scope", async () => { const projectRoot = await mkdtemp(join(tmpdir(), "omk-isolated-project-scope-")); const originalHome = await mkdtemp(join(tmpdir(), "omk-isolated-original-scope-"));