Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 15 additions & 1 deletion src/commands/chat.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ import { runShell } from "../util/shell.js";
import { readFile, writeFile, readdir } from "fs/promises";
import { dirname, join, isAbsolute, relative } from "path";
import { writeTodos, readTodos, parseSetTodoListFromOutput, type TodoItem } from "../util/todo-sync.js";
import { writeSessionMeta, readSessionMeta, createOmkSessionEnv, createOmkSessionId } from "../util/session.js";
import { writeSessionMeta, readSessionMeta, createOmkSessionEnv, createOmkSessionId, type SessionMeta } from "../util/session.js";
import type { OmkMode } from "../util/mode-preset.js";

export async function updateChatHeartbeat(root: string, runId: string): Promise<void> {
Expand Down Expand Up @@ -296,6 +296,18 @@ async function buildChatSmokeReport(options: {
/** Layout a caller may request for the chat command; "auto" leaves the choice to `resolveLayout`. */
type ChatLayout = "auto" | "tmux" | "inline" | "plain";
/** Branding variant for the chat command; `chatCommand` falls back to "kimicat" when none is given. */
type ChatBrand = "kimicat" | "minimal" | "plain";

/**
 * Wire a previously recorded Kimi session into a pending launch.
 *
 * When `meta` carries a non-blank `kimiSessionId`, the trimmed id is appended
 * to `args` as `--session <id>` and stored in `env.KIMI_SESSION_ID`. Both
 * `args` and `env` are mutated in place; nothing is touched otherwise.
 *
 * @param args - CLI argument list that receives the `--session` flag.
 * @param env - Environment map that receives `KIMI_SESSION_ID`.
 * @param meta - Session metadata, or `null`/`undefined` when none is available.
 * @returns `true` if resume arguments were applied, `false` otherwise.
 */
export function applyKimiSessionResumeArgs(
  args: string[],
  env: Record<string, string>,
  meta: Pick<SessionMeta, "kimiSessionId"> | null | undefined
): boolean {
  // A missing meta, a missing id and a whitespace-only id all collapse to "".
  const resumeId = meta?.kimiSessionId?.trim() ?? "";
  if (resumeId.length === 0) {
    return false;
  }

  args.push("--session", resumeId);
  env.KIMI_SESSION_ID = resumeId;
  return true;
}

function resolveLayout(requested: ChatLayout | undefined): ChatLayout {
if (requested && requested !== "auto") return requested;
// Already inside a tmux cockpit pane — never launch tmux again
Expand Down Expand Up @@ -402,6 +414,7 @@ export async function chatCommand(options: {
const sessionId = createOmkSessionId("chat");
const runId = options.runId;
const effectiveRunId = runId ?? sessionId;
const resumeSessionMeta = runId ? await readSessionMeta(effectiveRunId).catch(() => null) : null;
const layout = resolveLayout(options.layout);
const brand = options.brand ?? "kimicat";
const resources = await getOmkResourceSettings();
Expand Down Expand Up @@ -723,6 +736,7 @@ export async function chatCommand(options: {

const env = createOmkSessionEnv(root, sessionId);
env.OMK_WORKERS = effectiveWorkers;
applyKimiSessionResumeArgs(args, env, resumeSessionMeta);
if (options.maxStepsPerTurn) {
args.push("--max-steps-per-turn", options.maxStepsPerTurn);
}
Expand Down
70 changes: 67 additions & 3 deletions src/kimi/isolated-home.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import { lstat, mkdtemp, mkdir, symlink, rm, writeFile, readFile } from "fs/promises";
import { dirname, isAbsolute, join } from "path";
import { tmpdir } from "os";
import { pathExists, getProjectRoot, getUserHome, extractHooksBlocks } from "../util/fs.js";
import { pathExists, getProjectRoot, getUserHome, extractHooksBlocks, getRunPath, sanitizeRunId } from "../util/fs.js";

/** Scope values accepted by the `skillsScope` and `hooksScope` options. */
type RuntimeScope = "all" | "project" | "none";

Expand All @@ -11,9 +11,23 @@ export interface IsolatedKimiHomeOptions {
inheritLocalAuth?: boolean;
skillsScope?: RuntimeScope;
hooksScope?: RuntimeScope;
persistentHome?: boolean;
homeLabel?: string;
env?: NodeJS.ProcessEnv;
}

/** Options accepted by `cleanupIsolatedKimiHome`. */
export interface IsolatedKimiHomeCleanupOptions {
/** When truthy, the home directory is kept and a retention marker file is written instead of deleting it. */
preserve?: boolean;
/** Explanation recorded in the retention marker; only consulted when `preserve` is set. */
reason?: string;
}

/** Outcome reported by `cleanupIsolatedKimiHome`. */
export interface IsolatedKimiHomeCleanupResult {
/** The home directory path that was handed to cleanup. */
path: string;
/** False whenever the home was retained; otherwise the outcome reported by the delete path. */
removed: boolean;
/** True when the home was kept on disk (the `preserve` option was set). */
retained: boolean;
/** Location of the retention marker file; only populated when the home was retained. */
markerPath?: string;
}

// NOTE(review): presumably the `.kimi` subdirectory names carried over from the
// original home into the isolated one — confirm against prepareIsolatedKimiHome.
const KIMI_BASE_INHERITED_DIRS = ["credentials", "agents", "logs"] as const;

/**
Expand Down Expand Up @@ -67,7 +81,12 @@ export async function prepareIsolatedKimiHome(options: IsolatedKimiHomeOptions =
const env = options.env ?? process.env;
const originalHome = options.originalHome ?? resolveOriginalHome(env);
const projectRoot = options.projectRoot ?? getProjectRoot();
const tmpHome = await mkdtemp(join(tmpdir(), "omk-home-"));
const tmpHome = await resolveIsolatedKimiHomePath({
projectRoot,
env,
persistentHome: options.persistentHome,
homeLabel: options.homeLabel,
});
const originalKimi = join(originalHome, ".kimi");
const tmpKimi = join(tmpHome, ".kimi");
const skillsScope = normalizeRuntimeScope(options.skillsScope ?? env.OMK_SKILLS_SCOPE, "project");
Expand All @@ -86,6 +105,7 @@ export async function prepareIsolatedKimiHome(options: IsolatedKimiHomeOptions =
const src = join(originalKimi, name);
const dst = join(tmpKimi, name);
if (await pathExists(src)) {
if (await pathExists(dst)) continue;
if (name === "credentials") {
try {
await symlink(src, dst, "dir");
Expand Down Expand Up @@ -157,8 +177,13 @@ export async function prepareIsolatedKimiHome(options: IsolatedKimiHomeOptions =
return tmpHome;
}

export async function cleanupIsolatedKimiHome(tmpHome: string): Promise<void> {
/**
 * Dispose of (or deliberately keep) an isolated Kimi HOME directory.
 *
 * With `options.preserve` set, nothing is deleted: a retention marker is
 * written into the directory and its path is returned in `markerPath`.
 * Errors from writing the marker propagate to the caller.
 *
 * Otherwise the directory is removed recursively on a best-effort basis.
 * Removal never throws, but `removed` reflects whether `rm` actually
 * succeeded, so callers are not told a home is gone when it is still on disk.
 *
 * @param tmpHome - Path of the isolated HOME directory.
 * @param options - Cleanup behaviour; defaults to plain removal.
 * @returns The path plus flags describing what happened to it.
 */
export async function cleanupIsolatedKimiHome(
  tmpHome: string,
  options: IsolatedKimiHomeCleanupOptions = {},
): Promise<IsolatedKimiHomeCleanupResult> {
  if (options.preserve) {
    const markerPath = await writeRetainedHomeMarker(tmpHome, options.reason);
    return { path: tmpHome, removed: false, retained: true, markerPath };
  }

  // `force: true` already treats a missing directory as success, so a
  // rejection here means the directory (or part of it) is still present.
  const removed = await rm(tmpHome, { recursive: true, force: true }).then(
    () => true,
    () => false,
  );
  return { path: tmpHome, removed, retained: false };
}

export function resolveOriginalHome(env: NodeJS.ProcessEnv = process.env): string {
Expand Down Expand Up @@ -192,6 +217,45 @@ function normalizeRuntimeScope(value: string | undefined, fallback: RuntimeScope
return fallback;
}

/**
 * Pick the directory that will serve as the isolated Kimi HOME.
 *
 * A fresh `omk-home-*` directory under the OS temp dir is created when
 * persistence is disabled or no run/session id is present in `env`.
 * Otherwise a `kimi-home/<label>` directory beneath the run path is created
 * (if needed) and returned.
 *
 * @param options - Project root, environment and optional persistence/label overrides.
 * @returns Absolute or project-relative path as produced by the underlying helpers.
 */
async function resolveIsolatedKimiHomePath(options: {
  projectRoot: string;
  env: NodeJS.ProcessEnv;
  persistentHome?: boolean;
  homeLabel?: string;
}): Promise<string> {
  const { env, projectRoot, homeLabel } = options;

  // First id found wins: run id, then OMK session id, then Kimi session id.
  const runId = env.OMK_RUN_ID ?? env.OMK_SESSION_ID ?? env.KIMI_SESSION_ID;
  const wantsPersistent = shouldUsePersistentHome(options.persistentHome, env);

  if (wantsPersistent && runId) {
    const runSegment = sanitizeRunId(runId, "run");
    const labelSegment = sanitizeRunId(homeLabel ?? env.OMK_NODE_ID ?? env.OMK_ROLE ?? "chat", "kimi-home");
    const runDir = getRunPath(runSegment, undefined, projectRoot);
    const homeDir = join(runDir, "kimi-home", labelSegment);
    await mkdir(homeDir, { recursive: true });
    return homeDir;
  }

  return await mkdtemp(join(tmpdir(), "omk-home-"));
}

/**
 * Decide whether the isolated Kimi HOME should live in a persistent location.
 *
 * An explicit boolean option always wins. Otherwise `OMK_PERSIST_KIMI_HOME`
 * is consulted: unset or empty means persistent, and only a recognised
 * "off" keyword (case-insensitive, surrounding whitespace ignored) disables it.
 *
 * @param optionValue - Caller-supplied override, if any.
 * @param env - Environment to read `OMK_PERSIST_KIMI_HOME` from.
 * @returns `true` when a persistent home should be used.
 */
function shouldUsePersistentHome(optionValue: boolean | undefined, env: NodeJS.ProcessEnv): boolean {
  if (optionValue === true || optionValue === false) {
    return optionValue;
  }

  const raw = env.OMK_PERSIST_KIMI_HOME;
  if (!raw) {
    return true;
  }

  switch (raw.trim().toLowerCase()) {
    case "0":
    case "false":
    case "no":
    case "off":
    case "tmp":
    case "temporary":
      return false;
    default:
      return true;
  }
}

/**
 * Drop a JSON marker file into a retained Kimi HOME.
 *
 * The directory is created if missing. The marker records when and why the
 * home was kept, plus the home path and the sessions directory beneath it.
 * It is written pretty-printed with a trailing newline, requesting mode 0o600.
 *
 * @param tmpHome - The home directory being retained.
 * @param reason - Why it is retained; defaults to "preserved for recovery".
 * @returns Path of the marker file that was written.
 */
async function writeRetainedHomeMarker(tmpHome: string, reason: string | undefined): Promise<string> {
  await mkdir(tmpHome, { recursive: true });

  const markerPayload = {
    retainedAt: new Date().toISOString(),
    reason: reason ?? "preserved for recovery",
    home: tmpHome,
    kimiSessions: join(tmpHome, ".kimi", "sessions"),
  };
  const serialized = `${JSON.stringify(markerPayload, null, 2)}\n`;

  const markerFile = join(tmpHome, ".omk-retained-kimi-home.json");
  await writeFile(markerFile, serialized, { mode: 0o600 });
  return markerFile;
}

function stripHooksBlocks(content: string): string {
const result: string[] = [];
let skippingHookBlock = false;
Expand Down
101 changes: 83 additions & 18 deletions src/kimi/runner.ts
Original file line number Diff line number Diff line change
Expand Up @@ -230,6 +230,43 @@ export function formatKimiProviderFailureHint(output: string): string | null {
return lines.join("\n") + "\n";
}

/**
 * Interpret `OMK_RETAIN_KIMI_HOME_ON_FAILURE` as a retention policy.
 *
 * Matching is case-insensitive and ignores surrounding whitespace.
 * Recognised "off" keywords yield "off", recognised "on" keywords yield
 * "all", and anything else (including unset) yields "provider".
 *
 * @param env - Environment to read the setting from.
 * @returns The retention policy keyword.
 */
function retainKimiHomeSetting(env: Record<string, string | undefined>): "off" | "provider" | "all" {
  const normalized = (env.OMK_RETAIN_KIMI_HOME_ON_FAILURE ?? "").trim().toLowerCase();

  switch (normalized) {
    case "0":
    case "false":
    case "no":
    case "off":
    case "never":
      return "off";
    case "1":
    case "true":
    case "yes":
    case "on":
    case "all":
    case "always":
      return "all";
    default:
      return "provider";
  }
}

/**
 * Decide whether the isolated Kimi HOME should be kept after a run ends.
 *
 * Nothing is retained for a zero exit code, a controller-initiated stop, a
 * startup failure, or when the retention setting is "off". Otherwise output
 * classified as a provider failure is retained, and any other non-zero exit
 * is retained only under the "all" setting.
 *
 * @param options - Exit code, captured output, environment and stop/startup flags.
 * @returns Whether to retain, with a reason when retaining.
 */
function retainedKimiHomeDecision(options: {
  exitCode: number;
  output: string;
  env: Record<string, string | undefined>;
  stoppedByController?: boolean;
  startupFailure?: boolean;
}): { retain: boolean; reason?: string } {
  const { exitCode, output, env, stoppedByController, startupFailure } = options;

  const nothingToRecover = exitCode === 0 || Boolean(stoppedByController) || Boolean(startupFailure);
  if (nothingToRecover) {
    return { retain: false };
  }

  const policy = retainKimiHomeSetting(env);
  if (policy === "off") {
    return { retain: false };
  }

  // Provider failures are retained under both "provider" and "all".
  const providerFailure = classifyKimiProviderFailure(output);
  if (providerFailure) {
    return { retain: true, reason: `${providerFailure.kind} provider failure` };
  }

  return policy === "all"
    ? { retain: true, reason: `kimi exited with code ${exitCode}` }
    : { retain: false };
}

/**
 * Build the three-line notice shown when a Kimi HOME has been preserved.
 *
 * @param home - Path of the preserved home directory.
 * @param reason - Why it was preserved; defaults to "provider failure".
 * @returns The notice text, terminated by a newline.
 */
function formatRetainedKimiHomeHint(home: string, reason: string | undefined): string {
  const headline = `[omk] Preserved Kimi HOME for recovery: ${home}`;
  const sessionsLine = ` Kimi sessions/subagents: ${join(home, ".kimi", "sessions")}`;
  const reasonLine = ` Reason: ${reason ?? "provider failure"}`;
  return `${headline}\n${sessionsLine}\n${reasonLine}\n`;
}

export interface KimiStartupExitDiagnosis {
elapsedMs: number;
thresholdMs: number;
Expand Down Expand Up @@ -628,7 +665,7 @@ export async function runKimiInteractive(
const clearWatchdogs = (): void => {
if (startupTimer) clearTimeout(startupTimer);
};
const cleanupRuntime = async (): Promise<void> => {
const cleanupRuntime = async (cleanupOptions: { preserveHome?: boolean; reason?: string } = {}): Promise<void> => {
if (cleaned) return;
cleaned = true;
clearWatchdogs();
Expand All @@ -644,7 +681,13 @@ export async function runKimiInteractive(
restoreTerminalInputState(process.stdin, terminalInputState);
}
}
await cleanupIsolatedKimiHome(tmpHome);
const cleanup = await cleanupIsolatedKimiHome(tmpHome, {
preserve: cleanupOptions.preserveHome,
reason: cleanupOptions.reason,
});
if (cleanup.retained) {
process.stderr.write(style.orange(formatRetainedKimiHomeHint(tmpHome, cleanupOptions.reason)));
}
};
const resolveOnce = (code: number): void => {
if (settled) return;
Expand Down Expand Up @@ -706,13 +749,19 @@ export async function runKimiInteractive(
if (bugRest) writeStdout(statusLine.process(bugRest));
const replacerRest = replacer.forceFlush();
if (replacerRest) writeStdout(statusLine.process(replacerRest));
await cleanupRuntime().catch((err: unknown) => {
const message = err instanceof Error ? err.message : String(err);
process.stderr.write(`[omk] PTY cleanup warning: ${message}\n`);
});
const runId = options?.env?.OMK_RUN_ID;
const resumeHint = runId ? ` • resume: omk chat --run-id ${runId}` : "";
const startupExit = classifyKimiStartupExit(exitCode, elapsedMs, env);
const retention = retainedKimiHomeDecision({
exitCode,
output: recentProviderOutput,
env,
startupFailure: Boolean(startupExit),
});
await cleanupRuntime({ preserveHome: retention.retain, reason: retention.reason }).catch((err: unknown) => {
const message = err instanceof Error ? err.message : String(err);
process.stderr.write(`[omk] PTY cleanup warning: ${message}\n`);
});
if (startupExit) {
process.stderr.write(style.red(`[omk] ${startupExit.message}${resumeHint}\n`));
process.stderr.write(
Expand Down Expand Up @@ -930,6 +979,7 @@ export function createKimiTaskRunner(options: KimiTaskRunnerOptions = {}): TaskR
const kimiBin = resolveKimiBin(mergedEnv);
const kimiAvailable = await checkCommand(kimiBin);
if (!kimiAvailable) {
await cleanupIsolatedKimiHome(tmpHome);
return {
success: false,
exitCode: 1,
Expand All @@ -938,18 +988,33 @@ export function createKimiTaskRunner(options: KimiTaskRunnerOptions = {}): TaskR
};
}
let result: Awaited<ReturnType<typeof runShellStreaming>>;
try {
result = await runShellStreaming(kimiBin, args, {
cwd: worktree,
timeout: effectiveTimeout,
env: mergedEnv,
logPath,
input: "",
onStdout: thinkingHandler,
signal,
});
} finally {
await cleanupIsolatedKimiHome(tmpHome);
result = await runShellStreaming(kimiBin, args, {
cwd: worktree,
timeout: effectiveTimeout,
env: mergedEnv,
logPath,
input: "",
onStdout: thinkingHandler,
signal,
});
Comment on lines +991 to +999

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

P1 Badge Ensure isolated HOME cleanup runs on runner exceptions

Wrap the runShellStreaming call in a try/finally so cleanupIsolatedKimiHome(tmpHome) executes even when runShellStreaming rejects before producing a result (for example, log-path creation or spawn/setup errors). In the current flow, those exceptions bypass cleanup entirely, leaving the isolated HOME (including symlinked auth material) behind and violating the intended behavior of cleaning up on non-provider failures.

Useful? React with 👍 / 👎.

const stoppedByController = Boolean(signal?.aborted) || /(?:^|\n)(?:aborted|timed out after \d+ms)(?:\n|$)/i.test(result.stderr);
const retention = retainedKimiHomeDecision({
exitCode: result.exitCode,
output: `${result.stderr}\n${result.stdout}`,
env: mergedEnv,
stoppedByController,
});
const cleanup = await cleanupIsolatedKimiHome(tmpHome, {
preserve: retention.retain,
reason: retention.reason,
});
if (cleanup.retained) {
result = {
...result,
stderr: result.stderr
? `${result.stderr}\n${formatRetainedKimiHomeHint(tmpHome, retention.reason)}`
: formatRetainedKimiHomeHint(tmpHome, retention.reason),
};
}

// Debug: log runner result so we can diagnose unexpected failures
Expand Down
Loading