From 76b5c55ac3d8ab1a0507ebebd42a51f1c7908e09 Mon Sep 17 00:00:00 2001 From: shibukazu Date: Sat, 27 Jun 2026 14:31:54 +0900 Subject: [PATCH 1/6] fix(core): guard shutdown handler against re-entry and ensure process termination MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Problem ------- When an AI agent SDK (e.g. the Codex or Pi SDK) registers its own SIGINT/SIGTERM listener, `installShutdownHandlers` correctly flushes active runs but then defers exit because `process.listenerCount(signal) > 1`. If the co-listener never calls `process.exit()` — as is the case with some agent SDKs during long-running inference calls — the CLI hangs indefinitely after Ctrl+C. A secondary issue: if SIGINT and SIGTERM arrive in rapid succession (e.g. a process manager sends SIGTERM right after the user hits Ctrl+C), the handler fires twice, potentially corrupting run metadata that was already written on the first invocation. Fix --- 1. Add a `shutdownStarted` boolean guard that causes the handler to return immediately on re-entry. 2. After `flushActiveRuns()` completes its synchronous writes, send SIGKILL to the calling process's process group (`process.kill(0, "SIGKILL")`). SIGKILL cannot be caught or deferred, so this guarantees the CLI and any agent child-processes it spawned are terminated even when co-listeners are present. Run metadata has already been persisted at this point, so there is no data loss. A `catch` block falls back to `process.exit()` for restricted environments where `kill(0)` is not permitted. Tests ----- - Spawn child processes with `detached: true` so `kill(0, "SIGKILL")` inside the child kills only its own process group and does not propagate back to the Vitest runner. - Update exit expectations from numeric codes (130/143) to `signal === "SIGKILL"`, reflecting the new termination path. - Replace the "defers to co-listener" test with a "still kills even when co-listener hangs" test that directly covers the regression. --- packages/core/src/__tests__/shutdown.test.ts | 52 +++++++++++--------- packages/core/src/run.ts | 31 +++++++----- 2 files changed, 46 insertions(+), 37 deletions(-) diff --git a/packages/core/src/__tests__/shutdown.test.ts b/packages/core/src/__tests__/shutdown.test.ts index 0f0417d..f0b74c5 100644 --- a/packages/core/src/__tests__/shutdown.test.ts +++ b/packages/core/src/__tests__/shutdown.test.ts @@ -33,6 +33,10 @@ function spawnShutdownChild(opts: { `; const child = spawn(TSX, ["-e", script], { env: { ...process.env, DEEPSEC_DATA_ROOT: opts.dataRoot }, + // detached: true puts the child in its own process group so that + // process.kill(0, "SIGKILL") inside the child only kills the child's + // group and does not propagate SIGKILL back to the test runner. + detached: true, stdio: ["ignore", "pipe", "pipe"], }); const ready = new Promise((resolve, reject) => { @@ -85,10 +89,12 @@ function writeRunningMeta(opts: { dataRoot: string; projectId: string; runId: st } describe("shutdown handler exit path", () => { - it("exits the process on SIGINT when no other handler is installed", async () => { + it("kills the process on SIGINT", async () => { // The regression this guards: attaching a SIGINT listener // suppresses Node's default termination. If the listener doesn't - // call process.exit, the process hangs after Ctrl+C. + // call process.exit (or kill), the process hangs after Ctrl+C. + // The handler now uses process.kill(0, "SIGKILL") to guarantee + // termination even when other listeners are registered. const dataRoot = fs.mkdtempSync(path.join(os.tmpdir(), "deepsec-shutdown-")); const projectId = "test-proj"; const runId = "20260101000000-aaaaaaaaaaaaaaaa"; @@ -97,8 +103,9 @@ describe("shutdown handler exit path", () => { const { child, ready } = await spawnShutdownChild({ dataRoot, projectId, runId }); await ready; child.kill("SIGINT"); - const { code } = await waitForExit(child, 5000); - expect(code).toBe(130); + const { signal } = await waitForExit(child, 5000); + // process.kill(0, "SIGKILL") terminates via SIGKILL, not a numeric exit code. + expect(signal).toBe("SIGKILL"); // And the run should have been flipped to error by the handler. const meta = JSON.parse( @@ -107,7 +114,7 @@ describe("shutdown handler exit path", () => { expect(meta.phase).toBe("error"); }); - it("exits the process on SIGTERM when no other handler is installed", async () => { + it("kills the process on SIGTERM", async () => { const dataRoot = fs.mkdtempSync(path.join(os.tmpdir(), "deepsec-shutdown-")); const projectId = "test-proj"; const runId = "20260101000000-bbbbbbbbbbbbbbbb"; @@ -116,16 +123,16 @@ describe("shutdown handler exit path", () => { const { child, ready } = await spawnShutdownChild({ dataRoot, projectId, runId }); await ready; child.kill("SIGTERM"); - const { code } = await waitForExit(child, 5000); - expect(code).toBe(143); + const { signal } = await waitForExit(child, 5000); + expect(signal).toBe("SIGKILL"); }); - it("defers exit when another SIGINT listener is installed", async () => { - // When another handler is registered (the sandbox shutdown handler - // in the real CLI), our handler must NOT exit — that handler needs - // async cleanup time and will exit itself. We simulate the - // co-listener with a dummy that exits with a sentinel code so the - // test can tell whose exit path fired. + it("still flushes runs and kills the process even when another SIGINT listener is installed", async () => { + // Previously the handler deferred to co-listeners (e.g. the sandbox + // shutdown handler). This caused hangs when a co-listener (such as + // an agent SDK) was registered but never called process.exit(). + // Now the handler always kills the process group via SIGKILL after + // flushing run metadata, regardless of other listeners. const dataRoot = fs.mkdtempSync(path.join(os.tmpdir(), "deepsec-shutdown-")); const projectId = "test-proj"; const runId = "20260101000000-cccccccccccccccc"; @@ -134,18 +141,15 @@ describe("shutdown handler exit path", () => { const script = ` import { registerActiveRun } from ${JSON.stringify(REGISTER_HOOK)}; registerActiveRun(${JSON.stringify(projectId)}, ${JSON.stringify(runId)}); - // Install a co-listener that takes ownership of exit, like the - // sandbox shutdown handler does in production. - process.on("SIGINT", () => { - // Give the core handler a chance to run first, then exit - // with a sentinel code that proves WE owned the exit path. - setTimeout(() => process.exit(42), 50); - }); + // Simulate a co-listener (e.g. agent SDK) that is registered but + // never calls process.exit(). + process.on("SIGINT", () => { /* intentionally hangs */ }); process.stdout.write("READY\\n"); setInterval(() => {}, 1000); `; const child = spawn(TSX, ["-e", script], { env: { ...process.env, DEEPSEC_DATA_ROOT: dataRoot }, + detached: true, stdio: ["ignore", "pipe", "pipe"], }); await new Promise((resolve, reject) => { @@ -157,11 +161,11 @@ describe("shutdown handler exit path", () => { child.on("error", reject); }); child.kill("SIGINT"); - const { code } = await waitForExit(child, 5000); - // Sentinel proves the OTHER listener exited the process, not ours. - expect(code).toBe(42); + const { signal } = await waitForExit(child, 5000); + // SIGKILL guarantees termination even though the co-listener hangs. + expect(signal).toBe("SIGKILL"); - // And the run was still flipped to error by the core handler. + // Run was still flipped to error before the kill. const meta = JSON.parse( fs.readFileSync(path.join(dataRoot, projectId, "runs", `${runId}.json`), "utf-8"), ); diff --git a/packages/core/src/run.ts b/packages/core/src/run.ts index 15ed894..438da20 100644 --- a/packages/core/src/run.ts +++ b/packages/core/src/run.ts @@ -182,6 +182,10 @@ export function completeRun( // `isReclaimableLock`. const activeRuns = new Map(); let shutdownHandlersInstalled = false; +// Guards against the handler firing twice when both SIGINT and SIGTERM +// are delivered in quick succession (e.g. a process manager sending +// SIGTERM immediately after the user hits Ctrl+C). +let shutdownStarted = false; function flushActiveRuns(): void { // Snapshot to a fresh array — completeRun's read+write can throw if @@ -208,20 +212,21 @@ function installShutdownHandlers(): void { if (shutdownHandlersInstalled) return; shutdownHandlersInstalled = true; const handler = (signal: NodeJS.Signals) => { + if (shutdownStarted) return; + shutdownStarted = true; flushActiveRuns(); - // Attaching a listener for SIGINT/SIGTERM suppresses Node's - // default termination, so we have to provide an exit path - // ourselves or the process hangs after Ctrl+C. When another - // listener is also registered (e.g. the sandbox shutdown handler - // in `deepsec/sandbox/shutdown.ts`), defer to it — that handler - // needs async cleanup time and calls process.exit() itself once - // its sandboxes have stopped (or its 10s timeout fires). - // - // listenerCount counts the currently-executing handler too, so - // "1" means we're the only listener. - if (process.listenerCount(signal) <= 1) { - // Conventional signal exit codes: 128 + signal number - // (SIGINT=2 → 130, SIGTERM=15 → 143). + // After flushing run metadata synchronously, kill the entire process + // group. This handles cases where another SIGINT listener (e.g. an + // agent SDK) is registered but never calls process.exit(), which + // would otherwise leave the CLI hanging indefinitely after Ctrl+C. + // SIGKILL cannot be caught or deferred, so this is guaranteed to + // terminate the process even if other handlers are mid-flight. + // flushActiveRuns() has already persisted run state, so data loss + // is not a concern. + try { + process.kill(0, "SIGKILL"); + } catch { + // Fallback: kill(0) may throw EPERM in restricted environments. process.exit(signal === "SIGINT" ? 130 : 143); } }; From ce226ef0126e1242cdd11a37d4fa8b533863a099 Mon Sep 17 00:00:00 2001 From: shibukazu Date: Sat, 27 Jun 2026 14:36:28 +0900 Subject: [PATCH 2/6] refactor: trim verbose comments added in previous commit --- packages/core/src/run.ts | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/packages/core/src/run.ts b/packages/core/src/run.ts index 438da20..08a9a3c 100644 --- a/packages/core/src/run.ts +++ b/packages/core/src/run.ts @@ -182,9 +182,6 @@ export function completeRun( // `isReclaimableLock`. const activeRuns = new Map(); let shutdownHandlersInstalled = false; -// Guards against the handler firing twice when both SIGINT and SIGTERM -// are delivered in quick succession (e.g. a process manager sending -// SIGTERM immediately after the user hits Ctrl+C). let shutdownStarted = false; function flushActiveRuns(): void { @@ -215,18 +212,11 @@ function installShutdownHandlers(): void { if (shutdownStarted) return; shutdownStarted = true; flushActiveRuns(); - // After flushing run metadata synchronously, kill the entire process - // group. This handles cases where another SIGINT listener (e.g. an - // agent SDK) is registered but never calls process.exit(), which - // would otherwise leave the CLI hanging indefinitely after Ctrl+C. - // SIGKILL cannot be caught or deferred, so this is guaranteed to - // terminate the process even if other handlers are mid-flight. - // flushActiveRuns() has already persisted run state, so data loss - // is not a concern. + // Kill the process group so co-listeners that never call process.exit() + // (e.g. some agent SDKs) cannot hang the CLI indefinitely. try { process.kill(0, "SIGKILL"); } catch { - // Fallback: kill(0) may throw EPERM in restricted environments. process.exit(signal === "SIGINT" ? 130 : 143); } }; From 2b510f4a2139ffcbb3dc1bb52ff2e96d6a7f8159 Mon Sep 17 00:00:00 2001 From: shibukazu Date: Sat, 27 Jun 2026 14:39:48 +0900 Subject: [PATCH 3/6] refactor: restore original comments; update exit-path explanation inline --- packages/core/src/run.ts | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/packages/core/src/run.ts b/packages/core/src/run.ts index 08a9a3c..81e0c19 100644 --- a/packages/core/src/run.ts +++ b/packages/core/src/run.ts @@ -212,11 +212,21 @@ function installShutdownHandlers(): void { if (shutdownStarted) return; shutdownStarted = true; flushActiveRuns(); - // Kill the process group so co-listeners that never call process.exit() - // (e.g. some agent SDKs) cannot hang the CLI indefinitely. + // Attaching a listener for SIGINT/SIGTERM suppresses Node's + // default termination, so we have to provide an exit path + // ourselves or the process hangs after Ctrl+C. Previously we + // deferred to co-listeners (e.g. the sandbox shutdown handler in + // `deepsec/sandbox/shutdown.ts`) when listenerCount > 1, but + // agent SDKs can register listeners that never call process.exit(), + // causing the CLI to hang. Killing the process group via SIGKILL + // guarantees termination in all cases; run metadata has already + // been persisted synchronously above. try { process.kill(0, "SIGKILL"); } catch { + // Fallback for restricted environments where kill(0) is not + // permitted. Conventional signal exit codes: 128 + signal number + // (SIGINT=2 → 130, SIGTERM=15 → 143). process.exit(signal === "SIGINT" ? 130 : 143); } }; From 312a58040873c51361fe23b7c9645cc108869481 Mon Sep 17 00:00:00 2001 From: shibukazu Date: Sat, 27 Jun 2026 14:41:08 +0900 Subject: [PATCH 4/6] refactor: simplify added comment to minimal --- packages/core/src/run.ts | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/packages/core/src/run.ts b/packages/core/src/run.ts index 81e0c19..47a19b9 100644 --- a/packages/core/src/run.ts +++ b/packages/core/src/run.ts @@ -214,18 +214,13 @@ function installShutdownHandlers(): void { flushActiveRuns(); // Attaching a listener for SIGINT/SIGTERM suppresses Node's // default termination, so we have to provide an exit path - // ourselves or the process hangs after Ctrl+C. Previously we - // deferred to co-listeners (e.g. the sandbox shutdown handler in - // `deepsec/sandbox/shutdown.ts`) when listenerCount > 1, but - // agent SDKs can register listeners that never call process.exit(), - // causing the CLI to hang. Killing the process group via SIGKILL - // guarantees termination in all cases; run metadata has already - // been persisted synchronously above. + // ourselves or the process hangs after Ctrl+C. Kill the process + // group so co-listeners that never call process.exit() (e.g. some + // agent SDKs) cannot leave the CLI hanging. try { process.kill(0, "SIGKILL"); } catch { - // Fallback for restricted environments where kill(0) is not - // permitted. Conventional signal exit codes: 128 + signal number + // Conventional signal exit codes: 128 + signal number // (SIGINT=2 → 130, SIGTERM=15 → 143). process.exit(signal === "SIGINT" ? 130 : 143); } From e014c9b5ae9c4aa502184d41b5ed965e80996265 Mon Sep 17 00:00:00 2001 From: shibukazu Date: Sat, 27 Jun 2026 14:53:30 +0900 Subject: [PATCH 5/6] style: fix awkward comment line breaks --- packages/core/src/run.ts | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/packages/core/src/run.ts b/packages/core/src/run.ts index 47a19b9..10f697d 100644 --- a/packages/core/src/run.ts +++ b/packages/core/src/run.ts @@ -212,11 +212,10 @@ function installShutdownHandlers(): void { if (shutdownStarted) return; shutdownStarted = true; flushActiveRuns(); - // Attaching a listener for SIGINT/SIGTERM suppresses Node's - // default termination, so we have to provide an exit path - // ourselves or the process hangs after Ctrl+C. Kill the process - // group so co-listeners that never call process.exit() (e.g. some - // agent SDKs) cannot leave the CLI hanging. + // Attaching a listener for SIGINT/SIGTERM suppresses Node's default termination, so we + // have to provide an exit path ourselves or the process hangs after Ctrl+C. Kill the + // process group so co-listeners that never call process.exit() (e.g. some agent SDKs) + // cannot leave the CLI hanging. try { process.kill(0, "SIGKILL"); } catch { @@ -229,8 +228,8 @@ function installShutdownHandlers(): void { process.on("SIGTERM", handler); // beforeExit covers the case where a thrown error bubbles out of // `process()` / `revalidate()` without `unregisterActiveRun` being - // called — without this, the run would be stranded at `phase: - // "running"` even though the node process is on its way out. + // called — without this, the run would be stranded at `phase: "running"` + // even though the node process is on its way out. process.on("beforeExit", flushActiveRuns); } From 2d9e21408abd1b006da60eba4566f7b3774accc5 Mon Sep 17 00:00:00 2001 From: shibukazu Date: Sat, 27 Jun 2026 14:55:57 +0900 Subject: [PATCH 6/6] refactor: remove unnecessary shutdownStarted guard --- packages/core/src/run.ts | 3 --- 1 file changed, 3 deletions(-) diff --git a/packages/core/src/run.ts b/packages/core/src/run.ts index 10f697d..b74ba1b 100644 --- a/packages/core/src/run.ts +++ b/packages/core/src/run.ts @@ -182,7 +182,6 @@ export function completeRun( // `isReclaimableLock`. const activeRuns = new Map(); let shutdownHandlersInstalled = false; -let shutdownStarted = false; function flushActiveRuns(): void { // Snapshot to a fresh array — completeRun's read+write can throw if @@ -209,8 +208,6 @@ function installShutdownHandlers(): void { if (shutdownHandlersInstalled) return; shutdownHandlersInstalled = true; const handler = (signal: NodeJS.Signals) => { - if (shutdownStarted) return; - shutdownStarted = true; flushActiveRuns(); // Attaching a listener for SIGINT/SIGTERM suppresses Node's default termination, so we // have to provide an exit path ourselves or the process hangs after Ctrl+C. Kill the