diff --git a/packages/argent/src/cli.ts b/packages/argent/src/cli.ts index 7901037e..8411777c 100644 --- a/packages/argent/src/cli.ts +++ b/packages/argent/src/cli.ts @@ -26,6 +26,7 @@ import type * as Installer from "@argent/installer"; import type * as Mcp from "@argent/mcp"; import type * as Cli from "@argent/cli"; import { BUNDLED_RUNTIME_PATHS } from "./bundled-paths.js"; +import { installFatalHandlers } from "./fatal-handlers.js"; const PACKAGE_NAME = "@swmansion/argent"; @@ -44,16 +45,7 @@ function getInstalledVersion(): string | null { const [, , command, ...rest] = process.argv; const isMcpServer = command === "mcp"; -process.on("uncaughtException", (err) => { - process.stderr.write(`[argent] Uncaught exception: ${err.stack ?? err}\n`); - if (!isMcpServer) process.exit(1); -}); -process.on("unhandledRejection", (reason) => { - process.stderr.write( - `[argent] Unhandled rejection: ${reason instanceof Error ? (reason.stack ?? reason.message) : reason}\n` - ); - if (!isMcpServer) process.exit(1); -}); +installFatalHandlers({ isMcpServer }); function printHelp(): void { const version = getInstalledVersion() ?? "unknown"; diff --git a/packages/argent/src/fatal-handlers.ts b/packages/argent/src/fatal-handlers.ts new file mode 100644 index 00000000..fd441d47 --- /dev/null +++ b/packages/argent/src/fatal-handlers.ts @@ -0,0 +1,53 @@ +// Process-level handlers for uncaughtException and unhandledRejection. +// +// In MCP-server mode we do NOT exit on every uncaught error — many are +// transient and the editor expects the server to keep running. But the naive +// version (`stderr.write(...); if (!isMcp) exit`) burns 100% CPU when stderr +// itself is broken (e.g. the parent process is gone): the write emits an +// async 'error' event on the stream, which without a listener becomes another +// uncaughtException, runs the handler, writes to the same broken stream, ... +// +// The fix has three pieces: +// 1. `'error'` listeners on stdout/stderr — broken stdio is fatal; exit +// before the failure round-trips into uncaughtException. This is what +// breaks the production loop. +// 2. try/catch around `stderr.write` — synchronous write failures also exit +// cleanly instead of escaping into another uncaughtException. +// 3. try/catch around the formatter — a throwing `.stack` getter or +// `toString` (the production trace pointed at defaultPrepareStackTrace) +// can't take down the handler. + +let installed = false; + +export function installFatalHandlers(opts: { isMcpServer: boolean }): void { + if (installed) return; + installed = true; + + for (const stream of [process.stdout, process.stderr] as const) { + stream.on("error", () => process.exit(1)); + } + + function reportFatal(label: string, getDetail: () => string): void { + try { + let detail: string; + try { + detail = getDetail(); + } catch { + detail = ""; + } + process.stderr.write(`[argent] ${label}: ${detail}\n`); + } catch { + process.exit(1); + } + if (!opts.isMcpServer) process.exit(1); + } + + process.on("uncaughtException", (err) => { + reportFatal("Uncaught exception", () => String((err as Error)?.stack ?? err)); + }); + process.on("unhandledRejection", (reason) => { + reportFatal("Unhandled rejection", () => + reason instanceof Error ? (reason.stack ?? reason.message) : String(reason) + ); + }); +} diff --git a/packages/argent/test/fatal-handlers.test.ts b/packages/argent/test/fatal-handlers.test.ts new file mode 100644 index 00000000..44290851 --- /dev/null +++ b/packages/argent/test/fatal-handlers.test.ts @@ -0,0 +1,161 @@ +import { spawn } from "node:child_process"; +import * as fs from "node:fs"; +import * as os from "node:os"; +import * as path from "node:path"; +import * as esbuild from "esbuild"; +import { afterAll, beforeAll, describe, expect, it } from "vitest"; + +// Compile src/fatal-handlers.ts to a tmp .mjs once before all tests so the +// spawned child node processes can `import` it without depending on a prior +// `npm run build` step. Self-contained: no dist/ build dependency. +let handlerUrl = ""; +let tmpDir = ""; + +beforeAll(async () => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), "argent-fatal-handlers-")); + const srcPath = path.resolve(import.meta.dirname, "../src/fatal-handlers.ts"); + const out = await esbuild.transform(fs.readFileSync(srcPath, "utf8"), { + loader: "ts", + format: "esm", + target: "node20", + }); + const handlerPath = path.join(tmpDir, "fatal-handlers.mjs"); + fs.writeFileSync(handlerPath, out.code); + handlerUrl = JSON.stringify(`file://${handlerPath}`); +}); + +afterAll(() => { + if (tmpDir) fs.rmSync(tmpDir, { recursive: true, force: true }); +}); + +interface RunResult { + exitCode: number | null; + signal: NodeJS.Signals | null; + stdout: string; + stderr: string; + durationMs: number; +} + +function runChild(scriptBody: string, timeoutMs = 3_000): Promise { + return new Promise((resolve, reject) => { + const start = Date.now(); + const child = spawn(process.execPath, ["--input-type=module", "-e", scriptBody], { + stdio: ["ignore", "pipe", "pipe"], + }); + let stdout = ""; + let stderr = ""; + child.stdout.on("data", (chunk) => (stdout += chunk.toString())); + child.stderr.on("data", (chunk) => (stderr += chunk.toString())); + const killer = setTimeout(() => { + child.kill("SIGKILL"); + reject( + new Error(`child still running after ${timeoutMs}ms — fix is broken; output: ${stderr}`) + ); + }, timeoutMs); + child.on("exit", (code, signal) => { + clearTimeout(killer); + resolve({ + exitCode: code, + signal, + stdout, + stderr, + durationMs: Date.now() - start, + }); + }); + child.on("error", (err) => { + clearTimeout(killer); + reject(err); + }); + }); +} + +describe("installFatalHandlers", () => { + it("exits cleanly when stderr is broken (the orphaned-process loop scenario)", async () => { + // Reproduces the production bug: an orphaned MCP process whose stderr + // pipe broke entered an exception loop. Each stderr.write emitted an + // async 'error' event, which without a listener became another + // uncaughtException, calling the handler again, ... + const script = ` + import { installFatalHandlers } from ${handlerUrl}; + const origWrite = process.stderr.write.bind(process.stderr); + let writes = 0; + process.stderr.write = (chunk, ...rest) => { + writes++; + if (writes > 50) { + // Hardstop: if we get here the fix is broken. + process._rawDebug("LOOP NOT FIXED writes=" + writes); + process.exit(99); + } + // Synthesize the broken-pipe behavior: every write emits an async error. + setImmediate(() => process.stderr.emit("error", new Error("synthetic EPIPE"))); + return true; + }; + installFatalHandlers({ isMcpServer: true }); + throw new Error("initial boom"); + `; + const result = await runChild(script); + expect(result.exitCode).toBe(1); + expect(result.durationMs).toBeLessThan(2_000); + }); + + it("exits with code 1 in non-mcp mode on uncaught exception", async () => { + const script = ` + import { installFatalHandlers } from ${handlerUrl}; + installFatalHandlers({ isMcpServer: false }); + throw new Error("boom"); + `; + const result = await runChild(script); + expect(result.exitCode).toBe(1); + expect(result.stderr).toContain("[argent] Uncaught exception"); + expect(result.stderr).toContain("boom"); + }); + + it("does NOT exit in mcp mode when stderr works (transient errors stay non-fatal)", async () => { + // Regression guard: the bug fix must not change the existing "MCP server + // keeps running on uncaught exceptions" semantics. After throwing once, + // the process should still be alive and able to do work. + const script = ` + import { installFatalHandlers } from ${handlerUrl}; + installFatalHandlers({ isMcpServer: true }); + process.nextTick(() => { throw new Error("transient"); }); + setTimeout(() => { + process.stdout.write("STILL_ALIVE"); + process.exit(0); + }, 200); + `; + const result = await runChild(script); + expect(result.exitCode).toBe(0); + expect(result.stdout).toContain("STILL_ALIVE"); + expect(result.stderr).toContain("[argent] Uncaught exception"); + expect(result.stderr).toContain("transient"); + }); + + it("formats unhandled rejections", async () => { + const script = ` + import { installFatalHandlers } from ${handlerUrl}; + installFatalHandlers({ isMcpServer: false }); + Promise.reject(new Error("rejected-thing")); + `; + const result = await runChild(script); + expect(result.exitCode).toBe(1); + expect(result.stderr).toContain("[argent] Unhandled rejection"); + expect(result.stderr).toContain("rejected-thing"); + }); + + it("survives errors thrown from inside the stack-formatter (defaultPrepareStackTrace)", async () => { + // Real production trace showed time spent in defaultPrepareStackTrace, + // suggesting the original error's .stack getter was throwing. The fix + // wraps the formatter in a try/catch so a failing .stack does not start + // the loop. + const script = ` + import { installFatalHandlers } from ${handlerUrl}; + installFatalHandlers({ isMcpServer: false }); + const err = new Error("boom"); + Object.defineProperty(err, "stack", { get() { throw new Error("stack-getter-blew-up"); } }); + throw err; + `; + const result = await runChild(script); + expect(result.exitCode).toBe(1); + expect(result.stderr).toContain("[argent] Uncaught exception"); + }); +});