From 8754cf1601cff206f46d8a43b2b2cd19f9808b8b Mon Sep 17 00:00:00 2001
From: justrach <54503978+justrach@users.noreply.github.com>
Date: Mon, 6 Apr 2026 09:16:39 +0800
Subject: [PATCH 1/2] fix: 10min idle timeout + poll stdin for dead clients (#148)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two changes:
1. Idle timeout increased from 2min to 10min — allows long debugging
   sessions where the agent thinks for >2min
2. Watchdog now polls stdin every 10s for POLLHUP — detects dead clients
   immediately instead of waiting for the full timeout. When the client
   pipe closes, server exits within 10 seconds.

This means:
- Client disconnects cleanly → stdin EOF → server exits immediately
- Client crashes/killed → POLLHUP detected → server exits in ≤10s
- Client connected but idle → 10min timeout (plenty for debugging)
- Orphaned process (no client ever) → 10min timeout

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 src/main.zig | 23 +++++++++++++++++++++--
 src/mcp.zig  |  2 +-
 2 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/src/main.zig b/src/main.zig
index 822fd31..fff9050 100644
--- a/src/main.zig
+++ b/src/main.zig
@@ -706,13 +706,32 @@ fn scanBg(store: *Store, explorer: *Explorer, root: []const u8, allocator: std.m
 fn idleWatchdog(shutdown: *std.atomic.Value(bool)) void {
     const mcp = @import("mcp.zig");
     while (!shutdown.load(.acquire)) {
-        std.Thread.sleep(30 * std.time.ns_per_s);
+        std.Thread.sleep(10 * std.time.ns_per_s); // check every 10s instead of 30s
+
+        // Quick liveness check: poll stdin (no bytes are read) for a hang-up.
+        // POLLHUP is reported once the client's end of the pipe is closed.
+        const stdin = std.fs.File.stdin();
+        var poll_fds = [_]std.posix.pollfd{.{
+            .fd = stdin.handle,
+            .events = std.posix.POLL.IN | std.posix.POLL.HUP,
+            .revents = 0,
+        }};
+        // Timeout 0 = non-blocking; a failed poll is treated as "no events".
+        const poll_result = std.posix.poll(&poll_fds, 0) catch 0;
+        if (poll_result > 0 and (poll_fds[0].revents & std.posix.POLL.HUP) != 0) {
+            std.log.info("stdin closed (client disconnected), exiting", .{});
+            stdin.close();
+            shutdown.store(true, .release);
+            return;
+        }
+
+        // Fallback: idle timeout
         const last = mcp.last_activity.load(.acquire);
         if (last == 0) continue;
         const now = std.time.milliTimestamp();
         if (now - last > mcp.idle_timeout_ms) {
             std.log.info("idle for {d}s, exiting", .{@divTrunc(now - last, 1000)});
-            std.fs.File.stdin().close();
+            stdin.close();
             shutdown.store(true, .release);
             return;
         }
diff --git a/src/mcp.zig b/src/mcp.zig
index 574669f..4bfead9 100644
--- a/src/mcp.zig
+++ b/src/mcp.zig
@@ -289,7 +289,7 @@ pub var last_activity: std.atomic.Value(i64) = std.atomic.Value(i64).init(0);
 
 /// How long (ms) the server may sit idle before auto-exiting.
 /// Claude Code restarts MCP servers on demand, so this is safe.
-pub const idle_timeout_ms: i64 = 2 * 60 * 1000; // 2 minutes — MCP clients restart servers on demand
+pub const idle_timeout_ms: i64 = 10 * 60 * 1000; // 10 minutes — allows long debugging sessions; stdin EOF exits immediately
 
 // ── Session state for MCP protocol ──────────────────────────────────────────
 

From 252a9f1fedba519a7ffe212cc6d890f7d8d44bbc Mon Sep 17 00:00:00 2001
From: justrach <54503978+justrach@users.noreply.github.com>
Date: Mon, 6 Apr 2026 09:25:42 +0800
Subject: [PATCH 2/2] test: add unit tests for timeout + POLLHUP detection (#148)

- Verify idle_timeout_ms is 10 minutes (600,000ms)
- Verify POLLHUP detected when pipe write end is closed (client gone)
- Verify open pipe does NOT trigger false HUP

Co-Authored-By: Claude Opus 4.6 (1M context)
---
 src/tests.zig | 86 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 86 insertions(+)

diff --git a/src/tests.zig b/src/tests.zig
index 6fea5c5..66c461b 100644
--- a/src/tests.zig
+++ b/src/tests.zig
@@ -4570,3 +4570,89 @@ test "issue-116: getGitHead returns valid SHA for git repos" {
         }
     }
 }
+
+test "issue-148: idle timeout is 10 minutes" {
+    const mcp = @import("mcp.zig");
+    try testing.expectEqual(@as(i64, 10 * 60 * 1000), mcp.idle_timeout_ms);
+}
+
+test "issue-148: POLLHUP detects closed pipe" {
+    const pipe = try std.posix.pipe();
+    defer std.posix.close(pipe[0]); // also runs when poll or an expectation fails
+    std.posix.close(pipe[1]); // closing the write end stands in for a vanished client
+
+    var poll_fds = [_]std.posix.pollfd{.{
+        .fd = pipe[0],
+        .events = std.posix.POLL.IN | std.posix.POLL.HUP,
+        .revents = 0,
+    }};
+
+    const result = try std.posix.poll(&poll_fds, 0);
+    try testing.expect(result > 0);
+    try testing.expect((poll_fds[0].revents & std.posix.POLL.HUP) != 0);
+}
+
+test "issue-148: open pipe does not trigger HUP" {
+    const pipe = try std.posix.pipe();
+    defer std.posix.close(pipe[0]);
+    defer std.posix.close(pipe[1]);
+
+    var poll_fds = [_]std.posix.pollfd{.{
+        .fd = pipe[0],
+        .events = std.posix.POLL.IN | std.posix.POLL.HUP,
+        .revents = 0,
+    }};
+
+    const result = try std.posix.poll(&poll_fds, 0);
+    try testing.expectEqual(@as(usize, 0), result);
+}
+
+test "issue-148: codedb mcp exits when stdin is closed" {
+    // Integration test: spawn codedb mcp, close stdin, verify it exits
+    var child = std.process.Child.init(
+        &.{ "zig", "build", "run", "--", "--mcp" },
+        testing.allocator,
+    );
+    child.stdin_behavior = .Pipe;
+    child.stdout_behavior = .Pipe;
+    child.stderr_behavior = .Ignore;
+
+    try child.spawn();
+
+    // Send initialize then close stdin (simulate client crash)
+    const init_msg = "{\"jsonrpc\":\"2.0\",\"id\":1,\"method\":\"initialize\",\"params\":{\"protocolVersion\":\"2024-11-05\",\"capabilities\":{},\"clientInfo\":{\"name\":\"test\",\"version\":\"1\"}}}";
+    const header = std.fmt.comptimePrint("Content-Length: {d}\r\n\r\n", .{init_msg.len});
+
+    if (child.stdin) |stdin| {
+        stdin.writeAll(header) catch {};
+        stdin.writeAll(init_msg) catch {};
+        // Close stdin — simulates client disconnecting
+        stdin.close();
+        child.stdin = null;
+    }
+
+    // Block until the child exits. `wait` itself has no timeout, so the 15s
+    // budget is only asserted afterwards; a hung server would hang this test.
+    const start = std.time.milliTimestamp();
+    const term = child.wait() catch |err| {
+        // Propagate the real wait error rather than failing with an opaque
+        // `expect(false)`, so the test output names what went wrong.
+        return err;
+    };
+
+    const elapsed = std.time.milliTimestamp() - start;
+
+    // Every termination kind is accepted; only the time-to-exit is asserted
+    switch (term) {
+        .Exited => |code| {
+            // Any exit code is fine — we just care that it exited
+            _ = code;
+        },
+        else => {
+            // Signal-killed or other — acceptable
+        },
+    }
+
+    // 15s budget = 10s poll interval + margin (also covers `zig build run` startup)
+    try testing.expect(elapsed < 15_000);
+}