From 915a6049a220c7a9808662ca5e72dc0cac789a59 Mon Sep 17 00:00:00 2001 From: LIU9293 Date: Sat, 7 Mar 2026 02:04:48 +0000 Subject: [PATCH 1/3] fix: recover status updates and remove CLI hard timeouts --- package.json | 2 +- packages/agents/claude/client.ts | 1 - packages/agents/codex/client.ts | 1 - packages/agents/gemini/client.ts | 1 - packages/agents/goose/client.ts | 1 - packages/agents/kilo/client.ts | 1 - packages/agents/kimi/client.ts | 1 - packages/agents/kiro/client.ts | 8 --- packages/agents/qwen/client.ts | 1 - packages/agents/runtime/base.ts | 20 ++++--- packages/core/kernel/request-run.ts | 22 ++++++++ packages/core/runtime/helpers.ts | 2 +- packages/core/runtime/message-updates.ts | 28 ++++++++-- packages/core/test/runtime-helpers.test.ts | 5 ++ .../core/test/runtime-resilience-e2e.test.ts | 54 ++++++++++++++++++- packages/core/types.ts | 1 + 16 files changed, 118 insertions(+), 31 deletions(-) diff --git a/package.json b/package.json index 0a7d9c9a..8dcbeb72 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "ode", - "version": "0.1.18", + "version": "0.1.19", "description": "Coding anywhere with your coding agents connected", "module": "packages/core/index.ts", "type": "module", diff --git a/packages/agents/claude/client.ts b/packages/agents/claude/client.ts index a8aff2de..003fd8fc 100644 --- a/packages/agents/claude/client.ts +++ b/packages/agents/claude/client.ts @@ -218,7 +218,6 @@ async function runClaudeCommand( cwd, env, entry, - timeoutMs: 5 * 60 * 1000, onRecord, onSpawn: (pid) => { log.info("Claude CLI spawned", { pid }); diff --git a/packages/agents/codex/client.ts b/packages/agents/codex/client.ts index 849bfbbd..4f1d5c82 100644 --- a/packages/agents/codex/client.ts +++ b/packages/agents/codex/client.ts @@ -203,7 +203,6 @@ export async function sendMessage( cwd: workingPath, env: envOverrides, entry, - timeoutMs: 10 * 60 * 1000, onRecord: (event) => { if (event.type === "thread.started" && typeof event.thread_id === "string") { latestSessionId = event.thread_id; diff --git a/packages/agents/gemini/client.ts b/packages/agents/gemini/client.ts index 26a06261..36f664cb 100644 --- a/packages/agents/gemini/client.ts +++ b/packages/agents/gemini/client.ts @@ -207,7 +207,6 @@ export async function sendMessage( cwd: workingPath, env: envOverrides, entry, - timeoutMs: 10 * 60 * 1000, onRecord: (record) => { publishGeminiRecordAsSessionEvents(record, sessionId); }, diff --git a/packages/agents/goose/client.ts b/packages/agents/goose/client.ts index d2780f5e..7fa178e4 100644 --- a/packages/agents/goose/client.ts +++ b/packages/agents/goose/client.ts @@ -239,7 +239,6 @@ export async function sendMessage( cwd: workingPath, env: envOverrides, entry, - timeoutMs: 20 * 60 * 1000, onRecord: (record) => { const recordSessionId = getRecordSessionId(record, sessionId); latestSessionId = recordSessionId; diff --git a/packages/agents/kilo/client.ts b/packages/agents/kilo/client.ts index 0884d893..865acebd 100644 --- a/packages/agents/kilo/client.ts +++ b/packages/agents/kilo/client.ts @@ -342,7 +342,6 @@ export async function sendMessage( cwd: workingPath, env: envOverrides, entry, - timeoutMs: 10 * 60 * 1000, onRecord: (record) => { publishKiloRecordAsSessionEvents(record, sessionId); const recordSessionId = getRecordSessionId(record, sessionId); diff --git a/packages/agents/kimi/client.ts b/packages/agents/kimi/client.ts index 64b854eb..20db8c15 100644 --- a/packages/agents/kimi/client.ts +++ b/packages/agents/kimi/client.ts @@ -181,7 +181,6 @@ export async function sendMessage( cwd: workingPath, env: envOverrides, entry, - timeoutMs: 10 * 60 * 1000, onRecord: (record) => { publishKimiEvent(sessionId, record); }, diff --git a/packages/agents/kiro/client.ts b/packages/agents/kiro/client.ts index 595416fa..8284d43d 100644 --- a/packages/agents/kiro/client.ts +++ b/packages/agents/kiro/client.ts @@ -295,19 +295,11 @@ async function runKiroCommand( child.stderr?.on("data", (chunk) => stderrChunks.push(Buffer.from(chunk))); - const timeout = setTimeout(() => { - child.kill("SIGTERM"); - reject(new Error("Kiro CLI timed out")); - }, 10 * 60 * 1000); - child.on("error", (err) => { - clearTimeout(timeout); reject(err); }); child.on("close", (code) => { - clearTimeout(timeout); - const stdout = Buffer.concat(stdoutChunks).toString("utf-8").trim(); const stderr = Buffer.concat(stderrChunks).toString("utf-8").trim(); diff --git a/packages/agents/qwen/client.ts b/packages/agents/qwen/client.ts index c624ccbf..344b4eaa 100644 --- a/packages/agents/qwen/client.ts +++ b/packages/agents/qwen/client.ts @@ -198,7 +198,6 @@ export async function sendMessage( cwd: workingPath, env: envOverrides, entry, - timeoutMs: 10 * 60 * 1000, onRecord: (record) => { const recordSessionId = getRecordSessionId(record, sessionId); latestSessionId = recordSessionId; diff --git a/packages/agents/runtime/base.ts b/packages/agents/runtime/base.ts index 27dfd67c..b6dfdb83 100644 --- a/packages/agents/runtime/base.ts +++ b/packages/agents/runtime/base.ts @@ -17,7 +17,7 @@ type RunCliJsonCommandParams = { cwd: string; env: SessionEnvironment; entry: ActiveRequestEntry; - timeoutMs: number; + timeoutMs?: number; onRecord?: (record: TRecord) => void; onSpawn?: (pid: number | undefined) => void; onExit?: (code: number | null, signal: NodeJS.Signals | null) => void; @@ -78,13 +78,19 @@ export async function runCliJsonCommand(params: RunCliJsonCommandParams child.stderr?.on("data", (chunk) => stderrChunks.push(Buffer.from(chunk))); - const timeout = setTimeout(() => { - child.kill("SIGTERM"); - reject(new Error(`${providerName} CLI timed out`)); - }, timeoutMs); + const effectiveTimeoutMs = + typeof timeoutMs === "number" && Number.isFinite(timeoutMs) && timeoutMs > 0 + ? timeoutMs + : null; + const timeout = effectiveTimeoutMs === null + ? null + : setTimeout(() => { + child.kill("SIGTERM"); + reject(new Error(`${providerName} CLI timed out`)); + }, effectiveTimeoutMs); child.on("error", (err) => { - clearTimeout(timeout); + if (timeout) clearTimeout(timeout); reject(err); }); @@ -97,7 +103,7 @@ export async function runCliJsonCommand(params: RunCliJsonCommandParams }); child.on("close", (code) => { - clearTimeout(timeout); + if (timeout) clearTimeout(timeout); if (stdoutBuffer.trim().length > 0) { flushLine(stdoutBuffer); } diff --git a/packages/core/kernel/request-run.ts b/packages/core/kernel/request-run.ts index 05b15a73..b818a368 100644 --- a/packages/core/kernel/request-run.ts +++ b/packages/core/kernel/request-run.ts @@ -371,6 +371,28 @@ export async function runOpenRequest( statusTs = updatedStatusTs; request.statusMessageTs = updatedStatusTs; } + + const updateError = deps.im.takeUpdateError?.(context.channelId, statusTs); + if (updateError) { + const compactError = updateError.replace(/\s+/g, " ").trim().slice(0, 180); + const fallbackNotice = compactError.length > 0 + ? `Status update failed: ${compactError}` + : "Status update failed due to an unknown error."; + await deps.im.sendMessage( + context.channelId, + context.replyThreadId, + `${fallbackNotice}\nSwitching to a new status message below.` + ); + const replacementStatusTs = await deps.im.sendMessage( + context.channelId, + context.replyThreadId, + statusText + ); + if (typeof replacementStatusTs === "string" && replacementStatusTs.length > 0) { + statusTs = replacementStatusTs; + request.statusMessageTs = replacementStatusTs; + } + } } updateActiveRequest(context.channelId, context.threadId, { statusMessageTs: request.statusMessageTs, diff --git a/packages/core/runtime/helpers.ts b/packages/core/runtime/helpers.ts index 54792daa..20999d05 100644 --- a/packages/core/runtime/helpers.ts +++ b/packages/core/runtime/helpers.ts @@ -26,7 +26,7 @@ export function categorizeRuntimeError( ): { message: string; suggestion: string } { const errorStr = err instanceof Error ? err.message : String(err); - if (errorStr.includes("timeout") || errorStr.includes("ETIMEDOUT")) { + if (errorStr.includes("timeout") || errorStr.includes("timed out") || errorStr.includes("ETIMEDOUT")) { return { message: "Request timed out", suggestion: "The operation took too long. Try a simpler request or break it into smaller steps.", diff --git a/packages/core/runtime/message-updates.ts b/packages/core/runtime/message-updates.ts index c72041c7..bbd0c49f 100644 --- a/packages/core/runtime/message-updates.ts +++ b/packages/core/runtime/message-updates.ts @@ -14,6 +14,7 @@ export function createRateLimitedImAdapter( ): IMAdapter { const rateLimitedMessages = new Set(); const rateLimitErrors = new Map(); + const updateErrors = new Map(); function key(channelId: string, messageTs: string): string { return `${channelId}:${messageTs}`; @@ -24,22 +25,25 @@ export function createRateLimitedImAdapter( async ({ channelId, messageId }, text) => { try { const maybeUpdatedTs = await im.updateMessage(channelId, messageId, text); + updateErrors.delete(key(channelId, messageId)); return typeof maybeUpdatedTs === "string" ? maybeUpdatedTs : undefined; } catch (error) { + const errorText = String(error); + const updateErrorKey = key(channelId, messageId); + updateErrors.set(updateErrorKey, errorText); if (isRateLimitError(error)) { - const rateLimitKey = key(channelId, messageId); - rateLimitedMessages.add(rateLimitKey); - rateLimitErrors.set(rateLimitKey, String(error)); + rateLimitedMessages.add(updateErrorKey); + rateLimitErrors.set(updateErrorKey, errorText); log.warn("IM message update hit rate limit (429)", { channelId, messageTs: messageId, - error: String(error), + error: errorText, }); } log.debug("IM message update failed", { channelId, messageTs: messageId, - error: String(error), + error: errorText, }); return undefined; } @@ -61,6 +65,20 @@ export function createRateLimitedImAdapter( } return rateLimitErrors.get(key(channelId, messageTs)); }, + takeUpdateError: (channelId: string, messageTs: string): string | undefined => { + const updateErrorKey = key(channelId, messageTs); + if (typeof im.takeUpdateError === "function") { + const upstream = im.takeUpdateError(channelId, messageTs); + if (upstream) { + updateErrors.delete(updateErrorKey); + return upstream; + } + } + const captured = updateErrors.get(updateErrorKey); + if (!captured) return undefined; + updateErrors.delete(updateErrorKey); + return captured; + }, updateMessage: async ( channelId: string, messageTs: string, diff --git a/packages/core/test/runtime-helpers.test.ts b/packages/core/test/runtime-helpers.test.ts index 5113469d..6db4f7d3 100644 --- a/packages/core/test/runtime-helpers.test.ts +++ b/packages/core/test/runtime-helpers.test.ts @@ -48,4 +48,9 @@ describe("runtime helpers", () => { expect(result.message).toContain("http://127.0.0.1:4096"); expect(result.suggestion).toContain("OpenCode server"); }); + + it("categorizes timed out phrasing as timeout", () => { + const result = categorizeRuntimeError(new Error("Codex CLI timed out")); + expect(result.message).toBe("Request timed out"); + }); }); diff --git a/packages/core/test/runtime-resilience-e2e.test.ts b/packages/core/test/runtime-resilience-e2e.test.ts index 6704f26e..d08cd2ef 100644 --- a/packages/core/test/runtime-resilience-e2e.test.ts +++ b/packages/core/test/runtime-resilience-e2e.test.ts @@ -60,8 +60,10 @@ function createFakeIm(logs: { updates: Array<{ channelId: string; messageTs: string; text: string }>; }, options?: { failUpdateWith429?: boolean; + failUpdateWithErrorOnce?: string; }): IMAdapter { let nextTs = 0; + let failedOnce = false; return { sendMessage: async (channelId, threadId, text) => { logs.sends.push({ channelId, threadId, text }); @@ -73,6 +75,10 @@ function createFakeIm(logs: { if (options?.failUpdateWith429) { throw new Error("429 rate limited"); } + if (!failedOnce && options?.failUpdateWithErrorOnce) { + failedOnce = true; + throw new Error(options.failUpdateWithErrorOnce); + } }, deleteMessage: async () => {}, fetchThreadHistory: async () => null, @@ -200,15 +206,59 @@ describe("core runtime resilience e2e", () => { messageId: context.messageId, text: "trigger rate limit flow", })); - await waitFor(() => logs.sends.some((entry) => entry.text === "final from agent"), 8000); + await waitFor( + () => + logs.sends.some((entry) => entry.text === "final from agent") + || logs.updates.some((entry) => entry.text === "final from agent"), + 8000 + ); expect(logs.updates.length).toBeGreaterThan(0); - expect(logs.sends.some((entry) => entry.text === "final from agent")).toBe(true); + expect( + logs.sends.some((entry) => entry.text === "final from agent") + || logs.updates.some((entry) => entry.text === "final from agent") + ).toBe(true); deleteSession(context.channelId, context.threadId); }); }, 15000); + it("reports status update errors and continues on a replacement status message", async () => { + await withFastMessageUpdates(async () => { + const logs = { sends: [], updates: [] } as { + sends: Array<{ channelId: string; threadId: string; text: string }>; + updates: Array<{ channelId: string; messageTs: string; text: string }>; + }; + const im = createFakeIm(logs, { failUpdateWithErrorOnce: "socket hang up" }); + const { agent } = createFakeAgent({ + delayMs: 1200, + responseText: "recovered output", + }); + const runtime = createCoreRuntime({ platform: "slack", im, agent }); + const channelId = uniqueId("CE2E-RECOVER-STATUS"); + const threadId = uniqueId("TE2E-RECOVER-STATUS"); + + await runtime.handleInboundEvent(toInboundEvent({ + channelId, + threadId, + userId: "UE2E-recover-status", + messageId: uniqueId("ME2E-recover-status"), + text: "trigger status replacement flow", + })); + + await waitFor( + () => logs.sends.some((entry) => entry.text.startsWith("Status update failed:")), + 5000 + ); + + expect(logs.updates.some((entry) => entry.messageTs === "ts-1")).toBe(true); + expect(logs.updates.some((entry) => entry.messageTs === "ts-3")).toBe(true); + expect(logs.sends.some((entry) => entry.text.startsWith("Status update failed:"))).toBe(true); + + deleteSession(channelId, threadId); + }); + }, 15000); + it("handles stop race in event-stream mode and still completes gracefully", async () => { const logs = { sends: [], updates: [] } as { sends: Array<{ channelId: string; threadId: string; text: string }>; diff --git a/packages/core/types.ts b/packages/core/types.ts index 72420d48..f3acc2e7 100644 --- a/packages/core/types.ts +++ b/packages/core/types.ts @@ -51,6 +51,7 @@ export interface IMAdapter { ): Promise; wasRateLimited?(channelId: string, messageTs: string): boolean; getRateLimitError?(channelId: string, messageTs: string): string | undefined; + takeUpdateError?(channelId: string, messageTs: string): string | undefined; deleteMessage(channelId: string, messageTs: string): Promise; fetchThreadHistory(channelId: string, threadId: string, messageId: string): Promise; buildAgentContext(params: AgentContextBuilderParams): Promise; From 20f2846f356892cf20c2d5ece844694ac9c556d1 Mon Sep 17 00:00:00 2001 From: LIU9293 Date: Sat, 7 Mar 2026 02:09:09 +0000 Subject: [PATCH 2/3] fix: stabilize status fallback resilience test --- .../core/test/runtime-resilience-e2e.test.ts | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/packages/core/test/runtime-resilience-e2e.test.ts b/packages/core/test/runtime-resilience-e2e.test.ts index d08cd2ef..f9594095 100644 --- a/packages/core/test/runtime-resilience-e2e.test.ts +++ b/packages/core/test/runtime-resilience-e2e.test.ts @@ -56,7 +56,7 @@ async function withFastMessageUpdates(run: () => Promise): Promise { } function createFakeIm(logs: { - sends: Array<{ channelId: string; threadId: string; text: string }>; + sends: Array<{ channelId: string; threadId: string; text: string; messageTs: string }>; updates: Array<{ channelId: string; messageTs: string; text: string }>; }, options?: { failUpdateWith429?: boolean; @@ -66,9 +66,10 @@ function createFakeIm(logs: { let failedOnce = false; return { sendMessage: async (channelId, threadId, text) => { - logs.sends.push({ channelId, threadId, text }); nextTs += 1; - return `ts-${nextTs}`; + const messageTs = `ts-${nextTs}`; + logs.sends.push({ channelId, threadId, text, messageTs }); + return messageTs; }, updateMessage: async (channelId, messageTs, text) => { logs.updates.push({ channelId, messageTs, text }); @@ -179,7 +180,7 @@ describe("core runtime resilience e2e", () => { it("falls back to sending final message when status updates are rate-limited", async () => { await withFastMessageUpdates(async () => { const logs = { sends: [], updates: [] } as { - sends: Array<{ channelId: string; threadId: string; text: string }>; + sends: Array<{ channelId: string; threadId: string; text: string; messageTs: string }>; updates: Array<{ channelId: string; messageTs: string; text: string }>; }; const im = createFakeIm(logs, { failUpdateWith429: true }); @@ -226,7 +227,7 @@ describe("core runtime resilience e2e", () => { it("reports status update errors and continues on a replacement status message", async () => { await withFastMessageUpdates(async () => { const logs = { sends: [], updates: [] } as { - sends: Array<{ channelId: string; threadId: string; text: string }>; + sends: Array<{ channelId: string; threadId: string; text: string; messageTs: string }>; updates: Array<{ channelId: string; messageTs: string; text: string }>; }; const im = createFakeIm(logs, { failUpdateWithErrorOnce: "socket hang up" }); @@ -251,9 +252,13 @@ describe("core runtime resilience e2e", () => { 5000 ); - expect(logs.updates.some((entry) => entry.messageTs === "ts-1")).toBe(true); - expect(logs.updates.some((entry) => entry.messageTs === "ts-3")).toBe(true); - expect(logs.sends.some((entry) => entry.text.startsWith("Status update failed:"))).toBe(true); + const fallbackNoticeIndex = logs.sends.findIndex((entry) => entry.text.startsWith("Status update failed:")); + const fallbackNotice = fallbackNoticeIndex >= 0 ? logs.sends[fallbackNoticeIndex] : undefined; + expect(fallbackNotice).toBeDefined(); + + const replacementStatus = fallbackNoticeIndex >= 0 ? logs.sends[fallbackNoticeIndex + 1] : undefined; + expect(replacementStatus).toBeDefined(); + expect(logs.updates.some((entry) => entry.messageTs === replacementStatus!.messageTs)).toBe(true); deleteSession(channelId, threadId); }); @@ -261,7 +266,7 @@ describe("core runtime resilience e2e", () => { it("handles stop race in event-stream mode and still completes gracefully", async () => { const logs = { sends: [], updates: [] } as { - sends: Array<{ channelId: string; threadId: string; text: string }>; + sends: Array<{ channelId: string; threadId: string; text: string; messageTs: string }>; updates: Array<{ channelId: string; messageTs: string; text: string }>; }; const im = createFakeIm(logs); @@ -300,7 +305,7 @@ describe("core runtime resilience e2e", () => { it("recovers pending in-flight requests after restart", async () => { await withFastMessageUpdates(async () => { const logs = { sends: [], updates: [] } as { - sends: Array<{ channelId: string; threadId: string; text: string }>; + sends: Array<{ channelId: string; threadId: string; text: string; messageTs: string }>; updates: Array<{ channelId: string; messageTs: string; text: string }>; }; const im = createFakeIm(logs); From 1c710ee5d0204cf16ee68cbdee82227a39e74f0e Mon Sep 17 00:00:00 2001 From: LIU9293 Date: Sat, 7 Mar 2026 02:13:44 +0000 Subject: [PATCH 3/3] fix: run codex without sandboxing --- packages/agents/codex/client.ts | 2 +- packages/agents/test/cli-command.test.ts | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/packages/agents/codex/client.ts b/packages/agents/codex/client.ts index 4f1d5c82..bccfd754 100644 --- a/packages/agents/codex/client.ts +++ b/packages/agents/codex/client.ts @@ -87,7 +87,7 @@ export function buildCodexCommandArgs(params: { if (params.planMode) { args.push("--sandbox", "read-only"); } else { - args.push("--full-auto"); + args.push("--yolo"); } if (params.model) { args.push("--model", params.model); diff --git a/packages/agents/test/cli-command.test.ts b/packages/agents/test/cli-command.test.ts index a7433d99..309b68a8 100644 --- a/packages/agents/test/cli-command.test.ts +++ b/packages/agents/test/cli-command.test.ts @@ -116,7 +116,7 @@ describe("agent cli command formatting", () => { expect(command).toContain("codex exec --json --skip-git-repo-check"); expect(command).toContain("--json"); - expect(command).toContain("--full-auto"); + expect(command).toContain("--yolo"); expect(command).toContain("--model gpt-5-codex"); expect(command).toContain("session-3"); expect(command).toContain("'hello from codex'"); @@ -134,7 +134,7 @@ describe("agent cli command formatting", () => { expect(command).toContain("codex exec --json --skip-git-repo-check"); expect(command).toContain("--json"); expect(command).toContain("--sandbox read-only"); - expect(command).not.toContain("--full-auto"); + expect(command).not.toContain("--yolo"); expect(command).toContain("session-3"); expect(command).toContain("'plan this change'"); });