From e14d5930a54720374290c1d7fb5852bd119673b3 Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Mon, 8 Jun 2026 02:21:57 +0800 Subject: [PATCH 1/2] fix: resume eligibility requires output_tokens > 0, not just usage presence MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PR #56's has-usage heuristic shows a resume button for codex sessions whose only turns billed input but produced zero output (hung WS turns, cross-session retries). Those sessions have no rollout file on disk, so `codex resume` fails — 4/19 real sessions were false positives against ~/.codex/sessions ground truth (issue #44, specimen 2/3). status and stopReason are unreliable discriminators (101 and 'completed' appear on failed turns); output_tokens > 0 separates all 19 sessions correctly. Tighten markSessionUsage to that condition. Refs #44 Co-Authored-By: Claude Opus 4.8 (1M context) --- server/store.js | 11 +++++++---- test/restore.test.js | 27 ++++++++++++++++++++++++--- test/sse-broadcast.test.js | 2 +- test/store.test.js | 20 ++++++++++++++------ 4 files changed, 46 insertions(+), 14 deletions(-) diff --git a/server/store.js b/server/store.js index 029d103..cd73259 100644 --- a/server/store.js +++ b/server/store.js @@ -258,14 +258,17 @@ function setRestoreState(patch) { } // Mark a session as having produced a real completed turn. Codex only writes a -// resumable rollout file once a turn reports usage; a non-subagent entry with -// usage is our proxy-side signal for "this session can be resumed". Monotonic: -// once true it never flips back. +// resumable rollout file once a turn produces output; turns that billed input +// but emitted zero output (hung WS turns, cross-session retries) leave no +// rollout file, so `usage` alone is a false signal — status and stopReason are +// unreliable too (verified against ~/.codex/sessions ground truth, issue #44). +// output_tokens > 0 on a non-subagent entry is the only discriminator that +// matches. Monotonic: once true it never flips back. function markSessionUsage(entry) { const sid = entry?.sessionId; if (!sid || NON_RESUMABLE_SESSIONS.has(sid)) return; if (entry.isSubagent) return; - if (!entry.usage) return; + if (!(entry.usage?.output_tokens > 0)) return; const meta = sessionMeta[sid] || (sessionMeta[sid] = {}); meta.hasUsage = true; } diff --git a/test/restore.test.js b/test/restore.test.js index ce76926..2d47cc9 100644 --- a/test/restore.test.js +++ b/test/restore.test.js @@ -182,7 +182,7 @@ describe('restoreFromLogs — maxContext re-inference for legacy entries', () => // Resume-eligibility must survive a restart: it is rebuilt purely from the // index (no rollout-file probing), so a codex session is resumable iff the -// index holds a non-subagent usage turn for it. +// index holds a non-subagent turn with output_tokens > 0 for it. describe('restoreFromLogs — codex resume eligibility', () => { const config = require('../server/config'); const store = require('../server/store'); @@ -213,7 +213,7 @@ describe('restoreFromLogs — codex resume eligibility', () => { await config.storage.appendIndex(JSON.stringify({ id: '2026-05-20T10-00-00-000', ts: '10:00:00', sessionId: sid, provider: 'openai', agent: 'codex', model: 'gpt-5', - usage: { input_tokens: 42 }, isSubagent: false, + usage: { input_tokens: 42, output_tokens: 12 }, isSubagent: false, isSSE: true, status: 200, receivedAt: 1779000000000, }) + '\n'); @@ -241,7 +241,7 @@ describe('restoreFromLogs — codex resume eligibility', () => { await config.storage.appendIndex(JSON.stringify({ id: '2026-05-20T12-01-00-000', ts: '12:01:00', sessionId: sid, provider: 'openai', agent: 'codex', model: 'gpt-5', - usage: { input_tokens: 7 }, isSubagent: false, + usage: { input_tokens: 7, output_tokens: 2 }, isSubagent: false, isSSE: true, status: 200, receivedAt: 1779000060000, }) + '\n'); @@ -250,6 +250,27 @@ describe('restoreFromLogs — codex resume eligibility', () => { assert.equal(summarizeEntry(first).resumeCommand, `codex resume ${sid}`); }); + it('a codex session with only a billed zero-output turn is not resumable after restore', async () => { + store.entries.length = 0; + const sid = 'codex-restore-zero-output'; + // Issue #44 specimen 2: hung WS turn — input billed, zero output, no + // rollout file on disk. Restore must not resurrect the resume button. + await config.storage.appendIndex(JSON.stringify({ + id: '2026-05-20T13-00-00-000', ts: '13:00:00', sessionId: sid, + provider: 'openai', agent: 'codex', model: 'gpt-5', + usage: { input_tokens: 9953, output_tokens: 0 }, isSubagent: false, + isSSE: true, status: 499, receivedAt: 1779000000000, + }) + '\n'); + + await restoreFromLogs(); + assert.deepEqual( + store.computeSessionResume(sid, 'openai'), + { resumable: false, resumeCommand: null }, + ); + const summary = summarizeEntry(store.entries.find(e => e.sessionId === sid)); + assert.equal(summary.resumeCommand, null); + }); + it('a codex session with only a 502-style turn (no usage) is not resumable', async () => { store.entries.length = 0; const sid = 'codex-restore-502'; diff --git a/test/sse-broadcast.test.js b/test/sse-broadcast.test.js index debf009..fdaaa2b 100644 --- a/test/sse-broadcast.test.js +++ b/test/sse-broadcast.test.js @@ -23,7 +23,7 @@ describe('sse-broadcast', () => { // A real completed turn flips the session to resumable; summarizeEntry both // records the signal and reads it back (single source of truth). - const withUsage = summarizeEntry({ id: 'r2', sessionId: sid, provider: 'openai', usage: { input_tokens: 9 }, isSubagent: false }); + const withUsage = summarizeEntry({ id: 'r2', sessionId: sid, provider: 'openai', usage: { input_tokens: 9, output_tokens: 4 }, isSubagent: false }); assert.equal(withUsage.resumable, true); assert.equal(withUsage.resumeCommand, `codex resume ${sid}`); diff --git a/test/store.test.js b/test/store.test.js index 9045dce..9dc1b4d 100644 --- a/test/store.test.js +++ b/test/store.test.js @@ -68,7 +68,7 @@ describe('store', () => { }); it('an unknown provider fails closed (no resume command)', () => { - store.markSessionUsage({ sessionId: 'future-sid', isSubagent: false, usage: { input_tokens: 5 } }); + store.markSessionUsage({ sessionId: 'future-sid', isSubagent: false, usage: { input_tokens: 5, output_tokens: 3 } }); assert.deepEqual(store.computeSessionResume('future-sid', 'future-provider'), { resumable: false, resumeCommand: null }); }); @@ -81,16 +81,16 @@ describe('store', () => { assert.deepEqual(r, { resumable: false, resumeCommand: null }); }); - it('codex session becomes resumable after a non-subagent usage turn', () => { + it('codex session becomes resumable after a non-subagent turn with output', () => { const sid = 'codex-sid-withusage'; - store.markSessionUsage({ sessionId: sid, isSubagent: false, usage: { input_tokens: 5 } }); + store.markSessionUsage({ sessionId: sid, isSubagent: false, usage: { input_tokens: 5, output_tokens: 3 } }); const r = store.computeSessionResume(sid, 'openai'); assert.deepEqual(r, { resumable: true, resumeCommand: `codex resume ${sid}` }); }); it('subagent usage alone does not make a codex session resumable', () => { const sid = 'codex-sid-subonly'; - store.markSessionUsage({ sessionId: sid, isSubagent: true, usage: { input_tokens: 5 } }); + store.markSessionUsage({ sessionId: sid, isSubagent: true, usage: { input_tokens: 5, output_tokens: 3 } }); assert.deepEqual(store.computeSessionResume(sid, 'openai'), { resumable: false, resumeCommand: null }); }); @@ -100,16 +100,24 @@ describe('store', () => { assert.deepEqual(store.computeSessionResume(sid, 'openai'), { resumable: false, resumeCommand: null }); }); + it('a billed zero-output turn does not mark the session (hung WS / cross-session retry)', () => { + const sid = 'codex-sid-zero-output'; + // Specimen from issue #44: status 499 after 45m, input billed, no output, + // no rollout file on disk — `codex resume` would fail. + store.markSessionUsage({ sessionId: sid, isSubagent: false, usage: { input_tokens: 9953, output_tokens: 0 } }); + assert.deepEqual(store.computeSessionResume(sid, 'openai'), { resumable: false, resumeCommand: null }); + }); + it('hasUsage is monotonic — a later usage-less turn keeps resumability', () => { const sid = 'codex-sid-monotonic'; - store.markSessionUsage({ sessionId: sid, isSubagent: false, usage: { input_tokens: 5 } }); + store.markSessionUsage({ sessionId: sid, isSubagent: false, usage: { input_tokens: 5, output_tokens: 3 } }); store.markSessionUsage({ sessionId: sid, isSubagent: false, usage: null }); assert.equal(store.computeSessionResume(sid, 'openai').resumable, true); }); it('sentinel sessions are never resumable regardless of provider', () => { for (const sid of ['direct-api', 'codex-raw', 'unknown']) { - store.markSessionUsage({ sessionId: sid, isSubagent: false, usage: { input_tokens: 5 } }); + store.markSessionUsage({ sessionId: sid, isSubagent: false, usage: { input_tokens: 5, output_tokens: 3 } }); assert.deepEqual(store.computeSessionResume(sid, 'openai'), { resumable: false, resumeCommand: null }); assert.deepEqual(store.computeSessionResume(sid, 'anthropic'), { resumable: false, resumeCommand: null }); } From 164ecb12ce869f9f84b876738a7e89f7a8885c97 Mon Sep 17 00:00:00 2001 From: Justin Lee Date: Mon, 8 Jun 2026 02:25:36 +0800 Subject: [PATCH 2/2] test: legacy usage without output_tokens field fails closed Co-Authored-By: Claude Opus 4.8 (1M context) --- test/store.test.js | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/test/store.test.js b/test/store.test.js index 9dc1b4d..0a26103 100644 --- a/test/store.test.js +++ b/test/store.test.js @@ -100,6 +100,12 @@ describe('store', () => { assert.deepEqual(store.computeSessionResume(sid, 'openai'), { resumable: false, resumeCommand: null }); }); + it('legacy usage without an output_tokens field fails closed', () => { + const sid = 'codex-sid-legacy-no-output-field'; + store.markSessionUsage({ sessionId: sid, isSubagent: false, usage: { input_tokens: 5 } }); + assert.deepEqual(store.computeSessionResume(sid, 'openai'), { resumable: false, resumeCommand: null }); + }); + it('a billed zero-output turn does not mark the session (hung WS / cross-session retry)', () => { const sid = 'codex-sid-zero-output'; // Specimen from issue #44: status 499 after 45m, input billed, no output,