From b5d8687b72c7ebdbf0a66a0dd012e2928c9187e3 Mon Sep 17 00:00:00 2001 From: Nixon Cheaz <6854716+ncheaz@users.noreply.github.com> Date: Wed, 22 Apr 2026 18:26:08 -0400 Subject: [PATCH] Bring back the dirty bucket changes --- lib/mini-ralph/runner.js | 115 +++++++++- lib/mini-ralph/state.js | 62 +++++- scripts/mini-ralph-cli.js | 7 + scripts/ralph-run.sh | 16 +- .../mini-ralph-runner-autocommit.test.js | 34 ++- .../unit/javascript/mini-ralph-runner.test.js | 199 +++++++++++++++++- .../unit/javascript/mini-ralph-state.test.js | 30 +++ 7 files changed, 442 insertions(+), 21 deletions(-) diff --git a/lib/mini-ralph/runner.js b/lib/mini-ralph/runner.js index 765de28..7bfbd27 100644 --- a/lib/mini-ralph/runner.js +++ b/lib/mini-ralph/runner.js @@ -29,8 +29,36 @@ const DEFAULTS = { tasksMode: false, noCommit: false, verbose: false, + // Stall detector: break the loop after N *consecutive* iterations that + // succeeded but produced no progress (no promise, no completed tasks, no + // files changed). 0 disables the detector. Failed iterations do not count + // toward the streak because their signal is already surfaced via the + // `Recent Loop Signals` feedback block. + stallThreshold: 3, }; +/** + * Determine whether an iteration made any forward progress. + * + * An iteration is considered productive if any of the following are true: + * - OpenCode emitted the task or completion promise + * - One or more tasks transitioned to "completed" during the iteration + * - At least one repo-tracked file was observed to have changed + * - The iteration failed outright (its signal is handled separately) + * + * @param {object} iterationSignals + * @returns {boolean} + */ +function _iterationIsStalled(iterationSignals) { + if (!iterationSignals) return false; + if (iterationSignals.iterationFailed) return false; + if (iterationSignals.hasCompletion) return false; + if (iterationSignals.hasTask) return false; + if (iterationSignals.completedTasksCount > 0) return false; + if (iterationSignals.filesChangedCount > 0) return false; + return true; +} + function _isFailedIteration(result) { if (!result || typeof result !== 'object') return false; if (result.signal !== null && result.signal !== undefined && result.signal !== '') { @@ -119,11 +147,19 @@ async function run(opts) { const minIterations = options.minIterations; const completionPromise = options.completionPromise; const taskPromise = options.taskPromise; + const stallThreshold = + typeof options.stallThreshold === 'number' && options.stallThreshold >= 0 + ? Math.floor(options.stallThreshold) + : DEFAULTS.stallThreshold; let stateInitialized = false; let iterationCount = 0; let completed = false; let exitReason = 'max_iterations'; + // Consecutive iterations that succeeded but produced no progress signal. + // Reset whenever any progress is detected (or when the iteration failed, so + // transient infra errors don't trip the stall detector). + let stallStreak = 0; try { @@ -140,6 +176,18 @@ async function run(opts) { } // Initialize state file for this run, preserving history count if resuming. + // + // `startedAt` semantics: this field marks the first time *this change* was + // put through a Ralph loop. On a resume we must preserve the original + // timestamp, not overwrite it with the current time -- previously, every + // resume reset `startedAt` and the status dashboard lost the true wall- + // clock duration. `resumedAt` tracks the most recent resume. + const nowIso = new Date().toISOString(); + const preservedStartedAt = + resumeIteration > 1 && existingState && existingState.startedAt + ? existingState.startedAt + : nowIso; + state.init(ralphDir, { active: true, iteration: resumeIteration, @@ -153,8 +201,8 @@ async function run(opts) { promptTemplate: options.promptTemplate || null, noCommit: options.noCommit, model: options.model || '', - startedAt: new Date().toISOString(), - resumedAt: resumeIteration > 1 ? new Date().toISOString() : null, + startedAt: preservedStartedAt, + resumedAt: resumeIteration > 1 ? nowIso : null, completedAt: null, stoppedAt: null, exitReason: null, @@ -194,8 +242,13 @@ async function run(opts) { throw err; } - const errorEntries = errors.readEntries(ralphDir, 3); - const iterationFeedback = _buildIterationFeedback(history.recent(ralphDir, 3), errorEntries); + // Widen the feedback window to 5 so the agent sees a longer streak + // when it keeps emitting the same hand-off / no-promise signal — the + // `_failureFingerprint` dedup collapses identical entries into a + // single "same failure as iteration N" line, so more history is + // cheap and actionable. + const errorEntries = errors.readEntries(ralphDir, 5); + const iterationFeedback = _buildIterationFeedback(history.recent(ralphDir, 5), errorEntries); // Inject any pending context const pendingContext = context.consume(ralphDir); @@ -307,6 +360,35 @@ async function run(opts) { break; } + // Stall detection: track consecutive unproductive iterations and stop + // early so the loop doesn't burn through the full `maxIterations` + // budget on pure no-ops (e.g. agent hitting a quality gate it can't + // fix and asking for hand-off without emitting a promise). + const iterationFailed = _isFailedIteration(result); + const stalledThisIteration = _iterationIsStalled({ + iterationFailed, + hasCompletion, + hasTask, + completedTasksCount: completedTasks.length, + filesChangedCount: Array.isArray(result.filesChanged) ? result.filesChanged.length : 0, + }); + + if (stalledThisIteration) { + stallStreak++; + } else { + stallStreak = 0; + } + + if (stallThreshold > 0 && stallStreak >= stallThreshold) { + if (options.verbose) { + process.stderr.write( + `[mini-ralph] stall detector: ${stallStreak} consecutive no-op iteration(s); halting.\n` + ); + } + exitReason = 'stalled'; + break; + } + // In tasks mode, task promise just continues the loop if (options.tasksMode && hasTask) { // Continue to next iteration @@ -438,7 +520,13 @@ function _autoCommit(iteration, opts = {}) { } try { - childProcess.execFileSync('git', ['add', '--', ...filesToStage], { + // Use `git add -A -- ` (not plain `git add -- `) so deletions + // and renames are staged alongside modifications/additions. Tasks that call + // `git rm` via a shell tool leave the path absent from the working tree but + // still present in `git status --porcelain`, which means the plain form + // would error with `fatal: pathspec did not match`. Scoping to the per-path + // allowlist preserves the protected-artifact guarantee. + childProcess.execFileSync('git', ['add', '-A', '--', ...filesToStage], { stdio: verbose ? 'inherit' : ['pipe', 'pipe', 'pipe'], encoding: 'utf8', }); @@ -575,17 +663,31 @@ function _failureFingerprint(entry, errorEntries) { const match = errors.matchIteration(errorEntries, entry.iteration); stderrHead = _firstNonEmptyLine(match && match.stderr, 120); } + // A "no promise emitted" iteration is also a distinguishable failure mode + // even when exitCode===0 and there's no stderr (e.g. the agent explicitly + // refuses to continue). Encoding it in the fingerprint lets the dedup + // collapse repeated hand-off iterations into a single actionable line + // instead of N identical bullets. + const noPromise = !entry.completionDetected && !entry.taskDetected; return JSON.stringify({ failureStage: entry.failureStage || '', exitCode: entry.exitCode, stderrHead, + noPromise, + commitAnomalyType: entry.commitAnomalyType || '', }); } function _isEmptyFingerprint(fingerprint) { try { const obj = JSON.parse(fingerprint); - return !obj.failureStage && obj.exitCode === 0 && !obj.stderrHead; + return ( + !obj.failureStage && + obj.exitCode === 0 && + !obj.stderrHead && + !obj.noPromise && + !obj.commitAnomalyType + ); } catch { return false; } @@ -846,4 +948,5 @@ module.exports = { _appendFatalIterationFailure, _failureFingerprint, _firstNonEmptyLine, + _iterationIsStalled, }; diff --git a/lib/mini-ralph/state.js b/lib/mini-ralph/state.js index ba8a442..d57693c 100644 --- a/lib/mini-ralph/state.js +++ b/lib/mini-ralph/state.js @@ -55,11 +55,27 @@ function init(ralphDir, data) { function read(ralphDir) { const file = statePath(ralphDir); if (!fs.existsSync(file)) return null; - try { - return JSON.parse(fs.readFileSync(file, 'utf8')); - } catch { - return null; + // One-shot retry to paper over the vanishingly-rare race where we read the + // state file between `openSync('wx')` on the temp file and the rename. On + // POSIX `renameSync` is atomic, so the retry window is only meaningful on + // filesystems where it is not -- but the cost of retrying is tiny, so we + // do it uniformly for robustness. + for (let attempt = 0; attempt < 2; attempt++) { + try { + const raw = fs.readFileSync(file, 'utf8'); + if (!raw) { + if (attempt === 0) continue; + return null; + } + return JSON.parse(raw); + } catch (err) { + if (attempt === 0 && (err.code === 'ENOENT' || err instanceof SyntaxError)) { + continue; + } + return null; + } } + return null; } /** @@ -182,7 +198,43 @@ function _ensureDir(ralphDir) { function _write(ralphDir, data) { _ensureDir(ralphDir); - fs.writeFileSync(statePath(ralphDir), JSON.stringify(data, null, 2), 'utf8'); + const target = statePath(ralphDir); + // Atomic write: serialize to a temp file in the same directory, then rename. + // A concurrent `read()` either sees the fully-written old file or the fully- + // written new file -- never a partially-written one. This matters because + // `ralph-run --status` can race with the live loop's per-iteration + // `state.update()` calls, and a torn read used to surface as JSON.parse + // errors or the dashboard reporting a stale iteration counter. + const tmp = `${target}.tmp-${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`; + const serialized = JSON.stringify(data, null, 2); + + let handle = null; + try { + handle = fs.openSync(tmp, 'wx'); + fs.writeFileSync(handle, serialized, 'utf8'); + fs.fsyncSync(handle); + } finally { + if (handle !== null) { + try { + fs.closeSync(handle); + } catch { + /* best-effort close */ + } + } + } + + try { + fs.renameSync(tmp, target); + } catch (err) { + // Clean up the temp file if rename failed, then rethrow so the caller + // sees the real error (disk full, permissions, etc.). + try { + fs.unlinkSync(tmp); + } catch { + /* best-effort cleanup */ + } + throw err; + } } function _createLockToken() { diff --git a/scripts/mini-ralph-cli.js b/scripts/mini-ralph-cli.js index 5d0e19d..5c50aaa 100755 --- a/scripts/mini-ralph-cli.js +++ b/scripts/mini-ralph-cli.js @@ -19,6 +19,7 @@ * --tasks Enable tasks mode * --min-iterations Minimum iterations (default: 1) * --max-iterations Maximum iterations (default: 50) + * --stall-threshold Halt after N consecutive no-op iterations (default: 3; 0 disables) * --completion-promise Completion promise string (default: COMPLETE) * --task-promise Task promise string (default: READY_FOR_NEXT_TASK) * --no-commit Suppress auto-commit @@ -48,6 +49,7 @@ function parseArgs(argv) { tasksMode: false, minIterations: 1, maxIterations: 50, + stallThreshold: 3, completionPromise: 'COMPLETE', taskPromise: 'READY_FOR_NEXT_TASK', noCommit: false, @@ -88,6 +90,9 @@ function parseArgs(argv) { case '--max-iterations': opts.maxIterations = parseInt(args[++i], 10); break; + case '--stall-threshold': + opts.stallThreshold = parseInt(args[++i], 10); + break; case '--completion-promise': opts.completionPromise = args[++i]; break; @@ -141,6 +146,7 @@ Options: --tasks Enable tasks mode --min-iterations Minimum iterations (default: 1) --max-iterations Maximum iterations (default: 50) + --stall-threshold Halt after N consecutive no-op iterations (default: 3; 0 disables) --completion-promise Completion promise string --task-promise Task promise string --no-commit Suppress auto-commit @@ -197,6 +203,7 @@ async function main() { tasksMode: opts.tasksMode, minIterations: opts.minIterations, maxIterations: opts.maxIterations, + stallThreshold: opts.stallThreshold, completionPromise: opts.completionPromise, taskPromise: opts.taskPromise, noCommit: opts.noCommit, diff --git a/scripts/ralph-run.sh b/scripts/ralph-run.sh index 051c163..4c5e9eb 100755 --- a/scripts/ralph-run.sh +++ b/scripts/ralph-run.sh @@ -756,7 +756,21 @@ Change directory: {{change_dir}} Before implementing, read the OpenSpec artifacts listed above that are relevant to the current task. -Pick the first [ ] or [/] task in tasks.md, mark it [/], implement it (smallest change that fully satisfies the Done-when conditions), run the task's verification command, mark it [x] on success, then output `{{task_promise}}`. Output `{{completion_promise}}` only when every task is [x]. Output promise tags on their own line, literal; do not quote or describe them. Do not fabricate a promise to exit the loop. If an approach fails twice, try a different one. +Follow this loop contract EXACTLY. Do not skip steps. Do not batch. Do not output a promise until every step is done. + +1. Open `tasks.md` (at `{{change_dir}}/tasks.md`) and find the FIRST line matching `- [ ] ` or `- [/] `. Remember its exact text. +2. Edit `tasks.md` in place to change that line's marker to `- [/] ` (in-progress). You MUST use your file edit tool to modify the file on disk — a shell `cp`, `sed`, or print-to-stdout does not count. Verify by re-reading the file. +3. Implement the smallest change that fully satisfies the task's Done-when conditions. Run the task's verification command if one is specified. +4. On success, edit `tasks.md` again in place to change that line's marker from `- [/] ` to `- [x] `. Verify by re-reading the file and confirming the `[x]` is present on that exact line. +5. ONLY after step 4 writes `[x]` to disk, output `{{task_promise}}` on its own line. +6. If and only if EVERY task line in `tasks.md` is `- [x] `, output `{{completion_promise}}` instead. + +Hard rules: +- If you do not actually modify `tasks.md` on disk in this iteration, DO NOT output any promise tag. Output a short failure note instead and stop. +- Never output `{{task_promise}}` while the task you just worked on is still `- [ ]` on disk. That causes the same task to repeat forever. +- Promise tags must be on their own line, literal, unquoted, and not described in prose. +- If an approach fails twice, try a different one. +- If the task is already satisfied by prior work (e.g. target file already exists with the right content), you STILL must flip the checkbox to `[x]` in `tasks.md` before emitting the promise. ## Commit Contract diff --git a/tests/unit/javascript/mini-ralph-runner-autocommit.test.js b/tests/unit/javascript/mini-ralph-runner-autocommit.test.js index fa27576..6983823 100644 --- a/tests/unit/javascript/mini-ralph-runner-autocommit.test.js +++ b/tests/unit/javascript/mini-ralph-runner-autocommit.test.js @@ -67,7 +67,7 @@ describe('runner._autoCommit()', () => { expect(execFileSync).toHaveBeenCalledWith( 'git', - ['add', '--', 'tasks.md', 'src/app.js'], + ['add', '-A', '--', 'tasks.md', 'src/app.js'], expect.any(Object) ); expect(execFileSync).toHaveBeenCalledWith( @@ -98,7 +98,7 @@ describe('runner._autoCommit()', () => { expect(execFileSync).toHaveBeenNthCalledWith( 1, 'git', - ['add', '--', 'tasks.md', 'src/app.js'], + ['add', '-A', '--', 'tasks.md', 'src/app.js'], expect.any(Object) ); expect(execFileSync).toHaveBeenNthCalledWith( @@ -160,9 +160,11 @@ describe('runner._autoCommit()', () => { expect(execFileSync).toHaveBeenCalledWith( 'git', - ['add', '--', 'tasks.md', 'src/app.js'], + ['add', '-A', '--', 'tasks.md', 'src/app.js'], expect.any(Object) ); + // Guard against the unscoped form, which would stage *every* dirty file in + // the repo (including files unrelated to the current task). expect(execFileSync).not.toHaveBeenCalledWith( 'git', ['add', '-A'], @@ -170,6 +172,32 @@ describe('runner._autoCommit()', () => { ); }); + test('stages deletions alongside modifications via `git add -A -- `', () => { + // Simulate a task that removed a file: the path is in the allowlist but + // no longer exists on disk. `git add -A -- ` must still succeed and + // record the deletion in the index. + execFileSync.mockImplementation((command, args) => { + if (command === 'git' && args[0] === 'diff') { + return 'deleted/file.webp\ntasks.md\n'; + } + return ''; + }); + + const result = runner._autoCommit(7, { + completedTasks: [completedTask], + filesToStage: ['deleted/file.webp', 'tasks.md'], + verbose: false, + }); + + expect(execFileSync).toHaveBeenNthCalledWith( + 1, + 'git', + ['add', '-A', '--', 'deleted/file.webp', 'tasks.md'], + expect.any(Object) + ); + expect(result).toEqual({ attempted: true, committed: true, anomaly: null }); + }); + test('blocks protected OpenSpec artifacts from loop-managed commits', () => { const result = runner._autoCommit(6, { completedTasks: [completedTask], diff --git a/tests/unit/javascript/mini-ralph-runner.test.js b/tests/unit/javascript/mini-ralph-runner.test.js index 6f60b8c..db645b7 100644 --- a/tests/unit/javascript/mini-ralph-runner.test.js +++ b/tests/unit/javascript/mini-ralph-runner.test.js @@ -901,6 +901,63 @@ describe('_buildIterationFeedback() - fingerprint dedup', () => { expect(stderrHeadCount).toBe(1); }); + test('repeated no-promise (clean-exit) iterations dedupe into a single detailed line', () => { + // Regression for a stuck-loop symptom: when the agent hits a blocker it + // cannot clear, it keeps exiting cleanly with no promise. The feedback + // used to render N identical bullets (e.g. "no loop promise emitted") + // which bloated the prompt without helping the agent. The fingerprint + // now covers the `noPromise` state so repeats collapse to a back-reference. + const history = [ + { iteration: 23, exitCode: 0, signal: '', failureStage: '', filesChanged: [], completionDetected: false, taskDetected: false }, + { iteration: 24, exitCode: 0, signal: '', failureStage: '', filesChanged: [], completionDetected: false, taskDetected: false }, + { iteration: 25, exitCode: 0, signal: '', failureStage: '', filesChanged: [], completionDetected: false, taskDetected: false }, + ]; + + const feedback = _buildIterationFeedback(history); + + expect(feedback).toContain('Iteration 23:'); + // Only the first occurrence gets the full "no loop promise emitted" line; + // the next two are back-references. + const noPromiseCount = (feedback.match(/no loop promise emitted/g) || []).length; + expect(noPromiseCount).toBe(1); + expect(feedback).toContain('Iteration 24: same failure as iteration 23 (see above).'); + expect(feedback).toContain('Iteration 25: same failure as iteration 23 (see above).'); + }); + + test('commit anomaly iterations are deduped by anomaly type', () => { + // Two iterations that failed to commit with the same anomaly type should + // collapse to one detailed line + one back-reference, rather than two + // duplicate bullets. + const history = [ + { + iteration: 22, + exitCode: 0, + signal: '', + failureStage: '', + filesChanged: ['tasks.md'], + completionDetected: false, + taskDetected: true, + commitAnomaly: 'Auto-commit failed: pathspec did not match', + commitAnomalyType: 'commit_failed', + }, + { + iteration: 23, + exitCode: 0, + signal: '', + failureStage: '', + filesChanged: ['tasks.md'], + completionDetected: false, + taskDetected: true, + commitAnomaly: 'Auto-commit failed: pathspec did not match', + commitAnomalyType: 'commit_failed', + }, + ]; + + const feedback = _buildIterationFeedback(history); + expect(feedback).toContain('Iteration 22: commit anomaly'); + expect(feedback).toContain('Iteration 23: same failure as iteration 22'); + }); + test('three distinct-fingerprint failures: three full detail entries', () => { const errorContent = [ { iteration: 1, exitCode: 1, stderr: 'ErrorA', stdout: '', signal: '', failureStage: 'stageA' }, @@ -996,7 +1053,9 @@ describe('run() with mocked invoker', () => { })); try { - const result = await run(makeOptions()); + // Disable the stall detector here so we can verify the pure + // `max_iterations` exit path independently of the stall logic. + const result = await run(makeOptions({ stallThreshold: 0 })); expect(result.completed).toBe(false); expect(result.iterations).toBe(3); expect(result.exitReason).toBe('max_iterations'); @@ -1010,6 +1069,70 @@ describe('run() with mocked invoker', () => { } }); + test('stall detector halts the loop after N consecutive no-op iterations', async () => { + let callCount = 0; + const restore = mockInvoker(invoker, async () => { + callCount++; + return { + // No promise, no files changed, no tasks completed -> stalled. + stdout: 'HAND OFF REQUIRED: cannot make progress', + exitCode: 0, + filesChanged: [], + toolUsage: [], + }; + }); + + try { + const result = await run( + makeOptions({ maxIterations: 20, stallThreshold: 3 }) + ); + expect(result.completed).toBe(false); + expect(result.iterations).toBe(3); + expect(result.exitReason).toBe('stalled'); + expect(callCount).toBe(3); + const persistedState = state.read(path.join(tmpDir, '.ralph')); + expect(persistedState.active).toBe(false); + expect(persistedState.stoppedAt).toBeTruthy(); + expect(persistedState.exitReason).toBe('stalled'); + } finally { + restore(); + } + }); + + test('stall streak resets when the iteration makes any progress', async () => { + let callCount = 0; + const restore = mockInvoker(invoker, async () => { + callCount++; + // Stalled, stalled, progress, stalled, stalled -> streak resets on + // iteration 3 and 2 subsequent stalls (threshold=3) should NOT halt. + if (callCount === 3) { + return { + stdout: 'changed something', + exitCode: 0, + filesChanged: ['src/app.js'], + toolUsage: [], + }; + } + return { + stdout: 'no progress', + exitCode: 0, + filesChanged: [], + toolUsage: [], + }; + }); + + try { + const result = await run( + makeOptions({ maxIterations: 5, stallThreshold: 3 }) + ); + // We should reach max_iterations because the streak resets on iter 3. + expect(result.iterations).toBe(5); + expect(result.exitReason).toBe('max_iterations'); + } finally { + restore(); + } + }); + test('exits early when completion promise is detected', async () => { let callCount = 0; const restore = mockInvoker(invoker, async () => { @@ -1577,7 +1700,7 @@ describe('run() with mocked invoker', () => { })); try { - await run(makeOptions({ ralphDir, maxIterations: 5 })); + await run(makeOptions({ ralphDir, maxIterations: 5, stallThreshold: 0 })); const s = state.read(ralphDir); expect(s.resumedAt).not.toBeNull(); expect(s.resumedAt).toBeDefined(); @@ -1586,6 +1709,66 @@ describe('run() with mocked invoker', () => { } }); + test('preserves the original startedAt timestamp across resumes', async () => { + const ralphDir = path.join(tmpDir, '.ralph'); + fs.mkdirSync(ralphDir, { recursive: true }); + // Seed a prior state whose startedAt is the "true" original run start. + const originalStartedAt = '2026-04-20T12:00:00.000Z'; + state.init(ralphDir, { + active: false, + iteration: 4, + startedAt: originalStartedAt, + completedAt: null, + stoppedAt: '2026-04-20T13:00:00.000Z', + }); + + const restore = mockInvoker(invoker, async () => ({ + stdout: 'COMPLETE', + exitCode: 0, + filesChanged: [], + toolUsage: [], + })); + + try { + await run(makeOptions({ ralphDir, maxIterations: 6 })); + const s = state.read(ralphDir); + // startedAt must not be overwritten on resume. + expect(s.startedAt).toBe(originalStartedAt); + // resumedAt should be fresh (a new ISO timestamp, different from startedAt). + expect(s.resumedAt).toBeTruthy(); + expect(s.resumedAt).not.toBe(originalStartedAt); + } finally { + restore(); + } + }); + + test('sets startedAt on a fresh run when no prior state exists', async () => { + const ralphDir = path.join(tmpDir, '.ralph'); + fs.mkdirSync(ralphDir, { recursive: true }); + // No prior state -> startedAt is set to "now" and resumedAt stays null. + + const restore = mockInvoker(invoker, async () => ({ + stdout: 'COMPLETE', + exitCode: 0, + filesChanged: [], + toolUsage: [], + })); + + try { + const before = Date.now(); + await run(makeOptions({ ralphDir, maxIterations: 2 })); + const after = Date.now(); + const s = state.read(ralphDir); + expect(s.resumedAt).toBeNull(); + expect(s.startedAt).toBeTruthy(); + const startedAtMs = Date.parse(s.startedAt); + expect(startedAtMs).toBeGreaterThanOrEqual(before); + expect(startedAtMs).toBeLessThanOrEqual(after); + } finally { + restore(); + } + }); + test('logs a resume message in verbose mode', async () => { const ralphDir = path.join(tmpDir, '.ralph'); fs.mkdirSync(ralphDir, { recursive: true }); @@ -1980,12 +2163,15 @@ describe('run() with mocked invoker', () => { }); test('uses only recent parsed error entries for prompt feedback lookup', async () => { + // Feedback window is 5 iterations. After 6 consecutive failing + // iterations, iter 1's error should have aged out of the window when + // iter 7 starts, while iter 6's error should still be visible. const prompts = []; let callCount = 0; const restore = mockInvoker(invoker, async (opts) => { callCount++; prompts.push(opts.prompt); - if (callCount <= 4) { + if (callCount <= 6) { return { stdout: 'no promise', stderr: `critical failure ${callCount}`, @@ -2003,9 +2189,10 @@ describe('run() with mocked invoker', () => { }); try { - await run(makeOptions({ maxIterations: 5, noCommit: true })); - expect(prompts[4]).toContain('critical failure 4'); - expect(prompts[4]).not.toContain('critical failure 1'); + await run(makeOptions({ maxIterations: 7, noCommit: true })); + // prompts[6] is the prompt passed into the 7th invocation. + expect(prompts[6]).toContain('critical failure 6'); + expect(prompts[6]).not.toContain('critical failure 1'); } finally { restore(); } diff --git a/tests/unit/javascript/mini-ralph-state.test.js b/tests/unit/javascript/mini-ralph-state.test.js index 7aaca19..5fd0e0d 100644 --- a/tests/unit/javascript/mini-ralph-state.test.js +++ b/tests/unit/javascript/mini-ralph-state.test.js @@ -222,6 +222,36 @@ describe('run lock lifecycle', () => { }); }); +describe('atomic writes', () => { + test('does not leave a temp file behind on a successful write', () => { + const ralphDir = path.join(tmpDir, '.ralph'); + state.init(ralphDir, { active: true, iteration: 1 }); + state.update(ralphDir, { iteration: 2 }); + state.update(ralphDir, { iteration: 3 }); + + const entries = fs.readdirSync(ralphDir); + const tmpFiles = entries.filter((name) => name.includes('.tmp-')); + expect(tmpFiles).toEqual([]); + expect(state.read(ralphDir).iteration).toBe(3); + }); + + test('read() tolerates a missing state file and returns null', () => { + const ralphDir = path.join(tmpDir, '.ralph-empty'); + // Directory does not exist yet. + expect(state.read(ralphDir)).toBeNull(); + // Directory exists but file does not. + fs.mkdirSync(ralphDir, { recursive: true }); + expect(state.read(ralphDir)).toBeNull(); + }); + + test('read() returns null rather than throwing on a corrupted state file', () => { + const ralphDir = path.join(tmpDir, '.ralph-corrupt'); + fs.mkdirSync(ralphDir, { recursive: true }); + fs.writeFileSync(state.statePath(ralphDir), '{not valid json', 'utf8'); + expect(state.read(ralphDir)).toBeNull(); + }); +}); + describe('state fields include required loop metadata', () => { test('stores all required loop startup fields', () => { const ralphDir = path.join(tmpDir, '.ralph');