From b5d8687b72c7ebdbf0a66a0dd012e2928c9187e3 Mon Sep 17 00:00:00 2001
From: Nixon Cheaz <6854716+ncheaz@users.noreply.github.com>
Date: Wed, 22 Apr 2026 18:26:08 -0400
Subject: [PATCH] Bring back the dirty bucket changes

---
 lib/mini-ralph/runner.js                      | 115 +++++++++-
 lib/mini-ralph/state.js                       |  62 +++++-
 scripts/mini-ralph-cli.js                     |   7 +
 scripts/ralph-run.sh                          |  16 +-
 .../mini-ralph-runner-autocommit.test.js      |  34 ++-
 .../unit/javascript/mini-ralph-runner.test.js | 199 +++++++++++++++++-
 .../unit/javascript/mini-ralph-state.test.js  |  30 +++
 7 files changed, 442 insertions(+), 21 deletions(-)

diff --git a/lib/mini-ralph/runner.js b/lib/mini-ralph/runner.js
index 765de28..7bfbd27 100644
--- a/lib/mini-ralph/runner.js
+++ b/lib/mini-ralph/runner.js
@@ -29,8 +29,36 @@ const DEFAULTS = {
   tasksMode: false,
   noCommit: false,
   verbose: false,
+  // Stall detector: break the loop after N *consecutive* iterations that
+  // succeeded but produced no progress (no promise, no completed tasks, no
+  // files changed). 0 disables the detector. Failed iterations do not count
+  // toward the streak because their signal is already surfaced via the
+  // `Recent Loop Signals` feedback block.
+  stallThreshold: 3,
 };
 
+/**
+ * Determine whether an iteration made any forward progress.
+ *
+ * An iteration is considered productive if any of the following are true:
+ *   - OpenCode emitted the task or completion promise
+ *   - One or more tasks transitioned to "completed" during the iteration
+ *   - At least one repo-tracked file was observed to have changed
+ *   - The iteration failed outright (its signal is handled separately)
+ *
+ * @param {object} iterationSignals
+ * @returns {boolean}
+ */
+function _iterationIsStalled(iterationSignals) {
+  if (!iterationSignals) return false;
+  if (iterationSignals.iterationFailed) return false;
+  if (iterationSignals.hasCompletion) return false;
+  if (iterationSignals.hasTask) return false;
+  if (iterationSignals.completedTasksCount > 0) return false;
+  if (iterationSignals.filesChangedCount > 0) return false;
+  return true;
+}
+
 function _isFailedIteration(result) {
   if (!result || typeof result !== 'object') return false;
   if (result.signal !== null && result.signal !== undefined && result.signal !== '') {
@@ -119,11 +147,19 @@ async function run(opts) {
   const minIterations = options.minIterations;
   const completionPromise = options.completionPromise;
   const taskPromise = options.taskPromise;
+  const stallThreshold =
+    typeof options.stallThreshold === 'number' && options.stallThreshold >= 0
+      ? Math.floor(options.stallThreshold)
+      : DEFAULTS.stallThreshold;
 
   let stateInitialized = false;
   let iterationCount = 0;
   let completed = false;
   let exitReason = 'max_iterations';
+  // Consecutive iterations that succeeded but produced no progress signal.
+  // Reset whenever any progress is detected (or when the iteration failed, so
+  // transient infra errors don't trip the stall detector).
+  let stallStreak = 0;
 
   try {
 
@@ -140,6 +176,18 @@ async function run(opts) {
     }
 
     // Initialize state file for this run, preserving history count if resuming.
+    //
+    // `startedAt` semantics: this field marks the first time *this change* was
+    // put through a Ralph loop. On a resume we must preserve the original
+    // timestamp, not overwrite it with the current time -- previously, every
+    // resume reset `startedAt` and the status dashboard lost the true wall-
+    // clock duration. `resumedAt` tracks the most recent resume.
+    const nowIso = new Date().toISOString();
+    const preservedStartedAt =
+      resumeIteration > 1 && existingState && existingState.startedAt
+        ? existingState.startedAt
+        : nowIso;
+
     state.init(ralphDir, {
       active: true,
       iteration: resumeIteration,
@@ -153,8 +201,8 @@ async function run(opts) {
       promptTemplate: options.promptTemplate || null,
       noCommit: options.noCommit,
       model: options.model || '',
-      startedAt: new Date().toISOString(),
-      resumedAt: resumeIteration > 1 ? new Date().toISOString() : null,
+      startedAt: preservedStartedAt,
+      resumedAt: resumeIteration > 1 ? nowIso : null,
       completedAt: null,
       stoppedAt: null,
       exitReason: null,
@@ -194,8 +242,13 @@ async function run(opts) {
             throw err;
           }
 
-          const errorEntries = errors.readEntries(ralphDir, 3);
-          const iterationFeedback = _buildIterationFeedback(history.recent(ralphDir, 3), errorEntries);
+          // Widen the feedback window to 5 so the agent sees a longer streak
+          // when it keeps emitting the same hand-off / no-promise signal — the
+          // `_failureFingerprint` dedup collapses identical entries into a
+          // single "same failure as iteration N" line, so more history is
+          // cheap and actionable.
+          const errorEntries = errors.readEntries(ralphDir, 5);
+          const iterationFeedback = _buildIterationFeedback(history.recent(ralphDir, 5), errorEntries);
 
           // Inject any pending context
           const pendingContext = context.consume(ralphDir);
@@ -307,6 +360,35 @@ async function run(opts) {
           break;
         }
 
+        // Stall detection: track consecutive unproductive iterations and stop
+        // early so the loop doesn't burn through the full `maxIterations`
+        // budget on pure no-ops (e.g. agent hitting a quality gate it can't
+        // fix and asking for hand-off without emitting a promise).
+        const iterationFailed = _isFailedIteration(result);
+        const stalledThisIteration = _iterationIsStalled({
+          iterationFailed,
+          hasCompletion,
+          hasTask,
+          completedTasksCount: completedTasks.length,
+          filesChangedCount: Array.isArray(result.filesChanged) ? result.filesChanged.length : 0,
+        });
+
+        if (stalledThisIteration) {
+          stallStreak++;
+        } else {
+          stallStreak = 0;
+        }
+
+        if (stallThreshold > 0 && stallStreak >= stallThreshold) {
+          if (options.verbose) {
+            process.stderr.write(
+              `[mini-ralph] stall detector: ${stallStreak} consecutive no-op iteration(s); halting.\n`
+            );
+          }
+          exitReason = 'stalled';
+          break;
+        }
+
         // In tasks mode, task promise just continues the loop
         if (options.tasksMode && hasTask) {
           // Continue to next iteration
@@ -438,7 +520,13 @@ function _autoCommit(iteration, opts = {}) {
   }
 
   try {
-    childProcess.execFileSync('git', ['add', '--', ...filesToStage], {
+    // Use `git add -A -- <paths>` (not plain `git add -- <paths>`) so deletions
+    // and renames are staged alongside modifications/additions. Tasks that call
+    // `git rm` via a shell tool leave the path absent from the working tree but
+    // still present in `git status --porcelain`, which means the plain form
+    // would error with `fatal: pathspec did not match`. Scoping to the per-path
+    // allowlist preserves the protected-artifact guarantee.
+    childProcess.execFileSync('git', ['add', '-A', '--', ...filesToStage], {
       stdio: verbose ? 'inherit' : ['pipe', 'pipe', 'pipe'],
       encoding: 'utf8',
     });
@@ -575,17 +663,31 @@ function _failureFingerprint(entry, errorEntries) {
     const match = errors.matchIteration(errorEntries, entry.iteration);
     stderrHead = _firstNonEmptyLine(match && match.stderr, 120);
   }
+  // A "no promise emitted" iteration is also a distinguishable failure mode
+  // even when exitCode===0 and there's no stderr (e.g. the agent explicitly
+  // refuses to continue). Encoding it in the fingerprint lets the dedup
+  // collapse repeated hand-off iterations into a single actionable line
+  // instead of N identical bullets.
+  const noPromise = !entry.completionDetected && !entry.taskDetected;
   return JSON.stringify({
     failureStage: entry.failureStage || '',
     exitCode: entry.exitCode,
     stderrHead,
+    noPromise,
+    commitAnomalyType: entry.commitAnomalyType || '',
   });
 }
 
 function _isEmptyFingerprint(fingerprint) {
   try {
     const obj = JSON.parse(fingerprint);
-    return !obj.failureStage && obj.exitCode === 0 && !obj.stderrHead;
+    return (
+      !obj.failureStage &&
+      obj.exitCode === 0 &&
+      !obj.stderrHead &&
+      !obj.noPromise &&
+      !obj.commitAnomalyType
+    );
   } catch {
     return false;
   }
@@ -846,4 +948,5 @@ module.exports = {
   _appendFatalIterationFailure,
   _failureFingerprint,
   _firstNonEmptyLine,
+  _iterationIsStalled,
 };
diff --git a/lib/mini-ralph/state.js b/lib/mini-ralph/state.js
index ba8a442..d57693c 100644
--- a/lib/mini-ralph/state.js
+++ b/lib/mini-ralph/state.js
@@ -55,11 +55,27 @@ function init(ralphDir, data) {
 function read(ralphDir) {
   const file = statePath(ralphDir);
   if (!fs.existsSync(file)) return null;
-  try {
-    return JSON.parse(fs.readFileSync(file, 'utf8'));
-  } catch {
-    return null;
+  // One-shot retry to paper over the vanishingly-rare race where we read the
+  // state file between `openSync('wx')` on the temp file and the rename. On
+  // POSIX `renameSync` is atomic, so the retry window is only meaningful on
+  // filesystems where it is not -- but the cost of retrying is tiny, so we
+  // do it uniformly for robustness.
+  for (let attempt = 0; attempt < 2; attempt++) {
+    try {
+      const raw = fs.readFileSync(file, 'utf8');
+      if (!raw) {
+        if (attempt === 0) continue;
+        return null;
+      }
+      return JSON.parse(raw);
+    } catch (err) {
+      if (attempt === 0 && (err.code === 'ENOENT' || err instanceof SyntaxError)) {
+        continue;
+      }
+      return null;
+    }
   }
+  return null;
 }
 
 /**
@@ -182,7 +198,43 @@ function _ensureDir(ralphDir) {
 
 function _write(ralphDir, data) {
   _ensureDir(ralphDir);
-  fs.writeFileSync(statePath(ralphDir), JSON.stringify(data, null, 2), 'utf8');
+  const target = statePath(ralphDir);
+  // Atomic write: serialize to a temp file in the same directory, then rename.
+  // A concurrent `read()` either sees the fully-written old file or the fully-
+  // written new file -- never a partially-written one. This matters because
+  // `ralph-run --status` can race with the live loop's per-iteration
+  // `state.update()` calls, and a torn read used to surface as JSON.parse
+  // errors or the dashboard reporting a stale iteration counter.
+  const tmp = `${target}.tmp-${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
+  const serialized = JSON.stringify(data, null, 2);
+
+  let handle = null;
+  try {
+    handle = fs.openSync(tmp, 'wx');
+    fs.writeFileSync(handle, serialized, 'utf8');
+    fs.fsyncSync(handle);
+  } finally {
+    if (handle !== null) {
+      try {
+        fs.closeSync(handle);
+      } catch {
+        /* best-effort close */
+      }
+    }
+  }
+
+  try {
+    fs.renameSync(tmp, target);
+  } catch (err) {
+    // Clean up the temp file if rename failed, then rethrow so the caller
+    // sees the real error (disk full, permissions, etc.).
+    try {
+      fs.unlinkSync(tmp);
+    } catch {
+      /* best-effort cleanup */
+    }
+    throw err;
+  }
 }
 
 function _createLockToken() {
diff --git a/scripts/mini-ralph-cli.js b/scripts/mini-ralph-cli.js
index 5d0e19d..5c50aaa 100755
--- a/scripts/mini-ralph-cli.js
+++ b/scripts/mini-ralph-cli.js
@@ -19,6 +19,7 @@
  *   --tasks                    Enable tasks mode
  *   --min-iterations <n>       Minimum iterations (default: 1)
  *   --max-iterations <n>       Maximum iterations (default: 50)
+ *   --stall-threshold <n>      Halt after N consecutive no-op iterations (default: 3; 0 disables)
  *   --completion-promise <s>   Completion promise string (default: COMPLETE)
  *   --task-promise <s>         Task promise string (default: READY_FOR_NEXT_TASK)
  *   --no-commit                Suppress auto-commit
@@ -48,6 +49,7 @@ function parseArgs(argv) {
     tasksMode: false,
     minIterations: 1,
     maxIterations: 50,
+    stallThreshold: 3,
     completionPromise: 'COMPLETE',
     taskPromise: 'READY_FOR_NEXT_TASK',
     noCommit: false,
@@ -88,6 +90,9 @@ function parseArgs(argv) {
       case '--max-iterations':
         opts.maxIterations = parseInt(args[++i], 10);
         break;
+      case '--stall-threshold':
+        opts.stallThreshold = parseInt(args[++i], 10);
+        break;
       case '--completion-promise':
         opts.completionPromise = args[++i];
         break;
@@ -141,6 +146,7 @@ Options:
   --tasks                    Enable tasks mode
   --min-iterations <n>       Minimum iterations (default: 1)
   --max-iterations <n>       Maximum iterations (default: 50)
+  --stall-threshold <n>      Halt after N consecutive no-op iterations (default: 3; 0 disables)
   --completion-promise <s>   Completion promise string
   --task-promise <s>         Task promise string
   --no-commit                Suppress auto-commit
@@ -197,6 +203,7 @@ async function main() {
     tasksMode: opts.tasksMode,
     minIterations: opts.minIterations,
     maxIterations: opts.maxIterations,
+    stallThreshold: opts.stallThreshold,
     completionPromise: opts.completionPromise,
     taskPromise: opts.taskPromise,
     noCommit: opts.noCommit,
diff --git a/scripts/ralph-run.sh b/scripts/ralph-run.sh
index 051c163..4c5e9eb 100755
--- a/scripts/ralph-run.sh
+++ b/scripts/ralph-run.sh
@@ -756,7 +756,21 @@ Change directory: {{change_dir}}
 
 Before implementing, read the OpenSpec artifacts listed above that are relevant to the current task.
 
-Pick the first [ ] or [/] task in tasks.md, mark it [/], implement it (smallest change that fully satisfies the Done-when conditions), run the task's verification command, mark it [x] on success, then output `<promise>{{task_promise}}</promise>`. Output `<promise>{{completion_promise}}</promise>` only when every task is [x]. Output promise tags on their own line, literal; do not quote or describe them. Do not fabricate a promise to exit the loop. If an approach fails twice, try a different one.
+Follow this loop contract EXACTLY. Do not skip steps. Do not batch. Do not output a promise until every step is done.
+
+1. Open `tasks.md` (at `{{change_dir}}/tasks.md`) and find the FIRST line matching `- [ ] ` or `- [/] `. Remember its exact text.
+2. Edit `tasks.md` in place to change that line's marker to `- [/] ` (in-progress). You MUST use your file edit tool to modify the file on disk — a shell `cp`, `sed`, or print-to-stdout does not count. Verify by re-reading the file.
+3. Implement the smallest change that fully satisfies the task's Done-when conditions. Run the task's verification command if one is specified.
+4. On success, edit `tasks.md` again in place to change that line's marker from `- [/] ` to `- [x] `. Verify by re-reading the file and confirming the `[x]` is present on that exact line.
+5. ONLY after step 4 writes `[x]` to disk, output `<promise>{{task_promise}}</promise>` on its own line.
+6. If and only if EVERY task line in `tasks.md` is `- [x] `, output `<promise>{{completion_promise}}</promise>` instead.
+
+Hard rules:
+- If you do not actually modify `tasks.md` on disk in this iteration, DO NOT output any promise tag. Output a short failure note instead and stop.
+- Never output `<promise>{{task_promise}}</promise>` while the task you just worked on is still `- [ ]` on disk. That causes the same task to repeat forever.
+- Promise tags must be on their own line, literal, unquoted, and not described in prose.
+- If an approach fails twice, try a different one.
+- If the task is already satisfied by prior work (e.g. target file already exists with the right content), you STILL must flip the checkbox to `[x]` in `tasks.md` before emitting the promise.
 
 ## Commit Contract
 
diff --git a/tests/unit/javascript/mini-ralph-runner-autocommit.test.js b/tests/unit/javascript/mini-ralph-runner-autocommit.test.js
index fa27576..6983823 100644
--- a/tests/unit/javascript/mini-ralph-runner-autocommit.test.js
+++ b/tests/unit/javascript/mini-ralph-runner-autocommit.test.js
@@ -67,7 +67,7 @@ describe('runner._autoCommit()', () => {
 
     expect(execFileSync).toHaveBeenCalledWith(
       'git',
-      ['add', '--', 'tasks.md', 'src/app.js'],
+      ['add', '-A', '--', 'tasks.md', 'src/app.js'],
       expect.any(Object)
     );
     expect(execFileSync).toHaveBeenCalledWith(
@@ -98,7 +98,7 @@ describe('runner._autoCommit()', () => {
     expect(execFileSync).toHaveBeenNthCalledWith(
       1,
       'git',
-      ['add', '--', 'tasks.md', 'src/app.js'],
+      ['add', '-A', '--', 'tasks.md', 'src/app.js'],
       expect.any(Object)
     );
     expect(execFileSync).toHaveBeenNthCalledWith(
@@ -160,9 +160,11 @@ describe('runner._autoCommit()', () => {
 
     expect(execFileSync).toHaveBeenCalledWith(
       'git',
-      ['add', '--', 'tasks.md', 'src/app.js'],
+      ['add', '-A', '--', 'tasks.md', 'src/app.js'],
       expect.any(Object)
     );
+    // Guard against the unscoped form, which would stage *every* dirty file in
+    // the repo (including files unrelated to the current task).
     expect(execFileSync).not.toHaveBeenCalledWith(
       'git',
       ['add', '-A'],
@@ -170,6 +172,32 @@ describe('runner._autoCommit()', () => {
     );
   });
 
+  test('stages deletions alongside modifications via `git add -A -- <paths>`', () => {
+    // Simulate a task that removed a file: the path is in the allowlist but
+    // no longer exists on disk. `git add -A -- <path>` must still succeed and
+    // record the deletion in the index.
+    execFileSync.mockImplementation((command, args) => {
+      if (command === 'git' && args[0] === 'diff') {
+        return 'deleted/file.webp\ntasks.md\n';
+      }
+      return '';
+    });
+
+    const result = runner._autoCommit(7, {
+      completedTasks: [completedTask],
+      filesToStage: ['deleted/file.webp', 'tasks.md'],
+      verbose: false,
+    });
+
+    expect(execFileSync).toHaveBeenNthCalledWith(
+      1,
+      'git',
+      ['add', '-A', '--', 'deleted/file.webp', 'tasks.md'],
+      expect.any(Object)
+    );
+    expect(result).toEqual({ attempted: true, committed: true, anomaly: null });
+  });
+
   test('blocks protected OpenSpec artifacts from loop-managed commits', () => {
     const result = runner._autoCommit(6, {
       completedTasks: [completedTask],
diff --git a/tests/unit/javascript/mini-ralph-runner.test.js b/tests/unit/javascript/mini-ralph-runner.test.js
index 6f60b8c..db645b7 100644
--- a/tests/unit/javascript/mini-ralph-runner.test.js
+++ b/tests/unit/javascript/mini-ralph-runner.test.js
@@ -901,6 +901,63 @@ describe('_buildIterationFeedback() - fingerprint dedup', () => {
     expect(stderrHeadCount).toBe(1);
   });
 
+  test('repeated no-promise (clean-exit) iterations dedupe into a single detailed line', () => {
+    // Regression for a stuck-loop symptom: when the agent hits a blocker it
+    // cannot clear, it keeps exiting cleanly with no promise. The feedback
+    // used to render N identical bullets (e.g. "no loop promise emitted")
+    // which bloated the prompt without helping the agent. The fingerprint
+    // now covers the `noPromise` state so repeats collapse to a back-reference.
+    const history = [
+      { iteration: 23, exitCode: 0, signal: '', failureStage: '', filesChanged: [], completionDetected: false, taskDetected: false },
+      { iteration: 24, exitCode: 0, signal: '', failureStage: '', filesChanged: [], completionDetected: false, taskDetected: false },
+      { iteration: 25, exitCode: 0, signal: '', failureStage: '', filesChanged: [], completionDetected: false, taskDetected: false },
+    ];
+
+    const feedback = _buildIterationFeedback(history);
+
+    expect(feedback).toContain('Iteration 23:');
+    // Only the first occurrence gets the full "no loop promise emitted" line;
+    // the next two are back-references.
+    const noPromiseCount = (feedback.match(/no loop promise emitted/g) || []).length;
+    expect(noPromiseCount).toBe(1);
+    expect(feedback).toContain('Iteration 24: same failure as iteration 23 (see above).');
+    expect(feedback).toContain('Iteration 25: same failure as iteration 23 (see above).');
+  });
+
+  test('commit anomaly iterations are deduped by anomaly type', () => {
+    // Two iterations that failed to commit with the same anomaly type should
+    // collapse to one detailed line + one back-reference, rather than two
+    // duplicate bullets.
+    const history = [
+      {
+        iteration: 22,
+        exitCode: 0,
+        signal: '',
+        failureStage: '',
+        filesChanged: ['tasks.md'],
+        completionDetected: false,
+        taskDetected: true,
+        commitAnomaly: 'Auto-commit failed: pathspec did not match',
+        commitAnomalyType: 'commit_failed',
+      },
+      {
+        iteration: 23,
+        exitCode: 0,
+        signal: '',
+        failureStage: '',
+        filesChanged: ['tasks.md'],
+        completionDetected: false,
+        taskDetected: true,
+        commitAnomaly: 'Auto-commit failed: pathspec did not match',
+        commitAnomalyType: 'commit_failed',
+      },
+    ];
+
+    const feedback = _buildIterationFeedback(history);
+    expect(feedback).toContain('Iteration 22: commit anomaly');
+    expect(feedback).toContain('Iteration 23: same failure as iteration 22');
+  });
+
   test('three distinct-fingerprint failures: three full detail entries', () => {
     const errorContent = [
       { iteration: 1, exitCode: 1, stderr: 'ErrorA', stdout: '', signal: '', failureStage: 'stageA' },
@@ -996,7 +1053,9 @@ describe('run() with mocked invoker', () => {
     }));
 
     try {
-      const result = await run(makeOptions());
+      // Disable the stall detector here so we can verify the pure
+      // `max_iterations` exit path independently of the stall logic.
+      const result = await run(makeOptions({ stallThreshold: 0 }));
       expect(result.completed).toBe(false);
       expect(result.iterations).toBe(3);
       expect(result.exitReason).toBe('max_iterations');
@@ -1010,6 +1069,70 @@ describe('run() with mocked invoker', () => {
     }
   });
 
+  test('stall detector halts the loop after N consecutive no-op iterations', async () => {
+    let callCount = 0;
+    const restore = mockInvoker(invoker, async () => {
+      callCount++;
+      return {
+        // No promise, no files changed, no tasks completed -> stalled.
+        stdout: 'HAND OFF REQUIRED: cannot make progress',
+        exitCode: 0,
+        filesChanged: [],
+        toolUsage: [],
+      };
+    });
+
+    try {
+      const result = await run(
+        makeOptions({ maxIterations: 20, stallThreshold: 3 })
+      );
+      expect(result.completed).toBe(false);
+      expect(result.iterations).toBe(3);
+      expect(result.exitReason).toBe('stalled');
+      expect(callCount).toBe(3);
+      const persistedState = state.read(path.join(tmpDir, '.ralph'));
+      expect(persistedState.active).toBe(false);
+      expect(persistedState.stoppedAt).toBeTruthy();
+      expect(persistedState.exitReason).toBe('stalled');
+    } finally {
+      restore();
+    }
+  });
+
+  test('stall streak resets when the iteration makes any progress', async () => {
+    let callCount = 0;
+    const restore = mockInvoker(invoker, async () => {
+      callCount++;
+      // Stalled, stalled, progress, stalled, stalled -> streak resets on
+      // iteration 3 and 2 subsequent stalls (threshold=3) should NOT halt.
+      if (callCount === 3) {
+        return {
+          stdout: 'changed something',
+          exitCode: 0,
+          filesChanged: ['src/app.js'],
+          toolUsage: [],
+        };
+      }
+      return {
+        stdout: 'no progress',
+        exitCode: 0,
+        filesChanged: [],
+        toolUsage: [],
+      };
+    });
+
+    try {
+      const result = await run(
+        makeOptions({ maxIterations: 5, stallThreshold: 3 })
+      );
+      // We should reach max_iterations because the streak resets on iter 3.
+      expect(result.iterations).toBe(5);
+      expect(result.exitReason).toBe('max_iterations');
+    } finally {
+      restore();
+    }
+  });
+
   test('exits early when completion promise is detected', async () => {
     let callCount = 0;
     const restore = mockInvoker(invoker, async () => {
@@ -1577,7 +1700,7 @@ describe('run() with mocked invoker', () => {
     }));
 
     try {
-      await run(makeOptions({ ralphDir, maxIterations: 5 }));
+      await run(makeOptions({ ralphDir, maxIterations: 5, stallThreshold: 0 }));
       const s = state.read(ralphDir);
       expect(s.resumedAt).not.toBeNull();
       expect(s.resumedAt).toBeDefined();
@@ -1586,6 +1709,66 @@ describe('run() with mocked invoker', () => {
     }
   });
 
+  test('preserves the original startedAt timestamp across resumes', async () => {
+    const ralphDir = path.join(tmpDir, '.ralph');
+    fs.mkdirSync(ralphDir, { recursive: true });
+    // Seed a prior state whose startedAt is the "true" original run start.
+    const originalStartedAt = '2026-04-20T12:00:00.000Z';
+    state.init(ralphDir, {
+      active: false,
+      iteration: 4,
+      startedAt: originalStartedAt,
+      completedAt: null,
+      stoppedAt: '2026-04-20T13:00:00.000Z',
+    });
+
+    const restore = mockInvoker(invoker, async () => ({
+      stdout: '<promise>COMPLETE</promise>',
+      exitCode: 0,
+      filesChanged: [],
+      toolUsage: [],
+    }));
+
+    try {
+      await run(makeOptions({ ralphDir, maxIterations: 6 }));
+      const s = state.read(ralphDir);
+      // startedAt must not be overwritten on resume.
+      expect(s.startedAt).toBe(originalStartedAt);
+      // resumedAt should be fresh (a new ISO timestamp, different from startedAt).
+      expect(s.resumedAt).toBeTruthy();
+      expect(s.resumedAt).not.toBe(originalStartedAt);
+    } finally {
+      restore();
+    }
+  });
+
+  test('sets startedAt on a fresh run when no prior state exists', async () => {
+    const ralphDir = path.join(tmpDir, '.ralph');
+    fs.mkdirSync(ralphDir, { recursive: true });
+    // No prior state -> startedAt is set to "now" and resumedAt stays null.
+
+    const restore = mockInvoker(invoker, async () => ({
+      stdout: '<promise>COMPLETE</promise>',
+      exitCode: 0,
+      filesChanged: [],
+      toolUsage: [],
+    }));
+
+    try {
+      const before = Date.now();
+      await run(makeOptions({ ralphDir, maxIterations: 2 }));
+      const after = Date.now();
+      const s = state.read(ralphDir);
+      expect(s.resumedAt).toBeNull();
+      expect(s.startedAt).toBeTruthy();
+      const startedAtMs = Date.parse(s.startedAt);
+      expect(startedAtMs).toBeGreaterThanOrEqual(before);
+      expect(startedAtMs).toBeLessThanOrEqual(after);
+    } finally {
+      restore();
+    }
+  });
+
   test('logs a resume message in verbose mode', async () => {
     const ralphDir = path.join(tmpDir, '.ralph');
     fs.mkdirSync(ralphDir, { recursive: true });
@@ -1980,12 +2163,15 @@ describe('run() with mocked invoker', () => {
   });
 
   test('uses only recent parsed error entries for prompt feedback lookup', async () => {
+    // Feedback window is 5 iterations. After 6 consecutive failing
+    // iterations, iter 1's error should have aged out of the window when
+    // iter 7 starts, while iter 6's error should still be visible.
     const prompts = [];
     let callCount = 0;
     const restore = mockInvoker(invoker, async (opts) => {
       callCount++;
       prompts.push(opts.prompt);
-      if (callCount <= 4) {
+      if (callCount <= 6) {
         return {
           stdout: 'no promise',
           stderr: `critical failure ${callCount}`,
@@ -2003,9 +2189,10 @@ describe('run() with mocked invoker', () => {
     });
 
     try {
-      await run(makeOptions({ maxIterations: 5, noCommit: true }));
-      expect(prompts[4]).toContain('critical failure 4');
-      expect(prompts[4]).not.toContain('critical failure 1');
+      await run(makeOptions({ maxIterations: 7, noCommit: true }));
+      // prompts[6] is the prompt passed into the 7th invocation.
+      expect(prompts[6]).toContain('critical failure 6');
+      expect(prompts[6]).not.toContain('critical failure 1');
     } finally {
       restore();
     }
diff --git a/tests/unit/javascript/mini-ralph-state.test.js b/tests/unit/javascript/mini-ralph-state.test.js
index 7aaca19..5fd0e0d 100644
--- a/tests/unit/javascript/mini-ralph-state.test.js
+++ b/tests/unit/javascript/mini-ralph-state.test.js
@@ -222,6 +222,36 @@ describe('run lock lifecycle', () => {
   });
 });
 
+describe('atomic writes', () => {
+  test('does not leave a temp file behind on a successful write', () => {
+    const ralphDir = path.join(tmpDir, '.ralph');
+    state.init(ralphDir, { active: true, iteration: 1 });
+    state.update(ralphDir, { iteration: 2 });
+    state.update(ralphDir, { iteration: 3 });
+
+    const entries = fs.readdirSync(ralphDir);
+    const tmpFiles = entries.filter((name) => name.includes('.tmp-'));
+    expect(tmpFiles).toEqual([]);
+    expect(state.read(ralphDir).iteration).toBe(3);
+  });
+
+  test('read() tolerates a missing state file and returns null', () => {
+    const ralphDir = path.join(tmpDir, '.ralph-empty');
+    // Directory does not exist yet.
+    expect(state.read(ralphDir)).toBeNull();
+    // Directory exists but file does not.
+    fs.mkdirSync(ralphDir, { recursive: true });
+    expect(state.read(ralphDir)).toBeNull();
+  });
+
+  test('read() returns null rather than throwing on a corrupted state file', () => {
+    const ralphDir = path.join(tmpDir, '.ralph-corrupt');
+    fs.mkdirSync(ralphDir, { recursive: true });
+    fs.writeFileSync(state.statePath(ralphDir), '{not valid json', 'utf8');
+    expect(state.read(ralphDir)).toBeNull();
+  });
+});
+
 describe('state fields include required loop metadata', () => {
   test('stores all required loop startup fields', () => {
     const ralphDir = path.join(tmpDir, '.ralph');