ncheaz · ncheaz · Apr 22, 2026 · Apr 22, 2026
diff --git a/lib/mini-ralph/runner.js b/lib/mini-ralph/runner.js
@@ -29,8 +29,36 @@ const DEFAULTS = {
   tasksMode: false,
   noCommit: false,
   verbose: false,
+  // Stall detector: break the loop after N *consecutive* iterations that
+  // succeeded but produced no progress (no promise, no completed tasks, no
+  // files changed). 0 disables the detector. Failed iterations do not count
+  // toward the streak because their signal is already surfaced via the
+  // `Recent Loop Signals` feedback block.
+  stallThreshold: 3,
 };
 
+/**
+ * Determine whether an iteration stalled, i.e. made no forward progress.
+ *
+ * An iteration is NOT stalled (returns false) if any of the following hold:
+ *   - OpenCode emitted the task or completion promise
+ *   - One or more tasks transitioned to "completed" during the iteration
+ *   - At least one repo-tracked file was observed to have changed
+ *   - The iteration failed outright (its signal is handled separately)
+ *
+ * @param {object} iterationSignals - `iterationFailed`, `hasCompletion`, `hasTask`, `completedTasksCount`, `filesChangedCount`; a missing object is never a stall.
+ * @returns {boolean} `true` only when none of the progress signals above is present.
+ */
+function _iterationIsStalled(iterationSignals) {
+  if (!iterationSignals) return false;
+  if (iterationSignals.iterationFailed) return false;
+  if (iterationSignals.hasCompletion) return false;
+  if (iterationSignals.hasTask) return false;
+  if (iterationSignals.completedTasksCount > 0) return false;
+  if (iterationSignals.filesChangedCount > 0) return false;
+  return true;
+}
+
 function _isFailedIteration(result) {
   if (!result || typeof result !== 'object') return false;
   if (result.signal !== null && result.signal !== undefined && result.signal !== '') {
@@ -119,11 +147,19 @@ async function run(opts) {
   const minIterations = options.minIterations;
   const completionPromise = options.completionPromise;
   const taskPromise = options.taskPromise;
+  const stallThreshold =
+    typeof options.stallThreshold === 'number' && options.stallThreshold >= 0
+      ? Math.floor(options.stallThreshold)
+      : DEFAULTS.stallThreshold;
 
   let stateInitialized = false;
   let iterationCount = 0;
   let completed = false;
   let exitReason = 'max_iterations';
+  // Consecutive iterations that succeeded but produced no progress signal.
+  // Reset whenever any progress is detected (or when the iteration failed, so
+  // transient infra errors don't trip the stall detector).
+  let stallStreak = 0;
 
   try {
 
@@ -140,6 +176,18 @@ async function run(opts) {
     }
 
     // Initialize state file for this run, preserving history count if resuming.
+    //
+    // `startedAt` semantics: this field marks the first time *this change* was
+    // put through a Ralph loop. On a resume we must preserve the original
+    // timestamp, not overwrite it with the current time -- previously, every
+    // resume reset `startedAt` and the status dashboard lost the true wall-
+    // clock duration. `resumedAt` tracks the most recent resume.
+    const nowIso = new Date().toISOString();
+    const preservedStartedAt =
+      resumeIteration > 1 && existingState && existingState.startedAt
+        ? existingState.startedAt
+        : nowIso;
+
     state.init(ralphDir, {
       active: true,
       iteration: resumeIteration,
@@ -153,8 +201,8 @@ async function run(opts) {
       promptTemplate: options.promptTemplate || null,
       noCommit: options.noCommit,
       model: options.model || '',
-      startedAt: new Date().toISOString(),
-      resumedAt: resumeIteration > 1 ? new Date().toISOString() : null,
+      startedAt: preservedStartedAt,
+      resumedAt: resumeIteration > 1 ? nowIso : null,
       completedAt: null,
       stoppedAt: null,
       exitReason: null,
@@ -194,8 +242,13 @@ async function run(opts) {
             throw err;
           }
 
-          const errorEntries = errors.readEntries(ralphDir, 3);
-          const iterationFeedback = _buildIterationFeedback(history.recent(ralphDir, 3), errorEntries);
+          // Widen the feedback window to 5 so the agent sees a longer streak
+          // when it keeps emitting the same hand-off / no-promise signal — the
+          // `_failureFingerprint` dedup collapses identical entries into a
+          // single "same failure as iteration N" line, so more history is
+          // cheap and actionable.
+          const errorEntries = errors.readEntries(ralphDir, 5);
+          const iterationFeedback = _buildIterationFeedback(history.recent(ralphDir, 5), errorEntries);
 
           // Inject any pending context
           const pendingContext = context.consume(ralphDir);
@@ -307,6 +360,35 @@ async function run(opts) {
           break;
         }
 
+        // Stall detection: track consecutive unproductive iterations and stop
+        // early so the loop doesn't burn through the full `maxIterations`
+        // budget on pure no-ops (e.g. agent hitting a quality gate it can't
+        // fix and asking for hand-off without emitting a promise).
+        const iterationFailed = _isFailedIteration(result);
+        const stalledThisIteration = _iterationIsStalled({
+          iterationFailed,
+          hasCompletion,
+          hasTask,
+          completedTasksCount: completedTasks.length,
+          filesChangedCount: Array.isArray(result.filesChanged) ? result.filesChanged.length : 0,
+        });
+
+        if (stalledThisIteration) {
+          stallStreak++;
+        } else {
+          stallStreak = 0;
+        }
+
+        if (stallThreshold > 0 && stallStreak >= stallThreshold) {
+          if (options.verbose) {
+            process.stderr.write(
+              `[mini-ralph] stall detector: ${stallStreak} consecutive no-op iteration(s); halting.\n`
+            );
+          }
+          exitReason = 'stalled';
+          break;
+        }
+
         // In tasks mode, task promise just continues the loop
         if (options.tasksMode && hasTask) {
           // Continue to next iteration
@@ -438,7 +520,13 @@ function _autoCommit(iteration, opts = {}) {
   }
 
   try {
-    childProcess.execFileSync('git', ['add', '--', ...filesToStage], {
+    // Use `git add -A -- <paths>` (not plain `git add -- <paths>`) so deletions
+    // and renames are staged alongside modifications/additions. Tasks that call
+    // `git rm` via a shell tool leave the path absent from the working tree but
+    // still present in `git status --porcelain`, which means the plain form
+    // would error with `fatal: pathspec did not match`. Scoping to the per-path
+    // allowlist preserves the protected-artifact guarantee.
+    childProcess.execFileSync('git', ['add', '-A', '--', ...filesToStage], {
       stdio: verbose ? 'inherit' : ['pipe', 'pipe', 'pipe'],
       encoding: 'utf8',
     });
@@ -575,17 +663,31 @@ function _failureFingerprint(entry, errorEntries) {
     const match = errors.matchIteration(errorEntries, entry.iteration);
     stderrHead = _firstNonEmptyLine(match && match.stderr, 120);
   }
+  // A "no promise emitted" iteration is also a distinguishable failure mode
+  // even when exitCode===0 and there's no stderr (e.g. the agent explicitly
+  // refuses to continue). Encoding it in the fingerprint lets the dedup
+  // collapse repeated hand-off iterations into a single actionable line
+  // instead of N identical bullets.
+  const noPromise = !entry.completionDetected && !entry.taskDetected;
   return JSON.stringify({
     failureStage: entry.failureStage || '',
     exitCode: entry.exitCode,
     stderrHead,
+    noPromise,
+    commitAnomalyType: entry.commitAnomalyType || '',
   });
 }
 
 function _isEmptyFingerprint(fingerprint) {
   try {
     const obj = JSON.parse(fingerprint);
-    return !obj.failureStage && obj.exitCode === 0 && !obj.stderrHead;
+    return (
+      !obj.failureStage &&
+      obj.exitCode === 0 &&
+      !obj.stderrHead &&
+      !obj.noPromise &&
+      !obj.commitAnomalyType
+    );
   } catch {
     return false;
   }
@@ -846,4 +948,5 @@ module.exports = {
   _appendFatalIterationFailure,
   _failureFingerprint,
   _firstNonEmptyLine,
+  _iterationIsStalled,
 };
diff --git a/lib/mini-ralph/state.js b/lib/mini-ralph/state.js
@@ -55,11 +55,27 @@ function init(ralphDir, data) {
 function read(ralphDir) {
   const file = statePath(ralphDir);
   if (!fs.existsSync(file)) return null;
-  try {
-    return JSON.parse(fs.readFileSync(file, 'utf8'));
-  } catch {
-    return null;
+  // One-shot, immediate retry (no sleep) to paper over the vanishingly-rare
+  // race where the file is read while `_write` is replacing it. On POSIX
+  // `renameSync` is atomic, so the retry only matters on filesystems where it
+  // is not. Empty content, ENOENT and JSON SyntaxError are retried once; any
+  // other error, or a second failure, yields null.
+  for (let attempt = 0; attempt < 2; attempt++) {
+    try {
+      const raw = fs.readFileSync(file, 'utf8');
+      if (!raw) {
+        if (attempt === 0) continue;
+        return null;
+      }
+      return JSON.parse(raw);
+    } catch (err) {
+      if (attempt === 0 && (err.code === 'ENOENT' || err instanceof SyntaxError)) {
+        continue;
+      }
+      return null;
+    }
   }
+  return null;
 }
 
 /**
@@ -182,7 +198,43 @@ function _ensureDir(ralphDir) {
 
 function _write(ralphDir, data) {
   _ensureDir(ralphDir);
-  fs.writeFileSync(statePath(ralphDir), JSON.stringify(data, null, 2), 'utf8');
+  const target = statePath(ralphDir);
+  // Atomic write: serialize to a temp file in the same directory, then rename.
+  // A concurrent `read()` either sees the fully-written old file or the fully-
+  // written new file -- never a partially-written one. This matters because
+  // `ralph-run --status` can race with the live loop's per-iteration
+  // `state.update()` calls, and a torn read used to surface as JSON.parse
+  // errors or the dashboard reporting a stale iteration counter.
+  const tmp = `${target}.tmp-${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;
+  const serialized = JSON.stringify(data, null, 2);
+
+  let handle = null;
+  try {
+    handle = fs.openSync(tmp, 'wx');
+    fs.writeFileSync(handle, serialized, 'utf8');
+    fs.fsyncSync(handle);
+  } finally {
+    if (handle !== null) {
+      try {
+        fs.closeSync(handle);
+      } catch {
+        /* best-effort close */
+      }
+    }
+  }
+
+  try {
+    fs.renameSync(tmp, target);
+  } catch (err) {
+    // Clean up the temp file if rename failed, then rethrow so the caller
+    // sees the real error (disk full, permissions, etc.).
+    try {
+      fs.unlinkSync(tmp);
+    } catch {
+      /* best-effort cleanup */
+    }
+    throw err;
+  }
 }
 
 function _createLockToken() {

diff --git a/scripts/mini-ralph-cli.js b/scripts/mini-ralph-cli.js
@@ -19,6 +19,7 @@
  *   --tasks                    Enable tasks mode
  *   --min-iterations <n>       Minimum iterations (default: 1)
  *   --max-iterations <n>       Maximum iterations (default: 50)
+ *   --stall-threshold <n>      Halt after N consecutive no-op iterations (default: 3; 0 disables)
  *   --completion-promise <s>   Completion promise string (default: COMPLETE)
  *   --task-promise <s>         Task promise string (default: READY_FOR_NEXT_TASK)
  *   --no-commit                Suppress auto-commit
@@ -48,6 +49,7 @@ function parseArgs(argv) {
     tasksMode: false,
     minIterations: 1,
     maxIterations: 50,
+    stallThreshold: 3,
     completionPromise: 'COMPLETE',
     taskPromise: 'READY_FOR_NEXT_TASK',
     noCommit: false,
@@ -88,6 +90,9 @@ function parseArgs(argv) {
       case '--max-iterations':
         opts.maxIterations = parseInt(args[++i], 10);
         break;
+      case '--stall-threshold':
+        opts.stallThreshold = parseInt(args[++i], 10);
+        break;
       case '--completion-promise':
         opts.completionPromise = args[++i];
         break;
@@ -141,6 +146,7 @@ Options:
   --tasks                    Enable tasks mode
   --min-iterations <n>       Minimum iterations (default: 1)
   --max-iterations <n>       Maximum iterations (default: 50)
+  --stall-threshold <n>      Halt after N consecutive no-op iterations (default: 3; 0 disables)
   --completion-promise <s>   Completion promise string
   --task-promise <s>         Task promise string
   --no-commit                Suppress auto-commit
@@ -197,6 +203,7 @@ async function main() {
     tasksMode: opts.tasksMode,
     minIterations: opts.minIterations,
     maxIterations: opts.maxIterations,
+    stallThreshold: opts.stallThreshold,
     completionPromise: opts.completionPromise,
     taskPromise: opts.taskPromise,
     noCommit: opts.noCommit,

diff --git a/scripts/ralph-run.sh b/scripts/ralph-run.sh
@@ -756,7 +756,21 @@ Change directory: {{change_dir}}
 
 Before implementing, read the OpenSpec artifacts listed above that are relevant to the current task.
 
-Pick the first [ ] or [/] task in tasks.md, mark it [/], implement it (smallest change that fully satisfies the Done-when conditions), run the task's verification command, mark it [x] on success, then output `<promise>{{task_promise}}</promise>`. Output `<promise>{{completion_promise}}</promise>` only when every task is [x]. Output promise tags on their own line, literal; do not quote or describe them. Do not fabricate a promise to exit the loop. If an approach fails twice, try a different one.
+Follow this loop contract EXACTLY. Do not skip steps. Do not batch. Do not output a promise until every step is done.
+
+1. Open `tasks.md` (at `{{change_dir}}/tasks.md`) and find the FIRST line matching `- [ ] ` or `- [/] `. Remember its exact text.
+2. Edit `tasks.md` in place to change that line's marker to `- [/] ` (in-progress). You MUST use your file edit tool to modify the file on disk — a shell `cp`, `sed`, or print-to-stdout does not count. Verify by re-reading the file.
+3. Implement the smallest change that fully satisfies the task's Done-when conditions. Run the task's verification command if one is specified.
+4. On success, edit `tasks.md` again in place to change that line's marker from `- [/] ` to `- [x] `. Verify by re-reading the file and confirming the `[x]` is present on that exact line.
+5. ONLY after step 4 writes `[x]` to disk, output `<promise>{{task_promise}}</promise>` on its own line.
+6. If and only if EVERY task line in `tasks.md` is `- [x] `, output `<promise>{{completion_promise}}</promise>` instead.
+
+Hard rules:
+- If you do not actually modify `tasks.md` on disk in this iteration, DO NOT output any promise tag. Output a short failure note instead and stop.
+- Never output `<promise>{{task_promise}}</promise>` while the task you just worked on is still `- [ ]` on disk. That causes the same task to repeat forever.
+- Promise tags must be on their own line, literal, unquoted, and not described in prose.
+- If an approach fails twice, try a different one.
+- If the task is already satisfied by prior work (e.g. target file already exists with the right content), you STILL must flip the checkbox to `[x]` in `tasks.md` before emitting the promise.
 
 ## Commit Contract
 

diff --git a/tests/unit/javascript/mini-ralph-runner-autocommit.test.js b/tests/unit/javascript/mini-ralph-runner-autocommit.test.js
@@ -67,7 +67,7 @@ describe('runner._autoCommit()', () => {
 
     expect(execFileSync).toHaveBeenCalledWith(
       'git',
-      ['add', '--', 'tasks.md', 'src/app.js'],
+      ['add', '-A', '--', 'tasks.md', 'src/app.js'],
       expect.any(Object)
     );
     expect(execFileSync).toHaveBeenCalledWith(
@@ -98,7 +98,7 @@ describe('runner._autoCommit()', () => {
     expect(execFileSync).toHaveBeenNthCalledWith(
       1,
       'git',
-      ['add', '--', 'tasks.md', 'src/app.js'],
+      ['add', '-A', '--', 'tasks.md', 'src/app.js'],
       expect.any(Object)
     );
     expect(execFileSync).toHaveBeenNthCalledWith(
@@ -160,16 +160,44 @@ describe('runner._autoCommit()', () => {
 
     expect(execFileSync).toHaveBeenCalledWith(
       'git',
-      ['add', '--', 'tasks.md', 'src/app.js'],
+      ['add', '-A', '--', 'tasks.md', 'src/app.js'],
       expect.any(Object)
     );
+    // Guard against the unscoped form, which would stage *every* dirty file in
+    // the repo (including files unrelated to the current task).
     expect(execFileSync).not.toHaveBeenCalledWith(
       'git',
       ['add', '-A'],
       expect.any(Object)
     );
   });
 
+  test('stages deletions alongside modifications via `git add -A -- <paths>`', () => {
+    // Simulate a task that removed a file: the path is in the allowlist but
+    // no longer exists on disk. `git add -A -- <path>` must still succeed and
+    // record the deletion in the index.
+    execFileSync.mockImplementation((command, args) => {
+      if (command === 'git' && args[0] === 'diff') {
+        return 'deleted/file.webp\ntasks.md\n';
+      }
+      return '';
+    });
+
+    const result = runner._autoCommit(7, {
+      completedTasks: [completedTask],
+      filesToStage: ['deleted/file.webp', 'tasks.md'],
+      verbose: false,
+    });
+
+    expect(execFileSync).toHaveBeenNthCalledWith(
+      1,
+      'git',
+      ['add', '-A', '--', 'deleted/file.webp', 'tasks.md'],
+      expect.any(Object)
+    );
+    expect(result).toEqual({ attempted: true, committed: true, anomaly: null });
+  });
+
   test('blocks protected OpenSpec artifacts from loop-managed commits', () => {
     const result = runner._autoCommit(6, {
       completedTasks: [completedTask],