Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 109 additions & 6 deletions lib/mini-ralph/runner.js
Original file line number Diff line number Diff line change
Expand Up @@ -29,8 +29,36 @@ const DEFAULTS = {
tasksMode: false,
noCommit: false,
verbose: false,
// Stall detector: break the loop after N *consecutive* iterations that
// succeeded but produced no progress (no promise, no completed tasks, no
// files changed). 0 disables the detector. Failed iterations do not count
// toward the streak because their signal is already surfaced via the
// `Recent Loop Signals` feedback block.
stallThreshold: 3,
};

/**
 * Decide whether an iteration counts as a "stall" (a successful no-op).
 *
 * The result is `true` only when signals were supplied AND none of the
 * following progress indicators is present:
 *   - `iterationFailed`          the iteration failed outright (failures are
 *                                handled separately and never count as stalls)
 *   - `hasCompletion`            the completion promise was emitted
 *   - `hasTask`                  the task promise was emitted
 *   - `completedTasksCount > 0`  at least one task was completed
 *   - `filesChangedCount > 0`    at least one changed file was observed
 *
 * A missing (`null`/`undefined`/falsy) signals argument is never a stall.
 *
 * @param {object} iterationSignals - per-iteration progress signals
 * @returns {boolean} `true` when the iteration is stalled, `false` otherwise
 */
function _iterationIsStalled(iterationSignals) {
  if (!iterationSignals) return false;

  // Evaluated left-to-right with short-circuiting, mirroring a chain of
  // early returns: the first indicator that is set ends the check.
  const sawProgressOrFailure =
    iterationSignals.iterationFailed ||
    iterationSignals.hasCompletion ||
    iterationSignals.hasTask ||
    iterationSignals.completedTasksCount > 0 ||
    iterationSignals.filesChangedCount > 0;

  return !sawProgressOrFailure;
}

function _isFailedIteration(result) {
if (!result || typeof result !== 'object') return false;
if (result.signal !== null && result.signal !== undefined && result.signal !== '') {
Expand Down Expand Up @@ -119,11 +147,19 @@ async function run(opts) {
const minIterations = options.minIterations;
const completionPromise = options.completionPromise;
const taskPromise = options.taskPromise;
const stallThreshold =
typeof options.stallThreshold === 'number' && options.stallThreshold >= 0
? Math.floor(options.stallThreshold)
: DEFAULTS.stallThreshold;

let stateInitialized = false;
let iterationCount = 0;
let completed = false;
let exitReason = 'max_iterations';
// Consecutive iterations that succeeded but produced no progress signal.
// Reset whenever any progress is detected (or when the iteration failed, so
// transient infra errors don't trip the stall detector).
let stallStreak = 0;

try {

Expand All @@ -140,6 +176,18 @@ async function run(opts) {
}

// Initialize state file for this run, preserving history count if resuming.
//
// `startedAt` semantics: this field marks the first time *this change* was
// put through a Ralph loop. On a resume we must preserve the original
// timestamp, not overwrite it with the current time -- previously, every
// resume reset `startedAt` and the status dashboard lost the true wall-
// clock duration. `resumedAt` tracks the most recent resume.
const nowIso = new Date().toISOString();
const preservedStartedAt =
resumeIteration > 1 && existingState && existingState.startedAt
? existingState.startedAt
: nowIso;

state.init(ralphDir, {
active: true,
iteration: resumeIteration,
Expand All @@ -153,8 +201,8 @@ async function run(opts) {
promptTemplate: options.promptTemplate || null,
noCommit: options.noCommit,
model: options.model || '',
startedAt: new Date().toISOString(),
resumedAt: resumeIteration > 1 ? new Date().toISOString() : null,
startedAt: preservedStartedAt,
resumedAt: resumeIteration > 1 ? nowIso : null,
completedAt: null,
stoppedAt: null,
exitReason: null,
Expand Down Expand Up @@ -194,8 +242,13 @@ async function run(opts) {
throw err;
}

const errorEntries = errors.readEntries(ralphDir, 3);
const iterationFeedback = _buildIterationFeedback(history.recent(ralphDir, 3), errorEntries);
// Widen the feedback window to 5 so the agent sees a longer streak
// when it keeps emitting the same hand-off / no-promise signal — the
// `_failureFingerprint` dedup collapses identical entries into a
// single "same failure as iteration N" line, so more history is
// cheap and actionable.
const errorEntries = errors.readEntries(ralphDir, 5);
const iterationFeedback = _buildIterationFeedback(history.recent(ralphDir, 5), errorEntries);

// Inject any pending context
const pendingContext = context.consume(ralphDir);
Expand Down Expand Up @@ -307,6 +360,35 @@ async function run(opts) {
break;
}

// Stall detection: track consecutive unproductive iterations and stop
// early so the loop doesn't burn through the full `maxIterations`
// budget on pure no-ops (e.g. agent hitting a quality gate it can't
// fix and asking for hand-off without emitting a promise).
const iterationFailed = _isFailedIteration(result);
const stalledThisIteration = _iterationIsStalled({
iterationFailed,
hasCompletion,
hasTask,
completedTasksCount: completedTasks.length,
filesChangedCount: Array.isArray(result.filesChanged) ? result.filesChanged.length : 0,
});

if (stalledThisIteration) {
stallStreak++;
} else {
stallStreak = 0;
}

if (stallThreshold > 0 && stallStreak >= stallThreshold) {
if (options.verbose) {
process.stderr.write(
`[mini-ralph] stall detector: ${stallStreak} consecutive no-op iteration(s); halting.\n`
);
}
exitReason = 'stalled';
break;
}

// In tasks mode, task promise just continues the loop
if (options.tasksMode && hasTask) {
// Continue to next iteration
Expand Down Expand Up @@ -438,7 +520,13 @@ function _autoCommit(iteration, opts = {}) {
}

try {
childProcess.execFileSync('git', ['add', '--', ...filesToStage], {
// Use `git add -A -- <paths>` (not plain `git add -- <paths>`) so deletions
// and renames are staged alongside modifications/additions. Tasks that call
// `git rm` via a shell tool leave the path absent from the working tree but
// still present in `git status --porcelain`, which means the plain form
// would error with `fatal: pathspec did not match`. Scoping to the per-path
// allowlist preserves the protected-artifact guarantee.
childProcess.execFileSync('git', ['add', '-A', '--', ...filesToStage], {
stdio: verbose ? 'inherit' : ['pipe', 'pipe', 'pipe'],
encoding: 'utf8',
});
Expand Down Expand Up @@ -575,17 +663,31 @@ function _failureFingerprint(entry, errorEntries) {
const match = errors.matchIteration(errorEntries, entry.iteration);
stderrHead = _firstNonEmptyLine(match && match.stderr, 120);
}
// A "no promise emitted" iteration is also a distinguishable failure mode
// even when exitCode===0 and there's no stderr (e.g. the agent explicitly
// refuses to continue). Encoding it in the fingerprint lets the dedup
// collapse repeated hand-off iterations into a single actionable line
// instead of N identical bullets.
const noPromise = !entry.completionDetected && !entry.taskDetected;
return JSON.stringify({
failureStage: entry.failureStage || '',
exitCode: entry.exitCode,
stderrHead,
noPromise,
commitAnomalyType: entry.commitAnomalyType || '',
});
}

function _isEmptyFingerprint(fingerprint) {
try {
const obj = JSON.parse(fingerprint);
return !obj.failureStage && obj.exitCode === 0 && !obj.stderrHead;
return (
!obj.failureStage &&
obj.exitCode === 0 &&
!obj.stderrHead &&
!obj.noPromise &&
!obj.commitAnomalyType
);
} catch {
return false;
}
Expand Down Expand Up @@ -846,4 +948,5 @@ module.exports = {
_appendFatalIterationFailure,
_failureFingerprint,
_firstNonEmptyLine,
_iterationIsStalled,
};
62 changes: 57 additions & 5 deletions lib/mini-ralph/state.js
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,27 @@ function init(ralphDir, data) {
/**
 * Read and parse the persisted loop state for `ralphDir`.
 *
 * Returns `null` when the state file does not exist, is empty, cannot be
 * read, or does not contain valid JSON. This function never throws for
 * those conditions; callers treat `null` as "no usable state".
 *
 * @param {string} ralphDir - directory that holds the state file
 * @returns {*} the parsed JSON value, or `null` when no usable state exists
 */
function read(ralphDir) {
  const file = statePath(ralphDir);
  if (!fs.existsSync(file)) return null;

  // One-shot retry to paper over the rare race where the state file is read
  // while a writer is replacing it. An empty read, a vanished file (ENOENT)
  // or a JSON syntax error is retried exactly once; any other error, or a
  // second failure, yields `null`.
  const MAX_ATTEMPTS = 2;
  for (let attempt = 0; attempt < MAX_ATTEMPTS; attempt++) {
    const isLastAttempt = attempt === MAX_ATTEMPTS - 1;
    try {
      const raw = fs.readFileSync(file, 'utf8');
      if (raw) return JSON.parse(raw);
      if (isLastAttempt) return null;
    } catch (err) {
      const retryable = err.code === 'ENOENT' || err instanceof SyntaxError;
      if (isLastAttempt || !retryable) return null;
    }
  }
  return null;
}

/**
Expand Down Expand Up @@ -182,7 +198,43 @@ function _ensureDir(ralphDir) {

/**
 * Persist `data` as pretty-printed JSON to the state file, atomically.
 *
 * The payload is serialized to a uniquely named temp file in the same
 * directory, flushed with fsync, and then renamed over the target. A
 * concurrent reader therefore sees either the complete old file or the
 * complete new file, never a partially written one. The target itself is
 * never written to directly.
 *
 * On any failure after the temp file was created (write, fsync or rename),
 * the temp file is removed on a best-effort basis and the original error is
 * rethrown so the caller sees the real cause (disk full, permissions, ...).
 *
 * @param {string} ralphDir - directory that holds the state file
 * @param {*} data - JSON-serializable state to persist
 * @returns {void}
 * @throws whatever `JSON.stringify` or the underlying `fs` call throws
 */
function _write(ralphDir, data) {
  _ensureDir(ralphDir);
  const target = statePath(ralphDir);
  // Serialize first: if this throws (e.g. circular data) nothing is created.
  const serialized = JSON.stringify(data, null, 2);
  const tmp = `${target}.tmp-${process.pid}-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;

  // Only unlink a temp file this call actually created; with the exclusive
  // 'wx' flag, a failed open means the path is not ours to delete.
  let tmpCreated = false;
  try {
    const handle = fs.openSync(tmp, 'wx');
    tmpCreated = true;
    try {
      fs.writeFileSync(handle, serialized, 'utf8');
      fs.fsyncSync(handle);
    } finally {
      try {
        fs.closeSync(handle);
      } catch {
        /* best-effort close */
      }
    }
    fs.renameSync(tmp, target);
  } catch (err) {
    if (tmpCreated) {
      try {
        fs.unlinkSync(tmp);
      } catch {
        /* best-effort cleanup */
      }
    }
    throw err;
  }
}

function _createLockToken() {
Expand Down
7 changes: 7 additions & 0 deletions scripts/mini-ralph-cli.js
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
* --tasks Enable tasks mode
* --min-iterations <n> Minimum iterations (default: 1)
* --max-iterations <n> Maximum iterations (default: 50)
* --stall-threshold <n> Halt after N consecutive no-op iterations (default: 3; 0 disables)
* --completion-promise <s> Completion promise string (default: COMPLETE)
* --task-promise <s> Task promise string (default: READY_FOR_NEXT_TASK)
* --no-commit Suppress auto-commit
Expand Down Expand Up @@ -48,6 +49,7 @@ function parseArgs(argv) {
tasksMode: false,
minIterations: 1,
maxIterations: 50,
stallThreshold: 3,
completionPromise: 'COMPLETE',
taskPromise: 'READY_FOR_NEXT_TASK',
noCommit: false,
Expand Down Expand Up @@ -88,6 +90,9 @@ function parseArgs(argv) {
case '--max-iterations':
opts.maxIterations = parseInt(args[++i], 10);
break;
case '--stall-threshold':
opts.stallThreshold = parseInt(args[++i], 10);
break;
case '--completion-promise':
opts.completionPromise = args[++i];
break;
Expand Down Expand Up @@ -141,6 +146,7 @@ Options:
--tasks Enable tasks mode
--min-iterations <n> Minimum iterations (default: 1)
--max-iterations <n> Maximum iterations (default: 50)
--stall-threshold <n> Halt after N consecutive no-op iterations (default: 3; 0 disables)
--completion-promise <s> Completion promise string
--task-promise <s> Task promise string
--no-commit Suppress auto-commit
Expand Down Expand Up @@ -197,6 +203,7 @@ async function main() {
tasksMode: opts.tasksMode,
minIterations: opts.minIterations,
maxIterations: opts.maxIterations,
stallThreshold: opts.stallThreshold,
completionPromise: opts.completionPromise,
taskPromise: opts.taskPromise,
noCommit: opts.noCommit,
Expand Down
16 changes: 15 additions & 1 deletion scripts/ralph-run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -756,7 +756,21 @@ Change directory: {{change_dir}}

Before implementing, read the OpenSpec artifacts listed above that are relevant to the current task.

Pick the first [ ] or [/] task in tasks.md, mark it [/], implement it (smallest change that fully satisfies the Done-when conditions), run the task's verification command, mark it [x] on success, then output `<promise>{{task_promise}}</promise>`. Output `<promise>{{completion_promise}}</promise>` only when every task is [x]. Output promise tags on their own line, literal; do not quote or describe them. Do not fabricate a promise to exit the loop. If an approach fails twice, try a different one.
Follow this loop contract EXACTLY. Do not skip steps. Do not batch. Do not output a promise until every step is done.

1. Open `tasks.md` (at `{{change_dir}}/tasks.md`) and find the FIRST line matching `- [ ] ` or `- [/] `. Remember its exact text.
2. Edit `tasks.md` in place to change that line's marker to `- [/] ` (in-progress). You MUST use your file edit tool to modify the file on disk — a shell `cp`, `sed`, or print-to-stdout does not count. Verify by re-reading the file.
3. Implement the smallest change that fully satisfies the task's Done-when conditions. Run the task's verification command if one is specified.
4. On success, edit `tasks.md` again in place to change that line's marker from `- [/] ` to `- [x] `. Verify by re-reading the file and confirming the `[x]` is present on that exact line.
5. ONLY after step 4 writes `[x]` to disk, output `<promise>{{task_promise}}</promise>` on its own line.
6. If and only if EVERY task line in `tasks.md` is `- [x] `, output `<promise>{{completion_promise}}</promise>` instead.

Hard rules:
- If you do not actually modify `tasks.md` on disk in this iteration, DO NOT output any promise tag. Output a short failure note instead and stop.
- Never output `<promise>{{task_promise}}</promise>` while the task you just worked on is still `- [ ]` on disk. That causes the same task to repeat forever.
- Promise tags must be on their own line, literal, unquoted, and not described in prose.
- If an approach fails twice, try a different one.
- If the task is already satisfied by prior work (e.g. target file already exists with the right content), you STILL must flip the checkbox to `[x]` in `tasks.md` before emitting the promise.

## Commit Contract

Expand Down
34 changes: 31 additions & 3 deletions tests/unit/javascript/mini-ralph-runner-autocommit.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ describe('runner._autoCommit()', () => {

expect(execFileSync).toHaveBeenCalledWith(
'git',
['add', '--', 'tasks.md', 'src/app.js'],
['add', '-A', '--', 'tasks.md', 'src/app.js'],
expect.any(Object)
);
expect(execFileSync).toHaveBeenCalledWith(
Expand Down Expand Up @@ -98,7 +98,7 @@ describe('runner._autoCommit()', () => {
expect(execFileSync).toHaveBeenNthCalledWith(
1,
'git',
['add', '--', 'tasks.md', 'src/app.js'],
['add', '-A', '--', 'tasks.md', 'src/app.js'],
expect.any(Object)
);
expect(execFileSync).toHaveBeenNthCalledWith(
Expand Down Expand Up @@ -160,16 +160,44 @@ describe('runner._autoCommit()', () => {

expect(execFileSync).toHaveBeenCalledWith(
'git',
['add', '--', 'tasks.md', 'src/app.js'],
['add', '-A', '--', 'tasks.md', 'src/app.js'],
expect.any(Object)
);
// Guard against the unscoped form, which would stage *every* dirty file in
// the repo (including files unrelated to the current task).
expect(execFileSync).not.toHaveBeenCalledWith(
'git',
['add', '-A'],
expect.any(Object)
);
});

test('stages deletions alongside modifications via `git add -A -- <paths>`', () => {
// Simulate a task that removed a file: the path is in the allowlist but
// no longer exists on disk. `git add -A -- <path>` must still succeed and
// record the deletion in the index.
//
// `execFileSync` is stubbed here, so this test pins the argv handed to git;
// it does not exercise git's real handling of a deleted path.
execFileSync.mockImplementation((command, args) => {
// `git diff ...` reports both the deleted path and tasks.md; every other
// command (including `git add`) yields empty output.
if (command === 'git' && args[0] === 'diff') {
return 'deleted/file.webp\ntasks.md\n';
}
return '';
});

const result = runner._autoCommit(7, {
completedTasks: [completedTask],
filesToStage: ['deleted/file.webp', 'tasks.md'],
verbose: false,
});

// The very first git invocation must be the path-scoped `add -A`, with the
// deleted path still present in the pathspec list.
expect(execFileSync).toHaveBeenNthCalledWith(
1,
'git',
['add', '-A', '--', 'deleted/file.webp', 'tasks.md'],
expect.any(Object)
);
// A normal commit outcome: attempted, committed, and no anomaly reported.
expect(result).toEqual({ attempted: true, committed: true, anomaly: null });
});

test('blocks protected OpenSpec artifacts from loop-managed commits', () => {
const result = runner._autoCommit(6, {
completedTasks: [completedTask],
Expand Down
Loading
Loading