diff --git a/lib/mini-ralph/history.js b/lib/mini-ralph/history.js index f90d866..976b568 100644 --- a/lib/mini-ralph/history.js +++ b/lib/mini-ralph/history.js @@ -52,6 +52,10 @@ function read(ralphDir) { * @param {Array} entry.toolUsage - Tool usage summary array * @param {Array} entry.filesChanged - Files changed in this iteration * @param {number} entry.exitCode - OpenCode exit code + * @param {boolean} [entry.blockedHandoffDetected] - Whether the iteration emitted + * the configured blocked-handoff promise and stopped for operator action. + * @param {string} [entry.blockedHandoffNote] - Compact, single-line preview of + * the extracted blocker note. The full note is persisted in HANDOFF.md. * @param {number} [entry.promptBytes] - UTF-8 byte length of the assembled prompt * @param {number} [entry.promptChars] - Character length of the assembled prompt * @param {number} [entry.promptTokens] - Estimated token count for the prompt (chars/4, rounded) diff --git a/lib/mini-ralph/index.js b/lib/mini-ralph/index.js index 1115e2d..baae7fe 100644 --- a/lib/mini-ralph/index.js +++ b/lib/mini-ralph/index.js @@ -35,6 +35,7 @@ const prompt = require('./prompt'); * @param {number} [options.maxIterations] - Maximum iterations (default: 50) * @param {string} [options.completionPromise] - Promise string signaling loop completion (default: "COMPLETE") * @param {string} [options.taskPromise] - Promise string signaling task completion (default: "READY_FOR_NEXT_TASK") + * @param {string} [options.blockedHandoffPromise] - Promise string signaling the agent is blocked and requesting human handoff (default: "BLOCKED_HANDOFF") * @param {boolean} [options.tasksMode] - Enable tasks mode (default: false) * @param {string} [options.tasksFile] - Path to tasks file when tasksMode is true * @param {boolean} [options.noCommit] - Suppress auto-commit (default: false) diff --git a/lib/mini-ralph/runner.js b/lib/mini-ralph/runner.js index fb5b902..a0b5d0f 100644 --- a/lib/mini-ralph/runner.js +++ 
b/lib/mini-ralph/runner.js @@ -28,6 +28,14 @@ const DEFAULTS = { maxIterations: 50, completionPromise: 'COMPLETE', taskPromise: 'READY_FOR_NEXT_TASK', + // Emitted by the agent when a task's `Stop and hand off if:` clause fires + // (i.e. external decision required: revert protected drift, file an + // out-of-scope refactor, escalate to a human reviewer, etc). The runner + // recognizes this as a *clean* exit distinct from `stalled` — it preserves + // the agent's diagnosis under `/HANDOFF.md` and surfaces + // `exitReason='blocked_handoff'` so operators can tell "this task is + // genuinely blocked on me" apart from "the loop livelocked." + blockedHandoffPromise: 'BLOCKED_HANDOFF', tasksMode: false, noCommit: false, verbose: false, @@ -48,11 +56,16 @@ const DEFAULTS = { * Determine whether an iteration made any forward progress. * * An iteration is considered productive if any of the following are true: - * - OpenCode emitted the task or completion promise + * - OpenCode emitted the task, completion, or blocked-handoff promise * - One or more tasks transitioned to "completed" during the iteration * - At least one repo-tracked file was observed to have changed * - The iteration failed outright (its signal is handled separately) * + * Note: a blocked-handoff iteration is intentionally excluded from "stalled" + * because the agent followed protocol — it surfaced a structured exit, the + * runner caught it, and the loop will break this iteration. We never want + * to penalize the agent (or the operator) for the canonical hand-off path. 
/**
 * Extract the agent's blocker note from an iteration's output.
 *
 * Convention: the blocked-handoff promise is emitted near the end of the
 * output and MAY be preceded by a rationale block introduced by a sentinel
 * header (`## Blocker`, `## Blocker Note` or `Blocker:`), optionally followed
 * by further sections such as `## Why` / `## Suggested Next Step`.
 *
 * Locating the promise line:
 *   1. Preferred: the first line whose trimmed text equals the promise name.
 *   2. Fallback: the last line that merely *contains* the promise name. This
 *      keeps the note when the agent decorates the promise (emphasis markers,
 *      a quote prefix, wrapping markup) instead of emitting it bare.
 *
 * Extracting the note:
 *   - If a sentinel header precedes the promise line, return everything from
 *     the nearest such header up to (not including) the promise line.
 *   - Otherwise return the last 40 non-blank lines before the promise line so
 *     the operator still gets something useful from unstructured output.
 *
 * @param {string} outputText full iteration stdout
 * @param {string} promiseName configured blocked-handoff promise name
 * @returns {string} the extracted note, trimmed; empty string when either
 *   argument is empty or no line mentions the promise
 */
function _extractBlockerNote(outputText, promiseName) {
  if (!outputText || !promiseName) return '';

  // Upper bound on the unstructured fallback so a chatty iteration cannot
  // dump its whole transcript into the handoff note.
  const FALLBACK_LINE_LIMIT = 40;

  const tag = String(promiseName);
  const lines = String(outputText).split(/\r?\n/);

  // 1) Exact match on a line of its own (the canonical form).
  let tagIdx = lines.findIndex((line) => line.trim() === tag);

  // 2) Decorated emission: take the LAST mention, since earlier mentions are
  //    more likely prose ("I will emit ...") than the actual signal.
  if (tagIdx === -1) {
    for (let i = lines.length - 1; i >= 0; i--) {
      if (lines[i].includes(tag)) {
        tagIdx = i;
        break;
      }
    }
  }
  if (tagIdx === -1) return '';

  // Walk backwards to the nearest sentinel header, if any.
  const sentinel = /^\s*(##\s*Blocker(\s+Note)?|Blocker:)/i;
  let startIdx = -1;
  for (let i = tagIdx - 1; i >= 0; i--) {
    if (sentinel.test(lines[i])) {
      startIdx = i;
      break;
    }
  }

  if (startIdx !== -1) {
    return lines.slice(startIdx, tagIdx).join('\n').trim();
  }

  // No sentinel: collect the trailing non-blank lines (newest first), then
  // restore their original order.
  const tail = [];
  for (let i = tagIdx - 1; i >= 0 && tail.length < FALLBACK_LINE_LIMIT; i--) {
    if (lines[i].trim()) tail.push(lines[i]);
  }
  return tail.reverse().join('\n').trim();
}
/**
 * Scan well-known locations for blocker / diagnostic artifacts the agent may
 * have written recently and return their (truncated) content so it can be
 * fed into the next iteration's prompt. Surfacing the agent's own prior
 * diagnosis avoids re-deriving it from scratch on every iteration.
 *
 * Locations probed (flat listing, regular files only):
 *   - `ralphDir` itself
 *   - `{repoRoot}/.ralph/baselines/{change}` where `{change}` is the name of
 *     the directory that contains `ralphDir`
 *
 * File names considered (case-insensitive, `.md` / `.txt` only):
 *   - handoff files: `handoff`, `blocked`, `blocker`, `blocker-note`
 *   - anything ending in `report.md` / `report.txt`
 *
 * Filtering:
 *   - files larger than 1 MiB are skipped
 *   - files not modified within the last 10 minutes are skipped, except that
 *     handoff files are kept when `includeStaleHandoff` is true
 *
 * The freshest `maxArtifacts` files are returned, each cut to at most
 * `maxCharsEach` characters. All filesystem errors are swallowed on purpose:
 * this is a best-effort probe and must never fail the iteration.
 *
 * @param {string} ralphDir
 * @param {object} [options]
 * @param {string} [options.repoRoot] defaults to `process.cwd()`
 * @param {number} [options.maxArtifacts=3]
 * @param {number} [options.maxCharsEach=1500]
 * @param {boolean} [options.includeStaleHandoff=false]
 * @returns {Array<{ path: string, content: string, truncated: boolean }>}
 *   `path` is relative to `repoRoot`; freshest artifact first
 */
function _detectBlockerArtifacts(ralphDir, options) {
  const fs = require('fs');
  const fsPath = require('path');

  const STALE_AFTER_MS = 10 * 60 * 1000;
  const MAX_ARTIFACT_BYTES = 1024 * 1024;

  // Resolve options field by field: an explicit `undefined` (or otherwise
  // unusable) override must fall back to the default rather than silently
  // disabling a cap or breaking every path computation below.
  const given = options || {};
  const nonNegativeInt = (value, fallback) =>
    Number.isFinite(value) && value >= 0 ? Math.floor(value) : fallback;
  const opts = {
    repoRoot: given.repoRoot || process.cwd(),
    maxArtifacts: nonNegativeInt(given.maxArtifacts, 3),
    maxCharsEach: nonNegativeInt(given.maxCharsEach, 1500),
    includeStaleHandoff: Boolean(given.includeStaleHandoff),
  };

  if (!ralphDir || !fs.existsSync(ralphDir)) return [];

  const isHandoffArtifact = (name) =>
    /^(handoff|blocked|blocker(-note)?)\.(md|txt)$/i.test(name);
  // `*report.(md|txt)` already covers the invariant/blocker/handoff report
  // names, so a single pattern is enough.
  const isInteresting = (name) =>
    isHandoffArtifact(name) || /report\.(md|txt)$/i.test(name);

  // absolute path -> mtimeMs (the Map also dedups repeated paths)
  const matches = new Map();

  const consider = (candidate) => {
    try {
      const st = fs.statSync(candidate);
      if (!st.isFile()) return;
      // Anything this large is a log or binary, not a hand-written note.
      if (st.size > MAX_ARTIFACT_BYTES) return;
      const stale = Date.now() - st.mtimeMs > STALE_AFTER_MS;
      const keepStale =
        opts.includeStaleHandoff && isHandoffArtifact(fsPath.basename(candidate));
      if (stale && !keepStale) return;
      matches.set(fsPath.resolve(candidate), st.mtimeMs);
    } catch (_) {
      // ENOENT / permission errors: ignore — best-effort probe.
    }
  };

  const scanDir = (dir) => {
    try {
      for (const ent of fs.readdirSync(dir, { withFileTypes: true })) {
        if (ent.isFile() && isInteresting(ent.name)) {
          consider(fsPath.join(dir, ent.name));
        }
      }
    } catch (_) {
      // Missing or unreadable directory: nothing to surface from here.
    }
  };

  // 1) The ralph directory itself, one level deep.
  scanDir(ralphDir);

  // 2) Convention-based baselines directory keyed by the change name, which
  //    is the directory that contains ralphDir.
  try {
    const changeName = fsPath.basename(fsPath.dirname(ralphDir));
    scanDir(fsPath.join(opts.repoRoot, '.ralph', 'baselines', changeName));
  } catch (_) {
    // Path derivation failed (unexpected ralphDir type): skip this probe.
  }

  if (matches.size === 0) return [];

  // Freshest first, so the cap keeps the most recent diagnosis.
  const freshestFirst = [...matches.entries()]
    .sort((a, b) => b[1] - a[1])
    .map(([artifactPath]) => artifactPath);

  const out = [];
  for (const artifactPath of freshestFirst.slice(0, opts.maxArtifacts)) {
    try {
      const raw = fs.readFileSync(artifactPath, 'utf8');
      const truncated = raw.length > opts.maxCharsEach;
      let content = truncated ? raw.slice(0, opts.maxCharsEach) : raw;
      // Do not leave half of a surrogate pair at the cut point.
      if (truncated && /[\uD800-\uDBFF]$/.test(content)) {
        content = content.slice(0, -1);
      }
      out.push({
        path: fsPath.relative(opts.repoRoot, artifactPath) || artifactPath,
        content: content.trim(),
        truncated,
      });
    } catch (_) {
      // Ignore unreadable artifacts.
    }
  }

  return out;
}
/**
 * Append the agent's blocker note to `HANDOFF.md` inside `ralphDir`, together
 * with iteration metadata, so an operator can reproduce the context.
 *
 * The file is an append-only audit trail: a single change can hit several
 * blocked handoffs over time (operator unblocks, loop resumes, hits another
 * blocker). The explanatory header is written only when the file is created.
 * Each call appends one section and never rewrites earlier content.
 *
 * @param {string} ralphDir directory that holds HANDOFF.md (created if missing)
 * @param {object} entry
 * @param {number} entry.iteration iteration number shown in the section title
 * @param {string} [entry.task] task label; empty or `'N/A'` means no task
 * @param {string} [entry.note] extracted blocker note; a placeholder is
 *   written when it is empty or not a string
 * @returns {string} the absolute path to HANDOFF.md
 * @throws {Error} filesystem errors from creating the directory or appending
 */
function _writeHandoff(ralphDir, entry) {
  const fs = require('fs');
  const fsPath = require('path');

  const { iteration, task, note } = entry || {};

  // `recursive: true` makes this a no-op when the directory already exists.
  fs.mkdirSync(ralphDir, { recursive: true });

  // resolve() rather than join() so the documented "absolute path" contract
  // also holds when the caller passes a relative ralphDir.
  const handoffPath = fsPath.resolve(ralphDir, 'HANDOFF.md');

  const timestamp = new Date().toISOString();
  const taskLine = task && task !== 'N/A' ? task : '(no task in progress)';
  const trimmedNote = typeof note === 'string' ? note.trim() : '';
  const noteBlock =
    trimmedNote ||
    '(agent emitted BLOCKED_HANDOFF without a structured blocker note;\n' +
      'check the iteration stdout log for the rationale)';

  const section = [
    '',
    `## Iteration ${iteration} — ${timestamp}`,
    '',
    `**Task:** ${taskLine}`,
    '',
    '**Agent blocker note:**',
    '',
    noteBlock,
    '',
    '**Operator next step:** investigate the blocker, take one of the actions',
    'the task spec authorizes (revert / isolate / justify / escalate), then',
    'rerun `ralph-run` to resume.',
    '',
    '---',
    '',
  ].join('\n');

  const header = fs.existsSync(handoffPath)
    ? ''
    : '# Ralph Handoff Log\n\nThis file is appended whenever the loop\n' +
      'exits with `BLOCKED_HANDOFF`. Each section is one blocker the\n' +
      'agent surfaced — review newest first.\n';

  // Append instead of read-modify-write: the existing log is never loaded
  // into memory and earlier sections cannot be clobbered by this call.
  fs.appendFileSync(handoffPath, header + section, 'utf8');
  return handoffPath;
}

/**
 * Collapse a blocker note into a compact single-line preview.
 *
 * Runs of whitespace (including newlines) become single spaces. Notes longer
 * than `limit` UTF-16 code units are cut to `limit - 1` units, stripped of
 * trailing whitespace, and suffixed with an ellipsis. A cut that would land
 * inside a surrogate pair drops the dangling half so the preview is always
 * well-formed text.
 *
 * @param {string} note full blocker note
 * @param {number} [limit=500] maximum preview length
 * @returns {string} the preview, or an empty string for empty / non-string input
 */
function _summarizeBlockerNote(note, limit = 500) {
  if (!note || typeof note !== 'string') return '';

  const oneLine = note.replace(/\s+/g, ' ').trim();
  if (!oneLine) return '';
  if (oneLine.length <= limit) return oneLine;

  let head = oneLine.slice(0, Math.max(0, limit - 1));
  // Never end on a lone high surrogate (half of an emoji / astral character).
  if (/[\uD800-\uDBFF]$/.test(head)) head = head.slice(0, -1);
  return `${head.replace(/\s+$/, '')}…`;
}
+ const recentHistory = history.recent(ralphDir, 3); const errorEntries = errors.readEntries(ralphDir, 3); - const iterationFeedback = _buildIterationFeedback(history.recent(ralphDir, 3), errorEntries); + const blockerArtifacts = _detectBlockerArtifacts(ralphDir, { + repoRoot: process.cwd(), + includeStaleHandoff: + priorRunWasBlockedHandoff || + recentHistory.some((entry) => entry && entry.blockedHandoffDetected), + }); + const iterationFeedback = _buildIterationFeedback( + recentHistory, + errorEntries, + blockerArtifacts, + ); // Inject any pending context const pendingContext = context.consume(ralphDir); @@ -392,6 +666,14 @@ async function run(opts) { const iterationSucceeded = _wasSuccessfulIteration(result); const hasCompletion = iterationSucceeded && _containsPromise(outputText, completionPromise); const hasTask = iterationSucceeded && _containsPromise(outputText, taskPromise); + // Blocked-handoff is also a successful-iteration signal (the agent + // followed protocol and explicitly emitted a structured exit). We + // treat it as a third top-level outcome alongside completion/task. + const hasBlockedHandoff = iterationSucceeded + && _containsPromise(outputText, blockedHandoffPromise); + const blockerNote = hasBlockedHandoff + ? _extractBlockerNote(outputText, blockedHandoffPromise) + : ''; const tasksAfter = options.tasksMode && options.tasksFile ? tasks.parseTasks(options.tasksFile) : []; @@ -435,6 +717,10 @@ async function run(opts) { duration, completionDetected: hasCompletion, taskDetected: hasTask, + blockedHandoffDetected: hasBlockedHandoff, + ...(blockerNote ? 
{ blockedHandoffNote: _summarizeBlockerNote(blockerNote) } : {}), + taskNumber: currentTaskMeta.number, + taskDescription: currentTaskMeta.description, toolUsage: result.toolUsage || [], filesChanged: result.filesChanged || [], exitCode: result.exitCode, @@ -472,6 +758,7 @@ async function run(opts) { iterationFailed, hasCompletion, hasTask, + hasBlockedHandoff, completedTasksCount: completedTasks.length, filesChangedCount: Array.isArray(result.filesChanged) ? result.filesChanged.length : 0, }); @@ -487,12 +774,15 @@ async function run(opts) { durationMs: duration, outcome: iterationFailed ? 'failure' - : stalledThisIteration - ? 'stalled' - : 'success', + : hasBlockedHandoff + ? 'blocked' + : stalledThisIteration + ? 'stalled' + : 'success', committed: commitResult.committed === true, hasCompletion, hasTask, + hasBlockedHandoff, completedTasksCount: completedTasks.length, filesChangedCount: Array.isArray(result.filesChanged) ? result.filesChanged.length : 0, stallStreak, @@ -508,6 +798,44 @@ async function run(opts) { break; } + // Blocked-handoff exits the loop *immediately* (no minIterations + // floor). The agent has signaled an external decision is required; + // we want the operator unblocked as fast as possible. We persist the + // agent's note before breaking so it survives even a hard-kill on + // the parent process (e.g. the operator hits Ctrl-C right after). + if (hasBlockedHandoff) { + let handoffPath = ''; + try { + handoffPath = _writeHandoff(ralphDir, { + iteration: iterationCount, + task: currentTask, + note: blockerNote, + completionPromise, + taskPromise, + }); + } catch (writeErr) { + // Don't let a HANDOFF.md write failure mask the original signal — + // we still want to exit cleanly with `blocked_handoff`. Surface + // the write error to stderr so it's diagnosable. + process.stderr.write( + `[mini-ralph] warning: failed to write HANDOFF.md: ${writeErr.message}\n` + ); + } + reporter.note( + handoffPath + ? 
`agent emitted ${blockedHandoffPromise}; blocker note saved to ${handoffPath}.` + : `agent emitted ${blockedHandoffPromise}; halting (HANDOFF.md write failed; see stderr).`, + 'warn' + ); + if (options.verbose) { + process.stderr.write( + `[mini-ralph] ${blockedHandoffPromise} detected at iteration ${iterationCount}; halting.\n` + ); + } + exitReason = 'blocked_handoff'; + break; + } + if (stallThreshold > 0 && stallStreak >= stallThreshold) { reporter.note( `stall detector: ${stallStreak} consecutive no-op iteration(s); halting.`, @@ -976,16 +1304,19 @@ function _failureFingerprint(entry, errorEntries) { stderrHead = _firstNonEmptyLine(match && match.stderr, 120); } // A "no promise emitted" iteration is also a distinguishable failure mode - // even when exitCode===0 and there's no stderr (e.g. the agent explicitly - // refuses to continue). Encoding it in the fingerprint lets the dedup - // collapse repeated hand-off iterations into a single actionable line - // instead of N identical bullets. - const noPromise = !entry.completionDetected && !entry.taskDetected; + // even when exitCode===0 and there's no stderr (e.g. the agent refuses to + // continue without using the control protocol). Encoding it separately keeps + // no-progress stalls distinct from explicit BLOCKED_HANDOFF stops. 
+ const noPromise = + !entry.completionDetected && + !entry.taskDetected && + !entry.blockedHandoffDetected; return JSON.stringify({ failureStage: entry.failureStage || '', exitCode: entry.exitCode, stderrHead, noPromise, + blockedHandoff: Boolean(entry.blockedHandoffDetected), commitAnomalyType: entry.commitAnomalyType || '', }); } @@ -998,6 +1329,7 @@ function _isEmptyFingerprint(fingerprint) { obj.exitCode === 0 && !obj.stderrHead && !obj.noPromise && + !obj.blockedHandoff && !obj.commitAnomalyType ); } catch { @@ -1005,14 +1337,23 @@ function _isEmptyFingerprint(fingerprint) { } } -function _buildIterationFeedback(recentHistory, errorEntries) { - if (!Array.isArray(recentHistory) || recentHistory.length === 0) { +function _buildIterationFeedback(recentHistory, errorEntries, blockerArtifacts) { + const hasArtifacts = Array.isArray(blockerArtifacts) && blockerArtifacts.length > 0; + if ((!Array.isArray(recentHistory) || recentHistory.length === 0) && !hasArtifacts) { return ''; } + if (!Array.isArray(recentHistory)) recentHistory = []; const problemLines = []; // Track fingerprint -> first iteration number for dedup const fingerprintSeen = new Map(); + // Track which task each *problematic* iteration was working when it failed + // / produced no progress. The same `taskNumber|taskDescription` repeating + // across the recent window is the strongest livelock signal we have — the + // agent is hitting the same wall with no new information. Persist the run + // length so we can emit a HARD prefix above the per-iteration list when + // the streak crosses the noise floor (3+ consecutive on the same task). 
+ const recentTasks = []; for (const entry of recentHistory) { const issues = []; @@ -1029,11 +1370,28 @@ function _buildIterationFeedback(recentHistory, errorEntries) { issues.push(`commit anomaly: ${entry.commitAnomaly}`); } - if (!entry.completionDetected && !entry.taskDetected) { + if (entry.blockedHandoffDetected) { + issues.push('agent emitted BLOCKED_HANDOFF and requested operator handoff'); + } else if (!entry.completionDetected && !entry.taskDetected) { issues.push('no loop promise emitted'); } if (issues.length > 0) { + // Build the task-identity stamp (used both for the per-line prefix and + // for streak detection). Empty when the runner had no task context for + // the iteration (non-tasks-mode, or pre-resume entries written by an + // older runner version). + const rawTaskId = entry.taskNumber + ? `${entry.taskNumber}|${entry.taskDescription || ''}` + : (entry.taskDescription || ''); + const taskStamp = entry.taskNumber + ? `Task ${entry.taskNumber}` + + (entry.taskDescription ? ` (${entry.taskDescription})` : '') + : (entry.taskDescription + ? `Task ${entry.taskDescription}` + : ''); + if (rawTaskId) recentTasks.push(rawTaskId); + // Compute fingerprint for dedup const fp = _failureFingerprint(entry, errorEntries); const isRealFailure = !_isEmptyFingerprint(fp); @@ -1047,13 +1405,19 @@ function _buildIterationFeedback(recentHistory, errorEntries) { if (isRealFailure && fingerprintSeen.has(fp) && !isIgnoreFilterAnomaly) { const firstIteration = fingerprintSeen.get(fp); + const stampSuffix = taskStamp ? ` [${taskStamp}]` : ''; problemLines.push( - `- Iteration ${entry.iteration}: same failure as iteration ${firstIteration} (see above).` + `- Iteration ${entry.iteration}${stampSuffix}: same failure as iteration ${firstIteration} (see above).` ); } else { if (isRealFailure && !isIgnoreFilterAnomaly) fingerprintSeen.set(fp, entry.iteration); - let line = `- Iteration ${entry.iteration}: ${issues.join('; ')}.`; + const stampPrefix = taskStamp ? 
` [${taskStamp}]` : ''; + let line = `- Iteration ${entry.iteration}${stampPrefix}: ${issues.join('; ')}.`; + + if (entry.blockedHandoffDetected && entry.blockedHandoffNote) { + line += ` Blocker note: ${entry.blockedHandoffNote}`; + } // For paths_ignored_filtered / all_paths_ignored, append the first two // ignored paths inline (with a (+N more) suffix) so the agent can see @@ -1116,14 +1480,82 @@ function _buildIterationFeedback(recentHistory, errorEntries) { } } - if (problemLines.length === 0) { + if (problemLines.length === 0 && !hasArtifacts) { return ''; } - return [ - 'Use these signals to avoid repeating the same failed approach:', - ...problemLines, - ].join('\n'); + // Detect the longest *trailing* run of the same task identity in the + // problematic-iteration window. Trailing because the only thing that + // matters is "is the most recent stretch still the same task?" — a stale + // streak from earlier in the window is irrelevant once the task changed. + let sameTaskStreak = 0; + let stuckTaskId = ''; + if (recentTasks.length > 0) { + const last = recentTasks[recentTasks.length - 1]; + if (last) { + stuckTaskId = last; + for (let i = recentTasks.length - 1; i >= 0; i--) { + if (recentTasks[i] === last) { + sameTaskStreak++; + } else { + break; + } + } + } + } + + const sections = []; + // The 3-iteration threshold matches the default `stallThreshold` so the + // hard-prefix and the eventual stall halt are aligned: the agent sees the + // warning one iteration before the stall detector fires, giving it a final + // chance to hand off cleanly via BLOCKED_HANDOFF rather than livelock. + if (sameTaskStreak >= 3 && stuckTaskId) { + const display = stuckTaskId.includes('|') + ? stuckTaskId.replace('|', ' — ') + : stuckTaskId; + sections.push( + [ + '⚠ STUCK ON SAME TASK', + `You have failed to make progress on the same task ${sameTaskStreak} iterations in a row: ${display}.`, + 'Stop retrying the same approach. Re-read the task spec, then either:', + ' 1. 
Pick a materially different approach (different files, different invariant).', + ' 2. If the task spec authorizes it (e.g. a "Stop and hand off if:" clause fired), emit BLOCKED_HANDOFF with a structured Blocker Note and stop. The runner will save it to .ralph/HANDOFF.md.', + '', + ].join('\n') + ); + } + + if (problemLines.length > 0) { + sections.push( + [ + 'Use these signals to avoid repeating the same failed approach:', + ...problemLines, + ].join('\n') + ); + } + + if (hasArtifacts) { + const artifactBlocks = blockerArtifacts.map((art) => { + const header = `### ${art.path}${art.truncated ? ' (truncated)' : ''}`; + // Code-fence the body so MDX-y artifacts (` ` `, ``) don't + // collide with the surrounding prompt markdown. + return [ + header, + '```', + art.content, + '```', + ].join('\n'); + }); + + sections.push( + [ + 'Prior-iteration blocker artifacts (read these BEFORE re-deriving the same diagnosis):', + ...artifactBlocks, + ].join('\n\n') + ); + } + + return sections.join('\n'); } function _extractErrorForIteration(errorEntries, iteration) { @@ -1358,4 +1790,7 @@ module.exports = { _failureFingerprint, _firstNonEmptyLine, _iterationIsStalled, + _extractBlockerNote, + _writeHandoff, + _detectBlockerArtifacts, }; diff --git a/package-lock.json b/package-lock.json index abb43b6..95c3544 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "spec-and-loop", - "version": "3.0.3", + "version": "3.3.0", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "spec-and-loop", - "version": "3.0.3", + "version": "3.3.0", "hasInstallScript": true, "license": "GPL-3.0", "os": [ diff --git a/package.json b/package.json index df9ab95..977df8c 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "spec-and-loop", - "version": "3.1.0", + "version": "3.3.0", "description": "OpenSpec + Ralph Loop integration for iterative development with opencode", "main": "index.js", "bin": { diff --git 
a/scripts/mini-ralph-cli.js b/scripts/mini-ralph-cli.js index e12148a..81df001 100755 --- a/scripts/mini-ralph-cli.js +++ b/scripts/mini-ralph-cli.js @@ -22,6 +22,11 @@ * --stall-threshold Halt after N consecutive no-op iterations (default: 3; 0 disables) * --completion-promise Completion promise string (default: COMPLETE) * --task-promise Task promise string (default: READY_FOR_NEXT_TASK) + * --blocked-handoff-promise + * Blocked-handoff promise string (default: BLOCKED_HANDOFF). + * Loop exits cleanly with `blocked_handoff` when the + * agent emits this tag and writes the agent's note + * to /HANDOFF.md. * --no-commit Suppress auto-commit * --model Optional model override * --verbose Verbose output @@ -53,6 +58,7 @@ function parseArgs(argv) { stallThreshold: 3, completionPromise: 'COMPLETE', taskPromise: 'READY_FOR_NEXT_TASK', + blockedHandoffPromise: 'BLOCKED_HANDOFF', noCommit: false, model: '', verbose: false, @@ -101,6 +107,9 @@ function parseArgs(argv) { case '--task-promise': opts.taskPromise = args[++i]; break; + case '--blocked-handoff-promise': + opts.blockedHandoffPromise = args[++i]; + break; case '--no-commit': opts.noCommit = true; break; @@ -154,6 +163,8 @@ Options: --stall-threshold Halt after N consecutive no-op iterations (default: 3; 0 disables) --completion-promise Completion promise string --task-promise Task promise string + --blocked-handoff-promise + Blocked-handoff promise string (default: BLOCKED_HANDOFF) --no-commit Suppress auto-commit --model Model override --verbose Verbose output @@ -212,6 +223,7 @@ async function main() { stallThreshold: opts.stallThreshold, completionPromise: opts.completionPromise, taskPromise: opts.taskPromise, + blockedHandoffPromise: opts.blockedHandoffPromise, noCommit: opts.noCommit, model: opts.model, verbose: opts.verbose, diff --git a/scripts/ralph-run.sh b/scripts/ralph-run.sh index 2a14e76..14c9b5d 100755 --- a/scripts/ralph-run.sh +++ b/scripts/ralph-run.sh @@ -333,6 +333,19 @@ validate_dependencies() { 
# Decide whether blocked OpenSpec artifacts may be repaired automatically.
#
# RALPH_RUN_AUTO_FIX_ARTIFACTS wins when it holds a recognised value:
#   1 / true / TRUE / yes / YES  -> allow  (status 0)
#   0 / false / FALSE / no / NO  -> forbid (status 1)
# Any other value (including unset) falls back to "is stdin a terminal?",
# so interactive runs allow the repair and automation must opt in.
should_auto_fix_artifacts() {
    local override="${RALPH_RUN_AUTO_FIX_ARTIFACTS:-}"

    if [[ "$override" == "1" || "$override" == "true" || "$override" == "TRUE" \
        || "$override" == "yes" || "$override" == "YES" ]]; then
        return 0
    fi

    if [[ "$override" == "0" || "$override" == "false" || "$override" == "FALSE" \
        || "$override" == "no" || "$override" == "NO" ]]; then
        return 1
    fi

    # No explicit preference: the function's status is that of the tty test.
    [[ -t 0 ]]
}
If blocked, output a short failure note describing the blocker and stop. +4. Do not ask questions or wait for input. If you cannot make progress on the current task because an external decision is required (revert protected drift outside the change scope, file an out-of-scope refactor, escalate to a human reviewer, etc.), STOP and emit a structured handoff in this exact form: + + ## Blocker Note + + + ## Why + + + ## Suggested Next Step + + + BLOCKED_HANDOFF + + The runner will save this note to .ralph/HANDOFF.md and exit cleanly with reason=blocked_handoff. Do NOT keep retrying the same task; emit the handoff and stop. Do NOT emit BLOCKED_HANDOFF for transient errors that a retry could fix (network blips, tool-not-found that is fixable by an absolute path, etc.) — those are normal failures the loop will retry on its own. 5. If the task is already satisfied by prior work, still flip the checkbox to [x] before emitting the promise. Do not create git commits yourself. The Ralph runner manages automatic task commits when auto-commit is enabled." @@ -975,12 +1014,53 @@ Do not create git commits yourself. The Ralph runner manages automatic task comm mini_ralph_args+=("--quiet") fi - # Run the internal mini Ralph CLI and capture output - { - node "$MINI_RALPH_CLI" "${mini_ralph_args[@]}" - } > >(tee "$stdout_log") 2> >(tee "$stderr_log") - local node_exit_code=$? - wait + # Run the internal mini Ralph CLI and capture output. + # + # Avoid Bash process substitution here. macOS ships Bash 3.2, and under + # Bats' captured `run` wrapper a bare `wait` after `> >(tee ...)` can hang + # after the node child has already exited. Explicit FIFOs give us concrete + # tee PIDs to wait on and work consistently on macOS and Linux. 
+ local stdout_pipe="$output_dir/ralph-stdout.pipe" + local stderr_pipe="$output_dir/ralph-stderr.pipe" + local node_exit_code=0 + local tee_stdout_pid="" + local tee_stderr_pid="" + local had_errexit=false + case $- in + *e*) + had_errexit=true + set +e + ;; + esac + + if mkfifo "$stdout_pipe" "$stderr_pipe" 2>/dev/null; then + tee "$stdout_log" < "$stdout_pipe" & + tee_stdout_pid=$! + tee "$stderr_log" < "$stderr_pipe" >&2 & + tee_stderr_pid=$! + + node "$MINI_RALPH_CLI" "${mini_ralph_args[@]}" > "$stdout_pipe" 2> "$stderr_pipe" + node_exit_code=$? + + wait "$tee_stdout_pid" 2>/dev/null || true + wait "$tee_stderr_pid" 2>/dev/null || true + rm -f "$stdout_pipe" "$stderr_pipe" + else + log_verbose "mkfifo unavailable; capturing output without live tee" + node "$MINI_RALPH_CLI" "${mini_ralph_args[@]}" > "$stdout_log" 2> "$stderr_log" + node_exit_code=$? + if [[ -s "$stdout_log" ]]; then + cat "$stdout_log" + fi + if [[ -s "$stderr_log" ]]; then + cat "$stderr_log" >&2 + fi + fi + + if [[ "$had_errexit" == true ]]; then + set -e + fi + return $node_exit_code } @@ -1159,6 +1239,7 @@ check_ralphified() { show_ralphify_warning() { local change_name="$1" + local preset_choice="${RALPH_RUN_RALPHIFY_CHOICE:-}" cat >&2 << 'WARNING_BOX' ┌─────────────────────────────────────────────────────────────────────┐ @@ -1173,16 +1254,29 @@ show_ralphify_warning() { └─────────────────────────────────────────────────────────────────────┘ WARNING_BOX + if [[ -z "$preset_choice" && ! -t 0 ]]; then + log_info "Non-interactive environment detected. Continuing without Ralph Wiggum configuration." + log_info 'Run `ralph-run init` to configure Ralph Wiggum best practices before the next interactive run.' + return 0 + fi + while true; do - echo "" >&2 - echo "Choose an option:" >&2 - echo " [A] Run ralphify init and redo the proposal, then continue" >&2 - echo " [C] Continue without init" >&2 - echo " [Q] Quit" >&2 - printf "Enter choice: " >&2 - if ! 
read -r choice; then - log_info "Non-interactive environment detected. Continuing without Ralph Wiggum configuration." - return 0 + local choice="" + if [[ -n "$preset_choice" ]]; then + choice="$preset_choice" + preset_choice="" + log_info "Using RALPH_RUN_RALPHIFY_CHOICE=$choice" + else + echo "" >&2 + echo "Choose an option:" >&2 + echo " [A] Run ralphify init and redo the proposal, then continue" >&2 + echo " [C] Continue without init" >&2 + echo " [Q] Quit" >&2 + printf "Enter choice: " >&2 + if ! read -r choice; then + log_info "Non-interactive environment detected. Continuing without Ralph Wiggum configuration." + return 0 + fi fi case "$choice" in diff --git a/tests/unit/javascript/mini-ralph-cli-flags.test.js b/tests/unit/javascript/mini-ralph-cli-flags.test.js new file mode 100644 index 0000000..78bcf20 --- /dev/null +++ b/tests/unit/javascript/mini-ralph-cli-flags.test.js @@ -0,0 +1,58 @@ +'use strict'; + +/** + * CLI-flag tests for scripts/mini-ralph-cli.js. + * + * The CLI is spawned as a subprocess with --help to verify that newly added + * flags (currently --blocked-handoff-promise) are advertised, and with + * --blocked-handoff-promise + --status against a missing --ralph-dir to verify + * that the flag is accepted by the parser; a genuinely unknown flag must exit 1. + * This catches accidental wiring regressions without spinning up the full loop. 
+ */ + +const path = require('path'); +const { spawnSync } = require('child_process'); + +const CLI = path.join(__dirname, '../../../scripts/mini-ralph-cli.js'); + +describe('mini-ralph-cli flag parsing', () => { + test('--help advertises --blocked-handoff-promise', () => { + const result = spawnSync('node', [CLI, '--help'], { encoding: 'utf8' }); + + expect(result.status).toBe(0); + expect(result.stdout).toContain('--blocked-handoff-promise'); + expect(result.stdout).toContain('BLOCKED_HANDOFF'); + }); + + test('--blocked-handoff-promise is accepted (no "unknown option" crash)', () => { + // Use --status with a non-existent ralph-dir so the CLI exits early but + // only AFTER successfully parsing its arguments. Any unknown-flag error + // would happen during parseArgs() and surface on stderr with exit 1. + const result = spawnSync( + 'node', + [ + CLI, + '--blocked-handoff-promise', 'CUSTOM_HANDOFF', + '--status', + '--ralph-dir', path.join(__dirname, 'no-such-dir-for-cli-test'), + ], + { encoding: 'utf8' } + ); + + expect(result.stderr).not.toContain('unknown option'); + // --status path always exits 0 with a "no run yet" message even when the + // dir is missing. 
+ expect(result.status).toBe(0); + }); + + test('rejects an actually-unknown flag with exit 1', () => { + const result = spawnSync( + 'node', + [CLI, '--this-flag-does-not-exist'], + { encoding: 'utf8' } + ); + + expect(result.status).toBe(1); + expect(result.stderr).toContain('unknown option'); + }); +}); diff --git a/tests/unit/javascript/mini-ralph-runner.test.js b/tests/unit/javascript/mini-ralph-runner.test.js index 2d2164e..0db5d77 100644 --- a/tests/unit/javascript/mini-ralph-runner.test.js +++ b/tests/unit/javascript/mini-ralph-runner.test.js @@ -31,6 +31,9 @@ const { _firstNonEmptyLine, _filterGitignored, _autoCommit, + _extractBlockerNote, + _writeHandoff, + _detectBlockerArtifacts, run, } = require('../../../lib/mini-ralph/runner'); @@ -768,6 +771,160 @@ describe('_buildIterationFeedback()', () => { expect(feedback).toContain('Iteration 2: opencode exited via signal SIGTERM'); }); + + test('describes BLOCKED_HANDOFF as an explicit handoff, not a missing promise', () => { + const feedback = _buildIterationFeedback([ + { + iteration: 7, + exitCode: 0, + signal: '', + filesChanged: [], + completionDetected: false, + taskDetected: false, + blockedHandoffDetected: true, + blockedHandoffNote: '## Blocker Note Contract validation failed again; use the prior parser fix.', + taskNumber: '3.1', + taskDescription: 'Align support/content and typed page contracts', + }, + ]); + + expect(feedback).toContain('Iteration 7 [Task 3.1 (Align support/content and typed page contracts)]: agent emitted BLOCKED_HANDOFF and requested operator handoff'); + expect(feedback).toContain('Blocker note: ## Blocker Note Contract validation failed again'); + expect(feedback).not.toContain('no loop promise emitted'); + }); +}); + +describe('_buildIterationFeedback() - task identity & same-task streak', () => { + test('per-iteration line includes task number and short description when present', () => { + const feedback = _buildIterationFeedback([ + { + iteration: 4, + exitCode: 0, + 
filesChanged: [], + completionDetected: false, + taskDetected: false, + taskNumber: '4.cross-route', + taskDescription: 'Verify shared-chrome carve-out', + }, + ]); + + expect(feedback).toContain('Iteration 4 [Task 4.cross-route (Verify shared-chrome carve-out)]: no loop promise emitted'); + }); + + test('falls back to bare task description when number is missing', () => { + const feedback = _buildIterationFeedback([ + { + iteration: 7, + exitCode: 0, + filesChanged: [], + completionDetected: false, + taskDetected: false, + taskNumber: '', + taskDescription: 'Refresh baseline', + }, + ]); + + expect(feedback).toContain('Iteration 7 [Task Refresh baseline]:'); + }); + + test('omits the task stamp when no task identity is recorded', () => { + const feedback = _buildIterationFeedback([ + { iteration: 2, exitCode: 0, filesChanged: [], completionDetected: false, taskDetected: false }, + ]); + + expect(feedback).toContain('Iteration 2: no loop promise emitted'); + expect(feedback).not.toContain('[Task'); + }); + + test('emits STUCK ON SAME TASK hard prefix when streak reaches 3', () => { + const sameTask = { taskNumber: '4.cross-route', taskDescription: 'Verify shared-chrome carve-out' }; + const feedback = _buildIterationFeedback([ + { iteration: 1, exitCode: 0, filesChanged: [], completionDetected: false, taskDetected: false, ...sameTask }, + { iteration: 2, exitCode: 0, filesChanged: [], completionDetected: false, taskDetected: false, ...sameTask }, + { iteration: 3, exitCode: 0, filesChanged: [], completionDetected: false, taskDetected: false, ...sameTask }, + ]); + + expect(feedback).toContain('STUCK ON SAME TASK'); + expect(feedback).toContain('3 iterations in a row'); + expect(feedback).toContain('4.cross-route'); + expect(feedback).toContain('BLOCKED_HANDOFF'); + }); + + test('does NOT emit the hard prefix when the streak resets mid-window', () => { + const a = { taskNumber: '4.cross-route', taskDescription: 'A' }; + const b = { taskNumber: '5.final', 
taskDescription: 'B' }; + const feedback = _buildIterationFeedback([ + { iteration: 1, exitCode: 0, filesChanged: [], completionDetected: false, taskDetected: false, ...a }, + { iteration: 2, exitCode: 0, filesChanged: [], completionDetected: false, taskDetected: false, ...a }, + { iteration: 3, exitCode: 0, filesChanged: [], completionDetected: false, taskDetected: false, ...b }, + ]); + + expect(feedback).not.toContain('STUCK ON SAME TASK'); + }); + + test('does NOT emit the hard prefix when the trailing streak is shorter than 3', () => { + const a = { taskNumber: '1.foo', taskDescription: 'A' }; + const b = { taskNumber: '2.bar', taskDescription: 'B' }; + const feedback = _buildIterationFeedback([ + { iteration: 1, exitCode: 0, filesChanged: [], completionDetected: false, taskDetected: false, ...a }, + { iteration: 2, exitCode: 0, filesChanged: [], completionDetected: false, taskDetected: false, ...b }, + { iteration: 3, exitCode: 0, filesChanged: [], completionDetected: false, taskDetected: false, ...b }, + ]); + + expect(feedback).not.toContain('STUCK ON SAME TASK'); + }); +}); + +describe('_buildIterationFeedback() - blocker artifact tee', () => { + test('appends artifact section with code-fenced content', () => { + const feedback = _buildIterationFeedback( + [{ iteration: 4, exitCode: 0, filesChanged: [], completionDetected: false, taskDetected: false }], + [], + [ + { + path: '.ralph/baselines/foo/shared-chrome-invariant-report.txt', + content: 'STATUS=BLOCKED\nREASON=carve-out hash drift', + truncated: false, + }, + ], + ); + + expect(feedback).toContain('Prior-iteration blocker artifacts'); + expect(feedback).toContain('### .ralph/baselines/foo/shared-chrome-invariant-report.txt'); + expect(feedback).toContain('STATUS=BLOCKED'); + expect(feedback).toContain('REASON=carve-out hash drift'); + }); + + test('marks truncated artifacts and renders multiple artifacts', () => { + const feedback = _buildIterationFeedback( + [], + [], + [ + { path: 'a.txt', 
content: 'first', truncated: true }, + { path: 'b.txt', content: 'second', truncated: false }, + ], + ); + + expect(feedback).toContain('### a.txt (truncated)'); + expect(feedback).toContain('### b.txt'); + expect(feedback).toContain('first'); + expect(feedback).toContain('second'); + }); + + test('returns empty string when there is neither history nor artifacts', () => { + expect(_buildIterationFeedback([], [], [])).toBe(''); + }); + + test('returns artifacts-only feedback when no problematic iterations exist', () => { + const feedback = _buildIterationFeedback( + [{ iteration: 1, exitCode: 0, filesChanged: ['x'], completionDetected: true, taskDetected: false }], + [], + [{ path: 'HANDOFF.md', content: 'note', truncated: false }], + ); + + expect(feedback).toContain('Prior-iteration blocker artifacts'); + expect(feedback).not.toContain('Use these signals'); + }); }); describe('iteration outcome helpers', () => { @@ -1797,6 +1954,87 @@ describe('run() with mocked invoker', () => { } }); + test('injects prior BLOCKED_HANDOFF and stale HANDOFF.md on resume', async () => { + const ralphDir = path.join(tmpDir, '.ralph-handoff-feedback'); + fs.mkdirSync(ralphDir, { recursive: true }); + history.append(ralphDir, { + iteration: 7, + duration: 123, + completionDetected: false, + taskDetected: false, + blockedHandoffDetected: true, + taskNumber: '3.1', + taskDescription: 'Align contracts', + toolUsage: [], + filesChanged: [], + exitCode: 0, + signal: '', + failureStage: '', + completedTasks: [], + }); + const handoffPath = path.join(ralphDir, 'HANDOFF.md'); + fs.writeFileSync(handoffPath, '## Blocker Note\nRepo-wide contract debt blocks task 3.1.'); + const stale = (Date.now() - 30 * 60 * 1000) / 1000; + fs.utimesSync(handoffPath, stale, stale); + + const prompts = []; + const restore = mockInvoker(invoker, async (opts) => { + prompts.push(opts.prompt); + return { + stdout: 'COMPLETE', + exitCode: 0, + filesChanged: [], + toolUsage: [], + }; + }); + + try { + await 
run(makeOptions({ ralphDir, maxIterations: 1, noCommit: true })); + expect(prompts[0]).toContain('agent emitted BLOCKED_HANDOFF and requested operator handoff'); + expect(prompts[0]).not.toContain('no loop promise emitted'); + expect(prompts[0]).toContain('Prior-iteration blocker artifacts'); + expect(prompts[0]).toContain('Repo-wide contract debt blocks task 3.1'); + } finally { + restore(); + } + }); + + test('injects stale HANDOFF.md when older state records blocked_handoff without newer history metadata', async () => { + const ralphDir = path.join(tmpDir, '.ralph-legacy-handoff-feedback'); + fs.mkdirSync(ralphDir, { recursive: true }); + state.init(ralphDir, { + active: false, + iteration: 1, + maxIterations: 2, + tasksMode: false, + tasksFile: null, + exitReason: 'blocked_handoff', + }); + const handoffPath = path.join(ralphDir, 'HANDOFF.md'); + fs.writeFileSync(handoffPath, '## Blocker Note\nLegacy handoff still matters.'); + const stale = (Date.now() - 30 * 60 * 1000) / 1000; + fs.utimesSync(handoffPath, stale, stale); + + const prompts = []; + const restore = mockInvoker(invoker, async (opts) => { + prompts.push(opts.prompt); + return { + stdout: 'COMPLETE', + exitCode: 0, + filesChanged: [], + toolUsage: [], + }; + }); + + try { + await run(makeOptions({ ralphDir, maxIterations: 2, noCommit: true })); + expect(prompts[0]).toContain('Prior-iteration blocker artifacts'); + expect(prompts[0]).toContain('Legacy handoff still matters'); + } finally { + restore(); + } + }); + test('records protected-artifact auto-commit anomalies in history', async () => { const ralphDir = path.join(tmpDir, '.ralph'); const tasksFile = path.join(tmpDir, 'openspec', 'changes', 'demo', 'tasks.md'); @@ -3341,3 +3579,320 @@ describe('_autoCommit loud stderr block (task 5.1)', () => { } }); }); + +// --------------------------------------------------------------------------- +// BLOCKED_HANDOFF: _extractBlockerNote, _writeHandoff, run() integration +// 
--------------------------------------------------------------------------- + +describe('_extractBlockerNote()', () => { + test('extracts the structured note from "## Blocker Note" sentinel up to the promise tag', () => { + const out = [ + 'some leading agent narration', + '', + '## Blocker Note', + 'Carve-out drift detected outside the change scope.', + '', + '## Why', + 'Six baseline files have changed; baseline refresh requires human review.', + '', + '## Suggested Next Step', + '- Review the diff and decide between revert / refresh / scope expansion.', + '', + 'BLOCKED_HANDOFF', + 'trailing junk that should be ignored', + ].join('\n'); + + const note = _extractBlockerNote(out, 'BLOCKED_HANDOFF'); + expect(note).toContain('## Blocker Note'); + expect(note).toContain('Carve-out drift detected'); + expect(note).toContain('## Why'); + expect(note).toContain('## Suggested Next Step'); + expect(note).not.toContain('trailing junk'); + expect(note).not.toContain('BLOCKED_HANDOFF'); + }); + + test('falls back to the last 40 non-blank lines when no sentinel header is present', () => { + const lines = []; + for (let i = 1; i <= 60; i++) lines.push(`line ${i}`); + lines.push('BLOCKED_HANDOFF'); + const note = _extractBlockerNote(lines.join('\n'), 'BLOCKED_HANDOFF'); + + expect(note).toContain('line 21'); + expect(note).toContain('line 60'); + expect(note).not.toContain('line 1'); + expect(note).not.toContain('line 20'); + }); + + test('returns empty string when the promise tag is absent', () => { + expect(_extractBlockerNote('nothing of interest here', 'BLOCKED_HANDOFF')).toBe(''); + }); + + test('returns empty string for empty inputs', () => { + expect(_extractBlockerNote('', 'BLOCKED_HANDOFF')).toBe(''); + expect(_extractBlockerNote('text', '')).toBe(''); + }); +}); + +describe('_writeHandoff()', () => { + const fs = require('fs'); + const path = require('path'); + + test('creates HANDOFF.md with iteration and task metadata', () => { + const ralphDir = path.join(tmpDir, 
'.ralph-handoff'); + const result = _writeHandoff(ralphDir, { + iteration: 5, + task: '4.cross-route Verify shared-chrome', + note: '## Blocker Note\nDetail here.', + }); + + expect(result).toBe(path.join(ralphDir, 'HANDOFF.md')); + const body = fs.readFileSync(result, 'utf8'); + expect(body).toContain('# Ralph Handoff Log'); + expect(body).toContain('## Iteration 5 —'); + expect(body).toContain('**Task:** 4.cross-route Verify shared-chrome'); + expect(body).toContain('## Blocker Note'); + expect(body).toContain('Detail here.'); + expect(body).toContain('Operator next step:'); + }); + + test('appends a second section instead of overwriting on a repeat call', () => { + const ralphDir = path.join(tmpDir, '.ralph-handoff-append'); + _writeHandoff(ralphDir, { iteration: 1, task: 'A', note: 'first' }); + _writeHandoff(ralphDir, { iteration: 2, task: 'B', note: 'second' }); + + const body = fs.readFileSync(path.join(ralphDir, 'HANDOFF.md'), 'utf8'); + expect(body).toContain('## Iteration 1'); + expect(body).toContain('## Iteration 2'); + expect(body).toContain('first'); + expect(body).toContain('second'); + // Ensure header appears once (the ascending-iteration order is by write time) + expect(body.match(/# Ralph Handoff Log/g) || []).toHaveLength(1); + }); + + test('substitutes a placeholder when the agent emits BLOCKED_HANDOFF without a note', () => { + const ralphDir = path.join(tmpDir, '.ralph-handoff-empty'); + _writeHandoff(ralphDir, { iteration: 1, task: 'X', note: '' }); + + const body = fs.readFileSync(path.join(ralphDir, 'HANDOFF.md'), 'utf8'); + expect(body).toContain('agent emitted BLOCKED_HANDOFF without a structured blocker note'); + }); +}); + +describe('_detectBlockerArtifacts()', () => { + const fs = require('fs'); + const path = require('path'); + + test('returns an empty array when ralphDir does not exist', () => { + expect(_detectBlockerArtifacts(path.join(tmpDir, 'no-such-dir'))).toEqual([]); + }); + + test('detects HANDOFF.md / BLOCKED.md inside 
ralphDir', () => { + const ralphDir = path.join(tmpDir, '.ralph'); + fs.mkdirSync(ralphDir, { recursive: true }); + fs.writeFileSync(path.join(ralphDir, 'HANDOFF.md'), 'handoff body'); + fs.writeFileSync(path.join(ralphDir, 'BLOCKED.md'), 'blocked body'); + + const artifacts = _detectBlockerArtifacts(ralphDir); + const names = artifacts.map(a => a.path).sort(); + expect(names.some(n => n.endsWith('HANDOFF.md'))).toBe(true); + expect(names.some(n => n.endsWith('BLOCKED.md'))).toBe(true); + }); + + test('detects baseline report files under /.ralph/baselines//', () => { + const repoRoot = path.join(tmpDir, 'repo'); + const changeName = 'sample-change'; + const ralphDir = path.join(repoRoot, 'openspec', 'changes', changeName, '.ralph'); + const baselineDir = path.join(repoRoot, '.ralph', 'baselines', changeName); + + fs.mkdirSync(ralphDir, { recursive: true }); + fs.mkdirSync(baselineDir, { recursive: true }); + fs.writeFileSync( + path.join(baselineDir, 'shared-chrome-invariant-report.txt'), + 'STATUS=BLOCKED\nREASON=...' + ); + + const artifacts = _detectBlockerArtifacts(ralphDir, { repoRoot }); + expect(artifacts).toHaveLength(1); + expect(artifacts[0].path).toContain('shared-chrome-invariant-report.txt'); + expect(artifacts[0].content).toContain('STATUS=BLOCKED'); + }); + + test('caps at maxArtifacts and prefers freshest by mtime', () => { + const ralphDir = path.join(tmpDir, '.ralph-cap'); + fs.mkdirSync(ralphDir, { recursive: true }); + const olderPath = path.join(ralphDir, 'old-report.md'); + const newerPath = path.join(ralphDir, 'new-report.md'); + fs.writeFileSync(olderPath, 'old'); + fs.writeFileSync(newerPath, 'new'); + // Backdate the older file so the freshness sort is deterministic. 
+ const past = (Date.now() - 60_000) / 1000; + fs.utimesSync(olderPath, past, past); + + const artifacts = _detectBlockerArtifacts(ralphDir, { maxArtifacts: 1 }); + expect(artifacts).toHaveLength(1); + expect(artifacts[0].path).toContain('new-report.md'); + }); + + test('truncates content to maxCharsEach and marks truncated=true', () => { + const ralphDir = path.join(tmpDir, '.ralph-trunc'); + fs.mkdirSync(ralphDir, { recursive: true }); + fs.writeFileSync(path.join(ralphDir, 'HANDOFF.md'), 'x'.repeat(2000)); + + const artifacts = _detectBlockerArtifacts(ralphDir, { maxCharsEach: 100 }); + expect(artifacts).toHaveLength(1); + expect(artifacts[0].truncated).toBe(true); + expect(artifacts[0].content.length).toBeLessThanOrEqual(100); + }); + + test('skips artifacts older than the 10-minute freshness window', () => { + const ralphDir = path.join(tmpDir, '.ralph-stale'); + fs.mkdirSync(ralphDir, { recursive: true }); + const stalePath = path.join(ralphDir, 'HANDOFF.md'); + fs.writeFileSync(stalePath, 'stale body'); + // Backdate to 30 minutes ago. 
+ const stale = (Date.now() - 30 * 60 * 1000) / 1000; + fs.utimesSync(stalePath, stale, stale); + + expect(_detectBlockerArtifacts(ralphDir)).toEqual([]); + }); + + test('keeps stale HANDOFF.md when prior history says the run stopped for handoff', () => { + const ralphDir = path.join(tmpDir, '.ralph-stale-handoff-allowed'); + fs.mkdirSync(ralphDir, { recursive: true }); + const stalePath = path.join(ralphDir, 'HANDOFF.md'); + fs.writeFileSync(stalePath, 'stale handoff body'); + const stale = (Date.now() - 30 * 60 * 1000) / 1000; + fs.utimesSync(stalePath, stale, stale); + + const artifacts = _detectBlockerArtifacts(ralphDir, { + includeStaleHandoff: true, + }); + + expect(artifacts).toHaveLength(1); + expect(artifacts[0].path).toContain('HANDOFF.md'); + expect(artifacts[0].content).toContain('stale handoff body'); + }); +}); + +describe('run() — BLOCKED_HANDOFF integration', () => { + const fs = require('fs'); + const path = require('path'); + const invoker = require('../../../lib/mini-ralph/invoker'); + + function mockInvoker(mockFn) { + const original = invoker.invoke; + invoker.invoke = mockFn; + return () => { invoker.invoke = original; }; + } + + test('exits with reason=blocked_handoff and writes HANDOFF.md when the promise tag is detected', async () => { + const ralphDir = path.join(tmpDir, '.ralph-handoff-int'); + const restore = mockInvoker(async () => ({ + stdout: [ + '## Blocker Note', + 'Cannot continue: external review required.', + '', + '## Why', + 'Carve-out drift outside change scope.', + '', + '## Suggested Next Step', + '- Operator must decide between revert and baseline refresh.', + '', + 'BLOCKED_HANDOFF', + ].join('\n'), + exitCode: 0, + filesChanged: [], + toolUsage: [], + })); + + try { + const result = await run({ + ralphDir, + promptText: 'Do the thing.', + maxIterations: 5, + minIterations: 1, + }); + + expect(result.completed).toBe(false); + expect(result.iterations).toBe(1); + expect(result.exitReason).toBe('blocked_handoff'); + + const 
persistedState = state.read(ralphDir); + expect(persistedState.exitReason).toBe('blocked_handoff'); + expect(persistedState.active).toBe(false); + + const handoffPath = path.join(ralphDir, 'HANDOFF.md'); + expect(fs.existsSync(handoffPath)).toBe(true); + const handoffBody = fs.readFileSync(handoffPath, 'utf8'); + expect(handoffBody).toContain('# Ralph Handoff Log'); + expect(handoffBody).toContain('Cannot continue: external review required.'); + expect(handoffBody).toContain('Carve-out drift outside change scope.'); + + const recent = history.recent(ralphDir, 1); + expect(recent[0].blockedHandoffDetected).toBe(true); + expect(recent[0].blockedHandoffNote).toContain('Cannot continue: external review required.'); + expect(recent[0].blockedHandoffNote).toContain('Carve-out drift outside change scope.'); + } finally { + restore(); + } + }); + + test('exits immediately on BLOCKED_HANDOFF even when minIterations has not been reached', async () => { + const ralphDir = path.join(tmpDir, '.ralph-handoff-min'); + const restore = mockInvoker(async () => ({ + stdout: '## Blocker Note\nblocked\nBLOCKED_HANDOFF', + exitCode: 0, + filesChanged: [], + toolUsage: [], + })); + + try { + const result = await run({ + ralphDir, + promptText: 'Do the thing.', + maxIterations: 10, + minIterations: 5, + }); + + expect(result.iterations).toBe(1); + expect(result.exitReason).toBe('blocked_handoff'); + } finally { + restore(); + } + }); + + test('does NOT count a BLOCKED_HANDOFF iteration toward the stall streak', async () => { + let n = 0; + const ralphDir = path.join(tmpDir, '.ralph-handoff-stall'); + const restore = mockInvoker(async () => { + n++; + // Iter 1 + 2 are stalls (no promise, no files), iter 3 is a BLOCKED_HANDOFF. + // With stallThreshold=3 we'd otherwise halt at iter 3 with reason=stalled; + // BLOCKED_HANDOFF must take precedence. 
+ if (n >= 3) { + return { + stdout: '## Blocker Note\nstop\nBLOCKED_HANDOFF', + exitCode: 0, + filesChanged: [], + toolUsage: [], + }; + } + return { stdout: 'no progress', exitCode: 0, filesChanged: [], toolUsage: [] }; + }); + + try { + const result = await run({ + ralphDir, + promptText: 'Do the thing.', + maxIterations: 10, + minIterations: 1, + stallThreshold: 3, + }); + + expect(result.iterations).toBe(3); + expect(result.exitReason).toBe('blocked_handoff'); + } finally { + restore(); + } + }); +});