From e5b829e14c5f13cfdcaad57e5dfc315f976f77e3 Mon Sep 17 00:00:00 2001 From: Nixon Cheaz <6854716+ncheaz@users.noreply.github.com> Date: Tue, 5 May 2026 19:15:38 -0400 Subject: [PATCH] Release 3.3.3: auto-resolve safe handoffs Enable bounded handoff auto-resolution by default so obvious verifier-scope blockers can continue without operator edits, while preserving explicit disable flags and retry budgets. --- lib/mini-ralph/runner.js | 258 ++++++++++++++++- package-lock.json | 4 +- package.json | 2 +- scripts/mini-ralph-cli.js | 28 +- scripts/ralph-run.sh | 18 ++ tests/helpers/test-functions.sh | 6 + tests/unit/bash/test-execute-ralph-loop.bats | 74 +++++ .../javascript/mini-ralph-cli-flags.test.js | 44 +++ .../unit/javascript/mini-ralph-runner.test.js | 270 +++++++++++++++++- 9 files changed, 698 insertions(+), 6 deletions(-) diff --git a/lib/mini-ralph/runner.js b/lib/mini-ralph/runner.js index db181e9..d7a1ba9 100644 --- a/lib/mini-ralph/runner.js +++ b/lib/mini-ralph/runner.js @@ -50,6 +50,10 @@ const DEFAULTS = { // toward the streak because their signal is already surfaced via the // `Recent Loop Signals` feedback block. stallThreshold: 3, + // Enabled by default: continue after a BLOCKED_HANDOFF only when the handoff note has + // explicit evidence for a safe, bounded resolution class. + autoResolveHandoffs: true, + autoResolveHandoffMaxPerRun: 6, }; /** @@ -80,6 +84,170 @@ function _iterationIsStalled(iterationSignals) { return true; } +function _resolveAutoResolveHandoffConfig(options, existingState) { + const enabled = options.autoResolveHandoffs === true; + const maxPerRun = + Number.isInteger(options.autoResolveHandoffMaxPerRun) && + options.autoResolveHandoffMaxPerRun > 0 + ? options.autoResolveHandoffMaxPerRun + : DEFAULTS.autoResolveHandoffMaxPerRun; + const previous = + existingState && + existingState.autoResolveHandoffs && + typeof existingState.autoResolveHandoffs === 'object' + ? 
existingState.autoResolveHandoffs + : {}; + const previousAttempts = + previous.attempts && typeof previous.attempts === 'object' + ? previous.attempts + : {}; + const previousTotal = Number.isInteger(previous.totalAttempts) + ? previous.totalAttempts + : 0; + + return { + enabled, + maxPerRun, + state: { + enabled, + maxPerRun, + totalAttempts: previousTotal, + attempts: Object.assign({}, previousAttempts), + lastDecision: previous.lastDecision || null, + }, + }; +} + +function _handoffHasFocusedVerifierEvidence(note) { + if (!note) return false; + const text = String(note); + const mentionsFocusedVerifier = + /\bfocused\b[\s\S]{0,500}\b(verifier|command|test|spec|vitest)\b/i.test(text) || + /\b(verifier|command|test|spec|vitest)\b[\s\S]{0,500}\bfocused\b/i.test(text); + const saysFocusedPasses = + /\b(passes?|passed|exits?\s+0|exit(?:ed)?\s+0|green)\b/i.test(text); + const saysBroadFails = + /\b(broad|full|required|suite|repo-wide)\b[\s\S]{0,500}\b(fails?|failed|red|non[-\s]?zero)\b/i.test(text) || + /\b(fails?|failed|red|non[-\s]?zero)\b[\s\S]{0,500}\b(broad|full|required|suite|repo-wide)\b/i.test(text); + const saysFailuresAreUnrelated = + /\b(unrelated|pre[-\s]?existing|out[-\s]?of[-\s]?scope|known failures?|not introduced|baseline)\b/i.test(text); + + return mentionsFocusedVerifier && saysFocusedPasses && saysBroadFails && saysFailuresAreUnrelated; +} + +function _classifyAutoResolvableHandoff(blockerNote, baselineGateConflict) { + if (_handoffHasFocusedVerifierEvidence(blockerNote)) { + return { + className: 'verifier_narrowing', + summary: 'focused verifier passes while the broad verifier fails on unrelated/pre-existing failures', + allowedFiles: [], + }; + } + + if ( + baselineGateConflict && + baselineGateConflict.mode === 'authorized_cleanup' && + baselineGateConflict.budgetUsed !== true && + Array.isArray(baselineGateConflict.allowedFiles) && + baselineGateConflict.allowedFiles.length > 0 + ) { + return { + className: 'authorized_cleanup', + summary: 
'task text explicitly authorizes one cleanup attempt for named files', + allowedFiles: baselineGateConflict.allowedFiles.slice(), + }; + } + + return null; +} + +function _autoResolveHandoffBudgetKey(currentTaskMeta, className) { + const taskId = + currentTaskMeta && currentTaskMeta.number + ? currentTaskMeta.number + : currentTaskMeta && currentTaskMeta.description + ? currentTaskMeta.description + : 'unknown-task'; + return `${taskId}:${className || 'unknown'}`; +} + +function _decideAutoResolveHandoff(config, blockerNote, currentTaskMeta, baselineGateConflict) { + const disabledDecision = { allowed: false, reason: 'disabled', className: '', budgetKey: '' }; + if (!config || config.enabled !== true) return disabledDecision; + + const classification = _classifyAutoResolvableHandoff(blockerNote, baselineGateConflict); + if (!classification) { + return { + allowed: false, + reason: 'ambiguous_or_unsupported_handoff', + className: '', + budgetKey: '', + }; + } + + const budgetKey = _autoResolveHandoffBudgetKey(currentTaskMeta, classification.className); + const totalAttempts = Number.isInteger(config.state && config.state.totalAttempts) + ? config.state.totalAttempts + : 0; + const maxPerRun = Number.isInteger(config.maxPerRun) + ? config.maxPerRun + : DEFAULTS.autoResolveHandoffMaxPerRun; + const attempts = config.state && config.state.attempts ? 
config.state.attempts : {}; + + if (totalAttempts >= maxPerRun) { + return Object.assign({}, classification, { + allowed: false, + reason: 'global_budget_exhausted', + budgetKey, + }); + } + + if (attempts[budgetKey]) { + return Object.assign({}, classification, { + allowed: false, + reason: 'task_class_budget_exhausted', + budgetKey, + }); + } + + return Object.assign({}, classification, { + allowed: true, + reason: 'authorized', + budgetKey, + }); +} + +function _consumeAutoResolveHandoffBudget(config, decision, iteration) { + if (!config || !config.state || !decision || decision.allowed !== true || !decision.budgetKey) { + return null; + } + + const attempts = Object.assign({}, config.state.attempts || {}); + attempts[decision.budgetKey] = { + className: decision.className, + iteration, + attemptedAt: new Date().toISOString(), + }; + + const totalAttempts = (Number.isInteger(config.state.totalAttempts) + ? config.state.totalAttempts + : 0) + 1; + + config.state = Object.assign({}, config.state, { + totalAttempts, + attempts, + lastDecision: { + className: decision.className, + reason: decision.reason, + budgetKey: decision.budgetKey, + iteration, + allowedFiles: decision.allowedFiles || [], + }, + }); + + return config.state; +} + function _isFailedIteration(result) { if (!result || typeof result !== 'object') return false; if (result.signal !== null && result.signal !== undefined && result.signal !== '') { @@ -462,6 +630,7 @@ async function run(opts) { const resumeIteration = _resolveStartIteration(existingState, options); const priorRunWasBlockedHandoff = existingState && existingState.exitReason === 'blocked_handoff'; + const autoResolveHandoffs = _resolveAutoResolveHandoffConfig(options, existingState); if (options.verbose && resumeIteration > 1) { process.stderr.write( @@ -512,6 +681,7 @@ async function run(opts) { stoppedAt: null, exitReason: null, pendingDirtyPaths, + autoResolveHandoffs: autoResolveHandoffs.state, }); stateInitialized = true; @@ -597,6 
+767,7 @@ async function run(opts) { fullHistory, ); const baselineGateFeedback = _formatBaselineGateFeedback(baselineGateConflict); + const autoResolveHandoffFeedback = _buildAutoResolveHandoffFeedback(recentHistory); // Inject any pending context const pendingContext = context.consume(ralphDir); @@ -612,6 +783,10 @@ async function run(opts) { promptSections.push(`## Recent Loop Signals\n\n${iterationFeedback}`); } + if (autoResolveHandoffFeedback) { + promptSections.push(`## Auto-Resolve Handoff\n\n${autoResolveHandoffFeedback}`); + } + if (lessonsSection) { promptSections.push(lessonsSection); } @@ -705,6 +880,24 @@ async function run(opts) { const blockerNote = hasBlockedHandoff ? _extractBlockerNote(outputText, blockedHandoffPromise) : ''; + const autoResolveHandoffDecision = hasBlockedHandoff + ? _decideAutoResolveHandoff( + autoResolveHandoffs, + blockerNote, + currentTaskMeta, + baselineGateConflict, + ) + : null; + if (autoResolveHandoffDecision && autoResolveHandoffDecision.allowed) { + const nextAutoResolveState = _consumeAutoResolveHandoffBudget( + autoResolveHandoffs, + autoResolveHandoffDecision, + iterationCount, + ); + if (nextAutoResolveState) { + state.update(ralphDir, { autoResolveHandoffs: nextAutoResolveState }); + } + } const tasksAfter = options.tasksMode && options.tasksFile ? tasks.parseTasks(options.tasksFile) : []; @@ -787,6 +980,15 @@ async function run(opts) { signal: result.signal || '', failureStage: result.failureStage || '', completedTasks: completedTasks.map((task) => task.fullDescription || task.description), + ...(autoResolveHandoffDecision + ? 
{ + autoResolveHandoffAttempted: autoResolveHandoffDecision.allowed === true, + autoResolveHandoffClass: autoResolveHandoffDecision.className || '', + autoResolveHandoffReason: autoResolveHandoffDecision.reason || '', + autoResolveHandoffBudgetKey: autoResolveHandoffDecision.budgetKey || '', + autoResolveHandoffAllowedFiles: autoResolveHandoffDecision.allowedFiles || [], + } + : {}), commitAttempted: commitResult.attempted, commitCreated: commitResult.committed, commitAnomaly: commitResult.anomaly ? commitResult.anomaly.message : '', @@ -894,9 +1096,21 @@ async function run(opts) { reporter.note( handoffPath ? `agent emitted ${blockedHandoffPromise}; blocker note saved to ${handoffPath}.` - : `agent emitted ${blockedHandoffPromise}; halting (HANDOFF.md write failed; see stderr).`, + : `agent emitted ${blockedHandoffPromise}; HANDOFF.md write failed (see stderr).`, 'warn' ); + if (autoResolveHandoffDecision && autoResolveHandoffDecision.allowed) { + reporter.note( + `auto-resolve handoffs: continuing once for ${autoResolveHandoffDecision.className} (${autoResolveHandoffDecision.budgetKey}).`, + 'warn' + ); + if (options.verbose) { + process.stderr.write( + `[mini-ralph] auto-resolve handoff consumed budget key ${autoResolveHandoffDecision.budgetKey}; continuing.\n` + ); + } + continue; + } if (options.verbose) { process.stderr.write( `[mini-ralph] ${blockedHandoffPromise} detected at iteration ${iterationCount}; halting.\n` @@ -1780,6 +1994,42 @@ function _buildIterationFeedback(recentHistory, errorEntries, blockerArtifacts) return sections.join('\n'); } +function _buildAutoResolveHandoffFeedback(recentHistory) { + if (!Array.isArray(recentHistory) || recentHistory.length === 0) return ''; + + const entry = recentHistory + .slice() + .reverse() + .find((item) => item && item.autoResolveHandoffAttempted === true); + + if (!entry) return ''; + + const className = entry.autoResolveHandoffClass || 'unknown'; + const lines = [ + `The previous iteration emitted 
BLOCKED_HANDOFF, but auto-resolution is enabled and spent its bounded attempt for ${className}.`, + 'You have exactly one continuation attempt for this task/blocker class. Do not broaden task scope, do not repair unrelated snapshots or UI behavior, and do not keep retrying if the evidence does not hold.', + ]; + + if (className === 'verifier_narrowing') { + lines.push( + 'Allowed action: if the handoff explicitly names a focused verifier that passes and a broad verifier that fails only on unrelated/pre-existing failures, update only the current task verifier from the broad command to that focused command, run the focused command once, and complete the task only if it passes. If the focused command is absent, ambiguous, or fails, emit BLOCKED_HANDOFF instead of retrying.' + ); + } else if (className === 'authorized_cleanup') { + const files = Array.isArray(entry.autoResolveHandoffAllowedFiles) + ? entry.autoResolveHandoffAllowedFiles.filter(Boolean) + : []; + lines.push( + `Allowed action: make one cleanup attempt only in the task-authorized file list${files.length > 0 ? ` (${files.join(', ')})` : ''}. If the gate still fails, emit BLOCKED_HANDOFF instead of continuing.` + ); + } else { + lines.push( + 'Allowed action: continue only if the blocker evidence remains explicit and within the runner-approved safe class; otherwise emit BLOCKED_HANDOFF.' 
+ ); + } + + return lines.join('\n'); +} + function _buildBaselineGateFeedback(ralphDir, tasksFile, currentTaskMeta, recentHistory) { return _formatBaselineGateFeedback( _analyzeBaselineGateConflict(ralphDir, tasksFile, currentTaskMeta, recentHistory) @@ -2392,6 +2642,12 @@ module.exports = { _formatAutoCommitMessage, _truncateSubjectSummary, _buildIterationFeedback, + _buildAutoResolveHandoffFeedback, + _resolveAutoResolveHandoffConfig, + _handoffHasFocusedVerifierEvidence, + _classifyAutoResolvableHandoff, + _decideAutoResolveHandoff, + _consumeAutoResolveHandoffBudget, _buildBaselineGateFeedback, _analyzeBaselineGateConflict, _formatBaselineGateFeedback, diff --git a/package-lock.json b/package-lock.json index cefe514..4d290c9 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "spec-and-loop", - "version": "3.3.2", + "version": "3.3.3", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "spec-and-loop", - "version": "3.3.2", + "version": "3.3.3", "hasInstallScript": true, "license": "GPL-3.0", "os": [ diff --git a/package.json b/package.json index 715d87d..784a193 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "spec-and-loop", - "version": "3.3.2", + "version": "3.3.3", "description": "OpenSpec + Ralph Loop integration for iterative development with opencode", "main": "index.js", "bin": { diff --git a/scripts/mini-ralph-cli.js b/scripts/mini-ralph-cli.js index 81df001..51f29d1 100755 --- a/scripts/mini-ralph-cli.js +++ b/scripts/mini-ralph-cli.js @@ -27,6 +27,10 @@ * Loop exits cleanly with `blocked_handoff` when the * agent emits this tag and writes the agent's note * to /HANDOFF.md. 
+ * --auto-resolve-handoffs Enable bounded continuation attempts for + * explicit, safe BLOCKED_HANDOFF classes (default) + * --no-auto-resolve-handoffs + * Disable auto-resolution; overrides the default and env * --no-commit Suppress auto-commit * --model Optional model override * --verbose Verbose output @@ -44,6 +48,11 @@ const miniRalph = require('../lib/mini-ralph/index'); // Argument parsing // --------------------------------------------------------------------------- +function _envFlagDefaultEnabled(value) { + if (value === undefined) return true; + return !/^(0|false|no|off)$/i.test(String(value || '').trim()); +} + function parseArgs(argv) { const args = argv.slice(2); const opts = { @@ -59,6 +68,7 @@ function parseArgs(argv) { completionPromise: 'COMPLETE', taskPromise: 'READY_FOR_NEXT_TASK', blockedHandoffPromise: 'BLOCKED_HANDOFF', + autoResolveHandoffs: _envFlagDefaultEnabled(process.env.RALPH_AUTO_RESOLVE_HANDOFFS), noCommit: false, model: '', verbose: false, @@ -110,6 +120,12 @@ function parseArgs(argv) { case '--blocked-handoff-promise': opts.blockedHandoffPromise = args[++i]; break; + case '--auto-resolve-handoffs': + opts.autoResolveHandoffs = true; + break; + case '--no-auto-resolve-handoffs': + opts.autoResolveHandoffs = false; + break; case '--no-commit': opts.noCommit = true; break; @@ -165,6 +181,8 @@ Options: --task-promise Task promise string --blocked-handoff-promise Blocked-handoff promise string (default: BLOCKED_HANDOFF) + --auto-resolve-handoffs Enable bounded continuation for explicit safe handoffs + --no-auto-resolve-handoffs Disable bounded continuation for explicit safe handoffs --no-commit Suppress auto-commit --model Model override --verbose Verbose output @@ -224,6 +242,7 @@ async function main() { completionPromise: opts.completionPromise, taskPromise: opts.taskPromise, blockedHandoffPromise: opts.blockedHandoffPromise, + autoResolveHandoffs: opts.autoResolveHandoffs, noCommit: opts.noCommit, model: opts.model, verbose: opts.verbose, @@ 
-251,4 +270,11 @@ async function main() { } } -main(); +if (require.main === module) { + main(); +} + +module.exports = { + _envFlagDefaultEnabled, + _parseArgs: parseArgs, +}; diff --git a/scripts/ralph-run.sh b/scripts/ralph-run.sh index a50283a..2c91074 100755 --- a/scripts/ralph-run.sh +++ b/scripts/ralph-run.sh @@ -129,6 +129,7 @@ resolve_ralph_command() { CHANGE_NAME="" MAX_ITERATIONS="" NO_COMMIT=false +AUTO_RESOLVE_HANDOFFS="" SHOW_STATUS=false SHOW_VERSION=false ADD_CONTEXT="" @@ -186,6 +187,9 @@ OPTIONS: --change Specify the OpenSpec change to execute (default: auto-detect) --max-iterations Maximum iterations for Ralph loop (default: 50) --no-commit Suppress automatic git commits during the loop + --auto-resolve-handoffs Enable bounded continuation for explicit safe handoffs + --no-auto-resolve-handoffs + Disable bounded continuation for explicit safe handoffs --verbose, -v Enable verbose mode for debugging --quiet Suppress the per-iteration progress stream --version Print the version and exit @@ -232,6 +236,14 @@ parse_arguments() { NO_COMMIT=true shift ;; + --auto-resolve-handoffs) + AUTO_RESOLVE_HANDOFFS=true + shift + ;; + --no-auto-resolve-handoffs) + AUTO_RESOLVE_HANDOFFS=false + shift + ;; --verbose|-v) VERBOSE=true shift @@ -1006,6 +1018,12 @@ Do not create git commits yourself. The Ralph runner manages automatic task comm mini_ralph_args+=("--no-commit") fi + if [[ "$AUTO_RESOLVE_HANDOFFS" == true ]]; then + mini_ralph_args+=("--auto-resolve-handoffs") + elif [[ "$AUTO_RESOLVE_HANDOFFS" == false ]]; then + mini_ralph_args+=("--no-auto-resolve-handoffs") + fi + if [[ "$VERBOSE" == true ]]; then mini_ralph_args+=("--verbose") fi diff --git a/tests/helpers/test-functions.sh b/tests/helpers/test-functions.sh index 8501a42..47332f2 100644 --- a/tests/helpers/test-functions.sh +++ b/tests/helpers/test-functions.sh @@ -862,6 +862,12 @@ Do not create git commits yourself. 
The Ralph runner manages automatic task comm mini_ralph_args+=("--no-commit") fi + if [[ "$AUTO_RESOLVE_HANDOFFS" == true ]]; then + mini_ralph_args+=("--auto-resolve-handoffs") + elif [[ "$AUTO_RESOLVE_HANDOFFS" == false ]]; then + mini_ralph_args+=("--no-auto-resolve-handoffs") + fi + if [[ "$VERBOSE" == true ]]; then mini_ralph_args+=("--verbose") fi diff --git a/tests/unit/bash/test-execute-ralph-loop.bats b/tests/unit/bash/test-execute-ralph-loop.bats index f968488..aee0392 100644 --- a/tests/unit/bash/test-execute-ralph-loop.bats +++ b/tests/unit/bash/test-execute-ralph-loop.bats @@ -443,6 +443,80 @@ FAKECLI [ "$status" -ne 0 ] } +@test "execute_ralph_loop: passes --auto-resolve-handoffs when enabled" { + local test_dir + test_dir=$(setup_test_dir) + local change_dir + change_dir=$(_make_change_dir "$test_dir") + local ralph_dir="$test_dir/.ralph" + mkdir -p "$ralph_dir" + + local args_file="$test_dir/cli-args.txt" + cat > "$test_dir/fake-cli.js" << FAKECLI +#!/usr/bin/env node +const fs = require('fs'); +fs.writeFileSync('$args_file', process.argv.slice(2).join('\n') + '\n'); +process.exit(0); +FAKECLI + chmod +x "$test_dir/fake-cli.js" + MINI_RALPH_CLI="$test_dir/fake-cli.js" + AUTO_RESOLVE_HANDOFFS=true + + execute_ralph_loop "$change_dir" "$ralph_dir" 1 + + run grep -q -- "--auto-resolve-handoffs" "$args_file" + [ "$status" -eq 0 ] +} + +@test "execute_ralph_loop: does NOT pass --auto-resolve-handoffs by default" { + local test_dir + test_dir=$(setup_test_dir) + local change_dir + change_dir=$(_make_change_dir "$test_dir") + local ralph_dir="$test_dir/.ralph" + mkdir -p "$ralph_dir" + + local args_file="$test_dir/cli-args.txt" + cat > "$test_dir/fake-cli.js" << FAKECLI +#!/usr/bin/env node +const fs = require('fs'); +fs.writeFileSync('$args_file', process.argv.slice(2).join('\n') + '\n'); +process.exit(0); +FAKECLI + chmod +x "$test_dir/fake-cli.js" + MINI_RALPH_CLI="$test_dir/fake-cli.js" + + execute_ralph_loop "$change_dir" "$ralph_dir" 1 + + run grep 
-q -- "--auto-resolve-handoffs" "$args_file" + [ "$status" -ne 0 ] +} + +@test "execute_ralph_loop: passes --no-auto-resolve-handoffs when disabled" { + local test_dir + test_dir=$(setup_test_dir) + local change_dir + change_dir=$(_make_change_dir "$test_dir") + local ralph_dir="$test_dir/.ralph" + mkdir -p "$ralph_dir" + + local args_file="$test_dir/cli-args.txt" + cat > "$test_dir/fake-cli.js" << FAKECLI +#!/usr/bin/env node +const fs = require('fs'); +fs.writeFileSync('$args_file', process.argv.slice(2).join('\n') + '\n'); +process.exit(0); +FAKECLI + chmod +x "$test_dir/fake-cli.js" + MINI_RALPH_CLI="$test_dir/fake-cli.js" + AUTO_RESOLVE_HANDOFFS=false + + execute_ralph_loop "$change_dir" "$ralph_dir" 1 + + run grep -q -- "--no-auto-resolve-handoffs" "$args_file" + [ "$status" -eq 0 ] +} + # --------------------------------------------------------------------------- # execute_ralph_loop: missing internal runtime # --------------------------------------------------------------------------- diff --git a/tests/unit/javascript/mini-ralph-cli-flags.test.js b/tests/unit/javascript/mini-ralph-cli-flags.test.js index 78bcf20..2a088c5 100644 --- a/tests/unit/javascript/mini-ralph-cli-flags.test.js +++ b/tests/unit/javascript/mini-ralph-cli-flags.test.js @@ -14,6 +14,7 @@ const path = require('path'); const { spawnSync } = require('child_process'); const CLI = path.join(__dirname, '../../../scripts/mini-ralph-cli.js'); +const { _parseArgs } = require('../../../scripts/mini-ralph-cli.js'); describe('mini-ralph-cli flag parsing', () => { test('--help advertises --blocked-handoff-promise', () => { @@ -21,6 +22,8 @@ describe('mini-ralph-cli flag parsing', () => { expect(result.status).toBe(0); expect(result.stdout).toContain('--blocked-handoff-promise'); + expect(result.stdout).toContain('--auto-resolve-handoffs'); + expect(result.stdout).toContain('--no-auto-resolve-handoffs'); expect(result.stdout).toContain('BLOCKED_HANDOFF'); }); @@ -45,6 +48,47 @@ 
describe('mini-ralph-cli flag parsing', () => { expect(result.status).toBe(0); }); + test('RALPH_AUTO_RESOLVE_HANDOFFS enables by default and disables only for explicit false values', () => { + const original = process.env.RALPH_AUTO_RESOLVE_HANDOFFS; + try { + delete process.env.RALPH_AUTO_RESOLVE_HANDOFFS; + expect(_parseArgs(['node', CLI]).autoResolveHandoffs).toBe(true); + + for (const value of ['1', 'true', 'TRUE', 'yes', 'on', '']) { + process.env.RALPH_AUTO_RESOLVE_HANDOFFS = value; + expect(_parseArgs(['node', CLI]).autoResolveHandoffs).toBe(true); + } + + for (const value of ['0', 'false', 'FALSE', 'no', 'off']) { + process.env.RALPH_AUTO_RESOLVE_HANDOFFS = value; + expect(_parseArgs(['node', CLI]).autoResolveHandoffs).toBe(false); + } + } finally { + if (original === undefined) { + delete process.env.RALPH_AUTO_RESOLVE_HANDOFFS; + } else { + process.env.RALPH_AUTO_RESOLVE_HANDOFFS = original; + } + } + }); + + test('CLI auto-resolve flags override the env default', () => { + const original = process.env.RALPH_AUTO_RESOLVE_HANDOFFS; + try { + process.env.RALPH_AUTO_RESOLVE_HANDOFFS = '0'; + expect(_parseArgs(['node', CLI, '--auto-resolve-handoffs']).autoResolveHandoffs).toBe(true); + + process.env.RALPH_AUTO_RESOLVE_HANDOFFS = '1'; + expect(_parseArgs(['node', CLI, '--no-auto-resolve-handoffs']).autoResolveHandoffs).toBe(false); + } finally { + if (original === undefined) { + delete process.env.RALPH_AUTO_RESOLVE_HANDOFFS; + } else { + process.env.RALPH_AUTO_RESOLVE_HANDOFFS = original; + } + } + }); + test('rejects an actually-unknown flag with exit 1', () => { const result = spawnSync( 'node', diff --git a/tests/unit/javascript/mini-ralph-runner.test.js b/tests/unit/javascript/mini-ralph-runner.test.js index 31d8380..9ded5cc 100644 --- a/tests/unit/javascript/mini-ralph-runner.test.js +++ b/tests/unit/javascript/mini-ralph-runner.test.js @@ -21,6 +21,9 @@ const { _formatAutoCommitMessage, _truncateSubjectSummary, _buildIterationFeedback, + 
_buildAutoResolveHandoffFeedback, + _handoffHasFocusedVerifierEvidence, + _decideAutoResolveHandoff, _buildBaselineGateFeedback, _analyzeBaselineGateConflict, _formatBaselineGateFeedback, @@ -1589,6 +1592,74 @@ describe('_buildIterationFeedback() - fingerprint dedup', () => { }); }); +describe('auto-resolve handoff helpers', () => { + test('recognizes explicit focused-verifier evidence', () => { + const note = [ + '## Blocker Note', + 'The focused verifier `pnpm test -t "one scenario"` passes.', + 'The required broad verifier fails on unrelated pre-existing browser failures.', + ].join('\n'); + + expect(_handoffHasFocusedVerifierEvidence(note)).toBe(true); + }); + + test('rejects ambiguous handoffs without focused pass evidence', () => { + const note = [ + '## Blocker Note', + 'The broad suite fails, probably due to unrelated tests.', + 'Please advise.', + ].join('\n'); + + expect(_handoffHasFocusedVerifierEvidence(note)).toBe(false); + }); + + test('decides disabled, authorized, and budget-exhausted cases', () => { + const note = [ + 'Focused verifier passes with exit 0.', + 'The broad verifier failed on unrelated pre-existing failures.', + ].join('\n'); + const task = { number: '4.1', description: 'Emit not-found telemetry' }; + + expect(_decideAutoResolveHandoff({ enabled: false }, note, task, null)).toMatchObject({ + allowed: false, + reason: 'disabled', + }); + + expect(_decideAutoResolveHandoff({ + enabled: true, + maxPerRun: 6, + state: { totalAttempts: 0, attempts: {} }, + }, note, task, null)).toMatchObject({ + allowed: true, + className: 'verifier_narrowing', + budgetKey: '4.1:verifier_narrowing', + }); + + expect(_decideAutoResolveHandoff({ + enabled: true, + maxPerRun: 1, + state: { totalAttempts: 1, attempts: {} }, + }, note, task, null)).toMatchObject({ + allowed: false, + reason: 'global_budget_exhausted', + }); + }); + + test('builds continuation guidance for verifier narrowing', () => { + const feedback = _buildAutoResolveHandoffFeedback([ + { + 
iteration: 2, + autoResolveHandoffAttempted: true, + autoResolveHandoffClass: 'verifier_narrowing', + }, + ]); + + expect(feedback).toContain('exactly one continuation attempt'); + expect(feedback).toContain('update only the current task verifier'); + expect(feedback).toContain('emit BLOCKED_HANDOFF'); + }); +}); + // --------------------------------------------------------------------------- // _buildIterationFeedback() - paths_ignored_filtered / all_paths_ignored dedup bypass // --------------------------------------------------------------------------- @@ -2456,6 +2527,197 @@ describe('run() with mocked invoker', () => { } }); + test('auto-resolve handoffs can be disabled explicitly', async () => { + const ralphDir = path.join(tmpDir, '.ralph-auto-disabled'); + const tasksFile = path.join(tmpDir, 'tasks.md'); + fs.writeFileSync(tasksFile, '- [ ] 4.1 Emit not-found telemetry\n', 'utf8'); + let callCount = 0; + + const restore = mockInvoker(invoker, async () => { + callCount++; + return { + stdout: [ + '## Blocker Note', + 'The focused verifier `pnpm test -t "not-found"` passes.', + 'The broad verifier fails on unrelated pre-existing tests.', + 'BLOCKED_HANDOFF', + ].join('\n'), + exitCode: 0, + filesChanged: [], + toolUsage: [], + }; + }); + + try { + const result = await run(makeOptions({ + ralphDir, + tasksMode: true, + tasksFile, + maxIterations: 2, + autoResolveHandoffs: false, + })); + + expect(callCount).toBe(1); + expect(result.exitReason).toBe('blocked_handoff'); + expect(history.recent(ralphDir, 1)[0]).toMatchObject({ + autoResolveHandoffAttempted: false, + autoResolveHandoffReason: 'disabled', + }); + } finally { + restore(); + } + }); + + test('auto-resolve continues by default for explicit focused verifier narrowing', async () => { + const ralphDir = path.join(tmpDir, '.ralph-auto-verifier'); + const tasksFile = path.join(tmpDir, 'tasks.md'); + fs.writeFileSync(tasksFile, '- [ ] 4.1 Emit not-found telemetry\n', 'utf8'); + const prompts = []; + let 
callCount = 0; + + const restore = mockInvoker(invoker, async (opts) => { + callCount++; + prompts.push(opts.prompt); + if (callCount === 1) { + return { + stdout: [ + '## Blocker Note', + 'The focused verifier `pnpm test -t "not-found"` passes.', + 'The required broad verifier fails on unrelated pre-existing route snapshots.', + 'BLOCKED_HANDOFF', + ].join('\n'), + exitCode: 0, + filesChanged: [], + toolUsage: [], + }; + } + + return { + stdout: 'COMPLETE', + exitCode: 0, + filesChanged: [], + toolUsage: [], + }; + }); + + try { + const result = await run(makeOptions({ + ralphDir, + tasksMode: true, + tasksFile, + maxIterations: 2, + })); + const entries = history.recent(ralphDir, 2); + + expect(result.completed).toBe(true); + expect(result.iterations).toBe(2); + expect(prompts[1]).toContain('## Auto-Resolve Handoff'); + expect(prompts[1]).toContain('update only the current task verifier'); + expect(entries[0]).toMatchObject({ + autoResolveHandoffAttempted: true, + autoResolveHandoffClass: 'verifier_narrowing', + autoResolveHandoffBudgetKey: '4.1:verifier_narrowing', + }); + expect(state.read(ralphDir).autoResolveHandoffs).toMatchObject({ + totalAttempts: 1, + }); + } finally { + restore(); + } + }); + + test('auto-resolve keeps ambiguous handoffs blocked', async () => { + const ralphDir = path.join(tmpDir, '.ralph-auto-ambiguous'); + const tasksFile = path.join(tmpDir, 'tasks.md'); + fs.writeFileSync(tasksFile, '- [ ] 4.1 Emit not-found telemetry\n', 'utf8'); + let callCount = 0; + + const restore = mockInvoker(invoker, async () => { + callCount++; + return { + stdout: [ + '## Blocker Note', + 'The broad verifier fails. 
Please advise.', + 'BLOCKED_HANDOFF', + ].join('\n'), + exitCode: 0, + filesChanged: [], + toolUsage: [], + }; + }); + + try { + const result = await run(makeOptions({ + ralphDir, + tasksMode: true, + tasksFile, + maxIterations: 2, + autoResolveHandoffs: true, + })); + + expect(callCount).toBe(1); + expect(result.exitReason).toBe('blocked_handoff'); + expect(history.recent(ralphDir, 1)[0]).toMatchObject({ + autoResolveHandoffAttempted: false, + autoResolveHandoffReason: 'ambiguous_or_unsupported_handoff', + }); + } finally { + restore(); + } + }); + + test('auto-resolve budget exhaustion stops instead of retrying', async () => { + const ralphDir = path.join(tmpDir, '.ralph-auto-budget'); + const tasksFile = path.join(tmpDir, 'tasks.md'); + fs.writeFileSync(tasksFile, '- [ ] 4.1 Emit not-found telemetry\n', 'utf8'); + fs.mkdirSync(ralphDir, { recursive: true }); + state.init(ralphDir, { + active: false, + iteration: 1, + autoResolveHandoffs: { + enabled: true, + maxPerRun: 6, + totalAttempts: 6, + attempts: {}, + }, + }); + let callCount = 0; + + const restore = mockInvoker(invoker, async () => { + callCount++; + return { + stdout: [ + '## Blocker Note', + 'The focused verifier `pnpm test -t "not-found"` passes.', + 'The broad verifier fails on unrelated pre-existing tests.', + 'BLOCKED_HANDOFF', + ].join('\n'), + exitCode: 0, + filesChanged: [], + toolUsage: [], + }; + }); + + try { + const result = await run(makeOptions({ + ralphDir, + tasksMode: true, + tasksFile, + maxIterations: 2, + autoResolveHandoffs: true, + })); + + expect(callCount).toBe(1); + expect(result.exitReason).toBe('blocked_handoff'); + expect(history.recent(ralphDir, 1)[0]).toMatchObject({ + autoResolveHandoffAttempted: false, + autoResolveHandoffReason: 'global_budget_exhausted', + }); + } finally { + restore(); + } + }); + test('injects baseline gate conflict feedback into task prompts', async () => { const ralphDir = path.join(tmpDir, '.ralph-baseline-conflict'); const tasksFile = 
path.join(tmpDir, 'tasks.md'); @@ -2578,7 +2840,13 @@ describe('run() with mocked invoker', () => { const invokeSpy = jest.spyOn(invoker, 'invoke'); try { - const result = await run(makeOptions({ ralphDir, tasksMode: true, tasksFile, maxIterations: 8 })); + const result = await run(makeOptions({ + ralphDir, + tasksMode: true, + tasksFile, + maxIterations: 8, + autoResolveHandoffs: true, + })); const persistedState = state.read(ralphDir); expect(result.exitReason).toBe('pending_dirty_paths');