From 7f39206e47fd1fd71ca04420d91998f50f27708e Mon Sep 17 00:00:00 2001
From: Nixon Cheaz <6854716+ncheaz@users.noreply.github.com>
Date: Mon, 4 May 2026 10:59:48 -0400
Subject: [PATCH 1/3] Release 3.3.1: preserve blocked handoff dirty paths
Track pending dirty paths across blocked Ralph handoffs so resumed runs either commit the prior task's work or halt before moving on unsafely.
---
lib/mini-ralph/runner.js | 208 +++++++++++++++++-
lib/mini-ralph/status.js | 35 +++
package-lock.json | 4 +-
package.json | 2 +-
.../unit/javascript/mini-ralph-runner.test.js | 185 ++++++++++++++++
.../unit/javascript/mini-ralph-status.test.js | 26 +++
6 files changed, 456 insertions(+), 4 deletions(-)
diff --git a/lib/mini-ralph/runner.js b/lib/mini-ralph/runner.js
index a0b5d0f..782b571 100644
--- a/lib/mini-ralph/runner.js
+++ b/lib/mini-ralph/runner.js
@@ -488,6 +488,9 @@ async function run(opts) {
resumeIteration > 1 && existingState && existingState.startedAt
? existingState.startedAt
: nowIso;
+ let pendingDirtyPaths = _normalizePendingDirtyPaths(
+ existingState && existingState.pendingDirtyPaths
+ );
state.init(ralphDir, {
active: true,
@@ -508,6 +511,7 @@ async function run(opts) {
completedAt: null,
stoppedAt: null,
exitReason: null,
+ pendingDirtyPaths,
});
stateInitialized = true;
@@ -532,6 +536,20 @@ async function run(opts) {
: [];
const currentTask = _getCurrentTaskDescription(tasksBefore);
const currentTaskMeta = _getCurrentTaskMeta(tasksBefore);
+ pendingDirtyPaths = _refreshPendingDirtyPaths(pendingDirtyPaths);
+ state.update(ralphDir, { pendingDirtyPaths });
+
+ if (
+ pendingDirtyPaths &&
+ !_samePendingTask(pendingDirtyPaths, currentTaskMeta, currentTask)
+ ) {
+ reporter.note(
+ _formatPendingDirtyPathsBlock(pendingDirtyPaths, currentTaskMeta, currentTask),
+ 'error'
+ );
+ exitReason = 'pending_dirty_paths';
+ break;
+ }
reporter.iterationStarted({
iteration: iterationCount,
@@ -701,13 +719,42 @@ async function run(opts) {
result.filesChanged.length > 0 &&
(hasCompletion || (options.tasksMode && hasTask))
) {
+ const filesToStage = _buildAutoCommitAllowlist(
+ _mergePathLists(result.filesChanged, pendingDirtyPaths ? pendingDirtyPaths.files : []),
+ completedTasks,
+ options.tasksFile
+ );
commitResult = _autoCommit(iterationCount, {
completedTasks,
- filesToStage: _buildAutoCommitAllowlist(result.filesChanged, completedTasks, options.tasksFile),
+ filesToStage,
tasksFile: options.tasksFile,
verbose: options.verbose,
reporter,
});
+ if (commitResult.committed && pendingDirtyPaths) {
+ pendingDirtyPaths = _remainingPendingDirtyPathsAfterCommit(
+ pendingDirtyPaths,
+ commitResult.anomaly
+ );
+ state.update(ralphDir, { pendingDirtyPaths });
+ }
+ }
+
+ if (
+ !commitResult.committed &&
+ Array.isArray(result.filesChanged) &&
+ result.filesChanged.length > 0 &&
+ (_isFailedIteration(result) || hasBlockedHandoff)
+ ) {
+ pendingDirtyPaths = _recordPendingDirtyPaths(pendingDirtyPaths, {
+ iteration: iterationCount,
+ reason: hasBlockedHandoff ? 'blocked_handoff' : 'failed_iteration',
+ task: currentTask,
+ taskNumber: currentTaskMeta.number,
+ taskDescription: currentTaskMeta.description,
+ files: result.filesChanged,
+ });
+ state.update(ralphDir, { pendingDirtyPaths });
}
// Record iteration in history after commit handling so operator-visible
@@ -915,6 +962,145 @@ function _containsPromise(text, promiseName) {
.some((line) => line.trim() === expectedTag);
}
+/**
+ * Coerce a stored pending-dirty-paths value into the canonical record shape.
+ *
+ * The file list may arrive under either `files` or `paths`; it is passed
+ * through `_mergePathLists`, and missing metadata fields get defaults.
+ *
+ * @param {object|null|undefined} pending - Raw value, e.g. read back from loop state.
+ * @returns {object|null} Canonical record, or null when the input is not an
+ *   object or lists no usable files.
+ */
+function _normalizePendingDirtyPaths(pending) {
+  const isRecord = Boolean(pending) && typeof pending === 'object';
+  if (!isRecord) {
+    return null;
+  }
+
+  const files = _mergePathLists(pending.files || pending.paths || []);
+  if (files.length === 0) {
+    return null;
+  }
+
+  const iteration = typeof pending.iteration === 'number' ? pending.iteration : null;
+  const recordedAt = pending.recordedAt || new Date().toISOString();
+
+  return {
+    iteration,
+    reason: pending.reason || 'blocked_handoff',
+    task: pending.task || '',
+    taskNumber: pending.taskNumber || '',
+    taskDescription: pending.taskDescription || '',
+    files,
+    recordedAt,
+  };
+}
+
+/**
+ * Fold the files left dirty by an iteration into the pending record.
+ *
+ * All task metadata (iteration, reason, task fields, timestamp) is taken from
+ * `update`; only the file list of `existing` is carried over and unioned with
+ * `update.files`.
+ *
+ * @param {object|null} existing - Previously recorded pending paths, if any.
+ * @param {object} update - Description of the iteration that left files dirty.
+ * @returns {object|null} Normalized record, or null when no files are listed.
+ */
+function _recordPendingDirtyPaths(existing, update) {
+  const next = update || {};
+  const carriedFiles = existing && existing.files ? existing.files : [];
+  const addedFiles = next.files ? next.files : [];
+
+  return _normalizePendingDirtyPaths({
+    iteration: typeof next.iteration === 'number' ? next.iteration : null,
+    reason: next.reason || 'blocked_handoff',
+    task: next.task || '',
+    taskNumber: next.taskNumber || '',
+    taskDescription: next.taskDescription || '',
+    files: _mergePathLists(carriedFiles, addedFiles),
+    recordedAt: next.recordedAt || new Date().toISOString(),
+  });
+}
+
+/**
+ * Work out which pending paths are still outstanding after an auto-commit.
+ *
+ * Only pending files that the commit anomaly reports under `ignoredPaths`
+ * are kept; when none are reported the pending record is cleared.
+ *
+ * @param {object|null} pending - Pending record prior to the commit.
+ * @param {object|null} anomaly - Commit anomaly; `ignoredPaths` is read when it is an array.
+ * @returns {object|null} Record narrowed to the ignored files, or null.
+ */
+function _remainingPendingDirtyPathsAfterCommit(pending, anomaly) {
+  const record = _normalizePendingDirtyPaths(pending);
+  if (!record) {
+    return null;
+  }
+
+  const hasIgnoredList = Boolean(anomaly) && Array.isArray(anomaly.ignoredPaths);
+  if (!hasIgnoredList) {
+    return null;
+  }
+
+  const ignored = new Set(anomaly.ignoredPaths.map(_repoRelativePath).filter(Boolean));
+  if (ignored.size === 0) {
+    return null;
+  }
+
+  const files = record.files.filter((file) => ignored.has(file));
+  return files.length > 0 ? Object.assign({}, record, { files }) : null;
+}
+
+/**
+ * Drop pending paths that `git status` no longer reports as dirty.
+ *
+ * @param {object|null} pending - Pending record to re-check.
+ * @returns {object|null} The normalized record unchanged when the dirty set
+ *   cannot be read, the record narrowed to still-dirty files otherwise, or
+ *   null when nothing remains.
+ */
+function _refreshPendingDirtyPaths(pending) {
+  const record = _normalizePendingDirtyPaths(pending);
+  if (!record) {
+    return null;
+  }
+
+  const dirtyNow = _currentDirtyPathSet();
+  if (!dirtyNow) {
+    // The dirty set is unavailable; keep the record rather than guess.
+    return record;
+  }
+
+  const stillDirty = record.files.filter((file) => dirtyNow.has(file));
+  if (stillDirty.length === 0) {
+    return null;
+  }
+  return Object.assign({}, record, { files: stillDirty });
+}
+
+/**
+ * Decide whether the pending record belongs to the task about to run.
+ *
+ * Identifiers are compared in priority order: task number, then task
+ * description, then the full task line. The first identifier present on both
+ * sides decides the outcome.
+ *
+ * @param {object|null} pending - Pending record; a missing record counts as a match.
+ * @param {object|null} currentTaskMeta - `{ number, description }` of the current task.
+ * @param {string} currentTask - Full text of the current task.
+ * @returns {boolean} True when the pending work belongs to the current task.
+ */
+function _samePendingTask(pending, currentTaskMeta, currentTask) {
+  if (!pending) {
+    return true;
+  }
+
+  const meta = currentTaskMeta || {};
+  const identifierPairs = [
+    [pending.taskNumber, meta.number],
+    [pending.taskDescription, meta.description],
+  ];
+  for (const [recorded, current] of identifierPairs) {
+    if (recorded && current) {
+      return recorded === current;
+    }
+  }
+
+  const fullTask = currentTask || '';
+  return Boolean(pending.task) && Boolean(fullTask) && pending.task === fullTask;
+}
+
+/**
+ * Build the operator-facing message explaining why the loop halted on
+ * unresolved pending dirty paths.
+ *
+ * At most eight paths are listed; any remainder is summarized as a count.
+ *
+ * @param {object} pending - Pending record (reason, iteration, task fields, files).
+ * @param {object|null} currentTaskMeta - `{ number, description }` of the current task.
+ * @param {string} currentTask - Full text of the current task.
+ * @returns {string} Multi-line message.
+ */
+function _formatPendingDirtyPathsBlock(pending, currentTaskMeta, currentTask) {
+  // "<number> <description>" when a number exists, otherwise the fallback label.
+  const stamp = (number, description, fallback) => {
+    if (!number) {
+      return fallback;
+    }
+    return `${number} ${description || ''}`.trim();
+  };
+
+  const meta = currentTaskMeta || {};
+  const currentStamp = stamp(meta.number, meta.description, currentTask || 'the current task');
+  const pendingStamp = stamp(
+    pending.taskNumber,
+    pending.taskDescription,
+    pending.task || 'a prior blocked handoff'
+  );
+
+  const allFiles = pending.files || [];
+  const shown = allFiles.slice(0, 8);
+  const hiddenCount = allFiles.length - shown.length;
+  const fileLines = shown.map((file) => ` - ${file}`).join('\n');
+  const suffix = hiddenCount > 0 ? `\n - (+${hiddenCount} more)` : '';
+
+  const message = [];
+  message.push(`pending dirty paths from ${pending.reason || 'blocked_handoff'} iteration ${pending.iteration || 'unknown'} remain unresolved.`);
+  message.push(`Prior task: ${pendingStamp}`);
+  message.push(`Current task: ${currentStamp}`);
+  message.push('Resolve the prior patch before Ralph can safely continue: commit it with the same task, revert it, or move it to a separate change.');
+  message.push('Pending paths:');
+  message.push(`${fileLines}${suffix}`);
+  return message.join('\n');
+}
+
+/**
+ * Collect every path that `git status --porcelain` currently reports.
+ *
+ * @returns {Set<string>|null} Set of reported paths, or null when running git
+ *   or parsing its output throws.
+ */
+function _currentDirtyPathSet() {
+  const gitOptions = {
+    encoding: 'utf8',
+    stdio: ['pipe', 'pipe', 'pipe'],
+  };
+
+  try {
+    const porcelain = childProcess.execFileSync('git', ['status', '--porcelain'], gitOptions);
+    const dirty = new Set();
+    porcelain.split('\n').forEach((statusLine) => {
+      _parseGitStatusPaths(statusLine).forEach((file) => {
+        if (file) {
+          dirty.add(file);
+        }
+      });
+    });
+    return dirty;
+  } catch (_) {
+    return null;
+  }
+}
+
+/**
+ * Extract the path(s) named by one `git status --porcelain` (v1) line.
+ *
+ * The first two columns are the XY status and the path starts at column 3.
+ * Only rename/copy entries carry an `old -> new` pair, so the arrow is split
+ * only for those. The split is quote-aware, so a quoted file name that itself
+ * contains " -> " stays in one piece.
+ *
+ * @param {string} line - A single status line.
+ * @returns {string[]} Zero, one, or (for renames/copies) two unquoted paths.
+ */
+function _parseGitStatusPaths(line) {
+  if (!line || typeof line !== 'string') return [];
+
+  const status = line.slice(0, 2);
+  const rawPath = line.slice(3).trim();
+  if (!rawPath) return [];
+
+  if (/[RC]/.test(status)) {
+    // Each side is either a C-style quoted string or an unquoted run of characters.
+    const pair = rawPath.match(/^("(?:[^"\\]|\\.)*"|.+?) -> ("(?:[^"\\]|\\.)*"|.+)$/);
+    if (pair) {
+      return [pair[1], pair[2]].map(_stripGitStatusQuotes).filter(Boolean);
+    }
+  }
+
+  return [_stripGitStatusQuotes(rawPath)].filter(Boolean);
+}
+
+/**
+ * Undo git's C-style path quoting.
+ *
+ * Git wraps "unusual" paths in double quotes and backslash-escapes their
+ * special bytes. With the default `core.quotePath`, non-ASCII bytes appear as
+ * octal escapes such as `\303\251`. All of those are decoded here so the
+ * result can be compared against plain recorded paths; decoding only `\"` and
+ * `\\` would leave such paths permanently unmatched.
+ *
+ * @param {string} value - Path field as printed by git.
+ * @returns {string} The decoded path; unquoted input is returned trimmed.
+ */
+function _stripGitStatusQuotes(value) {
+  if (!value) return '';
+  const trimmed = value.trim();
+  const isQuoted = trimmed.length >= 2 && trimmed.startsWith('"') && trimmed.endsWith('"');
+  if (!isQuoted) {
+    return trimmed;
+  }
+
+  const namedEscapes = {
+    a: 0x07,
+    b: 0x08,
+    t: 0x09,
+    n: 0x0a,
+    v: 0x0b,
+    f: 0x0c,
+    r: 0x0d,
+    '"': 0x22,
+    '\\': 0x5c,
+  };
+  const escapePattern = /\\([0-7]{1,3}|[abtnvfr"\\])/g;
+  const inner = trimmed.slice(1, -1);
+
+  // Decode to bytes first: consecutive octal escapes form one UTF-8 sequence.
+  const chunks = [];
+  let cursor = 0;
+  let match;
+  while ((match = escapePattern.exec(inner)) !== null) {
+    if (match.index > cursor) {
+      chunks.push(Buffer.from(inner.slice(cursor, match.index), 'utf8'));
+    }
+    const token = match[1];
+    const byte = /^[0-7]/.test(token)
+      ? Number.parseInt(token, 8) & 0xff
+      : namedEscapes[token];
+    chunks.push(Buffer.from([byte]));
+    cursor = escapePattern.lastIndex;
+  }
+  if (cursor < inner.length) {
+    chunks.push(Buffer.from(inner.slice(cursor), 'utf8'));
+  }
+
+  return Buffer.concat(chunks).toString('utf8');
+}
+
/**
* Validate required options and throw descriptive errors.
*
@@ -1163,6 +1349,19 @@ function _filterGitignored(paths, cwd) {
}
}
+/**
+ * Union any number of path lists into one de-duplicated array.
+ *
+ * Each entry is passed through `_repoRelativePath`; entries that map to a
+ * falsy value are dropped. First-seen order is preserved.
+ *
+ * @param {...(Iterable<string>|null|undefined)} lists - Path lists to merge.
+ * @returns {string[]} Unique paths as returned by `_repoRelativePath`.
+ */
+function _mergePathLists(...lists) {
+  const unique = new Set();
+
+  for (const list of lists) {
+    if (!list) continue;
+    for (const candidate of list) {
+      const relative = _repoRelativePath(candidate);
+      if (!relative) continue;
+      unique.add(relative);
+    }
+  }
+
+  return [...unique];
+}
+
/**
* Build the explicit per-iteration git staging allowlist.
*
@@ -1769,6 +1968,13 @@ module.exports = {
_validateOptions,
_autoCommit,
_buildAutoCommitAllowlist,
+ _mergePathLists,
+ _normalizePendingDirtyPaths,
+ _recordPendingDirtyPaths,
+ _remainingPendingDirtyPathsAfterCommit,
+ _refreshPendingDirtyPaths,
+ _samePendingTask,
+ _currentDirtyPathSet,
_filterGitignored,
_resolveStartIteration,
_completedTaskDelta,
diff --git a/lib/mini-ralph/status.js b/lib/mini-ralph/status.js
index cf81984..579a896 100644
--- a/lib/mini-ralph/status.js
+++ b/lib/mini-ralph/status.js
@@ -60,6 +60,31 @@ function render(ralphDir, tasksFile) {
lines.push(`Exit reason: ${loopState.exitReason}`);
}
+ const pendingDirtyPaths = _pendingDirtyPaths(loopState);
+ if (pendingDirtyPaths) {
+ lines.push('');
+ lines.push('--- Pending Dirty Paths ---');
+ lines.push(` Reason: ${pendingDirtyPaths.reason || 'blocked_handoff'}`);
+ if (pendingDirtyPaths.iteration) {
+ lines.push(` From iteration: ${pendingDirtyPaths.iteration}`);
+ }
+ const task = pendingDirtyPaths.taskNumber
+ ? `${pendingDirtyPaths.taskNumber} ${pendingDirtyPaths.taskDescription || ''}`.trim()
+ : (pendingDirtyPaths.task || '');
+ if (task) {
+ lines.push(` Prior task: ${task}`);
+ }
+ const files = pendingDirtyPaths.files.slice(0, 10);
+ for (const file of files) {
+ lines.push(` - ${file}`);
+ }
+ if (pendingDirtyPaths.files.length > files.length) {
+ lines.push(` - (+${pendingDirtyPaths.files.length - files.length} more)`);
+ }
+ lines.push(' Resolve before continuing: commit with the same task, revert, or move to a separate change.');
+ lines.push('-'.repeat(50));
+ }
+
const latestCommitAnomaly = _latestCommitAnomaly(history.recent(ralphDir, 20));
if (latestCommitAnomaly) {
lines.push(`Commit issue: ${latestCommitAnomaly.commitAnomaly}`);
@@ -186,6 +211,16 @@ function _promptSummary(loopState) {
return '';
}
+/**
+ * Read the pending-dirty-paths record from loop state for display.
+ *
+ * @param {object|null} loopState - Loop state.
+ * @returns {object|null} A copy of the record whose `files` holds only
+ *   non-blank strings, or null when there is nothing to show.
+ */
+function _pendingDirtyPaths(loopState) {
+  const record = loopState ? loopState.pendingDirtyPaths : null;
+  if (!record || typeof record !== 'object') {
+    return null;
+  }
+  if (!Array.isArray(record.files)) {
+    return null;
+  }
+
+  const files = record.files.filter((file) => typeof file === 'string' && file.trim());
+  return files.length > 0 ? Object.assign({}, record, { files }) : null;
+}
+
/**
* Try to find a tasks file path from loop state.
*
diff --git a/package-lock.json b/package-lock.json
index 95c3544..49fbb16 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
{
"name": "spec-and-loop",
- "version": "3.3.0",
+ "version": "3.3.1",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "spec-and-loop",
- "version": "3.3.0",
+ "version": "3.3.1",
"hasInstallScript": true,
"license": "GPL-3.0",
"os": [
diff --git a/package.json b/package.json
index 977df8c..864712d 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "spec-and-loop",
- "version": "3.3.0",
+ "version": "3.3.1",
"description": "OpenSpec + Ralph Loop integration for iterative development with opencode",
"main": "index.js",
"bin": {
diff --git a/tests/unit/javascript/mini-ralph-runner.test.js b/tests/unit/javascript/mini-ralph-runner.test.js
index 0db5d77..9a68bc8 100644
--- a/tests/unit/javascript/mini-ralph-runner.test.js
+++ b/tests/unit/javascript/mini-ralph-runner.test.js
@@ -27,6 +27,10 @@ const {
_gitErrorMessage,
_isFailedIteration,
_wasSuccessfulIteration,
+ _normalizePendingDirtyPaths,
+ _recordPendingDirtyPaths,
+ _remainingPendingDirtyPathsAfterCommit,
+ _samePendingTask,
_failureFingerprint,
_firstNonEmptyLine,
_filterGitignored,
@@ -509,6 +513,55 @@ describe('_buildAutoCommitAllowlist()', () => {
});
});
+describe('pending dirty path helpers', () => {
+ test('normalizes and merges pending blocked-handoff paths', () => {
+ const pending = _recordPendingDirtyPaths(null, {
+ iteration: 7,
+ reason: 'blocked_handoff',
+ taskNumber: '3.1',
+ taskDescription: 'Align contracts',
+ task: '3.1 Align contracts',
+ files: ['src/a.js', 'src/a.js', path.join(process.cwd(), 'src/b.js')],
+ recordedAt: '2026-05-01T00:00:00.000Z',
+ });
+
+ expect(pending).toMatchObject({
+ iteration: 7,
+ reason: 'blocked_handoff',
+ taskNumber: '3.1',
+ taskDescription: 'Align contracts',
+ files: ['src/a.js', 'src/b.js'],
+ });
+ expect(_normalizePendingDirtyPaths({ files: [] })).toBeNull();
+ });
+
+ test('matches pending patches by task number first', () => {
+ const pending = {
+ taskNumber: '3.1',
+ taskDescription: 'Align contracts',
+ task: '3.1 Align contracts',
+ files: ['src/a.js'],
+ };
+
+ expect(_samePendingTask(pending, { number: '3.1', description: 'Renamed task' }, '3.1 Renamed task')).toBe(true);
+ expect(_samePendingTask(pending, { number: '4.1', description: 'Next task' }, '4.1 Next task')).toBe(false);
+ });
+
+ test('keeps only ignored pending paths after a partial commit', () => {
+ const pending = {
+ taskNumber: '3.1',
+ files: ['src/committed.js', 'ignored/output.png'],
+ };
+
+ expect(
+ _remainingPendingDirtyPathsAfterCommit(pending, {
+ ignoredPaths: ['ignored/output.png'],
+ })
+ ).toMatchObject({ files: ['ignored/output.png'] });
+ expect(_remainingPendingDirtyPathsAfterCommit(pending, null)).toBeNull();
+ });
+});
+
describe('_detectProtectedCommitArtifacts()', () => {
test('detects protected proposal, design, and spec artifacts for the active change', () => {
const tasksFile = path.join(process.cwd(), 'openspec/changes/demo/tasks.md');
@@ -2035,6 +2088,138 @@ describe('run() with mocked invoker', () => {
}
});
+ test('persists dirty paths from a blocked handoff for operator recovery', async () => {
+ const ralphDir = path.join(tmpDir, '.ralph-pending-blocked');
+ const tasksFile = path.join(tmpDir, 'tasks.md');
+ fs.writeFileSync(tasksFile, '- [ ] 3.1 Align contracts\n', 'utf8');
+
+ const restore = mockInvoker(invoker, async () => ({
+ stdout: [
+ '## Blocker Note',
+ 'Contract suite is red outside the current task.',
+ 'BLOCKED_HANDOFF',
+ ].join('\n'),
+ exitCode: 0,
+ filesChanged: ['src/data/page-contracts/release.ts'],
+ toolUsage: [],
+ }));
+
+ try {
+ const result = await run(makeOptions({ ralphDir, tasksMode: true, tasksFile, maxIterations: 1 }));
+ const persistedState = state.read(ralphDir);
+
+ expect(result.exitReason).toBe('blocked_handoff');
+ expect(persistedState.pendingDirtyPaths).toMatchObject({
+ iteration: 1,
+ reason: 'blocked_handoff',
+ taskNumber: '3.1',
+ taskDescription: 'Align contracts',
+ files: ['src/data/page-contracts/release.ts'],
+ });
+ } finally {
+ restore();
+ }
+ });
+
+ test('commits pending dirty paths when the same blocked task later completes', async () => {
+ const ralphDir = path.join(tmpDir, '.ralph-pending-same-task');
+ const tasksFile = path.join(tmpDir, 'tasks.md');
+ fs.writeFileSync(tasksFile, '- [ ] 3.1 Align contracts\n', 'utf8');
+ state.init(ralphDir, {
+ active: false,
+ iteration: 7,
+ tasksMode: true,
+ tasksFile,
+ exitReason: 'blocked_handoff',
+ pendingDirtyPaths: {
+ iteration: 7,
+ reason: 'blocked_handoff',
+ taskNumber: '3.1',
+ taskDescription: 'Align contracts',
+ task: '3.1 Align contracts',
+ files: ['src/data/page-contracts/release.ts'],
+ recordedAt: '2026-05-01T00:00:00.000Z',
+ },
+ });
+
+ const cwdSpy = jest.spyOn(process, 'cwd').mockReturnValue(tmpDir);
+ const execSpy = jest.spyOn(require('child_process'), 'execFileSync').mockImplementation((command, args) => {
+ if (command === 'git' && args[0] === 'status') return ' M src/data/page-contracts/release.ts\n M tasks.md\n';
+ if (command === 'git' && args[0] === 'check-ignore') return '';
+ if (command === 'git' && args[0] === 'add') return '';
+ if (command === 'git' && args[0] === 'diff') return 'tasks.md\nsrc/data/page-contracts/release.ts\n';
+ if (command === 'git' && args[0] === 'commit') return '';
+ return '';
+ });
+ const restore = mockInvoker(invoker, async () => {
+ fs.writeFileSync(tasksFile, '- [x] 3.1 Align contracts\n', 'utf8');
+ return {
+ stdout: 'READY_FOR_NEXT_TASK',
+ exitCode: 0,
+ filesChanged: [tasksFile],
+ toolUsage: [],
+ };
+ });
+
+ try {
+ await run(makeOptions({ ralphDir, tasksMode: true, tasksFile, maxIterations: 8 }));
+
+ expect(execSpy).toHaveBeenCalledWith(
+ 'git',
+ ['add', '-A', '--', 'tasks.md', 'src/data/page-contracts/release.ts'],
+ expect.any(Object)
+ );
+ expect(state.read(ralphDir).pendingDirtyPaths).toBeNull();
+ } finally {
+ restore();
+ execSpy.mockRestore();
+ cwdSpy.mockRestore();
+ }
+ });
+
+ test('halts before another task when pending blocked-handoff paths are still dirty', async () => {
+ const ralphDir = path.join(tmpDir, '.ralph-pending-different-task');
+ const tasksFile = path.join(tmpDir, 'tasks.md');
+ fs.writeFileSync(tasksFile, '- [ ] 4.1 Reconcile visuals\n', 'utf8');
+ state.init(ralphDir, {
+ active: false,
+ iteration: 7,
+ tasksMode: true,
+ tasksFile,
+ exitReason: 'blocked_handoff',
+ pendingDirtyPaths: {
+ iteration: 7,
+ reason: 'blocked_handoff',
+ taskNumber: '3.1',
+ taskDescription: 'Align contracts',
+ task: '3.1 Align contracts',
+ files: ['src/data/page-contracts/release.ts'],
+ recordedAt: '2026-05-01T00:00:00.000Z',
+ },
+ });
+
+ const execSpy = jest.spyOn(require('child_process'), 'execFileSync').mockImplementation((command, args) => {
+ if (command === 'git' && args[0] === 'status') return ' M src/data/page-contracts/release.ts\n';
+ return '';
+ });
+ const invokeSpy = jest.spyOn(invoker, 'invoke');
+
+ try {
+ const result = await run(makeOptions({ ralphDir, tasksMode: true, tasksFile, maxIterations: 8 }));
+ const persistedState = state.read(ralphDir);
+
+ expect(result.exitReason).toBe('pending_dirty_paths');
+ expect(invokeSpy).not.toHaveBeenCalled();
+ expect(persistedState.pendingDirtyPaths).toMatchObject({
+ taskNumber: '3.1',
+ files: ['src/data/page-contracts/release.ts'],
+ });
+ } finally {
+ execSpy.mockRestore();
+ invokeSpy.mockRestore();
+ }
+ });
+
test('records protected-artifact auto-commit anomalies in history', async () => {
const ralphDir = path.join(tmpDir, '.ralph');
const tasksFile = path.join(tmpDir, 'openspec', 'changes', 'demo', 'tasks.md');
diff --git a/tests/unit/javascript/mini-ralph-status.test.js b/tests/unit/javascript/mini-ralph-status.test.js
index 7120f09..1190bd4 100644
--- a/tests/unit/javascript/mini-ralph-status.test.js
+++ b/tests/unit/javascript/mini-ralph-status.test.js
@@ -438,6 +438,32 @@ describe('render()', () => {
expect(output).not.toContain('Completed:');
});
+ test('shows pending dirty paths from a blocked handoff', () => {
+ state.init(ralphDir, {
+ active: false,
+ iteration: 8,
+ maxIterations: 50,
+ startedAt: new Date().toISOString(),
+ stoppedAt: '2026-05-01T12:34:56.000Z',
+ exitReason: 'pending_dirty_paths',
+ pendingDirtyPaths: {
+ iteration: 7,
+ reason: 'blocked_handoff',
+ taskNumber: '3.1',
+ taskDescription: 'Align contracts',
+ files: ['src/data/page-contracts/release.ts', 'src/lib/search.ts'],
+ },
+ });
+
+ const output = render(ralphDir);
+ expect(output).toContain('--- Pending Dirty Paths ---');
+ expect(output).toContain('Reason: blocked_handoff');
+ expect(output).toContain('From iteration: 7');
+ expect(output).toContain('Prior task: 3.1 Align contracts');
+ expect(output).toContain('src/data/page-contracts/release.ts');
+ expect(output).toContain('Resolve before continuing');
+ });
+
test('prefers completed lifecycle over stale stopped metadata', () => {
state.init(ralphDir, {
active: false,
From 95cd913a5e103e958c6c4b4ddd97a8ae52f02c4b Mon Sep 17 00:00:00 2001
From: Nixon Cheaz <6854716+ncheaz@users.noreply.github.com>
Date: Tue, 5 May 2026 11:53:03 -0400
Subject: [PATCH 2/3] Release 3.3.2: add baseline gate repair budget
---
OPENSPEC-RALPH-BP.md | 6 +
lib/mini-ralph/runner.js | 357 ++++++++++++++++++
package-lock.json | 4 +-
package.json | 2 +-
.../unit/javascript/mini-ralph-runner.test.js | 287 ++++++++++++++
5 files changed, 653 insertions(+), 3 deletions(-)
diff --git a/OPENSPEC-RALPH-BP.md b/OPENSPEC-RALPH-BP.md
index 129595d..2176c45 100644
--- a/OPENSPEC-RALPH-BP.md
+++ b/OPENSPEC-RALPH-BP.md
@@ -58,6 +58,12 @@ Split test: if the loop stopped halfway, would the repo be clean and reviewable?
- "Pre-existing" requires a before-baseline. Without one, any failure could be a regression.
- First task in a chain that needs clean gates must be a pre-flight baseline that records gate output.
- Explicitly distinguish known-broken validators (document and continue) from required-clean validators (hard stop). If only one is named, the loop generalizes permissively.
+- If a pre-flight baseline records a failing gate, later tasks MUST NOT require only a strict clean result for that same gate unless the task is intentionally responsible for fixing that baseline failure. Use one of these explicit forms:
+ - Baseline classification: `` `` exits 0, or failures match the pre-flight baseline with no new failures in this task's scope ``
+ - Authorized cleanup: `` `` exits 0 after fixing the named baseline failures in `` and `` ``
+ - Hard blocker: `` `` exits 0; baseline failures are not allowed for this task ``
+- When strict clean-gate text conflicts with a failing pre-flight baseline and no classification/cleanup rule is written, `ralph-run` will warn the agent to stop with `BLOCKED_HANDOFF` instead of spending iterations on unauthorized cleanup.
+- Authorized cleanup is intentionally narrow: the named files must be backticked, the cleanup is limited to compiler/lint-only fixes, and `ralph-run` gives the agent one repair attempt for those files on that task. If the gate still fails after that attempt, the next prompt tells the agent to hand off instead of retrying.
Pre-flight template:
```markdown
diff --git a/lib/mini-ralph/runner.js b/lib/mini-ralph/runner.js
index 782b571..26cfb04 100644
--- a/lib/mini-ralph/runner.js
+++ b/lib/mini-ralph/runner.js
@@ -560,6 +560,7 @@ async function run(opts) {
let result;
let promptSize = null;
let responseSize = { bytes: 0, chars: 0, tokens: 0 };
+ let baselineGateConflict = null;
try {
// Build the prompt for this iteration
@@ -576,6 +577,7 @@ async function run(opts) {
// iteration N" line, so the 3-entry window is sufficient to surface
// recurring patterns without bloating the prompt.
const recentHistory = history.recent(ralphDir, 3);
+ const fullHistory = history.read(ralphDir);
const errorEntries = errors.readEntries(ralphDir, 3);
const blockerArtifacts = _detectBlockerArtifacts(ralphDir, {
repoRoot: process.cwd(),
@@ -588,6 +590,13 @@ async function run(opts) {
errorEntries,
blockerArtifacts,
);
+ baselineGateConflict = _analyzeBaselineGateConflict(
+ ralphDir,
+ options.tasksFile,
+ currentTaskMeta,
+ fullHistory,
+ );
+ const baselineGateFeedback = _formatBaselineGateFeedback(baselineGateConflict);
// Inject any pending context
const pendingContext = context.consume(ralphDir);
@@ -595,6 +604,10 @@ async function run(opts) {
const lessonsSection = lessons.inject(ralphDir, { limit: 15 });
const promptSections = [renderedPrompt];
+ if (baselineGateFeedback) {
+ promptSections.push(`## Baseline Gate Conflict\n\n${baselineGateFeedback}`);
+ }
+
if (iterationFeedback) {
promptSections.push(`## Recent Loop Signals\n\n${iterationFeedback}`);
}
@@ -779,6 +792,16 @@ async function run(opts) {
commitAnomaly: commitResult.anomaly ? commitResult.anomaly.message : '',
commitAnomalyType: commitResult.anomaly ? commitResult.anomaly.type : '',
protectedArtifacts: commitResult.anomaly ? commitResult.anomaly.protectedArtifacts || [] : [],
+ ...(baselineGateConflict
+ ? {
+ baselineGateConflictMode: baselineGateConflict.mode,
+ baselineGateRepairAllowedFiles: baselineGateConflict.allowedFiles || [],
+ baselineGateRepairAttempted: _baselineGateRepairAttempted(
+ baselineGateConflict,
+ result.filesChanged || []
+ ),
+ }
+ : {}),
...(commitResult.anomaly && commitResult.anomaly.ignoredPaths && commitResult.anomaly.ignoredPaths.length > 0
? { ignoredPaths: commitResult.anomaly.ignoredPaths }
: {}),
@@ -1757,6 +1780,331 @@ function _buildIterationFeedback(recentHistory, errorEntries, blockerArtifacts)
return sections.join('\n');
}
+/**
+ * Convenience wrapper: analyze the baseline-gate conflict for the current
+ * task and render it as prompt feedback in one call.
+ *
+ * @param {string} ralphDir - Ralph working directory.
+ * @param {string} tasksFile - Path to the tasks file.
+ * @param {object} currentTaskMeta - `{ number, description }` of the current task.
+ * @param {object[]} recentHistory - History entries forwarded to the analysis.
+ * @returns {string} Feedback text, or '' when there is no conflict.
+ */
+function _buildBaselineGateFeedback(ralphDir, tasksFile, currentTaskMeta, recentHistory) {
+  const conflict = _analyzeBaselineGateConflict(
+    ralphDir,
+    tasksFile,
+    currentTaskMeta,
+    recentHistory
+  );
+  return _formatBaselineGateFeedback(conflict);
+}
+
+/**
+ * Detect whether the current task demands a clean gate whose recorded
+ * baseline is already failing, and classify how the task text handles that.
+ *
+ * Modes:
+ *  - `authorized_cleanup`: the task names files it may repair; `budgetUsed`
+ *    reports whether history already shows a repair attempt.
+ *  - `baseline_classification`: the task text allows baseline-matching failures.
+ *  - `missing_policy`: the task text says neither.
+ *
+ * @param {string} ralphDir - Ralph working directory (holds `baselines/`).
+ * @param {string} tasksFile - Path to the tasks file.
+ * @param {object} currentTaskMeta - `{ number, description }` of the current task.
+ * @param {object[]} recentHistory - History entries used for the repair budget.
+ * @returns {object|null} `{ mode, conflicts, allowedFiles, budgetUsed }`, or
+ *   null when no strict gate conflicts with a failing baseline.
+ */
+function _analyzeBaselineGateConflict(ralphDir, tasksFile, currentTaskMeta, recentHistory) {
+  const hasInputs = ralphDir && tasksFile && currentTaskMeta && currentTaskMeta.description;
+  if (!hasInputs) {
+    return null;
+  }
+
+  const taskBlock = _extractCurrentTaskBlock(tasksFile, currentTaskMeta);
+  if (!taskBlock) {
+    return null;
+  }
+
+  const strictGates = _detectStrictCleanGates(taskBlock);
+  if (strictGates.length === 0) {
+    return null;
+  }
+
+  const failingBaselines = _detectFailingBaselineGates(ralphDir);
+  if (failingBaselines.length === 0) {
+    return null;
+  }
+
+  // Index failing baselines by gate name; a later entry replaces an earlier one.
+  const baselineByGate = new Map();
+  for (const baseline of failingBaselines) {
+    baselineByGate.set(baseline.name, baseline);
+  }
+
+  const conflicts = [];
+  for (const gate of strictGates) {
+    const baseline = baselineByGate.get(gate.name);
+    if (baseline) {
+      conflicts.push({ gate, baseline });
+    }
+  }
+  if (conflicts.length === 0) {
+    return null;
+  }
+
+  const { allowedFiles } = _detectAuthorizedBaselineCleanup(taskBlock);
+  if (allowedFiles.length > 0) {
+    return {
+      mode: 'authorized_cleanup',
+      conflicts,
+      allowedFiles,
+      budgetUsed: _baselineGateRepairBudgetUsed(recentHistory, currentTaskMeta, allowedFiles),
+    };
+  }
+
+  const mode = _taskExplicitlyHandlesBaselineFailures(taskBlock)
+    ? 'baseline_classification'
+    : 'missing_policy';
+  return { mode, conflicts, allowedFiles: [], budgetUsed: false };
+}
+
+/**
+ * Render a baseline-gate conflict as instruction text for the agent prompt.
+ *
+ * @param {object|null} conflict - Result of `_analyzeBaselineGateConflict`.
+ * @returns {string} Multi-line instructions ending with one bullet per
+ *   conflicting gate, or '' when there is no conflict to report.
+ */
+function _formatBaselineGateFeedback(conflict) {
+  if (!conflict || !Array.isArray(conflict.conflicts) || conflict.conflicts.length === 0) {
+    return '';
+  }
+
+  // One bullet per gate: the command plus the baseline file and its exit code.
+  const conflictLines = conflict.conflicts.map(({ gate, baseline }) =>
+    `- ${gate.command}: baseline ${baseline.file} exits ${baseline.exitCode}.`
+  );
+
+  if (conflict.mode === 'authorized_cleanup') {
+    // Budget spent: tell the agent to hand off instead of retrying.
+    if (conflict.budgetUsed) {
+      return [
+        'The current task explicitly authorized cleanup for baseline gate failures, but its one repair attempt has already been used.',
+        'Do not keep iterating on cleanup or broaden the edit scope.',
+        'If the gate is still failing, emit BLOCKED_HANDOFF with the remaining failing identifiers and ask for either a broader cleanup task or a task-spec change.',
+        '',
+        `Authorized cleanup files: ${conflict.allowedFiles.join(', ')}`,
+        ...conflictLines,
+      ].join('\n');
+    }
+
+    // Budget still available: grant the single, narrowly scoped repair attempt.
+    return [
+      'The current task explicitly authorizes cleanup for baseline gate failures in named files.',
+      'You have exactly one repair attempt for this task. Limit edits to compiler/lint-only fixes in the authorized files; do not change behavior or edit other files for this cleanup.',
+      'If this attempt does not clear the gate, emit BLOCKED_HANDOFF instead of continuing to retry.',
+      '',
+      `Authorized cleanup files: ${conflict.allowedFiles.join(', ')}`,
+      ...conflictLines,
+    ].join('\n');
+  }
+
+  if (conflict.mode === 'baseline_classification') {
+    return [
+      'The current task has strict quality-gate checks, and matching pre-flight baselines are already failing.',
+      'The task text appears to authorize baseline classification, so do not repair unrelated baseline failures unless the task explicitly names those files.',
+      'Complete the task only if the current run has no new failures beyond the named baseline failures.',
+      '',
+      ...conflictLines,
+    ].join('\n');
+  }
+
+  // Any other mode (e.g. 'missing_policy'): the task text gave no rule, so steer toward a handoff.
+  return [
+    'The current task requires a clean gate that already has a failing pre-flight baseline, but the task text does not say whether baseline-matching failures may be classified.',
+    'Do not spend iterations repairing unrelated files outside the current task scope.',
+    'If the only remaining gate failures match the baseline, emit BLOCKED_HANDOFF with a task-spec correction request: either allow baseline classification for this gate, or explicitly authorize the named out-of-scope repair.',
+    '',
+    ...conflictLines,
+  ].join('\n');
+}
+
+/**
+ * Return the text of the current task from the tasks file: its checkbox line
+ * plus every following line up to (not including) the next top-level
+ * checkbox line.
+ *
+ * A task-number match always wins. The description is used only when no
+ * header carries the target number, so two tasks that share a description
+ * (e.g. "1.1 Run tests" and "2.1 Run tests") cannot be confused.
+ *
+ * @param {string} tasksFile - Path to the tasks file.
+ * @param {object} currentTaskMeta - `{ number, description }` of the current task.
+ * @returns {string} The task block, or '' when the file or task is not found.
+ */
+function _extractCurrentTaskBlock(tasksFile, currentTaskMeta) {
+  const fs = require('fs');
+  if (!tasksFile || !fs.existsSync(tasksFile)) return '';
+
+  const lines = fs.readFileSync(tasksFile, 'utf8').split(/\r?\n/);
+  const taskHeader = /^-\s+\[[ x/]\]\s+(.+)$/;
+  const meta = currentTaskMeta || {};
+  const targetNumber = meta.number || '';
+  const targetDescription = (meta.description || '').trim();
+
+  let numberMatch = -1;
+  let descriptionMatch = -1;
+
+  for (let i = 0; i < lines.length; i++) {
+    const match = lines[i].match(taskHeader);
+    if (!match) continue;
+
+    const fullDescription = match[1].trim();
+    const numMatch = fullDescription.match(/^(\d+\.\d+)\s+(.+)$/);
+    const number = numMatch ? numMatch[1] : '';
+    const description = (numMatch ? numMatch[2] : fullDescription).trim();
+
+    if (targetNumber && number === targetNumber) {
+      numberMatch = i;
+      break;
+    }
+
+    // Remember only the first description match; it is a fallback.
+    if (descriptionMatch === -1 && targetDescription && description === targetDescription) {
+      descriptionMatch = i;
+    }
+  }
+
+  const start = numberMatch !== -1 ? numberMatch : descriptionMatch;
+  if (start === -1) return '';
+
+  let end = lines.length;
+  for (let i = start + 1; i < lines.length; i++) {
+    if (taskHeader.test(lines[i])) {
+      end = i;
+      break;
+    }
+  }
+
+  return lines.slice(start, end).join('\n');
+}
+
+/**
+ * Find the quality gates that the task text requires to exit/return 0.
+ *
+ * A gate is detected when its `pnpm <gate>` command and the words
+ * "exit(s) 0" / "return(s) 0" appear on the same line.
+ *
+ * @param {string} taskBlock - Text of the current task.
+ * @returns {Array<{name: string, command: string, pattern: RegExp}>} Matching
+ *   gates in typecheck, lint, test order.
+ */
+function _detectStrictCleanGates(taskBlock) {
+  if (!taskBlock) {
+    return [];
+  }
+
+  const gatePatterns = [
+    ['typecheck', /`?pnpm\s+typecheck`?[^\n]*(?:exits?|returns?)\s+0/i],
+    ['lint', /`?pnpm\s+lint`?[^\n]*(?:exits?|returns?)\s+0/i],
+    ['test', /`?pnpm\s+test`?[^\n]*(?:exits?|returns?)\s+0/i],
+  ];
+
+  const required = [];
+  for (const [name, pattern] of gatePatterns) {
+    if (pattern.test(taskBlock)) {
+      required.push({ name, command: `pnpm ${name}`, pattern });
+    }
+  }
+  return required;
+}
+
+/**
+ * Scan `<ralphDir>/baselines/*.txt` for recorded gate runs that failed.
+ *
+ * A baseline counts as failing when the last 16 KiB of the file contain an
+ * `EXIT=<n>` line with a non-zero integer `n`.
+ *
+ * @param {string} ralphDir - Ralph working directory.
+ * @returns {Array<{name: string, file: string, exitCode: number}>} Failing
+ *   gates sorted typecheck, lint, test, then by file name.
+ */
+function _detectFailingBaselineGates(ralphDir) {
+  const fs = require('fs');
+  const fsPath = require('path');
+
+  const baselinesDir = fsPath.join(ralphDir, 'baselines');
+  const isDirectory = fs.existsSync(baselinesDir) && fs.statSync(baselinesDir).isDirectory();
+  if (!isDirectory) {
+    return [];
+  }
+
+  const failing = [];
+  fs.readdirSync(baselinesDir).forEach((name) => {
+    if (!/\.txt$/i.test(name)) return;
+
+    const gateName = _gateNameFromBaselineFile(name);
+    if (!gateName) return;
+
+    const tail = _readFileTail(fsPath.join(baselinesDir, name), 16384);
+    const exitMatch = tail.match(/(?:^|\n)EXIT=(\d+)(?:\n|$)/);
+    if (!exitMatch) return;
+
+    const exitCode = Number(exitMatch[1]);
+    if (!Number.isInteger(exitCode) || exitCode === 0) return;
+
+    failing.push({ name: gateName, file: fsPath.join('baselines', name), exitCode });
+  });
+
+  const priority = { typecheck: 1, lint: 2, test: 3 };
+  const rank = (gate) => priority[gate.name] || 99;
+  return failing.sort((a, b) => rank(a) - rank(b) || a.file.localeCompare(b.file));
+}
+
+/**
+ * Map a baseline file name to the gate it records.
+ *
+ * The gate word must stand alone: bounded by the start/end of the name or by
+ * one of `-`, `_`, `.`. Matching is case-insensitive.
+ *
+ * @param {string} fileName - Baseline file name.
+ * @returns {string} 'typecheck', 'lint', 'test', or '' when none matches.
+ */
+function _gateNameFromBaselineFile(fileName) {
+  const lowered = fileName.toLowerCase();
+  const matchers = [
+    ['typecheck', /(^|[-_.])typecheck([-_.]|\.|$)/],
+    ['lint', /(^|[-_.])lint([-_.]|\.|$)/],
+    ['test', /(^|[-_.])test([-_.]|\.|$)/],
+  ];
+
+  const hit = matchers.find(([, pattern]) => pattern.test(lowered));
+  return hit ? hit[0] : '';
+}
+
+/**
+ * Read at most the last `maxBytes` bytes of a file as UTF-8 text.
+ *
+ * Best-effort: any filesystem error yields ''. Only the bytes that
+ * `fs.readSync` reports as read are decoded, so a short read cannot leak
+ * NUL padding from the pre-allocated buffer into the result.
+ *
+ * @param {string} file - Path of the file to read.
+ * @param {number} maxBytes - Upper bound on the number of trailing bytes.
+ * @returns {string} The decoded tail, or '' on error.
+ */
+function _readFileTail(file, maxBytes) {
+  const fs = require('fs');
+  let fd = null;
+  try {
+    const stat = fs.statSync(file);
+    const length = Math.min(stat.size, maxBytes);
+    const offset = Math.max(0, stat.size - length);
+    const buffer = Buffer.alloc(length);
+    fd = fs.openSync(file, 'r');
+    const bytesRead = fs.readSync(fd, buffer, 0, length, offset);
+    return buffer.toString('utf8', 0, bytesRead);
+  } catch {
+    return '';
+  } finally {
+    if (fd !== null) {
+      try {
+        fs.closeSync(fd);
+      } catch {
+        // Ignore close failures while building best-effort feedback.
+      }
+    }
+  }
+}
+
+/**
+ * Check whether the task text itself says how baseline failures are treated:
+ * it must mention "baseline" and also use a classification cue such as
+ * "matches", "pre-existing", or "no new failures".
+ *
+ * @param {string} taskBlock - Text of the current task.
+ * @returns {boolean} True when both kinds of wording are present.
+ */
+function _taskExplicitlyHandlesBaselineFailures(taskBlock) {
+  const mentionsBaseline = /\bbaseline\b/i;
+  const classificationCue = /\b(match|matches|matching|classif(?:y|ied|ication)|pre-existing|preexisting|no new failures?)\b/i;
+
+  if (!mentionsBaseline.test(taskBlock)) {
+    return false;
+  }
+  return classificationCue.test(taskBlock);
+}
+
+/**
+ * Extract the files a task explicitly authorizes for baseline cleanup.
+ *
+ * Nothing is authorized unless the task uses cleanup wording ("authorized
+ * cleanup", "after fixing", ...). When it does, every backticked token that
+ * looks like a path is collected, with backslashes converted to '/'.
+ *
+ * @param {string} taskBlock - Text of the current task.
+ * @returns {{allowedFiles: string[]}} Unique paths in order of first appearance.
+ */
+function _detectAuthorizedBaselineCleanup(taskBlock) {
+  const cleanupWording = /\b(authori[sz]ed cleanup|after fixing|fixing the named baseline failures?)\b/i;
+  if (!taskBlock || !cleanupWording.test(taskBlock)) {
+    return { allowedFiles: [] };
+  }
+
+  // A Set keeps insertion order and de-duplicates in one step.
+  const unique = new Set();
+  const backtickPattern = /`([^`]+)`/g;
+  for (
+    let hit = backtickPattern.exec(taskBlock);
+    hit !== null;
+    hit = backtickPattern.exec(taskBlock)
+  ) {
+    const candidate = hit[1].trim();
+    if (_looksLikeCleanupPath(candidate)) {
+      unique.add(candidate.replace(/\\/g, '/'));
+    }
+  }
+
+  return { allowedFiles: Array.from(unique) };
+}
+
+/**
+ * Heuristic: does a backticked token look like a concrete file path?
+ *
+ * Rejects tokens with whitespace, tokens that start with a listed tool name
+ * (pnpm, npm, git, ...), option flags, and glob/brace patterns. Accepts what
+ * remains when it contains a '/' or ends in a file extension.
+ *
+ * @param {string} value - Candidate token.
+ * @returns {boolean} True when the token is plausibly a file path.
+ */
+function _looksLikeCleanupPath(value) {
+  if (!value || /\s/.test(value)) {
+    return false;
+  }
+
+  const disqualifiers = [
+    /^(pnpm|npm|yarn|node|gtimeout|timeout|rg|git)(\s|$)/i,
+    /^--?/,
+    /[*{}]/,
+  ];
+  if (disqualifiers.some((pattern) => pattern.test(value))) {
+    return false;
+  }
+
+  const hasDirectory = value.includes('/');
+  return hasDirectory || /\.[A-Za-z0-9]+$/.test(value);
+}
+
+/**
+ * Determine whether the current task has already used its repair attempt.
+ *
+ * A history entry for the same task counts when it is flagged
+ * `baselineGateRepairAttempted`, or when its changed files overlap the
+ * authorized cleanup files.
+ *
+ * @param {object[]} recentHistory - History entries to inspect.
+ * @param {object} currentTaskMeta - `{ number, description }` of the current task.
+ * @param {string[]} allowedFiles - Files authorized for cleanup.
+ * @returns {boolean} True when a prior attempt is found.
+ */
+function _baselineGateRepairBudgetUsed(recentHistory, currentTaskMeta, allowedFiles) {
+  if (!Array.isArray(recentHistory)) {
+    return false;
+  }
+
+  const probe = { mode: 'authorized_cleanup', allowedFiles };
+  for (const entry of recentHistory) {
+    if (!_historyEntryMatchesTask(entry, currentTaskMeta)) continue;
+    if (entry.baselineGateRepairAttempted === true) return true;
+    if (_baselineGateRepairAttempted(probe, entry.filesChanged || [])) return true;
+  }
+  return false;
+}
+
+/**
+ * Report whether an iteration touched any file authorized for cleanup.
+ *
+ * @param {object|null} conflict - Conflict analysis; only `authorized_cleanup`
+ *   conflicts with a non-empty `allowedFiles` can count as an attempt.
+ * @param {string[]} filesChanged - Files changed by the iteration.
+ * @returns {boolean} True when the changed files overlap the authorized files.
+ */
+function _baselineGateRepairAttempted(conflict, filesChanged) {
+  const cleanupAuthorized =
+    Boolean(conflict) &&
+    conflict.mode === 'authorized_cleanup' &&
+    Array.isArray(conflict.allowedFiles) &&
+    conflict.allowedFiles.length > 0;
+  const touchedSomething = Array.isArray(filesChanged) && filesChanged.length > 0;
+
+  if (!cleanupAuthorized || !touchedSomething) {
+    return false;
+  }
+  return _pathsIntersect(conflict.allowedFiles, filesChanged);
+}
+
+/**
+ * Check whether a history entry was recorded for the given task.
+ *
+ * The task number decides when the current task has one; the description is
+ * compared only for tasks without a number.
+ *
+ * @param {object|null} entry - History entry (`taskNumber`, `taskDescription`).
+ * @param {object|null} currentTaskMeta - `{ number, description }` of the current task.
+ * @returns {boolean} True when the entry belongs to the task.
+ */
+function _historyEntryMatchesTask(entry, currentTaskMeta) {
+  if (!entry || !currentTaskMeta) {
+    return false;
+  }
+
+  const number = currentTaskMeta.number || '';
+  if (number) {
+    return entry.taskNumber === number;
+  }
+
+  const description = currentTaskMeta.description || '';
+  return Boolean(description) && entry.taskDescription === description;
+}
+
+/**
+ * Test whether two path lists share at least one entry after normalization.
+ *
+ * @param {string[]|null} left - First list.
+ * @param {string[]|null} right - Second list.
+ * @returns {boolean} True when any normalized path appears in both lists.
+ */
+function _pathsIntersect(left, right) {
+  const known = new Set();
+  (left || []).forEach((candidate) => {
+    known.add(_normalizeComparablePath(candidate));
+  });
+
+  const firstShared = (right || []).findIndex((candidate) =>
+    known.has(_normalizeComparablePath(candidate))
+  );
+  return firstShared !== -1;
+}
+
+/**
+ * Reduce a path to a canonical form for equality comparison.
+ *
+ * Authorized cleanup files are written repo-relative in the task text, while
+ * changed-file lists may contain absolute paths. Absolute paths inside the
+ * current working directory are therefore made relative first; otherwise an
+ * absolute entry would never match and the repair budget would never be
+ * recorded as used. Separators are then unified to '/', and a leading './'
+ * and trailing slashes are removed.
+ *
+ * @param {string} pathValue - Path to normalize; nullish values become ''.
+ * @returns {string} Comparable path string.
+ */
+function _normalizeComparablePath(pathValue) {
+  const fsPath = require('path');
+  let value = String(pathValue || '');
+
+  if (value && fsPath.isAbsolute(value)) {
+    const relative = fsPath.relative(process.cwd(), value);
+    const escapesRepo =
+      relative === '' ||
+      relative === '..' ||
+      relative.startsWith(`..${fsPath.sep}`) ||
+      fsPath.isAbsolute(relative);
+    // Paths outside the working directory are left absolute.
+    if (!escapesRepo) {
+      value = relative;
+    }
+  }
+
+  return value
+    .replace(/\\/g, '/')
+    .replace(/^\.\//, '')
+    .replace(/\/+$/, '');
+}
+
function _extractErrorForIteration(errorEntries, iteration) {
if (!Array.isArray(errorEntries) || errorEntries.length === 0) return null;
@@ -1981,6 +2329,15 @@ module.exports = {
_formatAutoCommitMessage,
_truncateSubjectSummary,
_buildIterationFeedback,
+ _buildBaselineGateFeedback,
+ _analyzeBaselineGateConflict,
+ _formatBaselineGateFeedback,
+ _extractCurrentTaskBlock,
+ _detectStrictCleanGates,
+ _detectFailingBaselineGates,
+ _detectAuthorizedBaselineCleanup,
+ _baselineGateRepairBudgetUsed,
+ _baselineGateRepairAttempted,
_extractErrorForIteration,
_getCurrentTaskDescription,
_getCurrentTaskMeta,
diff --git a/package-lock.json b/package-lock.json
index 49fbb16..cefe514 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
{
"name": "spec-and-loop",
- "version": "3.3.1",
+ "version": "3.3.2",
"lockfileVersion": 3,
"requires": true,
"packages": {
"": {
"name": "spec-and-loop",
- "version": "3.3.1",
+ "version": "3.3.2",
"hasInstallScript": true,
"license": "GPL-3.0",
"os": [
diff --git a/package.json b/package.json
index 864712d..715d87d 100644
--- a/package.json
+++ b/package.json
@@ -1,6 +1,6 @@
{
"name": "spec-and-loop",
- "version": "3.3.1",
+ "version": "3.3.2",
"description": "OpenSpec + Ralph Loop integration for iterative development with opencode",
"main": "index.js",
"bin": {
diff --git a/tests/unit/javascript/mini-ralph-runner.test.js b/tests/unit/javascript/mini-ralph-runner.test.js
index 9a68bc8..958c4cb 100644
--- a/tests/unit/javascript/mini-ralph-runner.test.js
+++ b/tests/unit/javascript/mini-ralph-runner.test.js
@@ -21,6 +21,15 @@ const {
_formatAutoCommitMessage,
_truncateSubjectSummary,
_buildIterationFeedback,
+ _buildBaselineGateFeedback,
+ _analyzeBaselineGateConflict,
+ _formatBaselineGateFeedback,
+ _extractCurrentTaskBlock,
+ _detectStrictCleanGates,
+ _detectFailingBaselineGates,
+ _detectAuthorizedBaselineCleanup,
+ _baselineGateRepairBudgetUsed,
+ _baselineGateRepairAttempted,
_extractErrorForIteration,
_getCurrentTaskDescription,
_detectProtectedCommitArtifacts,
@@ -994,6 +1003,246 @@ describe('iteration outcome helpers', () => {
});
});
+// ---------------------------------------------------------------------------
+// _buildBaselineGateFeedback
+// ---------------------------------------------------------------------------
+
+describe('_buildBaselineGateFeedback()', () => {
+ function writeBaseline(ralphDir, name, exitCode) { // Test helper: writes <ralphDir>/baselines/<name> with gate output followed by an EXIT=<exitCode> footer.
+ const baselinesDir = path.join(ralphDir, 'baselines');
+ fs.mkdirSync(baselinesDir, { recursive: true }); // creates the directory and any missing parents
+ fs.writeFileSync(
+ path.join(baselinesDir, name),
+ `gate output\n\nEXIT=${exitCode}\n`,
+ 'utf8'
+ );
+ }
+
+ test('detects strict clean gate conflicts against failing baselines', () => {
+ const ralphDir = path.join(tmpDir, '.ralph');
+ const tasksFile = path.join(tmpDir, 'tasks.md');
+ fs.writeFileSync(tasksFile, [
+ '- [x] 0.1 Pre-flight',
+ '- [ ] 0.3 Add config gates',
+ ' - Done when:',
+ ' - `pnpm typecheck` exits 0',
+ '',
+ ].join('\n'), 'utf8');
+ writeBaseline(ralphDir, 'demo-typecheck.txt', 2);
+
+ const feedback = _buildBaselineGateFeedback(ralphDir, tasksFile, {
+ number: '0.3',
+ description: 'Add config gates',
+ });
+
+ expect(feedback).toContain('requires a clean gate');
+ expect(feedback).toContain('pnpm typecheck');
+ expect(feedback).toContain('baselines/demo-typecheck.txt exits 2');
+ expect(feedback).toContain('emit BLOCKED_HANDOFF');
+ });
+
+ test('allows explicit baseline classification without requesting cleanup', () => {
+ const ralphDir = path.join(tmpDir, '.ralph');
+ const tasksFile = path.join(tmpDir, 'tasks.md');
+ fs.writeFileSync(tasksFile, [
+ '- [ ] 0.3 Add config gates',
+ ' - Done when:',
+ ' - `pnpm typecheck` exits 0, or failures match the 0.1 baseline with no new failures',
+ '',
+ ].join('\n'), 'utf8');
+ writeBaseline(ralphDir, 'demo-typecheck.txt', 2);
+
+ const feedback = _buildBaselineGateFeedback(ralphDir, tasksFile, {
+ number: '0.3',
+ description: 'Add config gates',
+ });
+
+ expect(feedback).toContain('appears to authorize baseline classification');
+ expect(feedback).toContain('do not repair unrelated baseline failures');
+ });
+
+ test('allows one authorized cleanup attempt for named files', () => {
+ const ralphDir = path.join(tmpDir, '.ralph');
+ const tasksFile = path.join(tmpDir, 'tasks.md');
+ fs.writeFileSync(tasksFile, [
+ '- [ ] 0.3 Add config gates',
+ ' - Done when:',
+ ' - `pnpm typecheck` exits 0 after fixing the named baseline failures in `src/app/docs/page.tsx` and `src/components/docs-mdx.tsx`',
+ '',
+ ].join('\n'), 'utf8');
+ writeBaseline(ralphDir, 'demo-typecheck.txt', 2);
+
+ const feedback = _buildBaselineGateFeedback(ralphDir, tasksFile, {
+ number: '0.3',
+ description: 'Add config gates',
+ });
+
+ expect(feedback).toContain('exactly one repair attempt');
+ expect(feedback).toContain('Authorized cleanup files: src/app/docs/page.tsx, src/components/docs-mdx.tsx');
+ });
+
+ test('switches authorized cleanup to handoff guidance after the budget is used', () => {
+ const ralphDir = path.join(tmpDir, '.ralph');
+ const tasksFile = path.join(tmpDir, 'tasks.md');
+ fs.writeFileSync(tasksFile, [
+ '- [ ] 0.3 Add config gates',
+ ' - Done when:',
+ ' - `pnpm typecheck` exits 0 after fixing the named baseline failures in `src/app/docs/page.tsx`',
+ '',
+ ].join('\n'), 'utf8');
+ writeBaseline(ralphDir, 'demo-typecheck.txt', 2);
+
+ const feedback = _buildBaselineGateFeedback(ralphDir, tasksFile, {
+ number: '0.3',
+ description: 'Add config gates',
+ }, [
+ {
+ taskNumber: '0.3',
+ taskDescription: 'Add config gates',
+ baselineGateRepairAttempted: true,
+ filesChanged: ['src/app/docs/page.tsx'],
+ },
+ ]);
+
+ expect(feedback).toContain('one repair attempt has already been used');
+ expect(feedback).toContain('emit BLOCKED_HANDOFF');
+ });
+
+ test('detects authorized cleanup files from backticked paths only', () => {
+ const cleanup = _detectAuthorizedBaselineCleanup([
+ 'Done when:',
+ '- `pnpm typecheck` exits 0 after fixing the named baseline failures in `src/app/docs/page.tsx`, `src/components/docs-mdx.tsx`, and `pnpm typecheck`',
+ ].join('\n'));
+
+ expect(cleanup.allowedFiles).toEqual([
+ 'src/app/docs/page.tsx',
+ 'src/components/docs-mdx.tsx',
+ ]);
+ });
+
+ test('marks repair budget used only when authorized files changed for the same task', () => {
+ const conflict = {
+ mode: 'authorized_cleanup',
+ allowedFiles: ['src/app/docs/page.tsx'],
+ };
+
+ expect(_baselineGateRepairAttempted(conflict, ['src/app/docs/page.tsx'])).toBe(true);
+ expect(_baselineGateRepairAttempted(conflict, ['src/lib/observability/config.ts'])).toBe(false);
+ expect(_baselineGateRepairBudgetUsed([
+ {
+ taskNumber: '0.3',
+ taskDescription: 'Add config gates',
+ filesChanged: ['src/app/docs/page.tsx'],
+ },
+ ], { number: '0.3', description: 'Add config gates' }, ['src/app/docs/page.tsx'])).toBe(true);
+ expect(_baselineGateRepairBudgetUsed([
+ {
+ taskNumber: '0.4',
+ taskDescription: 'Different task',
+ filesChanged: ['src/app/docs/page.tsx'],
+ },
+ ], { number: '0.3', description: 'Add config gates' }, ['src/app/docs/page.tsx'])).toBe(false);
+ });
+
+ test('full history keeps the one-repair budget after it falls out of the recent window', () => {
+ const ralphDir = path.join(tmpDir, '.ralph-full-history-budget');
+ const tasksFile = path.join(tmpDir, 'tasks.md');
+ fs.writeFileSync(tasksFile, [
+ '- [ ] 0.3 Add config gates',
+ ' - Done when:',
+ ' - `pnpm typecheck` exits 0 after fixing the named baseline failures in `src/app/docs/page.tsx`',
+ '',
+ ].join('\n'), 'utf8');
+ writeBaseline(ralphDir, 'demo-typecheck.txt', 2);
+ history.append(ralphDir, {
+ iteration: 1,
+ duration: 1,
+ completionDetected: false,
+ taskDetected: false,
+ blockedHandoffDetected: false,
+ taskNumber: '0.3',
+ taskDescription: 'Add config gates',
+ filesChanged: ['src/app/docs/page.tsx'],
+ exitCode: 0,
+ });
+ for (let iteration = 2; iteration <= 5; iteration++) {
+ history.append(ralphDir, {
+ iteration,
+ duration: 1,
+ completionDetected: false,
+ taskDetected: false,
+ blockedHandoffDetected: false,
+ taskNumber: '0.3',
+ taskDescription: 'Add config gates',
+ filesChanged: [],
+ exitCode: 0,
+ });
+ }
+
+ expect(_buildBaselineGateFeedback(ralphDir, tasksFile, {
+ number: '0.3',
+ description: 'Add config gates',
+ }, history.recent(ralphDir, 3))).toContain('exactly one repair attempt');
+ expect(_buildBaselineGateFeedback(ralphDir, tasksFile, {
+ number: '0.3',
+ description: 'Add config gates',
+ }, history.read(ralphDir))).toContain('one repair attempt has already been used');
+ });
+
+ test('returns empty feedback when matching baseline passes', () => {
+ const ralphDir = path.join(tmpDir, '.ralph');
+ const tasksFile = path.join(tmpDir, 'tasks.md');
+ fs.writeFileSync(tasksFile, [
+ '- [ ] 0.3 Add config gates',
+ ' - Done when:',
+ ' - `pnpm typecheck` exits 0',
+ '',
+ ].join('\n'), 'utf8');
+ writeBaseline(ralphDir, 'demo-typecheck.txt', 0);
+
+ expect(_buildBaselineGateFeedback(ralphDir, tasksFile, {
+ number: '0.3',
+ description: 'Add config gates',
+ })).toBe('');
+ });
+
+ test('extracts current task block and detects strict gates', () => {
+ const tasksFile = path.join(tmpDir, 'tasks.md');
+ fs.writeFileSync(tasksFile, [
+ '- [x] 0.1 Pre-flight',
+ ' - Done when:',
+ ' - `pnpm lint` exits 0',
+ '- [ ] 0.3 Add config gates',
+ ' - Done when:',
+ ' - `pnpm typecheck` exits 0',
+ '',
+ ].join('\n'), 'utf8');
+
+ const block = _extractCurrentTaskBlock(tasksFile, {
+ number: '0.3',
+ description: 'Add config gates',
+ });
+
+ expect(block).toContain('0.3 Add config gates');
+ expect(block).not.toContain('0.1 Pre-flight');
+ expect(_detectStrictCleanGates(block)).toEqual([
+ expect.objectContaining({ name: 'typecheck' }),
+ ]);
+ });
+
+ test('finds failing gate baselines by filename and EXIT footer', () => {
+ const ralphDir = path.join(tmpDir, '.ralph');
+ writeBaseline(ralphDir, 'demo-typecheck.txt', 2);
+ writeBaseline(ralphDir, 'demo-lint.txt', 0);
+ writeBaseline(ralphDir, 'demo-test.txt', 1);
+
+ expect(_detectFailingBaselineGates(ralphDir)).toEqual([
+ { name: 'typecheck', file: path.join('baselines', 'demo-typecheck.txt'), exitCode: 2 },
+ { name: 'test', file: path.join('baselines', 'demo-test.txt'), exitCode: 1 },
+ ]);
+ });
+});
+
// ---------------------------------------------------------------------------
// _extractErrorForIteration
// ---------------------------------------------------------------------------
@@ -2121,6 +2370,44 @@ describe('run() with mocked invoker', () => {
}
});
+ test('injects baseline gate conflict feedback into task prompts', async () => {
+ const ralphDir = path.join(tmpDir, '.ralph-baseline-conflict');
+ const tasksFile = path.join(tmpDir, 'tasks.md');
+ fs.writeFileSync(tasksFile, [
+ '- [ ] 0.3 Add config gates',
+ ' - Done when:',
+ ' - `pnpm typecheck` exits 0',
+ '',
+ ].join('\n'), 'utf8');
+ fs.mkdirSync(path.join(ralphDir, 'baselines'), { recursive: true });
+ fs.writeFileSync(
+ path.join(ralphDir, 'baselines', 'demo-typecheck.txt'),
+ 'typecheck output\nEXIT=2\n',
+ 'utf8'
+ );
+
+ let capturedPrompt = '';
+ const restore = mockInvoker(invoker, async (opts) => {
+ capturedPrompt = opts.prompt;
+ return {
+ stdout: 'COMPLETE',
+ exitCode: 0,
+ filesChanged: [],
+ toolUsage: [],
+ };
+ });
+
+ try {
+ await run(makeOptions({ ralphDir, tasksMode: true, tasksFile, maxIterations: 1 }));
+
+ expect(capturedPrompt).toContain('## Baseline Gate Conflict');
+ expect(capturedPrompt).toContain('baseline-matching failures may be classified');
+ expect(capturedPrompt).toContain('baselines/demo-typecheck.txt exits 2');
+ } finally {
+ restore();
+ }
+ });
+
test('commits pending dirty paths when the same blocked task later completes', async () => {
const ralphDir = path.join(tmpDir, '.ralph-pending-same-task');
const tasksFile = path.join(tmpDir, 'tasks.md');
From 200700d148ccb3ac6c9853c786cb0fd26c73e0f8 Mon Sep 17 00:00:00 2001
From: Nixon Cheaz <6854716+ncheaz@users.noreply.github.com>
Date: Tue, 5 May 2026 14:43:29 -0400
Subject: [PATCH 3/3] Make gating tests more surgical
---
OPENSPEC-RALPH-BP.md | 42 +++++++--
lib/mini-ralph/runner.js | 73 +++++++++++++++-
scripts/ralph-run.sh | 4 +-
tests/helpers/test-functions.sh | 2 +
.../unit/javascript/mini-ralph-runner.test.js | 86 +++++++++++++++++++
5 files changed, 196 insertions(+), 11 deletions(-)
diff --git a/OPENSPEC-RALPH-BP.md b/OPENSPEC-RALPH-BP.md
index 2176c45..6d9f0e3 100644
--- a/OPENSPEC-RALPH-BP.md
+++ b/OPENSPEC-RALPH-BP.md
@@ -21,6 +21,7 @@ Enforced rules:
- Title is one outcome, not a list. If you need "and" twice, split.
- Scope names files so the loop does not hunt.
- `Done when` bullets are observable or runnable. No soft verbs (`ensure`, `support`, `validate`, `keep`) without attached evidence.
+- Verifier commands use the narrowest runnable command that proves the scoped change. Prefer a named test file, spec pattern, package script, or static check over a full-suite command.
- `Stop and hand off if` gives the loop written permission to halt.
## Ordering
@@ -52,6 +53,16 @@ Rules:
Split test: if the loop stopped halfway, would the repo be clean and reviewable? If yes and there's a verifier for each half, split. If no half is meaningful alone, don't split.
+## Surgical validation
+
+Task validators must be surgical and efficient so the loop spends tokens on implementation signal, not unrelated test noise.
+
+- Start every task with the cheapest verifier that proves the task's stated scope: direct unit test file, targeted node/browser spec, exact lint/typecheck command for touched files if available, schema validator, or focused `rg` assertion.
+- Verify command routing before writing it into `tasks.md`. If `npm test -- <pattern>` or similar still runs unrelated suites in that repo, write the direct runner command instead (for example, `pnpm exec vitest --config <config-file> --run <test-file>`).
+- Use broad gates (`npm test`, `pnpm typecheck`, `make all`, browser/e2e suites) only when the task owns repo-wide integration behavior, when they are recorded as pre-flight baselines, or in a final integrated quality-gate task.
+- If a broad gate is still required for a narrow task, pair it with explicit baseline classification: `` `<gate command>` exits 0, or failures match the pre-flight baseline with no new failures in this task's scope ``.
+- Prefer one focused verifier per task. Add a second verifier only when it proves a different artifact class, such as a schema validator plus one targeted unit test.
+
## Quality gates
- A failing `Done when` check means the task is NOT done. No rationalization.
@@ -63,17 +74,34 @@ Split test: if the loop stopped halfway, would the repo be clean and reviewable?
- Authorized cleanup: `` `` exits 0 after fixing the named baseline failures in `` and `` ``
- Hard blocker: `` `` exits 0; baseline failures are not allowed for this task ``
- When strict clean-gate text conflicts with a failing pre-flight baseline and no classification/cleanup rule is written, `ralph-run` will warn the agent to stop with `BLOCKED_HANDOFF` instead of spending iterations on unauthorized cleanup.
+- When a task refers to a pre-flight baseline, or follows a completed pre-flight baseline task, but the matching `.ralph/baselines/<change>-<gate>.txt` artifact is missing, `ralph-run` will warn the agent to stop with `BLOCKED_HANDOFF` instead of treating undocumented failures as known.
+- A pre-flight baseline task must produce runner-recognizable artifacts, not just human-readable logs: baseline files must live under the change-local `.ralph/baselines/` directory that `ralph-run` reads, their filenames must identify the gate (`typecheck`, `lint`, `test`, etc.), and every captured gate file must end with a literal `EXIT=<code>` line.
+- If a later task is allowed to repair baseline artifact compatibility, say so explicitly. Its `Scope:` must name the change-local `.ralph/baselines/` directory and its `Done when:` bullets must require the missing or malformed baseline files to be restored with parseable `EXIT=<code>` footers. Without that authorization, baseline artifact repair remains an operator handoff, not product implementation work.
- Authorized cleanup is intentionally narrow: the named files must be backticked, the cleanup is limited to compiler/lint-only fixes, and `ralph-run` gives the agent one repair attempt for those files on that task. If the gate still fails after that attempt, the next prompt tells the agent to hand off instead of retrying.
Pre-flight template:
```markdown
- [ ] **Pre-flight: record quality gate baselines**
- - Scope: no code edits
+ - Scope: no code edits; writes only under `.ralph/baselines/`
- Change: Capture current state of all gates later tasks require.
- Done when:
- - `.ralph/baselines/-.txt` exists for each gate with full output
- - `.ralph/baselines/-readme.md` lists passing/failing gates and exact failing identifiers
- - Stop and hand off if: any gate is nondeterministic across two runs.
+ - `.ralph/baselines/<gate>.txt` or `.ralph/baselines/<change>-<gate>.txt` exists for each gate with full output
+ - every captured gate file ends with a literal `EXIT=<code>` line
+ - `.ralph/baselines/<change>-readme.md` lists passing/failing gates, exit codes, and exact failing identifiers
+ - Stop and hand off if: any gate is nondeterministic across two runs, or any captured baseline file is missing the `EXIT=<code>` final line after retrying the capture command.
+```
+
+Baseline artifact compatibility repair template:
+```markdown
+- [ ] **Repair pre-flight baseline artifact compatibility**
+ - Scope: `.ralph/baselines/`, `tasks.md`
+ - Change: Restore or regenerate baseline artifacts so `ralph-run` can classify later quality-gate failures.
+ - Done when:
+ - change-local `.ralph/baselines/<gate>.txt` files exist for every gate referenced by later baseline-classified tasks
+ - every restored gate file ends with a literal `EXIT=<code>` line
+ - the baseline readme records the source of any restored artifact and the exit code for each gate
+ - Stop and hand off if:
+ - the original gate output is missing, the original exit code cannot be recovered, or restoring the artifact would require rerunning a nondeterministic gate.
```
## Anti-patterns (do not do these)
@@ -86,6 +114,8 @@ Pre-flight template:
- `Done when` that only checks unit tests when real behavior is end-to-end
- Visual verification without splitting from code changes (context overflow risk)
- "Maybe this, maybe that" wording in tasks or specs once loop starts
+- Repo-wide or slow validators for a narrow task when a focused verifier exists (`npm test`, `make all`, full browser/e2e suites)
+- Ambiguous package-manager forwarding such as `npm test -- event-schema` unless confirmed to execute only the intended test scope
## Examples
@@ -125,7 +155,7 @@ Pre-flight template:
- Change: Harbor components registered once at boot, typed for TSX.
- Done when:
- `rg "registerHarbor" src` returns exactly one call site
- - `npm test -- harbor-bootstrap` passes
+ - `npm exec -- vitest --run src/components/harbor-bootstrap.test.tsx` exits 0
- Stop and hand off if: more than one registration site is required.
```
@@ -142,7 +172,7 @@ Pre-flight template:
- Change: ReleaseCard renders timestamps through the shared helper.
- Done when:
- `rg "toLocaleDateString" src/components/ReleaseCard.tsx` returns no matches
- - `npm test -- ReleaseCard` passes
+ - `npm exec -- vitest --run src/components/ReleaseCard.test.tsx` exits 0
- Stop and hand off if: `formatDate` does not cover a required locale.
```
diff --git a/lib/mini-ralph/runner.js b/lib/mini-ralph/runner.js
index 26cfb04..db181e9 100644
--- a/lib/mini-ralph/runner.js
+++ b/lib/mini-ralph/runner.js
@@ -1797,7 +1797,25 @@ function _analyzeBaselineGateConflict(ralphDir, tasksFile, currentTaskMeta, rece
const strictGates = _detectStrictCleanGates(taskBlock);
if (strictGates.length === 0) return null;
- const failingBaselines = _detectFailingBaselineGates(ralphDir);
+ const recordedBaselines = _detectRecordedBaselineGates(ralphDir);
+ const missingBaselines = _detectMissingBaselineGates(
+ strictGates,
+ recordedBaselines,
+ taskBlock,
+ tasksFile
+ );
+
+ if (missingBaselines.length > 0) {
+ return {
+ mode: 'missing_baseline',
+ conflicts: [],
+ missingBaselines,
+ allowedFiles: [],
+ budgetUsed: false,
+ };
+ }
+
+ const failingBaselines = recordedBaselines.filter((gate) => gate.exitCode !== 0);
if (failingBaselines.length === 0) return null;
const baselineByGate = new Map(failingBaselines.map((gate) => [gate.name, gate]));
@@ -1835,13 +1853,31 @@ function _analyzeBaselineGateConflict(ralphDir, tasksFile, currentTaskMeta, rece
}
function _formatBaselineGateFeedback(conflict) {
- if (!conflict || !Array.isArray(conflict.conflicts) || conflict.conflicts.length === 0) {
+ const conflicts = Array.isArray(conflict && conflict.conflicts) ? conflict.conflicts : [];
+ const missingBaselines = Array.isArray(conflict && conflict.missingBaselines)
+ ? conflict.missingBaselines
+ : [];
+
+ if (!conflict || (conflicts.length === 0 && missingBaselines.length === 0)) {
return '';
}
- const conflictLines = conflict.conflicts.map(({ gate, baseline }) =>
+ const conflictLines = conflicts.map(({ gate, baseline }) =>
`- ${gate.command}: baseline ${baseline.file} exits ${baseline.exitCode}.`
);
+ const missingLines = missingBaselines.map((gate) =>
+ `- ${gate.command}: no matching baseline artifact found under .ralph/baselines.`
+ );
+
+ if (conflict.mode === 'missing_baseline') {
+ return [
+ 'The current task uses a strict clean quality gate and the task plan indicates a pre-flight baseline should exist, but the matching baseline artifact is missing.',
+ 'Do not classify failures as pre-existing or spend an implementation iteration trying to satisfy an impossible task contract.',
+ 'emit BLOCKED_HANDOFF and ask the operator to rerun or restore the pre-flight baseline artifact, or update the task spec to authorize a different gate policy.',
+ '',
+ ...missingLines,
+ ].join('\n');
+ }
if (conflict.mode === 'authorized_cleanup') {
if (conflict.budgetUsed) {
@@ -1951,6 +1987,10 @@ function _detectStrictCleanGates(taskBlock) {
}
function _detectFailingBaselineGates(ralphDir) {
+ return _detectRecordedBaselineGates(ralphDir).filter((gate) => gate.exitCode !== 0);
+}
+
+function _detectRecordedBaselineGates(ralphDir) {
const fs = require('fs');
const fsPath = require('path');
const baselinesDir = fsPath.join(ralphDir, 'baselines');
@@ -1971,7 +2011,7 @@ function _detectFailingBaselineGates(ralphDir) {
if (!exitMatch) continue;
const exitCode = Number(exitMatch[1]);
- if (!Number.isInteger(exitCode) || exitCode === 0) continue;
+ if (!Number.isInteger(exitCode)) continue;
gates.push({ name: gateName, file: fsPath.join('baselines', name), exitCode });
}
@@ -1983,6 +2023,29 @@ function _detectFailingBaselineGates(ralphDir) {
);
}
+function _detectMissingBaselineGates(strictGates, recordedBaselines, taskBlock, tasksFile) { // Returns the strict gates that have no recorded baseline of the same name, but only when a baseline is expected.
+ if (!Array.isArray(strictGates) || strictGates.length === 0) return [];
+
+ const expectsBaseline = // expected when _taskExplicitlyHandlesBaselineFailures(taskBlock) (defined outside this hunk) or _completedPreflightBaselineExists(tasksFile) is truthy
+ _taskExplicitlyHandlesBaselineFailures(taskBlock) ||
+ _completedPreflightBaselineExists(tasksFile);
+
+ if (!expectsBaseline) return []; // no baseline policy in play, so nothing is reported as missing
+
+ const recordedNames = new Set((recordedBaselines || []).map((gate) => gate.name));
+ return strictGates.filter((gate) => !recordedNames.has(gate.name)); // gates are matched to baselines by `name`
+}
+
+function _completedPreflightBaselineExists(tasksFile) { // True when tasksFile has a checked "- [x]" line mentioning pre-flight followed by baseline(s).
+ const fs = require('fs');
+ if (!tasksFile || !fs.existsSync(tasksFile)) return false; // a missing path or file counts as "no pre-flight task"
+
+ const lines = fs.readFileSync(tasksFile, 'utf8').split(/\r?\n/); // accepts both LF and CRLF line endings
+ return lines.some((line) =>
+ /^-\s+\[x\]\s+.*\bpre-?flight\b.*\bbaselines?\b/i.test(line) // case-insensitive; the "-" must be the first character, so indented bullets never match
+ );
+}
+
function _gateNameFromBaselineFile(fileName) {
const normalized = fileName.toLowerCase();
if (/(^|[-_.])typecheck([-_.]|\.|$)/.test(normalized)) return 'typecheck';
@@ -2335,6 +2398,8 @@ module.exports = {
_extractCurrentTaskBlock,
_detectStrictCleanGates,
_detectFailingBaselineGates,
+ _detectRecordedBaselineGates,
+ _detectMissingBaselineGates,
_detectAuthorizedBaselineCleanup,
_baselineGateRepairBudgetUsed,
_baselineGateRepairAttempted,
diff --git a/scripts/ralph-run.sh b/scripts/ralph-run.sh
index 14c9b5d..a50283a 100755
--- a/scripts/ralph-run.sh
+++ b/scripts/ralph-run.sh
@@ -1182,6 +1182,7 @@ rules:
tasks:
- Use the task template from OPENSPEC-RALPH-BP.md
- Each task has one dominant outcome and one verification cluster
+ - Use surgical, scope-targeted validation commands; reserve broad gates for pre-flight baselines or final integration tasks
- Include explicit stop-and-hand-off conditions
design:
- Do not leave core policy choices unresolved
@@ -1203,6 +1204,7 @@ Before generating any OpenSpec artifacts, you MUST:
- Read `openspec/OPENSPEC-RALPH-BP.md` (Ralph Wiggum authoring guide)
- Verify proposals against the Ralph authoring checklist
- Ensure tasks use the task template with objective done-when conditions
+- Ensure each task uses the narrowest verifier that proves its scope; use broad gates only with baseline classification or final integration tasks
- Include explicit stop-and-hand-off conditions in every task
RALPH_AGENTS
log_verbose "Updated $agents_file with Ralph Wiggum compliance section"
@@ -1311,7 +1313,7 @@ WARNING_BOX
fi
local ralph_guidance=""
if [[ -f "$bp_file" ]]; then
- ralph_guidance=" When creating artifacts, read ${bp_file} and follow the Ralph Wiggum task template and authoring checklist. Ensure the proposal includes explicit scope, non-goals, first-rollout boundaries, and capabilities that map to Ralph-friendly tasks. Ensure tasks use the task template with objective done-when conditions and explicit stop-and-hand-off conditions. Do NOT restore or copy from any .bak backup files - write fresh artifacts from scratch."
+ ralph_guidance=" When creating artifacts, read ${bp_file} and follow the Ralph Wiggum task template and authoring checklist. Ensure the proposal includes explicit scope, non-goals, first-rollout boundaries, and capabilities that map to Ralph-friendly tasks. Ensure tasks use the task template with objective done-when conditions, surgical scope-targeted verifier commands, and explicit stop-and-hand-off conditions. Prefer direct test-file or validator commands over full-suite commands; reserve broad gates for pre-flight baselines or final integration tasks. Do NOT restore or copy from any .bak backup files - write fresh artifacts from scratch."
fi
log_info "Invoking opencode to regenerate proposal and tasks with Ralph Wiggum best practices..."
diff --git a/tests/helpers/test-functions.sh b/tests/helpers/test-functions.sh
index dd838fd..8501a42 100644
--- a/tests/helpers/test-functions.sh
+++ b/tests/helpers/test-functions.sh
@@ -914,6 +914,7 @@ rules:
tasks:
- Use the task template from OPENSPEC-RALPH-BP.md
- Each task has one dominant outcome and one verification cluster
+ - Use surgical, scope-targeted validation commands; reserve broad gates for pre-flight baselines or final integration tasks
- Include explicit stop-and-hand-off conditions
design:
- Do not leave core policy choices unresolved
@@ -935,6 +936,7 @@ Before generating any OpenSpec artifacts, you MUST:
- Read `OPENSPEC-RALPH-BP.md` in the project root
- Verify proposals against the Ralph authoring checklist
- Ensure tasks use the task template with objective done-when conditions
+- Ensure each task uses the narrowest verifier that proves its scope; use broad gates only with baseline classification or final integration tasks
- Include explicit stop-and-hand-off conditions in every task
RALPH_AGENTS
log_verbose "Updated $agents_file with Ralph Wiggum compliance section"
diff --git a/tests/unit/javascript/mini-ralph-runner.test.js b/tests/unit/javascript/mini-ralph-runner.test.js
index 958c4cb..31d8380 100644
--- a/tests/unit/javascript/mini-ralph-runner.test.js
+++ b/tests/unit/javascript/mini-ralph-runner.test.js
@@ -27,6 +27,8 @@ const {
_extractCurrentTaskBlock,
_detectStrictCleanGates,
_detectFailingBaselineGates,
+ _detectRecordedBaselineGates,
+ _detectMissingBaselineGates,
_detectAuthorizedBaselineCleanup,
_baselineGateRepairBudgetUsed,
_baselineGateRepairAttempted,
@@ -1206,6 +1208,64 @@ describe('_buildBaselineGateFeedback()', () => {
})).toBe('');
});
+ test('warns when a completed pre-flight baseline task has no matching baseline artifact', () => {
+ const ralphDir = path.join(tmpDir, '.ralph');
+ const tasksFile = path.join(tmpDir, 'tasks.md');
+ fs.writeFileSync(tasksFile, [
+ '- [x] 0.1 Pre-flight: record quality gate baselines',
+ ' - Done when:',
+ ' - `.ralph/baselines/demo-typecheck.txt` exists',
+ '- [ ] 0.3 Add config gates',
+ ' - Done when:',
+ ' - `pnpm typecheck` exits 0',
+ '',
+ ].join('\n'), 'utf8');
+
+ const feedback = _buildBaselineGateFeedback(ralphDir, tasksFile, {
+ number: '0.3',
+ description: 'Add config gates',
+ });
+
+ expect(feedback).toContain('matching baseline artifact is missing');
+ expect(feedback).toContain('pnpm typecheck');
+ expect(feedback).toContain('emit BLOCKED_HANDOFF');
+ });
+
+ test('warns when baseline classification is explicit but the baseline artifact is missing', () => {
+ const ralphDir = path.join(tmpDir, '.ralph');
+ const tasksFile = path.join(tmpDir, 'tasks.md');
+ fs.writeFileSync(tasksFile, [
+ '- [ ] 0.3 Add config gates',
+ ' - Done when:',
+ ' - `pnpm typecheck` exits 0, or failures match the pre-flight baseline with no new failures',
+ '',
+ ].join('\n'), 'utf8');
+
+ const feedback = _buildBaselineGateFeedback(ralphDir, tasksFile, {
+ number: '0.3',
+ description: 'Add config gates',
+ });
+
+ expect(feedback).toContain('matching baseline artifact is missing');
+ expect(feedback).toContain('restore the pre-flight baseline artifact');
+ });
+
+ test('does not warn about missing baselines when no pre-flight baseline policy is present', () => {
+ const ralphDir = path.join(tmpDir, '.ralph');
+ const tasksFile = path.join(tmpDir, 'tasks.md');
+ fs.writeFileSync(tasksFile, [
+ '- [ ] 0.3 Add config gates',
+ ' - Done when:',
+ ' - `pnpm typecheck` exits 0',
+ '',
+ ].join('\n'), 'utf8');
+
+ expect(_buildBaselineGateFeedback(ralphDir, tasksFile, {
+ number: '0.3',
+ description: 'Add config gates',
+ })).toBe('');
+ });
+
test('extracts current task block and detects strict gates', () => {
const tasksFile = path.join(tmpDir, 'tasks.md');
fs.writeFileSync(tasksFile, [
@@ -1236,11 +1296,37 @@ describe('_buildBaselineGateFeedback()', () => {
writeBaseline(ralphDir, 'demo-lint.txt', 0);
writeBaseline(ralphDir, 'demo-test.txt', 1);
+ expect(_detectRecordedBaselineGates(ralphDir)).toEqual([
+ { name: 'typecheck', file: path.join('baselines', 'demo-typecheck.txt'), exitCode: 2 },
+ { name: 'lint', file: path.join('baselines', 'demo-lint.txt'), exitCode: 0 },
+ { name: 'test', file: path.join('baselines', 'demo-test.txt'), exitCode: 1 },
+ ]);
expect(_detectFailingBaselineGates(ralphDir)).toEqual([
{ name: 'typecheck', file: path.join('baselines', 'demo-typecheck.txt'), exitCode: 2 },
{ name: 'test', file: path.join('baselines', 'demo-test.txt'), exitCode: 1 },
]);
});
+
+ test('detects missing baseline gates only when a baseline policy exists', () => {
+ const strictGates = [
+ { name: 'typecheck', command: 'pnpm typecheck' },
+ { name: 'lint', command: 'pnpm lint' },
+ ];
+
+ expect(_detectMissingBaselineGates(
+ strictGates,
+ [{ name: 'lint', file: 'baselines/demo-lint.txt', exitCode: 0 }],
+ 'Done when:\n- `pnpm typecheck` exits 0, or failures match the pre-flight baseline',
+ path.join(tmpDir, 'missing-tasks.md')
+ )).toEqual([{ name: 'typecheck', command: 'pnpm typecheck' }]);
+
+ expect(_detectMissingBaselineGates(
+ strictGates,
+ [],
+ 'Done when:\n- `pnpm typecheck` exits 0',
+ path.join(tmpDir, 'missing-tasks.md')
+ )).toEqual([]);
+ });
});
// ---------------------------------------------------------------------------