From 9aefefaa8d3f325996b8c7435c578cbd49c1dae4 Mon Sep 17 00:00:00 2001 From: mark-dingwall Date: Tue, 14 Apr 2026 19:20:45 +1000 Subject: [PATCH] fix: sanitize LLM-sourced quality gate commands (backticks, $ prompts) LLM responses wrapped in inline backticks (`command`) were stored verbatim in project config. Shell interpreted backticks as command substitution, causing exit 127 on every quality gate run. Additionally, LLM-sourced commands were never invalidated on failure, creating a permanent stuck state. Three-layer fix: - sanitizeLLMResponse() strips inline backticks and leading $ prompts - ensureQualityConfig() retroactively sanitizes existing poisoned configs - quality-gate-runner flags failed LLM commands as llm-failed for re-detection Co-Authored-By: Claude Opus 4.6 (1M context) --- lib/quality-detection.js | 50 ++++++++++++++++++++++-- scripts/quality-gate-runner.js | 5 ++- tests/quality-detection.test.js | 69 +++++++++++++++++++++++++++++++++ tests/quality-gate.test.js | 6 +-- 4 files changed, 121 insertions(+), 9 deletions(-) diff --git a/lib/quality-detection.js b/lib/quality-detection.js index 526bdfee..64719ab1 100644 --- a/lib/quality-detection.js +++ b/lib/quality-detection.js @@ -15,6 +15,7 @@ const QUALITY_FILE = '.zeroshot-quality'; const SOURCE_HEURISTIC = 'heuristic'; const SOURCE_LLM = 'llm'; const SOURCE_HEURISTIC_FAILED = 'heuristic-failed'; +const SOURCE_LLM_FAILED = 'llm-failed'; // Provider binary mapping (avoids lib/ → src/ dependency) const PROVIDER_BINARIES = { @@ -534,6 +535,11 @@ function sanitizeLLMResponse(response) { cleaned = lines[0].trim(); + // Strip inline backticks: `command` → command + cleaned = cleaned.replace(/^`(.*)`$/, '$1').trim(); + // Strip leading $ prompt: $ npm test → npm test + cleaned = cleaned.replace(/^\$\s+/, '').trim(); + // Reject empty if (!cleaned) return null; @@ -556,6 +562,7 @@ function detectWithLLM(projectDir, provider) { const prompt = 'Given this project, output a single shell command chain (using &&) that runs ' + "the project's linter and tests. Output ONLY the command, nothing else. " + + 'Do not wrap in backticks or markdown formatting. ' + 'If you cannot determine a quality gate command, output exactly: NONE\n\n' + context; @@ -590,15 +597,46 @@ function ensureQualityConfig(projectDir) { // 2. Check existing project config const existing = loadProjectConfig(projectDir); - if (existing && existing.source && existing.source !== SOURCE_HEURISTIC_FAILED) { - return { created: false, command: null }; + if ( + existing && + existing.source && + existing.source !== SOURCE_HEURISTIC_FAILED && + existing.source !== SOURCE_LLM_FAILED + ) { + // Retroactively sanitize LLM-sourced commands (may contain backticks from pre-fix detection) + if (existing.source === SOURCE_LLM && existing.qualityCommand) { + const sanitized = sanitizeLLMResponse(existing.qualityCommand); + if (sanitized && sanitized !== existing.qualityCommand) { + saveProjectConfig(projectDir, { + ...existing, + qualityCommand: sanitized, + updatedAt: new Date().toISOString(), + }); + } + if (!sanitized) { + // Sanitization nullified command — mark as failed, fall through to re-detection + saveProjectConfig(projectDir, { + ...existing, + qualityCommand: null, + source: SOURCE_LLM_FAILED, + updatedAt: new Date().toISOString(), + }); + } else { + return { created: false, command: null }; + } + } else { + return { created: false, command: null }; + } } let command = null; let source = null; let ecosystems = []; - if (existing && existing.source === SOURCE_HEURISTIC_FAILED) { + if ( + existing && + (existing.source === SOURCE_HEURISTIC_FAILED || existing.source === SOURCE_LLM_FAILED) + ) { // 3. Re-detect: try LLM first (heuristic already failed), then heuristic retry const settings = require('./settings').loadSettings(); const provider = settings.defaultProvider || 'claude'; @@ -641,7 +679,10 @@ function ensureQualityConfig(projectDir) { } // Nothing detected — record heuristic-failed so LLM is tried next time - if (!existing || existing.source !== SOURCE_HEURISTIC_FAILED) { + if ( + !existing || + (existing.source !== SOURCE_HEURISTIC_FAILED && existing.source !== SOURCE_LLM_FAILED) + ) { saveProjectConfig(projectDir, { qualityCommand: null, source: SOURCE_HEURISTIC_FAILED, @@ -660,6 +701,7 @@ module.exports = { SOURCE_HEURISTIC, SOURCE_LLM, SOURCE_HEURISTIC_FAILED, + SOURCE_LLM_FAILED, // Exported for testing buildProjectContext, buildCLIArgs, diff --git a/scripts/quality-gate-runner.js b/scripts/quality-gate-runner.js index f0e86fd7..27f3b8d4 100644 --- a/scripts/quality-gate-runner.js +++ b/scripts/quality-gate-runner.js @@ -82,12 +82,13 @@ function run() { // Flag heuristic-detected commands as failed so LLM re-detection is tried next time if (exitCode !== 0 && commandSource === 'project-config' && projectConfig) { - if (projectConfig.source === 'heuristic') { + if (projectConfig.source === 'heuristic' || projectConfig.source === 'llm') { + const failedSource = projectConfig.source === 'heuristic' ? 'heuristic-failed' : 'llm-failed'; try { const { saveProjectConfig } = require('../lib/project-config'); saveProjectConfig(cwd, { ...projectConfig, - source: 'heuristic-failed', + source: failedSource, updatedAt: new Date().toISOString(), }); } catch { diff --git a/tests/quality-detection.test.js b/tests/quality-detection.test.js index a9f6e778..b326c5a5 100644 --- a/tests/quality-detection.test.js +++ b/tests/quality-detection.test.js @@ -14,6 +14,7 @@ const { buildCLIArgs, SOURCE_HEURISTIC, SOURCE_HEURISTIC_FAILED, + SOURCE_LLM_FAILED, } = require('../lib/quality-detection'); function makeTmpDir() { @@ -520,6 +521,59 @@ describe('ensureQualityConfig', function () { assert.strictEqual(config.source, SOURCE_HEURISTIC); }); + it('should re-detect when source is llm-failed', function () { + const projectDir = path.join(tmpDir, 'myproject'); + fs.mkdirSync(projectDir, { recursive: true }); + + // Pre-create llm-failed config, then add ecosystem files + getProjectConfig().saveProjectConfig(projectDir, { + qualityCommand: null, + source: SOURCE_LLM_FAILED, + ecosystems: [], + updatedAt: '2026-01-01T00:00:00.000Z', + }); + + // Now add project files so heuristic succeeds on retry + writeFile( + projectDir, + 'package.json', + JSON.stringify({ + scripts: { test: 'jest' }, + }) + ); + + const result = getQualityDetection().ensureQualityConfig(projectDir); + + // LLM detection will fail (no CLI available in test), but heuristic retry should succeed + assert.strictEqual(result.created, true); + assert.ok(result.command.includes('npm test')); + + const config = getProjectConfig().loadProjectConfig(projectDir); + assert.strictEqual(config.source, SOURCE_HEURISTIC); + }); + + it('should retroactively sanitize backtick-wrapped LLM command', function () { + const projectDir = path.join(tmpDir, 'myproject'); + fs.mkdirSync(projectDir, { recursive: true }); + + // Pre-create LLM config with backtick-wrapped command (pre-fix poisoned data) + getProjectConfig().saveProjectConfig(projectDir, { + qualityCommand: '`npm run lint && npm test`', + source: 'llm', + ecosystems: [], + updatedAt: '2026-01-01T00:00:00.000Z', + }); + + const result = getQualityDetection().ensureQualityConfig(projectDir); + + // Should return early (not re-detect) but sanitize the stored command + assert.strictEqual(result.created, false); + + const config = getProjectConfig().loadProjectConfig(projectDir); + assert.strictEqual(config.qualityCommand, 'npm run lint && npm test'); + assert.strictEqual(config.source, 'llm'); + }); + it('should not write to project directory (no .zeroshot-quality, no .gitignore changes)', function () { const projectDir = path.join(tmpDir, 'myproject'); fs.mkdirSync(projectDir, { recursive: true }); @@ -564,6 +618,21 @@ describe('sanitizeLLMResponse', function () { assert.strictEqual(sanitizeLLMResponse(''), null); assert.strictEqual(sanitizeLLMResponse(null), null); }); + + it('should strip inline backticks', function () { + assert.strictEqual(sanitizeLLMResponse('`npm test`'), 'npm test'); + }); + + it('should strip leading $ prompt', function () { + assert.strictEqual(sanitizeLLMResponse('$ npm test'), 'npm test'); + }); + + it('should strip inline backticks and leading $ combined', function () { + assert.strictEqual( + sanitizeLLMResponse('`$ npm run lint && npm test`'), + 'npm run lint && npm test' + ); + }); }); describe('buildProjectContext', function () { diff --git a/tests/quality-gate.test.js b/tests/quality-gate.test.js index 0e770038..b64fd3f9 100644 --- a/tests/quality-gate.test.js +++ b/tests/quality-gate.test.js @@ -330,7 +330,7 @@ describe('quality-gate-runner.js', function () { assert.strictEqual(config.source, 'heuristic-failed'); }); - it('should NOT flag llm source on command failure', function () { + it('should flag llm source as llm-failed on command failure', function () { process.env.ZEROSHOT_PROJECTS_DIR = projectsDir; delete require.cache[require.resolve('../lib/project-config')]; const { saveProjectConfig } = require('../lib/project-config'); @@ -351,11 +351,11 @@ describe('quality-gate-runner.js', function () { assert.ok(error.status > 0); } - // Verify source unchanged + // Verify source updated to llm-failed delete require.cache[require.resolve('../lib/project-config')]; const { loadProjectConfig: reload } = require('../lib/project-config'); const config = reload(tmpDir); - assert.strictEqual(config.source, 'llm'); + assert.strictEqual(config.source, 'llm-failed'); }); it('should run command from .zeroshot-quality and exit non-zero on failure', function () {