diff --git a/.claude/settings.json b/.claude/settings.json deleted file mode 100644 index d1c583f..0000000 --- a/.claude/settings.json +++ /dev/null @@ -1,7 +0,0 @@ -{ - "permissions": { - "allow": ["Bash(npm *)", "Read(~/**)"], - "deny": ["Read(**/*.pem)"] - }, - "hooks": {} -} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 30571ff..f86289a 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -12,12 +12,18 @@ permissions: jobs: build-test: runs-on: ubuntu-latest + strategy: + fail-fast: false + matrix: + # agent-gov-core@>=0.7 requires Node >=20; test the supported + # range so package consumers aren't surprised on LTS Node 20/22. + node-version: [20, 22, 24] steps: - uses: actions/checkout@v6 - uses: actions/setup-node@v6 with: - node-version: 24 + node-version: ${{ matrix.node-version }} cache: npm - run: npm ci diff --git a/.gitignore b/.gitignore index c12810d..9d893b9 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,10 @@ node_modules/ -# Local codex working state — not for tracking. The detector fixtures -# under test/fixtures/**/.codex/ are deliberately included; only the -# top-level dogfood .codex/ is ignored. +# Local agent working state — not for tracking. The historical demo PR +# (#3) intentionally introduced these at the repo root with risky +# settings; they were merged into main and then needed cleanup. Ignore +# them at the root going forward so a passing scan can't silently +# regress. Detector fixtures under test/fixtures/**/ stay tracked. 
/.codex/ +/.claude/ +/.mcp.json diff --git a/.mcp.json b/.mcp.json deleted file mode 100644 index ced49f6..0000000 --- a/.mcp.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "mcpServers": { - "stripe-admin": { - "command": "npx", - "args": ["-y", "@vendor/stripe-mcp@latest"] - } - } -} diff --git a/action.yml b/action.yml index 611bd62..9b50532 100644 --- a/action.yml +++ b/action.yml @@ -67,48 +67,37 @@ runs: # Single scan: stdout streams GitHub annotations so the runner # picks up ::warning lines, while --out-markdown and --out-json - # capture the other two renderings from the same run. Previously - # this ran the CLI three times (markdown/json/github), repeating - # both git snapshot materialization and full detector work. + # capture the other two renderings from the same run. The CLI + # itself enforces --fail-on (exit 1) so we don't reimplement + # the rank table in bash. Capture the CLI status without + # `set -e` halting before outputs are written. + set +e node "$GITHUB_ACTION_PATH/dist/index.js" diff \ --repo "$repo" --base "$base" --head "$head" \ --format github \ --out-markdown "$report_file" \ - --out-json "$json_file" + --out-json "$json_file" \ + --fail-on "$fail_on" + cli_status=$? + set -e # Surface the markdown report in the Action log for parity # with the prior `tee` of `--format markdown`. 
- cat "$report_file" - - if [ -n "${GITHUB_STEP_SUMMARY:-}" ]; then - cat "$report_file" >> "$GITHUB_STEP_SUMMARY" + if [ -f "$report_file" ]; then + cat "$report_file" + if [ -n "${GITHUB_STEP_SUMMARY:-}" ]; then + cat "$report_file" >> "$GITHUB_STEP_SUMMARY" + fi fi - rating="$(node -e "const fs = require('node:fs'); console.log(JSON.parse(fs.readFileSync(process.argv[1], 'utf8')).rating)" "$json_file")" - finding_count="$(node -e "const fs = require('node:fs'); console.log(JSON.parse(fs.readFileSync(process.argv[1], 'utf8')).findingCount)" "$json_file")" - echo "rating=$rating" >> "$GITHUB_OUTPUT" - echo "finding-count=$finding_count" >> "$GITHUB_OUTPUT" - - rank() { - case "$1" in - none) echo 0 ;; - low) echo 1 ;; - medium) echo 2 ;; - high) echo 3 ;; - critical) echo 4 ;; - *) echo -1 ;; - esac - } - - fail_rank="$(rank "$fail_on")" - rating_rank="$(rank "$rating")" - - if [ "$fail_rank" -lt 0 ]; then - echo "::error::Invalid fail-on value '$fail_on'. Use none, low, medium, high, or critical." - exit 2 + if [ -f "$json_file" ]; then + rating="$(node -e "const fs = require('node:fs'); console.log(JSON.parse(fs.readFileSync(process.argv[1], 'utf8')).rating)" "$json_file")" + finding_count="$(node -e "const fs = require('node:fs'); console.log(JSON.parse(fs.readFileSync(process.argv[1], 'utf8')).findingCount)" "$json_file")" + echo "rating=$rating" >> "$GITHUB_OUTPUT" + echo "finding-count=$finding_count" >> "$GITHUB_OUTPUT" fi - if [ "$fail_rank" -gt 0 ] && [ "$rating_rank" -ge "$fail_rank" ]; then - echo "::error::ScopeTrail permission drift rating $rating meets fail-on threshold $fail_on." - exit 1 + if [ "$cli_status" -eq 1 ]; then + echo "::error::ScopeTrail permission drift rating ${rating:-unknown} meets fail-on threshold $fail_on." 
fi + exit "$cli_status" diff --git a/dist/detectors/codex-config.js b/dist/detectors/codex-config.js index 3a694fe..5c871bd 100644 --- a/dist/detectors/codex-config.js +++ b/dist/detectors/codex-config.js @@ -34,7 +34,7 @@ export async function detectCodexConfigDrift(oldRoot, newRoot) { kind: 'scope_trail.codex_sandbox_widened', severity: sandboxRank(newEntry.value) >= 3 ? 'critical' : 'high', file: CODEX_CONFIG_FILE, - line: newEntry.line, + line: newEntry.line || undefined, subject: key, message: `Codex sandbox setting was widened to ${newEntry.value}.`, recommendation: 'Keep Codex sandbox settings as narrow as the workflow allows and review full-access/elevated changes carefully.' @@ -48,7 +48,7 @@ export async function detectCodexConfigDrift(oldRoot, newRoot) { kind: 'scope_trail.codex_approval_weakened', severity: newApproval.value === 'never' ? 'high' : 'medium', file: CODEX_CONFIG_FILE, - line: newApproval.line, + line: newApproval.line || undefined, subject: 'approval_policy', message: `Codex approval policy was weakened to ${newApproval.value}.`, recommendation: 'Require human approval for risky commands unless the repository has a reviewed reason to run without prompts.' @@ -62,7 +62,7 @@ export async function detectCodexConfigDrift(oldRoot, newRoot) { kind: 'scope_trail.codex_network_enabled', severity: 'medium', file: CODEX_CONFIG_FILE, - line: newEntry.line, + line: newEntry.line || undefined, subject: key, message: `Codex network access was enabled for ${key}.`, recommendation: 'Confirm network access is needed and that commands cannot exfiltrate secrets or fetch unreviewed code.' 
@@ -267,61 +267,66 @@ function isPlainObject(value) { return typeof value === 'object' && value !== null && !Array.isArray(value); } async function readCodexConfig(root) { - let text = ''; + const text = await readCodexText(root); + if (!text) { + return new Map(); + } + // Use the same parsed-TOML walk as readTrustedProjects so inline + // tables — `sandbox_workspace_write = { network_access = true }` and + // `windows = { sandbox = "danger-full-access" }` — surface their leaf + // keys. The previous line-regex parser stopped at `{` and silently + // returned rating: "none" for valid TOML that widened the sandbox. + let parsed; try { - text = await readFile(configPath(root, CODEX_CONFIG_FILE), 'utf8'); + parsed = parseToml(text); } - catch (error) { - if (isNodeError(error) && error.code === 'ENOENT') { - return new Map(); - } - throw error; + catch { + // detectCodexConfigDrift already short-circuits on parse errors via + // readCodexParseError; reaching here with bad TOML shouldn't happen, + // and an empty map is the right fallback if it does. + return new Map(); } - return parseTomlEntries(text); -} -function parseTomlEntries(text) { const entries = new Map(); - let section = ''; - const lines = text.split(/\r?\n/); - for (let index = 0; index < lines.length; index += 1) { - const line = lines[index]; - const trimmed = line.trim(); - if (!trimmed || trimmed.startsWith('#')) { - continue; - } - const sectionMatch = /^\[([^\]]+)\]$/.exec(trimmed); - if (sectionMatch) { - section = normalizeSection(sectionMatch[1]); + collectTomlEntries(parsed, '', text, entries); + return entries; +} +function collectTomlEntries(node, prefix, text, out) { + for (const [rawKey, value] of Object.entries(node)) { + const key = rawKey.toLowerCase(); + const dotted = prefix ? 
`${prefix}.${key}` : key; + if (isPlainObject(value)) { + collectTomlEntries(value, dotted, text, out); continue; } - const keyMatch = /^([A-Za-z0-9_.-]+)\s*=\s*(.+)$/.exec(trimmed); - if (!keyMatch) { - continue; + out.set(dotted, { + line: locateTomlLine(text, dotted), + value: stringifyScalar(value) + }); + } +} +function locateTomlLine(text, dottedKey) { + // Inline tables defeat dotted-key line locators (they collapse to + // line 0). Walk up the prefix so we still point at the assignment + // line rather than dropping the locator entirely. + let current = dottedKey; + while (current) { + const line = lineOfTomlKey(text, current); + if (line > 0) { + return line; } - const key = normalizeKey(section, keyMatch[1]); - const value = parseScalarValue(keyMatch[2]); - if (value !== undefined) { - entries.set(key, { line: index + 1, value }); + const lastDot = current.lastIndexOf('.'); + if (lastDot === -1) { + return 0; } + current = current.slice(0, lastDot); } - return entries; -} -function normalizeSection(section) { - const normalized = section.trim().toLowerCase(); - return normalized.startsWith('projects.') ? 'projects' : normalized; -} -function normalizeKey(section, key) { - const normalizedKey = key.trim().toLowerCase(); - return section ? `${section}.${normalizedKey}` : normalizedKey; + return 0; } -function parseScalarValue(rawValue) { - const trimmed = rawValue.trim(); - const stringMatch = /^"([^"]*)"/.exec(trimmed) ?? 
/^'([^']*)'/.exec(trimmed); - if (stringMatch) { - return stringMatch[1].toLowerCase(); - } - const bareMatch = /^(true|false|[A-Za-z0-9_.-]+)/.exec(trimmed); - return bareMatch?.[1].toLowerCase(); +function stringifyScalar(value) { + if (typeof value === 'string') { + return value.toLowerCase(); + } + return String(value).toLowerCase(); } function sandboxRank(value) { if (!value) { diff --git a/dist/git-snapshot.js b/dist/git-snapshot.js index 152ec0a..a184519 100644 --- a/dist/git-snapshot.js +++ b/dist/git-snapshot.js @@ -55,7 +55,23 @@ async function snapshotPathsForRef(repo, ref) { return [...paths].sort(); } async function verifyGitRef(repo, ref) { - await execFileAsync('git', ['-C', repo, 'rev-parse', '--verify', `${ref}^{commit}`]); + try { + await execFileAsync('git', ['-C', repo, 'rev-parse', '--verify', `${ref}^{commit}`]); + } + catch (error) { + // Without wrapping, the raw `execFile` rejection escapes as a Node + // stack trace mentioning `git rev-parse --verify`. The most common + // CI cause is a shallow checkout (`fetch-depth: 1`) that doesn't + // include the PR base ref, so surface that hint up front. + throw new ScopeTrailError(`Could not resolve git ref "${ref}" in ${repo}. 
` + + 'If this is a CI run, ensure actions/checkout uses fetch-depth: 0 so the PR base and head are both available locally.', { cause: error }); + } +} +export class ScopeTrailError extends Error { + constructor(message, options) { + super(message, options); + this.name = 'ScopeTrailError'; + } } async function listPathsAtRef(repo, ref) { const { stdout } = await execFileAsync('git', ['-C', repo, 'ls-tree', '-r', '--name-only', ref], { diff --git a/dist/index.js b/dist/index.js index e953c0c..e49c8ac 100644 --- a/dist/index.js +++ b/dist/index.js @@ -4,11 +4,11 @@ import { fileURLToPath } from 'node:url'; import { detectClaudeSettingsDrift } from './detectors/claude-settings.js'; import { detectCodexConfigDrift } from './detectors/codex-config.js'; import { detectMcpDrift } from './detectors/mcp.js'; -import { materializeGitSnapshot } from './git-snapshot.js'; -import { createReport, renderReport } from './report.js'; +import { materializeGitSnapshot, ScopeTrailError } from './git-snapshot.js'; +import { createReport, isDriftRating, meetsFailOnThreshold, renderReport } from './report.js'; export async function main(argv = process.argv.slice(2)) { if (argv.length === 0 || argv.includes('--help') || argv.includes('-h')) { - process.stdout.write('Usage: scopetrail diff --old --new [--format text|markdown|json|github] [--out-markdown PATH] [--out-json PATH]\n'); + process.stdout.write(`${usage()}\n`); return 0; } if (argv[0] === 'diff') { @@ -31,13 +31,22 @@ async function runDiff(argv) { newRoot = parsed.newRoot; } else { - const baseSnapshot = await materializeGitSnapshot(parsed.repo, parsed.base); - const headSnapshot = await materializeGitSnapshot(parsed.repo, parsed.head); - oldRoot = baseSnapshot.root; - newRoot = headSnapshot.root; - cleanup = async () => { - await Promise.all([baseSnapshot.cleanup(), headSnapshot.cleanup()]); - }; + try { + const baseSnapshot = await materializeGitSnapshot(parsed.repo, parsed.base); + const headSnapshot = await 
materializeGitSnapshot(parsed.repo, parsed.head); + oldRoot = baseSnapshot.root; + newRoot = headSnapshot.root; + cleanup = async () => { + await Promise.all([baseSnapshot.cleanup(), headSnapshot.cleanup()]); + }; + } + catch (error) { + if (error instanceof ScopeTrailError) { + process.stderr.write(`${error.message}\n`); + return 2; + } + throw error; + } } try { // Run all detectors once and render the resulting report into @@ -57,6 +66,10 @@ async function runDiff(argv) { await writeFile(parsed.outJson, renderReport(report, 'json')); } process.stdout.write(renderReport(report, parsed.format)); + if (meetsFailOnThreshold(report.rating, parsed.failOn)) { + process.stderr.write(`ScopeTrail rating ${report.rating} meets --fail-on threshold ${parsed.failOn}.\n`); + return 1; + } return 0; } finally { @@ -72,6 +85,7 @@ function parseDiffArgs(argv) { let format = 'text'; let outMarkdown; let outJson; + let failOn = 'none'; for (let index = 0; index < argv.length; index += 1) { const arg = argv[index]; const value = argv[index + 1]; @@ -116,6 +130,13 @@ function parseDiffArgs(argv) { outJson = value; index += 1; } + else if (arg === '--fail-on') { + if (!value || !isDriftRating(value)) { + return { ok: false, error: `Invalid --fail-on value: ${value ?? ''}. Use none, low, medium, high, or critical.` }; + } + failOn = value; + index += 1; + } else { return { ok: false, error: `Unknown argument: ${arg}` }; } @@ -132,7 +153,7 @@ function parseDiffArgs(argv) { if (!head) { return { ok: false, error: 'Missing required --head argument.' }; } - return { ok: true, mode: 'git', repo, base, head, format, outMarkdown, outJson }; + return { ok: true, mode: 'git', repo, base, head, format, outMarkdown, outJson, failOn }; } if (!oldRoot) { return { ok: false, error: 'Missing required --old argument or --base argument.' }; @@ -140,7 +161,7 @@ function parseDiffArgs(argv) { if (!newRoot) { return { ok: false, error: 'Missing required --new argument.' 
}; } - return { ok: true, mode: 'directories', oldRoot, newRoot, format, outMarkdown, outJson }; + return { ok: true, mode: 'directories', oldRoot, newRoot, format, outMarkdown, outJson, failOn }; } function isReportFormat(value) { return value === 'text' || value === 'markdown' || value === 'json' || value === 'github'; @@ -152,7 +173,7 @@ if (invokedPath) { function usage() { return [ 'Usage:', - ' scopetrail diff --old --new [--format text|markdown|json|github] [--out-markdown PATH] [--out-json PATH]', - ' scopetrail diff --repo --base --head [--format text|markdown|json|github] [--out-markdown PATH] [--out-json PATH]' + ' scopetrail diff --old --new [--format text|markdown|json|github] [--out-markdown PATH] [--out-json PATH] [--fail-on none|low|medium|high|critical]', + ' scopetrail diff --repo --base --head [--format text|markdown|json|github] [--out-markdown PATH] [--out-json PATH] [--fail-on none|low|medium|high|critical]' ].join('\n'); } diff --git a/dist/mcp-risk.js b/dist/mcp-risk.js index 7e21518..06969d0 100644 --- a/dist/mcp-risk.js +++ b/dist/mcp-risk.js @@ -30,34 +30,58 @@ export function isUnpinnedCommand(spec) { (cmd === 'pnpm' && (sub === 'dlx' || sub === 'exec' || sub === 'x')); if (isExecutor) { const packageArgs = packageLikeArgs.slice(1).filter((arg) => !arg.startsWith('-')); - if (packageArgs.length > 0) { - const pkg = packageArgs[0]; - if (looksLikePackageName(pkg) && !hasExactVersion(pkg)) { - return true; - } + if (packageArgs.length > 0 && isUnpinnedPackageSpec(packageArgs[0])) { + return true; } } } // `bunx` is Bun's npx equivalent and ships as its own binary, so it // surfaces as `command: "bunx"` in MCP configs. 
return ['npx', 'uvx', 'pipx', 'bunx'].includes(cmd) - && packageLikeArgs.some((arg) => looksLikePackageName(arg) && !hasExactVersion(arg)); + && packageLikeArgs.some(isUnpinnedPackageSpec); } export function isPipeToShellCommand(spec) { const normalized = serverCommand(spec).toLowerCase(); return /\bcurl\b.+\|\s*(bash|sh)\b/.test(normalized) || /\b(iwr|invoke-webrequest)\b.+\|\s*(iex|invoke-expression)\b/.test(normalized); } -function looksLikePackageName(value) { - return /^[a-z0-9@][a-z0-9._/@-]+$/i.test(value) && !value.startsWith('-'); -} -function hasExactVersion(value) { - const packageVersion = value.startsWith('@') ? value.indexOf('@', 1) : value.indexOf('@'); - if (packageVersion === -1) { +// A package spec covers `name`, `name@VERSION-OR-RANGE`, and the +// occasional `name>=1.2.3` form. Only `name@X.Y.Z` (an exact version) is pinned; +// anything else (bare name, `@latest`, `^`, `~`, `>=`, `*`) is unpinned. +// The previous narrow `looksLikePackageName` regex rejected any value +// containing range operators, so `@vendor/helper@^1.2.3` slipped past +// the unpinned check entirely. +function isUnpinnedPackageSpec(value) { + const spec = parsePackageSpec(value); + if (!spec) { return false; } - const version = value.slice(packageVersion + 1); - return /^\d+\.\d+\.\d+/.test(version); + if (spec.versionSpec === undefined) { + return true; + } + return !/^@\d+\.\d+\.\d+/.test(spec.versionSpec); +} +function parsePackageSpec(value) { + if (!value || value.startsWith('-')) { + return undefined; + } + // For scoped names (`@scope/name`), skip the leading `@` so we don't + // mistake it for the version separator. + const scanFrom = value.startsWith('@') ? 1 : 0; + let cut = -1; + for (let index = scanFrom; index < value.length; index += 1) { + const char = value[index]; + if (char === '@' || char === '>' || char === '<' || char === '=') { + cut = index; + break; + } + } + const name = cut === -1 ? value : value.slice(0, cut); + const versionSpec = cut === -1 ? 
undefined : value.slice(cut); + if (!/^@?[a-z0-9][a-z0-9._/-]*$/i.test(name)) { + return undefined; + } + return { name, versionSpec }; } export function remoteEndpoint(spec) { return [spec.url, spec.serverUrl].find((value) => Boolean(value && isRemoteEndpoint(value))); diff --git a/dist/report.js b/dist/report.js index 298d410..d6a267c 100644 --- a/dist/report.js +++ b/dist/report.js @@ -5,6 +5,16 @@ const severityRank = { high: 3, critical: 4 }; +export function isDriftRating(value) { + return value in severityRank; +} +// Returns true when `rating` is at least as severe as `threshold` and +// `threshold` isn't `none`. Used by the CLI's --fail-on gate so non- +// GitHub CI (local pre-push, GitLab, CircleCI) can share the same +// threshold semantics as the Action. +export function meetsFailOnThreshold(rating, threshold) { + return threshold !== 'none' && severityRank[rating] >= severityRank[threshold]; +} export function createReport(findings) { return { rating: rateFindings(findings), diff --git a/package-lock.json b/package-lock.json index 1a3d4b0..ef877e3 100644 --- a/package-lock.json +++ b/package-lock.json @@ -17,6 +17,9 @@ "devDependencies": { "@types/node": "^24.0.0", "typescript": "^5.9.3" + }, + "engines": { + "node": ">=20" } }, "node_modules/@types/node": { @@ -30,9 +33,9 @@ } }, "node_modules/agent-gov-core": { - "version": "0.7.0", - "resolved": "https://registry.npmjs.org/agent-gov-core/-/agent-gov-core-0.7.0.tgz", - "integrity": "sha512-lFmoafZGSyglrRZNfyjj/xsz4t19txoj5iIifTYH1ZHxLOuKiITNCgP4NMPWIgVfgojph0abz/N9JTF4BlvTRg==", + "version": "0.7.1", + "resolved": "https://registry.npmjs.org/agent-gov-core/-/agent-gov-core-0.7.1.tgz", + "integrity": "sha512-YnKcIpYCAMncTfMf4gikBZBbSAdJdgY0snNTqYHAjrBDSgsEw9Ne5yfPZGGRyNdGfup2L2WNEZluosuj117JKA==", "license": "MIT", "engines": { "node": ">=20" diff --git a/package.json b/package.json index cf69fe4..4a7f358 100644 --- a/package.json +++ b/package.json @@ -14,6 +14,9 @@ "README.md", "LICENSE" ], + 
"engines": { + "node": ">=20" + }, "scripts": { "build": "tsc -p tsconfig.json", "test": "node --test" diff --git a/src/detectors/codex-config.ts b/src/detectors/codex-config.ts index e4ce88a..04f74d9 100644 --- a/src/detectors/codex-config.ts +++ b/src/detectors/codex-config.ts @@ -55,7 +55,7 @@ export async function detectCodexConfigDrift(oldRoot: string, newRoot: string): kind: 'scope_trail.codex_sandbox_widened', severity: sandboxRank(newEntry.value) >= 3 ? 'critical' : 'high', file: CODEX_CONFIG_FILE, - line: newEntry.line, + line: newEntry.line || undefined, subject: key, message: `Codex sandbox setting was widened to ${newEntry.value}.`, recommendation: 'Keep Codex sandbox settings as narrow as the workflow allows and review full-access/elevated changes carefully.' @@ -70,7 +70,7 @@ export async function detectCodexConfigDrift(oldRoot: string, newRoot: string): kind: 'scope_trail.codex_approval_weakened', severity: newApproval.value === 'never' ? 'high' : 'medium', file: CODEX_CONFIG_FILE, - line: newApproval.line, + line: newApproval.line || undefined, subject: 'approval_policy', message: `Codex approval policy was weakened to ${newApproval.value}.`, recommendation: 'Require human approval for risky commands unless the repository has a reviewed reason to run without prompts.' @@ -85,7 +85,7 @@ export async function detectCodexConfigDrift(oldRoot: string, newRoot: string): kind: 'scope_trail.codex_network_enabled', severity: 'medium', file: CODEX_CONFIG_FILE, - line: newEntry.line, + line: newEntry.line || undefined, subject: key, message: `Codex network access was enabled for ${key}.`, recommendation: 'Confirm network access is needed and that commands cannot exfiltrate secrets or fetch unreviewed code.' 
@@ -305,71 +305,75 @@ function isPlainObject(value: unknown): value is Record { } async function readCodexConfig(root: string): Promise> { - let text = ''; - try { - text = await readFile(configPath(root, CODEX_CONFIG_FILE), 'utf8'); - } catch (error) { - if (isNodeError(error) && error.code === 'ENOENT') { - return new Map(); - } - throw error; + const text = await readCodexText(root); + if (!text) { + return new Map(); } - return parseTomlEntries(text); -} + // Use the same parsed-TOML walk as readTrustedProjects so inline + // tables — `sandbox_workspace_write = { network_access = true }` and + // `windows = { sandbox = "danger-full-access" }` — surface their leaf + // keys. The previous line-regex parser stopped at `{` and silently + // returned rating: "none" for valid TOML that widened the sandbox. + let parsed: Record; + try { + parsed = parseToml(text); + } catch { + // detectCodexConfigDrift already short-circuits on parse errors via + // readCodexParseError; reaching here with bad TOML shouldn't happen, + // and an empty map is the right fallback if it does. + return new Map(); + } -function parseTomlEntries(text: string): Map { const entries = new Map(); - let section = ''; - - const lines = text.split(/\r?\n/); - for (let index = 0; index < lines.length; index += 1) { - const line = lines[index]; - const trimmed = line.trim(); - if (!trimmed || trimmed.startsWith('#')) { - continue; - } + collectTomlEntries(parsed, '', text, entries); + return entries; +} - const sectionMatch = /^\[([^\]]+)\]$/.exec(trimmed); - if (sectionMatch) { - section = normalizeSection(sectionMatch[1]); +function collectTomlEntries( + node: Record, + prefix: string, + text: string, + out: Map +): void { + for (const [rawKey, value] of Object.entries(node)) { + const key = rawKey.toLowerCase(); + const dotted = prefix ? 
`${prefix}.${key}` : key; + if (isPlainObject(value)) { + collectTomlEntries(value, dotted, text, out); continue; } + out.set(dotted, { + line: locateTomlLine(text, dotted), + value: stringifyScalar(value) + }); + } +} - const keyMatch = /^([A-Za-z0-9_.-]+)\s*=\s*(.+)$/.exec(trimmed); - if (!keyMatch) { - continue; +function locateTomlLine(text: string, dottedKey: string): number { + // Inline tables defeat dotted-key line locators (they collapse to + // line 0). Walk up the prefix so we still point at the assignment + // line rather than dropping the locator entirely. + let current = dottedKey; + while (current) { + const line = lineOfTomlKey(text, current); + if (line > 0) { + return line; } - - const key = normalizeKey(section, keyMatch[1]); - const value = parseScalarValue(keyMatch[2]); - if (value !== undefined) { - entries.set(key, { line: index + 1, value }); + const lastDot = current.lastIndexOf('.'); + if (lastDot === -1) { + return 0; } + current = current.slice(0, lastDot); } - - return entries; -} - -function normalizeSection(section: string): string { - const normalized = section.trim().toLowerCase(); - return normalized.startsWith('projects.') ? 'projects' : normalized; -} - -function normalizeKey(section: string, key: string): string { - const normalizedKey = key.trim().toLowerCase(); - return section ? `${section}.${normalizedKey}` : normalizedKey; + return 0; } -function parseScalarValue(rawValue: string): string | undefined { - const trimmed = rawValue.trim(); - const stringMatch = /^"([^"]*)"/.exec(trimmed) ?? 
/^'([^']*)'/.exec(trimmed); - if (stringMatch) { - return stringMatch[1].toLowerCase(); +function stringifyScalar(value: unknown): string { + if (typeof value === 'string') { + return value.toLowerCase(); } - - const bareMatch = /^(true|false|[A-Za-z0-9_.-]+)/.exec(trimmed); - return bareMatch?.[1].toLowerCase(); + return String(value).toLowerCase(); } function sandboxRank(value: string | undefined): number { diff --git a/src/git-snapshot.ts b/src/git-snapshot.ts index aaf3ab9..10093d1 100644 --- a/src/git-snapshot.ts +++ b/src/git-snapshot.ts @@ -68,7 +68,26 @@ async function snapshotPathsForRef(repo: string, ref: string): Promise } async function verifyGitRef(repo: string, ref: string): Promise { - await execFileAsync('git', ['-C', repo, 'rev-parse', '--verify', `${ref}^{commit}`]); + try { + await execFileAsync('git', ['-C', repo, 'rev-parse', '--verify', `${ref}^{commit}`]); + } catch (error) { + // Without wrapping, the raw `execFile` rejection escapes as a Node + // stack trace mentioning `git rev-parse --verify`. The most common + // CI cause is a shallow checkout (`fetch-depth: 1`) that doesn't + // include the PR base ref, so surface that hint up front. + throw new ScopeTrailError( + `Could not resolve git ref "${ref}" in ${repo}. 
` + + 'If this is a CI run, ensure actions/checkout uses fetch-depth: 0 so the PR base and head are both available locally.', + { cause: error } + ); + } +} + +export class ScopeTrailError extends Error { + constructor(message: string, options?: { cause?: unknown }) { + super(message, options); + this.name = 'ScopeTrailError'; + } } async function listPathsAtRef(repo: string, ref: string): Promise { diff --git a/src/index.ts b/src/index.ts index 38c8620..753807d 100644 --- a/src/index.ts +++ b/src/index.ts @@ -5,12 +5,19 @@ import { fileURLToPath } from 'node:url'; import { detectClaudeSettingsDrift } from './detectors/claude-settings.js'; import { detectCodexConfigDrift } from './detectors/codex-config.js'; import { detectMcpDrift } from './detectors/mcp.js'; -import { materializeGitSnapshot } from './git-snapshot.js'; -import { createReport, renderReport, type ReportFormat } from './report.js'; +import { materializeGitSnapshot, ScopeTrailError } from './git-snapshot.js'; +import { + createReport, + isDriftRating, + meetsFailOnThreshold, + renderReport, + type DriftRating, + type ReportFormat +} from './report.js'; export async function main(argv = process.argv.slice(2)): Promise { if (argv.length === 0 || argv.includes('--help') || argv.includes('-h')) { - process.stdout.write('Usage: scopetrail diff --old --new [--format text|markdown|json|github] [--out-markdown PATH] [--out-json PATH]\n'); + process.stdout.write(`${usage()}\n`); return 0; } @@ -37,13 +44,21 @@ async function runDiff(argv: string[]): Promise { oldRoot = parsed.oldRoot; newRoot = parsed.newRoot; } else { - const baseSnapshot = await materializeGitSnapshot(parsed.repo, parsed.base); - const headSnapshot = await materializeGitSnapshot(parsed.repo, parsed.head); - oldRoot = baseSnapshot.root; - newRoot = headSnapshot.root; - cleanup = async () => { - await Promise.all([baseSnapshot.cleanup(), headSnapshot.cleanup()]); - }; + try { + const baseSnapshot = await materializeGitSnapshot(parsed.repo, 
parsed.base); + const headSnapshot = await materializeGitSnapshot(parsed.repo, parsed.head); + oldRoot = baseSnapshot.root; + newRoot = headSnapshot.root; + cleanup = async () => { + await Promise.all([baseSnapshot.cleanup(), headSnapshot.cleanup()]); + }; + } catch (error) { + if (error instanceof ScopeTrailError) { + process.stderr.write(`${error.message}\n`); + return 2; + } + throw error; + } } try { @@ -65,6 +80,12 @@ async function runDiff(argv: string[]): Promise { await writeFile(parsed.outJson, renderReport(report, 'json')); } process.stdout.write(renderReport(report, parsed.format)); + if (meetsFailOnThreshold(report.rating, parsed.failOn)) { + process.stderr.write( + `ScopeTrail rating ${report.rating} meets --fail-on threshold ${parsed.failOn}.\n` + ); + return 1; + } return 0; } finally { await cleanup?.(); @@ -75,6 +96,7 @@ interface CommonDiffArgs { format: ReportFormat; outMarkdown?: string; outJson?: string; + failOn: DriftRating; } type ParsedDiffArgs = @@ -91,6 +113,7 @@ function parseDiffArgs(argv: string[]): ParsedDiffArgs { let format: ReportFormat = 'text'; let outMarkdown: string | undefined; let outJson: string | undefined; + let failOn: DriftRating = 'none'; for (let index = 0; index < argv.length; index += 1) { const arg = argv[index]; @@ -129,6 +152,12 @@ function parseDiffArgs(argv: string[]): ParsedDiffArgs { } outJson = value; index += 1; + } else if (arg === '--fail-on') { + if (!value || !isDriftRating(value)) { + return { ok: false, error: `Invalid --fail-on value: ${value ?? ''}. Use none, low, medium, high, or critical.` }; + } + failOn = value; + index += 1; } else { return { ok: false, error: `Unknown argument: ${arg}` }; } @@ -150,7 +179,7 @@ function parseDiffArgs(argv: string[]): ParsedDiffArgs { return { ok: false, error: 'Missing required --head argument.' 
}; } - return { ok: true, mode: 'git', repo, base, head, format, outMarkdown, outJson }; + return { ok: true, mode: 'git', repo, base, head, format, outMarkdown, outJson, failOn }; } if (!oldRoot) { @@ -161,7 +190,7 @@ function parseDiffArgs(argv: string[]): ParsedDiffArgs { return { ok: false, error: 'Missing required --new argument.' }; } - return { ok: true, mode: 'directories', oldRoot, newRoot, format, outMarkdown, outJson }; + return { ok: true, mode: 'directories', oldRoot, newRoot, format, outMarkdown, outJson, failOn }; } function isReportFormat(value: string | undefined): value is ReportFormat { @@ -177,7 +206,7 @@ if (invokedPath) { function usage(): string { return [ 'Usage:', - ' scopetrail diff --old --new [--format text|markdown|json|github] [--out-markdown PATH] [--out-json PATH]', - ' scopetrail diff --repo --base --head [--format text|markdown|json|github] [--out-markdown PATH] [--out-json PATH]' + ' scopetrail diff --old --new [--format text|markdown|json|github] [--out-markdown PATH] [--out-json PATH] [--fail-on none|low|medium|high|critical]', + ' scopetrail diff --repo --base --head [--format text|markdown|json|github] [--out-markdown PATH] [--out-json PATH] [--fail-on none|low|medium|high|critical]' ].join('\n'); } diff --git a/src/mcp-risk.ts b/src/mcp-risk.ts index 6c52aa6..4ee5782 100644 --- a/src/mcp-risk.ts +++ b/src/mcp-risk.ts @@ -45,11 +45,8 @@ export function isUnpinnedCommand(spec: McpCommandShape): boolean { (cmd === 'pnpm' && (sub === 'dlx' || sub === 'exec' || sub === 'x')); if (isExecutor) { const packageArgs = packageLikeArgs.slice(1).filter((arg) => !arg.startsWith('-')); - if (packageArgs.length > 0) { - const pkg = packageArgs[0]; - if (looksLikePackageName(pkg) && !hasExactVersion(pkg)) { - return true; - } + if (packageArgs.length > 0 && isUnpinnedPackageSpec(packageArgs[0])) { + return true; } } } @@ -57,7 +54,7 @@ export function isUnpinnedCommand(spec: McpCommandShape): boolean { // `bunx` is Bun's npx equivalent and 
ships as its own binary, so it // surfaces as `command: "bunx"` in MCP configs. return ['npx', 'uvx', 'pipx', 'bunx'].includes(cmd) - && packageLikeArgs.some((arg) => looksLikePackageName(arg) && !hasExactVersion(arg)); + && packageLikeArgs.some(isUnpinnedPackageSpec); } export function isPipeToShellCommand(spec: McpCommandShape): boolean { @@ -66,18 +63,47 @@ export function isPipeToShellCommand(spec: McpCommandShape): boolean { || /\b(iwr|invoke-webrequest)\b.+\|\s*(iex|invoke-expression)\b/.test(normalized); } -function looksLikePackageName(value: string): boolean { - return /^[a-z0-9@][a-z0-9._/@-]+$/i.test(value) && !value.startsWith('-'); +// A package spec covers `name`, `name@VERSION_OR_TAG_OR_RANGE`, and the +// occasional `name>=1.2.3` form. Only `name@X.Y.Z` (exact version) is pinned; +// anything else (bare name, `@latest`, `^`, `~`, `>=`, `*`) is unpinned. +// The previous narrow `looksLikePackageName` regex rejected any value +// containing range operators, so `@vendor/helper@^1.2.3` slipped past +// the unpinned check entirely. +function isUnpinnedPackageSpec(value: string): boolean { + const spec = parsePackageSpec(value); + if (!spec) { + return false; + } + if (spec.versionSpec === undefined) { + return true; + } + return !/^@\d+\.\d+\.\d+/.test(spec.versionSpec); } -function hasExactVersion(value: string): boolean { - const packageVersion = value.startsWith('@') ? value.indexOf('@', 1) : value.indexOf('@'); - if (packageVersion === -1) { - return false; +function parsePackageSpec(value: string): { name: string; versionSpec?: string } | undefined { + if (!value || value.startsWith('-')) { + return undefined; } - const version = value.slice(packageVersion + 1); - return /^\d+\.\d+\.\d+/.test(version); + // For scoped names (`@scope/name`), skip the leading `@` so we don't + // mistake it for the version separator. + const scanFrom = value.startsWith('@') ?
1 : 0; + let cut = -1; + for (let index = scanFrom; index < value.length; index += 1) { + const char = value[index]; + if (char === '@' || char === '>' || char === '<' || char === '=') { + cut = index; + break; + } + } + + const name = cut === -1 ? value : value.slice(0, cut); + const versionSpec = cut === -1 ? undefined : value.slice(cut); + + if (!/^@?[a-z0-9][a-z0-9._/-]*$/i.test(name)) { + return undefined; + } + return { name, versionSpec }; } export function remoteEndpoint(spec: McpCommandShape): string | undefined { diff --git a/src/report.ts b/src/report.ts index b71daad..2085a73 100644 --- a/src/report.ts +++ b/src/report.ts @@ -17,6 +17,18 @@ const severityRank: Record = { critical: 4 }; +export function isDriftRating(value: string): value is DriftRating { + return value in severityRank; +} + +// Returns true when `rating` is at least as severe as `threshold` and +// `threshold` isn't `none`. Used by the CLI's --fail-on gate so non- +// GitHub CI (local pre-push, GitLab, CircleCI) can share the same +// threshold semantics as the Action. +export function meetsFailOnThreshold(rating: DriftRating, threshold: DriftRating): boolean { + return threshold !== 'none' && severityRank[rating] >= severityRank[threshold]; +} + export function createReport(findings: Finding[]): DriftReport { return { rating: rateFindings(findings), diff --git a/test/action-metadata.test.mjs b/test/action-metadata.test.mjs index d69faad..dafefc7 100644 --- a/test/action-metadata.test.mjs +++ b/test/action-metadata.test.mjs @@ -24,6 +24,10 @@ test('GitHub Action metadata exposes PR drift inputs', async () => { assert.match(action, /--format github/); assert.match(action, /--out-markdown/); assert.match(action, /--out-json/); + // Threshold logic lives in the CLI, not bash — see src/report.ts's + // meetsFailOnThreshold. The action forwards the fail-on input. 
+ assert.match(action, /--fail-on "?\$\{?fail_on/); + assert.doesNotMatch(action, /rank\(\)\s*\{/); }); test('GitHub Action invokes the ScopeTrail CLI once per run', async () => { diff --git a/test/ci-workflow.test.mjs b/test/ci-workflow.test.mjs index 0c7c8c0..bf39148 100644 --- a/test/ci-workflow.test.mjs +++ b/test/ci-workflow.test.mjs @@ -14,12 +14,22 @@ test('repository has public CI for build and tests', async () => { assert.match(workflow, /^ pull_request:/m); assert.match(workflow, /actions\/checkout@v6/); assert.match(workflow, /actions\/setup-node@v6/); - assert.match(workflow, /node-version:\s*24/); + // Matrix covers Node >= engines.node (20) plus the next LTS lines. + assert.match(workflow, /node-version:\s*\[\s*20\s*,\s*22\s*,\s*24\s*\]/); + assert.match(workflow, /node-version:\s*\$\{\{\s*matrix\.node-version\s*\}\}/); assert.match(workflow, /npm ci/); assert.match(workflow, /npm run build/); assert.match(workflow, /npm test/); }); +test('package.json declares the supported Node range', async () => { + // agent-gov-core@>=0.7 declares engines.node: ">=20". Without ScopeTrail + // declaring its own engines, `npm install` on Node 18 produces no warning + // and the failure mode is a confusing runtime error from the dependency. 
+ const pkg = JSON.parse(await readFile(join(packageRoot, 'package.json'), 'utf8')); + assert.equal(pkg.engines?.node, '>=20'); +}); + test('CI verifies committed Action runtime is current after build', async () => { const workflow = await readFile(join(packageRoot, '.github', 'workflows', 'ci.yml'), 'utf8'); diff --git a/test/cli-output.test.mjs b/test/cli-output.test.mjs index 280a215..70a110a 100644 --- a/test/cli-output.test.mjs +++ b/test/cli-output.test.mjs @@ -77,6 +77,55 @@ test('CLI emits GitHub warning annotations for permission drift findings', async assert.doesNotMatch(stdout, /::error/); }); +test('CLI --fail-on exits 1 when rating meets the threshold (and 0 below it)', async () => { + // Threshold logic used to live only in action.yml, so local/other-CI + // users had to grep the JSON report. The CLI now mirrors the Action. + const oldDir = join(testDir, 'fixtures', 'combined', 'old'); + const newDir = join(testDir, 'fixtures', 'combined', 'new'); + + // This fixture rates "critical", the top rating, so no threshold sits + // above it; exercise the "meets threshold" case with --fail-on high. + let aboveStatus = 0; + try { + await execFileAsync( + process.execPath, + ['dist/index.js', 'diff', '--old', oldDir, '--new', newDir, '--format', 'json', '--fail-on', 'high'], + { cwd: packageRoot } + ); + } catch (error) { + aboveStatus = error.code ?? 0; + } + assert.equal(aboveStatus, 1, 'rating critical >= threshold high should exit 1'); + + // Gate disabled: same diff, but --fail-on none should still exit 0.
+ const { stdout: belowStdout } = await execFileAsync( + process.execPath, + ['dist/index.js', 'diff', '--old', oldDir, '--new', newDir, '--format', 'json', '--fail-on', 'none'], + { cwd: packageRoot } + ); + assert.equal(JSON.parse(belowStdout).rating, 'critical'); +}); + +test('CLI --fail-on rejects unknown values with exit 2', async () => { + const oldDir = join(testDir, 'fixtures', 'combined', 'old'); + const newDir = join(testDir, 'fixtures', 'combined', 'new'); + + let status = 0; + let stderr = ''; + try { + await execFileAsync( + process.execPath, + ['dist/index.js', 'diff', '--old', oldDir, '--new', newDir, '--fail-on', 'severe'], + { cwd: packageRoot } + ); + } catch (error) { + status = error.code ?? 0; + stderr = error.stderr ?? ''; + } + assert.equal(status, 2); + assert.match(stderr, /Invalid --fail-on value/); +}); + test('CLI renders markdown and JSON to files alongside stdout annotations in a single scan', async () => { // The GitHub Action used to invoke the CLI three times (one per // format), repeating snapshot materialization and detector work on diff --git a/test/codex-config-drift.test.mjs b/test/codex-config-drift.test.mjs index c03b186..2ea3ec2 100644 --- a/test/codex-config-drift.test.mjs +++ b/test/codex-config-drift.test.mjs @@ -14,8 +14,7 @@ test('codex_config_syntax_error: malformed TOML surfaces a finding instead of re const { mkdtempSync, writeFileSync, mkdirSync, rmSync } = await import('node:fs'); const { tmpdir } = await import('node:os'); - const root = mkdtempSync(join(testDir, '..', 'node_modules', '.scopetrail-codex-malformed-') - .replaceAll('\\', '/')); + const root = mkdtempSync(join(tmpdir(), 'scopetrail-codex-malformed-')); try { const oldDir = join(root, 'old'); const newDir = join(root, 'new'); @@ -47,8 +46,7 @@ test('codex_project_trusted: each [projects.] 
is tracked independently', a const { mkdtempSync, writeFileSync, mkdirSync, rmSync } = await import('node:fs'); const { tmpdir } = await import('node:os'); - const root = mkdtempSync(join(testDir, '..', 'node_modules', '.scopetrail-codex-projects-') - .replaceAll('\\', '/')); + const root = mkdtempSync(join(tmpdir(), 'scopetrail-codex-projects-')); try { const oldDir = join(root, 'old'); const newDir = join(root, 'new'); @@ -80,6 +78,54 @@ test('codex_project_trusted: each [projects.] is tracked independently', a } }); +test('inline-table sandbox/network keys are detected (parsed TOML, not regex)', async () => { + // Pre-fix gap: parseTomlEntries used a line-regex that bailed on + // values starting with `{`, so `sandbox_workspace_write = { network_access = true }` + // and `windows = { sandbox = "danger-full-access" }` returned + // rating: "none" / findingCount: 0 even though they're valid TOML + // that widens the Codex sandbox or enables network access. + const { mkdtempSync, writeFileSync, mkdirSync, rmSync } = await import('node:fs'); + const { tmpdir } = await import('node:os'); + + const root = mkdtempSync(join(tmpdir(), 'scopetrail-codex-inline-')); + try { + const oldDir = join(root, 'old'); + const newDir = join(root, 'new'); + mkdirSync(join(oldDir, '.codex'), { recursive: true }); + mkdirSync(join(newDir, '.codex'), { recursive: true }); + writeFileSync( + join(oldDir, '.codex', 'config.toml'), + 'sandbox_mode = "workspace-write"\napproval_policy = "on-request"\n' + ); + writeFileSync( + join(newDir, '.codex', 'config.toml'), + 'sandbox_workspace_write = { network_access = true }\n' + + 'windows = { sandbox = "danger-full-access" }\n' + + 'approval_policy = "never"\n' + ); + + const findings = await detectCodexConfigDrift(oldDir, newDir); + const byKind = (kind) => findings.filter((f) => f.kind === kind); + + const sandboxFindings = byKind('scope_trail.codex_sandbox_widened'); + assert.ok( + sandboxFindings.some((f) => f.subject === 'windows.sandbox'), 
+ 'expected windows.sandbox inline-table widening to be detected' + ); + + const networkFindings = byKind('scope_trail.codex_network_enabled'); + assert.ok( + networkFindings.some((f) => f.subject === 'sandbox_workspace_write.network_access'), + 'expected inline-table sandbox_workspace_write.network_access to be detected' + ); + + const approvalFindings = byKind('scope_trail.codex_approval_weakened'); + assert.equal(approvalFindings.length, 1, 'approval_policy regression check'); + } finally { + rmSync(root, { recursive: true, force: true }); + } +}); + test('detects Codex config permission drift', async () => { const oldDir = join(testDir, 'fixtures', 'codex-config-drift', 'old'); const newDir = join(testDir, 'fixtures', 'codex-config-drift', 'new'); diff --git a/test/git-diff.test.mjs b/test/git-diff.test.mjs index 7065948..860359b 100644 --- a/test/git-diff.test.mjs +++ b/test/git-diff.test.mjs @@ -174,6 +174,39 @@ test('CLI git diff snapshots platform-suffixed MCP example paths', async () => { } }); +test('CLI surfaces a friendly error when a git ref cannot be resolved', async () => { + // Pre-fix gap: rev-parse failures escaped as a raw Node child_process + // stack trace. The most common cause in CI is a shallow checkout that + // doesn't include the PR base ref, so the message now mentions + // fetch-depth: 0 explicitly. + const fx = await makeGitRepo({ + prefix: 'scopetrail-git-bad-ref-', + initialFiles: { 'README.md': 'base\n' }, + initialMessage: 'base', + }); + try { + let stderr = ''; + let exitCode = 0; + try { + await execFileAsync( + process.execPath, + ['dist/index.js', 'diff', '--repo', fx.repo, '--base', 'does-not-exist', '--head', 'HEAD', '--format', 'json'], + { cwd: packageRoot } + ); + } catch (error) { + stderr = error.stderr ?? ''; + exitCode = error.code ?? 
0; + } + + assert.equal(exitCode, 2, 'expected exit code 2 for unresolvable ref'); + assert.match(stderr, /does-not-exist/, 'error should name the ref'); + assert.match(stderr, /fetch-depth: 0/, 'error should hint at fetch-depth: 0'); + assert.doesNotMatch(stderr, /\bat \w+ \(/, 'error should not leak a Node stack trace'); + } finally { + await fx.cleanup(); + } +}); + test('CLI git diff snapshots prefixed MCP config example paths', async () => { const fx = await makeGitRepo({ prefix: 'scopetrail-git-prefixed-sample-', diff --git a/test/mcp-drift.test.mjs b/test/mcp-drift.test.mjs index 9ba807b..c65b0b2 100644 --- a/test/mcp-drift.test.mjs +++ b/test/mcp-drift.test.mjs @@ -247,6 +247,51 @@ test('isUnpinnedCommand flags npm exec / yarn dlx / pnpm dlx packages without ex }); +test('isUnpinnedCommand flags semver-range package specs (^, ~, >=, *)', async () => { + // Pre-fix gap: looksLikePackageName's char class accepted only + // [a-z0-9._/@-], so `@vendor/helper@^1.2.3`, `~1.2.3`, and the + // less-common `mcp-server>=1.2.3` form fell out of the package-shape + // check entirely — producing medium command-change findings instead + // of high unpinned findings. 
+ const { mkdtempSync, writeFileSync, mkdirSync, rmSync } = await import('node:fs'); + const { tmpdir } = await import('node:os'); + + const root = mkdtempSync(join(tmpdir(), 'scopetrail-ranges-')); + try { + const oldDir = join(root, 'old'); + const newDir = join(root, 'new'); + mkdirSync(oldDir, { recursive: true }); + mkdirSync(newDir, { recursive: true }); + writeFileSync(join(oldDir, '.mcp.json'), JSON.stringify({ mcpServers: {} })); + writeFileSync( + join(newDir, '.mcp.json'), + JSON.stringify({ + mcpServers: { + 'caret-range': { command: 'npx', args: ['-y', '@vendor/helper@^1.2.3'] }, + 'tilde-range': { command: 'npx', args: ['-y', '@vendor/helper@~1.2.3'] }, + 'gte-range': { command: 'npx', args: ['-y', '@vendor/helper@>=1.2.3'] }, + 'star-range': { command: 'npx', args: ['-y', '@vendor/helper@*'] }, + 'bare-name': { command: 'npx', args: ['-y', '@vendor/helper'] }, + 'compare-form':{ command: 'npx', args: ['mcp-server>=1.2.3'] }, + 'exact-pin': { command: 'npx', args: ['-y', '@vendor/helper@1.2.3'] } + } + }) + ); + + const findings = await detectMcpDrift(oldDir, newDir); + const unpinned = findings.filter((f) => f.kind === 'scope_trail.unpinned_mcp_command'); + const subjects = new Set(unpinned.map((f) => f.subject)); + + for (const name of ['caret-range', 'tilde-range', 'gte-range', 'star-range', 'bare-name', 'compare-form']) { + assert.ok(subjects.has(name), `expected unpinned finding for ${name}`); + } + assert.equal(subjects.has('exact-pin'), false, 'exact pin should not be flagged unpinned'); + } finally { + rmSync(root, { recursive: true, force: true }); + } +}); + + test('detects added MCP server with unpinned command', async () => { const oldDir = join(testDir, 'fixtures', 'mcp-drift', 'old'); const newDir = join(testDir, 'fixtures', 'mcp-drift', 'new'); diff --git a/test/package-surface.test.mjs b/test/package-surface.test.mjs index 85b5363..927a753 100644 --- a/test/package-surface.test.mjs +++ b/test/package-surface.test.mjs @@ -70,3 +70,19 @@ 
test('npm publish surface only ships runtime files', async () => { ); } }); + +test('git tree does not carry live risky agent configs at the repo root', async () => { + // The demo PR (#3) for this project intentionally added .mcp.json, + // .claude/settings.json, and .codex/config.toml at the repo root. + // It was merged into main and shipped on v0.1.6+ as tracked files — + // meaning anyone running Claude Code, Codex, or a permission scanner + // against this checkout loaded a live `stripe-admin` MCP server and + // broad `Bash(npm *)` / `Read(~/**)` Claude allow rules. The demo is + // archived on PR #3; the live files have been untracked and the + // .gitignore now keeps them from coming back. + const { stdout } = await exec('git', ['ls-files', '-z', '--full-name', '--', '.mcp.json', '.claude', '.codex'], { + cwd: packageRoot + }); + const tracked = stdout.split('\0').filter(Boolean); + assert.deepEqual(tracked, [], `unexpected tracked demo configs at repo root: ${tracked.join(', ')}`); +});