diff --git a/.gitattributes b/.gitattributes index 176a458..7b3c517 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1 +1,18 @@ * text=auto + +# Pin source and generated text files to LF so Windows checkouts under +# core.autocrlf=true don't oscillate between LF and CRLF. +*.js text eol=lf +*.mjs text eol=lf +*.cjs text eol=lf +*.ts text eol=lf +*.json text eol=lf +*.jsonc text eol=lf +*.yml text eol=lf +*.yaml text eol=lf +*.toml text eol=lf +*.md text eol=lf +*.sh text eol=lf +.gitattributes text eol=lf +.gitignore text eol=lf +LICENSE text eol=lf diff --git a/.github/ISSUE_TEMPLATE/team-adoption.yml b/.github/ISSUE_TEMPLATE/team-adoption.yml new file mode 100644 index 0000000..95092ec --- /dev/null +++ b/.github/ISSUE_TEMPLATE/team-adoption.yml @@ -0,0 +1,48 @@ +name: Team adoption signal +description: Share rollout pain when ScopeTrail's gap is about ownership, reporting, exceptions, or many repositories. +title: "[team adoption]: " +labels: ["adoption", "team-signal"] +body: + - type: textarea + id: pain + attributes: + label: Rollout pain + description: Describe the team-level gap. ScopeTrail is intentionally small at v0, so signal beats prescription here. + validations: + required: true + - type: dropdown + id: theme + attributes: + label: Primary theme + description: Which adoption gap does this fall under? + options: + - Ownership (who triages findings?) + - Reporting (rollup across repos / over time) + - Exceptions (allowlists, suppressions, justified drift) + - Many repositories (scale of rollout) + - Other + validations: + required: true + - type: dropdown + id: scope + attributes: + label: Affected scope + description: Help prioritize whether this matters for one repo or repeated team workflows. + options: + - One repository + - Multiple repositories + - Organization-wide pattern + - Not sure yet + validations: + required: true + - type: input + id: repository-count + attributes: + label: Approximate repository count + description: Optional estimate if this affects more than one repository. + placeholder: "Example: 1, 5, 20+" + - type: textarea + id: current-workaround + attributes: + label: Current workaround + description: How is the team handling this today? (Manual review, spreadsheet, a different tool, nothing yet, etc.) diff --git a/.gitignore b/.gitignore index 5f1c5c9..c12810d 100644 --- a/.gitignore +++ b/.gitignore @@ -1,4 +1,6 @@ node_modules/ -# Local codex working state — not for tracking -.codex/ +# Local codex working state — not for tracking. The detector fixtures +# under test/fixtures/**/.codex/ are deliberately included; only the +# top-level dogfood .codex/ is ignored. +/.codex/ diff --git a/README.md b/README.md index 45810db..2d579f7 100644 --- a/README.md +++ b/README.md @@ -129,6 +129,7 @@ ScopeTrail v0 detects: - Removed Claude Code deny rules for sensitive files such as `.env`. - Claude Code hook changes: **removed**, **added**, and **command-changed** (a strict `PreToolUse` swapped for a no-op script is the same risk as a removal — both are now caught). - Codex config drift such as full-access/elevated sandboxes, weakened approval policy, enabled network access, or trusted project settings. +- Codex `[mcp_servers.NAME]` additions, launch-command changes, and unpinned commands (the same risk model as `.mcp.json` MCP detection, applied to TOML). The git-mode snapshot list is derived from the detectors themselves, so adding a new surface in one place can never leave the GitHub Action silently blind to it. A regression test fails the build if a detector's target paths aren't covered. diff --git a/action.yml b/action.yml index e599806..611bd62 100644 --- a/action.yml +++ b/action.yml @@ -65,9 +65,20 @@ runs: report_file="${RUNNER_TEMP:-.}/scopetrail-report.md" json_file="${RUNNER_TEMP:-.}/scopetrail-report.json" - node "$GITHUB_ACTION_PATH/dist/index.js" diff --repo "$repo" --base "$base" --head "$head" --format markdown | tee "$report_file" - node "$GITHUB_ACTION_PATH/dist/index.js" diff --repo "$repo" --base "$base" --head "$head" --format json > "$json_file" - node "$GITHUB_ACTION_PATH/dist/index.js" diff --repo "$repo" --base "$base" --head "$head" --format github + # Single scan: stdout streams GitHub annotations so the runner + # picks up ::warning lines, while --out-markdown and --out-json + # capture the other two renderings from the same run. Previously + # this ran the CLI three times (markdown/json/github), repeating + # both git snapshot materialization and full detector work. + node "$GITHUB_ACTION_PATH/dist/index.js" diff \ + --repo "$repo" --base "$base" --head "$head" \ + --format github \ + --out-markdown "$report_file" \ + --out-json "$json_file" + + # Surface the markdown report in the Action log for parity + # with the prior `tee` of `--format markdown`. + cat "$report_file" if [ -n "${GITHUB_STEP_SUMMARY:-}" ]; then cat "$report_file" >> "$GITHUB_STEP_SUMMARY" diff --git a/dist/detectors/claude-settings.js b/dist/detectors/claude-settings.js index b0b5bdd..4e3a872 100644 --- a/dist/detectors/claude-settings.js +++ b/dist/detectors/claude-settings.js @@ -42,18 +42,22 @@ export async function detectClaudeSettingsDrift(oldRoot, newRoot) { }); continue; } - // Swapping a strict guard for a no-op script is just as material as - // removing the hook outright — and the previous detector missed it. + // Any drift in the command set is material — swapping a strict + // guard for a no-op, dropping one guard out of a multi-guard hook, + // or appending a no-op alongside the strict guard. The previous + // check required `newCommands.size === oldCommands.size`, which + // missed adds and drops that changed the count. const newCommands = newSettings.hookCommands.get(hookName) ?? new Set(); - const changed = [...newCommands].filter((command) => !oldCommands.has(command)); - if (changed.length > 0 && newCommands.size === oldCommands.size) { + const added = [...newCommands].filter((command) => !oldCommands.has(command)); + const removed = [...oldCommands].filter((command) => !newCommands.has(command)); + if (added.length > 0 || removed.length > 0) { findings.push({ kind: 'scope_trail.hook_command_changed', severity: isHighImpactHook(hookName) ? 'high' : 'medium', file: CLAUDE_SETTINGS_FILE, subject: hookName, - message: `Claude hook "${hookName}" command(s) changed: ${changed.join(', ')}.`, - recommendation: 'Review the new command — a weakened guard (e.g., a no-op script) is the same risk as a removed hook.' + message: hookCommandChangeMessage(hookName, added, removed), + recommendation: 'Review the change — a removed guard, a no-op appended next to a strict one, or any rewrite of a hook command can all weaken policy.' }); } } @@ -140,15 +144,23 @@ function hookHasEntries(value) { // its grants properly. Bare tokens and explicit wildcards are still broad. export function isBroadAllow(permission) { const normalized = permission.toLowerCase(); - if (/\bbash\([^)]*\*[^)]*\)/.test(normalized)) { + // Bare verb (no parentheses) or wildcard-scoped verb for any of the + // dangerous operations. This catches `"Bash"`, `"Read"`, `"Write"`, + // `"Edit"`, `"WebFetch"`, etc. — bare tokens that grant unlimited + // access. Previously only WebFetch/WebSearch/Task went through this + // check, which silently let `"Bash"` and bare `"Read"`/`"Write"`/ + // `"Edit"` slip past unflagged. + if (isBroadVerbGrant(normalized, ['bash', 'read', 'write', 'edit', 'webfetch', 'websearch', 'task'])) { return true; } + // Scoped grants whose target is a rooted path (absolute, home-rel, + // or Windows drive). `Read(/etc/passwd)` doesn't include `*` but is + // still broader than a workspace-relative path. Stays separate from + // the verb-grant check because that path-shape rule only applies to + // file-access verbs, not network/task verbs. if (/\b(read|write|edit)\((~|[a-z]:\\|\/|\*\*)/.test(normalized)) { return true; } - if (isBroadVerbGrant(normalized, ['webfetch', 'websearch', 'task'])) { - return true; - } if (isBroadMcpGrant(normalized)) { return true; } @@ -208,3 +220,13 @@ function severityForRemovedDeny(permission) { function isHighImpactHook(hookName) { return ['pretooluse', 'posttooluse', 'permissionrequest', 'sessionend'].includes(hookName.toLowerCase()); } +function hookCommandChangeMessage(hookName, added, removed) { + const parts = []; + if (added.length > 0) { + parts.push(`added: ${added.join(', ')}`); + } + if (removed.length > 0) { + parts.push(`removed: ${removed.join(', ')}`); + } + return `Claude hook "${hookName}" command(s) changed (${parts.join('; ')}).`; +} diff --git a/dist/detectors/codex-config.js b/dist/detectors/codex-config.js index 19bd0b1..925e939 100644 --- a/dist/detectors/codex-config.js +++ b/dist/detectors/codex-config.js @@ -1,5 +1,7 @@ import { readFile } from 'node:fs/promises'; +import { lineOfTomlKey, parseToml } from 'agent-gov-core'; import { configPath } from '../discovery.js'; +import { isUnpinnedCommand, serverCommand } from '../mcp-risk.js'; export const CODEX_CONFIG_FILE = '.codex/config.toml'; export const CODEX_TARGET_PATHS = [CODEX_CONFIG_FILE]; export async function detectCodexConfigDrift(oldRoot, newRoot) { @@ -62,8 +64,114 @@ export async function detectCodexConfigDrift(oldRoot, newRoot) { recommendation: 'Only mark projects trusted when repository instructions, hooks, and tool permissions are reviewed.' }); } + for (const finding of await detectCodexMcpDrift(oldRoot, newRoot)) { + findings.push(finding); + } + return findings; +} +// Codex `.codex/config.toml` carries the same `[mcp_servers.NAME]` +// shape that ScopeTrail already flags in `.mcp.json` — without this +// detector, a Codex user can add `[mcp_servers.stripe-admin]` with +// `args = ["-y", "@vendor/stripe-mcp@latest"]` and the unpinned MCP +// risk model never sees it. +async function detectCodexMcpDrift(oldRoot, newRoot) { + const findings = []; + const oldServers = await readCodexMcpServers(oldRoot); + const newServers = await readCodexMcpServers(newRoot); + for (const [name, newServer] of newServers) { + const oldServer = oldServers.get(name); + const commandChanged = oldServer && serverCommand(newServer) !== serverCommand(oldServer); + if (!oldServer) { + findings.push({ + kind: 'scope_trail.codex_mcp_server_added', + severity: 'high', + file: CODEX_CONFIG_FILE, + line: lineForServer(newServer), + subject: name, + message: `Codex MCP server "${name}" was added.`, + recommendation: 'Review the server package, pin its version, and confirm the tools it exposes before merging.' + }); + } + else if (commandChanged) { + findings.push({ + kind: 'scope_trail.codex_mcp_server_command_changed', + severity: 'medium', + file: CODEX_CONFIG_FILE, + line: lineForServer(newServer), + subject: name, + message: `Codex MCP server "${name}" changed its launch command.`, + recommendation: 'Confirm the command change is intentional and still points at a trusted, pinned package.' + }); + } + if ((!oldServer || commandChanged) && isUnpinnedCommand(newServer)) { + findings.push({ + kind: 'scope_trail.codex_unpinned_mcp_command', + severity: 'high', + file: CODEX_CONFIG_FILE, + line: lineForServer(newServer), + subject: name, + message: `Codex MCP server "${name}" uses an unpinned command: ${serverCommand(newServer)}.`, + recommendation: 'Pin executable packages to an exact version and avoid pipe-to-shell installation commands.' + }); + } + } return findings; } +async function readCodexMcpServers(root) { + const path = configPath(root, CODEX_CONFIG_FILE); + let text = ''; + try { + text = await readFile(path, 'utf8'); + } + catch (error) { + if (isNodeError(error) && error.code === 'ENOENT') { + return new Map(); + } + throw error; + } + let parsed; + try { + parsed = parseToml(text); + } + catch { + return new Map(); + } + const rawServers = parsed.mcp_servers; + if (!isPlainObject(rawServers)) { + return new Map(); + } + const servers = new Map(); + for (const [name, entry] of Object.entries(rawServers)) { + if (!isPlainObject(entry)) { + continue; + } + servers.set(name, { + name, + text, + command: typeof entry.command === 'string' ? entry.command : undefined, + args: Array.isArray(entry.args) + ? entry.args.filter((arg) => typeof arg === 'string') + : undefined, + url: typeof entry.url === 'string' ? entry.url : undefined + }); + } + return servers; +} +function lineForServer(server) { + // Point at the leaf the reviewer most needs to see — `command` + // first, then any of the args/url keys. Fall back to file-level + // when nothing matches so the finding still surfaces. + for (const leaf of ['command', 'args', 'url']) { + const line = lineOfTomlKey(server.text, `mcp_servers.${server.name}.${leaf}`); + if (line) { + return line; + } + } + return undefined; +} +function isPlainObject(value) { + return typeof value === 'object' && value !== null && !Array.isArray(value); +} async function readCodexConfig(root) { let text = ''; try { diff --git a/dist/detectors/mcp.js b/dist/detectors/mcp.js index e42fc36..9823048 100644 --- a/dist/detectors/mcp.js +++ b/dist/detectors/mcp.js @@ -1,5 +1,6 @@ import { readdir } from 'node:fs/promises'; import { configPath, isRecord, lineOfJsonKey, lineOfJsonStringValue, readJsonObjectWithSource } from '../discovery.js'; +import { isPipeToShellCommand, isUnpinnedCommand, serverCommand } from '../mcp-risk.js'; const MCP_CONFIGS = [ { path: '.mcp.json', serverKeys: ['mcpServers'] }, { path: '.cursor/mcp.json', serverKeys: ['mcpServers', 'servers'] }, @@ -130,14 +131,24 @@ export async function detectMcpDrift(oldRoot, newRoot) { } const endpoint = remoteEndpoint(newServer); if ((!oldServer || changed) && endpoint) { + const unencrypted = isUnencryptedEndpoint(endpoint); findings.push({ kind: 'scope_trail.mcp_sample_remote_endpoint', - severity: 'medium', + // An `http://` endpoint in a sample config is worse than an + // `https://` one: anyone who copies the sample inherits a + // MitM-vulnerable connection. Bump to high; https stays at + // medium because the copy-and-paste risk is "is this the + // right endpoint?" not "is this transport safe?". + severity: unencrypted ? 'high' : 'medium', file: path, line: lineForRemoteEndpoint(newServer) ?? newServer.line, subject: name, - message: `Sample/disabled MCP server "${name}" points at remote endpoint: ${endpoint}.`, - recommendation: 'Confirm the endpoint is intended for copied sample configs and does not expose unexpected data or tools.' + message: unencrypted + ? `Sample/disabled MCP server "${name}" points at an unencrypted remote endpoint: ${endpoint}.` + : `Sample/disabled MCP server "${name}" points at remote endpoint: ${endpoint}.`, + recommendation: unencrypted + ? 'Use https:// for sample remote MCP endpoints — copy-pasted samples should not silently downgrade users to unencrypted transport.' + : 'Confirm the endpoint is intended for copied sample configs and does not expose unexpected data or tools.' }); } } @@ -211,18 +222,22 @@ async function collectMcpSampleConfigPaths(root, relativeDir, paths) { } throw error; } - for (const entry of entries) { + // Walk subdirectories in parallel. Each `readdir` is independent + // and `paths` is a Set mutated from the same event loop, so add + // operations are race-free in single-threaded Node. The caller + // already sorts the result, so insertion order doesn't matter. + await Promise.all(entries.map(async (entry) => { const relativePath = relativeDir ? `${relativeDir}/${entry.name}` : entry.name; if (entry.isDirectory()) { if (!IGNORED_SAMPLE_SCAN_DIRS.has(entry.name)) { await collectMcpSampleConfigPaths(root, relativePath, paths); } - continue; + return; } if (entry.isFile() && isMcpSampleConfigPath(relativePath)) { paths.add(relativePath); } - } + })); } function readServerMap(json, serverKeys) { for (const key of serverKeys) { @@ -232,36 +247,9 @@ function readServerMap(json, serverKeys) { } return undefined; } -function serverCommand(server) { - return [server.command, ...(server.args ?? []), server.url, server.serverUrl].filter(Boolean).join(' '); -} -function isUnpinnedCommand(server) { - const command = serverCommand(server); - const normalized = command.toLowerCase(); - if (normalized.includes('@latest')) { - return true; - } - if (/https:\/\/github\.com\/[^ ]+/.test(normalized)) { - return true; - } - if (/\bcurl\b.+\|\s*(bash|sh)\b/.test(normalized)) { - return true; - } - if (/\b(iwr|invoke-webrequest)\b.+\|\s*(iex|invoke-expression)\b/.test(normalized)) { - return true; - } - const packageLikeArgs = server.args ?? []; - return ['npx', 'uvx', 'pipx'].includes((server.command ?? '').toLowerCase()) - && packageLikeArgs.some((arg) => looksLikePackageName(arg) && !hasExactVersion(arg)); -} function severityForSampleCommandRisk(server) { return isPipeToShellCommand(server) ? 'high' : 'medium'; } -function isPipeToShellCommand(server) { - const normalized = serverCommand(server).toLowerCase(); - return /\bcurl\b.+\|\s*(bash|sh)\b/.test(normalized) - || /\b(iwr|invoke-webrequest)\b.+\|\s*(iex|invoke-expression)\b/.test(normalized); -} function looksLikePackageName(value) { return /^[a-z0-9@][a-z0-9._/@-]+$/i.test(value) && !value.startsWith('-'); } @@ -311,6 +299,14 @@ function isRemoteEndpoint(value) { return false; } } +function isUnencryptedEndpoint(value) { + try { + return new URL(value).protocol === 'http:'; + } + catch { + return false; + } +} function firstLineForValues(server, values, predicate = () => true) { const source = getSourceText(server); for (const value of values) { diff --git a/dist/index.js b/dist/index.js index 87324e2..e953c0c 100644 --- a/dist/index.js +++ b/dist/index.js @@ -1,4 +1,5 @@ #!/usr/bin/env node +import { writeFile } from 'node:fs/promises'; import { fileURLToPath } from 'node:url'; import { detectClaudeSettingsDrift } from './detectors/claude-settings.js'; import { detectCodexConfigDrift } from './detectors/codex-config.js'; @@ -7,7 +8,7 @@ import { materializeGitSnapshot } from './git-snapshot.js'; import { createReport, renderReport } from './report.js'; export async function main(argv = process.argv.slice(2)) { if (argv.length === 0 || argv.includes('--help') || argv.includes('-h')) { - process.stdout.write('Usage: scopetrail diff --old --new [--format text|markdown|json]\n'); + process.stdout.write('Usage: scopetrail diff --old --new [--format text|markdown|json|github] [--out-markdown PATH] [--out-json PATH]\n'); return 0; } if (argv[0] === 'diff') { @@ -22,28 +23,44 @@ async function runDiff(argv) { process.stderr.write(`${parsed.error}\n${usage()}\n`); return 2; } + let oldRoot; + let newRoot; + let cleanup; if (parsed.mode === 'directories') { - const findings = [ - ...(await detectMcpDrift(parsed.oldRoot, parsed.newRoot)), - ...(await detectClaudeSettingsDrift(parsed.oldRoot, parsed.newRoot)), - ...(await detectCodexConfigDrift(parsed.oldRoot, parsed.newRoot)) - ]; - process.stdout.write(renderReport(createReport(findings), parsed.format)); - return 0; + oldRoot = parsed.oldRoot; + newRoot = parsed.newRoot; + } + else { + const baseSnapshot = await materializeGitSnapshot(parsed.repo, parsed.base); + const headSnapshot = await materializeGitSnapshot(parsed.repo, parsed.head); + oldRoot = baseSnapshot.root; + newRoot = headSnapshot.root; + cleanup = async () => { + await Promise.all([baseSnapshot.cleanup(), headSnapshot.cleanup()]); + }; } - const baseSnapshot = await materializeGitSnapshot(parsed.repo, parsed.base); - const headSnapshot = await materializeGitSnapshot(parsed.repo, parsed.head); try { + // Run all detectors once and render the resulting report into + // every requested output. Previously the GitHub Action invoked + // the CLI three times for markdown/json/github, which repeated + // git snapshot materialization and detector work on each call. const findings = [ - ...(await detectMcpDrift(baseSnapshot.root, headSnapshot.root)), - ...(await detectClaudeSettingsDrift(baseSnapshot.root, headSnapshot.root)), - ...(await detectCodexConfigDrift(baseSnapshot.root, headSnapshot.root)) + ...(await detectMcpDrift(oldRoot, newRoot)), + ...(await detectClaudeSettingsDrift(oldRoot, newRoot)), + ...(await detectCodexConfigDrift(oldRoot, newRoot)) ]; - process.stdout.write(renderReport(createReport(findings), parsed.format)); + const report = createReport(findings); + if (parsed.outMarkdown) { + await writeFile(parsed.outMarkdown, renderReport(report, 'markdown')); + } + if (parsed.outJson) { + await writeFile(parsed.outJson, renderReport(report, 'json')); + } + process.stdout.write(renderReport(report, parsed.format)); return 0; } finally { - await Promise.all([baseSnapshot.cleanup(), headSnapshot.cleanup()]); + await cleanup?.(); } } function parseDiffArgs(argv) { @@ -53,6 +70,8 @@ function parseDiffArgs(argv) { let head; let repo = process.cwd(); let format = 'text'; + let outMarkdown; + let outJson; for (let index = 0; index < argv.length; index += 1) { const arg = argv[index]; const value = argv[index + 1]; @@ -83,6 +102,20 @@ function parseDiffArgs(argv) { format = value; index += 1; } + else if (arg === '--out-markdown') { + if (!value) { + return { ok: false, error: 'Missing path for --out-markdown.' }; + } + outMarkdown = value; + index += 1; + } + else if (arg === '--out-json') { + if (!value) { + return { ok: false, error: 'Missing path for --out-json.' }; + } + outJson = value; + index += 1; + } else { return { ok: false, error: `Unknown argument: ${arg}` }; } @@ -99,7 +132,7 @@ function parseDiffArgs(argv) { if (!head) { return { ok: false, error: 'Missing required --head argument.' }; } - return { ok: true, mode: 'git', repo, base, head, format }; + return { ok: true, mode: 'git', repo, base, head, format, outMarkdown, outJson }; } if (!oldRoot) { return { ok: false, error: 'Missing required --old argument or --base argument.' }; @@ -107,7 +140,7 @@ function parseDiffArgs(argv) { if (!newRoot) { return { ok: false, error: 'Missing required --new argument.' }; } - return { ok: true, mode: 'directories', oldRoot, newRoot, format }; + return { ok: true, mode: 'directories', oldRoot, newRoot, format, outMarkdown, outJson }; } function isReportFormat(value) { return value === 'text' || value === 'markdown' || value === 'json' || value === 'github'; @@ -119,7 +152,7 @@ if (invokedPath) { function usage() { return [ 'Usage:', - ' scopetrail diff --old --new [--format text|markdown|json|github]', - ' scopetrail diff --repo --base --head [--format text|markdown|json|github]' + ' scopetrail diff --old --new [--format text|markdown|json|github] [--out-markdown PATH] [--out-json PATH]', + ' scopetrail diff --repo --base --head [--format text|markdown|json|github] [--out-markdown PATH] [--out-json PATH]' ].join('\n'); } diff --git a/dist/mcp-risk.js b/dist/mcp-risk.js new file mode 100644 index 0000000..56390da --- /dev/null +++ b/dist/mcp-risk.js @@ -0,0 +1,61 @@ +// Shared MCP launch-command risk model. Both .mcp.json (JSON) and +// .codex/config.toml (TOML) carry the same shape of risky command — +// @latest tags, github tarballs, curl-pipe-sh installers, unpinned +// npx/uvx/pipx packages. Keeping the heuristic in one module means +// the two detectors stay aligned as the risk model evolves. +export function serverCommand(spec) { + return [spec.command, ...(spec.args ?? []), spec.url, spec.serverUrl].filter(Boolean).join(' '); +} +export function isUnpinnedCommand(spec) { + const command = serverCommand(spec); + const normalized = command.toLowerCase(); + if (normalized.includes('@latest')) { + return true; + } + if (/https:\/\/github\.com\/[^ ]+/.test(normalized)) { + return true; + } + if (/\bcurl\b.+\|\s*(bash|sh)\b/.test(normalized)) { + return true; + } + if (/\b(iwr|invoke-webrequest)\b.+\|\s*(iex|invoke-expression)\b/.test(normalized)) { + return true; + } + const packageLikeArgs = spec.args ?? []; + const cmd = (spec.command ?? '').toLowerCase(); + if (['npm', 'yarn', 'pnpm'].includes(cmd) && packageLikeArgs.length > 1) { + const sub = packageLikeArgs[0].toLowerCase(); + const isExecutor = (cmd === 'npm' && (sub === 'exec' || sub === 'x')) || + (cmd === 'yarn' && sub === 'dlx') || + (cmd === 'pnpm' && (sub === 'dlx' || sub === 'exec' || sub === 'x')); + if (isExecutor) { + const packageArgs = packageLikeArgs.slice(1).filter((arg) => !arg.startsWith('-')); + if (packageArgs.length > 0) { + const pkg = packageArgs[0]; + if (looksLikePackageName(pkg) && !hasExactVersion(pkg)) { + return true; + } + } + } + } + // `bunx` is Bun's npx equivalent and ships as its own binary, so it + // surfaces as `command: "bunx"` in MCP configs. + return ['npx', 'uvx', 'pipx', 'bunx'].includes(cmd) + && packageLikeArgs.some((arg) => looksLikePackageName(arg) && !hasExactVersion(arg)); +} +export function isPipeToShellCommand(spec) { + const normalized = serverCommand(spec).toLowerCase(); + return /\bcurl\b.+\|\s*(bash|sh)\b/.test(normalized) + || /\b(iwr|invoke-webrequest)\b.+\|\s*(iex|invoke-expression)\b/.test(normalized); +} +function looksLikePackageName(value) { + return /^[a-z0-9@][a-z0-9._/@-]+$/i.test(value) && !value.startsWith('-'); +} +function hasExactVersion(value) { + const packageVersion = value.startsWith('@') ? value.indexOf('@', 1) : value.indexOf('@'); + if (packageVersion === -1) { + return false; + } + const version = value.slice(packageVersion + 1); + return /^\d+\.\d+\.\d+/.test(version); +} diff --git a/package-lock.json b/package-lock.json index f911f8a..7d5d528 100644 --- a/package-lock.json +++ b/package-lock.json @@ -9,7 +9,7 @@ "version": "0.1.11", "license": "MIT", "dependencies": { - "agent-gov-core": "^0.4.0" + "agent-gov-core": "^0.5.0" }, "bin": { "scopetrail": "dist/index.js" @@ -30,9 +30,9 @@ } }, "node_modules/agent-gov-core": { - "version": "0.4.3", - "resolved": "https://registry.npmjs.org/agent-gov-core/-/agent-gov-core-0.4.3.tgz", - "integrity": "sha512-vf8C+AahSlq+e7lHfgylKFH/wrIbemEKGPVU4kSCc7CoMqhoEw07dWvY4jqqsBW0M97hc7ki9g/QeGGIMA7NNQ==", + "version": "0.5.0", + "resolved": "https://registry.npmjs.org/agent-gov-core/-/agent-gov-core-0.5.0.tgz", + "integrity": "sha512-rOfbEQM6PrU+gquFe3zWw1FA7U/Aaql7laSkHdvmC345NXOXlTww4jwC5FE9orY82Bv2M8czc+ImoS38K2Z/+g==", "license": "MIT", "engines": { "node": ">=20" diff --git a/package.json b/package.json index 1f8693c..ae81be9 100644 --- a/package.json +++ b/package.json @@ -6,12 +6,20 @@ "bin": { "scopetrail": "./dist/index.js" }, + "files": [ + "dist", + "action.yml", + "docs", + "assets/demo-pr-annotations.png", + "README.md", + "LICENSE" + ], "scripts": { "build": "tsc -p tsconfig.json", "test": "node --test" }, "dependencies": { - "agent-gov-core": "^0.4.0" + "agent-gov-core": "^0.5.0" }, "devDependencies": { "@types/node": "^24.0.0", diff --git a/src/detectors/claude-settings.ts b/src/detectors/claude-settings.ts index b679eee..4bd11fa 100644 --- a/src/detectors/claude-settings.ts +++ b/src/detectors/claude-settings.ts @@ -49,18 +49,22 @@ export async function detectClaudeSettingsDrift(oldRoot: string, newRoot: string continue; } - // Swapping a strict guard for a no-op script is just as material as - // removing the hook outright — and the previous detector missed it. + // Any drift in the command set is material — swapping a strict + // guard for a no-op, dropping one guard out of a multi-guard hook, + // or appending a no-op alongside the strict guard. The previous + // check required `newCommands.size === oldCommands.size`, which + // missed adds and drops that changed the count. const newCommands = newSettings.hookCommands.get(hookName) ?? new Set(); - const changed = [...newCommands].filter((command) => !oldCommands.has(command)); - if (changed.length > 0 && newCommands.size === oldCommands.size) { + const added = [...newCommands].filter((command) => !oldCommands.has(command)); + const removed = [...oldCommands].filter((command) => !newCommands.has(command)); + if (added.length > 0 || removed.length > 0) { findings.push({ kind: 'scope_trail.hook_command_changed', severity: isHighImpactHook(hookName) ? 'high' : 'medium', file: CLAUDE_SETTINGS_FILE, subject: hookName, - message: `Claude hook "${hookName}" command(s) changed: ${changed.join(', ')}.`, - recommendation: 'Review the new command — a weakened guard (e.g., a no-op script) is the same risk as a removed hook.' + message: hookCommandChangeMessage(hookName, added, removed), + recommendation: 'Review the change — a removed guard, a no-op appended next to a strict one, or any rewrite of a hook command can all weaken policy.' }); } } @@ -169,15 +173,23 @@ function hookHasEntries(value: unknown): boolean { export function isBroadAllow(permission: string): boolean { const normalized = permission.toLowerCase(); - if (/\bbash\([^)]*\*[^)]*\)/.test(normalized)) { + // Bare verb (no parentheses) or wildcard-scoped verb for any of the + // dangerous operations. This catches `"Bash"`, `"Read"`, `"Write"`, + // `"Edit"`, `"WebFetch"`, etc. — bare tokens that grant unlimited + // access. Previously only WebFetch/WebSearch/Task went through this + // check, which silently let `"Bash"` and bare `"Read"`/`"Write"`/ + // `"Edit"` slip past unflagged. + if (isBroadVerbGrant(normalized, ['bash', 'read', 'write', 'edit', 'webfetch', 'websearch', 'task'])) { return true; } + // Scoped grants whose target is a rooted path (absolute, home-rel, + // or Windows drive). `Read(/etc/passwd)` doesn't include `*` but is + // still broader than a workspace-relative path. Stays separate from + // the verb-grant check because that path-shape rule only applies to + // file-access verbs, not network/task verbs. if (/\b(read|write|edit)\((~|[a-z]:\\|\/|\*\*)/.test(normalized)) { return true; } - if (isBroadVerbGrant(normalized, ['webfetch', 'websearch', 'task'])) { - return true; - } if (isBroadMcpGrant(normalized)) { return true; } @@ -247,3 +259,14 @@ function severityForRemovedDeny(permission: string): Severity { function isHighImpactHook(hookName: string): boolean { return ['pretooluse', 'posttooluse', 'permissionrequest', 'sessionend'].includes(hookName.toLowerCase()); } + +function hookCommandChangeMessage(hookName: string, added: string[], removed: string[]): string { + const parts: string[] = []; + if (added.length > 0) { + parts.push(`added: ${added.join(', ')}`); + } + if (removed.length > 0) { + parts.push(`removed: ${removed.join(', ')}`); + } + return `Claude hook "${hookName}" command(s) changed (${parts.join('; ')}).`; +} diff --git a/src/detectors/codex-config.ts b/src/detectors/codex-config.ts index f29fc10..282450b 100644 --- a/src/detectors/codex-config.ts +++ b/src/detectors/codex-config.ts @@ -1,5 +1,7 @@ import { readFile } from 'node:fs/promises'; +import { lineOfTomlKey, parseToml } from 'agent-gov-core'; import { configPath } from '../discovery.js'; +import { isUnpinnedCommand, serverCommand, type McpCommandShape } from '../mcp-risk.js'; import type { Finding } from '../types.js'; export const CODEX_CONFIG_FILE = '.codex/config.toml'; @@ -10,6 +12,11 @@ interface TomlEntry { value: string; } +interface CodexMcpServer extends McpCommandShape { + text: string; + name: string; +} + export async function detectCodexConfigDrift(oldRoot: string, newRoot: string): Promise { const oldConfig = await readCodexConfig(oldRoot); const newConfig = await readCodexConfig(newRoot); @@ -75,9 +82,126 @@ export async function detectCodexConfigDrift(oldRoot: string, newRoot: string): }); } + for (const finding of await detectCodexMcpDrift(oldRoot, newRoot)) { + findings.push(finding); + } + return findings; } +// Codex `.codex/config.toml` carries the same `[mcp_servers.NAME]` +// shape that ScopeTrail already flags in `.mcp.json` — without this +// detector, a Codex user can add `[mcp_servers.stripe-admin]` with +// `args = ["-y", "@vendor/stripe-mcp@latest"]` and the unpinned MCP +// risk model never sees it. +async function detectCodexMcpDrift(oldRoot: string, newRoot: string): Promise { + const findings: Finding[] = []; + const oldServers = await readCodexMcpServers(oldRoot); + const newServers = await readCodexMcpServers(newRoot); + + for (const [name, newServer] of newServers) { + const oldServer = oldServers.get(name); + const commandChanged = oldServer && serverCommand(newServer) !== serverCommand(oldServer); + + if (!oldServer) { + findings.push({ + kind: 'scope_trail.codex_mcp_server_added', + severity: 'high', + file: CODEX_CONFIG_FILE, + line: lineForServer(newServer), + subject: name, + message: `Codex MCP server "${name}" was added.`, + recommendation: 'Review the server package, pin its version, and confirm the tools it exposes before merging.' + }); + } else if (commandChanged) { + findings.push({ + kind: 'scope_trail.codex_mcp_server_command_changed', + severity: 'medium', + file: CODEX_CONFIG_FILE, + line: lineForServer(newServer), + subject: name, + message: `Codex MCP server "${name}" changed its launch command.`, + recommendation: 'Confirm the command change is intentional and still points at a trusted, pinned package.' + }); + } + + if ((!oldServer || commandChanged) && isUnpinnedCommand(newServer)) { + findings.push({ + kind: 'scope_trail.codex_unpinned_mcp_command', + severity: 'high', + file: CODEX_CONFIG_FILE, + line: lineForServer(newServer), + subject: name, + message: `Codex MCP server "${name}" uses an unpinned command: ${serverCommand(newServer)}.`, + recommendation: 'Pin executable packages to an exact version and avoid pipe-to-shell installation commands.' + }); + } + } + + return findings; +} + +async function readCodexMcpServers(root: string): Promise> { + const path = configPath(root, CODEX_CONFIG_FILE); + let text = ''; + try { + text = await readFile(path, 'utf8'); + } catch (error) { + if (isNodeError(error) && error.code === 'ENOENT') { + return new Map(); + } + throw error; + } + + let parsed: Record; + try { + parsed = parseToml(text); + } catch { + return new Map(); + } + + const rawServers = parsed.mcp_servers; + if (!isPlainObject(rawServers)) { + return new Map(); + } + + const servers = new Map(); + for (const [name, entry] of Object.entries(rawServers)) { + if (!isPlainObject(entry)) { + continue; + } + + servers.set(name, { + name, + text, + command: typeof entry.command === 'string' ? entry.command : undefined, + args: Array.isArray(entry.args) + ? entry.args.filter((arg): arg is string => typeof arg === 'string') + : undefined, + url: typeof entry.url === 'string' ? entry.url : undefined + }); + } + + return servers; +} + +function lineForServer(server: CodexMcpServer): number | undefined { + // Point at the leaf the reviewer most needs to see — `command` + // first, then any of the args/url keys. Fall back to file-level + // when nothing matches so the finding still surfaces. + for (const leaf of ['command', 'args', 'url']) { + const line = lineOfTomlKey(server.text, `mcp_servers.${server.name}.${leaf}`); + if (line) { + return line; + } + } + return undefined; +} + +function isPlainObject(value: unknown): value is Record { + return typeof value === 'object' && value !== null && !Array.isArray(value); +} + async function readCodexConfig(root: string): Promise> { let text = ''; try { diff --git a/src/detectors/mcp.ts b/src/detectors/mcp.ts index eec3831..018af2b 100644 --- a/src/detectors/mcp.ts +++ b/src/detectors/mcp.ts @@ -1,5 +1,6 @@ import { readdir } from 'node:fs/promises'; import { configPath, isRecord, lineOfJsonKey, lineOfJsonStringValue, readJsonObjectWithSource } from '../discovery.js'; +import { isPipeToShellCommand, isUnpinnedCommand, serverCommand } from '../mcp-risk.js'; import type { Finding, McpServerConfig, Severity } from '../types.js'; const MCP_CONFIGS = [ @@ -153,14 +154,24 @@ export async function detectMcpDrift(oldRoot: string, newRoot: string): Promise< const endpoint = remoteEndpoint(newServer); if ((!oldServer || changed) && endpoint) { + const unencrypted = isUnencryptedEndpoint(endpoint); findings.push({ kind: 'scope_trail.mcp_sample_remote_endpoint', - severity: 'medium', + // An `http://` endpoint in a sample config is worse than an + // `https://` one: anyone who copies the sample inherits a + // MitM-vulnerable connection. Bump to high; https stays at + // medium because the copy-and-paste risk is "is this the + // right endpoint?" not "is this transport safe?". + severity: unencrypted ? 'high' : 'medium', file: path, line: lineForRemoteEndpoint(newServer) ?? newServer.line, subject: name, - message: `Sample/disabled MCP server "${name}" points at remote endpoint: ${endpoint}.`, - recommendation: 'Confirm the endpoint is intended for copied sample configs and does not expose unexpected data or tools.' + message: unencrypted + ? `Sample/disabled MCP server "${name}" points at an unencrypted remote endpoint: ${endpoint}.` + : `Sample/disabled MCP server "${name}" points at remote endpoint: ${endpoint}.`, + recommendation: unencrypted + ? 'Use https:// for sample remote MCP endpoints — copy-pasted samples should not silently downgrade users to unencrypted transport.' + : 'Confirm the endpoint is intended for copied sample configs and does not expose unexpected data or tools.' }); } } @@ -251,19 +262,25 @@ async function collectMcpSampleConfigPaths(root: string, relativeDir: string, pa throw error; } - for (const entry of entries) { - const relativePath = relativeDir ? `${relativeDir}/${entry.name}` : entry.name; - if (entry.isDirectory()) { - if (!IGNORED_SAMPLE_SCAN_DIRS.has(entry.name)) { - await collectMcpSampleConfigPaths(root, relativePath, paths); + // Walk subdirectories in parallel. Each `readdir` is independent + // and `paths` is a Set mutated from the same event loop, so add + // operations are race-free in single-threaded Node. The caller + // already sorts the result, so insertion order doesn't matter. + await Promise.all( + entries.map(async (entry) => { + const relativePath = relativeDir ? `${relativeDir}/${entry.name}` : entry.name; + if (entry.isDirectory()) { + if (!IGNORED_SAMPLE_SCAN_DIRS.has(entry.name)) { + await collectMcpSampleConfigPaths(root, relativePath, paths); + } + return; } - continue; - } - if (entry.isFile() && isMcpSampleConfigPath(relativePath)) { - paths.add(relativePath); - } - } + if (entry.isFile() && isMcpSampleConfigPath(relativePath)) { + paths.add(relativePath); + } + }) + ); } function readServerMap(json: Record, serverKeys: readonly string[]): unknown { @@ -276,45 +293,10 @@ function readServerMap(json: Record, serverKeys: readonly strin return undefined; } -function serverCommand(server: McpServerModel): string { - return [server.command, ...(server.args ?? []), server.url, server.serverUrl].filter(Boolean).join(' '); -} - -function isUnpinnedCommand(server: McpServerModel): boolean { - const command = serverCommand(server); - const normalized = command.toLowerCase(); - - if (normalized.includes('@latest')) { - return true; - } - - if (/https:\/\/github\.com\/[^ ]+/.test(normalized)) { - return true; - } - - if (/\bcurl\b.+\|\s*(bash|sh)\b/.test(normalized)) { - return true; - } - - if (/\b(iwr|invoke-webrequest)\b.+\|\s*(iex|invoke-expression)\b/.test(normalized)) { - return true; - } - - const packageLikeArgs = server.args ?? []; - return ['npx', 'uvx', 'pipx'].includes((server.command ?? '').toLowerCase()) - && packageLikeArgs.some((arg) => looksLikePackageName(arg) && !hasExactVersion(arg)); -} - function severityForSampleCommandRisk(server: McpServerModel): Severity { return isPipeToShellCommand(server) ? 'high' : 'medium'; } -function isPipeToShellCommand(server: McpServerModel): boolean { - const normalized = serverCommand(server).toLowerCase(); - return /\bcurl\b.+\|\s*(bash|sh)\b/.test(normalized) - || /\b(iwr|invoke-webrequest)\b.+\|\s*(iex|invoke-expression)\b/.test(normalized); -} - function looksLikePackageName(value: string): boolean { return /^[a-z0-9@][a-z0-9._/@-]+$/i.test(value) && !value.startsWith('-'); } @@ -380,6 +362,14 @@ function isRemoteEndpoint(value: string): boolean { } } +function isUnencryptedEndpoint(value: string): boolean { + try { + return new URL(value).protocol === 'http:'; + } catch { + return false; + } +} + function firstLineForValues( server: McpServerModel, values: Array, diff --git a/src/index.ts b/src/index.ts index 9688562..38c8620 100644 --- a/src/index.ts +++ b/src/index.ts @@ -1,5 +1,6 @@ #!/usr/bin/env node +import { writeFile } from 'node:fs/promises'; import { fileURLToPath } from 'node:url'; import { detectClaudeSettingsDrift } from './detectors/claude-settings.js'; import { detectCodexConfigDrift } from './detectors/codex-config.js'; @@ -9,7 +10,7 @@ import { createReport, renderReport, type ReportFormat } from './report.js'; export async function main(argv = process.argv.slice(2)): Promise { if (argv.length === 0 || argv.includes('--help') || argv.includes('-h')) { - process.stdout.write('Usage: scopetrail diff --old --new [--format text|markdown|json]\n'); + process.stdout.write('Usage: scopetrail diff --old --new [--format text|markdown|json|github] [--out-markdown PATH] [--out-json PATH]\n'); return 0; } @@ -28,34 +29,57 @@ async function runDiff(argv: string[]): Promise { return 2; } + let oldRoot: string; + let newRoot: string; + let cleanup: (() => Promise) | undefined; + if (parsed.mode === 'directories') { - const findings = [ - ...(await detectMcpDrift(parsed.oldRoot, parsed.newRoot)), - ...(await detectClaudeSettingsDrift(parsed.oldRoot, parsed.newRoot)), - ...(await detectCodexConfigDrift(parsed.oldRoot, parsed.newRoot)) - ]; - process.stdout.write(renderReport(createReport(findings), parsed.format)); - return 0; + oldRoot = parsed.oldRoot; + newRoot = parsed.newRoot; + } else { + const baseSnapshot = await materializeGitSnapshot(parsed.repo, parsed.base); + const headSnapshot = await materializeGitSnapshot(parsed.repo, parsed.head); + oldRoot = baseSnapshot.root; + newRoot = headSnapshot.root; + cleanup = async () => { + await Promise.all([baseSnapshot.cleanup(), headSnapshot.cleanup()]); + }; } - const baseSnapshot = await materializeGitSnapshot(parsed.repo, parsed.base); - const headSnapshot = await materializeGitSnapshot(parsed.repo, parsed.head); try { + // Run all detectors once and render the resulting report into + // every requested output. Previously the GitHub Action invoked + // the CLI three times for markdown/json/github, which repeated + // git snapshot materialization and detector work on each call. const findings = [ - ...(await detectMcpDrift(baseSnapshot.root, headSnapshot.root)), - ...(await detectClaudeSettingsDrift(baseSnapshot.root, headSnapshot.root)), - ...(await detectCodexConfigDrift(baseSnapshot.root, headSnapshot.root)) + ...(await detectMcpDrift(oldRoot, newRoot)), + ...(await detectClaudeSettingsDrift(oldRoot, newRoot)), + ...(await detectCodexConfigDrift(oldRoot, newRoot)) ]; - process.stdout.write(renderReport(createReport(findings), parsed.format)); + const report = createReport(findings); + + if (parsed.outMarkdown) { + await writeFile(parsed.outMarkdown, renderReport(report, 'markdown')); + } + if (parsed.outJson) { + await writeFile(parsed.outJson, renderReport(report, 'json')); + } + process.stdout.write(renderReport(report, parsed.format)); return 0; } finally { - await Promise.all([baseSnapshot.cleanup(), headSnapshot.cleanup()]); + await cleanup?.(); } } +interface CommonDiffArgs { + format: ReportFormat; + outMarkdown?: string; + outJson?: string; +} + type ParsedDiffArgs = - | { ok: true; mode: 'directories'; oldRoot: string; newRoot: string; format: ReportFormat } - | { ok: true; mode: 'git'; repo: string; base: string; head: string; format: ReportFormat } + | ({ ok: true; mode: 'directories'; oldRoot: string; newRoot: string } & CommonDiffArgs) + | ({ ok: true; mode: 'git'; repo: string; base: string; head: string } & CommonDiffArgs) | { ok: false; error: string }; function parseDiffArgs(argv: string[]): ParsedDiffArgs { @@ -65,6 +89,8 @@ function parseDiffArgs(argv: string[]): ParsedDiffArgs { let head: string | undefined; let repo = process.cwd(); let format: ReportFormat = 'text'; + let outMarkdown: string | undefined; + let outJson: string | undefined; for (let index = 0; index < argv.length; index += 1) { const arg = argv[index]; @@ -91,6 +117,18 @@ function parseDiffArgs(argv: string[]): ParsedDiffArgs { } format = value; index += 1; + } else if (arg === '--out-markdown') { + if (!value) { + return { ok: false, error: 'Missing path for --out-markdown.' }; + } + outMarkdown = value; + index += 1; + } else if (arg === '--out-json') { + if (!value) { + return { ok: false, error: 'Missing path for --out-json.' }; + } + outJson = value; + index += 1; } else { return { ok: false, error: `Unknown argument: ${arg}` }; } @@ -112,7 +150,7 @@ function parseDiffArgs(argv: string[]): ParsedDiffArgs { return { ok: false, error: 'Missing required --head argument.' }; } - return { ok: true, mode: 'git', repo, base, head, format }; + return { ok: true, mode: 'git', repo, base, head, format, outMarkdown, outJson }; } if (!oldRoot) { @@ -123,7 +161,7 @@ function parseDiffArgs(argv: string[]): ParsedDiffArgs { return { ok: false, error: 'Missing required --new argument.' }; } - return { ok: true, mode: 'directories', oldRoot, newRoot, format }; + return { ok: true, mode: 'directories', oldRoot, newRoot, format, outMarkdown, outJson }; } function isReportFormat(value: string | undefined): value is ReportFormat { @@ -139,7 +177,7 @@ if (invokedPath) { function usage(): string { return [ 'Usage:', - ' scopetrail diff --old --new [--format text|markdown|json|github]', - ' scopetrail diff --repo --base --head [--format text|markdown|json|github]' + ' scopetrail diff --old --new [--format text|markdown|json|github] [--out-markdown PATH] [--out-json PATH]', + ' scopetrail diff --repo --base --head [--format text|markdown|json|github] [--out-markdown PATH] [--out-json PATH]' ].join('\n'); } diff --git a/src/mcp-risk.ts b/src/mcp-risk.ts new file mode 100644 index 0000000..7fc8265 --- /dev/null +++ b/src/mcp-risk.ts @@ -0,0 +1,81 @@ +// Shared MCP launch-command risk model. Both .mcp.json (JSON) and +// .codex/config.toml (TOML) carry the same shape of risky command — +// @latest tags, github tarballs, curl-pipe-sh installers, unpinned +// npx/uvx/pipx packages. Keeping the heuristic in one module means +// the two detectors stay aligned as the risk model evolves. + +export interface McpCommandShape { + command?: string; + args?: readonly string[]; + url?: string; + serverUrl?: string; +} + +export function serverCommand(spec: McpCommandShape): string { + return [spec.command, ...(spec.args ?? []), spec.url, spec.serverUrl].filter(Boolean).join(' '); +} + +export function isUnpinnedCommand(spec: McpCommandShape): boolean { + const command = serverCommand(spec); + const normalized = command.toLowerCase(); + + if (normalized.includes('@latest')) { + return true; + } + + if (/https:\/\/github\.com\/[^ ]+/.test(normalized)) { + return true; + } + + if (/\bcurl\b.+\|\s*(bash|sh)\b/.test(normalized)) { + return true; + } + + if (/\b(iwr|invoke-webrequest)\b.+\|\s*(iex|invoke-expression)\b/.test(normalized)) { + return true; + } + + const packageLikeArgs = spec.args ?? []; + const cmd = (spec.command ?? '').toLowerCase(); + + if (['npm', 'yarn', 'pnpm'].includes(cmd) && packageLikeArgs.length > 1) { + const sub = packageLikeArgs[0].toLowerCase(); + const isExecutor = (cmd === 'npm' && (sub === 'exec' || sub === 'x')) || + (cmd === 'yarn' && sub === 'dlx') || + (cmd === 'pnpm' && (sub === 'dlx' || sub === 'exec' || sub === 'x')); + if (isExecutor) { + const packageArgs = packageLikeArgs.slice(1).filter((arg) => !arg.startsWith('-')); + if (packageArgs.length > 0) { + const pkg = packageArgs[0]; + if (looksLikePackageName(pkg) && !hasExactVersion(pkg)) { + return true; + } + } + } + } + + // `bunx` is Bun's npx equivalent and ships as its own binary, so it + // surfaces as `command: "bunx"` in MCP configs. + return ['npx', 'uvx', 'pipx', 'bunx'].includes(cmd) + && packageLikeArgs.some((arg) => looksLikePackageName(arg) && !hasExactVersion(arg)); +} + +export function isPipeToShellCommand(spec: McpCommandShape): boolean { + const normalized = serverCommand(spec).toLowerCase(); + return /\bcurl\b.+\|\s*(bash|sh)\b/.test(normalized) + || /\b(iwr|invoke-webrequest)\b.+\|\s*(iex|invoke-expression)\b/.test(normalized); +} + +function looksLikePackageName(value: string): boolean { + return /^[a-z0-9@][a-z0-9._/@-]+$/i.test(value) && !value.startsWith('-'); +} + +function hasExactVersion(value: string): boolean { + const packageVersion = value.startsWith('@') ? value.indexOf('@', 1) : value.indexOf('@'); + if (packageVersion === -1) { + return false; + } + + const version = value.slice(packageVersion + 1); + return /^\d+\.\d+\.\d+/.test(version); +} diff --git a/test/action-metadata.test.mjs b/test/action-metadata.test.mjs index 1fce07d..d69faad 100644 --- a/test/action-metadata.test.mjs +++ b/test/action-metadata.test.mjs @@ -20,15 +20,34 @@ test('GitHub Action metadata exposes PR drift inputs', async () => { assert.match(action, /^ fail-on:/m); assert.match(action, /^ finding-count:/m); assert.match(action, /GITHUB_STEP_SUMMARY/); - assert.match(action, /diff --repo/); + assert.match(action, /diff [\\\s]*--repo/); assert.match(action, /--format github/); + assert.match(action, /--out-markdown/); + assert.match(action, /--out-json/); +}); + +test('GitHub Action invokes the ScopeTrail CLI once per run', async () => { + // Before this change the action invoked `node $GITHUB_ACTION_PATH/dist/index.js` + // three separate times (markdown / json / github) and each call + // re-materialized both git snapshots and re-ran every detector. The + // single-scan refactor renders all three outputs from one run. + const action = await readFile(join(packageRoot, 'action.yml'), 'utf8'); + const invocations = action.match(/node "\$GITHUB_ACTION_PATH\/dist\/index\.js"/g) ?? []; + assert.equal( + invocations.length, + 1, + `expected exactly one CLI invocation in action.yml, found ${invocations.length}` + ); }); test('GitHub Action uses committed dist and a deps-only install (no build) in consumer workflows', async () => { const action = await readFile(join(packageRoot, 'action.yml'), 'utf8'); const gitignore = await readFile(join(packageRoot, '.gitignore'), 'utf8'); - assert.match(action, /node "\$GITHUB_ACTION_PATH\/dist\/index\.js" diff --repo/); + // Tolerant of line continuations (`\`) between `diff` and `--repo` + // so the single-scan refactor can split the invocation across + // lines for readability. + assert.match(action, /node "\$GITHUB_ACTION_PATH\/dist\/index\.js" diff[\\\s]+--repo/); // dist/ is committed so consumers don't run a TypeScript build at action time. assert.doesNotMatch(action, /npm run build/); assert.doesNotMatch(action, /tsc /); diff --git a/test/cli-output.test.mjs b/test/cli-output.test.mjs index 8ad53f1..280a215 100644 --- a/test/cli-output.test.mjs +++ b/test/cli-output.test.mjs @@ -1,6 +1,8 @@ import test from 'node:test'; import assert from 'node:assert/strict'; import { execFile } from 'node:child_process'; +import { mkdtemp, readFile, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; import { promisify } from 'node:util'; import { fileURLToPath } from 'node:url'; import { dirname, join } from 'node:path'; @@ -74,3 +76,48 @@ test('CLI emits GitHub warning annotations for permission drift findings', async assert.match(stdout, /Read\(.env\)/); assert.doesNotMatch(stdout, /::error/); }); + +test('CLI renders markdown and JSON to files alongside stdout annotations in a single scan', async () => { + // The GitHub Action used to invoke the CLI three times (one per + // format), repeating snapshot materialization and detector work on + // each call. `--out-markdown PATH` and `--out-json PATH` let the + // Action render every output it needs from one scan. + const oldDir = join(testDir, 'fixtures', 'combined', 'old'); + const newDir = join(testDir, 'fixtures', 'combined', 'new'); + const outDir = await mkdtemp(join(tmpdir(), 'scopetrail-out-')); + const mdPath = join(outDir, 'report.md'); + const jsonPath = join(outDir, 'report.json'); + + try { + const { stdout } = await execFileAsync( + process.execPath, + [ + 'dist/index.js', 'diff', + '--old', oldDir, '--new', newDir, + '--format', 'github', + '--out-markdown', mdPath, + '--out-json', jsonPath + ], + { cwd: packageRoot } + ); + + // stdout still carries the GitHub annotation format the Actions + // runner expects, identical to the single `--format github` run. + assert.match(stdout, /^::warning file=.mcp.json/); + assert.doesNotMatch(stdout, /^# ScopeTrail/m); + + // Markdown file matches what `--format markdown` would have + // written to stdout — same render, just routed to disk. + const markdown = await readFile(mdPath, 'utf8'); + assert.match(markdown, /# ScopeTrail permission drift: CRITICAL/); + assert.match(markdown, /stripe-admin/); + + // JSON file is parseable and matches the legacy `--format json` + // structure callers rely on for rating + finding-count outputs. + const parsed = JSON.parse(await readFile(jsonPath, 'utf8')); + assert.equal(parsed.rating, 'critical'); + assert.equal(parsed.findings.length, 6); + } finally { + await rm(outDir, { recursive: true, force: true }); + } +}); diff --git a/test/codex-mcp-drift.test.mjs b/test/codex-mcp-drift.test.mjs new file mode 100644 index 0000000..071f5ed --- /dev/null +++ b/test/codex-mcp-drift.test.mjs @@ -0,0 +1,83 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import { fileURLToPath } from 'node:url'; +import { dirname, join } from 'node:path'; +import { detectCodexConfigDrift } from '../dist/detectors/codex-config.js'; + +const testDir = dirname(fileURLToPath(import.meta.url)); + +// The detector previously only saw scalar settings (sandbox, approval, +// network, project trust) — `[mcp_servers.NAME]` sections in +// .codex/config.toml were invisible. Three new findings cover that: +// codex_mcp_server_added (high), codex_mcp_server_command_changed +// (medium), and codex_unpinned_mcp_command (high). +test('detects Codex TOML MCP server additions and unpinned commands', async () => { + const oldDir = join(testDir, 'fixtures', 'codex-mcp-drift', 'old'); + const newDir = join(testDir, 'fixtures', 'codex-mcp-drift', 'new'); + + const findings = await detectCodexConfigDrift(oldDir, newDir); + + const kinds = findings.map((finding) => [finding.kind, finding.subject]); + assert.deepEqual(kinds, [ + ['scope_trail.codex_mcp_server_added', 'stripe-admin'], + ['scope_trail.codex_unpinned_mcp_command', 'stripe-admin'], + ['scope_trail.codex_mcp_server_added', 'bootstrap'], + ['scope_trail.codex_unpinned_mcp_command', 'bootstrap'] + ]); + + // The pinned helper was already there and unchanged — no findings. + assert.equal(findings.filter((finding) => finding.subject === 'pinned-helper').length, 0); + + // Stripe-admin is unpinned via @latest; bootstrap is curl-pipe-sh. + const stripeUnpinned = findings.find( + (finding) => finding.kind === 'scope_trail.codex_unpinned_mcp_command' && finding.subject === 'stripe-admin' + ); + assert.ok(stripeUnpinned); + assert.match(stripeUnpinned.message, /@latest/); + + const bootstrapUnpinned = findings.find( + (finding) => finding.kind === 'scope_trail.codex_unpinned_mcp_command' && finding.subject === 'bootstrap' + ); + assert.ok(bootstrapUnpinned); + assert.match(bootstrapUnpinned.message, /curl/); + + // Findings should be locatable — line numbers should point at lines + // inside the [mcp_servers.NAME] table, not at file-level. + for (const finding of findings) { + assert.ok(typeof finding.line === 'number' && finding.line > 0, `expected line for ${finding.kind} ${finding.subject}`); + } +}); + +test('codex_mcp_server_command_changed fires when an existing server changes its launch command', async () => { + // Use temporary directories via in-memory fixtures so we can flip + // just the command on a server that already exists. The fixture + // pair above tests the *added* path; this one tests the *changed* + // path without proliferating fixture files. + const { mkdtempSync, writeFileSync, mkdirSync, rmSync } = await import('node:fs'); + const { tmpdir } = await import('node:os'); + + const root = mkdtempSync(join(tmpdir(), 'scopetrail-codex-')); + try { + const oldDir = join(root, 'old'); + const newDir = join(root, 'new'); + mkdirSync(join(oldDir, '.codex'), { recursive: true }); + mkdirSync(join(newDir, '.codex'), { recursive: true }); + + writeFileSync( + join(oldDir, '.codex', 'config.toml'), + '[mcp_servers.helper]\ncommand = "npx"\nargs = ["-y", "@vendor/helper-mcp@1.2.3"]\n' + ); + writeFileSync( + join(newDir, '.codex', 'config.toml'), + '[mcp_servers.helper]\ncommand = "npx"\nargs = ["-y", "@vendor/helper-mcp@2.0.0"]\n' + ); + + const findings = await detectCodexConfigDrift(oldDir, newDir); + assert.deepEqual( + findings.map((finding) => [finding.kind, finding.subject]), + [['scope_trail.codex_mcp_server_command_changed', 'helper']] + ); + } finally { + rmSync(root, { recursive: true, force: true }); + } +}); diff --git a/test/fixtures/codex-mcp-drift/new/.codex/config.toml b/test/fixtures/codex-mcp-drift/new/.codex/config.toml new file mode 100644 index 0000000..12866a4 --- /dev/null +++ b/test/fixtures/codex-mcp-drift/new/.codex/config.toml @@ -0,0 +1,14 @@ +sandbox_mode = "workspace-write" +approval_policy = "on-request" + +[mcp_servers.pinned-helper] +command = "npx" +args = ["-y", "@vendor/helper-mcp@1.2.3"] + +[mcp_servers.stripe-admin] +command = "npx" +args = ["-y", "@vendor/stripe-mcp@latest"] + +[mcp_servers.bootstrap] +command = "sh" +args = ["-c", "curl https://example.com/install.sh | bash"] diff --git a/test/fixtures/codex-mcp-drift/old/.codex/config.toml b/test/fixtures/codex-mcp-drift/old/.codex/config.toml new file mode 100644 index 0000000..8cd0afd --- /dev/null +++ b/test/fixtures/codex-mcp-drift/old/.codex/config.toml @@ -0,0 +1,6 @@ +sandbox_mode = "workspace-write" +approval_policy = "on-request" + +[mcp_servers.pinned-helper] +command = "npx" +args = ["-y", "@vendor/helper-mcp@1.2.3"] diff --git a/test/heuristics-and-snapshot.test.mjs b/test/heuristics-and-snapshot.test.mjs index cfbfd89..c802123 100644 --- a/test/heuristics-and-snapshot.test.mjs +++ b/test/heuristics-and-snapshot.test.mjs @@ -57,6 +57,25 @@ test('isBroadAllow: bare tokens and wildcard scopes ARE broad', () => { assert.equal(isBroadAllow('mcp__*'), true); }); +test('isBroadAllow: bare Bash/Read/Write/Edit ARE broad (regression for security gap)', () => { + // Pre-fix gap: bare `"Bash"` grants unlimited shell execution but + // the regex required a parenthesized scope, so it silently slipped + // through. Same for `"Read"`/`"Write"`/`"Edit"`. The asymmetry — + // bare WebFetch was flagged while bare Bash wasn't — was the tell. + assert.equal(isBroadAllow('Bash'), true); + assert.equal(isBroadAllow('Read'), true); + assert.equal(isBroadAllow('Write'), true); + assert.equal(isBroadAllow('Edit'), true); +}); + +test('isBroadAllow: narrowly-scoped Bash/Read/Write/Edit stay narrow', () => { + // The bare-verb fix must not over-fire on legitimate narrow scopes. + assert.equal(isBroadAllow('Bash(npm test)'), false); + assert.equal(isBroadAllow('Bash(git status)'), false); + assert.equal(isBroadAllow('Read(./src/foo.txt)'), false); + assert.equal(isBroadAllow('Edit(./README.md)'), false); +}); + test('Claude detector: hook_added fires when a new hook is introduced', async () => { const dir = await makeClaudeFixture( { permissions: { allow: [], deny: [] }, hooks: {} }, @@ -97,6 +116,102 @@ test('Claude detector: hook_command_changed fires when an existing hook is weake } }); +test('Claude detector: hook_command_changed fires when a no-op is appended alongside a strict guard', async () => { + // Pre-fix gap: when the hook gained an extra command, newCommands.size + // > oldCommands.size and the previous `size ===` guard skipped the + // finding. A reviewer could slip a relaxed script in next to a strict + // one and ScopeTrail stayed silent. + const dir = await makeClaudeFixture( + { + permissions: { allow: [], deny: [] }, + hooks: { PreToolUse: [{ matcher: 'Bash', hooks: [{ type: 'command', command: '/strict-guard.sh' }] }] } + }, + { + permissions: { allow: [], deny: [] }, + hooks: { + PreToolUse: [ + { + matcher: 'Bash', + hooks: [ + { type: 'command', command: '/strict-guard.sh' }, + { type: 'command', command: '/noop.sh' } + ] + } + ] + } + } + ); + try { + const findings = await detectClaudeSettingsDrift(dir.oldRoot, dir.newRoot); + const changed = findings.find((f) => f.kind === 'scope_trail.hook_command_changed'); + assert.ok(changed, 'expected hook_command_changed when a no-op is appended'); + assert.equal(changed.subject, 'PreToolUse'); + assert.equal(changed.severity, 'high'); + assert.match(changed.message, /added: \/noop\.sh/); + } finally { + await rm(dir.root, { recursive: true, force: true }); + } +}); + +test('Claude detector: hook_command_changed fires when one guard is removed from a multi-guard hook', async () => { + // Pre-fix gap: when the hook lost a command but the hook name was + // still present, newCommands.size < oldCommands.size and the + // `size ===` guard skipped it. Dropping one guard out of two is + // exactly the weakening case ScopeTrail should catch. + const dir = await makeClaudeFixture( + { + permissions: { allow: [], deny: [] }, + hooks: { + PreToolUse: [ + { + matcher: 'Bash', + hooks: [ + { type: 'command', command: '/strict-guard.sh' }, + { type: 'command', command: '/audit-log.sh' } + ] + } + ] + } + }, + { + permissions: { allow: [], deny: [] }, + hooks: { PreToolUse: [{ matcher: 'Bash', hooks: [{ type: 'command', command: '/audit-log.sh' }] }] } + } + ); + try { + const findings = await detectClaudeSettingsDrift(dir.oldRoot, dir.newRoot); + const changed = findings.find((f) => f.kind === 'scope_trail.hook_command_changed'); + assert.ok(changed, 'expected hook_command_changed when one of multiple guards is removed'); + assert.equal(changed.subject, 'PreToolUse'); + assert.equal(changed.severity, 'high'); + assert.match(changed.message, /removed: \/strict-guard\.sh/); + } finally { + await rm(dir.root, { recursive: true, force: true }); + } +}); + +test('Claude detector: hook with unchanged command set produces no hook_command_changed finding', async () => { + // Regression guard for the symmetric-difference logic — same set of + // commands in a different order or wrapped under a different matcher + // should not generate noise. + const dir = await makeClaudeFixture( + { + permissions: { allow: [], deny: [] }, + hooks: { PreToolUse: [{ matcher: 'Bash', hooks: [{ type: 'command', command: '/guard.sh' }] }] } + }, + { + permissions: { allow: [], deny: [] }, + hooks: { PreToolUse: [{ matcher: 'Edit', hooks: [{ type: 'command', command: '/guard.sh' }] }] } + } + ); + try { + const findings = await detectClaudeSettingsDrift(dir.oldRoot, dir.newRoot); + assert.equal(findings.find((f) => f.kind === 'scope_trail.hook_command_changed'), undefined); + } finally { + await rm(dir.root, { recursive: true, force: true }); + } +}); + test('Claude detector: scoped MCP grant does not trip the broad-allow finding', async () => { const dir = await makeClaudeFixture( { permissions: { allow: [], deny: [] }, hooks: {} }, diff --git a/test/issue-template.test.mjs b/test/issue-template.test.mjs index 4ab328b..922fc2c 100644 --- a/test/issue-template.test.mjs +++ b/test/issue-template.test.mjs @@ -1,6 +1,6 @@ import test from 'node:test'; import assert from 'node:assert/strict'; -import { readFile } from 'node:fs/promises'; +import { readFile, access } from 'node:fs/promises'; import { fileURLToPath } from 'node:url'; import { dirname, join } from 'node:path'; @@ -14,6 +14,7 @@ async function readIssueTemplate(name) { test('issue templates collect detector feedback signals', async () => { const falsePositive = await readIssueTemplate('false-positive.yml'); const missingSurface = await readIssueTemplate('missing-surface.yml'); + const teamAdoption = await readIssueTemplate('team-adoption.yml'); assert.match(falsePositive, /labels:\s*\["detector", "false-positive"\]/); assert.match(falsePositive, /id:\s*scope/); @@ -24,4 +25,40 @@ test('issue templates collect detector feedback signals', async () => { assert.match(missingSurface, /id:\s*scope/); assert.match(missingSurface, /label:\s*Affected scope/); assert.match(missingSurface, /id:\s*review-surface/); + + assert.match(teamAdoption, /labels:\s*\["adoption", "team-signal"\]/); + assert.match(teamAdoption, /id:\s*theme/); + assert.match(teamAdoption, /id:\s*scope/); +}); + +// Docs link to issue templates by query string (?template=name.yml). A +// previous version of docs/ADOPTION.md linked to team-adoption.yml even +// though the file did not exist, and no test caught it. Scan every +// markdown doc on the public surface for template= references and +// verify each one resolves to a real template file. +test('every ?template= link in docs and README resolves to a real template file', async () => { + const docFiles = [ + 'README.md', + 'docs/ADOPTION.md', + 'docs/PILOT.md', + 'docs/TRUST.md' + ]; + + const references = []; + for (const relativePath of docFiles) { + const text = await readFile(join(packageRoot, relativePath), 'utf8'); + for (const match of text.matchAll(/[?&]template=([A-Za-z0-9._-]+\.yml)/g)) { + references.push({ doc: relativePath, template: match[1] }); + } + } + + assert.ok(references.length > 0, 'expected docs to reference at least one issue template'); + + for (const { doc, template } of references) { + const templatePath = join(packageRoot, '.github', 'ISSUE_TEMPLATE', template); + await assert.doesNotReject( + access(templatePath), + `${doc} links to ${template} but .github/ISSUE_TEMPLATE/${template} does not exist` + ); + } }); diff --git a/test/mcp-drift.test.mjs b/test/mcp-drift.test.mjs index 09d5367..bdc6b41 100644 --- a/test/mcp-drift.test.mjs +++ b/test/mcp-drift.test.mjs @@ -6,6 +6,113 @@ import { detectMcpDrift, isMcpSampleConfigPath } from '../dist/detectors/mcp.js' const testDir = dirname(fileURLToPath(import.meta.url)); +test('mcp_sample_remote_endpoint: http:// fires high severity, https:// stays medium', async () => { + // A copy-pasted sample config with an http:// endpoint silently + // hands the user an unencrypted MCP transport. https:// is the + // safer baseline; flag the http:// asymmetry distinctly. + const { mkdtempSync, writeFileSync, mkdirSync, rmSync } = await import('node:fs'); + const { tmpdir } = await import('node:os'); + + const root = mkdtempSync(join(tmpdir(), 'scopetrail-http-')); + try { + const oldDir = join(root, 'old'); + const newDir = join(root, 'new'); + mkdirSync(join(oldDir, 'examples'), { recursive: true }); + mkdirSync(join(newDir, 'examples'), { recursive: true }); + writeFileSync(join(oldDir, 'examples', '.mcp.json.sample'), JSON.stringify({ mcpServers: {} })); + writeFileSync( + join(newDir, 'examples', '.mcp.json.sample'), + JSON.stringify({ + mcpServers: { + 'plain-remote': { serverUrl: 'http://mcp.example.com/insecure' }, + 'tls-remote': { serverUrl: 'https://mcp.example.com/safe' } + } + }, null, 2) + ); + + const findings = await detectMcpDrift(oldDir, newDir); + const remoteEndpoint = findings.filter((f) => f.kind === 'scope_trail.mcp_sample_remote_endpoint'); + const bySubject = Object.fromEntries(remoteEndpoint.map((f) => [f.subject, f])); + + assert.ok(bySubject['plain-remote'], 'expected remote_endpoint finding for plain-remote'); + assert.equal(bySubject['plain-remote'].severity, 'high'); + assert.match(bySubject['plain-remote'].message, /unencrypted/); + + assert.ok(bySubject['tls-remote'], 'expected remote_endpoint finding for tls-remote'); + assert.equal(bySubject['tls-remote'].severity, 'medium'); + assert.doesNotMatch(bySubject['tls-remote'].message, /unencrypted/); + } finally { + rmSync(root, { recursive: true, force: true }); + } +}); + +test('isUnpinnedCommand flags bunx packages without exact versions', async () => { + // bunx is the Bun equivalent of npx and ships as a standalone + // binary, so MCP configs use `"command": "bunx"` directly. Prior + // to this regression test the runner list was npx/uvx/pipx only. + const { mkdtempSync, writeFileSync, mkdirSync, rmSync } = await import('node:fs'); + const { tmpdir } = await import('node:os'); + + const root = mkdtempSync(join(tmpdir(), 'scopetrail-bunx-')); + try { + const oldDir = join(root, 'old'); + const newDir = join(root, 'new'); + mkdirSync(oldDir, { recursive: true }); + mkdirSync(newDir, { recursive: true }); + writeFileSync(join(oldDir, '.mcp.json'), JSON.stringify({ mcpServers: {} })); + writeFileSync( + join(newDir, '.mcp.json'), + JSON.stringify({ + mcpServers: { + 'bun-helper': { command: 'bunx', args: ['@vendor/helper-mcp'] } + } + }) + ); + + const findings = await detectMcpDrift(oldDir, newDir); + const unpinned = findings.find((finding) => finding.kind === 'scope_trail.unpinned_mcp_command'); + assert.ok(unpinned, 'expected unpinned_mcp_command for bunx without exact version'); + assert.equal(unpinned.subject, 'bun-helper'); + } finally { + rmSync(root, { recursive: true, force: true }); + } +}); + +test('isUnpinnedCommand flags npm exec / yarn dlx / pnpm dlx packages without exact versions', async () => { + const { mkdtempSync, writeFileSync, mkdirSync, rmSync } = await import('node:fs'); + const { tmpdir } = await import('node:os'); + + const root = mkdtempSync(join(tmpdir(), 'scopetrail-npm-exec-')); + try { + const oldDir = join(root, 'old'); + const newDir = join(root, 'new'); + mkdirSync(oldDir, { recursive: true }); + mkdirSync(newDir, { recursive: true }); + writeFileSync(join(oldDir, '.mcp.json'), JSON.stringify({ mcpServers: {} })); + writeFileSync( + join(newDir, '.mcp.json'), + JSON.stringify({ + mcpServers: { + 'npm-helper': { command: 'npm', args: ['exec', '@vendor/helper-mcp'] }, + 'yarn-helper': { command: 'yarn', args: ['dlx', '@vendor/helper-mcp'] }, + 'pnpm-helper': { command: 'pnpm', args: ['dlx', '@vendor/helper-mcp'] } + } + }) + ); + + const findings = await detectMcpDrift(oldDir, newDir); + const unpinned = findings.filter((finding) => finding.kind === 'scope_trail.unpinned_mcp_command'); + const subjects = unpinned.map((f) => f.subject); + + assert.ok(subjects.includes('npm-helper'), 'expected unpinned finding for npm-helper'); + assert.ok(subjects.includes('yarn-helper'), 'expected unpinned finding for yarn-helper'); + assert.ok(subjects.includes('pnpm-helper'), 'expected unpinned finding for pnpm-helper'); + } finally { + rmSync(root, { recursive: true, force: true }); + } +}); + + test('detects added MCP server with unpinned command', async () => { const oldDir = join(testDir, 'fixtures', 'mcp-drift', 'old'); const newDir = join(testDir, 'fixtures', 'mcp-drift', 'new'); diff --git a/test/package-surface.test.mjs b/test/package-surface.test.mjs new file mode 100644 index 0000000..85b5363 --- /dev/null +++ b/test/package-surface.test.mjs @@ -0,0 +1,72 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import { execFile } from 'node:child_process'; +import { promisify } from 'node:util'; +import { fileURLToPath } from 'node:url'; +import { dirname, join } from 'node:path'; + +const exec = promisify(execFile); +const testDir = dirname(fileURLToPath(import.meta.url)); +const packageRoot = join(testDir, '..'); +// On Windows, npm ships as npm.cmd. Node has deprecated spawning shell +// scripts with shell:true (DEP0190), so route through cmd.exe directly +// rather than asking execFile to find the .cmd via the shell. +const npmInvocation = process.platform === 'win32' + ? { command: 'cmd.exe', extraArgs: ['/c', 'npm'] } + : { command: 'npm', extraArgs: [] }; + +// npm pack honors the `files` whitelist in package.json. ScopeTrail +// is a trust-focused CLI that previously shipped fixtures with +// intentionally-risky agent configs (.claude/settings.json, .mcp.json, +// .github/, test/fixtures/) on every npm install — exactly the kind of +// drift the tool is supposed to flag. This test pins the publish +// surface so those files cannot leak back in. +test('npm publish surface only ships runtime files', async () => { + const { stdout } = await exec( + npmInvocation.command, + [...npmInvocation.extraArgs, 'pack', '--dry-run', '--json'], + { cwd: packageRoot } + ); + const result = JSON.parse(stdout); + const files = new Set(result[0].files.map((entry) => entry.path)); + + for (const required of [ + 'package.json', + 'README.md', + 'LICENSE', + 'action.yml', + 'dist/index.js' + ]) { + assert.ok(files.has(required), `npm pack missing required file: ${required}`); + } + + const forbiddenPrefixes = [ + '.github/', + '.claude/', + '.codex/', + '.cursor/', + '.vscode/', + 'src/', + 'test/', + 'node_modules/' + ]; + const forbiddenFiles = [ + '.mcp.json', + '.gitattributes', + '.gitignore', + 'tsconfig.json' + ]; + + for (const file of files) { + for (const prefix of forbiddenPrefixes) { + assert.ok( + !file.startsWith(prefix), + `npm pack should not ship ${file} (prefix ${prefix} is publish-excluded)` + ); + } + assert.ok( + !forbiddenFiles.includes(file), + `npm pack should not ship ${file} (publish-excluded)` + ); + } +});