From 437883cba3183588c3dd1883dddbc5c69bc72d26 Mon Sep 17 00:00:00 2001 From: Conal <33135619+Conalh@users.noreply.github.com> Date: Fri, 29 May 2026 08:47:14 -0700 Subject: [PATCH] feat(detectors): broaden Claude grant/deny coverage and harden instruction scan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Claude permissions (parsers/claude.ts): - Treat a bare `Bash`, `Read`, `Write`, or `Edit` allow as broad. In Claude Code a bare tool name matches every use of the tool, so it grants unrestricted shell / filesystem access — previously only bare WebFetch / WebSearch / Task and wildcard/broad-root forms were flagged. Filesystem verbs keep narrow semantics for scoped subtree globs (Read(src/**) stays narrow); only bare or broad-root forms (Read(/), Write(C:\), Read(~/**)) are broad. Bash/Web/Task remain broad on any wildcard scope. - Expand isSensitiveDeny beyond .env/secret/credential/.pem to cover SSH and signing keys, tokens, .npmrc/.pypirc/.netrc, kubeconfig, cloud credential stores, and .p12/.pfx/.key. The posture-gap and overlap detectors depend on recognising strict denies, so the list errs toward inclusion. Instruction scan (parsers/instructions.ts): - Scan the legacy single-file `.cursorrules` alongside the modern surfaces. - Track Markdown fenced code blocks (``` / ~~~) and skip their contents, so a documentation example showing a "bad" instruction is no longer flagged as a live directive. Tests cover bare vs scoped grants, the expanded sensitive-deny list, .cursorrules scanning, and fenced-code skipping (real directive outside the fence still fires). Co-Authored-By: Claude Opus 4.8 --- dist/parsers/claude.js | 52 +++++++++++++++--- dist/parsers/instructions.js | 17 +++++- src/parsers/claude.ts | 54 ++++++++++++++++--- src/parsers/instructions.ts | 19 ++++++- test/cli-output.test.mjs | 40 ++++++++++++++ .../instructions-cursorrules/.cursorrules | 4 ++ test/fixtures/instructions-fenced/CLAUDE.md | 10 ++++ test/heuristics.test.mjs | 40 +++++++++++++- 8 files changed, 221 insertions(+), 15 deletions(-) create mode 100644 test/fixtures/instructions-cursorrules/.cursorrules create mode 100644 test/fixtures/instructions-fenced/CLAUDE.md diff --git a/dist/parsers/claude.js b/dist/parsers/claude.js index 80587bc..0707d18 100644 --- a/dist/parsers/claude.js +++ b/dist/parsers/claude.js @@ -38,15 +38,32 @@ export async function parseClaudePolicy(root) { // `mcp__github__get_issue` are narrow — the previous heuristic flagged // both as broad, which produced false positives on every PR that scoped // its grants properly. Bare tokens and explicit wildcards are still broad. +// +// In Claude Code a permission rule is `Tool` or `Tool(specifier)`; a BARE +// tool name matches every use of that tool. So a bare `Bash`, `Read`, +// `Write`, or `Edit` grants unrestricted shell / filesystem access and is +// just as broad as a bare `WebFetch`. +// +// The filesystem verbs need different scope handling than the others: a +// wildcard in a Read/Write/Edit scope is usually a normal subtree glob +// (`Read(src/**)`) and is NOT broad. Those are broad only when bare or +// rooted at a broad path (`Read(/)`, `Write(C:\)`, `Read(~/**)`, +// `Read(**)`). Bash/WebFetch/WebSearch/Task, by contrast, ARE broad when +// their scope contains a wildcard (`Bash(npm *)` runs any npm command). +const BARE_FS_VERBS = ['read', 'write', 'edit']; +const WILDCARD_BROAD_VERBS = ['webfetch', 'websearch', 'task', 'bash']; export function isBroadAllow(permission) { const normalized = permission.toLowerCase(); - if (/\bbash\([^)]*\*[^)]*\)/.test(normalized)) { + // Bare filesystem verb (no scope) — unrestricted file access. + if (BARE_FS_VERBS.includes(normalized.trim())) { return true; } + // Filesystem verb rooted at a broad path. if (/\b(read|write|edit)\((~|[a-z]:\\|\/|\*\*)/.test(normalized)) { return true; } - if (isBroadVerbGrant(normalized, ['webfetch', 'websearch', 'task'])) { + // Bare or wildcard-scoped shell / web / task grant. + if (isBroadVerbGrant(normalized, WILDCARD_BROAD_VERBS)) { return true; } if (isBroadMcpGrant(normalized)) { @@ -97,12 +114,35 @@ function isBroadMcpGrant(normalized) { } return !tool || tool.includes('*'); } +// Substrings that mark a deny rule as protecting something sensitive +// (secrets, keys, tokens, credential stores). The posture-gap and +// deny/allow-overlap detectors depend on recognising these, so the list +// errs toward inclusion: a deny rule is the protective side, and counting +// one more path as "sensitive" is far cheaper than missing a real secret. +const SENSITIVE_DENY_TERMS = [ + '.env', // also matches .env.local / .env.production + 'secret', + 'credential', // also matches .aws/credentials + 'token', + '.pem', + '.key', + '.p12', + '.pfx', + '.ssh', + 'id_rsa', + 'id_ed25519', + 'private key', + 'private_key', + '.npmrc', + '.pypirc', + '.netrc', + 'kubeconfig', + '.gcp', + '.azure' +]; export function isSensitiveDeny(permission) { const normalized = permission.toLowerCase(); - return normalized.includes('.env') - || normalized.includes('secret') - || normalized.includes('credential') - || normalized.includes('.pem'); + return SENSITIVE_DENY_TERMS.some((term) => normalized.includes(term)); } function readStringArray(value) { return Array.isArray(value) ? value.filter((entry) => typeof entry === 'string') : []; diff --git a/dist/parsers/instructions.js b/dist/parsers/instructions.js index 988a128..6abec5e 100644 --- a/dist/parsers/instructions.js +++ b/dist/parsers/instructions.js @@ -3,7 +3,10 @@ import { configPath } from '../discovery.js'; const ROOT_FILES = [ 'AGENTS.md', 'CLAUDE.md', - '.github/copilot-instructions.md' + '.github/copilot-instructions.md', + // Legacy single-file Cursor rules (predates .cursor/rules/*.md|.mdc). + // Plenty of repos still carry one, so it must be scanned too. + '.cursorrules' ]; const CURSOR_RULES_DIR = '.cursor/rules'; /** @@ -85,9 +88,21 @@ async function scanFile(root, relativePath, matches) { throw error; } const lines = text.split(/\r?\n/); + let inFence = false; for (let index = 0; index < lines.length; index += 1) { const line = lines[index]; const stripped = line.trim(); + // Track Markdown fenced code blocks (``` or ~~~). Risky-looking text + // inside a documentation example — e.g. a "do NOT write this" block + // showing `ignore safety checks` — must not be reported as a live + // instruction. The fence delimiter line itself is skipped too. + if (/^(```|~~~)/.test(stripped)) { + inFence = !inFence; + continue; + } + if (inFence) { + continue; + } if (!stripped || stripped.startsWith('