From 57f7026f5981160f1c70607a9496466d0b6a4b3d Mon Sep 17 00:00:00 2001 From: Conal <33135619+Conalh@users.noreply.github.com> Date: Thu, 21 May 2026 13:15:02 -0700 Subject: [PATCH] Python detector + dependency-addition detector MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closes the audit's biggest coverage gap and addresses the missing manifest-layer capability signal. Python detector (src/detectors/py-capability.ts): - requests / httpx / urllib network calls, gated on a literal URL on the same line (parallel to the JS gate, same false-positive posture). - subprocess, os.system, os.popen, pty.spawn — high severity. - eval, exec, compile, __import__, importlib.import_module — critical when not in test files. - pickle.load/loads, marshal.load/loads, yaml.load without SafeLoader — separate 'unsafe_deserialize_added' finding kind. - Test-file downgrade applies to .py via the existing isTestFile, extended to recognize 'tests/', 'test_*.py', and '*_test.py'. - isScannable and the comment-line check were extended to include .py/.pyw and '#' comments respectively. Dependency-addition detector (src/detectors/package-deps.ts): - Diffs package.json across dependencies / devDependencies / optionalDependencies / peerDependencies on both sides of the change. - Emits 'high_capability_dep_added' (high) for headless browsers (puppeteer, playwright, cypress, etc.), subprocess/PTY wrappers (execa, cross-spawn, node-pty, shelljs, zx), arbitrary HTTP clients (node-fetch, undici, got, axios), VM/eval libs (vm2, isolated-vm), and SSH/proxy primitives. - Emits 'telemetry_dep_added' (medium) for Sentry/Segment/Mixpanel/ Amplitude/PostHog SDKs. - Reuses the diff infrastructure from package-scripts.ts (PackageDiffMode, readPackageTextAt, listChangedPackageJsonFiles are now exported). Diff pipeline wires both detectors in. Python adds 11 unit tests and the dependency detector adds 5 fixture-based tests; total suite is 30/30 green. Why this matters: agents that ship Python were previously invisible to CapabilityEcho even though Python is the dominant agent runtime. And a PR that adds 'puppeteer' to dependencies is materially more interesting than a PR that just imports something already declared. --- README.md | 4 +- dist/detectors/package-deps.js | 104 +++++++++++++++++++++++++ dist/detectors/package-scripts.js | 4 +- dist/detectors/py-capability.js | 105 +++++++++++++++++++++++++ dist/diff.js | 16 +++- dist/paths.js | 17 +++- src/detectors/package-deps.ts | 125 ++++++++++++++++++++++++++++++ src/detectors/package-scripts.ts | 6 +- src/detectors/py-capability.ts | 123 +++++++++++++++++++++++++++++ src/diff.ts | 17 +++- src/paths.ts | 20 ++++- test/package-deps.test.mjs | 88 +++++++++++++++++++++ test/py-capability.test.mjs | 97 +++++++++++++++++++++++ 13 files changed, 706 insertions(+), 20 deletions(-) create mode 100644 dist/detectors/package-deps.js create mode 100644 dist/detectors/py-capability.js create mode 100644 src/detectors/package-deps.ts create mode 100644 src/detectors/py-capability.ts create mode 100644 test/package-deps.test.mjs create mode 100644 test/py-capability.test.mjs diff --git a/README.md b/README.md index 9d1573f..89af9ab 100644 --- a/README.md +++ b/README.md @@ -9,7 +9,7 @@ Code review for AI agent capability drift in pull requests. CapabilityEcho is a free OSS CLI and GitHub Action that reviews pull requests for risky **code and workflow changes** that expand what agents can reach — even when agent config files did not change. -- JavaScript and TypeScript network, subprocess, and dynamic-eval signals +- JavaScript, TypeScript, **and Python** network, subprocess, and dynamic-eval signals - `package.json` lifecycle and pipe-to-shell install scripts - GitHub Actions write permissions and external network steps - Terminal, Markdown, JSON, and line-level GitHub annotation output @@ -115,6 +115,8 @@ CapabilityEcho v0 detects: - External network fetch calls in added JavaScript or TypeScript lines. - Subprocess or shell spawn calls in added JavaScript or TypeScript lines. - Dynamic code execution such as `eval()` or `new Function()` in added lines. +- **Python equivalents:** `requests`/`httpx`/`urllib` network calls (URL-gated), `subprocess`/`os.system`/`os.popen`/`pty.spawn`, `eval`/`exec`/`compile`/`__import__`/`importlib.import_module`, and unsafe deserialization (`pickle.load`, `marshal.load`, `yaml.load` without `SafeLoader`). +- **Newly-added dependencies with high capability surface:** headless browsers (`puppeteer`, `playwright`, `cypress`), subprocess/PTY wrappers (`execa`, `cross-spawn`, `node-pty`, `shelljs`, `zx`), arbitrary HTTP clients (`node-fetch`, `undici`, `got`, `axios`), VM/eval libraries (`vm2`, `isolated-vm`), and SSH/proxy primitives. Telemetry SDKs are flagged at medium. - GitHub Actions write permissions in added workflow lines. - External network requests in added workflow steps. - Workflow steps that combine secrets or env values with external requests. diff --git a/dist/detectors/package-deps.js b/dist/detectors/package-deps.js new file mode 100644 index 0000000..8cf417b --- /dev/null +++ b/dist/detectors/package-deps.js @@ -0,0 +1,104 @@ +import { isRecord, lineOfJsonStringValue, lineOfJsonKey } from '../discovery.js'; +import { listPackageJsonFiles } from '../git-diff.js'; +import { listChangedPackageJsonFiles, readPackageTextAt } from './package-scripts.js'; +// Adding a dependency is, by itself, a capability expansion: the agent +// now has whatever the dep can do, transitively. Some additions are +// materially higher-leverage than others — a headless browser, a +// subprocess wrapper, or an arbitrary-fetch HTTP client lets the agent +// reach the network or the OS in ways the existing audit detects only +// when used. Catching them at the manifest layer flags the intent. +const HIGH_CAPABILITY_DEPS = new Set([ + // Headless browsers and full UI automation. + 'puppeteer', 'puppeteer-core', 'playwright', 'playwright-core', + 'cypress', 'webdriverio', 'selenium-webdriver', 'nightwatch', + // Subprocess and PTY wrappers. + 'execa', 'cross-spawn', 'node-pty', 'shelljs', 'zx', 'tinyspawn', + // Arbitrary HTTP clients (the agent can now fetch anywhere without + // touching `fetch` or `axios.get` in code we'd catch via js-capability). + 'node-fetch', 'undici', 'got', 'axios', 'request', 'superagent', + // Remote-code-execution-shaped libraries. + 'vm2', 'isolated-vm', + // Network primitives. + 'socks-proxy-agent', 'https-proxy-agent', 'ssh2', 'node-ssh', + // Telemetry / analytics SDKs that ship a phone-home in their happy + // path (medium risk — flagged separately below). +]); +const TELEMETRY_DEPS = new Set([ + '@segment/analytics-node', 'mixpanel', 'amplitude-js', 'posthog-js', + '@sentry/node', '@sentry/browser' +]); +const DEP_SECTIONS = ['dependencies', 'devDependencies', 'optionalDependencies', 'peerDependencies']; +export async function detectPackageDeps(mode) { + const files = mode.mode === 'directories' + ? await listPackageJsonFiles(mode.newRoot) + : await listChangedPackageJsonFiles(mode.repo, mode.base, mode.head); + const findings = []; + for (const file of files) { + const oldText = await readPackageTextAt(mode, file, 'old'); + const newText = await readPackageTextAt(mode, file, 'new'); + findings.push(...compareDeps(file, oldText, newText)); + } + return findings; +} +function compareDeps(file, oldText, newText) { + const oldDeps = readAllDeps(oldText); + const newDeps = readAllDeps(newText); + const findings = []; + for (const [name, version] of newDeps.entries()) { + if (oldDeps.has(name)) { + continue; + } + if (HIGH_CAPABILITY_DEPS.has(name)) { + findings.push({ + kind: 'high_capability_dep_added', + severity: 'high', + file, + line: lineOfJsonStringValue(newText, version) ?? lineOfJsonKey(newText, name), + subject: name, + message: `Added dependency "${name}" can reach the network, spawn subprocesses, or evaluate code.`, + recommendation: 'Confirm this dependency is required for the stated change and that its usage is scoped.' + }); + continue; + } + if (TELEMETRY_DEPS.has(name)) { + findings.push({ + kind: 'telemetry_dep_added', + severity: 'medium', + file, + line: lineOfJsonStringValue(newText, version) ?? lineOfJsonKey(newText, name), + subject: name, + message: `Added telemetry/analytics dependency "${name}" — ships an outbound network surface by default.`, + recommendation: 'Verify the telemetry destination, payload, and opt-out posture.' + }); + } + } + return findings; +} +function readAllDeps(text) { + const result = new Map(); + if (!text.trim()) { + return result; + } + let parsed; + try { + parsed = JSON.parse(text); + } + catch { + return result; + } + if (!isRecord(parsed)) { + return result; + } + for (const section of DEP_SECTIONS) { + const block = parsed[section]; + if (!isRecord(block)) { + continue; + } + for (const [name, version] of Object.entries(block)) { + if (typeof version === 'string') { + result.set(name, version); + } + } + } + return result; +} diff --git a/dist/detectors/package-scripts.js b/dist/detectors/package-scripts.js index e40a19d..fc51bae 100644 --- a/dist/detectors/package-scripts.js +++ b/dist/detectors/package-scripts.js @@ -15,7 +15,7 @@ export async function detectPackageScripts(mode) { } return findings; } -async function listChangedPackageJsonFiles(repo, base, head) { +export async function listChangedPackageJsonFiles(repo, base, head) { const all = await listPackageJsonFiles(repo); const changed = []; for (const file of all) { @@ -49,7 +49,7 @@ async function readScriptsAt(mode, file, side) { return {}; } } -async function readPackageTextAt(mode, file, side) { +export async function readPackageTextAt(mode, file, side) { if (mode.mode === 'directories') { const root = side === 'old' ? mode.oldRoot : mode.newRoot; try { diff --git a/dist/detectors/py-capability.js b/dist/detectors/py-capability.js new file mode 100644 index 0000000..d943417 --- /dev/null +++ b/dist/detectors/py-capability.js @@ -0,0 +1,105 @@ +import { isCommentLine, isPyFile, isTestFile } from '../paths.js'; +// Python capability detection. Agents that ship code edits in Python - +// which is most of them once you leave the frontend - can quietly expand +// reach by adding a `requests.post`, a `subprocess.Popen`, or an `eval` +// without ever touching .mcp.json or .claude/settings.json. These are +// the same shapes detect-js-capability flags for the JS world. +export function detectPyCapability(lines) { + const findings = []; + for (const added of lines) { + if (!isPyFile(added.file) || isCommentLine(added.content)) { + continue; + } + const testFile = isTestFile(added.file); + findings.push(...detectPyNetwork(added, testFile)); + findings.push(...detectPySubprocess(added, testFile)); + findings.push(...detectPyDynamicExec(added, testFile)); + findings.push(...detectPyUnsafeDeserialize(added, testFile)); + } + return findings; +} +function detectPyNetwork(added, testFile) { + // Common network entry points across requests, httpx, aiohttp, and the + // urllib family (including the Python 2 legacy `urllib2` that still + // appears in older agent-generated code). + const networkVerbPattern = /\b(?:requests|httpx)\.(?:get|post|put|delete|patch|head|options|request)\s*\(|\burllib(?:2)?\.(?:request\.)?urlopen\s*\(|\burlopen\s*\(|\burllib\.request\.urlretrieve\s*\(|\baiohttp\.ClientSession\s*\(/i; + if (!networkVerbPattern.test(added.content)) { + return []; + } + // Gate on a literal external URL on the same added line — keeps the + // detector aligned with the JS side and cuts false positives from code + // that takes the URL from a constant defined elsewhere. + if (!/(?:https?:\/\/|['"]https?:\/\/)/i.test(added.content)) { + return []; + } + return [ + { + kind: 'external_fetch_added', + severity: testFile ? 'low' : 'medium', + file: added.file, + line: added.line, + subject: 'External network call (Python)', + message: 'Added Python performs an external HTTP request that expands network reach.', + recommendation: 'Review the endpoint, request payload, and whether the call belongs in this change.' + } + ]; +} +function detectPySubprocess(added, testFile) { + // Subprocess and shell-execution surfaces. `commands.getoutput` is the + // Python 2 legacy still seen in older agent-generated code. + const subprocessPattern = /\bsubprocess\.(?:run|call|Popen|check_call|check_output|getoutput|getstatusoutput)\s*\(|\bos\.(?:system|popen|execv\w*|spawnv?\w*)\s*\(|\bcommands\.getoutput\s*\(|\bpty\.spawn\s*\(/i; + if (!subprocessPattern.test(added.content)) { + return []; + } + return [ + { + kind: 'subprocess_spawn_added', + severity: testFile ? 'low' : 'high', + file: added.file, + line: added.line, + subject: 'Subprocess spawn (Python)', + message: 'Added Python can spawn shell commands or subprocesses.', + recommendation: 'Confirm the command source is trusted and scoped to the task.' + } + ]; +} +function detectPyDynamicExec(added, testFile) { + // Dynamic code execution. We also catch `__import__` and + // `importlib.import_module` with a string literal argument — these are + // the standard primitives for "load whatever the LLM names next." + const dynamicPattern = /\beval\s*\(|\bexec\s*\(|\bcompile\s*\(|\b__import__\s*\(|\bimportlib\.import_module\s*\(/i; + if (!dynamicPattern.test(added.content)) { + return []; + } + return [ + { + kind: 'dynamic_eval_added', + severity: testFile ? 'medium' : 'critical', + file: added.file, + line: added.line, + subject: 'Dynamic code execution (Python)', + message: 'Added Python can evaluate dynamic code or import modules by name at runtime.', + recommendation: 'Avoid eval-style execution unless strictly required; never feed user input to these.' + } + ]; +} +function detectPyUnsafeDeserialize(added, testFile) { + // pickle.load and marshal.load on attacker-controlled bytes are a + // remote-code-execution primitive. yaml.load (without SafeLoader) is + // the same shape and is the most common real-world footgun. + const unsafeDeserializePattern = /\bpickle\.(?:load|loads)\s*\(|\bmarshal\.(?:load|loads)\s*\(|\byaml\.load\s*\((?![^)]*Loader\s*=\s*(?:yaml\.)?SafeLoader)/i; + if (!unsafeDeserializePattern.test(added.content)) { + return []; + } + return [ + { + kind: 'unsafe_deserialize_added', + severity: testFile ? 'medium' : 'critical', + file: added.file, + line: added.line, + subject: 'Unsafe deserialization (Python)', + message: 'Added Python deserializes untrusted-shaped input (pickle / marshal / yaml.load).', + recommendation: 'Use yaml.safe_load and avoid pickle/marshal on data crossing trust boundaries.' + } + ]; +} diff --git a/dist/diff.js b/dist/diff.js index 9e80a65..5e689ff 100644 --- a/dist/diff.js +++ b/dist/diff.js @@ -1,5 +1,7 @@ import { detectJsCapability } from './detectors/js-capability.js'; +import { detectPackageDeps } from './detectors/package-deps.js'; import { detectPackageScripts } from './detectors/package-scripts.js'; +import { detectPyCapability } from './detectors/py-capability.js'; import { detectWorkflowPermissions } from './detectors/workflow-permissions.js'; import { collectDirectoryDiff, collectGitDiff } from './git-diff.js'; import { createReport } from './report.js'; @@ -7,13 +9,19 @@ export async function runCapabilityDiff(options) { const context = options.mode === 'directories' ? await collectDirectoryDiff(options.oldRoot, options.newRoot) : await collectGitDiff(options.repo, options.base, options.head); - const packageFindings = options.mode === 'directories' - ? await detectPackageScripts({ mode: 'directories', oldRoot: options.oldRoot, newRoot: options.newRoot }) - : await detectPackageScripts({ mode: 'git', repo: options.repo, base: options.base, head: options.head }); + const packageMode = options.mode === 'directories' + ? ({ mode: 'directories', oldRoot: options.oldRoot, newRoot: options.newRoot }) + : ({ mode: 'git', repo: options.repo, base: options.base, head: options.head }); + const [scriptFindings, depFindings] = await Promise.all([ + detectPackageScripts(packageMode), + detectPackageDeps(packageMode) + ]); const findings = [ ...detectWorkflowPermissions(context.addedLines), ...detectJsCapability(context.addedLines), - ...packageFindings + ...detectPyCapability(context.addedLines), + ...scriptFindings, + ...depFindings ]; return createReport(findings, context); } diff --git a/dist/paths.js b/dist/paths.js index 33f0282..0d1b9fa 100644 --- a/dist/paths.js +++ b/dist/paths.js @@ -28,18 +28,25 @@ export function isScannable(relativePath) { if (normalized.startsWith('.github/workflows/') && /\.(ya?ml)$/i.test(normalized)) { return true; } - return /\.(js|jsx|ts|tsx|mjs|cjs)$/i.test(normalized); + return /\.(js|jsx|ts|tsx|mjs|cjs|py|pyw)$/i.test(normalized); } export function isTestFile(relativePath) { const normalized = normalizeRelativePath(relativePath); - if (normalized.includes('__tests__/')) { + if (normalized.includes('__tests__/') || normalized.includes('/tests/')) { + return true; + } + if (/(^|\/)test_[^/]+\.py$/i.test(normalized) || /_test\.py$/i.test(normalized)) { return true; } return /\.(test|spec)\.(js|jsx|ts|tsx|mjs|cjs)$/i.test(normalized); } export function isCommentLine(content) { const trimmed = content.trim(); - return trimmed.startsWith('//') || trimmed.startsWith('/*') || trimmed.startsWith('*') || trimmed.startsWith('*/'); + return (trimmed.startsWith('//') || + trimmed.startsWith('/*') || + trimmed.startsWith('*') || + trimmed.startsWith('*/') || + trimmed.startsWith('#')); } export function isWorkflowFile(relativePath) { const normalized = normalizeRelativePath(relativePath); @@ -53,3 +60,7 @@ export function isJsFile(relativePath) { const normalized = normalizeRelativePath(relativePath); return /\.(js|jsx|ts|tsx|mjs|cjs)$/i.test(normalized); } +export function isPyFile(relativePath) { + const normalized = normalizeRelativePath(relativePath); + return /\.(py|pyw)$/i.test(normalized); +} diff --git a/src/detectors/package-deps.ts b/src/detectors/package-deps.ts new file mode 100644 index 0000000..add4e45 --- /dev/null +++ b/src/detectors/package-deps.ts @@ -0,0 +1,125 @@ +import { isRecord, lineOfJsonStringValue, lineOfJsonKey } from '../discovery.js'; +import { listPackageJsonFiles } from '../git-diff.js'; +import { + listChangedPackageJsonFiles, + readPackageTextAt, + type PackageDiffMode +} from './package-scripts.js'; +import type { Finding } from '../types.js'; + +// Adding a dependency is, by itself, a capability expansion: the agent +// now has whatever the dep can do, transitively. Some additions are +// materially higher-leverage than others — a headless browser, a +// subprocess wrapper, or an arbitrary-fetch HTTP client lets the agent +// reach the network or the OS in ways the existing audit detects only +// when used. Catching them at the manifest layer flags the intent. +const HIGH_CAPABILITY_DEPS = new Set([ + // Headless browsers and full UI automation. + 'puppeteer', 'puppeteer-core', 'playwright', 'playwright-core', + 'cypress', 'webdriverio', 'selenium-webdriver', 'nightwatch', + // Subprocess and PTY wrappers. + 'execa', 'cross-spawn', 'node-pty', 'shelljs', 'zx', 'tinyspawn', + // Arbitrary HTTP clients (the agent can now fetch anywhere without + // touching `fetch` or `axios.get` in code we'd catch via js-capability). + 'node-fetch', 'undici', 'got', 'axios', 'request', 'superagent', + // Remote-code-execution-shaped libraries. + 'vm2', 'isolated-vm', + // Network primitives. + 'socks-proxy-agent', 'https-proxy-agent', 'ssh2', 'node-ssh', + // Telemetry / analytics SDKs that ship a phone-home in their happy + // path (medium risk — flagged separately below). +]); + +const TELEMETRY_DEPS = new Set([ + '@segment/analytics-node', 'mixpanel', 'amplitude-js', 'posthog-js', + '@sentry/node', '@sentry/browser' +]); + +const DEP_SECTIONS = ['dependencies', 'devDependencies', 'optionalDependencies', 'peerDependencies'] as const; + +export async function detectPackageDeps(mode: PackageDiffMode): Promise { + const files = + mode.mode === 'directories' + ? await listPackageJsonFiles(mode.newRoot) + : await listChangedPackageJsonFiles(mode.repo, mode.base, mode.head); + + const findings: Finding[] = []; + for (const file of files) { + const oldText = await readPackageTextAt(mode, file, 'old'); + const newText = await readPackageTextAt(mode, file, 'new'); + findings.push(...compareDeps(file, oldText, newText)); + } + + return findings; +} + +function compareDeps(file: string, oldText: string, newText: string): Finding[] { + const oldDeps = readAllDeps(oldText); + const newDeps = readAllDeps(newText); + const findings: Finding[] = []; + + for (const [name, version] of newDeps.entries()) { + if (oldDeps.has(name)) { + continue; + } + + if (HIGH_CAPABILITY_DEPS.has(name)) { + findings.push({ + kind: 'high_capability_dep_added', + severity: 'high', + file, + line: lineOfJsonStringValue(newText, version) ?? lineOfJsonKey(newText, name), + subject: name, + message: `Added dependency "${name}" can reach the network, spawn subprocesses, or evaluate code.`, + recommendation: 'Confirm this dependency is required for the stated change and that its usage is scoped.' + }); + continue; + } + + if (TELEMETRY_DEPS.has(name)) { + findings.push({ + kind: 'telemetry_dep_added', + severity: 'medium', + file, + line: lineOfJsonStringValue(newText, version) ?? lineOfJsonKey(newText, name), + subject: name, + message: `Added telemetry/analytics dependency "${name}" — ships an outbound network surface by default.`, + recommendation: 'Verify the telemetry destination, payload, and opt-out posture.' + }); + } + } + + return findings; +} + +function readAllDeps(text: string): Map { + const result = new Map(); + if (!text.trim()) { + return result; + } + + let parsed: unknown; + try { + parsed = JSON.parse(text); + } catch { + return result; + } + + if (!isRecord(parsed)) { + return result; + } + + for (const section of DEP_SECTIONS) { + const block = parsed[section]; + if (!isRecord(block)) { + continue; + } + for (const [name, version] of Object.entries(block)) { + if (typeof version === 'string') { + result.set(name, version); + } + } + } + + return result; +} diff --git a/src/detectors/package-scripts.ts b/src/detectors/package-scripts.ts index f655d14..082832a 100644 --- a/src/detectors/package-scripts.ts +++ b/src/detectors/package-scripts.ts @@ -6,7 +6,7 @@ import type { Finding } from '../types.js'; const LIFECYCLE_KEYS = ['postinstall', 'preinstall', 'prepare', 'install'] as const; -type PackageDiffMode = +export type PackageDiffMode = | { mode: 'directories'; oldRoot: string; newRoot: string } | { mode: 'git'; repo: string; base: string; head: string }; @@ -27,7 +27,7 @@ export async function detectPackageScripts(mode: PackageDiffMode): Promise { +export async function listChangedPackageJsonFiles(repo: string, base: string, head: string): Promise { const all = await listPackageJsonFiles(repo); const changed: string[] = []; @@ -70,7 +70,7 @@ async function readScriptsAt( } } -async function readPackageTextAt(mode: PackageDiffMode, file: string, side: 'old' | 'new'): Promise { +export async function readPackageTextAt(mode: PackageDiffMode, file: string, side: 'old' | 'new'): Promise { if (mode.mode === 'directories') { const root = side === 'old' ? mode.oldRoot : mode.newRoot; try { diff --git a/src/detectors/py-capability.ts b/src/detectors/py-capability.ts new file mode 100644 index 0000000..7d4fefb --- /dev/null +++ b/src/detectors/py-capability.ts @@ -0,0 +1,123 @@ +import type { AddedLine, Finding } from '../types.js'; +import { isCommentLine, isPyFile, isTestFile } from '../paths.js'; + +// Python capability detection. Agents that ship code edits in Python - +// which is most of them once you leave the frontend - can quietly expand +// reach by adding a `requests.post`, a `subprocess.Popen`, or an `eval` +// without ever touching .mcp.json or .claude/settings.json. These are +// the same shapes detect-js-capability flags for the JS world. +export function detectPyCapability(lines: AddedLine[]): Finding[] { + const findings: Finding[] = []; + + for (const added of lines) { + if (!isPyFile(added.file) || isCommentLine(added.content)) { + continue; + } + + const testFile = isTestFile(added.file); + findings.push(...detectPyNetwork(added, testFile)); + findings.push(...detectPySubprocess(added, testFile)); + findings.push(...detectPyDynamicExec(added, testFile)); + findings.push(...detectPyUnsafeDeserialize(added, testFile)); + } + + return findings; +} + +function detectPyNetwork(added: AddedLine, testFile: boolean): Finding[] { + // Common network entry points across requests, httpx, aiohttp, and the + // urllib family (including the Python 2 legacy `urllib2` that still + // appears in older agent-generated code). + const networkVerbPattern = + /\b(?:requests|httpx)\.(?:get|post|put|delete|patch|head|options|request)\s*\(|\burllib(?:2)?\.(?:request\.)?urlopen\s*\(|\burlopen\s*\(|\burllib\.request\.urlretrieve\s*\(|\baiohttp\.ClientSession\s*\(/i; + if (!networkVerbPattern.test(added.content)) { + return []; + } + + // Gate on a literal external URL on the same added line — keeps the + // detector aligned with the JS side and cuts false positives from code + // that takes the URL from a constant defined elsewhere. + if (!/(?:https?:\/\/|['"]https?:\/\/)/i.test(added.content)) { + return []; + } + + return [ + { + kind: 'external_fetch_added', + severity: testFile ? 'low' : 'medium', + file: added.file, + line: added.line, + subject: 'External network call (Python)', + message: 'Added Python performs an external HTTP request that expands network reach.', + recommendation: 'Review the endpoint, request payload, and whether the call belongs in this change.' + } + ]; +} + +function detectPySubprocess(added: AddedLine, testFile: boolean): Finding[] { + // Subprocess and shell-execution surfaces. `commands.getoutput` is the + // Python 2 legacy still seen in older agent-generated code. + const subprocessPattern = + /\bsubprocess\.(?:run|call|Popen|check_call|check_output|getoutput|getstatusoutput)\s*\(|\bos\.(?:system|popen|execv\w*|spawnv?\w*)\s*\(|\bcommands\.getoutput\s*\(|\bpty\.spawn\s*\(/i; + if (!subprocessPattern.test(added.content)) { + return []; + } + + return [ + { + kind: 'subprocess_spawn_added', + severity: testFile ? 'low' : 'high', + file: added.file, + line: added.line, + subject: 'Subprocess spawn (Python)', + message: 'Added Python can spawn shell commands or subprocesses.', + recommendation: 'Confirm the command source is trusted and scoped to the task.' + } + ]; +} + +function detectPyDynamicExec(added: AddedLine, testFile: boolean): Finding[] { + // Dynamic code execution. We also catch `__import__` and + // `importlib.import_module` with a string literal argument — these are + // the standard primitives for "load whatever the LLM names next." + const dynamicPattern = + /\beval\s*\(|\bexec\s*\(|\bcompile\s*\(|\b__import__\s*\(|\bimportlib\.import_module\s*\(/i; + if (!dynamicPattern.test(added.content)) { + return []; + } + + return [ + { + kind: 'dynamic_eval_added', + severity: testFile ? 'medium' : 'critical', + file: added.file, + line: added.line, + subject: 'Dynamic code execution (Python)', + message: 'Added Python can evaluate dynamic code or import modules by name at runtime.', + recommendation: 'Avoid eval-style execution unless strictly required; never feed user input to these.' + } + ]; +} + +function detectPyUnsafeDeserialize(added: AddedLine, testFile: boolean): Finding[] { + // pickle.load and marshal.load on attacker-controlled bytes are a + // remote-code-execution primitive. yaml.load (without SafeLoader) is + // the same shape and is the most common real-world footgun. + const unsafeDeserializePattern = + /\bpickle\.(?:load|loads)\s*\(|\bmarshal\.(?:load|loads)\s*\(|\byaml\.load\s*\((?![^)]*Loader\s*=\s*(?:yaml\.)?SafeLoader)/i; + if (!unsafeDeserializePattern.test(added.content)) { + return []; + } + + return [ + { + kind: 'unsafe_deserialize_added', + severity: testFile ? 'medium' : 'critical', + file: added.file, + line: added.line, + subject: 'Unsafe deserialization (Python)', + message: 'Added Python deserializes untrusted-shaped input (pickle / marshal / yaml.load).', + recommendation: 'Use yaml.safe_load and avoid pickle/marshal on data crossing trust boundaries.' + } + ]; +} diff --git a/src/diff.ts b/src/diff.ts index 84b0104..14ae25b 100644 --- a/src/diff.ts +++ b/src/diff.ts @@ -1,5 +1,7 @@ import { detectJsCapability } from './detectors/js-capability.js'; +import { detectPackageDeps } from './detectors/package-deps.js'; import { detectPackageScripts } from './detectors/package-scripts.js'; +import { detectPyCapability } from './detectors/py-capability.js'; import { detectWorkflowPermissions } from './detectors/workflow-permissions.js'; import { collectDirectoryDiff, collectGitDiff } from './git-diff.js'; import { createReport, type EchoReport } from './report.js'; @@ -14,15 +16,22 @@ export async function runCapabilityDiff(options: DiffMode): Promise ? await collectDirectoryDiff(options.oldRoot, options.newRoot) : await collectGitDiff(options.repo, options.base, options.head); - const packageFindings = + const packageMode = options.mode === 'directories' - ? await detectPackageScripts({ mode: 'directories', oldRoot: options.oldRoot, newRoot: options.newRoot }) - : await detectPackageScripts({ mode: 'git', repo: options.repo, base: options.base, head: options.head }); + ? ({ mode: 'directories' as const, oldRoot: options.oldRoot, newRoot: options.newRoot }) + : ({ mode: 'git' as const, repo: options.repo, base: options.base, head: options.head }); + + const [scriptFindings, depFindings] = await Promise.all([ + detectPackageScripts(packageMode), + detectPackageDeps(packageMode) + ]); const findings = [ ...detectWorkflowPermissions(context.addedLines), ...detectJsCapability(context.addedLines), - ...packageFindings + ...detectPyCapability(context.addedLines), + ...scriptFindings, + ...depFindings ]; return createReport(findings, context); diff --git a/src/paths.ts b/src/paths.ts index 0aa47b0..5085eb4 100644 --- a/src/paths.ts +++ b/src/paths.ts @@ -35,12 +35,15 @@ export function isScannable(relativePath: string): boolean { return true; } - return /\.(js|jsx|ts|tsx|mjs|cjs)$/i.test(normalized); + return /\.(js|jsx|ts|tsx|mjs|cjs|py|pyw)$/i.test(normalized); } export function isTestFile(relativePath: string): boolean { const normalized = normalizeRelativePath(relativePath); - if (normalized.includes('__tests__/')) { + if (normalized.includes('__tests__/') || normalized.includes('/tests/')) { + return true; + } + if (/(^|\/)test_[^/]+\.py$/i.test(normalized) || /_test\.py$/i.test(normalized)) { return true; } @@ -49,7 +52,13 @@ export function isTestFile(relativePath: string): boolean { export function isCommentLine(content: string): boolean { const trimmed = content.trim(); - return trimmed.startsWith('//') || trimmed.startsWith('/*') || trimmed.startsWith('*') || trimmed.startsWith('*/'); + return ( + trimmed.startsWith('//') || + trimmed.startsWith('/*') || + trimmed.startsWith('*') || + trimmed.startsWith('*/') || + trimmed.startsWith('#') + ); } export function isWorkflowFile(relativePath: string): boolean { @@ -66,3 +75,8 @@ export function isJsFile(relativePath: string): boolean { const normalized = normalizeRelativePath(relativePath); return /\.(js|jsx|ts|tsx|mjs|cjs)$/i.test(normalized); } + +export function isPyFile(relativePath: string): boolean { + const normalized = normalizeRelativePath(relativePath); + return /\.(py|pyw)$/i.test(normalized); +} diff --git a/test/package-deps.test.mjs b/test/package-deps.test.mjs new file mode 100644 index 0000000..71d14e1 --- /dev/null +++ b/test/package-deps.test.mjs @@ -0,0 +1,88 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import { mkdtemp, mkdir, writeFile, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { detectPackageDeps } from '../dist/detectors/package-deps.js'; + +async function makeFixture(oldPackage, newPackage) { + const root = await mkdtemp(join(tmpdir(), 'ce-deps-')); + const oldRoot = join(root, 'old'); + const newRoot = join(root, 'new'); + await mkdir(oldRoot, { recursive: true }); + await mkdir(newRoot, { recursive: true }); + await writeFile(join(oldRoot, 'package.json'), JSON.stringify(oldPackage, null, 2)); + await writeFile(join(newRoot, 'package.json'), JSON.stringify(newPackage, null, 2)); + return { root, oldRoot, newRoot }; +} + +test('flags newly added high-capability dep (puppeteer)', async () => { + const fixture = await makeFixture( + { name: 'app', dependencies: { lodash: '^4.0.0' } }, + { name: 'app', dependencies: { lodash: '^4.0.0', puppeteer: '^22.0.0' } } + ); + try { + const findings = await detectPackageDeps({ mode: 'directories', oldRoot: fixture.oldRoot, newRoot: fixture.newRoot }); + const f = findings.find((finding) => finding.kind === 'high_capability_dep_added'); + assert.ok(f); + assert.equal(f.subject, 'puppeteer'); + assert.equal(f.severity, 'high'); + } finally { + await rm(fixture.root, { recursive: true, force: true }); + } +}); + +test('does not flag pre-existing deps', async () => { + const fixture = await makeFixture( + { name: 'app', dependencies: { puppeteer: '^22.0.0' } }, + { name: 'app', dependencies: { puppeteer: '^22.0.0', 'lodash': '^4.0.0' } } + ); + try { + const findings = await detectPackageDeps({ mode: 'directories', oldRoot: fixture.oldRoot, newRoot: fixture.newRoot }); + assert.equal(findings.find((f) => f.kind === 'high_capability_dep_added'), undefined); + } finally { + await rm(fixture.root, { recursive: true, force: true }); + } +}); + +test('flags telemetry dep at medium severity', async () => { + const fixture = await makeFixture( + { name: 'app', dependencies: {} }, + { name: 'app', dependencies: { '@sentry/node': '^8.0.0' } } + ); + try { + const findings = await detectPackageDeps({ mode: 'directories', oldRoot: fixture.oldRoot, newRoot: fixture.newRoot }); + const f = findings.find((finding) => finding.kind === 'telemetry_dep_added'); + assert.ok(f); + assert.equal(f.severity, 'medium'); + } finally { + await rm(fixture.root, { recursive: true, force: true }); + } +}); + +test('finds deps added to devDependencies and optionalDependencies', async () => { + const fixture = await makeFixture( + { name: 'app' }, + { name: 'app', devDependencies: { 'node-fetch': '^3.0.0' }, optionalDependencies: { execa: '^9.0.0' } } + ); + try { + const findings = await detectPackageDeps({ mode: 'directories', oldRoot: fixture.oldRoot, newRoot: fixture.newRoot }); + assert.ok(findings.find((f) => f.subject === 'node-fetch')); + assert.ok(findings.find((f) => f.subject === 'execa')); + } finally { + await rm(fixture.root, { recursive: true, force: true }); + } +}); + +test('ignores benign dep additions (no false positives)', async () => { + const fixture = await makeFixture( + { name: 'app', dependencies: { lodash: '^4.0.0' } }, + { name: 'app', dependencies: { lodash: '^4.0.0', 'date-fns': '^3.0.0', 'zod': '^3.22.0' } } + ); + try { + const findings = await detectPackageDeps({ mode: 'directories', oldRoot: fixture.oldRoot, newRoot: fixture.newRoot }); + assert.equal(findings.length, 0); + } finally { + await rm(fixture.root, { recursive: true, force: true }); + } +}); diff --git a/test/py-capability.test.mjs b/test/py-capability.test.mjs new file mode 100644 index 0000000..46807cd --- /dev/null +++ b/test/py-capability.test.mjs @@ -0,0 +1,97 @@ +import test from 'node:test'; +import assert from 'node:assert/strict'; +import { detectPyCapability } from '../dist/detectors/py-capability.js'; + +function line(file, content, lineNumber = 1) { + return { file, line: lineNumber, content }; +} + +test('py: requests.get with literal URL flags external fetch', () => { + const findings = detectPyCapability([ + line('agent.py', 'resp = requests.get("https://api.example.com/v1/things")') + ]); + assert.ok(findings.find((f) => f.kind === 'external_fetch_added')); +}); + +test('py: requests.get without literal URL does not over-fire', () => { + const findings = detectPyCapability([ + line('agent.py', 'resp = requests.get(url, headers=h)') + ]); + assert.equal(findings.find((f) => f.kind === 'external_fetch_added'), undefined); +}); + +test('py: urllib.request.urlopen counts as external fetch', () => { + const findings = detectPyCapability([ + line('agent.py', 'from urllib.request import urlopen; r = urlopen("https://x.example/y")') + ]); + assert.ok(findings.find((f) => f.kind === 'external_fetch_added')); +}); + +test('py: subprocess.Popen flags as high', () => { + const findings = detectPyCapability([ + line('agent.py', 'subprocess.Popen(["ls", "-la"], shell=False)') + ]); + const f = findings.find((finding) => finding.kind === 'subprocess_spawn_added'); + assert.ok(f); + assert.equal(f.severity, 'high'); +}); + +test('py: os.system flags subprocess spawn', () => { + const findings = detectPyCapability([ + line('agent.py', 'os.system("rm -rf /tmp/cache")') + ]); + assert.ok(findings.find((f) => f.kind === 'subprocess_spawn_added')); +}); + +test('py: eval() flags as critical dynamic exec', () => { + const findings = detectPyCapability([ + line('agent.py', 'result = eval(user_input)') + ]); + const f = findings.find((finding) => finding.kind === 'dynamic_eval_added'); + assert.ok(f); + assert.equal(f.severity, 'critical'); +}); + +test('py: importlib.import_module flags as dynamic exec', () => { + const findings = detectPyCapability([ + line('agent.py', 'mod = importlib.import_module(name)') + ]); + assert.ok(findings.find((f) => f.kind === 'dynamic_eval_added')); +}); + +test('py: pickle.loads flags as unsafe deserialize', () => { + const findings = detectPyCapability([ + line('agent.py', 'obj = pickle.loads(payload)') + ]); + const f = findings.find((finding) => finding.kind === 'unsafe_deserialize_added'); + assert.ok(f); + assert.equal(f.severity, 'critical'); +}); + +test('py: yaml.load without SafeLoader flags; yaml.safe_load does not', () => { + const unsafe = detectPyCapability([ + line('agent.py', 'config = yaml.load(open("config.yml"))') + ]); + assert.ok(unsafe.find((f) => f.kind === 'unsafe_deserialize_added')); + + const safe = detectPyCapability([ + line('agent.py', 'config = yaml.safe_load(open("config.yml"))') + ]); + assert.equal(safe.find((f) => f.kind === 'unsafe_deserialize_added'), undefined); +}); + +test('py: comment lines are ignored', () => { + const findings = detectPyCapability([ + line('agent.py', '# requests.get("https://example.com")') + ]); + assert.equal(findings.length, 0); +}); + +test('py: test file downgrades severity', () => { + const findings = detectPyCapability([ + line('tests/test_agent.py', 'subprocess.Popen(["echo", "test"])') + ]); + const f = findings.find((finding) => finding.kind === 'subprocess_spawn_added'); + assert.ok(f); + assert.equal(f.severity, 'low'); +});