Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ Code review for AI agent capability drift in pull requests.

CapabilityEcho is a free OSS CLI and GitHub Action that reviews pull requests for risky **code and workflow changes** that expand what agents can reach — even when agent config files did not change.

- JavaScript and TypeScript network, subprocess, and dynamic-eval signals
- JavaScript, TypeScript, **and Python** network, subprocess, and dynamic-eval signals
- `package.json` lifecycle and pipe-to-shell install scripts
- GitHub Actions write permissions and external network steps
- Terminal, Markdown, JSON, and line-level GitHub annotation output
Expand Down Expand Up @@ -115,6 +115,8 @@ CapabilityEcho v0 detects:
- External network fetch calls in added JavaScript or TypeScript lines.
- Subprocess or shell spawn calls in added JavaScript or TypeScript lines.
- Dynamic code execution such as `eval()` or `new Function()` in added lines.
- **Python equivalents:** `requests`/`httpx`/`urllib` network calls (URL-gated), `subprocess`/`os.system`/`os.popen`/`pty.spawn`, `eval`/`exec`/`compile`/`__import__`/`importlib.import_module`, and unsafe deserialization (`pickle.load`, `marshal.load`, `yaml.load` without `SafeLoader`).
- **Newly-added dependencies with high capability surface:** headless browsers (`puppeteer`, `playwright`, `cypress`), subprocess/PTY wrappers (`execa`, `cross-spawn`, `node-pty`, `shelljs`, `zx`), arbitrary HTTP clients (`node-fetch`, `undici`, `got`, `axios`), VM/eval libraries (`vm2`, `isolated-vm`), and SSH/proxy primitives. Telemetry SDKs are flagged at medium.
- GitHub Actions write permissions in added workflow lines.
- External network requests in added workflow steps.
- Workflow steps that combine secrets or env values with external requests.
Expand Down
104 changes: 104 additions & 0 deletions dist/detectors/package-deps.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
import { isRecord, lineOfJsonStringValue, lineOfJsonKey } from '../discovery.js';
import { listPackageJsonFiles } from '../git-diff.js';
import { listChangedPackageJsonFiles, readPackageTextAt } from './package-scripts.js';
// Adding a dependency is, by itself, a capability expansion: the agent
// now has whatever the dep can do, transitively. Some additions are
// materially higher-leverage than others — a headless browser, a
// subprocess wrapper, or an arbitrary-fetch HTTP client lets the agent
// reach the network or the OS in ways the existing audit detects only
// when used. Catching them at the manifest layer flags the intent.
// Package names that compareDeps reports at severity "high" when they are
// newly added to a manifest. Grouped by the kind of reach they hand over.
const HIGH_CAPABILITY_DEPS = new Set([
  // Headless browsers and full UI automation.
  'puppeteer',
  'puppeteer-core',
  'playwright',
  'playwright-core',
  'cypress',
  'webdriverio',
  'selenium-webdriver',
  'nightwatch',

  // Subprocess and PTY wrappers.
  'execa',
  'cross-spawn',
  'node-pty',
  'shelljs',
  'zx',
  'tinyspawn',

  // Arbitrary HTTP clients (the agent can now fetch anywhere without
  // touching `fetch` or `axios.get` in code we'd catch via js-capability).
  'node-fetch',
  'undici',
  'got',
  'axios',
  'request',
  'superagent',

  // Remote-code-execution-shaped libraries.
  'vm2',
  'isolated-vm',

  // Network primitives.
  'socks-proxy-agent',
  'https-proxy-agent',
  'ssh2',
  'node-ssh'
]);

// Telemetry / analytics SDKs that ship a phone-home in their happy path.
// Kept apart from the set above because compareDeps reports them at
// severity "medium" rather than "high".
const TELEMETRY_DEPS = new Set([
  '@segment/analytics-node',
  'mixpanel',
  'amplitude-js',
  'posthog-js',
  '@sentry/node',
  '@sentry/browser'
]);

// The package.json sections readAllDeps merges when collecting dependencies.
const DEP_SECTIONS = [
  'dependencies',
  'devDependencies',
  'optionalDependencies',
  'peerDependencies'
];
// Collects dependency-addition findings for every package.json in scope.
//
// `mode` is the same descriptor passed to the package-scripts detector:
//   { mode: 'directories', oldRoot, newRoot }  -> files come from
//       listPackageJsonFiles(mode.newRoot)
//   anything else (git mode: repo, base, head) -> files come from
//       listChangedPackageJsonFiles(mode.repo, mode.base, mode.head)
//
// Returns a flat array of the findings compareDeps produces, in file order.
export async function detectPackageDeps(mode) {
  const files = mode.mode === 'directories'
    ? await listPackageJsonFiles(mode.newRoot)
    : await listChangedPackageJsonFiles(mode.repo, mode.base, mode.head);
  const findings = [];
  // Files are still handled one at a time so at most two reads are in flight.
  for (const file of files) {
    // The old and new sides do not depend on each other, so read them together
    // instead of waiting for one before starting the other.
    const [oldText, newText] = await Promise.all([
      readPackageTextAt(mode, file, 'old'),
      readPackageTextAt(mode, file, 'new')
    ]);
    findings.push(...compareDeps(file, oldText, newText));
  }
  return findings;
}
// Picks the finding template for a newly-added dependency name, or returns
// undefined when the name is in neither watch list. High-capability wins
// over telemetry if a name were ever present in both sets.
function describeAddedDep(name) {
  if (HIGH_CAPABILITY_DEPS.has(name)) {
    return {
      kind: 'high_capability_dep_added',
      severity: 'high',
      message: `Added dependency "${name}" can reach the network, spawn subprocesses, or evaluate code.`,
      recommendation: 'Confirm this dependency is required for the stated change and that its usage is scoped.'
    };
  }
  if (TELEMETRY_DEPS.has(name)) {
    return {
      kind: 'telemetry_dep_added',
      severity: 'medium',
      message: `Added telemetry/analytics dependency "${name}" — ships an outbound network surface by default.`,
      recommendation: 'Verify the telemetry destination, payload, and opt-out posture.'
    };
  }
  return undefined;
}

// Compares the dependency maps of the old and new manifest text and returns
// one finding per watched dependency that exists in the new text but not in
// the old one.
//
//   file    - path reported on each finding
//   oldText - previous package.json text
//   newText - current package.json text
//
// readAllDeps merges every dependency section into one map, so a package that
// only moves between sections (or only changes version) is not reported.
function compareDeps(file, oldText, newText) {
  const oldDeps = readAllDeps(oldText);
  const newDeps = readAllDeps(newText);
  const findings = [];
  for (const [name, version] of newDeps) {
    if (oldDeps.has(name)) {
      continue;
    }
    const description = describeAddedDep(name);
    if (description === undefined) {
      continue;
    }
    findings.push({
      kind: description.kind,
      severity: description.severity,
      file,
      // Locate the entry by its key first: version strings such as "^1.0.0"
      // are routinely shared by several dependencies, so searching by value
      // can point at a different package's line. The value lookup stays as a
      // fallback. NOTE(review): assumes lineOfJsonKey yields null/undefined
      // when the key is not found - confirm against discovery.js.
      line: lineOfJsonKey(newText, name) ?? lineOfJsonStringValue(newText, version),
      subject: name,
      message: description.message,
      recommendation: description.recommendation
    });
  }
  return findings;
}
// Parses package.json text and flattens every section named in DEP_SECTIONS
// into a single Map of dependency name -> version string.
//
// Blank text, unparseable JSON, and a non-record top level all yield an empty
// Map instead of throwing. Entries whose value is not a string are ignored.
// When a name appears in more than one section, the later section's version
// overwrites the earlier one.
function readAllDeps(text) {
  const deps = new Map();
  if (text.trim() === '') {
    return deps;
  }

  let manifest;
  try {
    manifest = JSON.parse(text);
  } catch {
    // Best effort: a broken manifest simply contributes no dependencies.
    return deps;
  }
  if (!isRecord(manifest)) {
    return deps;
  }

  DEP_SECTIONS
    .map((section) => manifest[section])
    .filter((block) => isRecord(block))
    .flatMap((block) => Object.entries(block))
    .filter(([, version]) => typeof version === 'string')
    .forEach(([name, version]) => {
      deps.set(name, version);
    });
  return deps;
}
4 changes: 2 additions & 2 deletions dist/detectors/package-scripts.js
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ export async function detectPackageScripts(mode) {
}
return findings;
}
async function listChangedPackageJsonFiles(repo, base, head) {
export async function listChangedPackageJsonFiles(repo, base, head) {
const all = await listPackageJsonFiles(repo);
const changed = [];
for (const file of all) {
Expand Down Expand Up @@ -49,7 +49,7 @@ async function readScriptsAt(mode, file, side) {
return {};
}
}
async function readPackageTextAt(mode, file, side) {
export async function readPackageTextAt(mode, file, side) {
if (mode.mode === 'directories') {
const root = side === 'old' ? mode.oldRoot : mode.newRoot;
try {
Expand Down
105 changes: 105 additions & 0 deletions dist/detectors/py-capability.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import { isCommentLine, isPyFile, isTestFile } from '../paths.js';
// Python capability detection. Agents that ship code edits in Python -
// which is most of them once you leave the frontend - can quietly expand
// reach by adding a `requests.post`, a `subprocess.Popen`, or an `eval`
// without ever touching .mcp.json or .claude/settings.json. These are
// the same shapes detect-js-capability flags for the JS world.
// Runs every Python capability detector over the added diff lines.
//
//   lines - iterable of added-line records; each record's `file` and
//           `content` are read here and the whole record is handed to the
//           individual detectors.
//
// Lines are skipped when isPyFile(file) is false or isCommentLine(content)
// is true. For the rest, isTestFile(file) is evaluated once and passed to
// each detector, which uses it to pick a severity.
//
// Returns the concatenated findings in detector order: network, subprocess,
// dynamic execution, unsafe deserialization.
export function detectPyCapability(lines) {
  const findings = [];
  for (const added of lines) {
    if (!isPyFile(added.file) || isCommentLine(added.content)) {
      continue;
    }
    const testFile = isTestFile(added.file);
    findings.push(
      ...detectPyNetwork(added, testFile),
      ...detectPySubprocess(added, testFile),
      ...detectPyDynamicExec(added, testFile),
      ...detectPyUnsafeDeserialize(added, testFile)
    );
  }
  return findings;
}
// Flags an added Python line that both calls a known HTTP entry point and
// carries a literal http(s) URL.
//
//   added    - added-line record; `content`, `file` and `line` are read
//   testFile - truthy lowers the severity from "medium" to "low"
//
// Returns an array holding one finding, or an empty array.
function detectPyNetwork(added, testFile) {
  const { content } = added;

  // Entry points across requests, httpx, aiohttp, and the urllib family
  // (including the Python 2 legacy `urllib2` still found in older
  // agent-generated code).
  const callsNetworkApi = /\b(?:requests|httpx)\.(?:get|post|put|delete|patch|head|options|request)\s*\(|\burllib(?:2)?\.(?:request\.)?urlopen\s*\(|\burlopen\s*\(|\burllib\.request\.urlretrieve\s*\(|\baiohttp\.ClientSession\s*\(/i.test(content);

  // Requiring the URL on the same line mirrors the JS detector and avoids
  // reporting calls whose target comes from a constant defined elsewhere.
  const hasLiteralUrl = /(?:https?:\/\/|['"]https?:\/\/)/i.test(content);

  if (!(callsNetworkApi && hasLiteralUrl)) {
    return [];
  }

  const finding = {
    kind: 'external_fetch_added',
    severity: testFile ? 'low' : 'medium',
    file: added.file,
    line: added.line,
    subject: 'External network call (Python)',
    message: 'Added Python performs an external HTTP request that expands network reach.',
    recommendation: 'Review the endpoint, request payload, and whether the call belongs in this change.'
  };
  return [finding];
}
// Flags an added Python line that starts a subprocess or shell command.
//
//   added    - added-line record; `content`, `file` and `line` are read
//   testFile - truthy lowers the severity from "high" to "low"
//
// Returns an array holding one finding, or an empty array.
function detectPySubprocess(added, testFile) {
  // Subprocess and shell-execution surfaces:
  //  - the subprocess module helpers;
  //  - os.system / os.popen, the whole os.exec* family (execl*, execv*),
  //    os.spawn* and os.posix_spawn / os.posix_spawnp;
  //  - commands.getoutput, the Python 2 legacy still seen in older
  //    agent-generated code;
  //  - pty.spawn;
  //  - asyncio.create_subprocess_exec / create_subprocess_shell.
  const subprocessPattern = /\bsubprocess\.(?:run|call|Popen|check_call|check_output|getoutput|getstatusoutput)\s*\(|\bos\.(?:system|popen|exec[lv]\w*|spawn\w*|posix_spawnp?)\s*\(|\bcommands\.getoutput\s*\(|\bpty\.spawn\s*\(|\basyncio\.create_subprocess_(?:exec|shell)\s*\(/i;
  if (!subprocessPattern.test(added.content)) {
    return [];
  }
  return [
    {
      kind: 'subprocess_spawn_added',
      severity: testFile ? 'low' : 'high',
      file: added.file,
      line: added.line,
      subject: 'Subprocess spawn (Python)',
      message: 'Added Python can spawn shell commands or subprocesses.',
      recommendation: 'Confirm the command source is trusted and scoped to the task.'
    }
  ];
}
// Flags an added Python line that evaluates code dynamically or imports a
// module by name at runtime.
//
//   added    - added-line record; `content`, `file` and `line` are read
//   testFile - truthy lowers the severity from "critical" to "medium"
//
// Returns an array holding one finding, or an empty array.
function detectPyDynamicExec(added, testFile) {
  // Dynamic code execution: the eval / exec / compile / __import__ builtins
  // and importlib.import_module - the standard primitives for "load whatever
  // the LLM names next."
  //
  // The builtins are matched only as bare calls (or via builtins. /
  // __builtins__.). The first lookbehind rejects attribute access, so
  // re.compile(...) or model.eval() are not reported as critical findings;
  // the second rejects `def eval(...)`-style definitions, which declare a
  // function rather than call the builtin.
  const dynamicPattern = /(?<![\w.])(?<!\bdef\s+)(?:eval|exec|compile|__import__)\s*\(|\b(?:__builtins__|builtins)\.(?:eval|exec|compile|__import__)\s*\(|\bimportlib\.import_module\s*\(/i;
  if (!dynamicPattern.test(added.content)) {
    return [];
  }
  return [
    {
      kind: 'dynamic_eval_added',
      severity: testFile ? 'medium' : 'critical',
      file: added.file,
      line: added.line,
      subject: 'Dynamic code execution (Python)',
      message: 'Added Python can evaluate dynamic code or import modules by name at runtime.',
      recommendation: 'Avoid eval-style execution unless strictly required; never feed user input to these.'
    }
  ];
}
// Flags an added Python line that deserializes data with a loader able to
// execute code.
//
//   added    - added-line record; `content`, `file` and `line` are read
//   testFile - truthy lowers the severity from "critical" to "medium"
//
// Returns an array holding one finding, or an empty array.
function detectPyUnsafeDeserialize(added, testFile) {
  // pickle.load(s) and marshal.load(s) on attacker-controlled bytes are a
  // remote-code-execution primitive. yaml.unsafe_load is the same shape, and
  // yaml.load / yaml.load_all without a safe loader is the most common
  // real-world footgun.
  //
  // A yaml.load call is treated as safe when the rest of the line names
  // SafeLoader, CSafeLoader, BaseLoader or CBaseLoader. Scanning to the end
  // of the line (instead of stopping at the first closing parenthesis) keeps
  // calls such as yaml.load(f.read(), Loader=yaml.SafeLoader) from being
  // reported, and also accepts the loader passed positionally.
  const unsafeDeserializePattern = /\bpickle\.loads?\s*\(|\bmarshal\.loads?\s*\(|\byaml\.unsafe_load(?:_all)?\s*\(|\byaml\.load(?:_all)?\s*\((?!.*\bC?(?:Safe|Base)Loader\b)/i;
  if (!unsafeDeserializePattern.test(added.content)) {
    return [];
  }
  return [
    {
      kind: 'unsafe_deserialize_added',
      severity: testFile ? 'medium' : 'critical',
      file: added.file,
      line: added.line,
      subject: 'Unsafe deserialization (Python)',
      message: 'Added Python deserializes untrusted-shaped input (pickle / marshal / yaml.load).',
      recommendation: 'Use yaml.safe_load and avoid pickle/marshal on data crossing trust boundaries.'
    }
  ];
}
16 changes: 12 additions & 4 deletions dist/diff.js
Original file line number Diff line number Diff line change
@@ -1,19 +1,27 @@
import { detectJsCapability } from './detectors/js-capability.js';
import { detectPackageDeps } from './detectors/package-deps.js';
import { detectPackageScripts } from './detectors/package-scripts.js';
import { detectPyCapability } from './detectors/py-capability.js';
import { detectWorkflowPermissions } from './detectors/workflow-permissions.js';
import { collectDirectoryDiff, collectGitDiff } from './git-diff.js';
import { createReport } from './report.js';
// Entry point for a capability diff: gathers the diff context, runs every
// detector, and hands the combined findings to createReport.
//
//   options.mode === 'directories' -> uses options.oldRoot / options.newRoot
//   otherwise (git mode)           -> uses options.repo / options.base / options.head
//
// Resolves to whatever createReport(findings, context) returns.
export async function runCapabilityDiff(options) {
  const isDirectoryMode = options.mode === 'directories';
  const context = isDirectoryMode
    ? await collectDirectoryDiff(options.oldRoot, options.newRoot)
    : await collectGitDiff(options.repo, options.base, options.head);
  // One descriptor shared by both manifest detectors.
  const packageMode = isDirectoryMode
    ? { mode: 'directories', oldRoot: options.oldRoot, newRoot: options.newRoot }
    : { mode: 'git', repo: options.repo, base: options.base, head: options.head };
  // The two manifest detectors are independent of each other, so they run
  // concurrently.
  const [scriptFindings, depFindings] = await Promise.all([
    detectPackageScripts(packageMode),
    detectPackageDeps(packageMode)
  ]);
  // Line-based detectors first, manifest-based detectors last.
  const findings = [
    ...detectWorkflowPermissions(context.addedLines),
    ...detectJsCapability(context.addedLines),
    ...detectPyCapability(context.addedLines),
    ...scriptFindings,
    ...depFindings
  ];
  return createReport(findings, context);
}
17 changes: 14 additions & 3 deletions dist/paths.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,18 +28,25 @@ export function isScannable(relativePath) {
if (normalized.startsWith('.github/workflows/') && /\.(ya?ml)$/i.test(normalized)) {
return true;
}
return /\.(js|jsx|ts|tsx|mjs|cjs)$/i.test(normalized);
return /\.(js|jsx|ts|tsx|mjs|cjs|py|pyw)$/i.test(normalized);
}
// Reports whether a repository-relative path looks like a test file.
//
// A path counts as a test file when, after normalizeRelativePath, it:
//   - contains "__tests__/";
//   - has a directory segment named exactly "tests" (at the top level or
//     nested);
//   - is a Python file named test_*.py or *_test.py; or
//   - ends in .test.<ext> / .spec.<ext> for a JS/TS extension.
//
// NOTE(review): the "tests" check assumes normalizeRelativePath yields
// forward-slash separators - confirm against its definition.
export function isTestFile(relativePath) {
  const normalized = normalizeRelativePath(relativePath);
  // Anchoring on (^|/) also catches a top-level "tests/" directory, which a
  // plain search for "/tests/" cannot see because there is no leading slash.
  if (normalized.includes('__tests__/') || /(^|\/)tests\//.test(normalized)) {
    return true;
  }
  if (/(^|\/)test_[^/]+\.py$/i.test(normalized) || /_test\.py$/i.test(normalized)) {
    return true;
  }
  return /\.(test|spec)\.(js|jsx|ts|tsx|mjs|cjs)$/i.test(normalized);
}
// Reports whether a single source line, ignoring surrounding whitespace,
// starts with a comment marker.
//
// Recognised prefixes: the JS/TS line-comment marker, the block-comment
// opener, a leading "*" (block-comment continuation or closer), and "#"
// for Python-style comments.
//
// NOTE(review): the check has no language context, so a JS line starting
// with "#" (a private class member) or a Python line starting with "*"
// (star-unpacking on a continuation line) is also treated as a comment.
export function isCommentLine(content) {
  const trimmed = content.trim();
  const commentPrefixes = ['//', '/*', '*', '*/', '#'];
  return commentPrefixes.some((prefix) => trimmed.startsWith(prefix));
}
export function isWorkflowFile(relativePath) {
const normalized = normalizeRelativePath(relativePath);
Expand All @@ -53,3 +60,7 @@ export function isJsFile(relativePath) {
const normalized = normalizeRelativePath(relativePath);
return /\.(js|jsx|ts|tsx|mjs|cjs)$/i.test(normalized);
}
// True when the path, once passed through normalizeRelativePath, ends in
// .py or .pyw (compared case-insensitively).
export function isPyFile(relativePath) {
  const pythonExtension = /\.(py|pyw)$/i;
  return pythonExtension.test(normalizeRelativePath(relativePath));
}
Loading