From 86200d906ebb9d159c6c412ce2cf6a3c7039b86e Mon Sep 17 00:00:00 2001 From: Matt Partida Date: Mon, 4 May 2026 17:30:19 -0700 Subject: [PATCH] feat: document agent security rules --- CONTRIBUTING.md | 63 +++++++++++++ README.md | 4 + SECURITY.md | 37 ++++++++ .../high-risk-agent-security-review.md | 94 +++++++++++++++++++ skills/agent-security/SKILL.md | 1 + skills/agent-security/references/rules.md | 40 ++++++++ .../scripts/config_risk_summary.py | 23 +++++ tests/test_config_risk_summary.py | 21 +++++ 8 files changed, 283 insertions(+) create mode 100644 CONTRIBUTING.md create mode 100644 SECURITY.md create mode 100644 examples/reports/high-risk-agent-security-review.md create mode 100644 skills/agent-security/references/rules.md diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md new file mode 100644 index 0000000..67f10a9 --- /dev/null +++ b/CONTRIBUTING.md @@ -0,0 +1,63 @@ +# Contributing + +Thanks for improving the OpenClaw Agent Security Skillpack. This repository is intentionally dependency-light and should stay easy to audit. + +## Development setup + +```bash +python3 -m compileall -q skills tests +python3 -m pytest -q +ruff check . +./package-skills.sh +``` + +If `pytest` is unavailable locally, you can still run individual script checks with `python3` and rely on CI for the full matrix. + +## Adding or changing a rule + +1. Add or update the detection in `skills/agent-security/scripts/config_risk_summary.py`. +2. Assign a stable `ASG-###` rule ID in the script's `RULE_IDS` map. +3. Document the rule in `skills/agent-security/references/rules.md`. +4. Add a test in `tests/test_config_risk_summary.py` proving the rule appears for a representative config. +5. Include a recommended mitigation and, where useful, the relevant config field. + +Rule IDs should remain stable once published. If a rule is split, keep the old ID documented and add new IDs for the narrower cases. 
+ +## Adding prompt-injection examples + +Prompt-injection examples must be safe and clearly marked as test data. + +Good examples: + +- use fake secrets and fake attacker URLs +- include `TEST ONLY` when practical +- demonstrate one behavior at a time +- assert that the correct behavior is to treat the text as untrusted data + +Avoid: + +- real credentials or real private URLs +- working malware commands +- instructions that could cause harm if copied into a live unsandboxed agent + +## Documentation standards + +For checklist or reference changes, prefer concrete operational guidance: + +- affected trust boundary +- exploit path +- severity rationale +- mitigation +- verification test +- rollback notes for config changes + +Avoid presenting prompts as security boundaries. Guidance should prefer enforced controls: permissions, sandboxes, allowlists, egress gates, logging, and regression tests. + +## Pull request checklist + +- [ ] Tests added or updated for behavior changes +- [ ] `python3 -m compileall -q skills tests` passes +- [ ] `python3 -m pytest -q` passes, or CI is expected to run it +- [ ] `ruff check .` passes, or changes are limited to documentation +- [ ] `./package-skills.sh` succeeds if skill contents changed +- [ ] Documentation and examples do not contain real secrets diff --git a/README.md b/README.md index 153407a..1acb966 100644 --- a/README.md +++ b/README.md @@ -67,6 +67,7 @@ Key files: - `skills/agent-security/SKILL.md` — operational audit checklist and report template - `skills/agent-security/references/prompt-injection.md` — prompt-injection probes and mitigations +- `skills/agent-security/references/rules.md` — stable `ASG-###` rule IDs and mitigations - `skills/agent-security/scripts/config_risk_summary.py` — schema-tolerant config risk summary - `skills/agent-security/scripts/score_prompt_injection_exposure.py` — exposure scoring for agent configs - `skills/agent-security/scripts/flag_prompt_injection_signals.py` — prompt-injection text 
detector @@ -86,6 +87,8 @@ Use for: examples/ high-risk-agent-config.json hardened-agent-config.json + reports/ + high-risk-agent-security-review.md skills/ agent-security/ SKILL.md @@ -107,6 +110,7 @@ tests/ | --- | --- | --- | | `examples/high-risk-agent-config.json` | Demonstrates shared channel + exec + private-network browser + persistence risk | Critical/high findings | | `examples/hardened-agent-config.json` | Demonstrates a constrained, approval-gated, read-oriented setup | No high/critical findings | +| `examples/reports/high-risk-agent-security-review.md` | Shows the recommended human-readable audit report format | Critical shared-runtime review with `ASG-###` rule IDs | ## Packaging diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 0000000..e52a4f3 --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,37 @@ +# Security Policy + +## Supported versions + +This repository is currently pre-1.0. Security fixes are made on the `main` branch and included in the next tagged release. + +## Reporting a vulnerability + +If you find a vulnerability in the skillpack, helper scripts, examples, CI workflow, or packaged `.skill` artifacts, please open a private report through GitHub's vulnerability reporting flow if available, or contact the maintainer through the GitHub profile. + +Please include: + +- affected file, script, or workflow +- reproduction steps +- expected vs. actual behavior +- impact assessment +- any safe proof-of-concept input + +Do **not** include real secrets, private keys, production credentials, or data you do not have consent to share in reports. 
+ +## What counts as a security issue + +Examples: + +- a helper script that executes untrusted input +- a bypass in prompt-injection signal detection that affects documented guarantees +- incorrect high-risk config scoring that can hide a dangerous tool combination +- packaged skill contents that broaden permissions or weaken approval guidance unexpectedly +- CI or packaging behavior that publishes stale or unsafe artifacts + +## Test content warning + +This repository intentionally contains benign prompt-injection examples and fake malicious instructions for defensive testing. Treat all examples, fixtures, and copied test strings as untrusted data. Do not connect test prompts to real outbound tools, real credentials, or production agent runtimes without sandboxing and explicit approval gates. + +## Disclosure expectations + +This project provides defensive guidance and dependency-light analysis helpers. It does not claim to provide complete protection against prompt injection, tool misuse, or agent compromise. Reports that improve detection, documentation, or safe defaults are welcome. diff --git a/examples/reports/high-risk-agent-security-review.md b/examples/reports/high-risk-agent-security-review.md new file mode 100644 index 0000000..005bb82 --- /dev/null +++ b/examples/reports/high-risk-agent-security-review.md @@ -0,0 +1,94 @@ +# Example Agent Security Review: High-Risk Shared Agent + +This is a sample report for `examples/high-risk-agent-config.json`. It demonstrates the expected shape and level of detail for an agent-security review. The example config is intentionally unsafe test data. + +## Executive summary + +- **Overall posture:** Critical +- **Highest-risk issue:** Shared Discord/channel runtime can combine private-network browser access, unrestricted exec, broad filesystem access, elevated tools, and persistence. +- **Trust boundary:** Mixed/shared channel with untrusted or semi-trusted content. 
+- **Recommended next action:** Disable private-network browser access and full exec for the shared runtime, then split untrusted-content handling from privileged/local credentials. + +## Findings + +| Severity | Rule | Area | Evidence | Risk | Recommendation | +| --- | --- | --- | --- | --- | --- | +| Critical | ASG-006 | Browser / channel binding | Shared Discord channel binding plus `browser.ssrfPolicy.dangerouslyAllowPrivateNetwork=true` | A malicious webpage, chat message, or tool result could steer the browser toward localhost or private-network services. | Disable private-network browsing for shared agents or move browser work to an isolated runtime. | +| High | ASG-001 | Runtime execution | Shared channel binding plus exec surface | Shared-channel content can influence shell/runtime actions. | Remove exec from shared-channel agents or require strict sender-specific approvals and sandboxing. | +| High | ASG-004 | Elevated tools | Elevated tools enabled without a narrow allowlist | Privileged actions may be reachable from a broad or ambiguous approval surface. | Disable elevated tools or configure explicit sender/resource allowlists. | +| High | ASG-008 | Exec policy | `tools.exec.security=full` | Commands can run without sufficient containment or review. | Use approval-gated and sandboxed execution; avoid full exec for shared agents. | +| Warn | ASG-003 | Persistence | Memory/cron/notes persistence reachable in a context with untrusted content | Prompt injection can become cross-turn or delayed. | Require review before memory/cron writes sourced from web, documents, email, or group chat. | + +## Immediate risks + +- Private-network/SSRF exposure from browser tools. +- Confused-deputy path from shared chat or remote content to shell/runtime tools. +- Persistent prompt injection through memory, notes, summaries, skills, config, or cron. +- Ambiguous approval authority if group participants can influence approval text. 
+ +## Reversible config hardening + +- **Field:** `browser.ssrfPolicy.dangerouslyAllowPrivateNetwork` + - **Current:** `true` + - **Recommended:** `false` + - **Effect:** Blocks browser access to localhost/RFC1918/private network targets. + - **Rollback:** Re-enable only for a dedicated, isolated internal-network agent. + - **Restart required:** Usually yes, depending on runtime config reload behavior. + +- **Field:** `tools.exec.security` + - **Current:** `full` + - **Recommended:** `approval` or sandboxed execution; remove from shared agents where possible. + - **Effect:** Prevents unreviewed command execution. + - **Rollback:** Restore previous setting only after documenting the trusted-user boundary. + - **Restart required:** Usually yes. + +- **Field:** `tools.elevated.allowFrom` + - **Current:** missing or broad + - **Recommended:** explicit sender-specific approvers only. + - **Effect:** Prevents channel-wide or spoofable privileged action. + - **Rollback:** Remove elevated tools rather than broadening allowlists. + - **Restart required:** Usually yes. + +## Prompt-injection paths + +- **Source:** Webpage or search result + - **Path:** browser result -> model -> browser/private-network request or shell tool + - **Possible impact:** SSRF, local service exposure, command execution, credential discovery + - **Containment:** Disable private-network browser access and require approval before shell/runtime use. + +- **Source:** Discord/group chat content + - **Path:** chat message -> model -> exec/elevated/persistence tool + - **Possible impact:** unauthorized commands, social-engineered approvals, persistent memory poisoning + - **Containment:** Sender-specific approvers, minimal shared-agent tools, no blind persistence. 
+ +- **Source:** Tool output + - **Path:** shell/browser/MCP output -> model -> outbound message or config change + - **Possible impact:** exfiltration or security weakening by following tool-output instructions + - **Containment:** Treat tool output as data, inspect before action, require outbound/config-change approvals. + +## Structural recommendations + +1. Split shared channel agents from personal/private-data agents. +2. Keep browser/web research agents read-only by default. +3. Put outbound actions, repo writes, email, deploys, and config edits behind explicit confirmation gates. +4. Store prompt-injection probes as regression tests whenever a new exploit path is found. + +## Deferred items / unknowns + +- Whether actual sender identity checks are enforced by the gateway. +- Whether filesystem access is workspace-only at runtime. +- Whether secrets are available in environment variables or local dotfiles. +- Whether logs redact prompts, tool inputs, and credentials. + +## Verification performed + +For this example report: + +```bash +python3 skills/agent-security/scripts/config_risk_summary.py \ + < examples/high-risk-agent-config.json +python3 skills/agent-security/scripts/score_prompt_injection_exposure.py \ + < examples/high-risk-agent-config.json +``` + +Expected result: high/critical findings with stable `ASG-###` rule IDs. 
diff --git a/skills/agent-security/SKILL.md b/skills/agent-security/SKILL.md index 8684ca7..358c71d 100644 --- a/skills/agent-security/SKILL.md +++ b/skills/agent-security/SKILL.md @@ -339,6 +339,7 @@ If prompt injection is part of the risk, also include: Use these helper resources when useful: - `references/prompt-injection.md` +- `references/rules.md` - `scripts/config_risk_summary.py` - `scripts/score_prompt_injection_exposure.py` - `scripts/flag_prompt_injection_signals.py` diff --git a/skills/agent-security/references/rules.md b/skills/agent-security/references/rules.md new file mode 100644 index 0000000..700f43f --- /dev/null +++ b/skills/agent-security/references/rules.md @@ -0,0 +1,40 @@ +# Agent Security Rule Reference + +Stable rule IDs are emitted by `scripts/config_risk_summary.py` when a finding maps to a documented agent-security risk. IDs are intended for CI baselines, issue tracking, reports, and future SARIF/code-scanning output. + +## Rule index + +| Rule | Risk key | Severity | What it flags | Preferred mitigation | +| --- | --- | ---: | --- | --- | +| ASG-001 | `shared_channel_with_exec_surface` | High | A shared Discord/channel binding can reach shell/runtime execution. | Separate shared agents from exec tools, or require strict sender-specific approvals and sandboxing. | +| ASG-002 | `browser_private_network_allowed` | High | Browser SSRF policy allows localhost/RFC1918/private-network access. | Disable private-network browser access unless explicitly required and isolated. | +| ASG-003 | `persistence_available_in_untrusted_content_context` | Warn | Memory, cron, notes, or similar persistence is available where untrusted content may be present. | Require human review before persistence and isolate untrusted-content workflows. | +| ASG-004 | `elevated_enabled_without_allowlist`, `agent_elevated_without_allowlist` | High | Elevated tools are enabled without a specific sender/resource allowlist. 
| Add narrow sender-specific allowlists or disable elevated tools. | +| ASG-005 | `risky_default_model`, `risky_agent_model_with_tools` | Warn | A small, local, cheap, custom, or unknown model is used as a default model or by a tool-enabled agent. | Use stronger models for high-risk tools, remove risky fallbacks, or narrow/sandbox tools. | +| ASG-006 | `shared_channel_with_private_network_browser` | Critical | Shared channel binding combines with private-network browser access. | Split the runtime or disable private-network browsing for shared agents. | +| ASG-007 | `shared_channel_with_elevated_surface` | High | Shared channel binding can reach elevated tools. | Separate shared runtimes from elevated tools and use explicit sender approvals. | +| ASG-008 | `exec_security_full` | High | Shell/runtime execution is configured as full/unrestricted. | Use approval-gated or sandboxed execution, preferably workspace-scoped. | +| ASG-009 | `discord_exec_approvals_enabled_without_approvers` | High | Discord exec approvals are enabled without explicit approvers. | Configure explicit approvers by sender identity. | +| ASG-010 | `discord_exec_approvals_missing` | Warn | Discord is enabled with exec surface but no exec approval config was found. | Add clear exec approval policy or remove exec from Discord-bound agents. | +| ASG-011 | `filesystem_not_workspace_only` | Warn | Filesystem access is broader than workspace-only. | Prefer workspace-only file access for project and shared agents. | +| ASG-012 | `sandbox_disabled` | Warn | Sandbox config is present and explicitly disabled. | Enable sandboxing for runtime, browser, and filesystem access where available. | +| ASG-013 | `exec_or_commands_without_owner_allow_from` | Warn | Exec/command surface exists without owner approver config. | Configure owner/sender-specific approval sources. | +| ASG-014 | `discord_group_chat_surface` | Info | Discord group/channel surface is enabled. 
| Treat group content as untrusted and tighten tools/approvals. | +| ASG-015 | `discord_channel_binding` | Info | An agent is bound to a Discord channel peer. | Confirm the channel trust boundary and avoid ambient private credentials. | + +## Severity guidance + +- **Critical** — dangerous cross-boundary combination likely to enable private-network access, exfiltration, or privileged action from a shared/untrusted surface. +- **High** — high-impact tool, approval, or isolation weakness that should be fixed before production/shared use. +- **Warn** — meaningful hardening gap or risky combination that may be acceptable only with compensating controls. +- **Info** — context that helps classify trust boundaries and review scope. + +## Adding new rules + +When adding a rule: + +1. choose the next unused `ASG-###` ID; +2. emit `rule_id` from `config_risk_summary.py`; +3. add or update a representative test; +4. document severity, risk key, exploit path, and mitigation here; +5. keep the ID stable after release. 
diff --git a/skills/agent-security/scripts/config_risk_summary.py b/skills/agent-security/scripts/config_risk_summary.py index 12683b2..957e04a 100644 --- a/skills/agent-security/scripts/config_risk_summary.py +++ b/skills/agent-security/scripts/config_risk_summary.py @@ -16,6 +16,26 @@ SMALL_MODEL_MARKERS = ("haiku", "mini", "nano", "gemma", "phi", "qwen", "mistral") UNKNOWN_MODEL_MARKERS = ("custom", "unknown") +RULE_IDS = { + "shared_channel_with_exec_surface": "ASG-001", + "browser_private_network_allowed": "ASG-002", + "persistence_available_in_untrusted_content_context": "ASG-003", + "elevated_enabled_without_allowlist": "ASG-004", + "agent_elevated_without_allowlist": "ASG-004", + "risky_agent_model_with_tools": "ASG-005", + "risky_default_model": "ASG-005", + "shared_channel_with_private_network_browser": "ASG-006", + "shared_channel_with_elevated_surface": "ASG-007", + "exec_security_full": "ASG-008", + "discord_exec_approvals_enabled_without_approvers": "ASG-009", + "discord_exec_approvals_missing": "ASG-010", + "filesystem_not_workspace_only": "ASG-011", + "sandbox_disabled": "ASG-012", + "exec_or_commands_without_owner_allow_from": "ASG-013", + "discord_group_chat_surface": "ASG-014", + "discord_channel_binding": "ASG-015", +} + def load_json() -> tuple[dict[str, Any] | None, list[dict[str, Any]]]: raw = sys.stdin.read() @@ -86,6 +106,9 @@ def main() -> int: def add(severity: str, risk: str, **extra: Any) -> None: item = {"severity": severity, "risk": risk} + rule_id = RULE_IDS.get(risk) + if rule_id: + item["rule_id"] = rule_id item.update(extra) findings.append(item) diff --git a/tests/test_config_risk_summary.py b/tests/test_config_risk_summary.py index 020bfc7..99f0ccf 100644 --- a/tests/test_config_risk_summary.py +++ b/tests/test_config_risk_summary.py @@ -46,3 +46,24 @@ def test_compound_shared_channel_private_network_is_critical(): proc = run_script(payload) data = json.loads(proc.stdout) assert any(f["risk"] == 
"shared_channel_with_private_network_browser" for f in data["findings"]) + + +def test_key_findings_include_stable_rule_ids(): + payload = { + "channels": {"discord": {"enabled": True}}, + "browser": {"enabled": True, "ssrfPolicy": {"dangerouslyAllowPrivateNetwork": True}}, + "tools": { + "exec": {"security": "full"}, + "elevated": {"enabled": True}, + }, + "bindings": [{"agentId": "shared", "match": {"channel": "discord", "peer": {"kind": "channel"}}}], + "memory": {"enabled": True}, + } + proc = run_script(payload) + data = json.loads(proc.stdout) + findings_by_risk = {finding["risk"]: finding for finding in data["findings"]} + assert findings_by_risk["shared_channel_with_exec_surface"]["rule_id"] == "ASG-001" + assert findings_by_risk["browser_private_network_allowed"]["rule_id"] == "ASG-002" + assert findings_by_risk["persistence_available_in_untrusted_content_context"]["rule_id"] == "ASG-003" + assert findings_by_risk["elevated_enabled_without_allowlist"]["rule_id"] == "ASG-004" + assert findings_by_risk["exec_security_full"]["rule_id"] == "ASG-008"