From 9af65e471a63cbf57cdc42fae659534d91eb097c Mon Sep 17 00:00:00 2001 From: mdheller <21163552+mdheller@users.noreply.github.com> Date: Tue, 5 May 2026 14:37:29 -0400 Subject: [PATCH 1/4] Define agent harness risk controls --- ai-infra/AGENT_HARNESS_RISK_CONTROLS.md | 242 ++++++++++++++++++++++++ 1 file changed, 242 insertions(+) create mode 100644 ai-infra/AGENT_HARNESS_RISK_CONTROLS.md diff --git a/ai-infra/AGENT_HARNESS_RISK_CONTROLS.md b/ai-infra/AGENT_HARNESS_RISK_CONTROLS.md new file mode 100644 index 0000000..367a62a --- /dev/null +++ b/ai-infra/AGENT_HARNESS_RISK_CONTROLS.md @@ -0,0 +1,242 @@ +# Agent Harness Risk Controls + +Status: v0.1 planning baseline +Owner plane: SCOPE-D defensive validation +Consumers: Delivery Excellence, Policy Fabric, AgentPlane, Memory Mesh, SourceOS, BearBrowser, TurtleTerm, SocioSphere + +## Purpose + +The Aden/Hive production-agent pattern introduces reusable skills, MCP servers, browser automation, terminal actions, memory writeback, generated graphs, judge loops, and evolution patches. SCOPE-D owns the defensive validation lane for those capability surfaces. + +This document defines the risk-control surface that turns agent harness risk into measurable delivery/security evidence. 
+ +## Boundary + +SCOPE-D owns: + +- safe synthetic validation exercises +- AI infrastructure risk assessment +- MCP/tool server risk assessment +- agent skill risk assessment +- memory poisoning and recall/writeback abuse checks +- browser automation abuse checks +- terminal command risk checks +- graph robustness checks +- detection/control packaging +- verified run summaries for handoff + +SCOPE-D does not own: + +- runtime execution authority; AgentPlane owns it +- policy admission authority; Policy Fabric owns it +- Delivery Excellence scoreboards and KPI definitions +- Memory Mesh runtime implementation +- BearBrowser or TurtleTerm product behavior +- SocioSphere topology authority + +## Risk-control lanes + +### SkillRiskAssessment + +Targets portable `SKILL.md` and governed SourceOS skill envelopes. + +Checks: + +- prompt-injection content +- hidden tool instructions +- unsafe script execution +- filesystem overreach +- network overreach +- credential access requests +- missing evals +- missing threat model +- missing license/provenance +- unpinned dependencies +- trust-tier mismatch + +### MCPServerRiskAssessment + +Targets MCP servers and tool namespaces. + +Checks: + +- tool schema drift +- namespace collision +- unpinned server version +- unsafe transport +- unauthenticated local endpoint +- credential scope overreach +- filesystem/network overreach +- missing health check +- prompt/tool poisoning surface +- revocation behavior + +### BrowserAutomationRiskAssessment + +Targets BearBrowser/browser-use workflows. + +Checks: + +- credential exfiltration +- CSRF/local-origin abuse +- unauthorized form submit +- stealth/evasion misuse +- malicious download bypass +- upload leakage +- external message-send risk +- account-setting mutation +- page-content prompt injection +- domain allowlist bypass + +### TerminalActionRiskAssessment + +Targets TurtleTerm, agent-term, shell receipts, and workstation contracts. 
+ +Checks: + +- command injection +- shell escape +- destructive command bypass +- privilege escalation +- unauthorized filesystem mutation +- secret exfiltration +- package install risk +- unauthorized service exposure +- host/cluster mutation bypass +- generated script risk + +### MemoryRiskAssessment + +Targets Memory Mesh and context-pack flows. + +Checks: + +- memory poisoning +- cross-tenant recall bleed +- sensitive-payload storage bypass +- retrieval prompt injection +- writeback abuse +- redaction bypass +- artifact pointer spoofing +- vector-store drift +- contradiction/drift propagation + +### GraphRobustnessAssessment + +Targets AgentPlane graphs, policy graphs, memory graphs, trust graphs, and evidence graphs. + +Checks: + +- unbounded loops +- missing failure edges +- unsafe retry paths +- hidden side-effect nodes +- judge confidence bypass +- promotion gate bypass +- graph injection +- policy-edge inconsistency +- dependency-cycle risk +- missing rollback path + +### EvolutionPatchRiskAssessment + +Targets proposed graph/prompt/tool/skill/policy/code changes. 
+ +Checks: + +- self-promotion attempt +- hidden authority expansion +- policy weakening +- test removal +- evidence removal +- rollback removal +- prompt injection in patch text +- risky dependency introduction +- supply-chain degradation + +## Evidence outputs + +Every SCOPE-D agent-harness risk run should emit: + +- risk assessment manifest +- safe synthetic event stream +- finding records +- recommended controls +- policy gate refs +- affected asset refs +- run receipt +- verified run summary +- Delivery Excellence projection fields + +## Delivery Excellence projection + +SCOPE-D should export or support projection into these Delivery Excellence metrics: + +- skill risk score +- MCP server risk score +- browser automation risk score +- terminal action risk score +- memory risk score +- graph robustness score +- open high-risk findings +- blocked promotion count +- verified-run count +- control coverage count +- defect/regression count + +Delivery Excellence consumes risk scores and verified summaries, not unsafe exploit payloads. + +## Policy Fabric integration + +Policy Fabric should consume SCOPE-D findings as inputs to: + +- SkillGrantGate +- MCPGrantGate +- BrowserActionGate +- TerminalActionGate +- MemoryGate +- GraphAdmissionGate +- PromotionGate + +High-risk or unverified assets should default to blocked, quarantined, or human-review-required posture. + +## AgentPlane integration + +AgentPlane should cite SCOPE-D risk results in: + +- EvidencePack +- PromotionGate +- FailureDiagnosis +- EvolutionPatch review +- graph/template/skill promotion records + +## Memory Mesh integration + +Memory-related findings should reference Memory Mesh artifact pointers and context-pack refs instead of raw sensitive payloads. 
+ +## SourceOS integration + +SourceOS execution receipt classes should be SCOPE-D assessment targets: + +- LocalAgentRuntimeReceipt +- ShellReceiptEvent +- BrowserActionReceipt +- ModelCarryRouteReceipt +- HostMutationBoundaryReceipt +- DownloadArtifactReceipt + +## Non-negotiables + +- SCOPE-D defaults to read-only, synthetic, and dry-run validation. +- No deployable malware, C2 payloads, credential theft workflows, destructive automation, or unauthorized execution logic belongs in this repo. +- Risk findings must preserve provenance and cite only safe evidence (references or summaries, not raw payloads). +- Risk scores must be traceable to verified run summaries. +- Delivery Excellence receives risk metrics and customer-safe summaries, not unsafe payloads. + +## Near-term implementation path + +1. Add schemas/examples for the assessment lanes above. +2. Add safe synthetic fixtures for skill, MCP, browser, terminal, memory, graph, and evolution-patch risk. +3. Extend verified run reporting with agent-harness risk fields. +4. Add Delivery Excellence metric projection examples. +5. Wire Policy Fabric gate docs to consume SCOPE-D risk results. 
From 1963045dd0af8d88d228e50c4ee2ebeb0228ebd3 Mon Sep 17 00:00:00 2001 From: mdheller <21163552+mdheller@users.noreply.github.com> Date: Thu, 7 May 2026 02:58:08 -0400 Subject: [PATCH 2/4] Add agent harness risk assessment schema --- .../agent-harness-risk-assessment.schema.json | 82 +++++++++++++++++++ 1 file changed, 82 insertions(+) create mode 100644 config/schemas/agent-harness-risk-assessment.schema.json diff --git a/config/schemas/agent-harness-risk-assessment.schema.json b/config/schemas/agent-harness-risk-assessment.schema.json new file mode 100644 index 0000000..d8f458c --- /dev/null +++ b/config/schemas/agent-harness-risk-assessment.schema.json @@ -0,0 +1,82 @@ +{ + "$schema": "https://json-schema.org/draft/2020-12/schema", + "$id": "https://socioprophet.org/schemas/scope-d/agent-harness-risk-assessment.schema.json", + "title": "AgentHarnessRiskAssessment", + "type": "object", + "additionalProperties": false, + "required": [ + "schemaVersion", + "assessmentId", + "assessmentMode", + "target", + "riskLanes", + "findings", + "safetyBoundary", + "deliveryProjection" + ], + "properties": { + "schemaVersion": { "const": "v0.1" }, + "assessmentId": { "type": "string" }, + "assessmentMode": { "type": "string", "enum": ["read_only", "synthetic_only", "dry_run"] }, + "target": { + "type": "object", + "additionalProperties": false, + "required": ["assetRef", "assetType", "ownerRepo"], + "properties": { + "assetRef": { "type": "string" }, + "assetType": { "type": "string", "enum": ["skill", "mcp-server", "browser-workflow", "terminal-workflow", "memory-flow", "agent-graph", "evolution-patch"] }, + "ownerRepo": { "type": "string" } + } + }, + "riskLanes": { + "type": "array", + "items": { + "type": "string", + "enum": ["skill", "mcp", "browser", "terminal", "memory", "graph", "evolution-patch"] + } + }, + "findings": { + "type": "array", + "items": { + "type": "object", + "additionalProperties": false, + "required": ["findingId", "lane", "severity", "title", 
"evidenceRef", "recommendedControl"], + "properties": { + "findingId": { "type": "string" }, + "lane": { "type": "string", "enum": ["skill", "mcp", "browser", "terminal", "memory", "graph", "evolution-patch"] }, + "severity": { "type": "string", "enum": ["info", "low", "medium", "high", "critical"] }, + "title": { "type": "string" }, + "evidenceRef": { "type": "string" }, + "recommendedControl": { "type": "string" } + } + } + }, + "safetyBoundary": { + "type": "object", + "additionalProperties": false, + "required": ["liveActionsExecuted", "syntheticOnly", "credentialCollectionAllowed", "publicNetworkScanningAllowed", "destructiveActionsAllowed"], + "properties": { + "liveActionsExecuted": { "type": "integer" }, + "syntheticOnly": { "type": "boolean" }, + "credentialCollectionAllowed": { "type": "boolean" }, + "publicNetworkScanningAllowed": { "type": "boolean" }, + "destructiveActionsAllowed": { "type": "boolean" } + } + }, + "policyFabricRefs": { "type": "array", "items": { "type": "string" } }, + "agentplaneRefs": { "type": "array", "items": { "type": "string" } }, + "memoryMeshRefs": { "type": "array", "items": { "type": "string" } }, + "sourceosReceiptRefs": { "type": "array", "items": { "type": "string" } }, + "deliveryProjection": { + "type": "object", + "additionalProperties": false, + "required": ["metricFamily", "riskScore", "scoreStatus", "deliveryMetricRefs"], + "properties": { + "metricFamily": { "const": "safety-security" }, + "riskScore": { "type": "number" }, + "scoreStatus": { "type": "string", "enum": ["green", "yellow", "red", "unknown"] }, + "deliveryMetricRefs": { "type": "array", "items": { "type": "string" } } + } + } + } +} From 2411555241c39eae566c7fd5571565579cf8a569 Mon Sep 17 00:00:00 2001 From: mdheller <21163552+mdheller@users.noreply.github.com> Date: Thu, 7 May 2026 02:58:48 -0400 Subject: [PATCH 3/4] Add agent harness risk assessment example --- ...agent-harness-risk-assessment.example.json | 50 +++++++++++++++++++ 1 file changed, 
50 insertions(+) create mode 100644 examples/scope-d/agent-harness-risk-assessment.example.json diff --git a/examples/scope-d/agent-harness-risk-assessment.example.json b/examples/scope-d/agent-harness-risk-assessment.example.json new file mode 100644 index 0000000..ad4390e --- /dev/null +++ b/examples/scope-d/agent-harness-risk-assessment.example.json @@ -0,0 +1,50 @@ +{ + "schemaVersion": "v0.1", + "assessmentId": "agent-harness-risk-assessment-browser-terminal-v0.1", + "assessmentMode": "synthetic_only", + "target": { + "assetRef": "sourceos://tool-pack/browser-terminal-governed-surfaces", + "assetType": "browser-workflow", + "ownerRepo": "SourceOS-Linux/BearBrowser" + }, + "riskLanes": ["browser", "terminal", "memory", "graph"], + "findings": [ + { + "findingId": "finding-browser-form-submit-human-review", + "lane": "browser", + "severity": "medium", + "title": "Externally mutating browser actions require explicit human review.", + "evidenceRef": "github://SourceOS-Linux/BearBrowser/pull/22", + "recommendedControl": "Require BrowserActionGate and HumanControlEvent before live form-submit, upload, purchase/order, ticket creation, or message-send actions." + }, + { + "findingId": "finding-terminal-host-mutation-gate", + "lane": "terminal", + "severity": "medium", + "title": "Host mutation must remain policy-gated and receipt-backed.", + "evidenceRef": "github://SourceOS-Linux/TurtleTerm/pull/5", + "recommendedControl": "Require TerminalActionGate, MutationReceipt, rollback reference, and operator approval for live host mutation." 
+ } + ], + "safetyBoundary": { + "liveActionsExecuted": 0, + "syntheticOnly": true, + "credentialCollectionAllowed": false, + "publicNetworkScanningAllowed": false, + "destructiveActionsAllowed": false + }, + "policyFabricRefs": ["github://SocioProphet/policy-fabric/pull/60"], + "agentplaneRefs": ["github://SocioProphet/agentplane/pull/107"], + "memoryMeshRefs": ["github://SocioProphet/memory-mesh/pull/23"], + "sourceosReceiptRefs": [ + "github://SourceOS-Linux/sourceos-spec/pull/93", + "github://SourceOS-Linux/BearBrowser/pull/22", + "github://SourceOS-Linux/TurtleTerm/pull/5" + ], + "deliveryProjection": { + "metricFamily": "safety-security", + "riskScore": 62, + "scoreStatus": "yellow", + "deliveryMetricRefs": ["github://SocioProphet/delivery-excellence-automation/pull/7"] + } +} From 4d32f4e955a5ffaa7d5cd62a98935d547bd185a7 Mon Sep 17 00:00:00 2001 From: mdheller <21163552+mdheller@users.noreply.github.com> Date: Thu, 7 May 2026 02:59:48 -0400 Subject: [PATCH 4/4] Validate agent harness risk assessment fixture --- scripts/validate-contracts.js | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/scripts/validate-contracts.js b/scripts/validate-contracts.js index 9625f0c..ca4d605 100644 --- a/scripts/validate-contracts.js +++ b/scripts/validate-contracts.js @@ -21,6 +21,7 @@ const REQUIRED_PAIRS = [ ['config/schemas/agent-skill-risk.schema.json', 'examples/scope-d/agent-skill-risk.example.json'], ['config/schemas/run-receipt.schema.json', 'examples/scope-d/run-receipt.example.json'], ['config/schemas/safety-boundary.schema.json', 'examples/scope-d/safety-boundary.example.json'], + ['config/schemas/agent-harness-risk-assessment.schema.json', 'examples/scope-d/agent-harness-risk-assessment.example.json'], ]; const RUNTIME_SCHEMAS = [ @@ -102,6 +103,16 @@ function validateSafetyInvariants(examplePath, example) { assert(example.networkBoundary && example.networkBoundary.egressMode === 'none', `${examplePath}: example boundary must set egressMode=none`); 
assert(example.memoryBoundary && example.memoryBoundary.redactionRequired === true, `${examplePath}: example boundary must require redaction`); } + + if (examplePath.includes('agent-harness-risk-assessment')) { + assert(['read_only', 'synthetic_only', 'dry_run'].includes(example.assessmentMode), `${examplePath}: agent harness risk assessment must be read_only, synthetic_only, or dry_run`); + const safety = example.safetyBoundary || {}; + assert(safety.liveActionsExecuted === 0, `${examplePath}: agent harness risk assessment must record zero live actions`); + assert(safety.syntheticOnly === true, `${examplePath}: agent harness risk assessment must be syntheticOnly=true`); + assert(safety.credentialCollectionAllowed === false, `${examplePath}: credential collection must be prohibited`); + assert(safety.publicNetworkScanningAllowed === false, `${examplePath}: public network scanning must be prohibited`); + assert(safety.destructiveActionsAllowed === false, `${examplePath}: destructive actions must be prohibited`); + } } function walkJsonFiles(relDir) {