From beee8284e2a4f3aae6c468e4f7ded3a34966c09d Mon Sep 17 00:00:00 2001 From: "Byron Miller (MOBB)" Date: Mon, 20 Apr 2026 12:02:55 -0500 Subject: [PATCH 1/2] Enforce policy-mediated authority and phase consultations across harness agents. Add pluggable in-process/OPA policy previews to reconcile capability manifests with runtime decisions, and expand tests/docs so least-privilege guarantees match observed behavior. Made-with: Cursor --- harness/README.md | 72 ++++- harness/examples/governed-demo.ts | 73 ++++- harness/examples/opa/README.md | 96 ++++++ .../opa/policies/open_cot/delegation.rego | 81 +++++ harness/package.json | 1 + harness/src/agents/chat-agent.ts | 272 +++++++++++++--- harness/src/agents/coder-agent.ts | 254 +++++++++++++-- harness/src/agents/governed-agent.ts | 160 ++++++++-- harness/src/agents/index.ts | 2 + harness/src/backends/index.ts | 2 + harness/src/backends/openai-compat.ts | 71 ++++- harness/src/backends/types.ts | 22 ++ harness/src/core/index.ts | 2 +- harness/src/core/llm-circuit-breaker.ts | 6 + harness/src/core/tool-registry.ts | 196 ++++++++++++ harness/src/governance/audit-engine.ts | 5 +- harness/src/governance/index.ts | 17 + harness/src/governance/manifest-builder.ts | 60 +++- harness/src/governance/opa-policy-engine.ts | 300 ++++++++++++++++++ harness/src/governance/policy-engine.ts | 296 +++++++++++++++++ harness/src/governance/sandbox-policies.ts | 50 +++ harness/src/index.ts | 3 + harness/src/tools/index.ts | 1 + harness/src/tools/llm-tools.ts | 15 + harness/tests/chat-agent.test.ts | 29 ++ harness/tests/coder-agent.test.ts | 29 ++ .../fixtures/opa-decision-conformance.json | 115 +++++++ harness/tests/governed-agent.test.ts | 77 +++++ harness/tests/manifest.test.ts | 30 ++ harness/tests/openai-compat-streaming.test.ts | 71 +++++ .../tests/policy-engine-conformance.test.ts | 115 +++++++ harness/tests/policy-engine-live.test.ts | 68 ++++ harness/tests/policy-engine.test.ts | 282 ++++++++++++++++ 
harness/tests/tool-registry.test.ts | 152 +++++++++ 34 files changed, 2921 insertions(+), 104 deletions(-) create mode 100644 harness/examples/opa/README.md create mode 100644 harness/examples/opa/policies/open_cot/delegation.rego create mode 100644 harness/src/governance/opa-policy-engine.ts create mode 100644 harness/src/governance/policy-engine.ts create mode 100644 harness/src/governance/sandbox-policies.ts create mode 100644 harness/src/tools/llm-tools.ts create mode 100644 harness/tests/fixtures/opa-decision-conformance.json create mode 100644 harness/tests/policy-engine-conformance.test.ts create mode 100644 harness/tests/policy-engine-live.test.ts create mode 100644 harness/tests/policy-engine.test.ts create mode 100644 harness/tests/tool-registry.test.ts diff --git a/harness/README.md b/harness/README.md index ffffdb6..f674c63 100644 --- a/harness/README.md +++ b/harness/README.md @@ -32,8 +32,9 @@ src/ tools/ mock-tools.ts search, calculator, readFile, writeFile, runTests agents/ - chat-agent.ts LangGraph-style conversational agent (plan -> act -> verify) - coder-agent.ts Plan-do-act coder (plan -> inspect -> act -> verify -> repair -> summarize) + chat-agent.ts Conversational loop with policy-mediated authority checks + coder-agent.ts Coder loop with policy-mediated authority + repair + governed-agent.ts Full RFC 0007 governed flow with receipts + audit sealing ``` ## Quick start @@ -58,6 +59,73 @@ npx tsx examples/chat-demo.ts npx tsx examples/coder-demo.ts ``` +### Run the governed agent demo + +```bash +npx tsx examples/governed-demo.ts +``` + +Policy modes: + +```bash +npx tsx examples/governed-demo.ts --deny "search for info" +npx tsx examples/governed-demo.ts --narrow "search for info" +``` + +### Choose a policy engine for governed demo + +Use `POLICY_ENGINE`: + +- `inprocess` (default): uses the built-in evaluator +- `opa`: sends delegation requests to OPA and maps decisions into Open CoT objects + +```bash +POLICY_ENGINE=inprocess npx tsx 
examples/governed-demo.ts +``` + +```bash +POLICY_ENGINE=opa \ +OPA_BASE_URL=http://127.0.0.1:8181 \ +OPA_POLICY_PATH=open_cot/delegation \ +npx tsx examples/governed-demo.ts +``` + +Optional OPA env vars: + +- `OPA_BEARER_TOKEN` +- `OPA_TIMEOUT_MS` (default `2000`) +- `OPA_FALLBACK_INPROCESS` (`true` by default) + +Starter OPA policy package: `examples/opa/README.md` + +Live OPA integration test (targets `http://127.0.0.1:8181` by default): + +```bash +npm run test:opa-live +``` + +Override defaults if needed: + +```bash +OPA_BASE_URL=http://127.0.0.1:8181 \ +OPA_POLICY_PATH=open_cot/delegation \ +OPA_LIVE_POLICY_MODE=allow \ +npm run test:opa-live +``` + +`npm test` still auto-skips the live OPA suite when `OPA_BASE_URL` is not set. + +## Runtime governance guarantees + +Current harness behavior (runtime, not just schema/docs): + +- **Policy mediation for all shipped agents**: `chat-agent`, `coder-agent`, and `governed-agent` route tool execution through a `DelegationPolicyEngine` before dispatch. +- **Dispatch-time least privilege enforcement**: tool arguments are schema-validated and checked against delegated scope constraints (`allowed_fields`, `excluded_fields`, `max_results`) in `ToolRegistry`. +- **Phase consultation checks**: policy consultation hooks are enforced at `frame`, `plan`, `observe_result`, `critique_verify`, and `finalize`. +- **Manifest/policy reconciliation**: capability manifests can be compiled from policy-engine tool previews (including OPA-backed decisions), so model-visible tool posture reflects live policy outcomes. + +`chat-agent` and `coder-agent` default to an in-process policy derived from sandbox allow/block lists. You can override this by passing explicit `policies` and/or a custom `policyEngine`. 
+ ### Use a real LLM (Ollama example) ```bash diff --git a/harness/examples/governed-demo.ts b/harness/examples/governed-demo.ts index afdeea0..92eb629 100644 --- a/harness/examples/governed-demo.ts +++ b/harness/examples/governed-demo.ts @@ -12,6 +12,16 @@ * npx tsx examples/governed-demo.ts "calculate 2+2" # calculator (allowed) * npx tsx examples/governed-demo.ts "search for open source" # search (allowed) * npx tsx examples/governed-demo.ts --deny "search for info" # search (denied by policy) + * + * Policy engine selection via env: + * POLICY_ENGINE=inprocess|opa + * + * OPA settings (when POLICY_ENGINE=opa): + * OPA_BASE_URL=http://127.0.0.1:8181 + * OPA_POLICY_PATH=open_cot/delegation + * OPA_BEARER_TOKEN=... + * OPA_TIMEOUT_MS=2000 + * OPA_FALLBACK_INPROCESS=true|false */ import { runGovernedAgent } from "../src/agents/governed-agent.js"; @@ -21,6 +31,11 @@ import { OpenAICompatBackend } from "../src/backends/openai-compat.js"; import { createMockToolRegistry } from "../src/tools/mock-tools.js"; import type { PolicySet } from "../src/governance/policy-evaluator.js"; import type { LLMBackend } from "../src/backends/types.js"; +import { + InProcessPolicyEngine, + OpaPolicyEngine, + type DelegationPolicyEngine, +} from "../src/governance/index.js"; function pickBackend(): LLMBackend { if (process.env["OPENAI_BASE_URL"] || process.env["OPENAI_API_KEY"]) { @@ -76,6 +91,56 @@ const NARROW_SEARCH_POLICY: PolicySet = { priority: 10, }; +interface PolicyEngineSelection { + engine: DelegationPolicyEngine; + engineLabel: string; + manifestPolicies: PolicySet[]; +} + +function parsePositiveInt(value: string | undefined): number | undefined { + if (!value) return undefined; + const parsed = Number.parseInt(value, 10); + if (!Number.isFinite(parsed) || parsed <= 0) return undefined; + return parsed; +} + +function pickPolicyEngine( + policies: PolicySet[], + policyMode: "allow" | "deny" | "narrow", +): PolicyEngineSelection { + const engineChoice = 
(process.env["POLICY_ENGINE"] ?? "inprocess").toLowerCase(); + if (engineChoice !== "opa") { + return { + engine: new InProcessPolicyEngine(policies), + engineLabel: "in-process", + manifestPolicies: policies, + }; + } + + const opaBaseUrl = process.env["OPA_BASE_URL"] ?? "http://127.0.0.1:8181"; + const opaPolicyPath = process.env["OPA_POLICY_PATH"] ?? "open_cot/delegation"; + const fallbackEnabled = + (process.env["OPA_FALLBACK_INPROCESS"] ?? "true").toLowerCase() !== "false"; + const fallbackEngine = fallbackEnabled + ? new InProcessPolicyEngine(policies) + : undefined; + + return { + engine: new OpaPolicyEngine({ + baseUrl: opaBaseUrl, + policyPath: opaPolicyPath, + bearerToken: process.env["OPA_BEARER_TOKEN"], + timeoutMs: parsePositiveInt(process.env["OPA_TIMEOUT_MS"]), + inputContext: { + policy_mode: policyMode, + }, + fallbackEngine, + }), + engineLabel: `opa (${opaBaseUrl}/v1/data/${opaPolicyPath})${fallbackEnabled ? " + fallback" : ""}`, + manifestPolicies: fallbackEnabled ? policies : [], + }; +} + async function main() { const args = process.argv.slice(2); const denyMode = args.includes("--deny"); @@ -85,24 +150,30 @@ async function main() { const policies: PolicySet[] = [ALLOW_ALL_POLICY]; let mode = "ALLOW ALL"; + let policyMode: "allow" | "deny" | "narrow" = "allow"; if (denyMode) { policies.unshift(DENY_SEARCH_POLICY); mode = "DENY SEARCH"; + policyMode = "deny"; } else if (narrowMode) { policies.unshift(NARROW_SEARCH_POLICY); mode = "NARROW SEARCH"; + policyMode = "narrow"; } + const policyEngineSelection = pickPolicyEngine(policies, policyMode); console.log(`\n--- Governed Agent Demo ---`); console.log(`Policy mode: ${mode}`); + console.log(`Policy engine: ${policyEngineSelection.engineLabel}`); console.log(`Question: ${question}\n`); const config: GovernedAgentConfig = { objective: question, backend: pickBackend(), toolRegistry: createMockToolRegistry(), - policies, + policies: policyEngineSelection.manifestPolicies, + policyEngine: 
policyEngineSelection.engine, agentId: "demo-agent-01", }; diff --git a/harness/examples/opa/README.md b/harness/examples/opa/README.md new file mode 100644 index 0000000..d5cea86 --- /dev/null +++ b/harness/examples/opa/README.md @@ -0,0 +1,96 @@ +# Starter OPA Policy Package + +This folder contains a minimal OPA/Rego package you can use with the governed demo. + +## Start OPA with the starter policies + +```bash +cd harness +opa run --server examples/opa/policies +``` + +The starter package exposes decision data at `open_cot/delegation`. + +## Run governed demo against OPA + +```bash +cd harness +POLICY_ENGINE=opa \ +OPA_BASE_URL=http://127.0.0.1:8181 \ +OPA_POLICY_PATH=open_cot/delegation \ +npx tsx examples/governed-demo.ts +``` + +## Try policy modes + +The governed demo sends `input.context.policy_mode` to OPA: + +- default: `"allow"` +- `--deny`: `"deny"` (deny search) +- `--narrow`: `"narrow"` (narrow search scope) + +Examples: + +```bash +# deny search requests +POLICY_ENGINE=opa OPA_BASE_URL=http://127.0.0.1:8181 \ + npx tsx examples/governed-demo.ts --deny "search for open source" + +# narrow search requests +POLICY_ENGINE=opa OPA_BASE_URL=http://127.0.0.1:8181 \ + npx tsx examples/governed-demo.ts --narrow "search for open source" +``` + +## Response contract expected by harness + +OPA `result` should return an object like: + +```json +{ + "status": "approved | denied | narrowed | escalated", + "policy_refs": ["policy.id"], + "narrowed_scope": { + "resource": "tool:search", + "action": "execute", + "constraints": { "max_results": 5 } + }, + "denial_reason": "optional reason", + "escalation_target": "optional target", + "decided_by": { "kind": "policy", "policy_id": "policy.id" } +} +``` + +The harness uses this same decision shape for: + +- tool authorization requests (`resource: "tool:<name>"`) +- manifest reconciliation previews (`resource: "tool:<name>"`, preview context) +- phase consultation hooks (`resource: "phase:<phase>"`) + +The starter policy allows `phase:*` 
by default so runtime consultation does not block the run unless you explicitly add phase-deny rules. + +Conformance fixtures for this mapping live at: + +- `tests/fixtures/opa-decision-conformance.json` +- `tests/policy-engine-conformance.test.ts` + +## Optional live OPA integration test + +Use the dedicated script: + +```bash +cd harness +npm run test:opa-live +``` + +Override defaults if needed: + +```bash +cd harness +OPA_BASE_URL=http://127.0.0.1:8181 \ +OPA_POLICY_PATH=open_cot/delegation \ +OPA_LIVE_POLICY_MODE=allow \ +npm run test:opa-live +``` + +The live test checks end-to-end request/response integration and decision-shape mapping +against a real OPA server. diff --git a/harness/examples/opa/policies/open_cot/delegation.rego b/harness/examples/opa/policies/open_cot/delegation.rego new file mode 100644 index 0000000..25c183f --- /dev/null +++ b/harness/examples/opa/policies/open_cot/delegation.rego @@ -0,0 +1,81 @@ +package open_cot.delegation + +import rego.v1 + +policy_mode := object.get(object.get(input, "context", {}), "policy_mode", "allow") +requested_scope := input.request.requested_scope +is_search_request if requested_scope.resource == "tool:search" + +default_result := { + "status": "denied", + "policy_refs": ["starter.default_deny"], + "denial_reason": "No matching rule in starter OPA policy", + "decided_by": { + "kind": "policy", + "policy_id": "starter.default_deny", + }, +} + +allow_result := { + "status": "approved", + "policy_refs": ["starter.allow_all_tools"], + "decided_by": { + "kind": "policy", + "policy_id": "starter.allow_all_tools", + }, +} + +allow_phase_result := { + "status": "approved", + "policy_refs": ["starter.allow_phase_hooks"], + "decided_by": { + "kind": "policy", + "policy_id": "starter.allow_phase_hooks", + }, +} + +deny_result := { + "status": "denied", + "policy_refs": ["starter.deny_search"], + "denial_reason": "Search access is restricted by starter OPA policy", + "decided_by": { + "kind": "policy", + "policy_id": 
"starter.deny_search", + }, +} + +narrow_result := { + "status": "narrowed", + "policy_refs": ["starter.narrow_search"], + "narrowed_scope": { + "resource": requested_scope.resource, + "action": requested_scope.action, + "constraints": object.union( + object.get(requested_scope, "constraints", {}), + { + "max_results": 5, + "excluded_fields": ["raw_html", "cached_page"], + }, + ), + }, + "decided_by": { + "kind": "policy", + "policy_id": "starter.narrow_search", + }, +} + +result := deny_result if { + policy_mode == "deny" + is_search_request +} +else := narrow_result if { + policy_mode == "narrow" + is_search_request +} +else := allow_result if { + startswith(requested_scope.resource, "tool:") +} +else := allow_phase_result if { + startswith(requested_scope.resource, "phase:") +} +else := default_result diff --git a/harness/package.json b/harness/package.json index 6a3e8da..fb6c5a7 100644 --- a/harness/package.json +++ b/harness/package.json @@ -8,6 +8,7 @@ "scripts": { "build": "tsc", "test": "vitest run", + "test:opa-live": "OPA_BASE_URL=${OPA_BASE_URL:-http://127.0.0.1:8181} OPA_POLICY_PATH=${OPA_POLICY_PATH:-open_cot/delegation} OPA_LIVE_POLICY_MODE=${OPA_LIVE_POLICY_MODE:-allow} vitest run tests/policy-engine-live.test.ts", "test:watch": "vitest", "typecheck": "tsc --noEmit", "chat-demo": "tsx examples/chat-demo.ts", diff --git a/harness/src/agents/chat-agent.ts b/harness/src/agents/chat-agent.ts index 3bd12c5..541d460 100644 --- a/harness/src/agents/chat-agent.ts +++ b/harness/src/agents/chat-agent.ts @@ -1,8 +1,8 @@ /** - * Chat agent — simple governed mode with pre-authorized tool shortcut - * (plan → execute_tool, skipping delegation when sandbox allows the tool). + * Chat agent — policy-governed conversational loop with optional tool use. 
*/ +import { randomUUID } from "node:crypto"; import type { LLMBackend, LLMMessage, LLMResponseWithTools } from "../backends/types.js"; import type { AgentState } from "../core/state.js"; import { createAgentState } from "../core/state.js"; @@ -26,15 +26,23 @@ import type { SandboxConfig } from "../schemas/sandbox.js"; import { DEFAULT_SANDBOX_CONFIG } from "../schemas/sandbox.js"; import { buildManifest, serializeManifest } from "../governance/manifest-builder.js"; import type { WireFormat } from "../governance/manifest-builder.js"; +import { toLLMToolDefinitions } from "../tools/llm-tools.js"; +import type { PolicySet } from "../governance/policy-evaluator.js"; +import type { + DelegationPolicyEngine, +} from "../governance/policy-engine.js"; +import { InProcessPolicyEngine } from "../governance/policy-engine.js"; +import { buildSandboxPolicySets } from "../governance/sandbox-policies.js"; +import type { DelegationRequest } from "../schemas/delegation.js"; +import type { Phase } from "../schemas/agent-loop.js"; function halted(state: AgentState): boolean { return state.phase === "audit_seal"; } -function isPreAuthorized(toolName: string, sandbox: SandboxConfig): boolean { - if (sandbox.blockedTools.includes(toolName)) return false; - if (sandbox.allowedTools.includes("*")) return true; - return sandbox.allowedTools.includes(toolName); +export interface ChatGovernanceOptions { + policies?: PolicySet[]; + policyEngine?: DelegationPolicyEngine; } export async function runChatAgent( @@ -44,10 +52,16 @@ export async function runChatAgent( budgetPolicy?: BudgetPolicy, sandbox?: SandboxConfig, wireFormat?: WireFormat, + governance?: ChatGovernanceOptions, ): Promise { resetStepCounter(); const budget = createBudgetTracker(); const sb = sandbox ?? DEFAULT_SANDBOX_CONFIG; + const toolContracts = toolRegistry.listTools(); + const defaultPolicies = buildSandboxPolicySets(sb); + const effectivePolicies = governance?.policies ?? 
defaultPolicies; + const policyEngine = + governance?.policyEngine ?? new InProcessPolicyEngine(effectivePolicies); const state = createAgentState({ objective, budgetPolicy, @@ -56,7 +70,10 @@ export async function runChatAgent( let lastResponse: LLMResponseWithTools | undefined; - const callLLM = async (messages: LLMMessage[]): Promise => { + const callLLM = async ( + messages: LLMMessage[], + modelVisibleTools = toLLMToolDefinitions(toolContracts), + ): Promise => { const response = await callLLMWithCircuitBreaker({ backend, messages, @@ -67,6 +84,8 @@ export async function runChatAgent( safety: { maxDecodedChars: 12_000, }, + tools: modelVisibleTools, + toolChoice: "auto", }); lastResponse = response; return response; @@ -77,6 +96,83 @@ export async function runChatAgent( return state.trace; }; + const consultPhase = async ( + phase: Phase, + context?: Record, + ): Promise => { + if (!policyEngine.consultPhase) { + return true; + } + const decision = await policyEngine.consultPhase({ + runId: state.runId, + agentId: state.telemetry.agent_id, + objective, + phase, + context, + }); + if (decision.status === "allowed") { + return true; + } + forceStop( + state, + "denied", + `Policy denied at phase ${phase}: ${decision.reason ?? 
"Denied by policy"}`, + ); + return false; + }; + + const manifestHeartbeat = async (phase: Phase) => { + let toolOverrides: + | Record< + string, + { + accessLevel: "pre_authorized" | "requires_delegation" | "blocked"; + constraints?: Record; + reason?: string; + } + > + | undefined; + if (policyEngine.previewToolAccess) { + const preview = await policyEngine.previewToolAccess({ + runId: state.runId, + agentId: state.telemetry.agent_id, + objective, + phase, + tools: toolContracts, + sandbox: state.sandbox, + context: { phase }, + }); + toolOverrides = Object.fromEntries( + Object.entries(preview).map(([name, result]) => [ + name, + { + accessLevel: result.accessLevel, + constraints: result.constraints, + reason: result.reason, + }, + ]), + ); + } + const manifest = buildManifest({ + runId: state.runId, + agentId: state.telemetry.agent_id, + phase, + toolContracts, + sandbox: sb, + policies: effectivePolicies, + budget: state.budget, + toolOverrides, + }); + state.capabilityManifest = manifest; + return { + manifestText: serializeManifest(manifest, wireFormat), + modelVisibleTools: toLLMToolDefinitions( + toolContracts, + new Set(manifest.tools.available.map((tool) => tool.name)), + ), + }; + }; + // receive emitThought(state, `[receive] ${objective}`); budget.recordStep(state, "receive"); @@ -84,10 +180,13 @@ export async function runChatAgent( // frame if (halted(state)) return end(""); + if (!(await consultPhase("frame"))) { + return end("Request denied by policy."); + } const frameResp = await callLLM([ { role: "system", content: "Interpret the task. Do not use tools." 
}, { role: "user", content: `[harness:frame]\n${objective}` }, - ]); + ], []); if (halted(state)) return end(frameResp.content); emitThought(state, `[frame] ${frameResp.content}`); budget.recordStep(state, "frame"); @@ -95,21 +194,17 @@ export async function runChatAgent( // plan (with capability manifest) if (halted(state)) return end(""); - const manifest = buildManifest({ - runId: state.runId, - agentId: state.telemetry.agent_id, - phase: "plan", - toolContracts: toolRegistry.listTools(), - sandbox: sb, - policies: [], - budget: state.budget, - }); - state.capabilityManifest = manifest; - const manifestText = serializeManifest(manifest, wireFormat); + if (!(await consultPhase("plan"))) { + return end("Request denied by policy."); + } + const planHeartbeat = await manifestHeartbeat("plan"); const planResp = await callLLM([ - { role: "system", content: `Plan and propose actions; use tools only if needed.\n\n${manifestText}` }, + { + role: "system", + content: `Plan and propose actions; use tools only if needed.\n\n${planHeartbeat.manifestText}`, + }, { role: "user", content: `[harness:plan]\n${objective}` }, - ]); + ], planHeartbeat.modelVisibleTools); if (halted(state)) return end(planResp.content); const planStep = emitPlan(state, planResp.content); budget.recordStep(state, "plan"); @@ -119,25 +214,25 @@ export async function runChatAgent( if (toolCalls.length === 0) { transition(state, "finalize", "Pure reasoning path"); if (halted(state)) return end(planResp.content); + if (!(await consultPhase("finalize"))) { + return end("Request denied by policy."); + } + const noToolFinalizeHeartbeat = await manifestHeartbeat("finalize"); const fin = await callLLM([ - { role: "system", content: "Produce the final answer for the user." 
}, + { + role: "system", + content: `Produce the final answer for the user.\n\n${noToolFinalizeHeartbeat.manifestText}`, + }, { role: "user", content: `[harness:finalize]\n${objective}\n\nPlan:\n${planResp.content}`, }, - ]); + ], noToolFinalizeHeartbeat.modelVisibleTools); if (halted(state)) return end(fin.content || planResp.content); transition(state, "audit_seal", "Complete"); return end(fin.content || planResp.content); } - for (const tc of toolCalls) { - if (!isPreAuthorized(tc.toolName, state.sandbox)) { - forceStop(state, "failed", `Tool "${tc.toolName}" is not pre-authorized in sandbox`); - return end(`Tool "${tc.toolName}" is not allowed in simple chat mode.`); - } - } - for (let i = 0; i < toolCalls.length; i++) { if (halted(state)) return end(""); const tc = toolCalls[i]!; @@ -147,8 +242,81 @@ export async function runChatAgent( emitThought(state, `[plan] executing remaining tool ${tc.toolName}`); budget.recordStep(state, "plan"); } + if ( + !(await consultPhase("plan", { + tool_name: tc.toolName, + })) + ) { + return end("Request denied by policy."); + } - transition(state, "execute_tool", `Pre-authorized call: ${tc.toolName}`); + transition(state, "request_authority", `Evaluate authority for ${tc.toolName}`); + budget.recordStep(state, "request_authority"); + const delegationRequest: DelegationRequest = { + schema_version: "0.2", + request_id: `req-${randomUUID()}`, + requester: state.telemetry.agent_id, + run_id: state.runId, + intent: `Use ${tc.toolName} to support objective`, + justification: `Model selected tool ${tc.toolName}`, + requested_scope: { + resource: `tool:${tc.toolName}`, + action: "execute", + constraints: + tc.arguments && typeof tc.arguments === "object" + ? 
(tc.arguments as Record) + : undefined, + }, + observed_at: new Date().toISOString(), + }; + transition(state, "validate_authority", "Policy evaluation complete"); + const decision = await policyEngine.evaluate( + delegationRequest, + state.telemetry.agent_id, + ); + budget.recordStep(state, "validate_authority"); + if (decision.status === "denied") { + state.completionStatus = "denied"; + transition( + state, + "deny", + decision.denial_reason ?? "Denied by policy engine", + ); + transition(state, "audit_seal", "Denied"); + return end( + decision.denial_reason ?? + `Tool "${tc.toolName}" denied by policy engine.`, + ); + } + if (decision.status === "escalated") { + state.completionStatus = "escalation_timeout"; + transition( + state, + "escalate", + decision.escalation_target ?? "Escalation required", + ); + transition(state, "audit_seal", "Escalated"); + return end( + decision.escalation_target + ? `Escalation required: ${decision.escalation_target}` + : `Tool "${tc.toolName}" requires escalation.`, + ); + } + transition(state, "delegate_narrow", "Authority granted"); + emitThought( + state, + decision.status === "narrowed" + ? `[delegate_narrow] ${tc.toolName} narrowed by policy` + : `[delegate_narrow] ${tc.toolName} approved by policy`, + ); + budget.recordStep(state, "delegate_narrow"); + + const grantedScope = + decision.status === "narrowed" && decision.narrowed_scope + ? 
decision.narrowed_scope + : delegationRequest.requested_scope; + + transition(state, "execute_tool", `Policy-authorized call: ${tc.toolName}`); const inv: ToolInvocation = { tool_name: tc.toolName, arguments: tc.arguments, @@ -159,7 +327,13 @@ export async function runChatAgent( let result; try { - result = await toolRegistry.call(tc.toolName, tc.arguments, state.sandbox); + result = await toolRegistry.call(tc.toolName, tc.arguments, state.sandbox, { + kind: "receipt", + permissionId: decision.decision_id, + grantedScope, + isPermissionValid: (permissionId: string) => + permissionId === decision.decision_id, + }); } catch (err) { const msg = err instanceof Error ? err.message : String(err); forceStop(state, "fail_safe", msg); @@ -167,19 +341,38 @@ export async function runChatAgent( } transition(state, "observe_result", "Tool finished"); + if ( + !(await consultPhase("observe_result", { + tool_name: tc.toolName, + result_status: result.error ? "error" : "success", + })) + ) { + return end("Request denied by policy."); + } const obs = result.error ? `Error: ${result.error}` : JSON.stringify(result.output); emitObservation(state, obs, actionStep.id); budget.recordStep(state, "observe_result"); transition(state, "critique_verify", "Check tool output"); if (halted(state)) return end(""); + if ( + !(await consultPhase("critique_verify", { + tool_name: tc.toolName, + })) + ) { + return end("Request denied by policy."); + } + const critiqueHeartbeat = await manifestHeartbeat("critique_verify"); const critique = await callLLM([ - { role: "system", content: "Briefly verify the observation against the user goal." 
}, + { + role: "system", + content: `Briefly verify the observation against the user goal.\n\n${critiqueHeartbeat.manifestText}`, + }, { role: "user", content: `[harness:critique]\nObjective: ${objective}\nObservation:\n${obs}`, }, - ]); + ], critiqueHeartbeat.modelVisibleTools); if (halted(state)) return end(critique.content); emitCritique(state, critique.content); budget.recordStep(state, "critique_verify"); @@ -187,10 +380,17 @@ export async function runChatAgent( if (halted(state)) return end(""); transition(state, "finalize", "Compose final answer"); + if (!(await consultPhase("finalize"))) { + return end("Request denied by policy."); + } + const finalizeHeartbeat = await manifestHeartbeat("finalize"); const finalResp = await callLLM([ - { role: "system", content: "Answer the user using the plan and tool observations." }, + { + role: "system", + content: `Answer the user using the plan and tool observations.\n\n${finalizeHeartbeat.manifestText}`, + }, { role: "user", content: `[harness:finalize]\n${objective}` }, - ]); + ], finalizeHeartbeat.modelVisibleTools); if (halted(state)) return end(finalResp.content || planResp.content); const verifyText = finalResp.content; const ok = diff --git a/harness/src/agents/coder-agent.ts b/harness/src/agents/coder-agent.ts index 4e4545b..3339d64 100644 --- a/harness/src/agents/coder-agent.ts +++ b/harness/src/agents/coder-agent.ts @@ -1,7 +1,8 @@ /** - * Coder agent — plan / execute (pre-authorized) / observe / critique with repair loop. + * Coder agent — policy-governed plan / execute / observe / critique with repair loop. 
*/ +import { randomUUID } from "node:crypto"; import type { LLMBackend, LLMMessage, LLMResponseWithTools } from "../backends/types.js"; import type { AgentState } from "../core/state.js"; import { createAgentState } from "../core/state.js"; @@ -24,15 +25,23 @@ import type { BudgetPolicy } from "../schemas/budget.js"; import type { SandboxConfig } from "../schemas/sandbox.js"; import { DEFAULT_SANDBOX_CONFIG } from "../schemas/sandbox.js"; import { buildManifest, manifestToCompactText } from "../governance/manifest-builder.js"; +import { toLLMToolDefinitions } from "../tools/llm-tools.js"; +import type { PolicySet } from "../governance/policy-evaluator.js"; +import type { + DelegationPolicyEngine, +} from "../governance/policy-engine.js"; +import { InProcessPolicyEngine } from "../governance/policy-engine.js"; +import { buildSandboxPolicySets } from "../governance/sandbox-policies.js"; +import type { DelegationRequest } from "../schemas/delegation.js"; +import type { Phase } from "../schemas/agent-loop.js"; function halted(state: AgentState): boolean { return state.phase === "audit_seal"; } -function isPreAuthorized(toolName: string, sandbox: SandboxConfig): boolean { - if (sandbox.blockedTools.includes(toolName)) return false; - if (sandbox.allowedTools.includes("*")) return true; - return sandbox.allowedTools.includes(toolName); +export interface CoderGovernanceOptions { + policies?: PolicySet[]; + policyEngine?: DelegationPolicyEngine; } export async function runCoderAgent( @@ -42,11 +51,17 @@ export async function runCoderAgent( budgetPolicy?: BudgetPolicy, sandbox?: SandboxConfig, maxRepairAttempts?: number, + governance?: CoderGovernanceOptions, ): Promise { resetStepCounter(); const maxRepairs = maxRepairAttempts ?? 3; const budget = createBudgetTracker(); const sb = sandbox ?? DEFAULT_SANDBOX_CONFIG; + const toolContracts = toolRegistry.listTools(); + const defaultPolicies = buildSandboxPolicySets(sb); + const effectivePolicies = governance?.policies ?? 
defaultPolicies; + const policyEngine = + governance?.policyEngine ?? new InProcessPolicyEngine(effectivePolicies); const state = createAgentState({ objective, budgetPolicy, @@ -55,7 +70,10 @@ export async function runCoderAgent( let lastResponse: LLMResponseWithTools | undefined; - const callLLM = async (messages: LLMMessage[]): Promise => { + const callLLM = async ( + messages: LLMMessage[], + modelVisibleTools = toLLMToolDefinitions(toolContracts), + ): Promise => { const response = await callLLMWithCircuitBreaker({ backend, messages, @@ -66,6 +84,8 @@ export async function runCoderAgent( safety: { maxDecodedChars: 20_000, }, + tools: modelVisibleTools, + toolChoice: "auto", }); lastResponse = response; return response; @@ -76,6 +96,83 @@ export async function runCoderAgent( return state.trace; }; + const consultPhase = async ( + phase: Phase, + context?: Record, + ): Promise => { + if (!policyEngine.consultPhase) { + return true; + } + const decision = await policyEngine.consultPhase({ + runId: state.runId, + agentId: state.telemetry.agent_id, + objective, + phase, + context, + }); + if (decision.status === "allowed") { + return true; + } + forceStop( + state, + "denied", + `Policy denied at phase ${phase}: ${decision.reason ?? 
"Denied by policy"}`, + ); + return false; + }; + + const manifestHeartbeat = async (phase: Phase) => { + let toolOverrides: + | Record< + string, + { + accessLevel: "pre_authorized" | "requires_delegation" | "blocked"; + constraints?: Record; + reason?: string; + } + > + | undefined; + if (policyEngine.previewToolAccess) { + const preview = await policyEngine.previewToolAccess({ + runId: state.runId, + agentId: state.telemetry.agent_id, + objective, + phase, + tools: toolContracts, + sandbox: state.sandbox, + context: { phase }, + }); + toolOverrides = Object.fromEntries( + Object.entries(preview).map(([name, result]) => [ + name, + { + accessLevel: result.accessLevel, + constraints: result.constraints, + reason: result.reason, + }, + ]), + ); + } + const manifest = buildManifest({ + runId: state.runId, + agentId: state.telemetry.agent_id, + phase, + toolContracts, + sandbox: state.sandbox, + policies: effectivePolicies, + budget: state.budget, + toolOverrides, + }); + state.capabilityManifest = manifest; + return { + manifestText: manifestToCompactText(manifest), + modelVisibleTools: toLLMToolDefinitions( + toolContracts, + new Set(manifest.tools.available.map((tool) => tool.name)), + ), + }; + }; + // receive emitThought(state, `[receive] ${objective}`); budget.recordStep(state, "receive"); @@ -83,43 +180,38 @@ export async function runCoderAgent( // frame if (halted(state)) return end(""); + if (!(await consultPhase("frame"))) { + return end("Request denied by policy."); + } const frameResp = await callLLM([ { role: "system", content: "Frame the coding task: goals, files, risks. 
Do not call tools yet.", }, { role: "user", content: `[harness:frame]\n${objective}` }, - ]); + ], []); if (halted(state)) return end(frameResp.content); emitThought(state, `[frame] ${frameResp.content}`); budget.recordStep(state, "frame"); transition(state, "plan", "Framed"); - const manifest = buildManifest({ - runId: state.runId, - agentId: state.telemetry.agent_id, - phase: "plan", - toolContracts: toolRegistry.listTools(), - sandbox: state.sandbox, - policies: [], - budget: state.budget, - }); - state.capabilityManifest = manifest; - const manifestText = manifestToCompactText(manifest); - let repairCount = 0; let lastCritique = ""; while (repairCount <= maxRepairs && !halted(state)) { if (halted(state)) return end(""); + if (!(await consultPhase("plan", { repair_cycle: repairCount }))) { + return end("Request denied by policy."); + } + const planHeartbeat = await manifestHeartbeat("plan"); const planResp = await callLLM([ { role: "system", - content: `You are a coding assistant. Propose concrete steps and use tools when needed.\n\n${manifestText}`, + content: `You are a coding assistant. 
Propose concrete steps and use tools when needed.\n\n${planHeartbeat.manifestText}`, }, { role: "user", content: `[harness:plan]\n${objective}` }, - ]); + ], planHeartbeat.modelVisibleTools); if (halted(state)) return end(planResp.content); const planStep = emitPlan(state, planResp.content); budget.recordStep(state, "plan"); @@ -132,13 +224,6 @@ export async function runCoderAgent( break; } - for (const tc of toolCalls) { - if (!isPreAuthorized(tc.toolName, state.sandbox)) { - forceStop(state, "failed", `Tool "${tc.toolName}" is not pre-authorized`); - return end(`Tool "${tc.toolName}" is blocked or not allowlisted.`); - } - } - for (let i = 0; i < toolCalls.length; i++) { if (halted(state)) return end(""); const tc = toolCalls[i]!; @@ -148,6 +233,80 @@ export async function runCoderAgent( emitThought(state, `[plan] continue with ${tc.toolName}`); budget.recordStep(state, "plan"); } + if ( + !(await consultPhase("plan", { + tool_name: tc.toolName, + tool_index: i, + })) + ) { + return end("Request denied by policy."); + } + + transition(state, "request_authority", `Evaluate authority for ${tc.toolName}`); + budget.recordStep(state, "request_authority"); + const delegationRequest: DelegationRequest = { + schema_version: "0.2", + request_id: `req-${randomUUID()}`, + requester: state.telemetry.agent_id, + run_id: state.runId, + intent: `Use ${tc.toolName} for coding objective`, + justification: `Model selected ${tc.toolName}`, + requested_scope: { + resource: `tool:${tc.toolName}`, + action: "execute", + constraints: + tc.arguments && typeof tc.arguments === "object" + ? 
(tc.arguments as Record) + : undefined, + }, + observed_at: new Date().toISOString(), + }; + transition(state, "validate_authority", "Policy evaluation complete"); + const decision = await policyEngine.evaluate( + delegationRequest, + state.telemetry.agent_id, + ); + budget.recordStep(state, "validate_authority"); + if (decision.status === "denied") { + state.completionStatus = "denied"; + transition( + state, + "deny", + decision.denial_reason ?? "Denied by policy engine", + ); + transition(state, "audit_seal", "Denied"); + return end( + decision.denial_reason ?? + `Tool "${tc.toolName}" denied by policy engine.`, + ); + } + if (decision.status === "escalated") { + state.completionStatus = "escalation_timeout"; + transition( + state, + "escalate", + decision.escalation_target ?? "Escalation required", + ); + transition(state, "audit_seal", "Escalated"); + return end( + decision.escalation_target + ? `Escalation required: ${decision.escalation_target}` + : `Tool "${tc.toolName}" requires escalation.`, + ); + } + transition(state, "delegate_narrow", "Authority granted"); + emitThought( + state, + decision.status === "narrowed" + ? `[delegate_narrow] ${tc.toolName} narrowed by policy` + : `[delegate_narrow] ${tc.toolName} approved by policy`, + ); + budget.recordStep(state, "delegate_narrow"); + + const grantedScope = + decision.status === "narrowed" && decision.narrowed_scope + ? 
decision.narrowed_scope + : delegationRequest.requested_scope; transition(state, "execute_tool", `Execute ${tc.toolName}`); const inv: ToolInvocation = { @@ -160,7 +319,13 @@ export async function runCoderAgent( let result; try { - result = await toolRegistry.call(tc.toolName, tc.arguments, state.sandbox); + result = await toolRegistry.call(tc.toolName, tc.arguments, state.sandbox, { + kind: "receipt", + permissionId: decision.decision_id, + grantedScope, + isPermissionValid: (permissionId: string) => + permissionId === decision.decision_id, + }); } catch (err) { const msg = err instanceof Error ? err.message : String(err); forceStop(state, "fail_safe", msg); @@ -168,22 +333,38 @@ export async function runCoderAgent( } transition(state, "observe_result", "Captured tool output"); + if ( + !(await consultPhase("observe_result", { + tool_name: tc.toolName, + result_status: result.error ? "error" : "success", + })) + ) { + return end("Request denied by policy."); + } const obs = result.error ? `Error: ${result.error}` : JSON.stringify(result.output); emitObservation(state, obs, actionStep.id); budget.recordStep(state, "observe_result"); transition(state, "critique_verify", "Verify step output"); if (halted(state)) return end(""); + if ( + !(await consultPhase("critique_verify", { + tool_name: tc.toolName, + })) + ) { + return end("Request denied by policy."); + } + const critiqueHeartbeat = await manifestHeartbeat("critique_verify"); const critiqueResp = await callLLM([ { role: "system", - content: "Critique the tool output. Say whether changes look correct or need repair.", + content: `Critique the tool output. 
Say whether changes look correct or need repair.\n\n${critiqueHeartbeat.manifestText}`, }, { role: "user", content: `[harness:critique]\nTask: ${objective}\nObservation:\n${obs}`, }, - ]); + ], critiqueHeartbeat.modelVisibleTools); if (halted(state)) return end(critiqueResp.content); emitCritique(state, critiqueResp.content); budget.recordStep(state, "critique_verify"); @@ -211,10 +392,17 @@ export async function runCoderAgent( if (halted(state)) return end(""); transition(state, "finalize", "Summarize coding outcome"); + if (!(await consultPhase("finalize"))) { + return end("Request denied by policy."); + } + const finalizeHeartbeat = await manifestHeartbeat("finalize"); const summaryResp = await callLLM([ - { role: "system", content: "Summarize what was done and the final state of the code." }, + { + role: "system", + content: `Summarize what was done and the final state of the code.\n\n${finalizeHeartbeat.manifestText}`, + }, { role: "user", content: `[harness:finalize]\n${objective}` }, - ]); + ], finalizeHeartbeat.modelVisibleTools); if (halted(state)) return end(summaryResp.content); emitSummary(state, summaryResp.content); budget.recordStep(state, "finalize"); diff --git a/harness/src/agents/governed-agent.ts b/harness/src/agents/governed-agent.ts index bade29a..29e1bac 100644 --- a/harness/src/agents/governed-agent.ts +++ b/harness/src/agents/governed-agent.ts @@ -12,10 +12,13 @@ import { createBudgetTracker } from "../core/budget-tracker.js"; import { callLLMWithCircuitBreaker } from "../core/llm-circuit-breaker.js"; import type { ToolRegistry } from "../core/tool-registry.js"; import { PermissionManager } from "../governance/permission-manager.js"; -import { PolicyEvaluator } from "../governance/policy-evaluator.js"; import type { PolicySet } from "../governance/policy-evaluator.js"; import { AuthBroker } from "../governance/auth-broker.js"; import { AuditEngine } from "../governance/audit-engine.js"; +import { + InProcessPolicyEngine, + type 
DelegationPolicyEngine, +} from "../governance/policy-engine.js"; import * as emit from "../core/trace-emitter.js"; import type { Trace } from "../schemas/trace.js"; import type { AuditEnvelope } from "../schemas/audit-envelope.js"; @@ -26,6 +29,8 @@ import type { BudgetPolicy } from "../schemas/budget.js"; import type { SandboxConfig } from "../schemas/sandbox.js"; import { buildManifest, serializeManifest } from "../governance/manifest-builder.js"; import type { WireFormat } from "../governance/manifest-builder.js"; +import { toLLMToolDefinitions } from "../tools/llm-tools.js"; +import type { Phase } from "../schemas/agent-loop.js"; function sha256(data: string): string { return createHash("sha256").update(data).digest("hex"); @@ -40,6 +45,7 @@ export interface GovernedAgentConfig { backend: LLMBackend; toolRegistry: ToolRegistry; policies?: PolicySet[]; + policyEngine?: DelegationPolicyEngine; agentId?: string; budgetPolicy?: BudgetPolicy; sandbox?: SandboxConfig; @@ -65,16 +71,22 @@ export async function runGovernedAgent( }); const permissionMgr = new PermissionManager(); - const policyEval = new PolicyEvaluator(); - for (const p of config.policies ?? []) { - policyEval.addPolicy(p); - } + const policyEngine = + config.policyEngine ?? new InProcessPolicyEngine(config.policies ?? []); const broker = new AuthBroker(permissionMgr); const audit = new AuditEngine(); let lastResponse: LLMResponseWithTools | undefined; + const toolContracts = config.toolRegistry.listTools(); + + const syncPermissionState = () => { + state.activePermissions = state.activePermissions.map( + (perm) => permissionMgr.get(perm.permission_id) ?? perm, + ); + }; const callLLM = async ( messages: LLMMessage[], + tools?: ReturnType, ): Promise => { const response = await callLLMWithCircuitBreaker({ backend: config.backend, @@ -86,6 +98,8 @@ export async function runGovernedAgent( safety: { maxDecodedChars: 16_000, }, + tools, + toolChoice: tools && tools.length > 0 ? 
"auto" : undefined, }); lastResponse = response; return response; @@ -93,6 +107,7 @@ export async function runGovernedAgent( const finish = (answer: string): GovernedAgentResult => { permissionMgr.revokeAll("run finalized"); + syncPermissionState(); emit.finalizeTrace( state, answer || state.trace.final_answer || String(state.completionStatus), @@ -108,18 +123,82 @@ export async function runGovernedAgent( * the model always sees the current truth rather than stale context from an * earlier phase. */ - const manifestHeartbeat = () => { + const consultPhase = async ( + phase: Phase, + context?: Record, + ): Promise => { + if (!policyEngine.consultPhase) { + return true; + } + const decision = await policyEngine.consultPhase({ + runId: state.runId, + agentId: state.telemetry.agent_id, + objective: config.objective, + phase, + context, + }); + if (decision.status === "allowed") { + return true; + } + forceStop( + state, + "denied", + `Policy denied at phase ${phase}: ${decision.reason ?? "Denied by policy"}`, + ); + return false; + }; + + const manifestHeartbeat = async () => { + let toolOverrides: + | Record< + string, + { + accessLevel: "pre_authorized" | "requires_delegation" | "blocked"; + constraints?: Record; + reason?: string; + } + > + | undefined; + if (policyEngine.previewToolAccess) { + const preview = await policyEngine.previewToolAccess({ + runId: state.runId, + agentId: state.telemetry.agent_id, + objective: config.objective, + phase: state.phase, + tools: toolContracts, + sandbox: state.sandbox, + context: { + phase: state.phase, + }, + }); + toolOverrides = Object.fromEntries( + Object.entries(preview).map(([toolName, result]) => [ + toolName, + { + accessLevel: result.accessLevel, + constraints: result.constraints, + reason: result.reason, + }, + ]), + ); + } const manifest = buildManifest({ runId: state.runId, agentId: state.telemetry.agent_id, phase: state.phase, - toolContracts: config.toolRegistry.listTools(), + toolContracts, sandbox: 
state.sandbox, policies: config.policies ?? [], budget: state.budget, + toolOverrides, }); state.capabilityManifest = manifest; - return serializeManifest(manifest, config.wireFormat); + const visibleTools = new Set(manifest.tools.available.map((tool) => tool.name)); + const llmTools = toLLMToolDefinitions(toolContracts, visibleTools); + return { + text: serializeManifest(manifest, config.wireFormat), + llmTools, + }; }; // --- receive --- @@ -129,13 +208,17 @@ export async function runGovernedAgent( // --- frame --- if (halted(state)) return finish(""); + if (!(await consultPhase("frame"))) { + return finish("Request denied by policy."); + } + const frameHeartbeat = await manifestHeartbeat(); const frameResp = await callLLM([ { role: "system", - content: `Interpret and frame the user's task. Do not call tools.\n\n${manifestHeartbeat()}`, + content: `Interpret and frame the user's task. Do not call tools.\n\n${frameHeartbeat.text}`, }, { role: "user", content: `[harness:frame]\n${config.objective}` }, - ]); + ], frameHeartbeat.llmTools); if (halted(state)) return finish(frameResp.content); emit.emitThought(state, `[frame] ${frameResp.content}`); budget.recordStep(state, "frame"); @@ -143,13 +226,17 @@ export async function runGovernedAgent( // --- plan --- if (halted(state)) return finish(""); + if (!(await consultPhase("plan"))) { + return finish("Request denied by policy."); + } + const planHeartbeat = await manifestHeartbeat(); const planResp = await callLLM([ { role: "system", - content: `Propose concrete actions. You may request tools via tool_calls when needed.\n\n${manifestHeartbeat()}`, + content: `Propose concrete actions. 
You may request tools via tool_calls when needed.\n\n${planHeartbeat.text}`, }, { role: "user", content: `[harness:plan]\n${config.objective}` }, - ]); + ], planHeartbeat.llmTools); if (halted(state)) return finish(planResp.content); const planStep = emit.emitPlan(state, planResp.content); budget.recordStep(state, "plan"); @@ -160,13 +247,20 @@ export async function runGovernedAgent( if (toolCalls.length === 0) { transition(state, "finalize", "No tools required"); if (halted(state)) return finish(planResp.content); + if (!(await consultPhase("finalize"))) { + return finish("Request denied by policy."); + } + const noToolsFinalizeHeartbeat = await manifestHeartbeat(); const fin = await callLLM([ - { role: "system", content: `Produce the final user-facing answer.\n\n${manifestHeartbeat()}` }, + { + role: "system", + content: `Produce the final user-facing answer.\n\n${noToolsFinalizeHeartbeat.text}`, + }, { role: "user", content: `[harness:finalize]\n${config.objective}\n\nPlan:\n${planResp.content}`, }, - ]); + ], noToolsFinalizeHeartbeat.llmTools); if (halted(state)) return finish(fin.content || planResp.content); transition(state, "audit_seal", "Run complete"); return finish(fin.content || planResp.content); @@ -211,7 +305,7 @@ export async function runGovernedAgent( // --- validate_authority --- transition(state, "validate_authority", `Validate ${tc.toolName}`); - const decision = policyEval.evaluate( + const decision = await policyEngine.evaluate( request, state.telemetry.agent_id, ); @@ -276,6 +370,13 @@ export async function runGovernedAgent( tc.toolName, tc.arguments, state.sandbox, + { + kind: "receipt", + permissionId: receipt.permission_id, + grantedScope: receipt.granted_scope, + isPermissionValid: (permissionId: string) => + permissionMgr.isValid(permissionId), + }, ); } catch (err) { const msg = err instanceof Error ? 
err.message : String(err); @@ -286,10 +387,19 @@ export async function runGovernedAgent( // Consume one-shot permission if (receipt.one_shot) { permissionMgr.consume(receipt.permission_id); + syncPermissionState(); } // --- observe_result --- transition(state, "observe_result", "Tool returned"); + if ( + !(await consultPhase("observe_result", { + tool_name: tc.toolName, + result_status: toolResult.error ? "error" : "success", + })) + ) { + return finish("Request denied by policy."); + } const obsText = toolResult.error ? `Error: ${toolResult.error}` : JSON.stringify(toolResult.output); @@ -323,16 +433,24 @@ export async function runGovernedAgent( // --- critique_verify --- transition(state, "critique_verify", "Evaluate tool output"); if (halted(state)) return finish(""); + if ( + !(await consultPhase("critique_verify", { + tool_name: tc.toolName, + })) + ) { + return finish("Request denied by policy."); + } + const critiqueHeartbeat = await manifestHeartbeat(); const critique = await callLLM([ { role: "system", - content: `Critique tool results for correctness and safety.\n\n${manifestHeartbeat()}`, + content: `Critique tool results for correctness and safety.\n\n${critiqueHeartbeat.text}`, }, { role: "user", content: `[harness:critique]\nTool: ${tc.toolName}\nObservation:\n${obsText}`, }, - ]); + ], critiqueHeartbeat.llmTools); if (halted(state)) return finish(critique.content); emit.emitCritique(state, critique.content); budget.recordStep(state, "critique_verify"); @@ -341,13 +459,17 @@ export async function runGovernedAgent( // --- finalize --- if (halted(state)) return finish(""); transition(state, "finalize", "Synthesize final answer"); + if (!(await consultPhase("finalize"))) { + return finish("Request denied by policy."); + } + const finalizeHeartbeat = await manifestHeartbeat(); const finalResp = await callLLM([ { role: "system", - content: `Produce the final user-facing answer from the plan and observations.\n\n${manifestHeartbeat()}`, + content: `Produce 
the final user-facing answer from the plan and observations.\n\n${finalizeHeartbeat.text}`, }, { role: "user", content: `[harness:finalize]\n${config.objective}` }, - ]); + ], finalizeHeartbeat.llmTools); if (halted(state)) return finish(finalResp.content); transition(state, "audit_seal", "Governed run complete"); return finish(finalResp.content); diff --git a/harness/src/agents/index.ts b/harness/src/agents/index.ts index 38f5e91..7515344 100644 --- a/harness/src/agents/index.ts +++ b/harness/src/agents/index.ts @@ -1,5 +1,7 @@ export { runChatAgent } from "./chat-agent.js"; export { runCoderAgent } from "./coder-agent.js"; export { runGovernedAgent } from "./governed-agent.js"; +export type { ChatGovernanceOptions } from "./chat-agent.js"; +export type { CoderGovernanceOptions } from "./coder-agent.js"; export type { GovernedAgentConfig, GovernedAgentResult } from "./governed-agent.js"; export type { WireFormat } from "../governance/manifest-builder.js"; diff --git a/harness/src/backends/index.ts b/harness/src/backends/index.ts index 32ee8db..ce98584 100644 --- a/harness/src/backends/index.ts +++ b/harness/src/backends/index.ts @@ -6,6 +6,8 @@ export type { LLMResponse, LLMResponseWithTools, LLMStreamChunk, + LLMToolChoice, + LLMToolDefinition, ToolCallRequest, } from "./types.js"; export { MockLLMBackend } from "./mock.js"; diff --git a/harness/src/backends/openai-compat.ts b/harness/src/backends/openai-compat.ts index 925c830..6f449f8 100644 --- a/harness/src/backends/openai-compat.ts +++ b/harness/src/backends/openai-compat.ts @@ -70,12 +70,20 @@ export class OpenAICompatBackend implements LLMBackend { options?: LLMChatOptions, ): Promise { const url = `${this.config.baseUrl}/chat/completions`; - const body = { + const body: Record = { model: this.config.model, messages, temperature: this.config.temperature, max_tokens: this.resolveMaxTokens(options?.maxOutputTokens), }; + const mappedTools = mapTools(options?.tools); + if (mappedTools) { + body.tools = 
mappedTools; + } + const mappedToolChoice = mapToolChoice(options?.toolChoice); + if (mappedToolChoice) { + body.tool_choice = mappedToolChoice; + } const res = await fetch(url, { method: "POST", @@ -119,7 +127,7 @@ export class OpenAICompatBackend implements LLMBackend { options: LLMChatOptions, ): Promise { const url = `${this.config.baseUrl}/chat/completions`; - const body = { + const body: Record = { model: this.config.model, messages, temperature: this.config.temperature, @@ -127,6 +135,14 @@ export class OpenAICompatBackend implements LLMBackend { stream: true, stream_options: { include_usage: true }, }; + const mappedTools = mapTools(options.tools); + if (mappedTools) { + body.tools = mappedTools; + } + const mappedToolChoice = mapToolChoice(options.toolChoice); + if (mappedToolChoice) { + body.tool_choice = mappedToolChoice; + } const res = await fetch(url, { method: "POST", @@ -240,6 +256,26 @@ interface OpenAIStreamChunk { }; } +interface OpenAIToolDefinition { + type: "function"; + function: { + name: string; + description: string; + parameters: Record; + }; +} + +type OpenAIToolChoice = + | "auto" + | "none" + | "required" + | { + type: "function"; + function: { + name: string; + }; + }; + interface OpenAIToolCallDelta { index?: number; id?: string; @@ -442,3 +478,34 @@ async function consumeSseEvent( return next; } + +function mapTools(tools?: LLMChatOptions["tools"]): OpenAIToolDefinition[] | undefined { + if (!tools || tools.length === 0) { + return undefined; + } + return tools.map((tool) => ({ + type: "function", + function: { + name: tool.name, + description: tool.description, + parameters: tool.inputSchema, + }, + })); +} + +function mapToolChoice( + choice?: LLMChatOptions["toolChoice"], +): OpenAIToolChoice | undefined { + if (!choice) { + return undefined; + } + if (typeof choice === "string") { + return choice; + } + return { + type: "function", + function: { + name: choice.name, + }, + }; +} diff --git a/harness/src/backends/types.ts 
b/harness/src/backends/types.ts index d9f2ad6..ad9ba23 100644 --- a/harness/src/backends/types.ts +++ b/harness/src/backends/types.ts @@ -24,6 +24,20 @@ export interface ToolCallRequest { arguments: Record; } +export interface LLMToolDefinition { + name: string; + description: string; + inputSchema: Record; +} + +export type LLMToolChoice = + | "auto" + | "none" + | "required" + | { + name: string; + }; + export interface LLMResponseWithTools extends LLMResponse { toolCalls?: ToolCallRequest[]; } @@ -66,6 +80,14 @@ export interface LLMChatOptions { * Stream callback invoked for each content/tool delta. */ onChunk?: (chunk: LLMStreamChunk) => void | Promise; + /** + * Optional tool definitions exposed to the model for native tool-calling. + */ + tools?: LLMToolDefinition[]; + /** + * Optional tool choice policy for providers that support it. + */ + toolChoice?: LLMToolChoice; } export interface LLMBackend { diff --git a/harness/src/core/index.ts b/harness/src/core/index.ts index dc1e208..920e203 100644 --- a/harness/src/core/index.ts +++ b/harness/src/core/index.ts @@ -22,7 +22,7 @@ export { resetStepCounter, } from "./trace-emitter.js"; export { ToolRegistry } from "./tool-registry.js"; -export type { ToolHandler } from "./tool-registry.js"; +export type { ToolExecutionAuthority, ToolHandler } from "./tool-registry.js"; export { checkPolicy, DEFAULT_LOOP_POLICY } from "./loop-policy.js"; export type { LoopPolicy, PolicyViolation } from "./loop-policy.js"; export { diff --git a/harness/src/core/llm-circuit-breaker.ts b/harness/src/core/llm-circuit-breaker.ts index f1b3ce0..5bb5306 100644 --- a/harness/src/core/llm-circuit-breaker.ts +++ b/harness/src/core/llm-circuit-breaker.ts @@ -3,6 +3,8 @@ import type { LLMMessage, LLMResponseWithTools, LLMStreamChunk, + LLMToolChoice, + LLMToolDefinition, } from "../backends/types.js"; import type { BudgetTracker } from "./budget-tracker.js"; import type { AgentState } from "./state.js"; @@ -38,6 +40,8 @@ export interface 
CircuitBreakerOptions { llmReason?: string; stream?: boolean; safety?: Partial; + tools?: LLMToolDefinition[]; + toolChoice?: LLMToolChoice; } function isAbortError(err: unknown): boolean { @@ -163,6 +167,8 @@ export async function callLLMWithCircuitBreaker( maxOutputTokens: requestOutputCap, signal: controller.signal, onChunk: options.stream === false ? undefined : onChunk, + tools: options.tools, + toolChoice: options.toolChoice, }); budget.recordTokens(state, response.tokensUsed, llmReason); return response; diff --git a/harness/src/core/tool-registry.ts b/harness/src/core/tool-registry.ts index 3705d07..4fd3cc1 100644 --- a/harness/src/core/tool-registry.ts +++ b/harness/src/core/tool-registry.ts @@ -7,6 +7,7 @@ import type { ToolContract, ToolResult, ErrorCategory } from "../schemas/tool-invocation.js"; import type { SandboxConfig } from "../schemas/sandbox.js"; +import type { RequestedScope } from "../schemas/delegation.js"; export type ToolHandler = ( args: Record, @@ -17,8 +18,28 @@ interface RegisteredTool { handler: ToolHandler; } +interface ValidationErrorItem { + instancePath?: string; + message?: string; +} + +type JsonSchemaValidator = ((data: unknown) => boolean) & { + errors?: ValidationErrorItem[]; +}; + +type ValidatorFactory = (schema: Record) => JsonSchemaValidator; + +export interface ToolExecutionAuthority { + kind: "standing" | "receipt"; + permissionId?: string; + grantedScope?: RequestedScope; + isPermissionValid?: (permissionId: string) => boolean; +} + export class ToolRegistry { private tools = new Map(); + private argValidators = new Map(); + private validatorFactoryPromise: Promise | null = null; register(contract: ToolContract, handler: ToolHandler): void { this.tools.set(contract.name, { contract, handler }); @@ -43,6 +64,7 @@ export class ToolRegistry { name: string, args: Record, sandbox: SandboxConfig, + authority?: ToolExecutionAuthority, ): Promise { if (!this.isAllowed(name, sandbox)) { return { @@ -62,6 +84,24 @@ export class 
ToolRegistry { } const { contract, handler } = entry; + const argError = await this.validateArguments(contract, args); + if (argError) { + return { + output: null, + error: argError, + errorCategory: "invalid_input", + }; + } + + const authorityError = this.validateAuthority(name, args, authority); + if (authorityError) { + return { + output: null, + error: authorityError, + errorCategory: "permission_denied", + }; + } + const start = Date.now(); try { @@ -95,6 +135,132 @@ export class ToolRegistry { } return true; } + + private async validateArguments( + contract: ToolContract, + args: Record, + ): Promise { + const validator = await this.getArgValidator(contract); + if (validator(args)) { + return null; + } + + const details = (validator.errors ?? []) + .slice(0, 3) + .map((err) => `${err.instancePath || "/"} ${err.message ?? "invalid"}`) + .join("; "); + const suffix = details ? ` (${details})` : ""; + return `Invalid arguments for tool "${contract.name}"${suffix}`; + } + + private validateAuthority( + toolName: string, + args: Record, + authority?: ToolExecutionAuthority, + ): string | null { + if (!authority || authority.kind === "standing") { + return null; + } + + if (!authority.permissionId || !authority.grantedScope) { + return `Missing authority receipt context for tool "${toolName}"`; + } + + if ( + authority.isPermissionValid && + !authority.isPermissionValid(authority.permissionId) + ) { + return `Permission "${authority.permissionId}" is not active`; + } + + if (!scopeResourceMatches(toolName, authority.grantedScope.resource)) { + return `Granted scope "${authority.grantedScope.resource}" does not permit tool "${toolName}"`; + } + + if (authority.grantedScope.action !== "execute") { + return `Granted scope action "${authority.grantedScope.action}" cannot execute tool "${toolName}"`; + } + + return this.validateScopeConstraints(toolName, args, authority.grantedScope); + } + + private validateScopeConstraints( + toolName: string, + args: Record, + 
scope: RequestedScope, + ): string | null { + const constraints = scope.constraints; + if (!constraints) { + return null; + } + + const argKeys = Object.keys(args); + const allowedFields = readStringArray(constraints["allowed_fields"]); + if (allowedFields) { + const disallowed = argKeys.filter((key) => !allowedFields.includes(key)); + if (disallowed.length > 0) { + return `Tool "${toolName}" arguments violate allowed_fields: ${disallowed.join(", ")}`; + } + } + + const excludedFields = readStringArray(constraints["excluded_fields"]); + if (excludedFields) { + const forbidden = argKeys.filter((key) => excludedFields.includes(key)); + if (forbidden.length > 0) { + return `Tool "${toolName}" arguments include excluded_fields: ${forbidden.join(", ")}`; + } + } + + const maxResults = readFiniteNumber(constraints["max_results"]); + if (maxResults !== undefined) { + const rawMaxResults = args["max_results"]; + if (typeof rawMaxResults === "number" && rawMaxResults > maxResults) { + return `Tool "${toolName}" requested max_results=${rawMaxResults}, exceeds ${maxResults}`; + } + const rawLimit = args["limit"]; + if (typeof rawLimit === "number" && rawLimit > maxResults) { + return `Tool "${toolName}" requested limit=${rawLimit}, exceeds ${maxResults}`; + } + } + + return null; + } + + private async getArgValidator( + contract: ToolContract, + ): Promise { + const cached = this.argValidators.get(contract.name); + if (cached) { + return cached; + } + + const factory = await this.getValidatorFactory(); + const validate = factory(contract.inputSchema); + this.argValidators.set(contract.name, validate); + return validate; + } + + private async getValidatorFactory(): Promise { + if (!this.validatorFactoryPromise) { + this.validatorFactoryPromise = (async () => { + const AjvMod = await import("ajv"); + const formatsMod = await import("ajv-formats"); + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const Ajv = (AjvMod as any).default ?? 
AjvMod; + // eslint-disable-next-line @typescript-eslint/no-explicit-any + const addFormats = (formatsMod as any).default ?? formatsMod; + const ajv = new (Ajv as new (opts: Record) => { + compile: (schema: Record) => JsonSchemaValidator; + })({ + allErrors: true, + strict: false, + }); + addFormats(ajv); + return (schema: Record) => ajv.compile(schema); + })(); + } + return this.validatorFactoryPromise; + } } function timeout(ms: number, toolName: string): Promise { @@ -105,3 +271,33 @@ function timeout(ms: number, toolName: string): Promise { ), ); } + +function scopeResourceMatches(toolName: string, resource: string): boolean { + const expected = `tool:${toolName}`; + if (resource === expected || resource === "tool:*") { + return true; + } + if (resource.endsWith("*")) { + const prefix = resource.slice(0, -1); + return expected.startsWith(prefix); + } + return false; +} + +function readStringArray(value: unknown): string[] | null { + if (!Array.isArray(value)) { + return null; + } + const parsed = value.filter((item): item is string => typeof item === "string"); + if (parsed.length !== value.length) { + return null; + } + return parsed; +} + +function readFiniteNumber(value: unknown): number | undefined { + if (typeof value === "number" && Number.isFinite(value)) { + return value; + } + return undefined; +} diff --git a/harness/src/governance/audit-engine.ts b/harness/src/governance/audit-engine.ts index f11d643..79306fa 100644 --- a/harness/src/governance/audit-engine.ts +++ b/harness/src/governance/audit-engine.ts @@ -50,9 +50,12 @@ function toEnvelopeCompletion(state: AgentState): CompletionStatus { function delegationSummaryFromState(state: AgentState): DelegationSummary { const decisions = state.delegationDecisions; + const grantedCount = decisions.filter( + (d) => d.status === "approved" || d.status === "narrowed", + ).length; return { total_requested: state.delegationRequests.length, - total_granted: decisions.filter((d) => d.status === 
"approved").length, + total_granted: grantedCount, total_denied: decisions.filter((d) => d.status === "denied").length, total_narrowed: decisions.filter((d) => d.status === "narrowed").length, total_escalated: decisions.filter((d) => d.status === "escalated").length, diff --git a/harness/src/governance/index.ts b/harness/src/governance/index.ts index ea1379e..7ca36cb 100644 --- a/harness/src/governance/index.ts +++ b/harness/src/governance/index.ts @@ -1,7 +1,24 @@ export { PermissionManager } from "./permission-manager.js"; export { PolicyEvaluator } from "./policy-evaluator.js"; export type { PolicyRule, PolicySet } from "./policy-evaluator.js"; +export { + InProcessPolicyEngine, + createDelegationDecision, +} from "./policy-engine.js"; +export type { + DelegationDecisionDraft, + DelegationPolicyEngine, + ManifestToolAccessLevel, + PolicyPhaseConsultationDecision, + PolicyPhaseConsultationInput, + ToolAccessPreview, + ToolAccessPreviewInput, +} from "./policy-engine.js"; +export { OpaPolicyEngine } from "./opa-policy-engine.js"; +export type { OpaPolicyEngineConfig } from "./opa-policy-engine.js"; +export { buildSandboxPolicySets } from "./sandbox-policies.js"; export { AuthBroker } from "./auth-broker.js"; export { AuditEngine } from "./audit-engine.js"; export type { AuditEvent } from "./audit-engine.js"; export { buildManifest, manifestToCompactText } from "./manifest-builder.js"; +export type { ManifestToolOverride } from "./manifest-builder.js"; diff --git a/harness/src/governance/manifest-builder.ts b/harness/src/governance/manifest-builder.ts index 33dff7b..1cd8741 100644 --- a/harness/src/governance/manifest-builder.ts +++ b/harness/src/governance/manifest-builder.ts @@ -20,6 +20,12 @@ import type { Phase } from "../schemas/agent-loop.js"; export type WireFormat = "json" | "compact-text" | "toon"; +export interface ManifestToolOverride { + accessLevel: ToolAccessLevel | "blocked"; + constraints?: Record; + reason?: string; +} + export interface 
ManifestInput { runId: string; agentId: string; @@ -29,6 +35,7 @@ export interface ManifestInput { policies: PolicySet[]; budget: BudgetSnapshot; trustLevel?: "untrusted" | "low" | "medium" | "high"; + toolOverrides?: Record; } function isBlocked(toolName: string, sandbox: SandboxConfig): boolean { @@ -129,21 +136,31 @@ export function buildManifest(input: ManifestInput): CapabilityManifest { const activeConstraints: string[] = []; for (const contract of input.toolContracts) { - const level = determineAccessLevel( - contract.name, - input.sandbox, - input.policies, - ); + const override = input.toolOverrides?.[contract.name]; + const level = + override?.accessLevel ?? + determineAccessLevel(contract.name, input.sandbox, input.policies); if (level === "blocked") { blocked.push(contract.name); + if (override?.reason) { + activeConstraints.push(`${contract.name}: ${override.reason}`); + } continue; } - const { constraints, descriptions } = collectConstraints( - contract.name, - input.policies, - ); + const { + constraints, + descriptions, + } = override?.constraints + ? 
{ + constraints: override.constraints, + descriptions: describeConstraints(contract.name, override.constraints), + } + : collectConstraints(contract.name, input.policies); + if (override?.reason) { + descriptions.push(`${contract.name}: ${override.reason}`); + } activeConstraints.push(...descriptions); available.push({ @@ -173,6 +190,31 @@ export function buildManifest(input: ManifestInput): CapabilityManifest { }; } +function describeConstraints( + toolName: string, + constraints: Record, +): string[] { + const descriptions: string[] = []; + const allowedFields = constraints["allowed_fields"]; + if (Array.isArray(allowedFields) && allowedFields.every((item) => typeof item === "string")) { + descriptions.push(`${toolName}: fields limited to ${allowedFields.join(", ")}`); + } + const excludedFields = constraints["excluded_fields"]; + if ( + Array.isArray(excludedFields) && + excludedFields.every((item) => typeof item === "string") + ) { + descriptions.push(`${toolName}: ${excludedFields.join(", ")} excluded`); + } + if (typeof constraints["max_results"] === "number") { + descriptions.push(`${toolName}: max ${constraints["max_results"]} results`); + } + if (descriptions.length === 0) { + descriptions.push(`${toolName}: constrained by policy`); + } + return descriptions; +} + /** * Serialize a manifest to compact text for model-context injection. * Designed to be readable by any model without JSON parsing. 
diff --git a/harness/src/governance/opa-policy-engine.ts b/harness/src/governance/opa-policy-engine.ts new file mode 100644 index 0000000..f75db5b --- /dev/null +++ b/harness/src/governance/opa-policy-engine.ts @@ -0,0 +1,300 @@ +import type { + DecidedBy, + DelegationDecision, + DelegationRequest, + DelegationStatus, + RequestedScope, +} from "../schemas/delegation.js"; +import type { SandboxConfig } from "../schemas/sandbox.js"; +import type { + DelegationPolicyEngine, + PolicyPhaseConsultationDecision, + PolicyPhaseConsultationInput, + ToolAccessPreview, + ToolAccessPreviewInput, +} from "./policy-engine.js"; +import { createDelegationDecision } from "./policy-engine.js"; + +type OpaResult = { + status?: DelegationStatus; + policy_refs?: string[]; + narrowed_scope?: RequestedScope; + denial_reason?: string; + escalation_target?: string; + decided_by?: Partial; +}; + +interface OpaResponseEnvelope { + result?: unknown; +} + +export interface OpaPolicyEngineConfig { + baseUrl: string; + policyPath: string; + bearerToken?: string; + timeoutMs?: number; + inputContext?: Record; + fallbackEngine?: DelegationPolicyEngine; +} + +const DEFAULT_TIMEOUT_MS = 2_000; + +export class OpaPolicyEngine implements DelegationPolicyEngine { + readonly name = "opa"; + private config: OpaPolicyEngineConfig; + + constructor(config: OpaPolicyEngineConfig) { + this.config = config; + } + + async evaluate( + request: DelegationRequest, + agentId: string, + ): Promise { + try { + const result = await this.queryOpa(request, agentId); + return this.toDecision(request, agentId, result); + } catch (err) { + if (this.config.fallbackEngine) { + return this.config.fallbackEngine.evaluate(request, agentId); + } + const message = err instanceof Error ? 
err.message : String(err); + return createDelegationDecision(request, agentId, { + status: "denied", + decidedBy: { kind: "harness" }, + policyRefs: [], + denialReason: `OPA policy evaluation failed: ${message}`, + outcomeKind: "opa_error", + }); + } + } + + async consultPhase( + input: PolicyPhaseConsultationInput, + ): Promise { + const decision = await this.evaluate( + createSyntheticRequest({ + runId: input.runId, + requester: input.agentId, + scope: { + resource: `phase:${input.phase}`, + action: "read", + constraints: input.context, + }, + intent: `Consult policy hook for phase ${input.phase}`, + justification: `Runtime policy consultation at ${input.phase}`, + }), + input.agentId, + ); + if (decision.status === "denied" || decision.status === "escalated") { + return { + status: "denied", + reason: + decision.denial_reason ?? + decision.escalation_target ?? + "Denied by OPA policy", + policyRefs: decision.policy_refs, + }; + } + return { + status: "allowed", + policyRefs: decision.policy_refs, + }; + } + + async previewToolAccess( + input: ToolAccessPreviewInput, + ): Promise> { + const entries = await Promise.all( + input.tools.map(async (tool) => { + if (!isToolAllowedBySandbox(tool.name, input.sandbox)) { + return [ + tool.name, + { + accessLevel: "blocked", + reason: "Blocked by sandbox policy", + policyRefs: [], + } satisfies ToolAccessPreview, + ] as const; + } + const decision = await this.evaluate( + createSyntheticRequest({ + runId: input.runId, + requester: input.agentId, + scope: { + resource: `tool:${tool.name}`, + action: "execute", + constraints: input.context, + }, + intent: `Preview tool access for ${tool.name}`, + justification: `Manifest compilation for phase ${input.phase}`, + }), + input.agentId, + ); + return [tool.name, toToolAccessPreview(decision)] as const; + }), + ); + return Object.fromEntries(entries); + } + + private async queryOpa( + request: DelegationRequest, + agentId: string, + ): Promise { + const controller = new 
AbortController(); + const timeoutMs = this.config.timeoutMs ?? DEFAULT_TIMEOUT_MS; + const timeoutId = setTimeout(() => controller.abort(), timeoutMs); + try { + const path = this.config.policyPath.replace(/^\/+/, ""); + const url = `${this.config.baseUrl.replace(/\/+$/, "")}/v1/data/${path}`; + const response = await fetch(url, { + method: "POST", + headers: { + "Content-Type": "application/json", + ...(this.config.bearerToken + ? { Authorization: `Bearer ${this.config.bearerToken}` } + : {}), + }, + body: JSON.stringify({ + input: { + request, + agent_id: agentId, + context: this.config.inputContext ?? {}, + }, + }), + signal: controller.signal, + }); + if (!response.ok) { + const text = await response.text(); + throw new Error( + `OPA HTTP ${response.status}: ${text.slice(0, 500)}`, + ); + } + const data = (await response.json()) as OpaResponseEnvelope; + if (!data.result || typeof data.result !== "object") { + throw new Error("OPA response missing decision object in result"); + } + return data.result as OpaResult; + } finally { + clearTimeout(timeoutId); + } + } + + private toDecision( + request: DelegationRequest, + agentId: string, + result: OpaResult, + ): DelegationDecision { + if (!result.status) { + throw new Error("OPA response missing status"); + } + + const policyRefs = normalizePolicyRefs(result.policy_refs); + const decidedBy = normalizeDecidedBy(result.decided_by, policyRefs); + const denialReason = + result.status === "denied" + ? result.denial_reason ?? 
"Denied by OPA policy" + : result.denial_reason; + + return createDelegationDecision(request, agentId, { + status: result.status, + decidedBy, + policyRefs, + narrowedScope: result.narrowed_scope, + denialReason, + escalationTarget: result.escalation_target, + outcomeKind: `opa_${result.status}`, + }); + } +} + +function normalizePolicyRefs(value: unknown): string[] { + if (!Array.isArray(value)) { + return []; + } + return value.filter((item): item is string => typeof item === "string"); +} + +function normalizeDecidedBy( + value: unknown, + policyRefs: string[], +): DecidedBy { + if (value && typeof value === "object") { + const maybe = value as Partial; + if ( + maybe.kind === "policy" || + maybe.kind === "human" || + maybe.kind === "harness" + ) { + return { + kind: maybe.kind, + policy_id: maybe.policy_id, + human_approver: maybe.human_approver, + }; + } + } + return { + kind: "policy", + policy_id: policyRefs[0], + }; +} + +function createSyntheticRequest(args: { + runId: string; + requester: string; + scope: RequestedScope; + intent: string; + justification: string; +}): DelegationRequest { + return { + schema_version: "0.2", + request_id: `req-synth-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}`, + requester: args.requester, + run_id: args.runId, + intent: args.intent, + justification: args.justification, + requested_scope: args.scope, + observed_at: new Date().toISOString(), + }; +} + +function toToolAccessPreview(decision: DelegationDecision): ToolAccessPreview { + if (decision.status === "approved") { + return { + accessLevel: "pre_authorized", + policyRefs: decision.policy_refs, + }; + } + if (decision.status === "narrowed") { + return { + accessLevel: "requires_delegation", + constraints: decision.narrowed_scope?.constraints, + reason: "Requires delegated narrowed authority", + policyRefs: decision.policy_refs, + }; + } + if (decision.status === "escalated") { + return { + accessLevel: "requires_delegation", + reason: 
decision.escalation_target + ? `Escalation required: ${decision.escalation_target}` + : "Escalation required", + policyRefs: decision.policy_refs, + }; + } + return { + accessLevel: "blocked", + reason: decision.denial_reason ?? "Denied by OPA policy", + policyRefs: decision.policy_refs, + }; +} + +function isToolAllowedBySandbox(toolName: string, sandbox: SandboxConfig): boolean { + if (sandbox.blockedTools.includes(toolName)) { + return false; + } + if (sandbox.allowedTools.includes("*")) { + return true; + } + return sandbox.allowedTools.includes(toolName); +} diff --git a/harness/src/governance/policy-engine.ts b/harness/src/governance/policy-engine.ts new file mode 100644 index 0000000..9b1606c --- /dev/null +++ b/harness/src/governance/policy-engine.ts @@ -0,0 +1,296 @@ +import { createHash } from "node:crypto"; +import type { + DecidedBy, + DelegationDecision, + DelegationRequest, + DelegationStatus, + RequestedScope, +} from "../schemas/delegation.js"; +import type { Phase } from "../schemas/agent-loop.js"; +import type { SandboxConfig } from "../schemas/sandbox.js"; +import type { ToolContract } from "../schemas/tool-invocation.js"; +import { PolicyEvaluator } from "./policy-evaluator.js"; +import type { PolicySet } from "./policy-evaluator.js"; + +export type ManifestToolAccessLevel = + | "pre_authorized" + | "requires_delegation" + | "blocked"; + +export interface ToolAccessPreview { + accessLevel: ManifestToolAccessLevel; + constraints?: Record; + reason?: string; + policyRefs?: string[]; +} + +export interface ToolAccessPreviewInput { + runId: string; + agentId: string; + objective: string; + phase: Phase; + tools: ToolContract[]; + sandbox: SandboxConfig; + context?: Record; +} + +export interface PolicyPhaseConsultationInput { + runId: string; + agentId: string; + objective: string; + phase: Phase; + context?: Record; +} + +export interface PolicyPhaseConsultationDecision { + status: "allowed" | "denied"; + reason?: string; + policyRefs?: string[]; 
+} + +export interface DelegationPolicyEngine { + readonly name: string; + evaluate( + request: DelegationRequest, + agentId: string, + ): Promise; + consultPhase?( + input: PolicyPhaseConsultationInput, + ): Promise; + previewToolAccess?( + input: ToolAccessPreviewInput, + ): Promise>; +} + +export interface DelegationDecisionDraft { + status: DelegationStatus; + decidedBy: DecidedBy; + policyRefs?: string[]; + narrowedScope?: RequestedScope; + denialReason?: string; + escalationTarget?: string; + decidedAt?: string; + outcomeKind?: string; +} + +function sha256Hex(input: string): string { + return createHash("sha256").update(input, "utf8").digest("hex"); +} + +function stableStringify(value: unknown): string { + if (value === null || typeof value !== "object") { + return JSON.stringify(value); + } + if (Array.isArray(value)) { + return `[${value.map((item) => stableStringify(item)).join(",")}]`; + } + const obj = value as Record; + const keys = Object.keys(obj).sort(); + return `{${keys.map((key) => `${JSON.stringify(key)}:${stableStringify(obj[key])}`).join(",")}}`; +} + +export function createDelegationDecision( + request: DelegationRequest, + agentId: string, + draft: DelegationDecisionDraft, +): DelegationDecision { + const decidedAt = draft.decidedAt ?? new Date().toISOString(); + const policyRefs = draft.policyRefs ?? []; + const basis = stableStringify({ + request_id: request.request_id, + agent_id: agentId, + scope: request.requested_scope, + status: draft.status, + decided_by: draft.decidedBy, + policy_refs: policyRefs, + narrowed_scope: draft.narrowedScope, + denial_reason: draft.denialReason, + escalation_target: draft.escalationTarget, + outcomeKind: draft.outcomeKind ?? 
draft.status, + decided_at: decidedAt, + }); + const decision_id = sha256Hex(basis); + + return { + schema_version: "0.2", + decision_id, + request_id: request.request_id, + status: draft.status, + decided_by: draft.decidedBy, + policy_refs: policyRefs, + narrowed_scope: draft.narrowedScope, + denial_reason: draft.denialReason, + escalation_target: draft.escalationTarget, + decided_at: decidedAt, + }; +} + +export class InProcessPolicyEngine implements DelegationPolicyEngine { + readonly name = "in-process"; + private evaluator = new PolicyEvaluator(); + private policies: PolicySet[] = []; + + constructor(policies: PolicySet[] = []) { + for (const policy of policies) { + this.policies.push(policy); + this.evaluator.addPolicy(policy); + } + } + + addPolicy(policy: PolicySet): void { + this.policies.push(policy); + this.evaluator.addPolicy(policy); + } + + removePolicy(policyId: string): void { + this.policies = this.policies.filter((policy) => policy.policy_id !== policyId); + this.evaluator.removePolicy(policyId); + } + + async evaluate( + request: DelegationRequest, + agentId: string, + ): Promise { + return this.evaluator.evaluate(request, agentId); + } + + async consultPhase( + input: PolicyPhaseConsultationInput, + ): Promise { + if (!this.hasApplicablePhasePolicy(input.phase)) { + return { status: "allowed", policyRefs: [] }; + } + const decision = this.evaluator.evaluate( + createSyntheticRequest({ + runId: input.runId, + requester: input.agentId, + intent: `Consult phase ${input.phase}`, + justification: `Policy consultation at phase ${input.phase}`, + scope: { + resource: `phase:${input.phase}`, + action: "read", + constraints: input.context, + }, + }), + input.agentId, + ); + return toPhaseConsultationDecision(decision); + } + + async previewToolAccess( + input: ToolAccessPreviewInput, + ): Promise> { + const preview: Record = {}; + for (const tool of input.tools) { + if (!isToolAllowedBySandbox(tool.name, input.sandbox)) { + preview[tool.name] = { + 
accessLevel: "blocked", + reason: "Blocked by sandbox policy", + policyRefs: [], + }; + continue; + } + const decision = this.evaluator.evaluate( + createSyntheticRequest({ + runId: input.runId, + requester: input.agentId, + intent: `Preview tool access for ${tool.name}`, + justification: `Manifest compilation for phase ${input.phase}`, + scope: { + resource: `tool:${tool.name}`, + action: "execute", + constraints: input.context, + }, + }), + input.agentId, + ); + preview[tool.name] = toToolAccessPreview(decision); + } + return preview; + } + + private hasApplicablePhasePolicy(phase: Phase): boolean { + return this.policies.some((policy) => + policy.rules.some( + (rule) => + rule.resource === "phase:*" || rule.resource === `phase:${phase}`, + ), + ); + } +} + +function createSyntheticRequest(args: { + runId: string; + requester: string; + intent: string; + justification: string; + scope: RequestedScope; +}): DelegationRequest { + return { + schema_version: "0.2", + request_id: `req-synth-${Date.now().toString(36)}-${Math.random().toString(36).slice(2, 8)}`, + requester: args.requester, + run_id: args.runId, + intent: args.intent, + justification: args.justification, + requested_scope: args.scope, + observed_at: new Date().toISOString(), + }; +} + +function toPhaseConsultationDecision( + decision: DelegationDecision, +): PolicyPhaseConsultationDecision { + if (decision.status === "denied" || decision.status === "escalated") { + return { + status: "denied", + reason: decision.denial_reason ?? decision.escalation_target ?? 
"Denied by policy", + policyRefs: decision.policy_refs, + }; + } + return { + status: "allowed", + policyRefs: decision.policy_refs, + }; +} + +function toToolAccessPreview(decision: DelegationDecision): ToolAccessPreview { + if (decision.status === "approved") { + return { + accessLevel: "pre_authorized", + policyRefs: decision.policy_refs, + }; + } + if (decision.status === "narrowed") { + return { + accessLevel: "requires_delegation", + constraints: decision.narrowed_scope?.constraints, + reason: "Requires delegated narrowed authority", + policyRefs: decision.policy_refs, + }; + } + if (decision.status === "escalated") { + return { + accessLevel: "requires_delegation", + reason: decision.escalation_target + ? `Escalation required: ${decision.escalation_target}` + : "Escalation required", + policyRefs: decision.policy_refs, + }; + } + return { + accessLevel: "blocked", + reason: decision.denial_reason ?? "Denied by policy", + policyRefs: decision.policy_refs, + }; +} + +function isToolAllowedBySandbox(toolName: string, sandbox: SandboxConfig): boolean { + if (sandbox.blockedTools.includes(toolName)) { + return false; + } + if (sandbox.allowedTools.includes("*")) { + return true; + } + return sandbox.allowedTools.includes(toolName); +} diff --git a/harness/src/governance/sandbox-policies.ts b/harness/src/governance/sandbox-policies.ts new file mode 100644 index 0000000..a70cba5 --- /dev/null +++ b/harness/src/governance/sandbox-policies.ts @@ -0,0 +1,50 @@ +import type { SandboxConfig } from "../schemas/sandbox.js"; +import type { PolicySet, PolicyRule } from "./policy-evaluator.js"; + +export function buildSandboxPolicySets(sandbox: SandboxConfig): PolicySet[] { + const policies: PolicySet[] = []; + + const denyRules: PolicyRule[] = sandbox.blockedTools.map((toolName, idx) => ({ + rule_id: `sandbox-deny-${idx + 1}`, + action: "deny", + resource: `tool:${toolName}`, + reason: `Tool "${toolName}" is blocked by sandbox`, + })); + if (denyRules.length > 0) { + 
policies.push({ + policy_id: "sandbox-deny", + policy_type: "safety", + priority: 1, + rules: denyRules, + }); + } + + const allowRules = buildAllowRules(sandbox.allowedTools); + if (allowRules.length > 0) { + policies.push({ + policy_id: "sandbox-allow", + policy_type: "operational", + priority: 100, + rules: allowRules, + }); + } + + return policies; +} + +function buildAllowRules(allowedTools: string[]): PolicyRule[] { + if (allowedTools.includes("*")) { + return [ + { + rule_id: "sandbox-allow-all", + action: "allow", + resource: "tool:*", + }, + ]; + } + return allowedTools.map((toolName, idx) => ({ + rule_id: `sandbox-allow-${idx + 1}`, + action: "allow", + resource: `tool:${toolName}`, + })); +} diff --git a/harness/src/index.ts b/harness/src/index.ts index e947042..3fd10a3 100644 --- a/harness/src/index.ts +++ b/harness/src/index.ts @@ -17,6 +17,9 @@ export * from "./backends/index.js"; // Tools export * from "./tools/index.js"; +// Governance +export * from "./governance/index.js"; + // Adapters export * from "./adapters/index.js"; diff --git a/harness/src/tools/index.ts b/harness/src/tools/index.ts index 6a4ed1d..1d73aea 100644 --- a/harness/src/tools/index.ts +++ b/harness/src/tools/index.ts @@ -1,3 +1,4 @@ export { createMockToolRegistry, resetMockFileSystem, getMockFileSystem } from "./mock-tools.js"; +export { toLLMToolDefinitions } from "./llm-tools.js"; export { defineToolContract } from "./tool-types.js"; export type { ToolContract, ToolResult, ErrorCategory, ToolCallRecord } from "./tool-types.js"; diff --git a/harness/src/tools/llm-tools.ts b/harness/src/tools/llm-tools.ts new file mode 100644 index 0000000..2edbfbb --- /dev/null +++ b/harness/src/tools/llm-tools.ts @@ -0,0 +1,15 @@ +import type { LLMToolDefinition } from "../backends/types.js"; +import type { ToolContract } from "../schemas/tool-invocation.js"; + +export function toLLMToolDefinitions( + contracts: ToolContract[], + allowlist?: ReadonlySet, +): LLMToolDefinition[] { + return 
contracts + .filter((contract) => (allowlist ? allowlist.has(contract.name) : true)) + .map((contract) => ({ + name: contract.name, + description: contract.description, + inputSchema: contract.inputSchema, + })); +} diff --git a/harness/tests/chat-agent.test.ts b/harness/tests/chat-agent.test.ts index d7cc190..a7a2730 100644 --- a/harness/tests/chat-agent.test.ts +++ b/harness/tests/chat-agent.test.ts @@ -7,6 +7,7 @@ import { validateActionObservationPairing, validateTermination, } from "../src/core/validator.js"; +import type { PolicySet } from "../src/governance/policy-evaluator.js"; describe("ChatAgent (mock backend)", () => { beforeEach(() => { @@ -86,4 +87,32 @@ describe("ChatAgent (mock backend)", () => { const ids = trace.steps.map((s) => s.id); expect(new Set(ids).size).toBe(ids.length); }); + + it("routes tool calls through policy engine decisions", async () => { + const denySearch: PolicySet = { + policy_id: "deny-search", + policy_type: "safety", + priority: 1, + rules: [ + { + rule_id: "deny-search-rule", + action: "deny", + resource: "tool:search", + reason: "Search denied for test", + }, + ], + }; + const trace = await runChatAgent( + new MockLLMBackend(), + "Search for weather alerts.", + createMockToolRegistry(), + undefined, + undefined, + undefined, + { policies: [denySearch] }, + ); + + expect(trace.termination).toBe("denied"); + expect(trace.final_answer).toContain("Search denied for test"); + }); }); diff --git a/harness/tests/coder-agent.test.ts b/harness/tests/coder-agent.test.ts index 797f54c..3795883 100644 --- a/harness/tests/coder-agent.test.ts +++ b/harness/tests/coder-agent.test.ts @@ -10,6 +10,7 @@ import { validateActionObservationPairing, validateTermination, } from "../src/core/validator.js"; +import type { PolicySet } from "../src/governance/policy-evaluator.js"; describe("CoderAgent (mock backend)", () => { beforeEach(() => { @@ -116,4 +117,32 @@ describe("CoderAgent (mock backend)", () => { trace.final_answer.includes("not 
allowlisted") || trace.final_answer.includes("blocked"); expect(blocked || stoppedEarly || trace.termination === "succeeded").toBe(true); }); + + it("routes coder tool calls through policy engine decisions", async () => { + const denyWrite: PolicySet = { + policy_id: "deny-write", + policy_type: "safety", + priority: 1, + rules: [ + { + rule_id: "deny-write-rule", + action: "deny", + resource: "tool:writeFile", + reason: "Write access disabled in test", + }, + ], + }; + const trace = await runCoderAgent( + new MockLLMBackend(), + "Modify src/main.ts to add a greeting.", + createMockToolRegistry(), + undefined, + undefined, + undefined, + { policies: [denyWrite] }, + ); + + expect(trace.termination).toBe("denied"); + expect(trace.final_answer).toContain("Write access disabled in test"); + }); }); diff --git a/harness/tests/fixtures/opa-decision-conformance.json b/harness/tests/fixtures/opa-decision-conformance.json new file mode 100644 index 0000000..c469373 --- /dev/null +++ b/harness/tests/fixtures/opa-decision-conformance.json @@ -0,0 +1,115 @@ +{ + "cases": [ + { + "name": "approved decision maps policy refs and decided_by", + "request": { + "requested_scope": { + "resource": "tool:search", + "action": "execute" + } + }, + "opa_result": { + "status": "approved", + "policy_refs": ["opa.allow.search"], + "decided_by": { + "kind": "policy", + "policy_id": "opa.allow.search" + } + }, + "expected": { + "status": "approved", + "policy_refs": ["opa.allow.search"], + "decided_by": { + "kind": "policy", + "policy_id": "opa.allow.search" + } + } + }, + { + "name": "narrowed decision preserves narrowed scope constraints", + "request": { + "requested_scope": { + "resource": "tool:search", + "action": "execute" + } + }, + "opa_result": { + "status": "narrowed", + "policy_refs": ["opa.narrow.search"], + "narrowed_scope": { + "resource": "tool:search", + "action": "execute", + "constraints": { + "max_results": 3, + "excluded_fields": ["raw_html"] + } + } + }, + "expected": { + 
"status": "narrowed", + "policy_refs": ["opa.narrow.search"], + "narrowed_scope": { + "resource": "tool:search", + "action": "execute", + "constraints": { + "max_results": 3, + "excluded_fields": ["raw_html"] + } + } + } + }, + { + "name": "denied decision defaults denial reason", + "request": { + "requested_scope": { + "resource": "tool:search", + "action": "execute" + } + }, + "opa_result": { + "status": "denied", + "policy_refs": ["opa.deny.search"] + }, + "expected": { + "status": "denied", + "policy_refs": ["opa.deny.search"], + "denial_reason": "Denied by OPA policy" + } + }, + { + "name": "missing status fails closed", + "request": { + "requested_scope": { + "resource": "tool:search", + "action": "execute" + } + }, + "opa_result": { + "policy_refs": ["opa.invalid"] + }, + "expected": { + "status": "denied", + "denial_reason_contains": "OPA policy evaluation failed" + } + }, + { + "name": "malformed policy_refs are normalized to empty list", + "request": { + "requested_scope": { + "resource": "tool:search", + "action": "execute" + } + }, + "opa_result": { + "status": "approved", + "policy_refs": { + "id": "opa.bad" + } + }, + "expected": { + "status": "approved", + "policy_refs": [] + } + } + ] +} diff --git a/harness/tests/governed-agent.test.ts b/harness/tests/governed-agent.test.ts index 47ab278..8985d48 100644 --- a/harness/tests/governed-agent.test.ts +++ b/harness/tests/governed-agent.test.ts @@ -143,4 +143,81 @@ describe("runGovernedAgent (mock backend)", () => { result.state.delegationRequests.length, ); }); + + it("enforces narrowed constraints at dispatch time", async () => { + const backend = new MockLLMBackend([ + { + pattern: /./, + response: "I need to call search.", + toolCalls: [ + { + toolName: "search", + arguments: { query: "tokyo", user_email: "user@example.com" }, + }, + ], + }, + ]); + const narrowSearch: PolicySet = { + policy_id: "narrow-search", + policy_type: "compliance", + priority: 1, + rules: [ + { + rule_id: "exclude-email", + 
action: "narrow", + resource: "tool:search", + narrowing: { excluded_fields: ["user_email"] }, + }, + ], + }; + + const result = await runGovernedAgent( + config({ + objective: "Search records.", + backend, + policies: [narrowSearch], + }), + ); + + expect(result.state.toolExecutionReceipts.length).toBe(1); + expect(result.state.toolExecutionReceipts[0]?.status).toBe("error"); + expect(result.state.toolExecutionReceipts[0]?.error_category).toBe( + "permission_denied", + ); + const deniedObservation = result.trace.steps.find( + (step) => + step.type === "observation" && step.content.includes("excluded_fields"), + ); + expect(deniedObservation).toBeDefined(); + }); + + it("enforces policy consultation hooks at finalize", async () => { + const denyFinalize: PolicySet = { + policy_id: "deny-finalize", + policy_type: "safety", + priority: 1, + rules: [ + { + rule_id: "deny-finalize-phase", + action: "deny", + resource: "phase:finalize", + reason: "Final responses require explicit approval", + }, + ], + }; + + const result = await runGovernedAgent( + config({ + objective: "Explain this architecture in one paragraph.", + policies: [denyFinalize, allowAllTools], + }), + ); + + expect(result.state.completionStatus).toBe("denied"); + expect(result.trace.final_answer).toContain("Request denied by policy"); + const sawFinalizeDenial = result.trace.steps.some((step) => + step.content.includes("Policy denied at phase finalize"), + ); + expect(sawFinalizeDenial).toBe(true); + }); }); diff --git a/harness/tests/manifest.test.ts b/harness/tests/manifest.test.ts index aac1f98..5ea814c 100644 --- a/harness/tests/manifest.test.ts +++ b/harness/tests/manifest.test.ts @@ -195,6 +195,36 @@ describe("buildManifest", () => { expect(manifest.manifest_id).toBeTruthy(); expect(manifest.timestamp).toBeTruthy(); }); + + it("applies tool overrides from policy engine previews", () => { + const manifest = buildManifest({ + runId: "run-override", + agentId: "agent-preview", + phase: "plan", + 
toolContracts: [searchContract, calcContract], + sandbox: DEFAULT_SANDBOX_CONFIG, + policies: [], + budget, + toolOverrides: { + search: { + accessLevel: "blocked", + reason: "Denied by external policy engine", + }, + calculator: { + accessLevel: "requires_delegation", + constraints: { max_results: 1 }, + reason: "Requires narrowed delegation", + }, + }, + }); + + expect(manifest.tools.blocked).toContain("search"); + const calc = manifest.tools.available.find((tool) => tool.name === "calculator"); + expect(calc).toBeDefined(); + expect(calc?.access_level).toBe("requires_delegation"); + expect(calc?.constraints).toEqual({ max_results: 1 }); + expect(manifest.active_constraints.some((line) => line.includes("Denied by external policy engine"))).toBe(true); + }); }); describe("manifestToCompactText", () => { diff --git a/harness/tests/openai-compat-streaming.test.ts b/harness/tests/openai-compat-streaming.test.ts index 41d8199..22670a4 100644 --- a/harness/tests/openai-compat-streaming.test.ts +++ b/harness/tests/openai-compat-streaming.test.ts @@ -161,4 +161,75 @@ describe("OpenAICompatBackend streaming", () => { const body = JSON.parse(String(init.body)); expect(body.max_tokens).toBe(32); }); + + it("serializes tool schemas and tool choice in requests", async () => { + const fetchMock = vi.fn().mockResolvedValue( + new Response( + JSON.stringify({ + model: "buffered-model", + choices: [ + { + message: { content: "ok" }, + finish_reason: "stop", + }, + ], + usage: { prompt_tokens: 2, completion_tokens: 1 }, + }), + { + status: 200, + headers: { "Content-Type": "application/json" }, + }, + ), + ); + vi.stubGlobal("fetch", fetchMock); + + const backend = new OpenAICompatBackend({ + baseUrl: "https://example.test/v1", + apiKey: "", + model: "buffered-model", + maxTokens: 4096, + }); + + await backend.chat(messages, { + tools: [ + { + name: "search", + description: "Search records", + inputSchema: { + type: "object", + properties: { + query: { type: "string" }, + }, + 
required: ["query"], + }, + }, + ], + toolChoice: { name: "search" }, + }); + + const [, init] = fetchMock.mock.calls[0] as [string, RequestInit]; + const body = JSON.parse(String(init.body)); + expect(body.tools).toEqual([ + { + type: "function", + function: { + name: "search", + description: "Search records", + parameters: { + type: "object", + properties: { + query: { type: "string" }, + }, + required: ["query"], + }, + }, + }, + ]); + expect(body.tool_choice).toEqual({ + type: "function", + function: { + name: "search", + }, + }); + }); }); diff --git a/harness/tests/policy-engine-conformance.test.ts b/harness/tests/policy-engine-conformance.test.ts new file mode 100644 index 0000000..dae0e1b --- /dev/null +++ b/harness/tests/policy-engine-conformance.test.ts @@ -0,0 +1,115 @@ +import { readFileSync } from "node:fs"; +import { dirname, resolve } from "node:path"; +import { fileURLToPath } from "node:url"; +import { afterEach, describe, expect, it, vi } from "vitest"; +import { OpaPolicyEngine } from "../src/governance/index.js"; +import type { + DecidedBy, + DelegationRequest, + DelegationStatus, + RequestedScope, +} from "../src/schemas/delegation.js"; + +interface FixtureExpected { + status: DelegationStatus; + policy_refs?: string[]; + narrowed_scope?: RequestedScope; + denial_reason?: string; + denial_reason_contains?: string; + decided_by?: Partial; +} + +interface FixtureCase { + name: string; + request?: Partial; + opa_result: Record; + expected: FixtureExpected; +} + +interface FixtureFile { + cases: FixtureCase[]; +} + +function loadFixtureFile(): FixtureFile { + const filePath = resolve( + dirname(fileURLToPath(import.meta.url)), + "fixtures/opa-decision-conformance.json", + ); + return JSON.parse(readFileSync(filePath, "utf8")) as FixtureFile; +} + +function makeRequest(overrides?: Partial): DelegationRequest { + const base: DelegationRequest = { + schema_version: "0.2", + request_id: "req-conformance", + requester: "agent-conformance", + run_id: 
"run-conformance", + intent: "Conformance policy check", + justification: "Verify OPA decision mapping contract", + requested_scope: { + resource: "tool:search", + action: "execute", + }, + observed_at: "2026-01-01T00:00:00.000Z", + }; + return { + ...base, + ...overrides, + requested_scope: overrides?.requested_scope ?? base.requested_scope, + }; +} + +afterEach(() => { + vi.restoreAllMocks(); + vi.unstubAllGlobals(); +}); + +describe("OPA decision conformance fixtures", () => { + const fixtures = loadFixtureFile(); + + for (const fixtureCase of fixtures.cases) { + it(fixtureCase.name, async () => { + const fetchMock = vi.fn().mockResolvedValue( + new Response( + JSON.stringify({ + result: fixtureCase.opa_result, + }), + { + status: 200, + headers: { "Content-Type": "application/json" }, + }, + ), + ); + vi.stubGlobal("fetch", fetchMock); + + const engine = new OpaPolicyEngine({ + baseUrl: "https://opa.example", + policyPath: "open_cot/delegation", + }); + const request = makeRequest(fixtureCase.request); + const decision = await engine.evaluate(request, "agent-1"); + + expect(decision.request_id).toBe(request.request_id); + expect(decision.decision_id).toMatch(/^[0-9a-f]{64}$/); + expect(decision.status).toBe(fixtureCase.expected.status); + + if (fixtureCase.expected.policy_refs !== undefined) { + expect(decision.policy_refs).toEqual(fixtureCase.expected.policy_refs); + } + if (fixtureCase.expected.narrowed_scope !== undefined) { + expect(decision.narrowed_scope).toEqual(fixtureCase.expected.narrowed_scope); + } + if (fixtureCase.expected.denial_reason !== undefined) { + expect(decision.denial_reason).toBe(fixtureCase.expected.denial_reason); + } + if (fixtureCase.expected.denial_reason_contains !== undefined) { + expect(decision.denial_reason ?? 
"").toContain( + fixtureCase.expected.denial_reason_contains, + ); + } + if (fixtureCase.expected.decided_by !== undefined) { + expect(decision.decided_by).toMatchObject(fixtureCase.expected.decided_by); + } + }); + } +}); diff --git a/harness/tests/policy-engine-live.test.ts b/harness/tests/policy-engine-live.test.ts new file mode 100644 index 0000000..c2bbfdb --- /dev/null +++ b/harness/tests/policy-engine-live.test.ts @@ -0,0 +1,68 @@ +import { describe, expect, it } from "vitest"; +import { OpaPolicyEngine } from "../src/governance/index.js"; +import type { DelegationRequest } from "../src/schemas/delegation.js"; + +function makeRequest(overrides?: Partial): DelegationRequest { + return { + schema_version: "0.2", + request_id: "req-live-opa", + requester: "agent-live", + run_id: "run-live", + intent: "Live OPA integration check", + justification: "Validate runtime OPA decision mapping", + requested_scope: { + resource: "tool:search", + action: "execute", + }, + observed_at: new Date().toISOString(), + ...overrides, + }; +} + +const opaBaseUrl = process.env["OPA_BASE_URL"]; +const opaPolicyPath = process.env["OPA_POLICY_PATH"] ?? "open_cot/delegation"; +const hasLiveConfig = Boolean(opaBaseUrl); +const describeLive = hasLiveConfig ? describe : describe.skip; + +function parsePositiveInt(value: string | undefined): number | undefined { + if (!value) { + return undefined; + } + const parsed = Number.parseInt(value, 10); + if (!Number.isFinite(parsed) || parsed <= 0) { + return undefined; + } + return parsed; +} + +describeLive("OpaPolicyEngine live integration", () => { + it("queries a live OPA server and returns a valid delegation decision", async () => { + const engine = new OpaPolicyEngine({ + baseUrl: opaBaseUrl!, + policyPath: opaPolicyPath, + bearerToken: process.env["OPA_BEARER_TOKEN"], + timeoutMs: parsePositiveInt(process.env["OPA_TIMEOUT_MS"]), + inputContext: { + policy_mode: process.env["OPA_LIVE_POLICY_MODE"] ?? 
"allow", + source: "vitest-live", + }, + }); + const request = makeRequest(); + const decision = await engine.evaluate(request, "agent-live-01"); + + expect(decision.request_id).toBe(request.request_id); + expect(decision.decision_id).toMatch(/^[0-9a-f]{64}$/); + expect(decision.status).toMatch(/^(approved|denied|narrowed|escalated)$/); + expect(decision.decided_by.kind).toMatch(/^(policy|human|harness)$/); + expect(Date.parse(decision.decided_at)).not.toBeNaN(); + expect(Array.isArray(decision.policy_refs)).toBe(true); + + if (decision.status === "narrowed") { + expect(decision.narrowed_scope).toBeDefined(); + } + if (decision.status === "denied") { + expect(typeof decision.denial_reason).toBe("string"); + expect((decision.denial_reason ?? "").length).toBeGreaterThan(0); + } + }); +}); diff --git a/harness/tests/policy-engine.test.ts b/harness/tests/policy-engine.test.ts new file mode 100644 index 0000000..bed4a5d --- /dev/null +++ b/harness/tests/policy-engine.test.ts @@ -0,0 +1,282 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; +import { + InProcessPolicyEngine, + OpaPolicyEngine, + createDelegationDecision, + type DelegationPolicyEngine, +} from "../src/governance/index.js"; +import type { PolicySet } from "../src/governance/policy-evaluator.js"; +import type { DelegationRequest } from "../src/schemas/delegation.js"; + +function makeRequest(overrides?: Partial): DelegationRequest { + return { + schema_version: "0.2", + request_id: "req-policy-test", + requester: "agent-test", + run_id: "run-test", + intent: "Search for data", + justification: "Need external evidence", + requested_scope: { + resource: "tool:search", + action: "execute", + }, + observed_at: new Date().toISOString(), + ...overrides, + }; +} + +afterEach(() => { + vi.restoreAllMocks(); + vi.unstubAllGlobals(); +}); + +describe("InProcessPolicyEngine", () => { + it("evaluates policies with existing evaluator semantics", async () => { + const allowSearch: PolicySet = { + policy_id: 
"allow-search", + policy_type: "operational", + priority: 1, + rules: [ + { + rule_id: "allow-search-tool", + action: "allow", + resource: "tool:search", + }, + ], + }; + const engine = new InProcessPolicyEngine([allowSearch]); + + const decision = await engine.evaluate(makeRequest(), "agent-1"); + expect(decision.status).toBe("approved"); + expect(decision.policy_refs).toEqual(["allow-search"]); + }); + + it("allows phase consultation when no phase policies are configured", async () => { + const engine = new InProcessPolicyEngine([]); + const decision = await engine.consultPhase?.({ + runId: "run-1", + agentId: "agent-1", + objective: "Summarize docs", + phase: "frame", + }); + expect(decision).toEqual({ + status: "allowed", + policyRefs: [], + }); + }); + + it("denies phase consultation when a matching phase rule denies", async () => { + const denyFinalize: PolicySet = { + policy_id: "deny-finalize", + policy_type: "safety", + priority: 1, + rules: [ + { + rule_id: "deny-finalize-phase", + action: "deny", + resource: "phase:finalize", + reason: "Finalization requires human approval", + }, + ], + }; + const engine = new InProcessPolicyEngine([denyFinalize]); + const decision = await engine.consultPhase?.({ + runId: "run-1", + agentId: "agent-1", + objective: "Complete task", + phase: "finalize", + }); + expect(decision?.status).toBe("denied"); + expect(decision?.reason).toContain("Finalization requires human approval"); + }); + + it("previews tool access for manifest reconciliation", async () => { + const policy: PolicySet = { + policy_id: "tool-policy", + policy_type: "operational", + priority: 10, + rules: [ + { + rule_id: "allow-search", + action: "allow", + resource: "tool:search", + }, + { + rule_id: "narrow-calc", + action: "narrow", + resource: "tool:calculator", + narrowing: { max_results: 2 }, + }, + ], + }; + const engine = new InProcessPolicyEngine([policy]); + const preview = await engine.previewToolAccess?.({ + runId: "run-1", + agentId: "agent-1", + 
objective: "Research and compute", + phase: "plan", + tools: [ + { + name: "search", + description: "Search docs", + inputSchema: { type: "object" }, + expectedSideEffects: [], + timeoutMs: 1_000, + idempotent: true, + retryable: true, + failureTypes: ["not_found"], + }, + { + name: "calculator", + description: "Compute values", + inputSchema: { type: "object" }, + expectedSideEffects: [], + timeoutMs: 1_000, + idempotent: true, + retryable: false, + failureTypes: ["invalid_input"], + }, + { + name: "shell", + description: "Run shell", + inputSchema: { type: "object" }, + expectedSideEffects: ["filesystem"], + timeoutMs: 1_000, + idempotent: false, + retryable: false, + failureTypes: ["permission_denied"], + }, + ], + sandbox: { + allowedTools: ["*"], + blockedTools: ["shell"], + maxSteps: 20, + maxBranches: 3, + memoryAcl: { default: ["read"] }, + }, + }); + + expect(preview?.search?.accessLevel).toBe("pre_authorized"); + expect(preview?.calculator?.accessLevel).toBe("requires_delegation"); + expect(preview?.calculator?.constraints).toEqual({ max_results: 2 }); + expect(preview?.shell?.accessLevel).toBe("blocked"); + }); +}); + +describe("OpaPolicyEngine", () => { + it("maps a valid OPA decision result", async () => { + const fetchMock = vi.fn().mockResolvedValue( + new Response( + JSON.stringify({ + result: { + status: "narrowed", + policy_refs: ["opa.search.policy"], + narrowed_scope: { + resource: "tool:search", + action: "execute", + constraints: { max_results: 3 }, + }, + }, + }), + { + status: 200, + headers: { "Content-Type": "application/json" }, + }, + ), + ); + vi.stubGlobal("fetch", fetchMock); + + const engine = new OpaPolicyEngine({ + baseUrl: "https://opa.example", + policyPath: "open_cot/delegation", + }); + const decision = await engine.evaluate(makeRequest(), "agent-1"); + + expect(decision.status).toBe("narrowed"); + expect(decision.policy_refs).toEqual(["opa.search.policy"]); + expect(decision.narrowed_scope?.constraints?.max_results).toBe(3); + 
expect(fetchMock).toHaveBeenCalledTimes(1); + const [url, init] = fetchMock.mock.calls[0] as [string, RequestInit]; + expect(url).toBe("https://opa.example/v1/data/open_cot/delegation"); + const body = JSON.parse(String(init.body)); + expect(body.input.agent_id).toBe("agent-1"); + expect(body.input.request.request_id).toBe("req-policy-test"); + }); + + it("fails closed when OPA result is invalid", async () => { + const fetchMock = vi.fn().mockResolvedValue( + new Response(JSON.stringify({ result: { foo: "bar" } }), { + status: 200, + headers: { "Content-Type": "application/json" }, + }), + ); + vi.stubGlobal("fetch", fetchMock); + + const engine = new OpaPolicyEngine({ + baseUrl: "https://opa.example", + policyPath: "open_cot/delegation", + }); + const decision = await engine.evaluate(makeRequest(), "agent-1"); + + expect(decision.status).toBe("denied"); + expect(decision.denial_reason).toContain("OPA policy evaluation failed"); + }); + + it("uses fallback engine when OPA request fails", async () => { + const fetchMock = vi.fn().mockRejectedValue(new Error("network unavailable")); + vi.stubGlobal("fetch", fetchMock); + + const fallback: DelegationPolicyEngine = { + name: "fallback", + evaluate: async (request, agentId) => + createDelegationDecision(request, agentId, { + status: "approved", + decidedBy: { kind: "harness" }, + policyRefs: ["fallback-policy"], + outcomeKind: "fallback_allow", + }), + }; + + const engine = new OpaPolicyEngine({ + baseUrl: "https://opa.example", + policyPath: "open_cot/delegation", + fallbackEngine: fallback, + }); + const decision = await engine.evaluate(makeRequest(), "agent-1"); + + expect(decision.status).toBe("approved"); + expect(decision.policy_refs).toEqual(["fallback-policy"]); + }); + + it("forwards configured inputContext to OPA input", async () => { + const fetchMock = vi.fn().mockResolvedValue( + new Response( + JSON.stringify({ + result: { + status: "approved", + policy_refs: ["opa.allow"], + }, + }), + { + status: 200, + 
headers: { "Content-Type": "application/json" }, + }, + ), + ); + vi.stubGlobal("fetch", fetchMock); + + const engine = new OpaPolicyEngine({ + baseUrl: "https://opa.example", + policyPath: "open_cot/delegation", + inputContext: { policy_mode: "deny", request_source: "demo" }, + }); + await engine.evaluate(makeRequest(), "agent-1"); + + const [, init] = fetchMock.mock.calls[0] as [string, RequestInit]; + const body = JSON.parse(String(init.body)); + expect(body.input.context).toEqual({ + policy_mode: "deny", + request_source: "demo", + }); + }); +}); diff --git a/harness/tests/tool-registry.test.ts b/harness/tests/tool-registry.test.ts new file mode 100644 index 0000000..659f06a --- /dev/null +++ b/harness/tests/tool-registry.test.ts @@ -0,0 +1,152 @@ +import { describe, expect, it } from "vitest"; +import { ToolRegistry } from "../src/core/tool-registry.js"; +import { defineToolContract } from "../src/tools/tool-types.js"; +import { DEFAULT_SANDBOX_CONFIG } from "../src/schemas/sandbox.js"; + +describe("ToolRegistry authority and argument enforcement", () => { + it("rejects arguments that violate input schema", async () => { + const registry = new ToolRegistry(); + registry.register( + defineToolContract({ + name: "search", + description: "Search records", + inputSchema: { + type: "object", + required: ["query"], + properties: { + query: { type: "string" }, + }, + additionalProperties: false, + }, + }), + () => ({ output: { ok: true } }), + ); + + const result = await registry.call( + "search", + {}, + DEFAULT_SANDBOX_CONFIG, + { kind: "standing" }, + ); + expect(result.errorCategory).toBe("invalid_input"); + expect(result.error).toContain("Invalid arguments"); + }); + + it("blocks receipt-based calls when permission is invalid", async () => { + const registry = new ToolRegistry(); + let executed = false; + registry.register( + defineToolContract({ + name: "search", + description: "Search records", + inputSchema: { + type: "object", + required: ["query"], + 
properties: { + query: { type: "string" }, + }, + }, + }), + () => { + executed = true; + return { output: { ok: true } }; + }, + ); + + const result = await registry.call( + "search", + { query: "tokyo" }, + DEFAULT_SANDBOX_CONFIG, + { + kind: "receipt", + permissionId: "perm-1", + grantedScope: { resource: "tool:search", action: "execute" }, + isPermissionValid: () => false, + }, + ); + + expect(result.errorCategory).toBe("permission_denied"); + expect(result.error).toContain("not active"); + expect(executed).toBe(false); + }); + + it("enforces granted scope constraints on arguments", async () => { + const registry = new ToolRegistry(); + registry.register( + defineToolContract({ + name: "search", + description: "Search records", + inputSchema: { + type: "object", + required: ["query"], + properties: { + query: { type: "string" }, + user_email: { type: "string" }, + }, + }, + }), + () => ({ output: { ok: true } }), + ); + + const result = await registry.call( + "search", + { query: "tokyo", user_email: "x@example.com" }, + DEFAULT_SANDBOX_CONFIG, + { + kind: "receipt", + permissionId: "perm-2", + grantedScope: { + resource: "tool:search", + action: "execute", + constraints: { + excluded_fields: ["user_email"], + }, + }, + isPermissionValid: () => true, + }, + ); + + expect(result.errorCategory).toBe("permission_denied"); + expect(result.error).toContain("excluded_fields"); + }); + + it("executes when receipt scope and arguments are valid", async () => { + const registry = new ToolRegistry(); + registry.register( + defineToolContract({ + name: "search", + description: "Search records", + inputSchema: { + type: "object", + required: ["query"], + properties: { + query: { type: "string" }, + max_results: { type: "integer" }, + }, + }, + }), + () => ({ output: { answer: "ok" } }), + ); + + const result = await registry.call( + "search", + { query: "tokyo", max_results: 2 }, + DEFAULT_SANDBOX_CONFIG, + { + kind: "receipt", + permissionId: "perm-3", + grantedScope: { + 
resource: "tool:search", + action: "execute", + constraints: { + max_results: 5, + }, + }, + isPermissionValid: () => true, + }, + ); + + expect(result.error).toBeUndefined(); + expect(result.output).toEqual({ answer: "ok" }); + }); +}); From c2d165df3efd308576dd3ddb3d3bf5314b899ad4 Mon Sep 17 00:00:00 2001 From: "Byron Miller (MOBB)" Date: Mon, 20 Apr 2026 12:06:18 -0500 Subject: [PATCH 2/2] Fix CodeQL regex warning in OPA URL normalization. Replace slash-trimming regex operations with linear string scans so policy path/base URL handling remains safe for untrusted input and avoids ReDoS-pattern alerts. Made-with: Cursor --- harness/src/governance/opa-policy-engine.ts | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/harness/src/governance/opa-policy-engine.ts b/harness/src/governance/opa-policy-engine.ts index f75db5b..14a2eae 100644 --- a/harness/src/governance/opa-policy-engine.ts +++ b/harness/src/governance/opa-policy-engine.ts @@ -145,8 +145,8 @@ export class OpaPolicyEngine implements DelegationPolicyEngine { const timeoutMs = this.config.timeoutMs ?? 
DEFAULT_TIMEOUT_MS; const timeoutId = setTimeout(() => controller.abort(), timeoutMs); try { - const path = this.config.policyPath.replace(/^\/+/, ""); - const url = `${this.config.baseUrl.replace(/\/+$/, "")}/v1/data/${path}`; + const path = trimLeadingSlashes(this.config.policyPath); + const url = `${trimTrailingSlashes(this.config.baseUrl)}/v1/data/${path}`; const response = await fetch(url, { method: "POST", headers: { @@ -298,3 +298,19 @@ function isToolAllowedBySandbox(toolName: string, sandbox: SandboxConfig): boole } return sandbox.allowedTools.includes(toolName); } + +function trimLeadingSlashes(value: string): string { + let start = 0; + while (start < value.length && value.charCodeAt(start) === 47) { + start += 1; + } + return value.slice(start); +} + +function trimTrailingSlashes(value: string): string { + let end = value.length; + while (end > 0 && value.charCodeAt(end - 1) === 47) { + end -= 1; + } + return value.slice(0, end); +}