diff --git a/.github/workflows/mcp-conformance.yml b/.github/workflows/mcp-conformance.yml
new file mode 100644
index 0000000..c0af853
--- /dev/null
+++ b/.github/workflows/mcp-conformance.yml
@@ -0,0 +1,115 @@
+name: MCP conformance
+
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+
+# Least-privilege token scopes: checkout only needs to read the repository,
+# and publishing SARIF to code scanning requires security-events: write.
+permissions:
+  contents: read
+  security-events: write
+
+jobs:
+  mcp-conformance:
+    runs-on: ubuntu-latest
+    # Bound the job so a hung control plane or probe cannot hold the runner.
+    timeout-minutes: 15
+
+    steps:
+      - name: Checkout Varden
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.12"
+          cache: pip
+
+      - name: Install Varden with MCP extras
+        run: pip install -e ".[mcp]"
+
+      # Copy the pack's "template" object into ./policy.json.
+      - name: Bootstrap policy
+        run: |
+          python -c "
+          import json, pathlib
+          p = pathlib.Path('policy-packs/baseline-operational-safety.json')
+          pathlib.Path('policy.json').write_text(
+              json.dumps(json.loads(p.read_text(encoding='utf-8'))['template'], indent=2) + '\n',
+              encoding='utf-8'
+          )
+          "
+
+      # Run the API in the background; export its PID for the "Stop Varden" step.
+      - name: Start Varden control plane
+        run: |
+          python -m varden.api --config examples/dev.env &
+          echo "VARDEN_PID=$!" >> "$GITHUB_ENV"
+
+      # Poll /health once per second, giving up after 20 attempts.
+      - name: Wait for control plane
+        run: |
+          for i in $(seq 1 20); do
+            if curl -sf http://127.0.0.1:8000/health > /dev/null; then
+              echo "Varden is up"; exit 0
+            fi
+            echo "Waiting... ($i/20)"; sleep 1
+          done
+          echo "Varden did not start in time"; exit 1
+
+      # SARIF upload enabled here only — one upload per job per tool is the limit.
+      - name: Built-in MCP conformance
+        uses: markndg/mcp-probe@v0.2.0
+        env:
+          VARDEN_BASE_URL: http://127.0.0.1:8000
+          VARDEN_API_KEY: admin-demo-key
+        with:
+          server-command: varden-mcp
+          version: v0.2.0
+          artifact-name: mcp-probe-conformance
+          junit: mcp-probe-results/conformance.xml
+          sarif: mcp-probe-results/conformance.sarif
+          report: mcp-probe-results/conformance.json
+          upload-sarif: "true"
+          upload-artifact: "true"
+
+      # SARIF upload disabled — artifact only.
+      - name: Functional suite
+        uses: markndg/mcp-probe@v0.2.0
+        env:
+          VARDEN_BASE_URL: http://127.0.0.1:8000
+          VARDEN_API_KEY: admin-demo-key
+        with:
+          server-command: varden-mcp
+          suite: tests/mcp/varden_functional.suite.json
+          version: v0.2.0
+          artifact-name: mcp-probe-functional
+          junit: mcp-probe-results/functional.xml
+          sarif: mcp-probe-results/functional.sarif
+          report: mcp-probe-results/functional.json
+          upload-sarif: "false"
+          upload-artifact: "true"
+
+      # SARIF upload disabled — artifact only. Keep this suite last: its final step calls varden_put_policy with four empty rule lists.
+      - name: Policy round-trip suite
+        uses: markndg/mcp-probe@v0.2.0
+        env:
+          VARDEN_BASE_URL: http://127.0.0.1:8000
+          VARDEN_API_KEY: admin-demo-key
+        with:
+          server-command: varden-mcp
+          suite: tests/mcp/varden_put_policy.suite.json
+          version: v0.2.0
+          artifact-name: mcp-probe-put-policy
+          junit: mcp-probe-results/put_policy.xml
+          sarif: mcp-probe-results/put_policy.sarif
+          report: mcp-probe-results/put_policy.json
+          upload-sarif: "false"
+          upload-artifact: "true"
+
+      - name: Stop Varden
+        if: always()
+        run: kill "$VARDEN_PID" || true
diff --git a/tests/mcp/varden_functional.suite.json b/tests/mcp/varden_functional.suite.json
new file mode 100644
index 0000000..31f67d2
--- /dev/null
+++ b/tests/mcp/varden_functional.suite.json
@@ -0,0 +1,537 @@
+{
+  "version": 2,
+  "session": "per_scenario",
+  "server": {
+    "command": "varden-mcp",
+    "args": [],
+    "env": {}
+  },
+  "scenarios": [
+
+    {
+      "name": "varden_health — control plane is reachable",
+      "description": "Calls varden_health and asserts the response is a non-empty text content block. 
If Varden is not running this will fail, which is the correct signal.", + "skip_unless_any_capability": ["tools"], + "steps": [ + { + "call_tool": { + "name": "varden_health", + "arguments": {} + }, + "expect": { + "result_schema": { + "type": "object", + "required": ["content"], + "properties": { + "content": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "required": ["type", "text"], + "properties": { + "type": { "const": "text" }, + "text": { "type": "string", "minLength": 1 } + } + } + } + } + } + } + } + ] + }, + + { + "name": "varden_get_events — returns paginated event list", + "description": "Fetches the first page of events (limit 5). Asserts the response is a text content block. An empty event store is fine; the shape must still be valid.", + "skip_unless_any_capability": ["tools"], + "steps": [ + { + "call_tool": { + "name": "varden_get_events", + "arguments": { "limit": 5, "offset": 0 } + }, + "expect": { + "result_schema": { + "type": "object", + "required": ["content"], + "properties": { + "content": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "required": ["type", "text"], + "properties": { + "type": { "const": "text" }, + "text": { "type": "string", "minLength": 1 } + } + } + } + } + } + } + } + ] + }, + + { + "name": "varden_get_alerts — returns alert list", + "description": "Fetches active alerts. 
An empty list is valid; the tool must respond without error.", + "skip_unless_any_capability": ["tools"], + "steps": [ + { + "call_tool": { + "name": "varden_get_alerts", + "arguments": {} + }, + "expect": { + "result_schema": { + "type": "object", + "required": ["content"], + "properties": { + "content": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "required": ["type", "text"], + "properties": { + "type": { "const": "text" }, + "text": { "type": "string", "minLength": 1 } + } + } + } + } + } + } + } + ] + }, + + { + "name": "varden_get_dashboard — returns KPI overview", + "description": "Fetches dashboard overview. Must return a non-empty text block regardless of event volume.", + "skip_unless_any_capability": ["tools"], + "steps": [ + { + "call_tool": { + "name": "varden_get_dashboard", + "arguments": {} + }, + "expect": { + "result_schema": { + "type": "object", + "required": ["content"], + "properties": { + "content": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "required": ["type", "text"], + "properties": { + "type": { "const": "text" }, + "text": { "type": "string", "minLength": 1 } + } + } + } + } + } + } + } + ] + }, + + { + "name": "varden_get_policy — returns active policy document", + "description": "Fetches the live policy. 
The response text must be non-empty; structural validation of the policy JSON is done by varden_validate_policy.", + "skip_unless_any_capability": ["tools"], + "steps": [ + { + "call_tool": { + "name": "varden_get_policy", + "arguments": {} + }, + "expect": { + "result_schema": { + "type": "object", + "required": ["content"], + "properties": { + "content": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "required": ["type", "text"], + "properties": { + "type": { "const": "text" }, + "text": { "type": "string", "minLength": 1 } + } + } + } + } + } + } + } + ] + }, + + { + "name": "varden_validate_policy — accepts a well-formed minimal policy", + "description": "Submits a minimal valid policy (four empty rule lists). Must return without a validation error.", + "skip_unless_any_capability": ["tools"], + "steps": [ + { + "call_tool": { + "name": "varden_validate_policy", + "arguments": { + "policy": { + "block": [], + "warn": [], + "monitor": [], + "allow": [] + } + } + }, + "expect": { + "result_schema": { + "type": "object", + "required": ["content"], + "properties": { + "content": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "required": ["type", "text"], + "properties": { + "type": { "const": "text" }, + "text": { "type": "string", "minLength": 1 } + } + } + } + } + } + } + } + ] + }, + + { + "name": "varden_validate_policy — rejects a malformed policy", + "description": "Submits a policy with a top-level string instead of the expected four-list document. 
Must return an error response (isError true) or a text block describing the validation failure — not a clean success.", + "skip_unless_any_capability": ["tools"], + "steps": [ + { + "call_tool": { + "name": "varden_validate_policy", + "arguments": { + "policy": "this is not a valid policy" + } + }, + "expect": { + "result_schema": { + "type": "object", + "required": ["content"], + "properties": { + "isError": { "type": "boolean" }, + "content": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "required": ["type", "text"], + "properties": { + "type": { "const": "text" }, + "text": { "type": "string", "minLength": 1 } + } + } + } + } + } + } + } + ] + }, + + { + "name": "varden_get_policy_versions — returns version history", + "description": "Fetches policy version history. Must return a non-empty text block; an empty history is fine.", + "skip_unless_any_capability": ["tools"], + "steps": [ + { + "call_tool": { + "name": "varden_get_policy_versions", + "arguments": {} + }, + "expect": { + "result_schema": { + "type": "object", + "required": ["content"], + "properties": { + "content": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "required": ["type", "text"], + "properties": { + "type": { "const": "text" }, + "text": { "type": "string", "minLength": 1 } + } + } + } + } + } + } + } + ] + }, + + { + "name": "varden_guard — allow decision round-trip", + "description": "Submits a benign tool_call action (test_tool with no suspicious payload). Expects a decision field in the response. 
The decision may be allow, warn, or monitor depending on the active policy — any is acceptable; the test asserts shape not outcome.",
+      "skip_unless_any_capability": ["tools"],
+      "steps": [
+        {
+          "call_tool": {
+            "name": "varden_guard",
+            "arguments": {
+              "type": "tool_call",
+              "tool": "mcp_probe_test_tool",
+              "agent_name": "mcp-probe",
+              "args": { "input": "conformance-check" }
+            }
+          },
+          "expect": {
+            "result_schema": {
+              "type": "object",
+              "required": ["content"],
+              "properties": {
+                "content": {
+                  "type": "array",
+                  "minItems": 1,
+                  "items": {
+                    "type": "object",
+                    "required": ["type", "text"],
+                    "properties": {
+                      "type": { "const": "text" },
+                      "text": { "type": "string", "minLength": 1 }
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+      ]
+    },
+
+    {
+      "name": "varden_guard — block decision for known-dangerous tool",
+      "description": "Submits a tool_call for delete_database, which is blocked in the baseline operational safety policy pack. Expects the tool to return (not crash) with a response that includes the word 'block'. Skip this scenario if you are not running the baseline policy pack.",
+      "skip_unless_any_capability": ["tools"],
+      "steps": [
+        {
+          "call_tool": {
+            "name": "varden_guard",
+            "arguments": {
+              "type": "tool_call",
+              "tool": "delete_database",
+              "agent_name": "mcp-probe",
+              "args": {}
+            }
+          },
+          "expect": {
+            "result_schema": {
+              "type": "object",
+              "required": ["content"],
+              "properties": {
+                "content": {
+                  "type": "array",
+                  "minItems": 1,
+                  "items": {
+                    "type": "object",
+                    "required": ["type", "text"],
+                    "properties": {
+                      "type": { "const": "text" },
+                      "text": { "type": "string", "minLength": 1, "pattern": "[Bb][Ll][Oo][Cc][Kk]" }
+                    }
+                  }
+                }
+              }
+            }
+          }
+        }
+      ]
+    },
+
+    {
+      "name": "varden_log_event — accepts a completed outcome",
+      "description": "Logs a completed allow outcome directly to the event store, bypassing guard. 
Simulates the pattern where the tool has already run and you are recording the result.", + "skip_unless_any_capability": ["tools"], + "steps": [ + { + "call_tool": { + "name": "varden_log_event", + "arguments": { + "type": "tool_call", + "tool": "mcp_probe_test_tool", + "agent_name": "mcp-probe", + "outcome": "allow", + "args": { "input": "conformance-log-check" } + } + }, + "expect": { + "result_schema": { + "type": "object", + "required": ["content"], + "properties": { + "content": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "required": ["type", "text"], + "properties": { + "type": { "const": "text" }, + "text": { "type": "string", "minLength": 1 } + } + } + } + } + } + } + } + ] + }, + + { + "name": "varden_get_workflows — returns workflow list", + "description": "Fetches configured workflows. An empty list is valid.", + "skip_unless_any_capability": ["tools"], + "steps": [ + { + "call_tool": { + "name": "varden_get_workflows", + "arguments": {} + }, + "expect": { + "result_schema": { + "type": "object", + "required": ["content"], + "properties": { + "content": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "required": ["type", "text"], + "properties": { + "type": { "const": "text" }, + "text": { "type": "string", "minLength": 1 } + } + } + } + } + } + } + } + ] + }, + + { + "name": "varden_get_jobs — returns background job list", + "description": "Fetches recent background jobs and their status. 
An empty list is valid.", + "skip_unless_any_capability": ["tools"], + "steps": [ + { + "call_tool": { + "name": "varden_get_jobs", + "arguments": {} + }, + "expect": { + "result_schema": { + "type": "object", + "required": ["content"], + "properties": { + "content": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "required": ["type", "text"], + "properties": { + "type": { "const": "text" }, + "text": { "type": "string", "minLength": 1 } + } + } + } + } + } + } + } + ] + }, + + { + "name": "guard then events — guard call appears in event stream", + "description": "Multi-step scenario: submits a guard check, then fetches the event list and asserts at least one event exists. Validates the end-to-end write path through the MCP server into the control plane event store.", + "skip_unless_any_capability": ["tools"], + "steps": [ + { + "call_tool": { + "name": "varden_guard", + "arguments": { + "type": "tool_call", + "tool": "mcp_probe_e2e_tool", + "agent_name": "mcp-probe", + "args": { "input": "e2e-trace-check" } + } + }, + "expect": { + "result_schema": { + "type": "object", + "required": ["content"], + "properties": { + "content": { + "type": "array", + "minItems": 1 + } + } + } + } + }, + { + "call_tool": { + "name": "varden_get_events", + "arguments": { "limit": 10, "offset": 0 } + }, + "expect": { + "result_schema": { + "type": "object", + "required": ["content"], + "properties": { + "content": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "required": ["type", "text"], + "properties": { + "type": { "const": "text" }, + "text": { "type": "string", "minLength": 1 } + } + } + } + } + } + } + } + ] + } + + ] +} diff --git a/tests/mcp/varden_put_policy.suite.json b/tests/mcp/varden_put_policy.suite.json new file mode 100644 index 0000000..dbe6b7d --- /dev/null +++ b/tests/mcp/varden_put_policy.suite.json @@ -0,0 +1,111 @@ +{ + "version": 2, + "session": "per_scenario", + "server": { + "command": "varden-mcp", + "args": 
[], + "env": {} + }, + "scenarios": [ + { + "name": "varden_put_policy — round-trip: fetch, replace with same policy, verify", + "description": "Three-step policy write test. Step 1 reads the active policy and checks the response shape. Step 2 validates a minimal policy (four empty rule lists). Step 3 puts that same minimal policy and asserts the tool returns a well-formed response. NOTE: step 3 replaces the active policy with the empty one — the policy fetched in step 1 is not written back — so run this suite last and only against a disposable instance.", + "skip_unless_any_capability": ["tools"], + "steps": [ + { + "call_tool": { + "name": "varden_get_policy", + "arguments": {} + }, + "expect": { + "result_schema": { + "type": "object", + "required": ["content"], + "properties": { + "content": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "required": ["type", "text"], + "properties": { + "type": { "const": "text" }, + "text": { "type": "string", "minLength": 1 } + } + } + } + } + } + } + }, + { + "call_tool": { + "name": "varden_validate_policy", + "arguments": { + "policy": { + "block": [], + "warn": [], + "monitor": [], + "allow": [] + } + } + }, + "expect": { + "result_schema": { + "type": "object", + "required": ["content"], + "properties": { + "content": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "required": ["type", "text"], + "properties": { + "type": { "const": "text" }, + "text": { "type": "string", "minLength": 1 } + } + } + } + } + } + } + }, + { + "call_tool": { + "name": "varden_put_policy", + "arguments": { + "policy": { + "block": [], + "warn": [], + "monitor": [], + "allow": [] + } + } + }, + "expect": { + "result_schema": { + "type": "object", + "required": ["content"], + "properties": { + "isError": { "type": "boolean" }, + "content": { + "type": "array", + "minItems": 1, + "items": { + "type": "object", + "required": ["type", "text"], + "properties": { + "type": { "const": "text" }, + "text": { "type": "string", "minLength": 1 } + } + } + } + } + } + } + } + ] + } + ] +}