From 2930d4320ceec87b24635158c4f35b121662ce39 Mon Sep 17 00:00:00 2001 From: mdheller <21163552+mdheller@users.noreply.github.com> Date: Sun, 3 May 2026 14:22:59 -0400 Subject: [PATCH 1/7] Add service health check model --- src/agent_term/health.py | 303 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 303 insertions(+) create mode 100644 src/agent_term/health.py diff --git a/src/agent_term/health.py b/src/agent_term/health.py new file mode 100644 index 0000000..27ae15e --- /dev/null +++ b/src/agent_term/health.py @@ -0,0 +1,303 @@ +"""Service health checks for AgentTerm operator seams. + +Health checks are diagnostic only. They verify local configuration posture and optional +fixture/service seams without becoming authority over Matrix, Agent Registry, or +Policy Fabric. +""" + +from __future__ import annotations + +from dataclasses import dataclass, field +from pathlib import Path +from typing import Iterable + +from agent_term.agent_registry import AgentRegistration, InMemoryAgentRegistryBackend, ToolGrant +from agent_term.agent_registry_service import build_agent_registry_backend_from_config +from agent_term.config import AgentTermConfig +from agent_term.events import AgentTermEvent +from agent_term.matrix_service import MatrixServiceConfigError, NioMatrixServiceBackend +from agent_term.matrix_service import build_matrix_service_backend +from agent_term.policy_fabric import ALLOW, InMemoryPolicyFabricBackend, PolicyDecision +from agent_term.policy_fabric_service import build_policy_fabric_backend_from_config + + +OK = "ok" +WARN = "warn" +BLOCKED = "blocked" + + +@dataclass(frozen=True) +class HealthCheckResult: + """One service seam health result.""" + + name: str + status: str + message: str + metadata: dict[str, object] = field(default_factory=dict) + + @property + def ok(self) -> bool: + return self.status == OK + + def to_dict(self) -> dict[str, object]: + return { + "name": self.name, + "status": self.status, + "message": self.message, + "metadata": self.metadata, + } + + +@dataclass(frozen=True) +class HealthReport: + """Complete health report for operator-facing service seams.""" + + results: tuple[HealthCheckResult, ...] + + @property + def ok(self) -> bool: + return all(result.status in {OK, WARN} for result in self.results) + + @property + def blocked(self) -> bool: + return any(result.status == BLOCKED for result in self.results) + + def to_dict(self) -> dict[str, object]: + return { + "ok": self.ok, + "blocked": self.blocked, + "results": [result.to_dict() for result in self.results], + } + + def render_text(self) -> str: + return "\n".join( + f"{result.name}\t{result.status}\t{result.message}" for result in self.results + ) + + +@dataclass(frozen=True) +class HealthCheckOptions: + """Optional probes for health checks.""" + + agent_id: str | None = None + tool: str | None = None + policy_action: str | None = None + + +class HealthChecker: + """Runs health checks for Matrix, Agent Registry, and Policy Fabric seams.""" + + def __init__(self, config: AgentTermConfig) -> None: + self.config = config + + def run(self, options: HealthCheckOptions | None = None) -> HealthReport: + options = options or HealthCheckOptions() + return HealthReport( + results=( + self.check_matrix(), + self.check_agent_registry(options), + self.check_policy_fabric(options), + ) + ) + + def check_matrix(self) -> HealthCheckResult: + try: + backend = build_matrix_service_backend(self.config) + except MatrixServiceConfigError as exc: + return HealthCheckResult( + name="matrix", + status=BLOCKED, + message=str(exc), + metadata={"enabled": self.config.matrix.enabled}, + ) + + if isinstance(backend, NioMatrixServiceBackend): + return HealthCheckResult( + name="matrix", + status=OK, + message="Matrix live backend is configured.", + metadata={ + "enabled": True, + "homeserver_url": backend.homeserver_url, + "user_id": backend.user_id, + "device_name": backend.device_name, + }, + ) + + return HealthCheckResult( + name="matrix", + status=WARN, + message="Matrix is using the offline/in-memory backend.", + metadata={"enabled": self.config.matrix.enabled}, + ) + + def check_agent_registry(self, options: HealthCheckOptions) -> HealthCheckResult: + fallback = _fallback_agent_registry(options) + try: + backend = build_agent_registry_backend_from_config(self.config, fallback=fallback) + except Exception as exc: # defensive diagnostic path + return HealthCheckResult( + name="agent-registry", + status=BLOCKED, + message=f"Agent Registry backend could not be constructed: {exc}", + ) + + fixture_path = self.config.agent_registration.fixture_path + endpoint_url = self.config.agent_registration.endpoint_url + backend_kind = "fixture" if fixture_path else "http" if endpoint_url else "fallback" + + if fixture_path and not Path(fixture_path).exists(): + return HealthCheckResult( + name="agent-registry", + status=BLOCKED, + message="Agent Registry fixture path does not exist.", + metadata={"fixture_path": fixture_path}, + ) + + if options.agent_id: + agent = backend.resolve_agent(options.agent_id) + if agent is None: + return HealthCheckResult( + name="agent-registry", + status=BLOCKED, + message=f"Agent not resolved: {options.agent_id}", + metadata={"backend": backend_kind, "agent_id": options.agent_id}, + ) + if not agent.is_enabled: + return HealthCheckResult( + name="agent-registry", + status=BLOCKED, + message=f"Agent is not enabled: {options.agent_id}", + metadata={"backend": backend_kind, **agent.to_metadata()}, + ) + if options.tool: + grant = backend.resolve_tool_grant(options.agent_id, options.tool) + if grant is None or not grant.is_active: + return HealthCheckResult( + name="agent-registry", + status=BLOCKED, + message=f"Tool grant is not active: {options.agent_id}:{options.tool}", + metadata={"backend": backend_kind, "agent_id": options.agent_id, "tool": options.tool}, + ) + return HealthCheckResult( + name="agent-registry", + status=OK, + message=f"Agent and tool grant resolved: {options.agent_id}:{options.tool}", + metadata={"backend": backend_kind, **agent.to_metadata(), **grant.to_metadata()}, + ) + return HealthCheckResult( + name="agent-registry", + status=OK, + message=f"Agent resolved: {options.agent_id}", + metadata={"backend": backend_kind, **agent.to_metadata()}, + ) + + status = OK if fixture_path or endpoint_url else WARN + message = ( + "Agent Registry service seam is configured." + if status == OK + else "Agent Registry is using local fallback fixtures." + ) + return HealthCheckResult( + name="agent-registry", + status=status, + message=message, + metadata={"backend": backend_kind, "repository": self.config.agent_registration.repository}, + ) + + def check_policy_fabric(self, options: HealthCheckOptions) -> HealthCheckResult: + fallback = _fallback_policy_fabric(options) + try: + backend = build_policy_fabric_backend_from_config(self.config, fallback=fallback) + except Exception as exc: # defensive diagnostic path + return HealthCheckResult( + name="policy-fabric", + status=BLOCKED, + message=f"Policy Fabric backend could not be constructed: {exc}", + ) + + fixture_path = self.config.policy_fabric.fixture_path + endpoint_url = self.config.policy_fabric.endpoint_url + backend_kind = "fixture" if fixture_path else "http" if endpoint_url else "fallback" + + if fixture_path and not Path(fixture_path).exists(): + return HealthCheckResult( + name="policy-fabric", + status=BLOCKED, + message="Policy Fabric fixture path does not exist.", + metadata={"fixture_path": fixture_path}, + ) + + if options.policy_action: + event = AgentTermEvent( + channel="!policyfabric", + sender="@agent-term", + kind="policy_check", + source="policy-fabric", + body="Health-check policy decision lookup.", + metadata={"policy_action": options.policy_action}, + ) + decision = backend.evaluate(event) + if decision is None: + return HealthCheckResult( + name="policy-fabric", + status=BLOCKED, + message=f"Policy decision not resolved: {options.policy_action}", + metadata={"backend": backend_kind, "policy_action": options.policy_action}, + ) + return HealthCheckResult( + name="policy-fabric", + status=OK if decision.is_allowed else WARN, + message=f"Policy decision resolved: {options.policy_action} -> {decision.status}", + metadata={"backend": backend_kind, **decision.to_metadata()}, + ) + + status = OK if fixture_path or endpoint_url else WARN + message = ( + "Policy Fabric service seam is configured." + if status == OK + else "Policy Fabric is using local fallback fixtures." + ) + return HealthCheckResult( + name="policy-fabric", + status=status, + message=message, + metadata={"backend": backend_kind, "repository": self.config.policy_fabric.repository}, + ) + + +def _fallback_agent_registry(options: HealthCheckOptions) -> InMemoryAgentRegistryBackend: + agents: list[AgentRegistration] = [] + grants: list[ToolGrant] = [] + if options.agent_id: + agents.append( + AgentRegistration( + agent_id=options.agent_id, + registry_ref=f"local://agent-registry/{options.agent_id}", + spec_version="health-check", + session_id=f"session-{options.agent_id.replace('.', '-')}", + ) + ) + if options.agent_id and options.tool: + grants.append( + ToolGrant( + grant_id=f"grant.{options.agent_id}.{options.tool}", + agent_id=options.agent_id, + tool=options.tool, + ) + ) + return InMemoryAgentRegistryBackend(agents=agents, grants=grants) + + +def _fallback_policy_fabric(options: HealthCheckOptions) -> InMemoryPolicyFabricBackend: + decisions: list[PolicyDecision] = [] + if options.policy_action: + decisions.append( + PolicyDecision( + decision_id=f"decision.allow.{options.policy_action}", + action=options.policy_action, + status=ALLOW, + policy_ref="local://policy-fabric/health-check", + ) + ) + return InMemoryPolicyFabricBackend(decisions) From efc5b574c085e24cd0037fffc050168da92ac23f Mon Sep 17 00:00:00 2001 From: mdheller <21163552+mdheller@users.noreply.github.com> Date: Sun, 3 May 2026 14:24:42 -0400 Subject: [PATCH 2/7] Add service health check CLI --- src/agent_term/health_cli.py | 55 ++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) create mode 100644 src/agent_term/health_cli.py diff --git a/src/agent_term/health_cli.py b/src/agent_term/health_cli.py new file mode 100644 index 0000000..461d741 --- /dev/null +++ b/src/agent_term/health_cli.py @@ -0,0 +1,55 @@ +"""CLI entry point for AgentTerm service health checks.""" + +from __future__ import annotations + +import argparse +import json +import sys + +from agent_term.config import load_config +from agent_term.health import HealthChecker, HealthCheckOptions + + +def build_parser() -> argparse.ArgumentParser: + parser = argparse.ArgumentParser( + prog="agent-term-check", + description="Check configured AgentTerm service seams for Matrix, Agent Registry, and Policy Fabric.", + ) + parser.add_argument("--config", help="Optional AgentTerm JSON config path.") + parser.add_argument("--agent-id", help="Agent ID to resolve through Agent Registry.") + parser.add_argument("--tool", help="Tool grant to resolve for --agent-id.") + parser.add_argument("--policy-action", help="Policy action to resolve through Policy Fabric.") + parser.add_argument("--json", action="store_true", help="Print health report as JSON.") + parser.add_argument( + "--strict", + action="store_true", + help="Return non-zero for warnings as well as blocked checks.", + ) + return parser + + +def main(argv: list[str] | None = None) -> int: + args = build_parser().parse_args(argv) + config = load_config(args.config) + report = HealthChecker(config).run( + HealthCheckOptions( + agent_id=args.agent_id, + tool=args.tool, + policy_action=args.policy_action, + ) + ) + + if args.json: + print(json.dumps(report.to_dict(), indent=2, sort_keys=True)) + else: + print(report.render_text()) + + if report.blocked: + return 1 + if args.strict and not all(result.ok for result in report.results): + return 1 + return 0 + + +if __name__ == "__main__": + raise SystemExit(main(sys.argv[1:])) From d1d1ed930486db2f1b030009e4f850165908f6f2 Mon Sep 17 00:00:00 2001 From: mdheller <21163552+mdheller@users.noreply.github.com> Date: Sun, 3 May 2026 14:25:57 -0400 Subject: [PATCH 3/7] Add health check console script --- pyproject.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pyproject.toml b/pyproject.toml index 01cd14b..f31be67 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -50,6 +50,7 @@ matrix = [ [project.scripts] agent-term = "agent_term.cli:main" +agent-term-check = "agent_term.health_cli:main" agent-term-dispatch = "agent_term.dispatch_cli:main" agent-term-matrix = "agent_term.matrix_cli:main" agent-term-snapshot = "agent_term.snapshot_cli:main" From 5e65328e5faf920630cc4feb0e9caaf2aef5bf11 Mon Sep 17 00:00:00 2001 From: mdheller <21163552+mdheller@users.noreply.github.com> Date: Sun, 3 May 2026 16:27:57 -0400 Subject: [PATCH 4/7] Add health checker tests --- tests/test_health.py | 96 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 96 insertions(+) create mode 100644 tests/test_health.py diff --git a/tests/test_health.py b/tests/test_health.py new file mode 100644 index 0000000..3efd136 --- /dev/null +++ b/tests/test_health.py @@ -0,0 +1,96 @@ +import json + +from agent_term.config import config_from_dict +from agent_term.health import BLOCKED, OK, WARN, HealthChecker, HealthCheckOptions + + +def test_health_checker_defaults_to_warning_for_local_fallbacks(): + report = HealthChecker(config_from_dict({})).run() + + statuses = {result.name: result.status for result in report.results} + assert statuses == { + "matrix": WARN, + "agent-registry": WARN, + "policy-fabric": WARN, + } + assert report.ok is True + assert report.blocked is False + + +def test_health_checker_blocks_missing_agent_registry_fixture(): + config = config_from_dict({"agentRegistration": {"fixturePath": "/missing/agent-registry.json"}}) + + result = HealthChecker(config).check_agent_registry(HealthCheckOptions()) + + assert result.status == BLOCKED + assert "fixture path does not exist" in result.message + + +def test_health_checker_resolves_agent_and_tool_from_fixture(tmp_path): + fixture = tmp_path / "agent-registry.json" + fixture.write_text( + json.dumps( + { + "agents": [{"agent_id": "agent.github", "spec_version": "v1"}], + "tool_grants": [ + {"grant_id": "grant.repo-write", "agent_id": "agent.github", "tool": "repo-write"} + ], + } + ), + encoding="utf-8", + ) + config = config_from_dict({"agentRegistration": {"fixturePath": str(fixture)}}) + + result = HealthChecker(config).check_agent_registry( + HealthCheckOptions(agent_id="agent.github", tool="repo-write") + ) + + assert result.status == OK + assert result.metadata["agent_id"] == "agent.github" + assert result.metadata["grant_id"] == "grant.repo-write" + + +def test_health_checker_blocks_missing_policy_fabric_fixture(): + config = config_from_dict({"policyFabric": {"fixturePath": "/missing/policy-fabric.json"}}) + + result = HealthChecker(config).check_policy_fabric(HealthCheckOptions()) + + assert result.status == BLOCKED + assert "fixture path does not exist" in result.message + + +def test_health_checker_resolves_policy_from_fixture(tmp_path): + fixture = tmp_path / "policy-fabric.json" + fixture.write_text( + json.dumps( + { + "decisions": [ + { + "decision_id": "decision.allow.github.pr.create", + "action": "github.pr.create", + "status": "allow", + "policy_ref": "fixture://policy/github-pr-create", + } + ] + } + ), + encoding="utf-8", + ) + config = config_from_dict({"policyFabric": {"fixturePath": str(fixture)}}) + + result = HealthChecker(config).check_policy_fabric( + HealthCheckOptions(policy_action="github.pr.create") + ) + + assert result.status == OK + assert result.metadata["policy_decision_id"] == "decision.allow.github.pr.create" + + +def test_health_report_json_shape(): + report = HealthChecker(config_from_dict({})).run() + + value = report.to_dict() + + assert value["ok"] is True + assert value["blocked"] is False + assert len(value["results"]) == 3 From 9cad9a5a6fbc1c0927f48219bd153e6633a345ca Mon Sep 17 00:00:00 2001 From: mdheller <21163552+mdheller@users.noreply.github.com> Date: Sun, 3 May 2026 16:30:07 -0400 Subject: [PATCH 5/7] Add service health CLI tests --- tests/test_health_cli.py | 109 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 109 insertions(+) create mode 100644 tests/test_health_cli.py diff --git a/tests/test_health_cli.py b/tests/test_health_cli.py new file mode 100644 index 0000000..70e7f02 --- /dev/null +++ b/tests/test_health_cli.py @@ -0,0 +1,109 @@ +import json + +from agent_term.health_cli import main + + +def test_health_cli_prints_default_warnings(capsys): + exit_code = main([]) + + captured = capsys.readouterr() + assert exit_code == 0 + assert "matrix\twarn\tMatrix is using the offline/in-memory backend." in captured.out + assert "agent-registry\twarn\tAgent Registry is using local fallback fixtures." in captured.out + assert "policy-fabric\twarn\tPolicy Fabric is using local fallback fixtures." in captured.out + + +def test_health_cli_strict_returns_nonzero_for_warnings(capsys): + exit_code = main(["--strict"]) + + captured = capsys.readouterr() + assert exit_code == 1 + assert "matrix\twarn" in captured.out + + +def test_health_cli_json_output(capsys): + exit_code = main(["--json"]) + + captured = capsys.readouterr() + value = json.loads(captured.out) + assert exit_code == 0 + assert value["ok"] is True + assert value["blocked"] is False + assert {item["name"] for item in value["results"]} == { + "matrix", + "agent-registry", + "policy-fabric", + } + + +def test_health_cli_blocks_missing_fixture(tmp_path, capsys): + config_path = tmp_path / "agent-term.json" + config_path.write_text( + json.dumps({"agentRegistration": {"fixturePath": str(tmp_path / "missing.json")}}), + encoding="utf-8", + ) + + exit_code = main(["--config", str(config_path)]) + + captured = capsys.readouterr() + assert exit_code == 1 + assert "agent-registry\tblocked\tAgent Registry fixture path does not exist." in captured.out + + +def test_health_cli_resolves_fixture_agent_tool_and_policy(tmp_path, capsys): + agent_fixture = tmp_path / "agent-registry.json" + agent_fixture.write_text( + json.dumps( + { + "agents": [{"agent_id": "agent.github", "spec_version": "v1"}], + "tool_grants": [ + {"grant_id": "grant.repo-write", "agent_id": "agent.github", "tool": "repo-write"} + ], + } + ), + encoding="utf-8", + ) + policy_fixture = tmp_path / "policy-fabric.json" + policy_fixture.write_text( + json.dumps( + { + "decisions": [ + { + "decision_id": "decision.allow.github.pr.create", + "action": "github.pr.create", + "status": "allow", + "policy_ref": "fixture://policy/github-pr-create", + } + ] + } + ), + encoding="utf-8", + ) + config_path = tmp_path / "agent-term.json" + config_path.write_text( + json.dumps( + { + "agentRegistration": {"fixturePath": str(agent_fixture)}, + "policyFabric": {"fixturePath": str(policy_fixture)}, + } + ), + encoding="utf-8", + ) + + exit_code = main( + [ + "--config", + str(config_path), + "--agent-id", + "agent.github", + "--tool", + "repo-write", + "--policy-action", + "github.pr.create", + ] + ) + + captured = capsys.readouterr() + assert exit_code == 0 + assert "agent-registry\tok\tAgent and tool grant resolved: agent.github:repo-write" in captured.out + assert "policy-fabric\tok\tPolicy decision resolved: github.pr.create -> allow" in captured.out From 6fb5fa44cb46e3fe4d1af25fc65452112844712b Mon Sep 17 00:00:00 2001 From: mdheller <21163552+mdheller@users.noreply.github.com> Date: Sun, 3 May 2026 16:33:26 -0400 Subject: [PATCH 6/7] Remove unused Iterable import in health checker --- src/agent_term/health.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/agent_term/health.py b/src/agent_term/health.py index 27ae15e..55de6c2 100644 --- a/src/agent_term/health.py +++ b/src/agent_term/health.py @@ -9,7 +9,6 @@ from dataclasses import dataclass, field from pathlib import Path -from typing import Iterable from agent_term.agent_registry import AgentRegistration, InMemoryAgentRegistryBackend, ToolGrant from agent_term.agent_registry_service import build_agent_registry_backend_from_config From b020abdbf7b6ae45432807a36bdb47b6367139b6 Mon Sep 17 00:00:00 2001 From: mdheller <21163552+mdheller@users.noreply.github.com> Date: Sun, 3 May 2026 16:35:36 -0400 Subject: [PATCH 7/7] Check health fixture paths before backend construction --- src/agent_term/health.py | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/src/agent_term/health.py b/src/agent_term/health.py index 55de6c2..39065c5 100644 --- a/src/agent_term/health.py +++ b/src/agent_term/health.py @@ -131,16 +131,6 @@ def check_matrix(self) -> HealthCheckResult: ) def check_agent_registry(self, options: HealthCheckOptions) -> HealthCheckResult: - fallback = _fallback_agent_registry(options) - try: - backend = build_agent_registry_backend_from_config(self.config, fallback=fallback) - except Exception as exc: # defensive diagnostic path - return HealthCheckResult( - name="agent-registry", - status=BLOCKED, - message=f"Agent Registry backend could not be constructed: {exc}", - ) - fixture_path = self.config.agent_registration.fixture_path endpoint_url = self.config.agent_registration.endpoint_url backend_kind = "fixture" if fixture_path else "http" if endpoint_url else "fallback" @@ -153,6 +143,16 @@ def check_agent_registry(self, options: HealthCheckOptions) -> HealthCheckResult metadata={"fixture_path": fixture_path}, ) + fallback = _fallback_agent_registry(options) + try: + backend = build_agent_registry_backend_from_config(self.config, fallback=fallback) + except Exception as exc: # defensive diagnostic path + return HealthCheckResult( + name="agent-registry", + status=BLOCKED, + message=f"Agent Registry backend could not be constructed: {exc}", + ) + if options.agent_id: agent = backend.resolve_agent(options.agent_id) if agent is None: @@ -205,16 +205,6 @@ def check_agent_registry(self, options: HealthCheckOptions) -> HealthCheckResult ) def check_policy_fabric(self, options: HealthCheckOptions) -> HealthCheckResult: - fallback = _fallback_policy_fabric(options) - try: - backend = build_policy_fabric_backend_from_config(self.config, fallback=fallback) - except Exception as exc: # defensive diagnostic path - return HealthCheckResult( - name="policy-fabric", - status=BLOCKED, - message=f"Policy Fabric backend could not be constructed: {exc}", - ) - fixture_path = self.config.policy_fabric.fixture_path endpoint_url = self.config.policy_fabric.endpoint_url backend_kind = "fixture" if fixture_path else "http" if endpoint_url else "fallback" @@ -227,6 +217,16 @@ def check_policy_fabric(self, options: HealthCheckOptions) -> HealthCheckResult: metadata={"fixture_path": fixture_path}, ) + fallback = _fallback_policy_fabric(options) + try: + backend = build_policy_fabric_backend_from_config(self.config, fallback=fallback) + except Exception as exc: # defensive diagnostic path + return HealthCheckResult( + name="policy-fabric", + status=BLOCKED, + message=f"Policy Fabric backend could not be constructed: {exc}", + ) + if options.policy_action: event = AgentTermEvent( channel="!policyfabric",