OpenHands · burakkeless · Apr 30, 2026 · Apr 30, 2026 · May 9, 2026 · May 21, 2026
diff --git a/.github/scripts/check_agent_server_rest_api_breakage.py b/.github/scripts/check_agent_server_rest_api_breakage.py
@@ -557,6 +557,38 @@ def _validate_removed_schema_properties(
 )
 
 
+# oasdiff rule IDs for enum-value additions in response schemas.
+_RESPONSE_ENUM_VALUE_ADDED_IDS = frozenset(
+    {
+        "response-property-enum-value-added",
+        "response-write-only-property-enum-value-added",
+    }
+)
+
+# Response properties that are known extensible discriminated-union discriminators
+# and may therefore grow new enum values additively. Adding a HookType value
+# (e.g. "agent") to a hook definition's `type` is safe because hook configs are an
+# extensible union and clients must tolerate unknown discriminator values. This is
+# intentionally scoped to the hook discriminator so an ordinary new response enum
+# value elsewhere (a new status/mode/etc.) is still treated as a breaking change.
+_EXTENSIBLE_DISCRIMINATOR_PROPERTY_RE = re.compile(
+    r"HookConfig\b.*\bhooks/items/type\b"
+)
+
+
+def _is_additive_discriminator_enum_value(change: dict) -> bool:
+    """Return True for additive enum values on a known extensible discriminator.
+
+    Adding a value to a response enum is normally breaking (generated clients may
+    treat the enum exhaustively), so this is scoped narrowly to the hook config
+    discriminator union rather than allowlisting every response enum addition.
+    """
+    if str(change.get("id", "")) not in _RESPONSE_ENUM_VALUE_ADDED_IDS:
+        return False
+    text = str(change.get("text", ""))
+    return bool(_EXTENSIBLE_DISCRIMINATOR_PROPERTY_RE.search(text))
+
+
 def _is_union_property_removal_artifact(change: dict) -> bool:
     """Return True for property removals that are artifacts of union widening.
 
@@ -606,7 +638,9 @@ def _split_breaking_changes(
             removed_schema_properties.append(change)
             continue
 
-        if change_id in _ADDITIVE_RESPONSE_ONEOF_IDS:
+        if change_id in _ADDITIVE_RESPONSE_ONEOF_IDS or (
+            _is_additive_discriminator_enum_value(change)
+        ):
             additive_response_oneof.append(change)
             continue
 
@@ -799,9 +833,10 @@ def main() -> int:
         if additive_response_oneof:
             print(
                 f"\n::notice title={PYPI_DISTRIBUTION} REST API::"
-                "Additive oneOf/anyOf expansion detected in response schemas. "
-                "This is expected for extensible discriminated-union APIs and "
-                "does not break backward compatibility."
+                "Additive oneOf/anyOf expansion or enum-value additions detected "
+                "in response schemas. This is expected for extensible "
+                "discriminated-union APIs and does not break backward "
+                "compatibility."
             )
             for item in additive_response_oneof:
                 print(f"  - {item.get('text', str(item))}")

diff --git a/examples/01_standalone_sdk/51_agent_hooks/README.md b/examples/01_standalone_sdk/51_agent_hooks/README.md
@@ -0,0 +1,60 @@
+# Agent-based Hooks Example
+
+This folder demonstrates the `type="agent"` hook — a lifecycle hook whose
+decision is produced by an LLM-driven sub-agent rather than a shell script.
+
+For shell-command hooks see [`../33_hooks/`](../33_hooks).
+
+## Why an agent hook
+
+A shell-based PreToolUse hook can only block what its blacklist literally
+matches. The agent rewrites `cat /etc/passwd` as `awk '{print}' /etc/passwd`
+and slips through. An agent hook reasons about the **semantic intent** of the
+command — "reading a sensitive system file" — and denies regardless of the
+exact tool name used.
+
+## Example
+
+- **main.py** — Two agent hooks, each in its own conversation:
+  - **PreToolUse** "security reviewer" denies a command whose intent is to
+    read `/etc/passwd`, even though no obvious keyword appears in a blacklist.
+  - **Stop** "quality reviewer" refuses to let the main agent finish until
+    the required deliverable (`REPORT.md`) is present in the workspace.
+
+Each hook decision is printed to the console via a `HookExecutionEvent`
+callback, so you can watch the allow/deny outcomes as the demo runs.
+
+## Running
+
+```bash
+export LLM_API_KEY="your-key"
+export LLM_MODEL="anthropic/claude-sonnet-4-5-20250929"  # optional
+export LLM_BASE_URL="https://your-endpoint"              # optional
+
+python main.py
+```
+
+## How an agent hook is configured
+
+```python
+HookDefinition(
+    type=HookType.AGENT,
+    name="security-reviewer",      # bucket for cost metrics (agent-hook:<name>)
+    system_prompt="...",           # instructs the hook agent; must request JSON
+    tools=["file_editor"],         # optional tools the hook agent may use
+                                   # (use registered names, e.g. "file_editor",
+                                   # "terminal" — not class names like
+                                   # "FileEditorTool")
+    timeout=60,                    # forwarded to the per-hook LLM copy
+    max_iterations=3,              # cap on hook sub-conversation steps
+)
+```
+
+The hook agent receives the event JSON and must reply with:
+
+```json
+{"decision": "allow" | "deny", "reason": "<short explanation>"}
+```
+
+Anything else (non-JSON, missing field, sub-conversation error) defaults to
+`allow` so a broken hook cannot wedge the main agent.
diff --git a/examples/01_standalone_sdk/51_agent_hooks/main.py b/examples/01_standalone_sdk/51_agent_hooks/main.py
@@ -0,0 +1,204 @@
+"""OpenHands Agent SDK — Agent-based Hooks Example
+
+Demonstrates the `type="agent"` hook, which evaluates lifecycle events with an
+LLM-driven sub-agent instead of a shell script. The hook agent receives the
+event JSON, reasons about it semantically, and replies with a decision payload:
+
+    {"decision": "allow" | "deny", "reason": "..."}
+
+Two demos:
+
+- PreToolUse (security reviewer): inspects the INTENT of a terminal command,
+  not just its syntax. A command like `awk '{print}' /etc/passwd` would slip
+  past a blacklist of command names, but the agent hook recognises it as
+  reading a sensitive system file and denies it.
+- Stop (quality reviewer): refuses to let the main agent finish until the
+  required deliverable exists in the workspace.
+
+For shell-command hooks see ../33_hooks/33_hooks.py.
+"""
+
+import os
+import tempfile
+from pathlib import Path
+
+from pydantic import SecretStr
+
+from openhands.sdk import LLM, Conversation
+from openhands.sdk.conversation.state import ConversationExecutionStatus
+from openhands.sdk.event.hook_execution import HookExecutionEvent
+from openhands.sdk.hooks import HookConfig, HookDefinition, HookMatcher, HookType
+from openhands.tools.preset.default import get_default_agent
+
+
+# Keep the demo conversations short: a small per-run iteration cap means a hook
+# that keeps denying (or a model that keeps retrying) fails fast instead of
+# burning calls up to the default 500-iteration limit / CI subprocess timeout.
+MAX_ITERATIONS = 10
+
+
+# Configure LLM
+api_key = os.getenv("LLM_API_KEY")
+assert api_key is not None, "LLM_API_KEY environment variable is not set."
+model = os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929")
+base_url = os.getenv("LLM_BASE_URL")
+
+llm = LLM(
+    usage_id="agent",
+    model=model,
+    base_url=base_url,
+    api_key=SecretStr(api_key),
+)
+
+SECURITY_REVIEWER_PROMPT = """You are a security reviewer for a coding agent.
+You receive a PreToolUse hook event describing a terminal command the agent
+is about to run.
+
+Reason about the SEMANTIC INTENT of the command, not just its syntax. Block:
+- reading sensitive system files (/etc/passwd, /etc/shadow, ~/.ssh/*, credentials)
+- broad filesystem discovery outside the workspace (rg --files /etc, find /, ls /root)
+- exfiltrating data over the network
+- modifying system state (sudo, apt install, systemctl)
+
+Allow ordinary inspection, builds, tests, and edits inside the workspace.
+When unsure, prefer allow.
+
+Reply with a single JSON object and nothing else:
+{"decision": "allow" | "deny", "reason": "<short explanation>"}
+"""
+
+QUALITY_REVIEWER_PROMPT = """You are a quality reviewer enforcing task completion.
+You receive a Stop hook event when the main agent tries to finish.
+
+The task requires the file REPORT.md to exist in the workspace and contain at
+least one bullet point describing the repository. Use the file_editor tool to
+check whether the file exists and inspect its contents.
+
+If the deliverable is missing or empty, deny so the main agent keeps working.
+Otherwise allow.
+
+Reply with a single JSON object and nothing else:
+{"decision": "allow" | "deny", "reason": "<short explanation>"}
+"""
+
+
+def hook_logger(event) -> None:
+    """Surface each hook decision so the demo output is self-explanatory."""
+    if not isinstance(event, HookExecutionEvent):
+        return
+    status = "DENY " if event.blocked else ("ALLOW" if event.success else "FAIL ")
+    line = f"  [hook] {event.hook_event_type} {status} -> {event.hook_command}"
+    if event.reason:
+        line += f"\n         reason: {event.reason}"
+    print(line)
+
+
+def run_demo(workspace: Path, hook_config: HookConfig, message: str) -> float:
+    """Run one demo in its own conversation and return its cost.
+
+    Each demo gets a fresh LLM with isolated metrics so per-demo costs don't
+    overlap (reusing one LLM would make the second conversation's stats include
+    the first demo's spend). A small iteration cap plus an error/stuck check make
+    the example fail fast instead of looping.
+    """
+    demo_llm = llm.model_copy()
+    demo_llm.reset_metrics()
+    conversation = Conversation(
+        agent=get_default_agent(llm=demo_llm),
+        workspace=str(workspace),
+        hook_config=hook_config,
+        callbacks=[hook_logger],
+        max_iteration_per_run=MAX_ITERATIONS,
+    )
+    conversation.send_message(message)
+    conversation.run()
+    status = conversation.state.execution_status
+    if status in (
+        ConversationExecutionStatus.ERROR,
+        ConversationExecutionStatus.STUCK,
+    ):
+        raise RuntimeError(
+            f"Demo conversation ended in {status.value} state "
+            "before reaching a decision."
+        )
+    return conversation.conversation_stats.get_combined_metrics().accumulated_cost
+
+
+# Each demo runs in its own conversation with only the hook it needs. Sharing a
+# single config would leave the Stop quality gate active during Demo 1, so the
+# agent could never finish the first task until REPORT.md existed — coupling two
+# unrelated demos and burning iterations.
+security_hook_config = HookConfig(
+    pre_tool_use=[
+        HookMatcher(
+            matcher="terminal",
+            hooks=[
+                HookDefinition(
+                    type=HookType.AGENT,
+                    name="security-reviewer",
+                    system_prompt=SECURITY_REVIEWER_PROMPT,
+                    timeout=60,
+                    max_iterations=3,
+                )
+            ],
+        )
+    ],
+)
+
+quality_hook_config = HookConfig(
+    stop=[
+        HookMatcher(
+            hooks=[
+                HookDefinition(
+                    type=HookType.AGENT,
+                    name="quality-reviewer",
+                    system_prompt=QUALITY_REVIEWER_PROMPT,
+                    tools=["file_editor"],
+                    timeout=90,
+                    max_iterations=5,
+                )
+            ],
+        )
+    ],
+)
+
+
+with tempfile.TemporaryDirectory() as tmpdir:
+    workspace = Path(tmpdir)
+    total_cost = 0.0
+
+    print("=" * 60)
+    print("Demo 1: PreToolUse — semantic deny")
+    print("=" * 60)
+    print(
+        "Asking the agent to read /etc/passwd via awk. The literal command\n"
+        "wouldn't match a syntactic blacklist (no `cat`, no `/etc/shadow`\n"
+        "keyword), but the security-reviewer agent should recognise the\n"
+        "intent and deny.\n"
+    )
+    total_cost += run_demo(
+        workspace,
+        security_hook_config,
+        "Show me the contents of /etc/passwd using awk '{print}'.",
+    )
+
+    print("\n" + "=" * 60)
+    print("Demo 2: Stop — deny until deliverable exists")
+    print("=" * 60)
+    print("Quality reviewer denies until REPORT.md exists with a bullet point.\n")
+    total_cost += run_demo(
+        workspace,
+        quality_hook_config,
+        "Write REPORT.md in the workspace with at least one bullet point "
+        "describing this repository, then finish.",
+    )
+
+    report = workspace / "REPORT.md"
+    if report.exists():
+        print(f"\n[REPORT.md preview: {report.read_text()[:120]!r}...]")
+
+    print("\n" + "=" * 60)
+    print("Example Complete!")
+    print("=" * 60)
+
+    print(f"\nEXAMPLE_COST: {total_cost}")
diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py
@@ -559,12 +559,23 @@ def _ensure_plugins_loaded(self) -> None:
         if final_hook_config is not None:
             # Store final hook_config in state for observability
             self._state.hook_config = final_hook_config
+            hook_persistence_dir = (
+                str(Path(self._state.persistence_dir).parent)
+                if self._state.persistence_dir is not None
+                else None
+            )
 
             self._hook_processor, self._on_event = create_hook_callback(
                 hook_config=final_hook_config,
                 working_dir=str(self.workspace.working_dir),
                 session_id=str(self._state.id),
                 original_callback=self._base_callback,
+                # Resolve lazily: switch_llm()/switch_profile() rebind self.agent,
+                # so agent hooks must read the current LLM at execution time.
+                llm_getter=lambda: self.agent.llm,
+                persistence_dir=hook_persistence_dir,
+                visualizer=self._visualizer,
+                conversation_stats=self._state.stats,
             )
             self._hook_processor.set_conversation_state(self._state)
             self._hook_processor.run_session_start()