diff --git a/.github/scripts/check_agent_server_rest_api_breakage.py b/.github/scripts/check_agent_server_rest_api_breakage.py
index 47080fcf9c..5100b6031a 100644
--- a/.github/scripts/check_agent_server_rest_api_breakage.py
+++ b/.github/scripts/check_agent_server_rest_api_breakage.py
@@ -557,6 +557,38 @@ def _validate_removed_schema_properties(
 )
 
 
+# oasdiff rule IDs for enum-value additions in response schemas.
+_RESPONSE_ENUM_VALUE_ADDED_IDS = frozenset(
+    {
+        "response-property-enum-value-added",
+        "response-write-only-property-enum-value-added",
+    }
+)
+
+# Response properties that are known extensible discriminated-union discriminators
+# and may therefore grow new enum values additively. Adding a HookType value
+# (e.g. "agent") to a hook definition's `type` is safe because hook configs are an
+# extensible union and clients must tolerate unknown discriminator values. This is
+# intentionally scoped to the hook discriminator so an ordinary new response enum
+# value elsewhere (a new status/mode/etc.) is still treated as a breaking change.
+_EXTENSIBLE_DISCRIMINATOR_PROPERTY_RE = re.compile(
+    r"HookConfig\b.*\bhooks/items/type\b"
+)
+
+
+def _is_additive_discriminator_enum_value(change: dict) -> bool:
+    """Return True for additive enum values on a known extensible discriminator.
+
+    Adding a value to a response enum is normally breaking (generated clients may
+    treat the enum exhaustively), so this is scoped narrowly to the hook config
+    discriminator union rather than allowlisting every response enum addition.
+    """
+    if str(change.get("id", "")) not in _RESPONSE_ENUM_VALUE_ADDED_IDS:
+        return False
+    text = str(change.get("text", ""))
+    return bool(_EXTENSIBLE_DISCRIMINATOR_PROPERTY_RE.search(text))
+
+
 def _is_union_property_removal_artifact(change: dict) -> bool:
     """Return True for property removals that are artifacts of union widening.
 
@@ -606,7 +638,9 @@ def _split_breaking_changes(
             removed_schema_properties.append(change)
             continue
 
-        if change_id in _ADDITIVE_RESPONSE_ONEOF_IDS:
+        if change_id in _ADDITIVE_RESPONSE_ONEOF_IDS or (
+            _is_additive_discriminator_enum_value(change)
+        ):
             additive_response_oneof.append(change)
             continue
 
@@ -799,9 +833,10 @@ def main() -> int:
         if additive_response_oneof:
             print(
                 f"\n::notice title={PYPI_DISTRIBUTION} REST API::"
-                "Additive oneOf/anyOf expansion detected in response schemas. "
-                "This is expected for extensible discriminated-union APIs and "
-                "does not break backward compatibility."
+                "Additive oneOf/anyOf expansion or enum-value additions detected "
+                "in response schemas. This is expected for extensible "
+                "discriminated-union APIs and does not break backward "
+                "compatibility."
             )
             for item in additive_response_oneof:
                 print(f"  - {item.get('text', str(item))}")
diff --git a/examples/01_standalone_sdk/51_agent_hooks/README.md b/examples/01_standalone_sdk/51_agent_hooks/README.md
new file mode 100644
index 0000000000..a3d6cd6d97
--- /dev/null
+++ b/examples/01_standalone_sdk/51_agent_hooks/README.md
@@ -0,0 +1,60 @@
+# Agent-based Hooks Example
+
+This folder demonstrates the `type="agent"` hook — a lifecycle hook whose
+decision is produced by an LLM-driven sub-agent rather than a shell script.
+
+For shell-command hooks see [`../33_hooks/`](../33_hooks).
+
+## Why an agent hook
+
+A shell-based PreToolUse hook can only block what its blacklist literally
+matches. The agent rewrites `cat /etc/passwd` as `awk '{print}' /etc/passwd`
+and slips through. An agent hook reasons about the **semantic intent** of the
+command — "reading a sensitive system file" — and denies regardless of the
+exact tool name used.
+
+## Example
+
+- **main.py** — Two agent hooks, each in its own conversation:
+  - **PreToolUse** "security reviewer" denies a command whose intent is to
+    read `/etc/passwd`, even though no obvious keyword appears in a blacklist.
+  - **Stop** "quality reviewer" refuses to let the main agent finish until
+    the required deliverable (`REPORT.md`) is present in the workspace.
+
+Each hook decision is printed to the console via a `HookExecutionEvent`
+callback, so you can watch the allow/deny outcomes as the demo runs.
+
+## Running
+
+```bash
+export LLM_API_KEY="your-key"
+export LLM_MODEL="anthropic/claude-sonnet-4-5-20250929"  # optional
+export LLM_BASE_URL="https://your-endpoint"              # optional
+
+python main.py
+```
+
+## How an agent hook is configured
+
+```python
+HookDefinition(
+    type=HookType.AGENT,
+    name="security-reviewer",      # bucket for cost metrics (agent-hook:<name>)
+    system_prompt="...",           # instructs the hook agent; must request JSON
+    tools=["file_editor"],         # optional tools the hook agent may use
+                                   # (use registered names, e.g. "file_editor",
+                                   # "terminal" — not class names like
+                                   # "FileEditorTool")
+    timeout=60,                    # forwarded to the per-hook LLM copy
+    max_iterations=3,              # cap on hook sub-conversation steps
+)
+```
+
+The hook agent receives the event JSON and must reply with:
+
+```json
+{"decision": "allow" | "deny", "reason": "<short explanation>"}
+```
+
+Anything else (non-JSON, missing field, sub-conversation error) defaults to
+`allow` so a broken hook cannot wedge the main agent.
diff --git a/examples/01_standalone_sdk/51_agent_hooks/main.py b/examples/01_standalone_sdk/51_agent_hooks/main.py
new file mode 100644
index 0000000000..7820c4fde3
--- /dev/null
+++ b/examples/01_standalone_sdk/51_agent_hooks/main.py
@@ -0,0 +1,204 @@
+"""OpenHands Agent SDK — Agent-based Hooks Example
+
+Demonstrates the `type="agent"` hook, which evaluates lifecycle events with an
+LLM-driven sub-agent instead of a shell script. The hook agent receives the
+event JSON, reasons about it semantically, and replies with a decision payload:
+
+    {"decision": "allow" | "deny", "reason": "..."}
+
+Two demos:
+
+- PreToolUse (security reviewer): inspects the INTENT of a terminal command,
+  not just its syntax. A command like `awk '{print}' /etc/passwd` would slip
+  past a blacklist of command names, but the agent hook recognises it as
+  reading a sensitive system file and denies it.
+- Stop (quality reviewer): refuses to let the main agent finish until the
+  required deliverable exists in the workspace.
+
+For shell-command hooks see ../33_hooks/33_hooks.py.
+"""
+
+import os
+import tempfile
+from pathlib import Path
+
+from pydantic import SecretStr
+
+from openhands.sdk import LLM, Conversation
+from openhands.sdk.conversation.state import ConversationExecutionStatus
+from openhands.sdk.event.hook_execution import HookExecutionEvent
+from openhands.sdk.hooks import HookConfig, HookDefinition, HookMatcher, HookType
+from openhands.tools.preset.default import get_default_agent
+
+
+# Keep the demo conversations short: a small per-run iteration cap means a hook
+# that keeps denying (or a model that keeps retrying) fails fast instead of
+# burning calls up to the default 500-iteration limit / CI subprocess timeout.
+MAX_ITERATIONS = 10
+
+
+# Configure LLM
+api_key = os.getenv("LLM_API_KEY")
+assert api_key is not None, "LLM_API_KEY environment variable is not set."
+model = os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929")
+base_url = os.getenv("LLM_BASE_URL")
+
+llm = LLM(
+    usage_id="agent",
+    model=model,
+    base_url=base_url,
+    api_key=SecretStr(api_key),
+)
+
+SECURITY_REVIEWER_PROMPT = """You are a security reviewer for a coding agent.
+You receive a PreToolUse hook event describing a terminal command the agent
+is about to run.
+
+Reason about the SEMANTIC INTENT of the command, not just its syntax. Block:
+- reading sensitive system files (/etc/passwd, /etc/shadow, ~/.ssh/*, credentials)
+- broad filesystem discovery outside the workspace (rg --files /etc, find /, ls /root)
+- exfiltrating data over the network
+- modifying system state (sudo, apt install, systemctl)
+
+Allow ordinary inspection, builds, tests, and edits inside the workspace.
+When unsure, prefer allow.
+
+Reply with a single JSON object and nothing else:
+{"decision": "allow" | "deny", "reason": "<short explanation>"}
+"""
+
+QUALITY_REVIEWER_PROMPT = """You are a quality reviewer enforcing task completion.
+You receive a Stop hook event when the main agent tries to finish.
+
+The task requires the file REPORT.md to exist in the workspace and contain at
+least one bullet point describing the repository. Use the file_editor tool to
+check whether the file exists and inspect its contents.
+
+If the deliverable is missing or empty, deny so the main agent keeps working.
+Otherwise allow.
+
+Reply with a single JSON object and nothing else:
+{"decision": "allow" | "deny", "reason": "<short explanation>"}
+"""
+
+
+def hook_logger(event) -> None:
+    """Surface each hook decision so the demo output is self-explanatory."""
+    if not isinstance(event, HookExecutionEvent):
+        return
+    status = "DENY " if event.blocked else ("ALLOW" if event.success else "FAIL ")
+    line = f"  [hook] {event.hook_event_type} {status} -> {event.hook_command}"
+    if event.reason:
+        line += f"\n         reason: {event.reason}"
+    print(line)
+
+
+def run_demo(workspace: Path, hook_config: HookConfig, message: str) -> float:
+    """Run one demo in its own conversation and return its cost.
+
+    Each demo gets a fresh LLM with isolated metrics so per-demo costs don't
+    overlap (reusing one LLM would make the second conversation's stats include
+    the first demo's spend). A small iteration cap plus an error/stuck check make
+    the example fail fast instead of looping.
+    """
+    demo_llm = llm.model_copy()
+    demo_llm.reset_metrics()
+    conversation = Conversation(
+        agent=get_default_agent(llm=demo_llm),
+        workspace=str(workspace),
+        hook_config=hook_config,
+        callbacks=[hook_logger],
+        max_iteration_per_run=MAX_ITERATIONS,
+    )
+    conversation.send_message(message)
+    conversation.run()
+    status = conversation.state.execution_status
+    if status in (
+        ConversationExecutionStatus.ERROR,
+        ConversationExecutionStatus.STUCK,
+    ):
+        raise RuntimeError(
+            f"Demo conversation ended in {status.value} state "
+            "before reaching a decision."
+        )
+    return conversation.conversation_stats.get_combined_metrics().accumulated_cost
+
+
+# Each demo runs in its own conversation with only the hook it needs. Sharing a
+# single config would leave the Stop quality gate active during Demo 1, so the
+# agent could never finish the first task until REPORT.md existed — coupling two
+# unrelated demos and burning iterations.
+security_hook_config = HookConfig(
+    pre_tool_use=[
+        HookMatcher(
+            matcher="terminal",
+            hooks=[
+                HookDefinition(
+                    type=HookType.AGENT,
+                    name="security-reviewer",
+                    system_prompt=SECURITY_REVIEWER_PROMPT,
+                    timeout=60,
+                    max_iterations=3,
+                )
+            ],
+        )
+    ],
+)
+
+quality_hook_config = HookConfig(
+    stop=[
+        HookMatcher(
+            hooks=[
+                HookDefinition(
+                    type=HookType.AGENT,
+                    name="quality-reviewer",
+                    system_prompt=QUALITY_REVIEWER_PROMPT,
+                    tools=["file_editor"],
+                    timeout=90,
+                    max_iterations=5,
+                )
+            ],
+        )
+    ],
+)
+
+
+with tempfile.TemporaryDirectory() as tmpdir:
+    workspace = Path(tmpdir)
+    total_cost = 0.0
+
+    print("=" * 60)
+    print("Demo 1: PreToolUse — semantic deny")
+    print("=" * 60)
+    print(
+        "Asking the agent to read /etc/passwd via awk. The literal command\n"
+        "wouldn't match a syntactic blacklist (no `cat`, no `/etc/shadow`\n"
+        "keyword), but the security-reviewer agent should recognise the\n"
+        "intent and deny.\n"
+    )
+    total_cost += run_demo(
+        workspace,
+        security_hook_config,
+        "Show me the contents of /etc/passwd using awk '{print}'.",
+    )
+
+    print("\n" + "=" * 60)
+    print("Demo 2: Stop — deny until deliverable exists")
+    print("=" * 60)
+    print("Quality reviewer denies until REPORT.md exists with a bullet point.\n")
+    total_cost += run_demo(
+        workspace,
+        quality_hook_config,
+        "Write REPORT.md in the workspace with at least one bullet point "
+        "describing this repository, then finish.",
+    )
+
+    report = workspace / "REPORT.md"
+    if report.exists():
+        print(f"\n[REPORT.md preview: {report.read_text()[:120]!r}...]")
+
+    print("\n" + "=" * 60)
+    print("Example Complete!")
+    print("=" * 60)
+
+    print(f"\nEXAMPLE_COST: {total_cost}")
diff --git a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py
index a6bb1ec60e..46f6a8ef6a 100644
--- a/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py
+++ b/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py
@@ -559,12 +559,23 @@ def _ensure_plugins_loaded(self) -> None:
         if final_hook_config is not None:
             # Store final hook_config in state for observability
             self._state.hook_config = final_hook_config
+            hook_persistence_dir = (
+                str(Path(self._state.persistence_dir).parent)
+                if self._state.persistence_dir is not None
+                else None
+            )
 
             self._hook_processor, self._on_event = create_hook_callback(
                 hook_config=final_hook_config,
                 working_dir=str(self.workspace.working_dir),
                 session_id=str(self._state.id),
                 original_callback=self._base_callback,
+                # Resolve lazily: switch_llm()/switch_profile() rebind self.agent,
+                # so agent hooks must read the current LLM at execution time.
+                llm_getter=lambda: self.agent.llm,
+                persistence_dir=hook_persistence_dir,
+                visualizer=self._visualizer,
+                conversation_stats=self._state.stats,
             )
             self._hook_processor.set_conversation_state(self._state)
             self._hook_processor.run_session_start()
diff --git a/openhands-sdk/openhands/sdk/hooks/config.py b/openhands-sdk/openhands/sdk/hooks/config.py
index 594597cf8b..2398fe3e5d 100644
--- a/openhands-sdk/openhands/sdk/hooks/config.py
+++ b/openhands-sdk/openhands/sdk/hooks/config.py
@@ -41,40 +41,74 @@ class HookType(StrEnum):
 
     COMMAND = "command"  # Shell command executed via subprocess
     PROMPT = "prompt"  # LLM-based evaluation (future)
+    AGENT = "agent"  # Agent-based evaluation with tool access
 
 
 class HookDefinition(BaseModel):
     """A single hook definition."""
 
     type: HookType = HookType.COMMAND
-    command: str
+    name: str | None = None
+    # `command` is kept a non-nullable string that is always present in the
+    # serialized output and reported as required in the JSON schema (see
+    # __get_pydantic_json_schema__). This preserves the published REST response
+    # contract for ConversationInfo.hook_config: making it optional/nullable
+    # would be flagged as a breaking change by the oasdiff REST API check.
+    # Command-less hook types (PROMPT/AGENT) simply leave it as "".
+    command: str = ""
     prompt: str | None = None
+    system_prompt: str | None = None
+    tools: list[str] = Field(default_factory=list)
     timeout: int = 60
+    max_iterations: int = 3
     async_: bool = Field(default=False, alias="async")  # 'async' is a reserved keyword
 
     model_config = {
         "populate_by_name": True,  # Allow both 'async' and 'async_' in input
     }
 
-    @model_validator(mode="before")
     @classmethod
-    def _set_command_for_prompt_hooks(cls, data: Any) -> Any:
-        if (
-            isinstance(data, dict)
-            and data.get("type") == "prompt"
-            and "command" not in data
-        ):
-            data["command"] = ""
-        return data
+    def __get_pydantic_json_schema__(cls, core_schema, handler):  # type: ignore[override]
+        # Report `command` as a required, non-defaulted string to keep the
+        # published REST response contract identical to releases where the field
+        # had no default. The runtime default ("") only eases construction of
+        # command-less hook types; it never surfaces as null in responses.
+        json_schema = handler(core_schema)
+        json_schema = handler.resolve_ref_schema(json_schema)
+        command_schema = json_schema.get("properties", {}).get("command")
+        if command_schema is not None:
+            command_schema.pop("default", None)
+        required = json_schema.setdefault("required", [])
+        if "command" not in required:
+            required.append("command")
+        return json_schema
 
     @model_validator(mode="after")
-    def _check_required_fields(self) -> "HookDefinition":
+    def _validate_type_fields(self) -> "HookDefinition":
         if self.type == HookType.COMMAND and not self.command:
             raise ValueError("'command' is required when type is 'command'")
         if self.type == HookType.PROMPT and not self.prompt:
             raise ValueError("'prompt' is required when type is 'prompt'")
+        if self.type == HookType.AGENT and self.command:
+            raise ValueError(
+                "'command' must not be set when type is 'agent'; "
+                "use 'system_prompt' instead"
+            )
+        if self.type == HookType.AGENT and self.async_:
+            raise ValueError("'async' is not supported for agent hooks")
         return self
 
+    @property
+    def display_command(self) -> str:
+        """Human-readable label for this hook used in logs and events."""
+        if self.command:
+            return self.command
+        if self.name is not None:
+            return f"agent-hook:{self.name}"
+        if self.system_prompt:
+            return f"agent-hook:{self.system_prompt[:20]}"
+        return "agent-hook:agent"
+
 
 class HookMatcher(BaseModel):
     """Matches events to hooks based on patterns.
diff --git a/openhands-sdk/openhands/sdk/hooks/conversation_hooks.py b/openhands-sdk/openhands/sdk/hooks/conversation_hooks.py
index 4eaac1c1ac..960fbab589 100644
--- a/openhands-sdk/openhands/sdk/hooks/conversation_hooks.py
+++ b/openhands-sdk/openhands/sdk/hooks/conversation_hooks.py
@@ -3,6 +3,7 @@
 from collections.abc import Callable
 from typing import TYPE_CHECKING, Any
 
+from openhands.sdk.conversation.visualizer import ConversationVisualizerBase
 from openhands.sdk.event import (
     ActionEvent,
     Event,
@@ -19,7 +20,9 @@
 
 
 if TYPE_CHECKING:
+    from openhands.sdk.conversation.conversation_stats import ConversationStats
     from openhands.sdk.conversation.state import ConversationState
+    from openhands.sdk.llm import LLM
 
 logger = get_logger(__name__)
 
@@ -149,7 +152,7 @@ def _handle_pre_tool_use(self, event: ActionEvent) -> None:
         for hook, result in zip(hooks, results, strict=False):
             self._emit_hook_execution_event(
                 hook_event_type=HookEventType.PRE_TOOL_USE,
-                hook_command=hook.command,
+                hook_command=hook.display_command,
                 result=result,
                 tool_name=tool_name,
                 action_id=event.id,
@@ -224,7 +227,7 @@ def _handle_post_tool_use(self, event: ObservationEvent) -> None:
         for hook, result in zip(hooks, results, strict=False):
             self._emit_hook_execution_event(
                 hook_event_type=HookEventType.POST_TOOL_USE,
-                hook_command=hook.command,
+                hook_command=hook.display_command,
                 result=result,
                 tool_name=tool_name,
                 action_id=action_event.id,
@@ -266,7 +269,7 @@ def _handle_user_prompt_submit(self, event: MessageEvent) -> MessageEvent:
         for hook, result in zip(hooks, results, strict=False):
             self._emit_hook_execution_event(
                 hook_event_type=HookEventType.USER_PROMPT_SUBMIT,
-                hook_command=hook.command,
+                hook_command=hook.display_command,
                 result=result,
                 message_id=event.id,
                 hook_input={"message": message},
@@ -328,7 +331,7 @@ def run_session_start(self) -> None:
         for hook, result in zip(hooks, results, strict=False):
             self._emit_hook_execution_event(
                 hook_event_type=HookEventType.SESSION_START,
-                hook_command=hook.command,
+                hook_command=hook.display_command,
                 result=result,
             )
             if result.error:
@@ -342,7 +345,7 @@ def run_session_end(self) -> None:
         for hook, result in zip(hooks, results, strict=False):
             self._emit_hook_execution_event(
                 hook_event_type=HookEventType.SESSION_END,
-                hook_command=hook.command,
+                hook_command=hook.display_command,
                 result=result,
             )
             if result.error:
@@ -360,7 +363,7 @@ def run_stop(self, reason: str | None = None) -> tuple[bool, str | None]:
         for hook, result in zip(hooks, results, strict=False):
             self._emit_hook_execution_event(
                 hook_event_type=HookEventType.STOP,
-                hook_command=hook.command,
+                hook_command=hook.display_command,
                 result=result,
                 hook_input={"reason": reason} if reason else None,
             )
@@ -389,6 +392,13 @@ def create_hook_callback(
     session_id: str | None = None,
     original_callback: Any = None,
     emit_hook_events: bool = True,
+    llm: "LLM | None" = None,
+    llm_getter: "Callable[[], LLM | None] | None" = None,
+    persistence_dir: str | None = None,
+    visualizer: type[ConversationVisualizerBase]
+    | ConversationVisualizerBase
+    | None = None,
+    conversation_stats: "ConversationStats | None" = None,
 ) -> tuple[HookEventProcessor, Any]:
     """Create a hook-enabled event callback. Returns (processor, callback).
 
@@ -399,6 +409,12 @@ def create_hook_callback(
         original_callback: Callback to chain after hook processing.
         emit_hook_events: If True, emit HookExecutionEvent for each hook execution.
             Defaults to True for full observability.
+        llm: LLM instance inherited from the parent conversation, used by agent hooks.
+        llm_getter: Callable returning the conversation's current LLM. Preferred
+            over ``llm`` so agent hooks follow switch_llm()/switch_profile().
+        persistence_dir: Directory used to persist agent hook sub-conversation events.
+        visualizer: Visualizer instance passed to agent hook sub-conversations.
+        conversation_stats: Parent conversation stats that should include hook spend.
 
     Returns:
         Tuple of (HookEventProcessor, callback function).
@@ -407,6 +423,11 @@ def create_hook_callback(
         config=hook_config,
         working_dir=working_dir,
         session_id=session_id,
+        llm=llm,
+        llm_getter=llm_getter,
+        persistence_dir=persistence_dir,
+        visualizer=visualizer,
+        conversation_stats=conversation_stats,
     )
 
     processor = HookEventProcessor(
diff --git a/openhands-sdk/openhands/sdk/hooks/executor.py b/openhands-sdk/openhands/sdk/hooks/executor.py
index 6f3ce897b1..7134adc8de 100644
--- a/openhands-sdk/openhands/sdk/hooks/executor.py
+++ b/openhands-sdk/openhands/sdk/hooks/executor.py
@@ -1,19 +1,30 @@
-"""Hook executor - runs shell commands with JSON I/O."""
+"""Hook executor - runs shell commands and agent evaluations with JSON I/O."""
 
+import contextlib
 import json
 import logging
 import os
 import signal
 import subprocess
 import time
+from collections.abc import Callable
+from typing import TYPE_CHECKING
 
 from pydantic import BaseModel
 
-from openhands.sdk.hooks.config import HookDefinition
+from openhands.sdk.conversation.visualizer import ConversationVisualizerBase
+from openhands.sdk.hooks.config import HookDefinition, HookType
 from openhands.sdk.hooks.types import HookDecision, HookEvent
+from openhands.sdk.observability.laminar import observe
 from openhands.sdk.utils import sanitized_env
 
 
+if TYPE_CHECKING:
+    from openhands.sdk.conversation.base import BaseConversation
+    from openhands.sdk.conversation.conversation_stats import ConversationStats
+    from openhands.sdk.llm import LLM
+
+
 class HookResult(BaseModel):
     """Result from executing a hook.
 
@@ -27,6 +38,11 @@ class HookResult(BaseModel):
       to ``False`` and the error is logged, but the operation still proceeds.
       In particular, exit code ``1`` does **not** block — only ``2`` does.
       Hooks intended to enforce a policy must exit with ``2``.
+
+    For agent / prompt hooks, ``success=True`` means the hook produced a
+    deliberate verdict (parsed ``allow`` or ``deny``). Fall-open paths set
+    ``success=False`` with ``error`` populated, so a "we couldn't decide" allow
+    is detectable as ``decision == ALLOW and not success``.
     """
 
     success: bool = True
@@ -138,15 +154,215 @@ def cleanup_all(self) -> None:
 
 
 class HookExecutor:
-    """Executes hook commands with JSON I/O."""
+    """Executes hook commands and agent evaluations with JSON I/O."""
+
+    _JSON_DECODER = json.JSONDecoder()
 
     def __init__(
         self,
         working_dir: str | None = None,
         async_process_manager: AsyncProcessManager | None = None,
+        llm: "LLM | None" = None,
+        llm_getter: "Callable[[], LLM | None] | None" = None,
+        persistence_dir: str | None = None,
+        visualizer: type[ConversationVisualizerBase]
+        | ConversationVisualizerBase
+        | None = None,
+        conversation_stats: "ConversationStats | None" = None,
     ):
         self.working_dir = working_dir or os.getcwd()
         self.async_process_manager = async_process_manager or AsyncProcessManager()
+        self._llm = llm
+        # Prefer a getter so agent hooks always use the conversation's *current*
+        # LLM: switch_llm()/switch_profile() replace agent.llm after the executor
+        # is built, and a captured instance would go stale.
+        self._llm_getter = llm_getter
+        self.persistence_dir = persistence_dir
+        self.visualizer = visualizer
+        self.conversation_stats = conversation_stats
+
+    @property
+    def llm(self) -> "LLM | None":
+        """The LLM agent hooks should use, resolved live when a getter is set."""
+        if self._llm_getter is not None:
+            return self._llm_getter()
+        return self._llm
+
+    def _fall_open(
+        self,
+        reason: str,
+        *,
+        error: str | None = None,
+    ) -> HookResult:
+        return HookResult(
+            success=False,
+            decision=HookDecision.ALLOW,
+            reason=reason,
+            error=error or reason,
+        )
+
+    @observe(
+        name="hook.execute.agent",
+        ignore_inputs=["self", "hook", "event"],
+        ignore_output=True,
+    )
+    def _execute_agent_hook(
+        self,
+        hook: HookDefinition,
+        event: HookEvent,
+    ) -> HookResult:
+        # Lazy imports to avoid circular dependency:
+        # executor <- manager <- conversation_hooks <- local_conversation -> executor
+        from openhands.sdk.agent import Agent  # type: ignore[attr-defined]
+        from openhands.sdk.conversation.impl.local_conversation import LocalConversation
+        from openhands.sdk.conversation.response_utils import get_agent_final_response
+        from openhands.sdk.tool.spec import Tool
+
+        event_type = (
+            event.event_type
+            if isinstance(event.event_type, str)
+            else event.event_type.value
+        )
+
+        # Resolve the active conversation LLM once (a getter may rebuild it).
+        llm = self.llm
+        if llm is None:
+            logger.warning(
+                f"Agent hook has no LLM configured for event '{event_type}'"
+                " — defaulting to allow"
+            )
+            return self._fall_open("No LLM configured for agent hook")
+
+        hook_llm = llm.model_copy(
+            update={
+                "usage_id": f"agent-hook:{hook.name or 'default'}",
+                "timeout": hook.timeout,
+            }
+        )
+        # Isolate Metrics so hook spend doesn't accrue to the parent's bucket.
+        hook_llm.reset_metrics()
+
+        # Never hand the parent's already-initialized visualizer instance to the
+        # sub-conversation: LocalConversation.__init__ calls initialize() on it,
+        # which would rebind the parent visualizer to the hook's child state. Mirror
+        # the delegate pattern and ask the parent visualizer for a fresh sub-
+        # visualizer (returns None for visualizers that don't support sub-agents).
+        hook_visualizer = self.visualizer
+        if isinstance(self.visualizer, ConversationVisualizerBase):
+            hook_visualizer = self.visualizer.create_sub_visualizer(
+                f"agent-hook:{hook.name or 'default'}"
+            )
+
+        conversation = None
+        try:
+            agent = Agent(
+                llm=hook_llm,
+                tools=[Tool(name=t) for t in hook.tools],
+                include_default_tools=["FinishTool"],
+                system_prompt=hook.system_prompt,
+            )
+            # hook_config=None disables hooks in the sub-conversation (no recursion)
+            conversation = LocalConversation(
+                agent=agent,
+                workspace=self.working_dir,
+                plugins=None,
+                hook_config=None,
+                persistence_dir=self.persistence_dir,
+                visualizer=hook_visualizer,
+                max_iteration_per_run=hook.max_iterations,
+            )
+            conversation.send_message(
+                f"Evaluate this {event_type} hook event and make your decision.\n\n"
+                f"## Hook Event\n```json\n{event.model_dump_json(indent=2)}\n```"
+            )
+            conversation.run()
+            raw = get_agent_final_response(conversation.state.events)
+        except Exception as e:
+            logger.warning(
+                f"Agent hook sub-conversation failed for event '{event_type}'"
+                f" — defaulting to allow: {e}"
+            )
+            return self._fall_open(
+                "Agent hook execution failed — defaulting to allow",
+                error=str(e),
+            )
+        finally:
+            if conversation is not None:
+                self._merge_hook_conversation_stats(conversation)
+                conversation.close()
+
+        return self._parse_decision(raw, event_type)
+
+    def _extract_first_json_object(self, text: str) -> dict | None:
+        # Scan for the first decodable JSON object so prose / ```json fences
+        # around the payload don't defeat parsing.
+        for i, ch in enumerate(text):
+            if ch != "{":
+                continue
+            with contextlib.suppress(json.JSONDecodeError):
+                obj, _ = self._JSON_DECODER.raw_decode(text[i:])
+                if isinstance(obj, dict):
+                    return obj
+        return None
+
+    def _parse_decision(self, raw: str, event_type: str) -> HookResult:
+        if not raw:
+            logger.warning(
+                f"Agent hook produced no final response for event '{event_type}'"
+                " — defaulting to allow"
+            )
+            return self._fall_open(
+                "Agent hook produced no final response — defaulting to allow"
+            )
+
+        data = self._extract_first_json_object(raw)
+        if data is None:
+            logger.warning(
+                f"Agent hook returned no parseable JSON object for event"
+                f" '{event_type}' — defaulting to allow: {repr(raw)[:200]}"
+            )
+            return self._fall_open(
+                "Agent hook returned no parseable JSON — defaulting to allow"
+            )
+
+        decision_str = str(data.get("decision", "")).lower()
+        reason = str(data.get("reason", ""))
+        if decision_str == "deny":
+            return HookResult(
+                success=True,
+                blocked=True,
+                decision=HookDecision.DENY,
+                reason=reason,
+            )
+        if decision_str == "allow":
+            return HookResult(
+                success=True,
+                decision=HookDecision.ALLOW,
+                reason=reason,
+            )
+        # Missing or unknown decision: this is not a deliberate verdict, so it
+        # must be a detectable fall-open (success=False) rather than a silent
+        # allow that masquerades as a real decision.
+        logger.warning(
+            f"Agent hook returned an invalid decision for event '{event_type}'"
+            f" — defaulting to allow: {repr(decision_str)[:200]}"
+        )
+        return self._fall_open(
+            "Agent hook returned an invalid decision — defaulting to allow"
+        )
+
+    def _merge_hook_conversation_stats(self, conversation: "BaseConversation") -> None:
+        if self.conversation_stats is None:
+            return
+
+        child_stats = conversation.conversation_stats
+        for usage_id, metrics in child_stats.usage_to_metrics.items():
+            if usage_id in self.conversation_stats.usage_to_metrics:
+                existing = self.conversation_stats.usage_to_metrics[usage_id]
+                if existing is not metrics:
+                    existing.merge(metrics)
+            else:
+                self.conversation_stats.usage_to_metrics[usage_id] = metrics.deep_copy()
 
     def execute(
         self,
@@ -155,6 +371,22 @@ def execute(
         env: dict[str, str] | None = None,
     ) -> HookResult:
         """Execute a single hook."""
+        if hook.type == HookType.AGENT:
+            return self._execute_agent_hook(hook, event)
+        if hook.type == HookType.PROMPT:
+            event_type = (
+                event.event_type
+                if isinstance(event.event_type, str)
+                else event.event_type.value
+            )
+            logger.warning(
+                f"PROMPT hooks are not yet implemented — defaulting to allow"
+                f" (event_type={event_type})"
+            )
+            return self._fall_open(
+                "PROMPT hooks are not yet implemented — defaulting to allow"
+            )
+
         # Prepare environment
         hook_env = sanitized_env()
         hook_env["OPENHANDS_PROJECT_DIR"] = self.working_dir
@@ -172,6 +404,14 @@ def execute(
         # Cleanup expired async processes before starting new ones
         self.async_process_manager.cleanup_expired()
 
+        command = hook.command
+        if not command:
+            return HookResult(
+                success=False,
+                exit_code=-1,
+                error="'command' is required when type is 'command'",
+            )
+
         # Handle async hooks: fire and forget
         if hook.async_:
             try:
@@ -182,7 +422,7 @@ def execute(
                     start_new_session = False
 
                 process = subprocess.Popen(
-                    hook.command,
+                    command,
                     shell=True,
                     cwd=self.working_dir,
                     env=hook_env,
@@ -203,7 +443,7 @@ def execute(
 
                 # Track for cleanup
                 self.async_process_manager.add_process(process, hook.timeout)
-                logger.debug(f"Started async hook (PID {process.pid}): {hook.command}")
+                logger.debug(f"Started async hook (PID {process.pid}): {command}")
 
                 # Return placeholder success result
                 return HookResult(
@@ -221,7 +461,7 @@ def execute(
         try:
             # Execute the hook command synchronously
             result = subprocess.run(
-                hook.command,
+                command,
                 shell=True,
                 cwd=self.working_dir,
                 env=hook_env,
diff --git a/openhands-sdk/openhands/sdk/hooks/manager.py b/openhands-sdk/openhands/sdk/hooks/manager.py
index af57be2734..923a07cabf 100644
--- a/openhands-sdk/openhands/sdk/hooks/manager.py
+++ b/openhands-sdk/openhands/sdk/hooks/manager.py
@@ -1,13 +1,20 @@
 """Hook manager - orchestrates hook execution within conversations."""
 
 import logging
-from typing import Any
+from collections.abc import Callable
+from typing import TYPE_CHECKING, Any
 
+from openhands.sdk.conversation.visualizer import ConversationVisualizerBase
 from openhands.sdk.hooks.config import HookConfig
 from openhands.sdk.hooks.executor import HookExecutor, HookResult
 from openhands.sdk.hooks.types import HookEvent, HookEventType
 
 
+if TYPE_CHECKING:
+    from openhands.sdk.conversation.conversation_stats import ConversationStats
+    from openhands.sdk.llm import LLM
+
+
 logger = logging.getLogger(__name__)
 
 
@@ -19,9 +26,23 @@ def __init__(
         config: HookConfig | None = None,
         working_dir: str | None = None,
         session_id: str | None = None,
+        llm: "LLM | None" = None,
+        llm_getter: "Callable[[], LLM | None] | None" = None,
+        persistence_dir: str | None = None,
+        visualizer: type[ConversationVisualizerBase]
+        | ConversationVisualizerBase
+        | None = None,
+        conversation_stats: "ConversationStats | None" = None,
     ):
         self.config = config or HookConfig.load(working_dir=working_dir)
-        self.executor = HookExecutor(working_dir=working_dir)
+        self.executor = HookExecutor(
+            working_dir=working_dir,
+            llm=llm,
+            llm_getter=llm_getter,
+            persistence_dir=persistence_dir,
+            visualizer=visualizer,
+            conversation_stats=conversation_stats,
+        )
         self.session_id = session_id
         self.working_dir = working_dir
 
diff --git a/tests/cross/test_check_agent_server_rest_api_breakage.py b/tests/cross/test_check_agent_server_rest_api_breakage.py
index be88df728c..bb0a29514c 100644
--- a/tests/cross/test_check_agent_server_rest_api_breakage.py
+++ b/tests/cross/test_check_agent_server_rest_api_breakage.py
@@ -666,6 +666,22 @@ def test_split_breaking_changes_separates_three_buckets():
             "details": {},
             "text": "added body anyOf member",
         },
+        {
+            # Additive value on the hook discriminator union -> downgraded.
+            "id": "response-property-enum-value-added",
+            "details": {},
+            "text": (
+                "added the new `agent` enum value to the "
+                "`hook_config/anyOf[subschema #1: HookConfig]/stop/items/"
+                "hooks/items/type` response property for the response status `200`"
+            ),
+        },
+        {
+            # Enum value on an ordinary (non-discriminator) property -> breaking.
+            "id": "response-property-enum-value-added",
+            "details": {},
+            "text": "added the new `archived` enum value to the `status` response property",
+        },
         {
             "id": "response-property-removed",
             "details": {},
@@ -688,9 +704,22 @@ def test_split_breaking_changes_separates_three_buckets():
         "response-property-one-of-added",
         "response-body-one-of-added",
         "response-body-any-of-added",
+        "response-property-enum-value-added",
+    }
+    # The hook-discriminator enum addition is downgraded; the unrelated `status`
+    # enum addition and the body change remain breaking.
+    assert {
+        change["text"] for change in additive_oneof if "enum value" in change["text"]
+    } == {
+        "added the new `agent` enum value to the "
+        "`hook_config/anyOf[subschema #1: HookConfig]/stop/items/"
+        "hooks/items/type` response property for the response status `200`"
+    }
+    assert {change["id"] for change in other} == {
+        "response-property-enum-value-added",
+        "response-body-changed",
     }
-    assert len(other) == 1
-    assert other[0]["id"] == "response-body-changed"
+    assert any("`status`" in change["text"] for change in other)
 
 
 def test_main_passes_when_only_additive_oneof(monkeypatch, capsys):
@@ -723,7 +752,7 @@ def test_main_passes_when_only_additive_oneof(monkeypatch, capsys):
     assert _prod.main() == 0
 
     captured = capsys.readouterr()
-    assert "Additive oneOf/anyOf expansion detected" in captured.out
+    assert "Additive oneOf/anyOf expansion or enum-value additions" in captured.out
     assert "additive response oneOf expansions" in captured.out
 
 
@@ -790,7 +819,7 @@ def test_main_passes_when_body_union_addition_reports_removed_properties(
     assert _prod.main() == 0
 
     captured = capsys.readouterr()
-    assert "Additive oneOf/anyOf expansion detected" in captured.out
+    assert "Additive oneOf/anyOf expansion or enum-value additions" in captured.out
     assert "ignored 3 request/response-property removal artifact" in captured.out
     assert "ignored 1 request/response type-change artifact" in captured.out
 
@@ -878,7 +907,7 @@ def test_main_fails_when_additive_oneof_mixed_with_real_breakage(monkeypatch, ca
     assert _prod.main() == 1
 
     captured = capsys.readouterr()
-    assert "Additive oneOf/anyOf expansion detected" in captured.out
+    assert "Additive oneOf/anyOf expansion or enum-value additions" in captured.out
     assert "other than removing previously-deprecated operations" in captured.out
 
 
diff --git a/tests/examples/test_examples.py b/tests/examples/test_examples.py
index 8399aaabf7..82b94fc46b 100644
--- a/tests/examples/test_examples.py
+++ b/tests/examples/test_examples.py
@@ -28,6 +28,7 @@
     # These examples live under subdirectories (each with a single `main.py`).
     EXAMPLES_ROOT / "01_standalone_sdk" / "33_hooks",
     EXAMPLES_ROOT / "01_standalone_sdk" / "37_llm_profile_store",
+    EXAMPLES_ROOT / "01_standalone_sdk" / "51_agent_hooks",
     EXAMPLES_ROOT / "01_standalone_sdk" / "43_mixed_marketplace_skills",
     EXAMPLES_ROOT / "02_remote_agent_server" / "06_custom_tool",
     EXAMPLES_ROOT / "05_skills_and_plugins" / "01_loading_agentskills",
@@ -97,6 +98,9 @@ def test_directory_example_is_discovered() -> None:
     assert (
         EXAMPLES_ROOT / "02_remote_agent_server" / "06_custom_tool" / "main.py"
     ) in EXAMPLES
+    assert (
+        EXAMPLES_ROOT / "01_standalone_sdk" / "51_agent_hooks" / "main.py"
+    ) in EXAMPLES
 
 
 @pytest.mark.parametrize("example_path", EXAMPLES, ids=_normalize_path)
diff --git a/tests/sdk/conversation/test_local_conversation_plugins.py b/tests/sdk/conversation/test_local_conversation_plugins.py
index c58a74c6b9..cfa7b066de 100644
--- a/tests/sdk/conversation/test_local_conversation_plugins.py
+++ b/tests/sdk/conversation/test_local_conversation_plugins.py
@@ -2,6 +2,7 @@
 
 import json
 from pathlib import Path
+from unittest.mock import MagicMock, patch
 
 import pytest
 from pydantic import SecretStr
@@ -188,6 +189,45 @@ def test_plugin_hooks_combined_with_explicit_hooks(
         # (The actual hook_processor is internal, but we trust the merging works)
         conversation.close()
 
+    def test_hook_sub_conversations_receive_persistence_base_dir(
+        self, tmp_path: Path, basic_agent
+    ):
+        """Agent hook persistence should not nest under the parent conversation id."""
+        workspace = tmp_path / "workspace"
+        workspace.mkdir()
+        persistence_base = tmp_path / "state"
+        hook_config = HookConfig(
+            pre_tool_use=[
+                HookMatcher(matcher="*", hooks=[HookDefinition(command="echo test")])
+            ]
+        )
+
+        processor = MagicMock()
+        processor.on_event = MagicMock()
+        processor.set_conversation_state = MagicMock()
+        processor.run_session_start = MagicMock()
+
+        conversation = LocalConversation(
+            agent=basic_agent,
+            workspace=workspace,
+            persistence_dir=persistence_base,
+            hook_config=hook_config,
+            visualizer=None,
+        )
+
+        with patch(
+            "openhands.sdk.conversation.impl.local_conversation.create_hook_callback",
+            return_value=(processor, processor.on_event),
+        ) as mock_create_hook_callback:
+            conversation._ensure_plugins_loaded()
+
+        assert conversation.state.persistence_dir is not None
+        assert Path(conversation.state.persistence_dir).parent == persistence_base
+        assert mock_create_hook_callback.call_args.kwargs["persistence_dir"] == str(
+            persistence_base
+        )
+        conversation.close()
+
     def test_plugins_not_loaded_until_needed(self, tmp_path: Path, basic_agent):
         """Test that plugins are not loaded in constructor (lazy loading)."""
         plugin_dir = create_test_plugin(
diff --git a/tests/sdk/hooks/test_config.py b/tests/sdk/hooks/test_config.py
index f5a1c832e4..64865d968d 100644
--- a/tests/sdk/hooks/test_config.py
+++ b/tests/sdk/hooks/test_config.py
@@ -10,6 +10,124 @@
 from openhands.sdk.hooks.types import HookEventType
 
 
+def test_command_hook_requires_command():
+    with pytest.raises(ValidationError, match="'command' is required"):
+        HookDefinition(type=HookType.COMMAND)
+
+
+def test_command_hook_valid():
+    h = HookDefinition(command="echo hi")
+    assert h.type == HookType.COMMAND
+    assert h.command == "echo hi"
+
+
+@pytest.mark.parametrize(
+    "kwargs",
+    [
+        {"type": "agent", "system_prompt": "Block writes to /etc"},
+        {"type": "agent"},
+    ],
+    ids=["with-system-prompt", "without-system-prompt"],
+)
+def test_agent_hook_valid(kwargs):
+    h = HookDefinition(**kwargs)
+    assert h.type == HookType.AGENT
+
+
+@pytest.mark.parametrize(
+    "kwargs,expected",
+    [
+        ({"command": "block.sh"}, "block.sh"),
+        (
+            {
+                "type": "agent",
+                "name": "block-deletions",
+                "system_prompt": "Block rm -rf",
+            },
+            "agent-hook:block-deletions",
+        ),
+        (
+            {"type": "agent", "system_prompt": "Block network calls to external IPs"},
+            "agent-hook:Block network calls ",
+        ),
+        ({"type": "agent", "system_prompt": "A" * 100}, f"agent-hook:{'A' * 20}"),
+        ({"type": "agent"}, "agent-hook:agent"),
+    ],
+    ids=[
+        "command",
+        "agent-named",
+        "agent-prompt-prefix",
+        "agent-prompt-truncated",
+        "agent-fallback",
+    ],
+)
+def test_display_command(kwargs, expected):
+    h = HookDefinition(**kwargs)
+    assert h.display_command == expected
+
+
+def test_multiple_agent_hooks_are_distinguishable():
+    hooks = [
+        HookDefinition(
+            type=HookType.AGENT,
+            name="block-deletions",
+            system_prompt="Block rm -rf",
+        ),
+        HookDefinition(
+            type=HookType.AGENT,
+            system_prompt="Block network calls to external IPs",
+        ),
+        HookDefinition(
+            type=HookType.AGENT,
+            system_prompt="Verify all tasks are complete",
+        ),
+    ]
+    assert len({h.display_command for h in hooks}) == 3
+
+
+@pytest.mark.parametrize(
+    "kwargs,match",
+    [
+        (
+            {"type": "agent", "command": "echo hi"},
+            "'command' must not be set when type is 'agent'",
+        ),
+        ({"type": "command"}, "'command' is required"),
+    ],
+    ids=["agent-rejects-command", "command-requires-command"],
+)
+def test_hook_definition_validation_errors(kwargs, match):
+    with pytest.raises(Exception, match=match):
+        HookDefinition(**kwargs)
+
+
+def test_agent_hook_rejects_async():
+    with pytest.raises(Exception, match="not supported for agent hooks"):
+        HookDefinition.model_validate({"type": "agent", "async": True})
+
+
+def test_agent_hook_from_json():
+    data = {
+        "stop": [
+            {
+                "hooks": [
+                    {
+                        "type": "agent",
+                        "system_prompt": "Verify all tasks are done",
+                        "timeout": 30,
+                    }
+                ]
+            }
+        ]
+    }
+    config = HookConfig.from_dict(data)
+    hooks = config.get_hooks_for_event(HookEventType.STOP)
+    assert len(hooks) == 1
+    assert hooks[0].type == HookType.AGENT
+    assert hooks[0].system_prompt == "Verify all tasks are done"
+    assert hooks[0].timeout == 30
+
+
 class TestHookMatcher:
     """Tests for HookMatcher pattern matching."""
 
diff --git a/tests/sdk/hooks/test_executor.py b/tests/sdk/hooks/test_executor.py
index 26d31d0565..00c5640991 100644
--- a/tests/sdk/hooks/test_executor.py
+++ b/tests/sdk/hooks/test_executor.py
@@ -3,12 +3,17 @@
 import json
 import subprocess
 from unittest import mock
+from unittest.mock import MagicMock, patch
 
 import pytest
+from pydantic import SecretStr
 
-from openhands.sdk.hooks.config import HookDefinition
+from openhands.sdk.conversation.conversation_stats import ConversationStats
+from openhands.sdk.hooks.config import HookDefinition, HookType
 from openhands.sdk.hooks.executor import HookExecutor
 from openhands.sdk.hooks.types import HookDecision, HookEvent, HookEventType
+from openhands.sdk.llm import LLM
+from openhands.sdk.llm.utils.metrics import Metrics
 from tests.command_utils import python_command
 
 
@@ -421,3 +426,469 @@ def test_cleanup_expired_keeps_active_processes(self, tmp_path):
 
         # Clean up for test teardown
         process.terminate()
+
+
+class TestAgentHookExecution:
+    """Tests for HookType.AGENT execution path."""
+
+    # Patch at the package/module level so lazy `from X import Y` picks up the mock.
+    _AGENT_PATH = "openhands.sdk.agent.Agent"
+    _CONV_PATH = "openhands.sdk.conversation.impl.local_conversation.LocalConversation"
+    _RESPONSE_PATH = (
+        "openhands.sdk.conversation.response_utils.get_agent_final_response"
+    )
+
+    @pytest.fixture
+    def mock_llm(self):
+        return LLM(model="gpt-4o", api_key=SecretStr("test-key"), usage_id="test")
+
+    @pytest.fixture
+    def executor(self, tmp_path, mock_llm):
+        return HookExecutor(working_dir=str(tmp_path), llm=mock_llm)
+
+    @pytest.fixture
+    def executor_no_llm(self, tmp_path):
+        return HookExecutor(working_dir=str(tmp_path), llm=None)
+
+    @pytest.fixture
+    def sample_event(self):
+        return HookEvent(
+            event_type=HookEventType.STOP,
+            session_id="test-session",
+        )
+
+    def test_execute_dispatches_to_agent_hook(self, executor, sample_event):
+        """execute() routes AGENT type to _execute_agent_hook, not subprocess."""
+        from openhands.sdk.hooks.executor import HookResult
+
+        hook = HookDefinition(
+            type=HookType.AGENT,
+            system_prompt="Verify task completion",
+        )
+
+        with patch.object(
+            executor,
+            "_execute_agent_hook",
+            return_value=HookResult(
+                success=True, decision=HookDecision.ALLOW, reason="ok"
+            ),
+        ) as mock_agent:
+            result = executor.execute(hook, sample_event)
+            mock_agent.assert_called_once_with(hook, sample_event)
+
+        assert result.decision == HookDecision.ALLOW
+        assert result.should_continue
+
+    def test_no_llm_defaults_to_allow(self, executor_no_llm, sample_event):
+        """Agent hook with no LLM configured defaults to allow without error."""
+        hook = HookDefinition(
+            type=HookType.AGENT,
+            system_prompt="Check something",
+        )
+        result = executor_no_llm.execute(hook, sample_event)
+
+        assert not result.success
+        assert result.decision == HookDecision.ALLOW
+        assert not result.blocked
+
+    def test_deny_decision_blocks(self, executor, sample_event):
+        """_execute_agent_hook parsing deny JSON produces a blocked HookResult."""
+        deny_json = '{"decision": "deny", "reason": "Tasks not complete"}'
+
+        with (
+            patch(self._AGENT_PATH),
+            patch(self._CONV_PATH) as mock_conv_cls,
+            patch(self._RESPONSE_PATH, return_value=deny_json),
+        ):
+            mock_conv_cls.return_value = MagicMock()
+            hook = HookDefinition(
+                type=HookType.AGENT,
+                system_prompt="Check tasks",
+            )
+            result = executor._execute_agent_hook(hook, sample_event)
+
+        assert result.blocked
+        assert result.decision == HookDecision.DENY
+        assert result.reason == "Tasks not complete"
+        assert not result.should_continue
+
+    def test_allow_decision_passes(self, executor, sample_event):
+        """_execute_agent_hook parsing allow JSON produces a non-blocking HookResult."""
+        allow_json = '{"decision": "allow", "reason": "Looks safe"}'
+
+        with (
+            patch(self._AGENT_PATH),
+            patch(self._CONV_PATH) as mock_conv_cls,
+            patch(self._RESPONSE_PATH, return_value=allow_json),
+        ):
+            mock_conv_cls.return_value = MagicMock()
+            hook = HookDefinition(type=HookType.AGENT)
+            result = executor._execute_agent_hook(hook, sample_event)
+
+        assert not result.blocked
+        assert result.decision == HookDecision.ALLOW
+        assert result.reason == "Looks safe"
+
+    @pytest.mark.parametrize(
+        "payload",
+        [
+            '{"reason": "no decision field"}',
+            '{"decision": "maybe", "reason": "not allow or deny"}',
+            '{"decision": "", "reason": "empty decision"}',
+        ],
+    )
+    def test_invalid_decision_falls_open(self, executor, sample_event, payload):
+        """Missing/unknown decision is a fall-open, not a deliberate allow."""
+        with (
+            patch(self._AGENT_PATH),
+            patch(self._CONV_PATH) as mock_conv_cls,
+            patch(self._RESPONSE_PATH, return_value=payload),
+        ):
+            mock_conv_cls.return_value = MagicMock()
+            hook = HookDefinition(type=HookType.AGENT)
+            result = executor._execute_agent_hook(hook, sample_event)
+
+        assert not result.blocked
+        assert result.decision == HookDecision.ALLOW
+        assert result.should_continue
+        assert result.success is False
+        assert result.error is not None
+
+    def test_markdown_wrapped_deny_is_parsed(self, executor, sample_event):
+        """```json fenced JSON is honoured, not treated as non-JSON."""
+        fenced = '```json\n{"decision": "deny", "reason": "Sensitive file read"}\n```'
+
+        with (
+            patch(self._AGENT_PATH),
+            patch(self._CONV_PATH) as mock_conv_cls,
+            patch(self._RESPONSE_PATH, return_value=fenced),
+        ):
+            mock_conv_cls.return_value = MagicMock()
+            hook = HookDefinition(type=HookType.AGENT)
+            result = executor._execute_agent_hook(hook, sample_event)
+
+        assert result.blocked
+        assert result.decision == HookDecision.DENY
+        assert result.reason == "Sensitive file read"
+
+    def test_plain_fence_without_language_tag_is_parsed(self, executor, sample_event):
+        """Some LLMs use ``` without a language tag — still extract the JSON."""
+        fenced = '```\n{"decision": "allow", "reason": "ok"}\n```'
+
+        with (
+            patch(self._AGENT_PATH),
+            patch(self._CONV_PATH) as mock_conv_cls,
+            patch(self._RESPONSE_PATH, return_value=fenced),
+        ):
+            mock_conv_cls.return_value = MagicMock()
+            hook = HookDefinition(type=HookType.AGENT)
+            result = executor._execute_agent_hook(hook, sample_event)
+
+        assert not result.blocked
+        assert result.decision == HookDecision.ALLOW
+        assert result.reason == "ok"
+
+    def test_prose_prefix_before_json_is_parsed(self, executor, sample_event):
+        """Prose before the JSON object must not defeat the parser."""
+        prose_then_json = (
+            "After reviewing the workspace I found REPORT.md is missing.\n\n"
+            '{"decision": "deny", "reason": "missing deliverable"}'
+        )
+
+        with (
+            patch(self._AGENT_PATH),
+            patch(self._CONV_PATH) as mock_conv_cls,
+            patch(self._RESPONSE_PATH, return_value=prose_then_json),
+        ):
+            mock_conv_cls.return_value = MagicMock()
+            hook = HookDefinition(type=HookType.AGENT)
+            result = executor._execute_agent_hook(hook, sample_event)
+
+        assert result.blocked
+        assert result.decision == HookDecision.DENY
+        assert result.reason == "missing deliverable"
+
+    def test_prose_suffix_after_json_is_parsed(self, executor, sample_event):
+        """Trailing chatter after the JSON object must not defeat the parser."""
+        json_then_prose = (
+            '{"decision": "deny", "reason": "sensitive file"}\n\n'
+            "Let me know if you need more details."
+        )
+
+        with (
+            patch(self._AGENT_PATH),
+            patch(self._CONV_PATH) as mock_conv_cls,
+            patch(self._RESPONSE_PATH, return_value=json_then_prose),
+        ):
+            mock_conv_cls.return_value = MagicMock()
+            hook = HookDefinition(type=HookType.AGENT)
+            result = executor._execute_agent_hook(hook, sample_event)
+
+        assert result.blocked
+        assert result.decision == HookDecision.DENY
+        assert result.reason == "sensitive file"
+
+    def test_invalid_json_defaults_to_allow(self, executor, sample_event):
+        """Non-JSON response falls open: ALLOW + success=False + error set."""
+        with (
+            patch(self._AGENT_PATH),
+            patch(self._CONV_PATH) as mock_conv_cls,
+            patch(self._RESPONSE_PATH, return_value="I think you should allow this."),
+        ):
+            mock_conv_cls.return_value = MagicMock()
+            hook = HookDefinition(type=HookType.AGENT)
+            result = executor._execute_agent_hook(hook, sample_event)
+
+        assert not result.blocked
+        assert result.decision == HookDecision.ALLOW
+        assert result.should_continue
+        assert result.success is False
+        assert result.error is not None
+
+    def test_sub_conversation_failure_defaults_to_allow(self, executor, sample_event):
+        """_execute_agent_hook when sub-conversation raises defaults to allow."""
+        with (
+            patch(self._AGENT_PATH),
+            patch(self._CONV_PATH, side_effect=RuntimeError("workspace error")),
+        ):
+            hook = HookDefinition(type=HookType.AGENT)
+            result = executor._execute_agent_hook(hook, sample_event)
+
+        assert not result.blocked
+        assert result.decision == HookDecision.ALLOW
+        assert result.error is not None
+
+    def test_empty_response_defaults_to_allow(self, executor, sample_event):
+        """_execute_agent_hook when agent produces no response defaults to allow."""
+        with (
+            patch(self._AGENT_PATH),
+            patch(self._CONV_PATH) as mock_conv_cls,
+            patch(self._RESPONSE_PATH, return_value=""),
+        ):
+            mock_conv_cls.return_value = MagicMock()
+            hook = HookDefinition(type=HookType.AGENT)
+            result = executor._execute_agent_hook(hook, sample_event)
+
+        assert not result.blocked
+        assert result.decision == HookDecision.ALLOW
+        assert result.success is False
+
+    def test_timeout_propagated_to_hook_llm(self, executor, sample_event):
+        """hook.timeout is forwarded to the copied LLM, parent's timeout untouched."""
+        parent_timeout = executor.llm.timeout
+        captured_llm = {}
+
+        def capture_agent_init(**kwargs):
+            captured_llm["llm"] = kwargs.get("llm")
+            return MagicMock()
+
+        with (
+            patch(self._AGENT_PATH, side_effect=capture_agent_init),
+            patch(self._CONV_PATH, side_effect=RuntimeError("stop early")),
+        ):
+            hook = HookDefinition(type=HookType.AGENT, timeout=7)
+            executor._execute_agent_hook(hook, sample_event)
+
+        hook_llm = captured_llm.get("llm")
+        assert hook_llm is not None
+        assert hook_llm.timeout == 7
+        assert executor.llm.timeout == parent_timeout
+
+    def test_hook_metrics_under_usage_id(self, executor, sample_event):
+        """Hook LLM uses per-hook usage_id and an isolated Metrics object."""
+        parent_metrics = executor.llm.metrics
+
+        captured_llm = {}
+
+        def capture_agent_init(**kwargs):
+            captured_llm["llm"] = kwargs.get("llm")
+            return MagicMock()
+
+        with (
+            patch(self._AGENT_PATH, side_effect=capture_agent_init),
+            patch(self._CONV_PATH, side_effect=RuntimeError("stop early")),
+        ):
+            hook = HookDefinition(type=HookType.AGENT)
+            executor._execute_agent_hook(hook, sample_event)
+
+        hook_llm = captured_llm.get("llm")
+        assert hook_llm is not None
+        assert hook_llm is not executor.llm
+        assert hook_llm.usage_id == "agent-hook:default"
+        assert hook_llm.metrics is not parent_metrics
+
+    def test_llm_getter_is_resolved_live(self, tmp_path, sample_event):
+        """An llm_getter is read at execution time, so agent hooks follow
+        switch_llm()/switch_profile() instead of using a stale captured LLM."""
+        current = {
+            "llm": LLM(model="gpt-4o", api_key=SecretStr("k1"), usage_id="first")
+        }
+        executor = HookExecutor(
+            working_dir=str(tmp_path),
+            llm_getter=lambda: current["llm"],
+        )
+
+        # Simulate switch_llm(): the conversation rebinds its agent's LLM.
+        current["llm"] = LLM(
+            model="gpt-5.5", api_key=SecretStr("k2"), usage_id="second"
+        )
+
+        captured_llm = {}
+
+        def capture_agent_init(**kwargs):
+            captured_llm["llm"] = kwargs.get("llm")
+            return MagicMock()
+
+        with (
+            patch(self._AGENT_PATH, side_effect=capture_agent_init),
+            patch(self._CONV_PATH, side_effect=RuntimeError("stop early")),
+        ):
+            executor._execute_agent_hook(
+                HookDefinition(type=HookType.AGENT), sample_event
+            )
+
+        hook_llm = captured_llm.get("llm")
+        assert hook_llm is not None
+        # Copied from the *current* LLM (gpt-5.5), not the one present at init.
+        assert hook_llm.model == "gpt-5.5"
+
+    def test_hook_metrics_are_merged_into_parent_stats(
+        self, tmp_path, mock_llm, sample_event
+    ):
+        """Child agent-hook spend is included in parent conversation stats."""
+        parent_stats = ConversationStats()
+        existing_hook_metrics = Metrics(model_name="gpt-4o")
+        existing_hook_metrics.add_cost(0.25)
+        parent_stats.usage_to_metrics["agent-hook:security-check"] = (
+            existing_hook_metrics
+        )
+
+        child_hook_metrics = Metrics(model_name="gpt-4o")
+        child_hook_metrics.add_cost(0.75)
+        child_stats = ConversationStats(
+            usage_to_metrics={"agent-hook:security-check": child_hook_metrics}
+        )
+        mock_conversation = MagicMock()
+        mock_conversation.conversation_stats = child_stats
+        mock_conversation.state.events = []
+
+        executor = HookExecutor(
+            working_dir=str(tmp_path),
+            llm=mock_llm,
+            conversation_stats=parent_stats,
+        )
+
+        with (
+            patch(self._AGENT_PATH),
+            patch(self._CONV_PATH, return_value=mock_conversation),
+            patch(
+                self._RESPONSE_PATH,
+                return_value='{"decision": "allow", "reason": "ok"}',
+            ),
+        ):
+            hook = HookDefinition(type=HookType.AGENT, name="security-check")
+            result = executor._execute_agent_hook(hook, sample_event)
+
+        assert result.decision == HookDecision.ALLOW
+        assert parent_stats.usage_to_metrics[
+            "agent-hook:security-check"
+        ].accumulated_cost == pytest.approx(1.0)
+        assert parent_stats.get_combined_metrics().accumulated_cost == pytest.approx(
+            1.0
+        )
+
+    def test_hook_usage_id_uses_hook_name(self, executor, sample_event):
+        """A named hook gets its own usage_id bucket: agent-hook:<name>."""
+        captured_llm = {}
+
+        def capture_agent_init(**kwargs):
+            captured_llm["llm"] = kwargs.get("llm")
+            return MagicMock()
+
+        with (
+            patch(self._AGENT_PATH, side_effect=capture_agent_init),
+            patch(self._CONV_PATH, side_effect=RuntimeError("stop early")),
+        ):
+            hook = HookDefinition(type=HookType.AGENT, name="security-check")
+            executor._execute_agent_hook(hook, sample_event)
+
+        hook_llm = captured_llm.get("llm")
+        assert hook_llm is not None
+        assert hook_llm.usage_id == "agent-hook:security-check"
+
+    def test_parent_visualizer_instance_is_not_rebound(self, tmp_path, mock_llm):
+        """The parent visualizer instance must never be handed to the hook conv.
+
+        LocalConversation.initialize() rebinds a visualizer instance to its own
+        state, so the executor must request a fresh sub-visualizer instead.
+        """
+        from openhands.sdk.conversation.visualizer import ConversationVisualizerBase
+
+        sub_viz = MagicMock(spec=ConversationVisualizerBase)
+
+        class _Viz(ConversationVisualizerBase):
+            def on_event(self, event):  # pragma: no cover - not exercised
+                pass
+
+            def create_sub_visualizer(self, agent_id):
+                self.requested_agent_id = agent_id
+                return sub_viz
+
+        parent_viz = _Viz()
+        executor = HookExecutor(
+            working_dir=str(tmp_path),
+            llm=mock_llm,
+            visualizer=parent_viz,
+        )
+
+        captured = {}
+
+        def capture_conv_init(**kwargs):
+            captured["visualizer"] = kwargs.get("visualizer")
+            raise RuntimeError("stop early")
+
+        sample_event = HookEvent(
+            event_type=HookEventType.PRE_TOOL_USE, tool_name="BashTool"
+        )
+        with (
+            patch(self._AGENT_PATH),
+            patch(self._CONV_PATH, side_effect=capture_conv_init),
+        ):
+            hook = HookDefinition(type=HookType.AGENT, name="security-check")
+            executor._execute_agent_hook(hook, sample_event)
+
+        assert captured["visualizer"] is sub_viz
+        assert captured["visualizer"] is not parent_viz
+        assert parent_viz.requested_agent_id == "agent-hook:security-check"
+
+
+class TestPromptHookNotImplemented:
+    """HookType.PROMPT is a future stub — execution defaults to allow, never crashes."""
+
+    @pytest.fixture
+    def executor(self, tmp_path):
+        return HookExecutor(working_dir=str(tmp_path))
+
+    @pytest.fixture
+    def sample_event(self):
+        return HookEvent(event_type=HookEventType.PRE_TOOL_USE, tool_name="BashTool")
+
+    def test_prompt_hook_defaults_to_allow(self, executor, sample_event):
+        """Executing a PROMPT hook returns allow instead of crashing."""
+        hook = HookDefinition(type=HookType.PROMPT, prompt="evaluate this event")
+        result = executor.execute(hook, sample_event)
+        assert result.decision == HookDecision.ALLOW
+        assert result.success is False
+        assert "not yet implemented" in (result.reason or "")
+
+    def test_prompt_hook_does_not_block(self, executor, sample_event):
+        """PROMPT hook must not block the operation while unimplemented."""
+        hook = HookDefinition(type=HookType.PROMPT, prompt="evaluate this event")
+        result = executor.execute(hook, sample_event)
+        assert result.blocked is False
+        assert result.should_continue is True
+
+    def test_prompt_hook_without_command_validates(self):
+        """PROMPT hook with no command is valid at config time (future use)."""
+        hook = HookDefinition(type=HookType.PROMPT, prompt="evaluate this event")
+        assert hook.command == ""