Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 39 additions & 4 deletions .github/scripts/check_agent_server_rest_api_breakage.py
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,38 @@ def _validate_removed_schema_properties(
)


# oasdiff rule IDs for enum-value additions in response schemas.
_RESPONSE_ENUM_VALUE_ADDED_IDS = frozenset(
{
"response-property-enum-value-added",
"response-write-only-property-enum-value-added",
}
)

# Response properties that are known extensible discriminated-union discriminators
# and may therefore grow new enum values additively. Adding a HookType value
# (e.g. "agent") to a hook definition's `type` is safe because hook configs are an
# extensible union and clients must tolerate unknown discriminator values. This is
# intentionally scoped to the hook discriminator so an ordinary new response enum
# value elsewhere (a new status/mode/etc.) is still treated as a breaking change.
_EXTENSIBLE_DISCRIMINATOR_PROPERTY_RE = re.compile(
r"HookConfig\b.*\bhooks/items/type\b"
)


def _is_additive_discriminator_enum_value(change: dict) -> bool:
"""Return True for additive enum values on a known extensible discriminator.

Adding a value to a response enum is normally breaking (generated clients may
treat the enum exhaustively), so this is scoped narrowly to the hook config
discriminator union rather than allowlisting every response enum addition.
"""
if str(change.get("id", "")) not in _RESPONSE_ENUM_VALUE_ADDED_IDS:
return False
text = str(change.get("text", ""))
return bool(_EXTENSIBLE_DISCRIMINATOR_PROPERTY_RE.search(text))


def _is_union_property_removal_artifact(change: dict) -> bool:
"""Return True for property removals that are artifacts of union widening.

Expand Down Expand Up @@ -606,7 +638,9 @@ def _split_breaking_changes(
removed_schema_properties.append(change)
continue

if change_id in _ADDITIVE_RESPONSE_ONEOF_IDS:
if change_id in _ADDITIVE_RESPONSE_ONEOF_IDS or (
_is_additive_discriminator_enum_value(change)
):
additive_response_oneof.append(change)
continue

Expand Down Expand Up @@ -799,9 +833,10 @@ def main() -> int:
if additive_response_oneof:
print(
f"\n::notice title={PYPI_DISTRIBUTION} REST API::"
"Additive oneOf/anyOf expansion detected in response schemas. "
"This is expected for extensible discriminated-union APIs and "
"does not break backward compatibility."
"Additive oneOf/anyOf expansion or enum-value additions detected "
"in response schemas. This is expected for extensible "
"discriminated-union APIs and does not break backward "
"compatibility."
)
for item in additive_response_oneof:
print(f" - {item.get('text', str(item))}")
Expand Down
60 changes: 60 additions & 0 deletions examples/01_standalone_sdk/51_agent_hooks/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Agent-based Hooks Example

This folder demonstrates the `type="agent"` hook — a lifecycle hook whose
decision is produced by an LLM-driven sub-agent rather than a shell script.

For shell-command hooks see [`../33_hooks/`](../33_hooks).

## Why an agent hook

A shell-based PreToolUse hook can only block what its blacklist literally
matches: if the main agent rewrites `cat /etc/passwd` as
`awk '{print}' /etc/passwd`, the command slips through. An agent hook instead
reasons about the **semantic intent** of the command — "reading a sensitive
system file" — and denies it regardless of the exact command used.

## Example

- **main.py** — Two agent hooks, each in its own conversation:
  - **PreToolUse** "security reviewer" denies a command whose intent is to
    read `/etc/passwd`, even though the command contains no keyword that a
    blacklist would match.
  - **Stop** "quality reviewer" refuses to let the main agent finish until
    the required deliverable (`REPORT.md`, with at least one bullet point)
    is present in the workspace.

Each hook decision is printed to the console via a `HookExecutionEvent`
callback, so you can watch the allow/deny outcomes as the demo runs.

## Running

```bash
export LLM_API_KEY="your-key"
export LLM_MODEL="anthropic/claude-sonnet-4-5-20250929" # optional
export LLM_BASE_URL="https://your-endpoint" # optional

python main.py
```

## How an agent hook is configured

```python
HookDefinition(
    type=HookType.AGENT,
    name="security-reviewer",  # bucket for cost metrics (agent-hook:<name>)
    system_prompt="...",       # instructs the hook agent; must request JSON
    tools=["file_editor"],     # optional tools the hook agent may use
                               # (use registered names, e.g. "file_editor",
                               # "terminal" — not class names like
                               # "FileEditorTool")
    timeout=60,                # forwarded to the per-hook LLM copy
    max_iterations=3,          # cap on hook sub-conversation steps
)
```

The hook agent receives the event JSON and must reply with:

```json
{"decision": "allow" | "deny", "reason": "<short explanation>"}
```

Anything else (non-JSON, missing field, sub-conversation error) defaults to
`allow` so a broken hook cannot wedge the main agent.
204 changes: 204 additions & 0 deletions examples/01_standalone_sdk/51_agent_hooks/main.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,204 @@
"""OpenHands Agent SDK — Agent-based Hooks Example

Demonstrates the `type="agent"` hook, which evaluates lifecycle events with an
LLM-driven sub-agent instead of a shell script. The hook agent receives the
event JSON, reasons about it semantically, and replies with a decision payload:

{"decision": "allow" | "deny", "reason": "..."}

Two demos:

- PreToolUse (security reviewer): inspects the INTENT of a terminal command,
not just its syntax. A command like `awk '{print}' /etc/passwd` would slip
past a blacklist of command names, but the agent hook recognises it as
reading a sensitive system file and denies it.
- Stop (quality reviewer): refuses to let the main agent finish until the
required deliverable exists in the workspace.

For shell-command hooks see ../33_hooks/33_hooks.py.
"""

import os
import tempfile
from pathlib import Path

from pydantic import SecretStr

from openhands.sdk import LLM, Conversation
from openhands.sdk.conversation.state import ConversationExecutionStatus
from openhands.sdk.event.hook_execution import HookExecutionEvent
from openhands.sdk.hooks import HookConfig, HookDefinition, HookMatcher, HookType
from openhands.tools.preset.default import get_default_agent


# Keep the demo conversations short: a small per-run iteration cap means a hook
# that keeps denying (or a model that keeps retrying) fails fast instead of
# burning calls up to the default 500-iteration limit / CI subprocess timeout.
MAX_ITERATIONS = 10


# Configure LLM
api_key = os.getenv("LLM_API_KEY")
if api_key is None:
    # Raise explicitly rather than `assert`: assertions are stripped when Python
    # runs with -O, which would let a missing key reach SecretStr(...) below.
    raise RuntimeError("LLM_API_KEY environment variable is not set.")
# Optional overrides; the model falls back to the default named here and the
# base URL stays None when LLM_BASE_URL is unset.
model = os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929")
base_url = os.getenv("LLM_BASE_URL")

# Shared LLM template; run_demo() copies it so each demo tracks its own metrics.
llm = LLM(
    usage_id="agent",
    model=model,
    base_url=base_url,
    api_key=SecretStr(api_key),
)

SECURITY_REVIEWER_PROMPT = """You are a security reviewer for a coding agent.
You receive a PreToolUse hook event describing a terminal command the agent
is about to run.

Reason about the SEMANTIC INTENT of the command, not just its syntax. Block:
- reading sensitive system files (/etc/passwd, /etc/shadow, ~/.ssh/*, credentials)
- broad filesystem discovery outside the workspace (rg --files /etc, find /, ls /root)
- exfiltrating data over the network
- modifying system state (sudo, apt install, systemctl)

Allow ordinary inspection, builds, tests, and edits inside the workspace.
When unsure, prefer allow.

Reply with a single JSON object and nothing else:
{"decision": "allow" | "deny", "reason": "<short explanation>"}
"""

QUALITY_REVIEWER_PROMPT = """You are a quality reviewer enforcing task completion.
You receive a Stop hook event when the main agent tries to finish.

The task requires the file REPORT.md to exist in the workspace and contain at
least one bullet point describing the repository. Use the file_editor tool to
check whether the file exists and inspect its contents.

If the deliverable is missing or empty, deny so the main agent keeps working.
Otherwise allow.

Reply with a single JSON object and nothing else:
{"decision": "allow" | "deny", "reason": "<short explanation>"}
"""


def hook_logger(event) -> None:
    """Print each hook decision so the demo output explains itself.

    Only ``HookExecutionEvent`` instances are reported; every other event is
    ignored. The printed line carries the hook event type, an outcome tag
    (DENY when the hook blocked, ALLOW when it succeeded, FAIL otherwise) and
    the hook command, followed by the reason on a second line when one is set.
    """
    if isinstance(event, HookExecutionEvent):
        # A block takes precedence over the success flag when picking the tag.
        if event.blocked:
            outcome = "DENY "
        elif event.success:
            outcome = "ALLOW"
        else:
            outcome = "FAIL "
        summary = f" [hook] {event.hook_event_type} {outcome} -> {event.hook_command}"
        if event.reason:
            summary = f"{summary}\n reason: {event.reason}"
        print(summary)


def run_demo(workspace: Path, hook_config: HookConfig, message: str) -> float:
    """Run a single demo in a dedicated conversation and return what it cost.

    The shared ``llm`` is copied and its metrics reset, so the cost reported
    for one demo never includes another demo's spend. The conversation is
    capped at ``MAX_ITERATIONS`` per run and its final status is checked, so
    the example fails fast instead of looping.

    Args:
        workspace: Directory the conversation works in.
        hook_config: Hook configuration active for this conversation only.
        message: User message that starts the demo.

    Returns:
        The accumulated cost from the conversation's combined metrics.

    Raises:
        RuntimeError: If the conversation ends in the ERROR or STUCK state.
    """
    # Isolate metrics: copy the module-level LLM, then zero the copy's counters.
    isolated_llm = llm.model_copy()
    isolated_llm.reset_metrics()

    agent = get_default_agent(llm=isolated_llm)
    conversation = Conversation(
        agent=agent,
        workspace=str(workspace),
        hook_config=hook_config,
        callbacks=[hook_logger],
        max_iteration_per_run=MAX_ITERATIONS,
    )
    conversation.send_message(message)
    conversation.run()

    final_status = conversation.state.execution_status
    failure_states = (
        ConversationExecutionStatus.ERROR,
        ConversationExecutionStatus.STUCK,
    )
    if final_status not in failure_states:
        metrics = conversation.conversation_stats.get_combined_metrics()
        return metrics.accumulated_cost

    raise RuntimeError(
        f"Demo conversation ended in {final_status.value} state "
        "before reaching a decision."
    )


# Each demo runs in its own conversation with only the hook it needs. Sharing a
# single config would leave the Stop quality gate active during Demo 1, so the
# agent could never finish the first task until REPORT.md existed — coupling two
# unrelated demos and burning iterations.

# Demo 1 config: a single PreToolUse agent hook ("security-reviewer") attached
# through a matcher of "terminal" (presumably the registered tool name — confirm
# against HookMatcher). No tools are passed to this hook agent; it is driven by
# SECURITY_REVIEWER_PROMPT alone.
security_hook_config = HookConfig(
    pre_tool_use=[
        HookMatcher(
            matcher="terminal",
            hooks=[
                HookDefinition(
                    type=HookType.AGENT,
                    name="security-reviewer",
                    system_prompt=SECURITY_REVIEWER_PROMPT,
                    timeout=60,
                    max_iterations=3,
                )
            ],
        )
    ],
)

# Demo 2 config: a single Stop agent hook ("quality-reviewer"). The matcher has
# no `matcher` argument, and the hook agent is given the "file_editor" tool,
# which QUALITY_REVIEWER_PROMPT tells it to use to inspect REPORT.md. It gets a
# larger timeout and iteration cap than the security reviewer.
quality_hook_config = HookConfig(
    stop=[
        HookMatcher(
            hooks=[
                HookDefinition(
                    type=HookType.AGENT,
                    name="quality-reviewer",
                    system_prompt=QUALITY_REVIEWER_PROMPT,
                    tools=["file_editor"],
                    timeout=90,
                    max_iterations=5,
                )
            ],
        )
    ],
)


# Both demos share one temporary workspace directory, which the context manager
# removes when the block exits.
with tempfile.TemporaryDirectory() as tmpdir:
    workspace = Path(tmpdir)
    # Running sum of the per-demo costs returned by run_demo().
    total_cost = 0.0

    # Demo 1: only the PreToolUse security hook is active.
    print("=" * 60)
    print("Demo 1: PreToolUse — semantic deny")
    print("=" * 60)
    print(
        "Asking the agent to read /etc/passwd via awk. The literal command\n"
        "wouldn't match a syntactic blacklist (no `cat`, no `/etc/shadow`\n"
        "keyword), but the security-reviewer agent should recognise the\n"
        "intent and deny.\n"
    )
    total_cost += run_demo(
        workspace,
        security_hook_config,
        "Show me the contents of /etc/passwd using awk '{print}'.",
    )

    # Demo 2: only the Stop quality hook is active.
    print("\n" + "=" * 60)
    print("Demo 2: Stop — deny until deliverable exists")
    print("=" * 60)
    print("Quality reviewer denies until REPORT.md exists with a bullet point.\n")
    total_cost += run_demo(
        workspace,
        quality_hook_config,
        "Write REPORT.md in the workspace with at least one bullet point "
        "describing this repository, then finish.",
    )

    # Show the first 120 characters of the deliverable if Demo 2 produced it.
    report = workspace / "REPORT.md"
    if report.exists():
        print(f"\n[REPORT.md preview: {report.read_text()[:120]!r}...]")

    print("\n" + "=" * 60)
    print("Example Complete!")
    print("=" * 60)

    # Combined cost of both demo conversations.
    print(f"\nEXAMPLE_COST: {total_cost}")
Original file line number Diff line number Diff line change
Expand Up @@ -559,12 +559,23 @@ def _ensure_plugins_loaded(self) -> None:
if final_hook_config is not None:
# Store final hook_config in state for observability
self._state.hook_config = final_hook_config
hook_persistence_dir = (
str(Path(self._state.persistence_dir).parent)
if self._state.persistence_dir is not None
else None
)

self._hook_processor, self._on_event = create_hook_callback(
hook_config=final_hook_config,
working_dir=str(self.workspace.working_dir),
session_id=str(self._state.id),
original_callback=self._base_callback,
# Resolve lazily: switch_llm()/switch_profile() rebind self.agent,
# so agent hooks must read the current LLM at execution time.
llm_getter=lambda: self.agent.llm,
persistence_dir=hook_persistence_dir,
visualizer=self._visualizer,
Comment thread
VascoSch92 marked this conversation as resolved.
conversation_stats=self._state.stats,
)
self._hook_processor.set_conversation_state(self._state)
self._hook_processor.run_session_start()
Expand Down
Loading
Loading