From 1fd03787197aea36d191dd03e17898bebfbbbfb7 Mon Sep 17 00:00:00 2001 From: Graham Neubig Date: Thu, 28 May 2026 12:26:44 -0400 Subject: [PATCH 1/3] docs: clarify agent server setup --- llms-full.txt | 5213 ++++++++++++++++------ llms.txt | 21 +- sdk/arch/agent-server.mdx | 581 +-- sdk/guides/agent-server/local-server.mdx | 425 +- 4 files changed, 4108 insertions(+), 2132 deletions(-) diff --git a/llms-full.txt b/llms-full.txt index a1764d554..d9f13d765 100644 --- a/llms-full.txt +++ b/llms-full.txt @@ -924,7 +924,7 @@ Initialize the conversation. * `hook_config` – Optional hook configuration to auto-wire session hooks. If plugins are loaded, their hooks are combined with this config. * `max_iteration_per_run` – Maximum number of iterations per run - * `visualizer` – + * `visualizer` – Visualization configuration. Can be: - ConversationVisualizerBase subclass: Class to instantiate @@ -1107,7 +1107,7 @@ Remote conversation proxy that talks to an agent server. ‘monologue’, ‘alternating_pattern’. Values are integers representing the number of repetitions before triggering. * `hook_config` – Optional hook configuration for session hooks - * `visualizer` – + * `visualizer` – Visualization configuration. Can be: - ConversationVisualizerBase subclass: Class to instantiate @@ -3243,7 +3243,7 @@ Features: Simple tool with no parameters: : class FinishTool(ToolDefinition[FinishAction, FinishObservation]): : @classmethod - def create(cls, conv_state=None, + def create(cls, conv_state=None, `
` ``` ** @@ -3257,7 +3257,7 @@ Complex tool with initialization parameters: : class TerminalTool(ToolDefinition[TerminalAction, : TerminalObservation]): @classmethod - def create(cls, conv_state, + def create(cls, conv_state, `
` ``` ** @@ -3918,18 +3918,18 @@ flowchart TB Events["Event History"] Context["Agent Context
Skills + Prompts"] end - + subgraph Core["Agent Core"] Condense["Condenser
History compression"] Reason["LLM Query
Generate actions"] Security["Security Analyzer
Risk assessment"] end - + subgraph Execution[" "] Tools["Tool Executor
Action → Observation"] Results["Observation Events"] end - + Events --> Condense Context -.->|Skills| Reason Condense --> Reason @@ -3937,11 +3937,11 @@ flowchart TB Security --> Tools Tools --> Results Results -.->|Feedback| Events - + classDef primary fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px classDef secondary fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px classDef tertiary fill:#fff4df,stroke:#b7791f,stroke-width:2px - + class Reason primary class Condense,Security secondary class Tools tertiary @@ -3967,55 +3967,55 @@ flowchart TB Start["step() called"] Pending{"Pending
actions?"} ExecutePending["Execute pending actions"] - + HasCondenser{"Has
condenser?"} Condense["Call condenser.condense()"] CondenseResult{"Result
type?"} EmitCondensation["Emit Condensation event"] UseView["Use View events"] UseRaw["Use raw events"] - + Query["Query LLM with messages"] ContextExceeded{"Context
window
exceeded?"} EmitRequest["Emit CondensationRequest"] - + Parse{"Response
type?"} CreateActions["Create ActionEvents"] CreateMessage["Create MessageEvent"] - + Confirmation{"Need
confirmation?"} SetWaiting["Set WAITING_FOR_CONFIRMATION"] - + Execute["Execute actions"] Observe["Create ObservationEvents"] - + Return["Return"] - + Start --> Pending Pending -->|Yes| ExecutePending --> Return Pending -->|No| HasCondenser - + HasCondenser -->|Yes| Condense HasCondenser -->|No| UseRaw Condense --> CondenseResult CondenseResult -->|Condensation| EmitCondensation --> Return CondenseResult -->|View| UseView --> Query UseRaw --> Query - + Query --> ContextExceeded ContextExceeded -->|Yes| EmitRequest --> Return ContextExceeded -->|No| Parse - + Parse -->|Tool calls| CreateActions Parse -->|Message| CreateMessage --> Return - + CreateActions --> Confirmation Confirmation -->|Yes| SetWaiting --> Return Confirmation -->|No| Execute - + Execute --> Observe Observe --> Return - + style Query fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Condense fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Confirmation fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -4050,26 +4050,26 @@ The agent applies `AgentContext` which includes **skills** and **prompts** to sh %%{init: {"theme": "default", "flowchart": {"nodeSpacing": 30}} }%% flowchart LR Context["AgentContext"] - + subgraph Skills["Skills"] Repo["repo
Always active"] Knowledge["knowledge
Trigger-based"] end SystemAug["System prompt prefix/suffix
Per-conversation"] System["Prompt template
Per-conversation"] - + subgraph Application["Applied to LLM"] SysPrompt["System Prompt"] UserMsg["User Messages"] end - + Context --> Skills Context --> SystemAug Repo --> SysPrompt Knowledge -.->|When triggered| UserMsg System --> SysPrompt SystemAug --> SysPrompt - + style Context fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Repo fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Knowledge fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -4092,26 +4092,26 @@ Tools follow a **strict action-observation pattern**: flowchart TB LLM["LLM generates tool_call"] Convert["Convert to ActionEvent"] - + Decision{"Confirmation
mode?"} Defer["Store as pending"] - + Execute["Execute tool"] Success{"Success?"} - + Obs["ObservationEvent
with result"] Error["ObservationEvent
with error"] - + LLM --> Convert Convert --> Decision - + Decision -->|Yes| Defer Decision -->|No| Execute - + Execute --> Success Success -->|Yes| Obs Success -->|No| Error - + style Convert fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Execute fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Decision fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -4143,14 +4143,14 @@ flowchart LR LLM["LLM"] Tools["Tools"] Context["AgentContext"] - + Conv -->|.step calls| Agent Agent -->|Reads events| Conv Agent -->|Query| LLM Agent -->|Execute| Tools Context -.->|Skills and Context| Agent Agent -.->|New events| Conv - + style Agent fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Conv fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style LLM fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -4174,535 +4174,178 @@ flowchart LR ### Agent Server Package Source: https://docs.openhands.dev/sdk/arch/agent-server.md -The Agent Server package (`openhands.agent_server`) provides an HTTP API server for remote agent execution. It enables building multi-user systems, SaaS products, and distributed agent platforms. +The Agent Server package (`openhands-agent-server`) runs the OpenHands Software Agent SDK behind an HTTP and WebSocket API. Use it when another service, such as an Agent Canvas backend, needs to start conversations, stream events, and run file or command operations in a workspace without embedding the SDK directly in the same process. 
-**Source**: [`openhands/agent_server/`](https://github.com/OpenHands/software-agent-sdk/tree/main/openhands-agent-server/openhands/agent_server) +## When to Use It -## Purpose +Use the Agent Server when you need: -The Agent Server enables: -- **Remote execution**: Clients interact with agents via HTTP API -- **Multi-user isolation**: Each user gets isolated workspace -- **Container orchestration**: Manages Docker containers for workspaces -- **Centralized management**: Monitor and control all agents -- **Scalability**: Horizontal scaling with multiple servers +- A backend process that clients can reach over HTTP/WebSocket. +- A long-running service for conversations and workspace files. +- A server API that can be protected with a session API key. +- A clean boundary between your application backend and the agent runtime. -## Architecture Overview +For a single local script, the standalone SDK is usually simpler. For a backend service, web UI, automation system, or Agent Canvas-style deployment, run an Agent Server and connect to it from the client service. -```mermaid -graph TB - Client[Web/Mobile Client] -->|HTTPS| API[FastAPI Server] - - API --> Auth[Authentication] - API --> Router[API Router] - - Router --> WS[Workspace Manager] - Router --> Conv[Conversation Handler] - - WS --> Docker[Docker Manager] - Docker --> C1[Container 1
User A] - Docker --> C2[Container 2
User B] - Docker --> C3[Container 3
User C] - - Conv --> Agent[Software Agent SDK] - Agent --> C1 - Agent --> C2 - Agent --> C3 - - style Client fill:#e1f5fe - style API fill:#fff3e0 - style WS fill:#e8f5e8 - style Docker fill:#f3e5f5 - style Agent fill:#fce4ec -``` - -### Key Components - -**1. FastAPI Server** -- HTTP REST API endpoints -- Authentication and authorization -- Request validation -- WebSocket support for streaming - -**2. Workspace Manager** -- Creates and manages Docker containers -- Isolates workspaces per user -- Handles container lifecycle -- Manages resource limits - -**3. Conversation Handler** -- Routes requests to appropriate workspace -- Manages conversation state -- Handles concurrent requests -- Supports streaming responses - -**4. Docker Manager** -- Interfaces with Docker daemon -- Builds and pulls images -- Creates and destroys containers -- Monitors container health - -## Design Decisions - -### Why HTTP API? - -Alternative approaches considered: -- **gRPC**: More efficient but harder for web clients -- **WebSockets only**: Good for streaming but not RESTful -- **HTTP + WebSockets**: Best of both worlds - -**Decision**: HTTP REST for operations, WebSockets for streaming -- ✅ Works from any client (web, mobile, CLI) -- ✅ Easy to debug (curl, Postman) -- ✅ Standard authentication (API keys, OAuth) -- ✅ Streaming where needed - -### Why Container Per User? - -Alternative approaches: -- **Shared container**: Multiple users in one container -- **Container per session**: New container each conversation -- **Container per user**: One container per user (chosen) +## Install -**Decision**: Container per user -- ✅ Strong isolation between users -- ✅ Persistent workspace across sessions -- ✅ Better resource management -- ⚠️ More containers, but worth it for isolation - -### Why FastAPI? 
- -Alternative frameworks: -- **Flask**: Simpler but less type-safe -- **Django**: Too heavyweight -- **FastAPI**: Modern, fast, type-safe (chosen) - -**Decision**: FastAPI -- ✅ Automatic API documentation (OpenAPI) -- ✅ Type validation with Pydantic -- ✅ Async support for performance -- ✅ WebSocket support built-in - -## API Design - -### Key Endpoints - -**Workspace Management** -``` -POST /workspaces Create new workspace -GET /workspaces/{id} Get workspace info -DELETE /workspaces/{id} Delete workspace -POST /workspaces/{id}/execute Execute command -``` - -**Conversation Management** -``` -POST /conversations Create conversation -GET /conversations/{id} Get conversation -POST /conversations/{id}/messages Send message -GET /conversations/{id}/stream Stream responses (WebSocket) -``` - -**Health & Monitoring** -``` -GET /health Server health check -GET /metrics Prometheus metrics -``` - -### Authentication +Install the SDK packages in the Python environment that will run the server: -**API Key Authentication** ```bash -curl -H "Authorization: Bearer YOUR_API_KEY" \ - https://agent-server.example.com/conversations +python -m venv .venv +source .venv/bin/activate +pip install -U openhands-sdk openhands-tools openhands-workspace openhands-agent-server ``` -**Per-user workspace isolation** -- API key → user ID mapping -- Each user gets separate workspace -- Users can't access each other's workspaces +If you are working from the `OpenHands/software-agent-sdk` repository, use the repository's normal `uv` setup instead: -### Streaming Responses - -**WebSocket for real-time updates** -```python -async with websocket_connect(url) as ws: - # Send message - await ws.send_json({"message": "Hello"}) - - # Receive events - async for event in ws: - if event["type"] == "message": - print(event["content"]) +```bash +git clone https://github.com/OpenHands/software-agent-sdk.git +cd software-agent-sdk +uv sync +uv run python -m openhands.agent_server --host 127.0.0.1 --port 8000 ``` 
-**Why streaming?** -- Real-time feedback to users -- Show agent thinking process -- Better UX for long-running tasks - -## Deployment Models +## Start a Local Server -### 1. Local Development +For local-only use, bind to `127.0.0.1`: -Run server locally for testing: ```bash -# Start server -openhands-agent-server --port 8000 - -# Or with Docker -docker run -p 8000:8000 \ - -v /var/run/docker.sock:/var/run/docker.sock \ - ghcr.io/all-hands-ai/agent-server:latest +python -m openhands.agent_server --host 127.0.0.1 --port 8000 ``` -**Use case**: Development and testing +Check that the server is alive: -### 2. Single-Server Deployment - -Deploy on one server (VPS, EC2, etc.): ```bash -# Install -pip install openhands-agent-server - -# Run with systemd/supervisor -openhands-agent-server \ - --host 0.0.0.0 \ - --port 8000 \ - --workers 4 +curl http://127.0.0.1:8000/health ``` -**Use case**: Small deployments, prototypes, MVPs +The interactive API docs are available at: -### 3. Multi-Server Deployment - -Scale horizontally with load balancer: -``` - Load Balancer - | - +-------------+-------------+ - | | | - Server 1 Server 2 Server 3 - (Agents) (Agents) (Agents) - | | | - +-------------+-------------+ - | - Shared State Store - (Database, Redis, etc.) +```text +http://127.0.0.1:8000/docs ``` -**Use case**: Production SaaS, high traffic, need redundancy - -### 4. Kubernetes Deployment +## Secure the Server -Container orchestration with Kubernetes: -```yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: agent-server -spec: - replicas: 3 - template: - spec: - containers: - - name: agent-server - image: ghcr.io/all-hands-ai/agent-server:latest - ports: - - containerPort: 8000 -``` - -**Use case**: Enterprise deployments, auto-scaling, high availability - -## Resource Management +By default, the Agent Server starts without API authentication. Before exposing it to another process, container, host, or user, set at least one session API key. 
-### Container Limits +```bash +export OH_SESSION_API_KEYS_0="$(openssl rand -hex 32)" +export OH_SECRET_KEY="$(openssl rand -hex 32)" -Set per-workspace resource limits: -```python -# In server configuration -WORKSPACE_CONFIG = { - "resource_limits": { - "memory": "2g", # 2GB RAM - "cpus": "2", # 2 CPU cores - "disk": "10g" # 10GB disk - }, - "timeout": 300, # 5 min timeout -} +python -m openhands.agent_server --host 127.0.0.1 --port 8000 ``` -**Why limit resources?** -- Prevent one user from consuming all resources -- Fair usage across users -- Protect server from runaway processes -- Cost control - -### Cleanup & Garbage Collection - -**Container lifecycle**: -- Containers created on first use -- Kept alive between requests (warm) -- Cleaned up after inactivity timeout -- Force cleanup on server shutdown - -**Storage management**: -- Old workspaces deleted automatically -- Disk usage monitored -- Alerts when approaching limits - -## Security Considerations - -### Multi-Tenant Isolation - -**Container isolation**: -- Each user gets separate container -- Containers can't communicate -- Network isolation (optional) -- File system isolation - -**API isolation**: -- API keys mapped to users -- Users can only access their workspaces -- Server validates all permissions - -### Input Validation - -**Server validates**: -- API request schemas -- Command injection attempts -- Path traversal attempts -- File size limits - -**Defense in depth**: -- API validation -- Container validation -- Docker security features -- OS-level security - -### Network Security - -**Best practices**: -- HTTPS only (TLS certificates) -- Firewall rules (only port 443/8000) -- Rate limiting -- DDoS protection +Clients must send the session key in the `X-Session-API-Key` header: -**Container networking**: -```python -# Disable network for workspace -WORKSPACE_CONFIG = { - "network_mode": "none" # No network access -} - -# Or allow specific hosts -WORKSPACE_CONFIG = { - "allowed_hosts": 
["api.example.com"] -} +```bash +curl \ + -H "X-Session-API-Key: $OH_SESSION_API_KEYS_0" \ + http://127.0.0.1:8000/api/conversations ``` -## Monitoring & Observability - -### Health Checks +Use additional indexed variables when you need key rotation: ```bash -# Simple health check -curl https://agent-server.example.com/health - -# Response -{ - "status": "healthy", - "docker": "connected", - "workspaces": 15, - "uptime": 86400 -} +export OH_SESSION_API_KEYS_0="current-key" +export OH_SESSION_API_KEYS_1="next-key" ``` -### Metrics - -**Prometheus metrics**: -- Request count and latency -- Active workspaces -- Container resource usage -- Error rates + + `OH_SECRET_KEY` encrypts sensitive values stored with conversations, including LLM API keys and secrets. Keep it stable across restarts. If it changes, previously encrypted values cannot be restored. + -**Logging**: -- Structured JSON logs -- Per-request tracing -- Workspace events -- Error tracking +## Connect From Python -### Alerting +Pass the server URL and API key to `Workspace`. The SDK sends the key as `X-Session-API-Key` and uses remote HTTP/WebSocket calls for workspace and conversation operations. 
-**Alert on**: -- Server down -- High error rate -- Resource exhaustion -- Container failures +```python +import os -## Client SDK +from pydantic import SecretStr -Python SDK for interacting with Agent Server: +from openhands.sdk import Conversation, LLM, Workspace +from openhands.tools.preset.default import get_default_agent -```python -from openhands.client import AgentServerClient -client = AgentServerClient( - url="https://agent-server.example.com", - api_key="your-api-key" +llm = LLM( + model=os.environ.get("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929"), + api_key=SecretStr(os.environ["LLM_API_KEY"]), ) +agent = get_default_agent(llm=llm, cli_mode=True) -# Create conversation -conversation = client.create_conversation() - -# Send message -response = client.send_message( - conversation_id=conversation.id, - message="Hello, agent!" +workspace = Workspace( + host="http://127.0.0.1:8000", + api_key=os.environ["OH_SESSION_API_KEYS_0"], + working_dir="workspace/project", ) -# Stream responses -for event in client.stream_conversation(conversation.id): - if event.type == "message": - print(event.content) +conversation = Conversation(agent=agent, workspace=workspace) +conversation.send_message("Create a TODO.md file with three setup tasks.") +conversation.run() +conversation.close() ``` -**Client handles**: -- Authentication -- Request/response serialization -- Error handling -- Streaming -- Retries +## Expose It Safely -## Cost Considerations +If another service runs on the same machine, keep the server bound to `127.0.0.1` and let that service connect locally. -### Server Costs +If another host must connect to the server: -**Compute**: CPU and memory for containers -- Each active workspace = 1 container -- Typically 1-2 GB RAM per workspace -- 0.5-1 CPU core per workspace +1. Set `OH_SESSION_API_KEYS_0` and `OH_SECRET_KEY`. +2. Bind the server to a reachable interface, for example `--host 0.0.0.0`. +3. 
Put the server behind TLS, a private network, or a trusted reverse proxy. +4. Restrict firewall access to only the services that need it. +5. Configure CORS only for browser clients that must call the server directly. -**Storage**: Workspace files and conversation state -- ~1-10 GB per workspace (depends on usage) -- Conversation history in database - -**Network**: API requests and responses -- Minimal (mostly text) -- Streaming adds bandwidth - -### Cost Optimization +```bash +export OH_SESSION_API_KEYS_0="$(openssl rand -hex 32)" +export OH_SECRET_KEY="$(openssl rand -hex 32)" +export OH_ALLOW_CORS_ORIGINS_0="https://your-frontend.example.com" -**1. Idle timeout**: Shutdown containers after inactivity -```python -WORKSPACE_CONFIG = { - "idle_timeout": 3600 # 1 hour -} +python -m openhands.agent_server --host 0.0.0.0 --port 8000 ``` -**2. Resource limits**: Don't over-provision -```python -WORKSPACE_CONFIG = { - "resource_limits": { - "memory": "1g", # Smaller limit - "cpus": "0.5" # Fractional CPU - } -} -``` - -**3. Shared resources**: Use single server for multiple low-traffic apps - -**4. 
Auto-scaling**: Scale servers based on demand - -## When to Use Agent Server - -### Use Agent Server When: - -✅ **Multi-user system**: Web app with many users -✅ **Remote clients**: Mobile app, web frontend -✅ **Centralized management**: Need to monitor all agents -✅ **Workspace isolation**: Users shouldn't interfere -✅ **SaaS product**: Building agent-as-a-service -✅ **Scaling**: Need to handle concurrent users - -**Examples**: -- Chatbot platforms -- Code assistant web apps -- Agent marketplaces -- Enterprise agent deployments - -### Use Standalone SDK When: - -✅ **Single-user**: Personal tool or script -✅ **Local execution**: Running on your machine -✅ **Full control**: Need programmatic access -✅ **Simpler deployment**: No server management -✅ **Lower latency**: No network overhead - -**Examples**: -- CLI tools -- Automation scripts -- Local development -- Desktop applications - -### Hybrid Approach - -Use SDK locally but RemoteAPIWorkspace for execution: -- Agent logic in your Python code -- Execution happens on remote server -- Best of both worlds - -## Building Custom Agent Server - -The server is extensible for custom needs: - -**Custom authentication**: -```python -from openhands.agent_server import AgentServer + + Do not expose an unauthenticated Agent Server on a public network. It can execute commands and read or write files in its configured workspace. 
+ -class CustomAgentServer(AgentServer): - async def authenticate(self, request): - # Custom auth logic - return await oauth_verify(request) -``` +## Runtime Files -**Custom workspace configuration**: -```python -server = AgentServer( - workspace_factory=lambda user: DockerWorkspace( - image=f"custom-image-{user.tier}", - resource_limits=user.resource_limits - ) -) -``` +By default, the server stores conversation and workspace data under `workspace/` relative to the process working directory: -**Custom middleware**: -```python -@server.middleware -async def logging_middleware(request, call_next): - # Custom logging - response = await call_next(request) - return response +```text +workspace/ +|-- bash_events/ +|-- conversations/ +`-- project/ ``` -## Next Steps +Run the server from a directory with enough disk space and with permissions appropriate for the files the agent should access. -### For Usage Examples +## Useful Endpoints -- [Local Agent Server](/sdk/guides/agent-server/local-server) - Run locally -- [Docker Sandboxed Server](/sdk/guides/agent-server/docker-sandbox) - Docker setup -- [API Sandboxed Server](/sdk/guides/agent-server/api-sandbox) - Remote API -- [Remote Agent Server Overview](/sdk/guides/agent-server/overview) - All options +- `GET /health` - Basic health check. +- `GET /ready` - Readiness check after startup initialization. +- `GET /server_info` - Version, uptime, and available tool information. +- `GET /docs` - Interactive OpenAPI documentation. +- `/api/*` - Authenticated conversation, workspace, file, command, and settings APIs when session API keys are configured. -### For Related Architecture +## Troubleshooting -- [Workspace Architecture](/sdk/arch/workspace) - RemoteAPIWorkspace details -- [SDK Architecture](/sdk/arch/sdk) - Core framework -- [Architecture Overview](/sdk/arch/overview) - System design +- **401 responses**: Send `X-Session-API-Key` with one of the configured `OH_SESSION_API_KEYS_*` values. 
+- **Secrets disappear after restart**: Set a stable `OH_SECRET_KEY` before starting the server: generate the value once, store it, and reuse it on every start. A key regenerated at each launch (for example, `$(openssl rand -hex 32)` inside a startup script) cannot decrypt previously stored values. +- **Port already in use**: Change the port with `--port`. +- **Browser CORS errors**: Add the browser origin with `OH_ALLOW_CORS_ORIGINS_0`. +- **Cannot reach the server from another host**: Check `--host`, firewall rules, reverse proxy routing, and TLS configuration. -### For Implementation Details +## Next Steps -- [`openhands/agent_server/`](https://github.com/OpenHands/software-agent-sdk/tree/main/openhands-agent-server/openhands/agent_server) - Server source -- [`examples/`](https://github.com/OpenHands/software-agent-sdk/tree/main/examples) - Working examples +- [Local Agent Server](/sdk/guides/agent-server/local-server) - Run and connect to a local server. +- [Docker Sandboxed Server](/sdk/guides/agent-server/docker-sandbox) - Run the server in an isolated Docker workspace. +- [API Sandboxed Server](/sdk/guides/agent-server/api-sandbox) - Start agent servers through a hosted runtime API. +- [Agent Server API Reference](/sdk/guides/agent-server/api-reference/server-details/alive) - Browse the generated REST API docs. ### Condenser Source: https://docs.openhands.dev/sdk/arch/condenser.md @@ -4728,40 +4371,40 @@ flowchart TB subgraph Interface["Abstract Interface"] Base["CondenserBase
Abstract base"] end - + subgraph Implementations["Concrete Implementations"] NoOp["NoOpCondenser
No compression"] LLM["LLMSummarizingCondenser
LLM-based"] Pipeline["PipelineCondenser
Multi-stage"] end - + subgraph Process["Condensation Process"] View["View
Event history"] Check["should_condense()?"] Condense["get_condensation()"] Result["View | Condensation"] end - + subgraph Output["Condensation Output"] CondEvent["Condensation Event
Summary metadata"] NewView["Condensed View
Reduced tokens"] end - + Base --> NoOp Base --> LLM Base --> Pipeline - + View --> Check Check -->|Yes| Condense Check -->|No| Result Condense --> CondEvent CondEvent --> NewView NewView --> Result - + classDef primary fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px classDef secondary fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px classDef tertiary fill:#fff4df,stroke:#b7791f,stroke-width:2px - + class Base primary class LLM,Pipeline secondary class Check,Condense tertiary @@ -4791,9 +4434,9 @@ flowchart LR View["View"] NoOp["NoOpCondenser"] Same["Same View"] - + View --> NoOp --> Same - + style NoOp fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px ``` @@ -4812,7 +4455,7 @@ flowchart LR AddToHistory["Add to History"] NextStep["Next Step: View.from_events()"] NewView["Condensed View"] - + View --> Check Check -->|Yes| Summarize Summarize --> Summary @@ -4820,7 +4463,7 @@ flowchart LR Metadata --> AddToHistory AddToHistory --> NextStep NextStep --> NewView - + style Check fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Summarize fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style NewView fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -4851,9 +4494,9 @@ flowchart LR C2["Condenser 2"] C3["Condenser 3"] Final["Final View"] - + View --> C1 --> C2 --> C3 --> Final - + style C1 fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style C2 fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style C3 fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -4876,9 +4519,9 @@ flowchart TB Check1["condenser.condense(view)"] Trigger1["should_condense()?"] end - + Agent1 --> Build1 --> Check1 --> Trigger1 - + style Check1 fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px ``` @@ -4897,9 +4540,9 @@ flowchart TB NextStep["Next Agent Step"] Trigger2["condense() detects request"] end - + Error --> Request --> NextStep --> Trigger2 - + style Request fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px ``` **Manual Trigger:** @@ -4913,11 +4556,11 @@ flowchart TB %%{init: {"theme": "default", "flowchart": {"nodeSpacing": 30, 
"rankSpacing": 40}} }%% flowchart TB Start["Agent calls condense(view)"] - + Decision{"should_condense?"} - + ReturnView["Return View
Agent proceeds"] - + Extract["Select Events to Keep/Forget"] Generate["LLM Generates Summary"] Create["Create Condensation Event"] @@ -4927,7 +4570,7 @@ flowchart TB FilterEvents["Filter forgotten events"] InsertSummary["Insert summary at offset"] NewView["New condensed view"] - + Start --> Decision Decision -->|No| ReturnView Decision -->|Yes| Extract @@ -4939,7 +4582,7 @@ flowchart TB NextStep --> FilterEvents FilterEvents --> InsertSummary InsertSummary --> NewView - + style Decision fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Generate fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Create fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -4971,14 +4614,14 @@ flowchart LR View["View
LLMConvertibleEvents"] Convert["events_to_messages()"] LLM["LLM Input"] - + Events --> FromEvents FromEvents --> Filter Filter --> Insert Insert --> View View --> Convert Convert --> LLM - + style View fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style FromEvents fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px ``` @@ -5001,12 +4644,12 @@ flowchart LR Event["Condensation Event
forgotten_event_ids"] Applied["View.from_events()"] New["New View
~60 events + summary"] - + Old -.->|Summarized| Summary Summary --> Event Event --> Applied Applied --> New - + style Event fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Summary fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px ``` @@ -5026,33 +4669,33 @@ flowchart LR flowchart TB View["Current View
120+ events"] Check["Count Events"] - + Compare{"Count >
max_size?"} - + Keep["Keep All Events"] - + Split["Split Events"] Head["Head
First 4 events"] Middle["Middle
~56 events"] Tail["Tail
~56 events"] Summarize["LLM Summarizes Middle"] Result["Head + Summary + Tail
~60 events total"] - + View --> Check Check --> Compare - + Compare -->|Under| Keep Compare -->|Over| Split - + Split --> Head Split --> Middle Split --> Tail - + Middle --> Summarize Head --> Result Summarize --> Result Tail --> Result - + style Compare fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Split fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Summarize fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -5075,13 +4718,13 @@ flowchart LR Condenser["Condenser"] State["Conversation State"] Events["Event Log"] - + Agent -->|"View.from_events()"| State State -->|View| Agent Agent -->|"condense(view)"| Condenser Condenser -->|"View | Condensation"| Agent Agent -->|Adds Condensation| Events - + style Condenser fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Agent fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Events fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -5122,7 +4765,7 @@ The Conversation system has four primary responsibilities: %%{init: {"theme": "default", "flowchart": {"nodeSpacing": 25, "rankSpacing": 35}} }%% flowchart LR User["User Code"] - + subgraph Factory[" "] Entry["Conversation()"] end @@ -5131,26 +4774,26 @@ flowchart LR Local["LocalConversation
Direct execution"] Remote["RemoteConversation
Via agent-server API"] end - + subgraph Core[" "] State["ConversationState
• agent
workspace • stats • ..."] EventLog["ConversationState.events
Event storage"] end - + User --> Entry Entry -.->|LocalWorkspace| Local Entry -.->|RemoteWorkspace| Remote - + Local --> State Remote --> State - + State --> EventLog - + classDef factory fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px classDef impl fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px classDef core fill:#fff4df,stroke:#b7791f,stroke-width:2px classDef service fill:#e9f9ef,stroke:#2f855a,stroke-width:1.5px - + class Entry factory class Local,Remote impl class State,EventLog core @@ -5178,11 +4821,11 @@ flowchart LR Check{Workspace Type?} Local["LocalConversation
Agent runs in-process"] Remote["RemoteConversation
Agent runs via API"] - + Input --> Check Check -->|str or LocalWorkspace| Local Check -->|RemoteWorkspace| Remote - + style Input fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Local fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Remote fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px @@ -5204,13 +4847,13 @@ flowchart TB Start["State Update Request"] Lock["Acquire FIFO Lock"] Decision{New Event?} - + StateOnly["Update State Fields
stats, status, metadata"] EventPath["Append to Event Log
messages, actions, observations"] - + Callback["Trigger Callbacks"] Release["Release Lock"] - + Start --> Lock Lock --> Decision Decision -->|No| StateOnly @@ -5218,7 +4861,7 @@ flowchart TB StateOnly --> Callback EventPath --> Callback Callback --> Release - + style Decision fill:#fff4df,stroke:#b7791f,stroke-width:2px style EventPath fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style StateOnly fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px @@ -5305,13 +4948,13 @@ flowchart LR WS["Workspace"] Tools["Tools"] LLM["LLM"] - + Conv -->|Delegates to| Agent Conv -->|Configures| WS Agent -.->|Updates| Conv Agent -->|Uses| Tools Agent -->|Queries| LLM - + style Conv fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Agent fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style WS fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -5339,52 +4982,52 @@ The **OpenHands Software Agent SDK** is part of the [OpenHands V1](https://openh ## Optional Isolation over Mandatory Sandboxing -**V0 Challenge:** -Every tool call in V0 executed in a sandboxed Docker container by default. While this guaranteed reproducibility and security, it also created friction — the agent and sandbox ran as separate processes, states diverged easily, and multi-tenant workloads could crash each other. +**V0 Challenge:** +Every tool call in V0 executed in a sandboxed Docker container by default. While this guaranteed reproducibility and security, it also created friction — the agent and sandbox ran as separate processes, states diverged easily, and multi-tenant workloads could crash each other. Moreover, with the rise of the Model Context Protocol (MCP), which assumes local execution and direct access to user environments, V0's rigid isolation model became incompatible. -**V1 Principle:** -**Sandboxing should be opt-in, not universal.** -V1 unifies agent and tool execution within a single process by default, aligning with MCP's local-execution model. 
+**V1 Principle:** +**Sandboxing should be opt-in, not universal.** +V1 unifies agent and tool execution within a single process by default, aligning with MCP's local-execution model. When isolation is needed, the same stack can be transparently containerized, maintaining flexibility without complexity. ## Stateless by Default, One Source of Truth for State -**V0 Challenge:** +**V0 Challenge:** V0 relied on mutable Python objects and dynamic typing, which led to silent inconsistencies — failed session restores, version drift, and non-deterministic behavior. Each subsystem tracked its own transient state, making debugging and recovery painful. -**V1 Principle:** -**Keep everything stateless, with exactly one mutable state.** -All components (agents, tools, LLMs, and configurations) are immutable Pydantic models validated at construction. +**V1 Principle:** +**Keep everything stateless, with exactly one mutable state.** +All components (agents, tools, LLMs, and configurations) are immutable Pydantic models validated at construction. The only mutable entity is the [conversation state](https://github.com/OpenHands/software-agent-sdk/blob/main/openhands-sdk/openhands/sdk/event/conversation_state.py), a single source of truth that enables deterministic replay and robust persistence across sessions or distributed systems. ## Clear Boundaries between Agent and Applications -**V0 Challenge:** -The same codebase powered the CLI, web interface, and integrations (e.g., Github, Gitlab, etc). Over time, application-specific conditionals and prompts polluted the agent core, making it brittle. +**V0 Challenge:** +The same codebase powered the CLI, web interface, and integrations (e.g., Github, Gitlab, etc). Over time, application-specific conditionals and prompts polluted the agent core, making it brittle. Heavy research dependencies and benchmark integrations further bloated production builds. 
-**V1 Principle:** -**Maintain strict separation of concerns.** -V1 divides the system into stable, isolated layers: the [SDK (agent core)](/sdk/arch/overview#1-sdk-%E2%80%93-openhands-sdk), [tools (set of tools)](/sdk/arch/overview#2-tools-%E2%80%93-openhands-tools), [workspace (sandbox)](/sdk/arch/overview#3-workspace-%E2%80%93-openhands-workspace), and [agent server (server that runs inside sandbox)](/sdk/arch/overview#4-agent-server-%E2%80%93-openhands-agent-server). +**V1 Principle:** +**Maintain strict separation of concerns.** +V1 divides the system into stable, isolated layers: the [SDK (agent core)](/sdk/arch/overview#1-sdk-%E2%80%93-openhands-sdk), [tools (set of tools)](/sdk/arch/overview#2-tools-%E2%80%93-openhands-tools), [workspace (sandbox)](/sdk/arch/overview#3-workspace-%E2%80%93-openhands-workspace), and [agent server (server that runs inside sandbox)](/sdk/arch/overview#4-agent-server-%E2%80%93-openhands-agent-server). Applications communicate with the agent via APIs rather than embedding it directly, ensuring research and production can evolve independently. ## Composable Components for Extensibility -**V0 Challenge:** +**V0 Challenge:** Because agent logic was hard-coded into the core application, extending behavior (e.g., adding new tools or entry points) required branching logic for different entrypoints. This rigidity limited experimentation and discouraged contributions. -**V1 Principle:** -**Everything should be composable and safe to extend.** -Agents are defined as graphs of interchangeable components—tools, prompts, LLMs, and contexts—each described declaratively with strong typing. +**V1 Principle:** +**Everything should be composable and safe to extend.** +Agents are defined as graphs of interchangeable components—tools, prompts, LLMs, and contexts—each described declaratively with strong typing. 
Developers can reconfigure capabilities (e.g., swap toolsets, override prompts, add delegation logic) without modifying core code, preserving stability while fostering rapid innovation. ### Events @@ -5410,37 +5053,37 @@ The Event System has four primary responsibilities: flowchart TB Base["Event
Base class"] LLMBase["LLMConvertibleEvent
Abstract base"] - + subgraph LLMTypes["LLM-Convertible Events
Visible to the LLM"] Message["MessageEvent
User/assistant text"] Action["ActionEvent
Tool calls"] System["SystemPromptEvent
Initial system prompt"] CondSummary["CondensationSummaryEvent
Condenser summary"] - + ObsBase["ObservationBaseEvent
Base for tool responses"] Observation["ObservationEvent
Tool results"] UserReject["UserRejectObservation
User rejected action"] AgentError["AgentErrorEvent
Agent error"] end - + subgraph Internals["Internal Events
NOT visible to the LLM"] ConvState["ConversationStateUpdateEvent
State updates"] CondReq["CondensationRequest
Request compression"] Cond["Condensation
Compression result"] Pause["PauseEvent
User pause"] end - + Base --> LLMBase Base --> Internals LLMBase --> LLMTypes ObsBase --> Observation ObsBase --> UserReject ObsBase --> AgentError - + classDef primary fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px classDef secondary fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px classDef tertiary fill:#fff4df,stroke:#b7791f,stroke-width:2px - + class Base,LLMBase,Message,Action,System primary class ObsBase,Observation,UserReject,AgentError secondary class ConvState,CondReq,Cond,Pause tertiary @@ -5493,12 +5136,12 @@ flowchart LR Group["Group ActionEvents
by llm_response_id"] Convert["Convert to Messages"] LLM["LLM Input"] - + Events --> Filter Filter --> Group Group --> Convert Convert --> LLM - + style Filter fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Group fill:#fff4df,stroke:#b7791f,stroke-width:2px style Convert fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px @@ -5565,13 +5208,13 @@ flowchart LR Conversation["Conversation"] Tools["Tools"] Services["Auxiliary Services"] - + Agent -->|Reads| Events Agent -->|Writes| Events Conversation -->|Manages| Events Tools -->|Creates| Events Events -.->|Stream| Services - + style Events fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Agent fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Conversation fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -5637,37 +5280,37 @@ flowchart TB JSON["JSON Files
config/llm.json"] Code["Programmatic
LLM(...)"] end - + subgraph Core["Core LLM"] Model["LLM Model
Pydantic configuration"] Pipeline["Request Pipeline
Retry, timeout, telemetry"] end - + subgraph Backend["LiteLLM Backend"] Providers["100+ Providers
OpenAI, Anthropic, etc."] end - + subgraph Output["Telemetry"] Usage["Token Usage"] Cost["Cost Tracking"] Latency["Latency Metrics"] end - + Env --> Model JSON --> Model Code --> Model - + Model --> Pipeline Pipeline --> Providers - + Pipeline --> Usage Pipeline --> Cost Pipeline --> Latency - + classDef primary fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px classDef secondary fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px classDef tertiary fill:#fff4df,stroke:#b7791f,stroke-width:2px - + class Model primary class Pipeline secondary class Providers tertiary @@ -5698,10 +5341,10 @@ flowchart LR Code["Python Code"] LLM["LLM(model=...)"] Agent["Agent"] - + Code --> LLM LLM --> Agent - + style LLM fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px ``` @@ -5762,30 +5405,30 @@ If you need to include secrets in JSON, use `llm.model_dump_json(exclude_none=Tr flowchart TB Request["completion() or responses() call"] Validate["Validate Config"] - + Attempt["LiteLLM Request"] Success{"Success?"} - + Retry{"Retries
remaining?"} Wait["Exponential Backoff"] - + Telemetry["Record Telemetry"] Response["Return Response"] Error["Raise Error"] - + Request --> Validate Validate --> Attempt Attempt --> Success - + Success -->|Yes| Telemetry Success -->|No| Retry - + Retry -->|Yes| Wait Retry -->|No| Error - + Wait --> Attempt Telemetry --> Response - + style Attempt fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Retry fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Telemetry fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -5809,37 +5452,37 @@ In addition to the standard chat completion API, the LLM system supports [OpenAI %%{init: {"theme": "default", "flowchart": {"nodeSpacing": 30, "rankSpacing": 40}} }%% flowchart TB Check{"Model supports
Responses API?"} - + subgraph Standard["Standard Path"] ChatFormat["Format as
Chat Messages"] ChatCall["litellm.completion()"] end - + subgraph ResponsesPath["Responses Path"] RespFormat["Format as
instructions + input[]"] RespCall["litellm.responses()"] end - + ChatResponse["ModelResponse"] RespResponse["ResponsesAPIResponse"] - + Parse["Parse to Message"] Return["LLMResponse"] - + Check -->|No| ChatFormat Check -->|Yes| RespFormat - + ChatFormat --> ChatCall RespFormat --> RespCall - + ChatCall --> ChatResponse RespCall --> RespResponse - + ChatResponse --> Parse RespResponse --> Parse - + Parse --> Return - + style RespFormat fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style RespCall fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px ``` @@ -5866,7 +5509,7 @@ Software Agent SDK uses LiteLLM for provider abstraction: flowchart TB SDK["Software Agent SDK"] LiteLLM["LiteLLM"] - + subgraph Providers["100+ Providers"] OpenAI["OpenAI"] Anthropic["Anthropic"] @@ -5874,14 +5517,14 @@ flowchart TB Azure["Azure"] Others["..."] end - + SDK --> LiteLLM LiteLLM --> OpenAI LiteLLM --> Anthropic LiteLLM --> Google LiteLLM --> Azure LiteLLM --> Others - + style LiteLLM fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style SDK fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px ``` @@ -5928,23 +5571,23 @@ LLM requests automatically collect metrics: %%{init: {"theme": "default", "flowchart": {"nodeSpacing": 30}} }%% flowchart LR Request["LLM Request"] - + subgraph Metrics Tokens["Token Counts
Input/Output"] Cost["Cost
USD"] Latency["Latency
ms"] end - + Events["Event Log"] - + Request --> Tokens Request --> Cost Request --> Latency - + Tokens --> Events Cost --> Events Latency --> Events - + style Metrics fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Events fill:#fff4df,stroke:#b7791f,stroke-width:2px ``` @@ -5987,13 +5630,13 @@ flowchart LR Events["Events"] Security["Security Analyzer"] Condenser["Context Condenser"] - + Agent -->|Uses| LLM LLM -->|Records| Events Security -.->|Optional| LLM Condenser -.->|Optional| LLM Conversation -->|Provides context| Agent - + style LLM fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Agent fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Events fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -6039,38 +5682,38 @@ flowchart TB Sync["MCPClient
Sync/Async bridge"] Async["AsyncMCPClient
FastMCP base"] end - + subgraph Bridge["Tool Bridge"] Def["MCPToolDefinition
Schema conversion"] Exec["MCPToolExecutor
Execution handler"] end - + subgraph Integration["Agent Integration"] Action["MCPToolAction
Dynamic model"] Obs["MCPToolObservation
Result wrapper"] end - + subgraph External["External"] Server["MCP Server
stdio/HTTP"] Tools["External Tools"] end - + Sync --> Async Async --> Server - + Server --> Def Def --> Exec - + Exec --> Action Action --> Server Server --> Obs - + Server -.->|Spawns| Tools - + classDef primary fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px classDef secondary fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px classDef tertiary fill:#fff4df,stroke:#b7791f,stroke-width:2px - + class Sync,Async primary class Def,Exec secondary class Action,Obs tertiary @@ -6101,14 +5744,14 @@ flowchart TB Async["Async MCP Call"] Server["MCP Server"] Result["Result"] - + Sync --> Bridge Bridge --> Executor Executor --> Async Async --> Server Server --> Result Result --> Sync - + style Bridge fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Executor fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Async fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -6159,23 +5802,23 @@ flowchart TB Config["MCP Config"] Spawn["Spawn Server"] List["List Tools"] - + subgraph Convert["Convert Each Tool"] Schema["MCP Schema"] Action["Generate Action Model"] Def["Create ToolDefinition"] end - + Register["Register in ToolRegistry"] - + Config --> Spawn Spawn --> List List --> Schema - + Schema --> Action Action --> Def Def --> Register - + style Spawn fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Action fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Register fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -6201,11 +5844,11 @@ flowchart LR Parse["Parse Parameters"] Model["Dynamic Pydantic Model
MCPToolAction"] Def["ToolDefinition
SDK format"] - + MCP --> Parse Parse --> Model Model --> Def - + style Parse fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Model fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px ``` @@ -6254,15 +5897,15 @@ flowchart TB Agent["Agent generates action"] Action["MCPToolAction"] Executor["MCPToolExecutor"] - + Convert["Convert to MCP format"] Call["MCP call_tool"] Server["MCP Server"] - + Result["MCP Result"] Obs["MCPToolObservation"] Return["Return to Agent"] - + Agent --> Action Action --> Executor Executor --> Convert @@ -6271,7 +5914,7 @@ flowchart TB Server --> Result Result --> Obs Obs --> Return - + style Executor fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Call fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Obs fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -6297,10 +5940,10 @@ flowchart LR Executor["MCPToolExecutor"] Client["MCP Client"] Name["tool_name"] - + Executor -->|Uses| Client Executor -->|Knows| Name - + style Executor fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Client fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px ``` @@ -6324,32 +5967,32 @@ flowchart TB Spawn["Spawn MCP Servers"] Discover["Discover Tools"] Register["Register Tools"] - + Ready["Agent Ready"] - + Step["Agent Step"] LLM["LLM Tool Call"] Execute["Execute MCP Tool"] Result["Return Observation"] - + End["End Conversation"] Cleanup["Close MCP Clients"] - + Load --> Start Start --> Spawn Spawn --> Discover Discover --> Register Register --> Ready - + Ready --> Step Step --> LLM LLM --> Execute Execute --> Result Result --> Step - + Step --> End End --> Cleanup - + style Spawn fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Execute fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Cleanup fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -6372,22 +6015,22 @@ MCP tools can include metadata hints for agents: %%{init: {"theme": "default", "flowchart": {"nodeSpacing": 30}} }%% flowchart LR Tool["MCP Tool"] - + subgraph Annotations ReadOnly["readOnlyHint"] 
Destructive["destructiveHint"] Progress["progressEnabled"] end - + Security["Security Analysis"] - + Tool --> ReadOnly Tool --> Destructive Tool --> Progress - + ReadOnly --> Security Destructive --> Security - + style Destructive fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Security fill:#fff4df,stroke:#b7791f,stroke-width:2px ``` @@ -6414,12 +6057,12 @@ flowchart LR Tools["Tool Registry"] Agent["Agent"] Security["Security"] - + Skills -->|Configures| MCP MCP -->|Registers| Tools Agent -->|Uses| Tools MCP -->|Provides hints| Security - + style MCP fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Skills fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Agent fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -6478,7 +6121,7 @@ graph TB end SDK[Software Agent SDK
openhands.sdk + tools + workspace] - + subgraph External["External Services"] LLM[LLM Providers
OpenAI, Anthropic, etc.] Runtime[Runtime Services
Docker, Remote API, etc.] @@ -6487,14 +6130,14 @@ graph TB UI --> SDK CLI --> SDK Custom --> SDK - + SDK --> LLM SDK --> Runtime - + classDef interface fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px classDef sdk fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px classDef external fill:#fff4df,stroke:#b7791f,stroke-width:2px - + class UI,CLI,Custom interface class SDK sdk class LLM,Runtime external @@ -6531,9 +6174,9 @@ pip install openhands-sdk openhands-tools flowchart LR SDK["openhands.sdk
Agent · LLM · Conversation
+ LocalWorkspace"]:::sdk Tools["openhands.tools
BashTool · FileEditor · GrepTool · …"]:::tools - + SDK -->|uses| Tools - + classDef sdk fill:#e8f3ff,stroke:#2b6cb0,color:#0f2a45,stroke-width:2px,rx:8,ry:8 classDef tools fill:#e9f9ef,stroke:#2f855a,color:#14532d,stroke-width:2px,rx:8,ry:8 ``` @@ -6556,31 +6199,31 @@ pip install openhands-sdk openhands-tools openhands-workspace openhands-agent-se ```mermaid %%{init: {"theme": "default", "flowchart": {"nodeSpacing": 20, "rankSpacing": 30}} }%% flowchart LR - + WSBase["openhands.sdk
Base Classes:
Workspace · Local · Remote"]:::sdk - + subgraph WS[" "] direction LR Docker["openhands.workspace DockerWorkspace
extends RemoteWorkspace"]:::ws Remote["openhands.workspace RemoteAPIWorkspace
extends RemoteWorkspace"]:::ws end - + Server["openhands.agent_server
FastAPI + WebSocket"]:::server Agent["openhands.sdk
Agent · LLM · Conversation"]:::sdk Tools["openhands.tools
BashTool · FileEditor · …"]:::tools - + WSBase -.->|extended by| Docker WSBase -.->|extended by| Remote Docker -->|spawns container with| Server Remote -->|connects via HTTP to| Server Server -->|runs| Agent Agent -->|uses| Tools - + classDef sdk fill:#e8f3ff,stroke:#2b6cb0,color:#0f2a45,stroke-width:1.1px,rx:8,ry:8 classDef ws fill:#fff4df,stroke:#b7791f,color:#5b3410,stroke-width:1.1px,rx:8,ry:8 classDef server fill:#f3e8ff,stroke:#7c3aed,color:#3b2370,stroke-width:1.1px,rx:8,ry:8 classDef tools fill:#e9f9ef,stroke:#2f855a,color:#14532d,stroke-width:1.1px,rx:8,ry:8 - + style WS stroke:#b7791f,stroke-width:1.5px,stroke-dasharray: 4 3,rx:8,ry:8,fill:none ``` @@ -6674,7 +6317,7 @@ sequenceDiagram participant Agent participant LLM participant Tool - + You->>Conversation: "Create hello.txt" Conversation->>Agent: Process message Agent->>LLM: What should I do? @@ -6699,17 +6342,17 @@ graph TB subgraph "Your Code (Unchanged)" Code["Agent + Tools + LLM"] end - + subgraph "Deployment Options" Local["Local
Direct execution"] Docker["Docker
Containerized"] Remote["Remote
Multi-user server"] end - + Code -->|LocalWorkspace| Local Code -->|DockerWorkspace| Docker Code -->|RemoteAPIWorkspace| Remote - + style Code fill:#e1f5fe style Local fill:#e8f5e8 style Docker fill:#e8f5e8 @@ -6776,18 +6419,18 @@ The SDK package handles: ```mermaid graph TB Conv[Conversation
Lifecycle Manager] --> Agent[Agent
Reasoning Loop] - + Agent --> LLM[LLM
Language Model] Agent --> Tools[Tool System
Capabilities] Agent --> Micro[Skills
Behavior Modules] Agent --> Cond[Condenser
Memory Manager] - + Tools --> Workspace[Workspace
Execution] - + Conv --> Events[Events
Communication] Tools --> MCP[MCP
External Tools] Workspace --> Security[Security
Validation] - + style Conv fill:#e1f5fe style Agent fill:#f3e5f5 style LLM fill:#e8f5e8 @@ -6924,7 +6567,7 @@ graph TB 2. **Observation**: Output schema (what the tool returns) 3. **ToolExecutor**: Logic that transforms Action → Observation -**Why this pattern?** +**Why this pattern?** - Type safety catches errors early - LLMs get accurate schemas for tool calling - Tools are testable in isolation @@ -7242,46 +6885,46 @@ flowchart TB subgraph Interface["Abstract Interface"] Base["SecurityAnalyzerBase
Abstract analyzer"] end - + subgraph Implementations["Concrete Analyzers"] LLM["LLMSecurityAnalyzer
Inline risk prediction"] NoOp["NoOpSecurityAnalyzer
No analysis"] end - + subgraph Risk["Risk Levels"] Low["LOW
Safe operations"] Medium["MEDIUM
Moderate risk"] High["HIGH
Dangerous ops"] Unknown["UNKNOWN
Unanalyzed"] end - + subgraph Policy["Confirmation Policy"] Check["should_require_confirmation()"] Mode["Confirmation Mode"] Decision["Require / Allow"] end - + Base --> LLM Base --> NoOp - + Implementations --> Low Implementations --> Medium Implementations --> High Implementations --> Unknown - + Low --> Check Medium --> Check High --> Check Unknown --> Check - + Check --> Mode Mode --> Decision - + classDef primary fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px classDef secondary fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px classDef tertiary fill:#fff4df,stroke:#b7791f,stroke-width:2px classDef danger fill:#ffe8e8,stroke:#dc2626,stroke-width:2px - + class Base primary class LLM secondary class High danger @@ -7307,20 +6950,20 @@ Security analyzers return one of four risk levels: flowchart TB Action["ActionEvent"] Analyze["Security Analyzer"] - + subgraph Levels["Risk Levels"] Low["LOW
Read-only, safe"] Medium["MEDIUM
Modify files"] High["HIGH
Delete, execute"] Unknown["UNKNOWN
Not analyzed"] end - + Action --> Analyze Analyze --> Low Analyze --> Medium Analyze --> High Analyze --> Unknown - + style Low fill:#d1fae5,stroke:#10b981,stroke-width:2px style Medium fill:#fef3c7,stroke:#f59e0b,stroke-width:2px style High fill:#ffe8e8,stroke:#dc2626,stroke-width:2px @@ -7351,13 +6994,13 @@ flowchart TB Extract["Extract security_risk
from arguments"] ActionEvent["ActionEvent
with security_risk set"] Analyzer["LLMSecurityAnalyzer
returns security_risk"] - + Schema --> LLM LLM --> ToolCall ToolCall --> Extract Extract --> ActionEvent ActionEvent --> Analyzer - + style Schema fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Extract fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Analyzer fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -7400,9 +7043,9 @@ flowchart LR Action["ActionEvent"] NoOp["NoOpSecurityAnalyzer"] Unknown["SecurityRisk.UNKNOWN"] - + Action --> NoOp --> Unknown - + style NoOp fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px ``` @@ -7433,20 +7076,20 @@ flowchart TB CheckUnknown{"Risk ==
UNKNOWN?"} UseConfirmUnknown{"confirm_unknown
setting?"} CheckThreshold{"risk.is_riskier
(threshold)?"} - + Confirm["Require Confirmation"] Allow["Allow Execution"] - + Risk --> CheckUnknown CheckUnknown -->|Yes| UseConfirmUnknown CheckUnknown -->|No| CheckThreshold - + UseConfirmUnknown -->|True| Confirm UseConfirmUnknown -->|False| Allow - + CheckThreshold -->|Yes| Confirm CheckThreshold -->|No| Allow - + style CheckUnknown fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Confirm fill:#ffe8e8,stroke:#dc2626,stroke-width:2px style Allow fill:#d1fae5,stroke:#10b981,stroke-width:2px @@ -7503,12 +7146,12 @@ flowchart LR Conversation["Conversation"] Tools["Tools"] MCP["MCP Tools"] - + Agent -->|Validates actions| Security Security -->|Checks| Tools Security -->|Uses hints| MCP Conversation -->|Pauses for confirmation| Agent - + style Security fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Agent fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Conversation fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -7554,47 +7197,47 @@ flowchart TB Knowledge["Knowledge Skill
trigger: KeywordTrigger"] Task["Task Skill
trigger: TaskTrigger"] end - + subgraph Triggers["Trigger Evaluation"] Always["Always Active
Repository guidelines"] Keyword["Keyword Match
String matching on user messages"] TaskMatch["Keyword Match + Inputs
Same as KeywordTrigger + user inputs"] end - + subgraph Content["Skill Content"] Markdown["Markdown with Frontmatter"] Dynamic["Dynamic Commands
!`command` execution"] MCPTools["MCP Tools Config
Repo skills only"] Inputs["Input Metadata
Task skills only"] end - + subgraph Integration["Agent Integration"] Context["Agent Context"] Prompt["System Prompt"] end - + Repo --> Always Knowledge --> Keyword Task --> TaskMatch - + Always --> Markdown Keyword --> Markdown TaskMatch --> Markdown - + Markdown -.->|Optional| Dynamic Repo -.->|Optional| MCPTools Task -.->|Requires| Inputs - + Markdown --> Context Dynamic --> Context MCPTools --> Context Context --> Prompt - + classDef primary fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px classDef secondary fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px classDef tertiary fill:#fff4df,stroke:#b7791f,stroke-width:2px classDef dynamic fill:#e9f9ef,stroke:#2f855a,stroke-width:2px - + class Repo,Knowledge,Task primary class Always,Keyword,TaskMatch secondary class Context tertiary @@ -7627,11 +7270,11 @@ flowchart LR Parse["Parse Frontmatter"] Skill["Skill(trigger=None)"] Context["Always in Context"] - + File --> Parse Parse --> Skill Skill --> Context - + style Skill fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Context fill:#fff4df,stroke:#b7791f,stroke-width:2px ``` @@ -7664,13 +7307,13 @@ flowchart TB Activate["Activate Skill"] Skip["Skip Skill"] Context["Add to Context"] - + User --> Check Check --> Match Match -->|Yes| Activate Match -->|No| Skip Activate --> Context - + style Check fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Activate fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px ``` @@ -7704,13 +7347,13 @@ flowchart TB Template["Apply Template"] Context["Add to Context"] Skip["Skip Skill"] - + User --> Match Match -->|Yes| Inputs Match -->|No| Skip Inputs --> Template Template --> Context - + style Match fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Template fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px ``` @@ -7744,35 +7387,35 @@ Skills are evaluated at different points in the agent lifecycle: %%{init: {"theme": "default", "flowchart": {"nodeSpacing": 30, "rankSpacing": 40}} }%% flowchart TB Start["Agent Step Start"] - + Repo["Check Repository Skills
trigger: None"] AddRepo["Always Add to Context"] - + Message["Check User Message"] Keyword["Match Keyword Triggers"] AddKeyword["Add Matched Skills"] - + TaskType["Check Task Type"] TaskMatch["Match Task Triggers"] AddTask["Add Task Skill"] - + Build["Build Agent Context"] - + Start --> Repo Repo --> AddRepo - + Start --> Message Message --> Keyword Keyword --> AddKeyword - + Start --> TaskType TaskType --> TaskMatch TaskMatch --> AddTask - + AddRepo --> Build AddKeyword --> Build AddTask --> Build - + style Repo fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Keyword fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style TaskMatch fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -7799,11 +7442,11 @@ flowchart LR MCPConfig["mcp_tools Config"] Client["MCP Client"] Tools["Tool Registry"] - + Skill -->|Contains| MCPConfig MCPConfig -->|Spawns| Client Client -->|Registers| Tools - + style Skill fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style MCPConfig fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Tools fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -7892,12 +7535,12 @@ flowchart LR Context["Agent Context"] Agent["Agent"] MCP["MCP Client"] - + Skills -->|Injects content| Context Skills -.->|Spawns tools| MCP Context -->|System prompt| Agent MCP -->|Tool| Agent - + style Skills fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Context fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Agent fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -7943,7 +7586,7 @@ flowchart TB Observation["Observation
Output schema"] Executor["Executor
Business logic"] end - + subgraph Framework["Tool Framework"] Base["ToolBase
Abstract base"] Impl["Tool Implementation
Concrete tool"] @@ -7955,7 +7598,7 @@ flowchart TB ToolSpec["Tool Spec
name + params"] Base -.->|Extends| Impl - + ToolSpec -->|resolve_tool| Registry Registry -->|Create instances| Impl Impl -->|Available in| Agent @@ -7964,11 +7607,11 @@ flowchart TB Agent -->|Parse & validate| Action Agent -->|Execute via Tool.\_\_call\_\_| Executor Executor -->|Return| Observation - + classDef primary fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px classDef secondary fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px classDef tertiary fill:#fff4df,stroke:#b7791f,stroke-width:2px - + class Base primary class Action,Observation,Executor secondary class Registry tertiary @@ -8002,14 +7645,14 @@ flowchart TB WrapObs["ObservationEvent
wraps Observation"] Error["AgentErrorEvent"] end - + subgraph ToolSystem["Tool System"] ActionType["Action
Pydantic model"] ToolCall2["tool.\_\_call\_\_(action)
type-safe execution"] Execute["ToolExecutor
business logic"] ObsType["Observation
Pydantic model"] end - + ToolCall --> ParseJSON ParseJSON -->|Valid JSON| CreateAction ParseJSON -->|Invalid JSON| Error @@ -8020,7 +7663,7 @@ flowchart TB ToolCall2 --> Execute Execute --> ObsType ObsType --> WrapObs - + style ToolSystem fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Agent fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style ActionType fill:#ddd6fe,stroke:#7c3aed,stroke-width:2px @@ -8047,11 +7690,11 @@ flowchart LR Obs["Define Observation
with to_llm_content"] Exec["Define Executor
stateless logic"] Tool["ToolDefinition(...,
executor=Executor())"] - + Action --> Tool Obs --> Tool Exec --> Tool - + style Tool fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px ``` @@ -8073,12 +7716,12 @@ flowchart LR Exec["Define Executor
with \_\_init\_\_ and state"] Subclass["class MyTool(ToolDefinition)
with create() method"] Instance["Return [MyTool(...,
executor=instance)]"] - + Action --> Subclass Obs --> Subclass Exec --> Subclass Subclass --> Instance - + style Instance fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px ``` @@ -8096,21 +7739,21 @@ flowchart TB P1E["Define ToolExecutor
with \_\_call\_\_()"] P1T["ToolDefinition(...,
executor=Executor())"] end - + subgraph Pattern2["Pattern 2: Subclass with Factory"] P2A["Define Action/Observation
with visualize/to_llm_content"] P2E["Define Stateful ToolExecutor
with \_\_init\_\_() and \_\_call\_\_()"] P2C["class MyTool(ToolDefinition)
@classmethod create()"] P2I["Return [MyTool(...,
executor=instance)]"] end - + P1A --> P1E P1E --> P1T - + P2A --> P2E P2E --> P2C P2C --> P2I - + style P1T fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style P2I fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px ``` @@ -8154,20 +7797,20 @@ The registry enables **dynamic tool discovery** and instantiation from tool spec %%{init: {"theme": "default", "flowchart": {"nodeSpacing": 30}} }%% flowchart LR ToolSpec["Tool Spec
name + params"] - + subgraph Registry["Tool Registry"] Resolver["Resolver
name → factory"] Factory["Factory
create(params)"] end - + Instance["Tool Instance
with executor"] Agent["Agent"] - + ToolSpec -->|"resolve_tool(spec)"| Resolver Resolver -->|Lookup factory| Factory Factory -->|"create(**params)"| Instance Instance -->|Used by| Agent - + style Registry fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Factory fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px ``` @@ -8231,18 +7874,18 @@ flowchart TB Server["MCP Server
stdio/HTTP"] ExtTools["External Tools"] end - + subgraph Bridge["MCP Integration Layer"] MCPClient["MCPClient
Sync/Async bridge"] Convert["Schema Conversion
MCP → MCPToolDefinition"] MCPExec["MCPToolExecutor
Bridges to MCP calls"] end - + subgraph Agent["Agent System"] ToolsMap["tools_map
str -> ToolDefinition"] AgentLogic["Agent Execution"] end - + Server -.->|Spawns| ExtTools MCPClient --> Server Server --> Convert @@ -8251,11 +7894,11 @@ flowchart TB ToolsMap --> AgentLogic AgentLogic -->|Tool call| MCPExec MCPExec --> MCPClient - + classDef primary fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px classDef secondary fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px classDef external fill:#fff4df,stroke:#b7791f,stroke-width:2px - + class MCPClient primary class Convert,MCPExec secondary class Server,ExtTools external @@ -8284,13 +7927,13 @@ flowchart LR Loop["Background Event Loop"] Async["Async MCP Call"] Result["Return Result"] - + Sync --> Bridge Bridge --> Loop Loop --> Async Async --> Result Result --> Sync - + style Bridge fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Loop fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px ``` @@ -8311,16 +7954,16 @@ flowchart TB Config["MCP Server Config
command + args"] Spawn["Spawn Server Process
MCPClient"] List["List Available Tools
client.list_tools()"] - + subgraph Convert["For Each MCP Tool"] Store["Store MCP metadata
name, description, inputSchema"] CreateExec["Create MCPToolExecutor
bound to tool + client"] Def["Create MCPToolDefinition
generic MCPToolAction type"] end - + Register["Add to Agent's tools_map
bypasses ToolRegistry"] Ready["Tools Available
Dynamic models created on-demand"] - + Config --> Spawn Spawn --> List List --> Store @@ -8328,7 +7971,7 @@ flowchart TB CreateExec --> Def Def --> Register Register --> Ready - + style Spawn fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Def fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Register fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -8389,18 +8032,18 @@ flowchart TB Native["Native Tools"] MCP["MCP Tools"] end - + Registry["Tool Registry
resolve_tool"] ToolsMap["Agent.tools_map
Merged tool dict"] - + subgraph AgentSystem["Agent System"] Agent["Agent Logic"] LLM["LLM"] end - + Security["Security Analyzer"] Conversation["Conversation State"] - + Native -->|register_tool| Registry Registry --> ToolsMap MCP -->|create_mcp_tools| ToolsMap @@ -8408,7 +8051,7 @@ flowchart TB Agent -->|Execute tools| ToolsMap ToolsMap -.->|Action risk| Security ToolsMap -.->|Read state| Conversation - + style ToolsMap fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Agent fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Security fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -8455,38 +8098,38 @@ flowchart TB subgraph Interface["Abstract Interface"] Base["BaseWorkspace
Abstract base class"] end - + subgraph Implementations["Concrete Implementations"] Local["LocalWorkspace
Direct subprocess"] Remote["RemoteWorkspace
HTTP API calls"] end - + subgraph Operations["Core Operations"] Command["execute_command()"] Upload["file_upload()"] Download["file_download()"] Context["__enter__ / __exit__"] end - + subgraph Targets["Execution Targets"] Process["Local Process"] Container["Docker Container"] Server["Remote Server"] end - + Base --> Local Base --> Remote - + Base -.->|Defines| Operations - + Local --> Process Remote --> Container Remote --> Server - + classDef primary fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px classDef secondary fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px classDef tertiary fill:#fff4df,stroke:#b7791f,stroke-width:2px - + class Base primary class Local,Remote secondary class Command,Upload tertiary @@ -8523,21 +8166,21 @@ flowchart TB %%{init: {"theme": "default", "flowchart": {"nodeSpacing": 30, "rankSpacing": 40}} }%% flowchart LR Tool["Tool invokes
execute_command()"] - + Decision{"Workspace
type?"} - + LocalExec["subprocess.run()
Direct execution"] RemoteExec["POST /command
HTTP API"] - + Result["CommandResult
stdout, stderr, exit_code"] - + Tool --> Decision Decision -->|Local| LocalExec Decision -->|Remote| RemoteExec - + LocalExec --> Result RemoteExec --> Result - + style Decision fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style LocalExec fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style RemoteExec fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -8581,16 +8224,16 @@ The SDK provides remote workspace implementations in `openhands-workspace` packa %%{init: {"theme": "default", "flowchart": {"nodeSpacing": 50}} }%% flowchart TB Base["RemoteWorkspace
SDK base class"] - + Docker["DockerWorkspace
Auto-spawn containers"] API["RemoteAPIWorkspace
Connect to existing server"] - + Base -.->|Extended by| Docker Base -.->|Extended by| API - + Docker -->|Creates| Container["Docker Container
with agent-server"] API -->|Connects| Server["Remote Agent Server"] - + style Base fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Docker fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style API fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -8604,7 +8247,7 @@ flowchart TB | **DockerWorkspace** | Spawn container | Container | Multi-user, untrusted code | | **RemoteAPIWorkspace** | Connect to URL | Remote server | Distributed systems, cloud | -**Source:** +**Source:** - **DockerWorkspace**: [`openhands-workspace/openhands/workspace/docker`](https://github.com/OpenHands/software-agent-sdk/tree/main/openhands-workspace/openhands/workspace/docker) - **RemoteAPIWorkspace**: [`openhands-workspace/openhands/workspace/remote_api`](https://github.com/OpenHands/software-agent-sdk/tree/main/openhands-workspace/openhands/workspace/remote_api) @@ -8618,10 +8261,10 @@ flowchart LR Workspace["Workspace"] Conversation["Conversation"] AgentServer["Agent Server"] - + Conversation -->|Configures| Workspace Workspace -.->|Remote type| AgentServer - + style Workspace fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Conversation fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px ``` @@ -8993,12 +8636,17 @@ export LLM_API_KEY=your-api-key-here ```bash - pip install openhands-sdk # Core SDK (openhands.sdk) - pip install openhands-tools # Built-in tools (openhands.tools) - # Optional: required for sandboxed workspaces in Docker or remote servers - pip install openhands-workspace # Workspace backends (openhands.workspace) - pip install openhands-agent-server # Remote agent server (openhands.agent_server) + # Core SDK + built-in tools — install together so their versions stay aligned + pip install -U openhands-sdk openhands-tools + + # Optional: sandboxed workspaces in Docker or remote servers. + # List every package in one command so they all resolve to the same version. 
+ pip install -U openhands-sdk openhands-tools openhands-workspace openhands-agent-server ``` + + + `openhands-sdk` and `openhands-tools` are a matched set: they are built, tested, and released together at the same version number, and `openhands-tools` imports `openhands-sdk` internals directly. Always install and upgrade them in a **single** `pip` command so their versions match. Installing them separately can leave a newer `openhands-tools` against an older `openhands-sdk` (for example, when a previously installed copy is not upgraded), which fails at import with errors like `ModuleNotFoundError: No module named 'openhands.sdk.utils.path'`. To pin a specific release, use the same version for both, e.g. `pip install "openhands-sdk==1.22.1" "openhands-tools==1.22.1"`. + @@ -9805,7 +9453,7 @@ def get_planning_tools() -> list[Tool]: The planning agent uses: - **GlobTool**: For discovering files and directories matching patterns -- **GrepTool**: For searching specific content across files +- **GrepTool**: For searching specific content across files - **PlanningFileEditorTool**: For writing structured plans to `PLAN.md` only This read-only approach (except for `PLAN.md`) keeps the agent focused on analysis without implementation distractions. @@ -11767,6 +11415,17 @@ cd agent-sdk uv run python examples/02_remote_agent_server/10_cloud_workspace_share_credentials.py ``` + +## Settings and Secrets API Examples + +The remote agent-server examples also include end-to-end scripts for settings-backed secrets and authenticated LLM configuration: + +- [examples/02_remote_agent_server/12_settings_and_secrets_api.py](https://github.com/OpenHands/software-agent-sdk/blob/main/examples/02_remote_agent_server/12_settings_and_secrets_api.py) demonstrates storing secrets through the Settings and Secrets API, referencing them with `LookupSecret`, and cleaning them up after use. 
+- [examples/02_remote_agent_server/13_workspace_get_llm.py](https://github.com/OpenHands/software-agent-sdk/blob/main/examples/02_remote_agent_server/13_workspace_get_llm.py) demonstrates configuring LLM settings on an authenticated agent-server and retrieving them through `RemoteWorkspace.get_llm()`. + + + + ## Next Steps - **[API-based Sandbox](/sdk/guides/agent-server/api-sandbox)** - Connect to Runtime API service @@ -12013,7 +11672,7 @@ ENV PYTHONPATH="/app:${PYTHONPATH}" This example is available on GitHub: [examples/02_remote_agent_server/06_custom_tool/](https://github.com/OpenHands/software-agent-sdk/tree/main/examples/02_remote_agent_server/06_custom_tool) -```python icon="python" expandable examples/02_remote_agent_server/06_custom_tool/custom_tool_example.py +```python icon="python" expandable examples/02_remote_agent_server/06_custom_tool/main.py """Example: Using custom tools with remote agent server. This example demonstrates how to use custom tools with a remote agent server @@ -12292,7 +11951,7 @@ The docker sandboxed agent server demonstrates how to run agents in isolated Doc This provides complete isolation from the host system, making it ideal for production deployments, testing, and executing untrusted code safely. -Use `DockerWorkspace` with a pre-built agent server image for the fastest startup. When you need to build your own image from a base image, switch to `DockerDevWorkspace`. +Use `DockerWorkspace` with a pre-built agent server image for the fastest startup. When you need to build your own image from a base image, switch to `DockerDevWorkspace`. the Docker sandbox image ships with features configured in the [Dockerfile](https://github.com/OpenHands/software-agent-sdk/blob/main/openhands-agent-server/openhands/agent_server/docker/Dockerfile) (e.g., secure defaults and services like VSCode and VNC exposed behind well-defined ports), which are not available in the local (non-Docker) agent server. 
@@ -12779,7 +12438,7 @@ agent = get_default_agent( When `cli_mode=False`, the agent gains access to browser automation tools for web interaction. -When VNC is available and `extra_ports=True`, the browser will be opened in the VNC desktop to visualize agent's work. You can watch the browser in real-time via VNC. Demo video: +When VNC is available and `extra_ports=True`, the browser will be opened in the VNC desktop to visualize agent's work. You can watch the browser in real-time via VNC. Demo video: +## Agent-based Hooks + +Besides shell scripts, a hook can delegate its decision to an LLM-driven +sub-agent by setting `type="agent"`. The sub-agent receives the lifecycle event +as JSON, reasons about it semantically, and replies with a decision payload: + +```json +{"decision": "allow" | "deny", "reason": ""} +``` + +This is useful when a syntactic blacklist is not enough — for example, a +`PreToolUse` reviewer that recognises `awk '{print}' /etc/passwd` as *reading a +sensitive file* even though no obvious keyword (`cat`, `/etc/shadow`) appears. + +Key fields on an agent `HookDefinition`: + +- `name` — a label for the hook; identifies it in logs, events, and its + `agent-hook:` metrics bucket. +- `system_prompt` — the policy the reviewer agent follows. +- `tools` — optional tools the reviewer may use (e.g. `["file_editor"]` to + inspect the workspace before deciding). +- `timeout` / `max_iterations` — bound how long the reviewer runs. + +The agent hook runs in an isolated sub-conversation (its own ephemeral state, no +nested hooks), and its LLM spend is tracked under an `agent-hook:` usage +bucket that is merged back into the parent conversation's metrics. If no LLM is +available or the reviewer fails to produce a valid decision, the hook *falls +open* (allows) so it never blocks the agent on an internal error. 
+ + +This example is available on GitHub: [examples/01_standalone_sdk/51_agent_hooks](https://github.com/OpenHands/software-agent-sdk/blob/main/examples/01_standalone_sdk/51_agent_hooks/) + + +```python icon="python" expandable examples/01_standalone_sdk/51_agent_hooks/main.py +"""OpenHands Agent SDK — Agent-based Hooks Example + +Demonstrates the `type="agent"` hook, which evaluates lifecycle events with an +LLM-driven sub-agent instead of a shell script. The hook agent receives the +event JSON, reasons about it semantically, and replies with a decision payload: + + {"decision": "allow" | "deny", "reason": "..."} + +Two demos: + +- PreToolUse (security reviewer): inspects the INTENT of a terminal command, + not just its syntax. A command like `awk '{print}' /etc/passwd` would slip + past a blacklist of command names, but the agent hook recognises it as + reading a sensitive system file and denies it. +- Stop (quality reviewer): refuses to let the main agent finish until the + required deliverable exists in the workspace. + +For shell-command hooks see ../33_hooks/main.py. +""" + +import os +import tempfile +from pathlib import Path + +from pydantic import SecretStr + +from openhands.sdk import LLM, Conversation +from openhands.sdk.conversation.state import ConversationExecutionStatus +from openhands.sdk.event.hook_execution import HookExecutionEvent +from openhands.sdk.hooks import HookConfig, HookDefinition, HookMatcher, HookType +from openhands.tools.preset.default import get_default_agent + + +# Keep the demo conversations short: a small per-run iteration cap means a hook +# that keeps denying (or a model that keeps retrying) fails fast instead of +# burning calls up to the default 500-iteration limit / CI subprocess timeout. +MAX_ITERATIONS = 10 + + +# Configure LLM +api_key = os.getenv("LLM_API_KEY") +assert api_key is not None, "LLM_API_KEY environment variable is not set." 
+model = os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929") +base_url = os.getenv("LLM_BASE_URL") + +llm = LLM( + usage_id="agent", + model=model, + base_url=base_url, + api_key=SecretStr(api_key), +) + +SECURITY_REVIEWER_PROMPT = """You are a security reviewer for a coding agent. +You receive a PreToolUse hook event describing a terminal command the agent +is about to run. + +Reason about the SEMANTIC INTENT of the command, not just its syntax. Block: +- reading sensitive system files (/etc/passwd, /etc/shadow, ~/.ssh/*, credentials) +- broad filesystem discovery outside the workspace (rg --files /etc, find /, ls /root) +- exfiltrating data over the network +- modifying system state (sudo, apt install, systemctl) + +Allow ordinary inspection, builds, tests, and edits inside the workspace. +When unsure, prefer allow. + +Reply with a single JSON object and nothing else: +{"decision": "allow" | "deny", "reason": ""} +""" + +QUALITY_REVIEWER_PROMPT = """You are a quality reviewer enforcing task completion. +You receive a Stop hook event when the main agent tries to finish. + +The task requires the file REPORT.md to exist in the workspace and contain at +least one bullet point describing the repository. Use the file_editor tool to +check whether the file exists and inspect its contents. + +If the deliverable is missing or empty, deny so the main agent keeps working. +Otherwise allow. 
+ +Reply with a single JSON object and nothing else: +{"decision": "allow" | "deny", "reason": ""} +""" + + +def hook_logger(event) -> None: + """Surface each hook decision so the demo output is self-explanatory.""" + if not isinstance(event, HookExecutionEvent): + return + status = "DENY " if event.blocked else ("ALLOW" if event.success else "FAIL ") + line = f" [hook] {event.hook_event_type} {status} -> {event.hook_command}" + if event.reason: + line += f"\n reason: {event.reason}" + print(line) + + +def run_demo(workspace: Path, hook_config: HookConfig, message: str) -> float: + """Run one demo in its own conversation and return its cost. + + Each demo gets a fresh LLM with isolated metrics so per-demo costs don't + overlap (reusing one LLM would make the second conversation's stats include + the first demo's spend). A small iteration cap plus an error/stuck check make + the example fail fast instead of looping. + """ + demo_llm = llm.model_copy() + demo_llm.reset_metrics() + conversation = Conversation( + agent=get_default_agent(llm=demo_llm), + workspace=str(workspace), + hook_config=hook_config, + callbacks=[hook_logger], + max_iteration_per_run=MAX_ITERATIONS, + ) + conversation.send_message(message) + conversation.run() + status = conversation.state.execution_status + if status in ( + ConversationExecutionStatus.ERROR, + ConversationExecutionStatus.STUCK, + ): + raise RuntimeError( + f"Demo conversation ended in {status.value} state " + "before reaching a decision." + ) + return conversation.conversation_stats.get_combined_metrics().accumulated_cost + + +# Each demo runs in its own conversation with only the hook it needs. Sharing a +# single config would leave the Stop quality gate active during Demo 1, so the +# agent could never finish the first task until REPORT.md existed — coupling two +# unrelated demos and burning iterations. 
+security_hook_config = HookConfig( + pre_tool_use=[ + HookMatcher( + matcher="terminal", + hooks=[ + HookDefinition( + type=HookType.AGENT, + name="security-reviewer", + system_prompt=SECURITY_REVIEWER_PROMPT, + timeout=60, + max_iterations=3, + ) + ], + ) + ], +) + +quality_hook_config = HookConfig( + stop=[ + HookMatcher( + hooks=[ + HookDefinition( + type=HookType.AGENT, + name="quality-reviewer", + system_prompt=QUALITY_REVIEWER_PROMPT, + tools=["file_editor"], + timeout=90, + max_iterations=5, + ) + ], + ) + ], +) + + +with tempfile.TemporaryDirectory() as tmpdir: + workspace = Path(tmpdir) + total_cost = 0.0 + + print("=" * 60) + print("Demo 1: PreToolUse — semantic deny") + print("=" * 60) + print( + "Asking the agent to read /etc/passwd via awk. The literal command\n" + "wouldn't match a syntactic blacklist (no `cat`, no `/etc/shadow`\n" + "keyword), but the security-reviewer agent should recognise the\n" + "intent and deny.\n" + ) + total_cost += run_demo( + workspace, + security_hook_config, + "Show me the contents of /etc/passwd using awk '{print}'.", + ) + + print("\n" + "=" * 60) + print("Demo 2: Stop — deny until deliverable exists") + print("=" * 60) + print("Quality reviewer denies until REPORT.md exists with a bullet point.\n") + total_cost += run_demo( + workspace, + quality_hook_config, + "Write REPORT.md in the workspace with at least one bullet point " + "describing this repository, then finish.", + ) + + report = workspace / "REPORT.md" + if report.exists(): + print(f"\n[REPORT.md preview: {report.read_text()[:120]!r}...]") + + print("\n" + "=" * 60) + print("Example Complete!") + print("=" * 60) + + print(f"\nEXAMPLE_COST: {total_cost}") +``` + + + ## Next Steps - See also: [Metrics and Observability](/sdk/guides/metrics) @@ -19947,6 +19650,17 @@ store.delete("gpt") + +## Agent-Driven LLM Switching + +Saved profiles can also be exposed to the agent through the `switch_llm` built-in tool. 
The tool call switches the conversation's active profile after the current model finishes the tool call, so future model calls use the selected profile. + + +This example is available on GitHub: [examples/01_standalone_sdk/49_switch_llm_tool.py](https://github.com/OpenHands/software-agent-sdk/blob/main/examples/01_standalone_sdk/49_switch_llm_tool.py) + + + + ## Next Steps - **[LLM Registry](/sdk/guides/llm-registry)** - Manage multiple LLMs in memory at runtime @@ -21549,7 +21263,7 @@ The `conversation.conversation_stats` object provides cost tracking across all L #### Key Methods and Properties - **`usage_to_metrics`**: A dictionary mapping usage IDs to their respective `Metrics` objects. This allows you to track costs separately for each LLM used in the conversation. - + - **`get_combined_metrics()`**: Returns a single `Metrics` object that aggregates costs across all LLMs used in the conversation. This gives you the total cost of the entire conversation. - **`get_metrics_for_usage(usage_id: str)`**: Retrieves the `Metrics` object for a specific usage ID, allowing you to inspect costs for individual LLMs. @@ -23389,13 +23103,13 @@ from openhands.sdk.event.llm_convertible import ActionEvent class CustomSecurityAnalyzer(SecurityAnalyzerBase): """Custom security analyzer with domain-specific rules.""" - + def security_risk(self, action: ActionEvent) -> SecurityRisk: """Evaluate security risk based on custom rules. - + Args: action: The ActionEvent to analyze - + Returns: SecurityRisk level (LOW, MEDIUM, HIGH, or UNKNOWN) """ @@ -23405,11 +23119,11 @@ class CustomSecurityAnalyzer(SecurityAnalyzerBase): # High-risk patterns if any(pattern in action_str for pattern in ['rm -rf', 'sudo', 'chmod 777']): return SecurityRisk.HIGH - + # Medium-risk patterns if any(pattern in action_str for pattern in ['curl', 'wget', 'git clone']): return SecurityRisk.MEDIUM - + # Default to low risk return SecurityRisk.LOW @@ -23582,6 +23296,33 @@ replacement for either. 
| Content past 30k chars is invisible | Hard cap prevents regex denial-of-service | Raise the cap (increases ReDoS exposure) | | `thinking_blocks` not scanned | Scanning model reasoning risks false positives on deliberation | Separate injection-only CoT scan | +#### Extraction budget and primary-surface-first ordering + +The 30k-character cap is applied per scanning corpus, not per field: every +field competes for one shared budget (the `_BoundedSegments` buffer in +`defense_in_depth/utils.py`). That creates a secondary risk — a single +oversized field could consume the whole budget and leave higher-value +fields unscanned. `tool_name` has no length validation in the SDK, so a 30k +hallucinated name is a real starvation vector, not just a theoretical one. + +The analyzer addresses this by **extraction order**, not a per-field cap: +the primary attack surface is added first, so it always receives budget +even when a later field is adversarially large. + +- Executable corpus: `tool_call.arguments` (the primary prompt-injection + surface) → `tool_name` → `tool_call.name`. +- Reasoning corpus: `summary` (what the agent is about to do) → + `reasoning_content` → `thought`. + +The two corpora are extracted with separate budgets and concatenated +without a second outer cap, so a budget-filling `arguments` payload cannot +crowd `summary` out of the injection scan. + +**Remaining boundary** (a strict xfail in the test suite): a payload past +30k characters *within a single field* is still truncated and invisible. +That is the deliberate ReDoS trade-off already listed above; extraction +order does not change it. 
+ Ready-to-run example: [examples/01_standalone_sdk/47_defense_in_depth_security.py](https://github.com/OpenHands/software-agent-sdk/blob/main/examples/01_standalone_sdk/47_defense_in_depth_security.py) @@ -27405,7 +27146,7 @@ The agent will automatically have access to tools provided by enabled MCP server ```bash # For stdio servers python -m my_mcp_server - + # For HTTP servers, check the URL is reachable curl https://api.example.com/mcp ``` @@ -28478,10 +28219,6 @@ Key aspects of the plugin system: ### Creating Automations Source: https://docs.openhands.dev/openhands/usage/automations/creating-automations.md - -**Beta Feature**: Automations is currently in beta and available for **OpenHands Cloud** and **OpenHands Enterprise** users only. - - The easiest way to create an automation is to ask OpenHands directly. The Automation Skill handles all the details—you just describe what you want. ## Prompt vs Plugin Automations @@ -28493,8 +28230,8 @@ There are two types of automations: Most automations are prompt-based. Just describe the task in natural language: ``` - Create an automation called "Daily Standup Summary" that runs every weekday - at 9 AM Eastern. It should check our GitHub repo for PRs merged yesterday + Create an automation called "Daily Standup Summary" that runs every weekday + at 9 AM Eastern. It should check our GitHub repo for PRs merged yesterday and post a summary to #engineering on Slack. ``` @@ -28504,7 +28241,7 @@ There are two types of automations: For specialized capabilities, include one or more plugins from the [OpenHands extensions repository](https://github.com/OpenHands/extensions): ``` - Create an automation using the code-review plugin that runs every weekday + Create an automation using the code-review plugin that runs every weekday at 9 AM. It should review any Python files changed in the last 24 hours. ``` @@ -28556,7 +28293,7 @@ The prompt is what the AI agent executes each time the automation runs. 
Write it Tell the automation what to do with its output: -- "Post to the #alerts Slack channel" (requires [Slack MCP](/openhands/usage/cloud/slack-installation)) +- "Post to the #alerts Slack channel" (requires [Slack MCP](/openhands/usage/settings/mcp-settings)) - "Save to `reports/weekly-summary.md`" - "Create a GitHub issue with the findings" (automatic if you logged in with GitHub) - "Send a message via the configured notification service" @@ -28571,7 +28308,7 @@ For monitoring tasks, explain what should happen when things go wrong: ``` Check the health endpoint at https://api.example.com/health. -If it returns anything other than 200 OK, send an alert to #ops +If it returns anything other than 200 OK, send an alert to #ops with the status code and response body. If it's healthy, just log success without alerting. ``` @@ -28586,7 +28323,7 @@ Each automation runs in a full OpenHands sandbox with: - **Your secrets**: Access API keys stored in Settings > Secrets - **MCP integrations**: Use your configured MCP servers - **Network access**: Make HTTP requests, connect to APIs -- **Git provider access**: Tokens from your Cloud login (GitHub, GitLab, or Bitbucket) are automatically included +- **Git provider access**: Tokens from your login (GitHub, GitLab, or Bitbucket) are automatically included ## Schedules @@ -28622,10 +28359,6 @@ Once your automation is created: ### Event-Based Automations Source: https://docs.openhands.dev/openhands/usage/automations/event-automations.md - -**Beta Feature**: Event-based automations are in beta for **OpenHands Cloud** and **OpenHands Enterprise** users. - - Event-based automations run when something happens—a PR is opened, an issue is commented on, or a webhook fires—instead of on a schedule. This is ideal for responsive workflows like auto-reviewing PRs, triaging issues, or reacting to external service events. ## Built-In vs. 
Custom Integrations @@ -28716,7 +28449,9 @@ For services beyond GitHub—like Linear, Stripe, or Slack—register a custom w ### Walkthrough: Linear Integration -This example walks through setting up a Linear webhook to auto-triage new issues. + +This example walks through setting up a Linear webhook to auto-triage new issues using Automations in **[OpenHands Cloud](https://app.all-hands.dev)**. + #### Step 1: Get Your Webhook Secret from Linear @@ -28830,10 +28565,6 @@ New to automations? Start with the [Automations Overview](/openhands/usage/autom ### Managing Automations Source: https://docs.openhands.dev/openhands/usage/automations/managing-automations.md - -**Beta Feature**: Automations is currently in beta and available for **OpenHands Cloud** and **OpenHands Enterprise** users only. - - You can manage your automations by asking OpenHands directly—just like you created them. ## Viewing Your Automations @@ -28931,13 +28662,9 @@ Deleting an automation is permanent. Consider disabling it instead if you might ### Automations Overview Source: https://docs.openhands.dev/openhands/usage/automations/overview.md - -**Beta Feature**: Automations is currently in beta and available for **OpenHands Cloud** and **OpenHands Enterprise** users only. - - Automations let you schedule AI-powered tasks that run automatically—daily reports, health checks, data syncs, and more. Each automation runs a full OpenHands conversation on your chosen schedule, with access to your LLM settings, stored secrets, and integrations. -Your git provider credentials are automatically available—if you logged into OpenHands Cloud with GitHub, GitLab, or Bitbucket, that access is included by default. +Your git provider credentials are automatically available—if you logged into OpenHands with GitHub, GitLab, or Bitbucket, that access is included by default. ## What Can Automations Do? 
@@ -28948,7 +28675,7 @@ Your git provider credentials are automatically available—if you logged into O - **Send notifications**: Post updates to Slack, create GitHub issues, or send alerts -Automations can only interact with services you've configured access to. For example, posting to Slack requires the [Slack MCP integration](/openhands/usage/cloud/slack-installation). Git providers you logged in with (GitHub, GitLab, Bitbucket) are automatically available. +Automations can only interact with services you've configured access to. For example, posting to Slack requires the [Slack MCP integration](/openhands/usage/settings/mcp-settings). Git providers you logged in with (GitHub, GitLab, Bitbucket) are automatically available. ## Two Types of Automations @@ -28966,14 +28693,14 @@ Both types are created the same way—just describe what you want and OpenHands Just ask OpenHands to create one: ``` -Create an automation that runs every Monday at 9 AM and summarizes +Create an automation that runs every Monday at 9 AM and summarizes our open GitHub issues, then posts the summary to #engineering on Slack. ``` For plugin-based automations, mention the plugin: ``` -Create an automation using the code-review plugin that runs daily +Create an automation using the code-review plugin that runs daily and reviews any Python files changed in the last 24 hours. ``` @@ -28995,40 +28722,27 @@ When your automation runs: Automations are user-scoped—each automation and its runs belong to you. Conversations created by your automations automatically appear in your conversations list, just like any other conversation you start. -Your automation has access to everything a normal OpenHands conversation does: terminal, file editing, your configured LLM, stored secrets, and MCP integrations. Git provider tokens from your Cloud login (GitHub, GitLab, or Bitbucket) are automatically included. 
+Your automation has access to everything a normal OpenHands conversation does: terminal, file editing, your configured LLM, stored secrets, and MCP integrations. Git provider tokens from your login (GitHub, GitLab, or Bitbucket) are automatically included. ## Getting Started -Before creating automations, complete this one-time setup: - -### 1. Create an OpenHands API Key +**Prerequisites** -Go to [Settings > API Keys](https://app.all-hands.dev/settings/api-keys) and create a new API key. - -### 2. Save the API Key as a Secret - -Copy the API key value and go to [Settings > Secrets](https://app.all-hands.dev/settings/secrets). Create a new secret with: -- **Name**: `OPENHANDS_API_KEY` -- **Value**: Your API key from step 1 - -This allows the Automation Skill to create and manage automations on your behalf. - -### 3. Start a Conversation +- **Configured LLM** in your settings +- **Stored secrets** (optional) for any additional API keys your automations need (e.g., Slack tokens) Open a new conversation in OpenHands and ask it to create an automation: ``` -Create an automation that runs every Monday at 9 AM and summarizes +Create an automation that runs every Monday at 9 AM and summarizes our open GitHub issues, then posts to #engineering on Slack. ``` -You can also list existing automations, enable/disable them, or trigger manual runs—all through conversation. +Once you create an automation, you can view them by clicking on the "Automations" icon on the left-hand navigation. + +You can also ask OpenHands to list [existing automations, enable/disable them, or trigger manual runs](/openhands/usage/automations/managing-automations). 
-## Prerequisites -- **OpenHands Cloud or Enterprise account** (not available in open-source) -- **Configured LLM** in your [settings](https://app.all-hands.dev/settings) -- **Stored secrets** (optional) for any additional API keys your automations need (e.g., Slack tokens) --- @@ -29060,6 +28774,13 @@ Each use case has a ready-to-use automation prompt. Click a card to see the full > Monitor API health, analyze errors, and alert your team automatically. + + Functionally test PR changes by exercising the software as a real user would. + &1; then + echo '{"decision":"deny","reason":"Quality checks failed. Fix them before finishing."}' + exit 2 fi exit 0 ``` +Then register it in `.openhands/hooks.json`: + +```json .openhands/hooks.json +{ + "stop": [ + { + "matcher": "*", + "hooks": [ + { "command": ".openhands/hooks/quality_gate.sh", "timeout": 120 } + ] + } + ] +} +``` + +If you currently use `.openhands/pre-commit.sh`, migrate those checks to Stop hooks when you want quality gates to apply +to current agent-server-backed OpenHands flows. Move the check commands into a Stop hook script like the one above. See the +[Hooks](/openhands/usage/customization/hooks) guide for complete behavior and JSON response details. + ### Debugging Source: https://docs.openhands.dev/openhands/usage/developers/debugging.md @@ -30521,7 +30258,7 @@ Fix the TypeError in src/api/users.py line 45. Error message: TypeError: 'NoneType' object has no attribute 'get' -Expected behavior: The get_user_preferences() function should return +Expected behavior: The get_user_preferences() function should return default preferences when the user has no saved preferences. Actual behavior: It crashes with the error above when user.preferences is None. @@ -30564,7 +30301,7 @@ Requirements: Follow the existing patterns in src/api/routes.js for route structure. Use the existing db.query() helper in src/db/index.js for database access. 
-Success criteria: I can call the endpoint with valid credentials +Success criteria: I can call the endpoint with valid credentials and receive a JWT token that works with our existing auth middleware. ``` @@ -30688,7 +30425,7 @@ Context: - We use Redis (already available in the project) - Our API follows the controller pattern in src/controllers/ -Requirement: Limit each API key to 100 requests per minute with +Requirement: Limit each API key to 100 requests per minute with appropriate 429 responses and Retry-After headers. ``` @@ -30744,15 +30481,15 @@ Constraints to specify: ``` - The dashboard takes 5 seconds to load. - + The dashboard takes 5 seconds to load. + Profile it and optimize to load in under 1 second. - + Likely issues: - N+1 queries in getWidgetData() - Uncompressed images - Missing database indexes - + Focus on the biggest wins first. ``` @@ -30769,13 +30506,13 @@ Constraints to specify: ``` Add caching to the product catalog API. - + Context: - 95% of requests are for the same 1000 products - Product data changes only via admin panel (rare) - We already have Redis running for sessions - Current response time is 200ms, target is <50ms - + Cache strategy: Cache product data in Redis with 5-minute TTL, invalidate on product update. ``` @@ -30792,15 +30529,15 @@ Constraints to specify: ``` - Create a Go microservice for the image processing currently in + Create a Go microservice for the image processing currently in src/php/ImageProcessor.php. - - This is the first step in our gradual migration. + + This is the first step in our gradual migration. The Go service should: 1. Expose the same API endpoints 2. Be deployable alongside the existing PHP app 3. Include a feature flag to route traffic - + Start with just the resize and crop functions. ``` @@ -30817,15 +30554,15 @@ Constraints to specify: ``` Users can't log in since yesterday's deployment. 
- + Symptoms: - Login form submits but returns 500 error - Server logs show: "Redis connection refused" - Redis was moved to a new host yesterday - - The issue is likely in src/config/redis.js which may + + The issue is likely in src/config/redis.js which may have the old host hardcoded. - + Expected: Login should work with the new Redis at redis.internal:6380 ``` @@ -30898,7 +30635,7 @@ This task is complete when: Build on previous work: ``` -In our last session, you added the login endpoint. +In our last session, you added the login endpoint. Now add the logout functionality: 1. POST /api/auth/logout endpoint @@ -31145,7 +30882,7 @@ review: - performance - test_coverage - documentation - + severity_levels: block_merge: - critical @@ -31155,7 +30892,7 @@ review: informational: - minor - suggestion - + ignore_patterns: - "*.generated.*" - "vendor/*" @@ -31181,15 +30918,15 @@ quality_gates: - name: test_coverage threshold: 80% action: block_merge - + - name: security_issues threshold: 0 critical action: block_merge - + - name: code_review_score threshold: 7/10 action: require_review - + - name: documentation requirement: all_public_apis action: warn @@ -31316,8 +31053,8 @@ OpenHands excels at many development tasks, but knowing when to use it—and whe **Example prompt:** ``` -Add a calculateDiscount() function to src/utils/pricing.js that takes -a price and discount percentage, returns the discounted price. +Add a calculateDiscount() function to src/utils/pricing.js that takes +a price and discount percentage, returns the discounted price. Add unit tests. ``` @@ -31359,13 +31096,13 @@ Add a user profile endpoint to our API: ``` Break large tasks into phases: -Phase 1: "Analyze the current authentication system and document +Phase 1: "Analyze the current authentication system and document all touch points that need to change for OAuth2 migration." 
-Phase 2: "Implement the OAuth2 provider configuration and basic +Phase 2: "Implement the OAuth2 provider configuration and basic token flow, keeping existing auth working in parallel." -Phase 3: "Migrate the user login flow to use OAuth2, maintaining +Phase 3: "Migrate the user login flow to use OAuth2, maintaining backwards compatibility." ``` @@ -31485,7 +31222,7 @@ Prepare your repository: ## AGENTS.md Checklist - [ ] Build commands documented -- [ ] Test commands documented +- [ ] Test commands documented - [ ] Code style guidelines noted - [ ] Architecture overview included - [ ] Common patterns described @@ -32428,20 +32165,38 @@ for the canonical list of supported parameters. ## Model Recommendations -Based on our evaluations of language models for coding tasks (using the SWE-bench dataset), we can provide some -recommendations for model selection. Our latest benchmarking results can be found in -[this spreadsheet](https://docs.google.com/spreadsheets/d/1wOUdFCMyY6Nt0AIqF705KN4JKOWgeI4wUGUP60krXXs/edit?gid=0). +Model quality for coding agents changes quickly. These recommendations are based on current +[OpenHands Index](https://index.openhands.dev/home) results where available. The linked +[openhands-index-results repository](https://github.com/OpenHands/openhands-index-results) contains the full scores and +trajectories for each run. + +Use the strongest model you can afford for long-running or high-stakes tasks. Use lower-cost profiles for routine edits, +then switch back to a stronger model for planning, debugging, and review. 
-Based on these findings and community feedback, these are the latest models that have been verified to work reasonably well with OpenHands: +### Best Cloud Models by Family -### Cloud / API-Based Models +| Family | Recommended Model | Model String | OpenHands Index Average | Notes | +|--------|-------------------|--------------|-------------------------|-------| +| Claude | [Claude Opus 4.7](https://github.com/OpenHands/openhands-index-results/tree/main/results/claude-opus-4-7) | `anthropic/claude-opus-4-7` | 68.2 | Best Claude-series result in the OpenHands Index. Use it for complex, long-running software work. Claude Opus 4.6 is close behind at 66.7. | +| GPT | [GPT-5.5](https://github.com/OpenHands/openhands-index-results/tree/main/results/GPT-5.5) | `openai/gpt-5.5` | 65.9 | Best GPT-series result in the OpenHands Index. GPT-5.4 is close behind at 64.3. | +| Gemini | [Gemini 3.1 Pro](https://github.com/OpenHands/openhands-index-results/tree/main/results/Gemini-3.1-Pro) | `gemini/gemini-3.1-pro-preview` | 57.0 | Best Gemini-series result in the OpenHands Index. Use Gemini 3 Flash when cost or latency is more important than top accuracy. 
| -- [anthropic/claude-sonnet-4-20250514](https://www.anthropic.com/api) (recommended) -- [anthropic/claude-sonnet-4-5-20250929](https://www.anthropic.com/api) (recommended) -- [openai/gpt-5-2025-08-07](https://openai.com/api/) (recommended) -- [gemini/gemini-3-pro-preview](https://blog.google/products/gemini/gemini-3/) -- [deepseek/deepseek-chat](https://api-docs.deepseek.com/) -- [moonshot/kimi-k2-0711-preview](https://platform.moonshot.ai/docs/pricing/chat#generation-model-kimi-k2) +### Strong Open / Open-Weight Models + +These open or open-weight models have good OpenHands Index scores or are recommended for local OpenHands setups: + +| Model | Suggested Model String | OpenHands Index Average | Notes | +|-------|------------------------|-------------------------|-------| +| [GLM-5.1](https://github.com/OpenHands/openhands-index-results/tree/main/results/GLM-5.1) | `openrouter/z-ai/glm-5.1` | 58.2 | Strongest open-weight result currently listed in the OpenHands Index. | +| [Kimi-K2.6](https://github.com/OpenHands/openhands-index-results/tree/main/results/Kimi-K2.6) | `openrouter/moonshotai/kimi-k2.6` | 57.1 | Strong open-weight option, especially for coding and information-gathering tasks. | +| [DeepSeek-V4-Pro](https://github.com/OpenHands/openhands-index-results/tree/main/results/DeepSeek-V4-Pro) | `openrouter/deepseek/deepseek-v4-pro` | 51.3 | Strong coding and test-generation scores; current Index entry covers three benchmarks. | +| [MiniMax-M2.7](https://github.com/OpenHands/openhands-index-results/tree/main/results/MiniMax-M2.7) | `openrouter/minimax/minimax-m2.7` | 43.4 | Recommended as a lower-cost open-weight option with strong SWE-bench and SWT-bench scores. Also available from MiniMax-compatible OpenAI endpoints as `openai/MiniMax-M2.7`. 
| +| [Qwen3.6-35B-A3B](https://huggingface.co/Qwen/Qwen3.6-35B-A3B) | `openai/Qwen3.6-35B-A3B` for local OpenAI-compatible servers, or `openrouter/qwen/qwen3.6-35b-a3b` through OpenRouter | Not yet listed | Recommended local / self-hosted model for OpenHands. It is open-weight, supports a large context window, and is featured in the [local LLM guide](/openhands/usage/llms/local-llms). | + + +Hosted model strings can vary by provider and region. If a model string is not accepted, check the provider console and +the [LiteLLM provider list](https://docs.litellm.ai/docs/providers), then use the provider-specific model ID shown there. + If you have successfully run OpenHands with specific providers, we encourage you to open a PR to share your setup process to help others using the same provider! @@ -32456,15 +32211,16 @@ limits and monitor usage. ### Local / Self-Hosted Models -- [mistralai/devstral-small](https://openhands.dev/blog/devstral-a-new-state-of-the-art-open-model-for-coding-agents) (20 May 2025) -- also available through [OpenRouter](https://openrouter.ai/mistralai/devstral-small:free) -- [all-hands/openhands-lm-32b-v0.1](https://openhands.dev/blog/introducing-openhands-lm-32b----a-strong-open-coding-agent-model) (31 March 2025) -- also available through [OpenRouter](https://openrouter.ai/all-hands/openhands-lm-32b-v0.1) +For local and self-hosted usage, start with +[Qwen3.6-35B-A3B](https://huggingface.co/Qwen/Qwen3.6-35B-A3B). See the +[local LLM guide](/openhands/usage/llms/local-llms) for LM Studio, Ollama, SGLang, and vLLM setup examples. ### Known Issues -Most current local and open source models are not as powerful. When using such models, you may see long -wait times between messages, poor responses, or errors about malformed JSON. OpenHands can only be as powerful as the -models driving it. However, if you do find ones that work, please add them to the verified list above. +Open-weight and local models still vary widely in tool-use reliability. 
If you see long wait times, poor responses, or +errors about malformed JSON, try a stronger model, increase the context window, or switch to a frontier cloud model for +that task. ## LLM Configuration @@ -32543,7 +32299,7 @@ Source: https://docs.openhands.dev/openhands/usage/llms/local-llms.md ## News -- 2025/12/12: We now recommend two powerful local models for OpenHands: [Qwen3-Coder-30B-A3B-Instruct](https://huggingface.co/Qwen/Qwen3-Coder-30B-A3B-Instruct) and [Devstral Small 2 (24B)](https://huggingface.co/mistralai/Devstral-Small-2-24B-Instruct-2512). Both models deliver excellent performance on coding tasks and work great with OpenHands! +- 2026/05/21: We now recommend [Qwen3.6-35B-A3B](https://huggingface.co/Qwen/Qwen3.6-35B-A3B) as the first local model to try with OpenHands. It is an open-weight MoE model built for agentic coding, supports a large context window, and is available through LM Studio, Ollama, vLLM, and SGLang. ## Quickstart: Running OpenHands with a Local LLM using LM Studio @@ -32551,13 +32307,13 @@ This guide explains how to serve a local LLM using [LM Studio](https://lmstudio. We recommend: - **LM Studio** as the local model server, which handles metadata downloads automatically and offers a simple, user-friendly interface for configuration. -- **Qwen3-Coder-30B-A3B-Instruct** as the LLM for software development. This model is optimized for coding tasks and works excellently with agent-style workflows like OpenHands. +- **Qwen3.6-35B-A3B** as the LLM for software development. This model is optimized for agentic coding and works well with tool-heavy workflows like OpenHands. 
### Hardware Requirements -Running Qwen3-Coder-30B-A3B-Instruct requires: -- A recent GPU with at least 12GB of VRAM (tested on RTX 3060 with 12GB VRAM + 64GB RAM), or -- A Mac with Apple Silicon with at least 32GB of RAM +Running Qwen3.6-35B-A3B requires: +- A recent GPU with at least 24GB of VRAM for quantized variants, or multiple GPUs for full precision and larger context windows, or +- A Mac with Apple Silicon with at least 64GB of unified memory for quantized variants ### 1. Install LM Studio @@ -32570,7 +32326,7 @@ Download and install the LM Studio desktop app from [lmstudio.ai](https://lmstud ![image](./screenshots/01_lm_studio_open_model_hub.png) -3. Search for **"Qwen3-Coder-30B-A3B-Instruct"**, confirm you're downloading from the official Qwen publisher, then proceed to download. +3. Search for **"Qwen3.6-35B-A3B"**, confirm you're downloading from the official Qwen publisher, then proceed to download. ![image](./screenshots/02_lm_studio_download_devstral.png) @@ -32584,7 +32340,7 @@ Download and install the LM Studio desktop app from [lmstudio.ai](https://lmstud ![image](./screenshots/03_lm_studio_open_load_model.png) 3. Enable the "Manually choose model load parameters" switch. -4. Select **Qwen3-Coder-30B-A3B-Instruct** from the model list. +4. Select **Qwen3.6-35B-A3B** from the model list. ![image](./screenshots/04_lm_studio_setup_devstral_part_1.png) @@ -32646,7 +32402,7 @@ When started for the first time, OpenHands will prompt you to set up the LLM pro 2. Enable the "Advanced" switch at the top of the page to show all the available settings. 3. 
Set the following values: - - **Custom Model**: `openai/qwen/qwen3-coder-30b-a3b-instruct` (the Model API identifier from LM Studio, prefixed with "openai/") + - **Custom Model**: `openai/qwen/qwen3.6-35b-a3b` (the Model API identifier from LM Studio, prefixed with "openai/") - **Base URL**: `http://host.docker.internal:1234/v1` - **API Key**: `local-llm` @@ -32675,14 +32431,14 @@ This section describes how to run local LLMs with OpenHands using alternative ba ### Create an OpenAI-Compatible Endpoint with Ollama - Install Ollama following [the official documentation](https://ollama.com/download). -- Example launch command for Qwen3-Coder-30B-A3B-Instruct: +- Example launch command for Qwen3.6-35B-A3B: ```bash # ⚠️ WARNING: OpenHands requires a large context size to work properly. # When using Ollama, set OLLAMA_CONTEXT_LENGTH to at least 22000. # The default (4096) is way too small — not even the system prompt will fit, and the agent will not behave correctly. OLLAMA_CONTEXT_LENGTH=32768 OLLAMA_HOST=0.0.0.0:11434 OLLAMA_KEEP_ALIVE=-1 nohup ollama serve & -ollama pull qwen3-coder:30b +ollama pull qwen3.6:35b-a3b ``` ### Create an OpenAI-Compatible Endpoint with vLLM or SGLang @@ -32690,7 +32446,7 @@ ollama pull qwen3-coder:30b First, download the model checkpoint: ```bash -huggingface-cli download Qwen/Qwen3-Coder-30B-A3B-Instruct --local-dir Qwen/Qwen3-Coder-30B-A3B-Instruct +huggingface-cli download Qwen/Qwen3.6-35B-A3B --local-dir Qwen/Qwen3.6-35B-A3B ``` #### Serving the model using SGLang @@ -32700,8 +32456,8 @@ huggingface-cli download Qwen/Qwen3-Coder-30B-A3B-Instruct --local-dir Qwen/Qwen ```bash SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1 python3 -m sglang.launch_server \ - --model Qwen/Qwen3-Coder-30B-A3B-Instruct \ - --served-model-name Qwen3-Coder-30B-A3B-Instruct \ + --model Qwen/Qwen3.6-35B-A3B \ + --served-model-name Qwen3.6-35B-A3B \ --port 8000 \ --tp 2 --dp 1 \ --host 0.0.0.0 \ @@ -32714,11 +32470,11 @@ SGLANG_ALLOW_OVERWRITE_LONGER_CONTEXT_LEN=1 
python3 -m sglang.launch_server \ - Example launch command (with at least 2 GPUs): ```bash -vllm serve Qwen/Qwen3-Coder-30B-A3B-Instruct \ +vllm serve Qwen/Qwen3.6-35B-A3B \ --host 0.0.0.0 --port 8000 \ --api-key mykey \ --tensor-parallel-size 2 \ - --served-model-name Qwen3-Coder-30B-A3B-Instruct \ + --served-model-name Qwen3.6-35B-A3B \ --enable-prefix-caching ``` @@ -32735,11 +32491,11 @@ pip install git+https://github.com/snowflakedb/ArcticInference.git 2. Run the launch command with speculative decoding enabled: ```bash -vllm serve Qwen/Qwen3-Coder-30B-A3B-Instruct \ +vllm serve Qwen/Qwen3.6-35B-A3B \ --host 0.0.0.0 --port 8000 \ --api-key mykey \ --tensor-parallel-size 2 \ - --served-model-name Qwen3-Coder-30B-A3B-Instruct \ + --served-model-name Qwen3.6-35B-A3B \ --speculative-config '{"method": "suffix"}' ``` @@ -32763,8 +32519,8 @@ Once OpenHands is running, open the Settings page in the UI and go to the `LLM` 2. Enable the **Advanced** toggle at the top of the page. 3. Set the following parameters, if you followed the examples above: - **Custom Model**: `openai/` - - For **Ollama**: `openai/qwen3-coder:30b` - - For **SGLang/vLLM**: `openai/Qwen3-Coder-30B-A3B-Instruct` + - For **Ollama**: `openai/qwen3.6:35b-a3b` + - For **SGLang/vLLM**: `openai/Qwen3.6-35B-A3B` - **Base URL**: `http://host.docker.internal:/v1` Use port `11434` for Ollama, or `8000` for SGLang and vLLM. - **API Key**: @@ -33755,6 +33511,163 @@ conversations. - `Enable memory condensation` - Turn on this setting to activate this feature. - `Memory condenser max history size` - The condenser will summarize the history after this many events. +## LLM Profiles + +LLM profiles allow you to save multiple LLM configurations and switch between them, even during an active conversation. +This is useful when you want to use different models for different tasks—for example, a faster model for simple tasks +and a more powerful model for complex reasoning. 
+ +### Creating an LLM Profile + +Profiles are automatically created when you save a configuration on the LLM settings page. To create a new profile: + +1. Navigate to `Settings > LLM`. +2. Configure your desired LLM provider, model, and API key. +3. Click `Save Changes`. + +A new profile will be created with your configuration. The most recently saved profile becomes the active profile +for new conversations. + +Alternatively, you can click the `Add LLM Profile` button in the Available Profiles section to create a new profile +directly. + +### Managing LLM Profiles + +You can manage your saved profiles in the `Available Profiles` section of the LLM settings page. Each profile shows: + +- **Profile name**: A unique identifier for the configuration +- **Model**: The LLM model associated with the profile +- **Active badge**: Indicates which profile is currently active + +Click the menu icon (three dots) on any profile to access these actions: + +- **Edit**: Modify the profile's LLM configuration +- **Rename**: Change the profile name +- **Set as Active**: Make this profile the default for new conversations +- **Delete**: Remove the profile + + +You can save up to 10 LLM profiles per account. Delete unused profiles if you need to create new ones. + + +### Switching Profiles During a Conversation + +One of the most powerful features of LLM profiles is the ability to switch models mid-conversation without losing context. 
+This allows you to: + +- Start with a fast, cost-effective model for initial exploration +- Switch to a more powerful model when the task requires deeper reasoning +- Use specialized models for specific types of tasks + +For example, you might create profiles like these: + +| Example Profile | Example Use | Example Cost Pattern | +| --- | --- | --- | +| `claude-opus-4-7` | Frontend design and visual polish | Higher cost | +| `gpt-5.5` | Planning, instruction following, or review | Balanced for complex reasoning | +| `minimax-m2.7` | Day-to-day implementation | Lower cost | + +The profile names above are examples. Use names that match the saved profiles in your OpenHands environment. + +To switch profiles during an active conversation: + +1. Look for the **profile selector button** in the chat input area. It displays the name of the currently active profile. +2. Click the button to open the profile menu. +3. Select the profile you want to switch to. + +The conversation will continue with the new model, maintaining all previous context and history. The switch takes effect +immediately for subsequent messages. + + +The profile selector shows a checkmark next to the currently active profile. If no profile matches the running model, +the button will show "Select a model" as a placeholder. + + +### Switching Profiles with the `/model` Slash Command + +You can also list and switch profiles directly from the chat input using the `/model` slash command: + +- `/model` — Lists your saved LLM profiles. +- `/model ` — Switches the running conversation to that profile. + +This is equivalent to using the profile selector button and works without leaving the chat. Profile names must match the +saved profile exactly. The switch applies to future agent steps; it does not rerun earlier messages. + +A common workflow is to use a stronger model for planning and then switch to a lower-cost model for implementation: + +1. Start the conversation with `gpt-5.5` selected. +2. 
Ask OpenHands to plan the work before editing files: + + ```text + Plan the OpenHands features page. Do not edit files yet. + ``` + +3. Send `/model` to list available profiles. +4. Send `/model minimax-m2.7` to switch profiles. +5. Ask OpenHands to implement the plan: + + ```text + Now implement the plan. + ``` + +![Agent Canvas showing example /model command output that lists saved profiles and switches to another profile](/openhands/static/img/model-command-agent-canvas.png) + + +Model switching requires saved LLM profiles. If `/model` is not suggested in the chat input, create profiles in +`Settings > LLM` and confirm that your backend supports profile switching. + + +### Letting the Agent Select Models Dynamically + +When the model selection tool is available, the agent can choose a saved profile for the next phase of work. For example, +it can implement frontend changes with a design-focused model and then switch to an instruction-following model for review. + +In the Agent SDK, this capability is exposed as the built-in `SwitchLLMTool`, which produces `switch_llm` tool calls. +Agent Canvas displays those tool calls as `Switch LLM profile` events in the conversation timeline so you can see when +and why the model changed. + +Create the profiles you want the agent to choose from, then ask OpenHands to use specific profiles for different phases +of the task. For example: + +```text +Implement a simple web page on the features of OpenHands with Claude Opus 4.7, and then switch to GPT-5.5 and review the code. +``` + +![Agent Canvas showing example switch_llm tool calls that move a task between saved profiles](/openhands/static/img/model-selection-tool-agent-canvas.png) + +The model selection tool behaves as follows: + +- The current model decides to call the tool and provides a short reason. +- The switch takes effect on the next LLM call after the tool succeeds. +- Conversation history, files, and task state are preserved. 
+- If a profile name is missing or misspelled, the tool returns an error and the agent should choose a valid profile or + ask for help. + +For custom SDK agents, include `SwitchLLMTool` when constructing the agent. See the SDK example: +[examples/01_standalone_sdk/49_switch_llm_tool.py](https://github.com/OpenHands/software-agent-sdk/blob/main/examples/01_standalone_sdk/49_switch_llm_tool.py). + +### How Profile Switching Works + +When you switch profiles during a conversation: + +1. The new LLM configuration is loaded from your saved profile +2. The conversation context (all previous messages and actions) is preserved +3. Future messages are processed using the new model +4. The conversation metadata is updated to reflect the new model + +This seamless switching allows you to leverage different models' strengths without starting a new conversation or +losing your progress. + +### Best Practices for Using LLM Profiles + +- **Name profiles descriptively**: Use names like "Claude Sonnet - Fast" or "GPT-4 - Complex Tasks" to easily + identify which profile to use. +- **Create task-specific profiles**: Set up profiles optimized for different workflows, such as code review, + documentation, or debugging. +- **Keep API keys updated**: Ensure each profile has a valid API key. +- **Test before critical work**: When switching profiles mid-conversation, send a simple test message to confirm + the new model is responding correctly. 
+ ### Model Context Protocol (MCP) Source: https://docs.openhands.dev/openhands/usage/settings/mcp-settings.md @@ -34238,10 +34151,10 @@ After the GitHub organization rename from `All-Hands-AI` to `OpenHands`, you may ```bash # Check current remote git remote get-url origin - + # Update SSH remote git remote set-url origin git@github.com:OpenHands/OpenHands.git - + # Or update HTTPS remote git remote set-url origin https://github.com/OpenHands/OpenHands.git ``` @@ -34356,11 +34269,11 @@ while current_score < QUALITY_THRESHOLD and iteration < MAX_ITERATIONS: # Migrating agent converts COBOL to Java migration_conversation.send_message(migration_prompt) migration_conversation.run() - + # Critiquing agent evaluates the conversion critique_conversation.send_message(critique_prompt) critique_conversation.run() - + # Parse the score and decide whether to continue current_score = parse_critique_score(critique_file) ``` @@ -34774,13 +34687,13 @@ See real automated reviews in action on the OpenHands Software Agent SDK reposit - Verify the workflow file is in `.github/workflows/` - Check the Actions tab for workflow run errors - + - Ensure `GITHUB_TOKEN` has `pull-requests: write` permission - Check the workflow logs for API errors - Verify the PR is not from a fork with restricted permissions - + - Large PRs may take longer to analyze - Consider splitting large PRs into smaller ones @@ -34896,7 +34809,7 @@ Perform straightforward version updates: ``` Update all patch and minor versions in package.json: - + 1. Review each update for changelog notes 2. Update package.json with new versions 3. Update package-lock.json @@ -34907,7 +34820,7 @@ Perform straightforward version updates: ``` Update dependencies in requirements.txt: - + 1. Check each package for updates 2. Update requirements.txt with compatible versions 3. Update requirements-dev.txt similarly @@ -34918,7 +34831,7 @@ Perform straightforward version updates: ``` Update dependencies in pom.xml: - + 1. 
Check for newer versions of each dependency 2. Update version numbers in pom.xml 3. Run mvn dependency:tree to check conflicts @@ -35242,12 +35155,12 @@ Analyze this stack trace from our production error: ``` Analyze this Java exception: - + java.lang.OutOfMemoryError: Java heap space at java.util.Arrays.copyOf(Arrays.java:3210) at java.util.ArrayList.grow(ArrayList.java:265) at com.myapp.DataProcessor.loadAllRecords(DataProcessor.java:142) - + Identify: 1. What operation is consuming memory? 2. Is there a memory leak or just too much data? @@ -35257,25 +35170,25 @@ Analyze this stack trace from our production error: ``` Analyze this Python traceback: - + Traceback (most recent call last): File "app/api/orders.py", line 45, in create_order order = OrderService.create(data) File "app/services/order.py", line 89, in create inventory.reserve(item_id, quantity) AttributeError: 'NoneType' object has no attribute 'reserve' - + What's None and why? ``` ``` Analyze this Node.js error: - + TypeError: Cannot read property 'map' of undefined at processItems (/app/src/handlers/items.js:23:15) at async handleRequest (/app/src/api/router.js:45:12) - + What's undefined and how should we handle it? ``` @@ -35419,6 +35332,13 @@ Each use case can be implemented in different ways—as a one-off conversation, > Set up automated PR reviews to maintain code quality and catch bugs early. + + Validate PR changes by actually running the software as a real user would. + +### Automated QA Testing +Source: https://docs.openhands.dev/openhands/usage/use-cases/qa-changes.md + + + Check out the complete QA changes plugin with ready-to-use code and configuration. + + +Automated QA testing goes beyond code review and CI: instead of reading diffs or running the test suite, the QA agent actually **runs the software** and verifies that changes work as claimed. 
It sets up the environment, exercises changed behavior as a real user would (browser, CLI, API requests), and posts a structured report with evidence. + +This is Layer 2 of the [Verification Stack](https://www.openhands.dev/blog/verification-stack), complementing the [code review agent](/openhands/usage/use-cases/code-review). + +## Overview + +The QA agent follows a four-phase methodology: + +1. **Understand** — Reads the PR diff, title, and description. Classifies changes (new feature, bug fix, refactor, config) and identifies entry points (CLI commands, API endpoints, UI pages). +2. **Setup** — Bootstraps the repository: installs dependencies, builds the project, notes CI status. +3. **Exercise** — The core phase: spins up servers, opens browsers, runs CLI commands, makes HTTP requests — testing the changed behavior as a real user would. For bug fixes, it reproduces the bug on the base branch and verifies the fix on the PR branch. +4. **Report** — Posts a structured QA report as a PR comment, with evidence (commands run, outputs, screenshots) and a verdict (PASS / FAIL / PARTIAL). + +The QA agent knows when to give up: after exhausting multiple approaches without progress, it reports what it tried and stops — rather than spinning endlessly. 
+ +## What It Does (and Doesn't) + + + + - Run the actual application and interact with it + - Make real HTTP requests, run real CLI commands + - Open browsers and verify UI changes + - Reproduce bugs and verify fixes end-to-end + - Report with evidence (commands, outputs, screenshots) + + + - Run the test suite (that's CI's job) + - Analyze code for style or structure (that's code review's job) + - Run linters, formatters, or type checkers + - Substitute `--help` or `--dry-run` for real execution + + + +## Quick Start + +### GitHub Actions + +Create `.github/workflows/qa-changes.yml` in your repository: + +```yaml +name: QA Changes + +on: + pull_request: + types: [opened, ready_for_review, labeled] + +permissions: + contents: read + pull-requests: write + issues: write + +jobs: + qa: + if: | + (github.event.action == 'opened' && github.event.pull_request.draft == false) || + github.event.action == 'ready_for_review' || + github.event.label.name == 'qa-this' + runs-on: ubuntu-latest + steps: + - name: Run QA Changes + uses: OpenHands/extensions/plugins/qa-changes@main + with: + llm-model: anthropic/claude-sonnet-4-20250514 + llm-api-key: ${{ secrets.LLM_API_KEY }} + github-token: ${{ secrets.GITHUB_TOKEN }} +``` + +Add your `LLM_API_KEY` to your repository's **Settings → Secrets and variables → Actions**. + +### In a Conversation + +You can also trigger QA manually in any OpenHands conversation. First, install the skill: + +``` +/add-skill https://github.com/OpenHands/extensions/tree/main/skills/qa-changes +``` + +Then invoke it: + +``` +/qa-changes +``` + +The agent will ask for the PR to test, or you can provide context directly: + +``` +/qa-changes — Please QA PR #42 on the my-org/my-repo repository. +Focus on the new dashboard page and verify it renders correctly. 
+``` + +## QA Report Format + +The QA agent posts a structured report as a PR comment: + +``` +## QA Report + +**Status: PASS** ✅ + +### Changes Tested +- New `/api/health` endpoint returns 200 with version info +- Dashboard page renders at `/dashboard` with correct data + +### Evidence +1. Started server with `npm run dev` +2. `curl http://localhost:3000/api/health` → 200 OK, body: {"status":"ok","version":"1.2.0"} +3. Navigated to http://localhost:3000/dashboard — page renders correctly + [screenshot attached] + +### Edge Cases +- Empty database state: dashboard shows "No data" placeholder ✅ +- Invalid auth token: returns 401 as expected ✅ +``` + +## Customization + +### Change Types + +The QA agent adapts its approach based on the type of change: + +| Change Type | QA Approach | +|-------------|-------------| +| **Frontend / UI** | Starts dev server, opens browser, verifies visual changes, tests interactions | +| **CLI** | Runs commands with realistic arguments, verifies output, tests edge cases | +| **API / Backend** | Starts server, makes HTTP requests, verifies responses and side effects | +| **Bug fix** | Reproduces bug on base branch, verifies fix on PR branch (before/after) | +| **Library / SDK** | Writes and runs a short script that imports and calls changed functions | + +### Repository-Specific QA Guidelines + +Add repo-specific QA instructions by creating `.agents/skills/qa-guide.md`: + +```markdown +--- +name: qa-guide +description: Project-specific QA guidelines +triggers: +- /qa-changes +--- + +# QA Guidelines for [Your Project] + +## Environment Setup +- Run `make setup` to initialize the development environment +- The dev server runs on port 8080 + +## Key Test Scenarios +- Always verify the admin dashboard at /admin after backend changes +- For API changes, test with both authenticated and unauthenticated requests + +## Known Limitations +- The payment module requires a Stripe test key — skip payment flow testing +``` + +## Integration with the 
Verification Stack + +The QA agent is most powerful when used alongside the [code review agent](/openhands/usage/use-cases/code-review) and the [iterate skill](https://github.com/OpenHands/extensions/tree/main/skills/iterate) as part of the full [Verification Stack](https://www.openhands.dev/blog/verification-stack): + +1. **Code review** catches issues by reading the diff (style, security, data structures) +2. **QA** catches issues by running the software (behavioral regressions, UI bugs) +3. **Iterate** orchestrates the loop — fixing issues flagged by either verifier and re-polling until the PR is clean + +## Troubleshooting + + + + Ensure your repository's setup instructions are documented in `README.md` or `AGENTS.md`. The agent follows these to bootstrap the environment. If setup requires special steps, add them to a custom QA guide. + + + + PARTIAL means some scenarios passed and others failed or couldn't be tested. Read the report details — it will explain what worked and what didn't. Common causes: missing environment variables, external service dependencies, or insufficient permissions. + + + + For large PRs with many changed entry points, the agent may need more time. Consider splitting large PRs into smaller, focused changes. You can also add a custom QA guide that prioritizes the most important scenarios. 
+ + + +## Related Resources + +- [QA Changes Plugin](https://github.com/OpenHands/extensions/tree/main/plugins/qa-changes) — GitHub Actions plugin +- [QA Changes Skill](https://github.com/OpenHands/extensions/tree/main/skills/qa-changes) — Detailed skill methodology +- [Verification Stack](https://www.openhands.dev/blog/verification-stack) — How QA fits into the full verification pipeline +- [Automated Code Review](/openhands/usage/use-cases/code-review) — The complementary code review agent + ### Spark Migrations Source: https://docs.openhands.dev/openhands/usage/use-cases/spark-migrations.md @@ -35496,7 +35612,7 @@ In this document, we will explore how OpenHands contributes to Spark migrations, ## Understanding -Before changin any code, it helps to build a clear picture of what is affected and where the risk is concentrated. Spark migrations touch a large surface area, between API deprecations, behavioral changes, configuration defaults, and dependency versions, and the interactions between them are hard to reason about manually. +Before changing any code, it helps to build a clear picture of what is affected and where the risk is concentrated. Spark migrations touch a large surface area, between API deprecations, behavioral changes, configuration defaults, and dependency versions, and the interactions between them are hard to reason about manually. Apache releases detailed lists of changes between each major and minor version of Spark. OpenHands can utilize this list of changes while scanning your codebase to produce a structured inventory of everything that needs attention. This inventory becomes the foundation for the migration itself, helping you prioritize work and track progress. @@ -35735,18 +35851,18 @@ Fix identified vulnerabilities: ``` Fix the SQL injection vulnerability in src/api/users.py: - + Current code: query = f"SELECT * FROM users WHERE id = {user_id}" cursor.execute(query) - + Requirements: 1. Use parameterized queries 2. 
Add input validation 3. Maintain the same functionality 4. Add a test case for the fix ``` - + **Fixed code:** ```python # Using parameterized query @@ -35757,17 +35873,17 @@ Fix identified vulnerabilities: ``` Fix the XSS vulnerability in src/templates/profile.html: - + Current code:
${user.bio}
- + Requirements: 1. Properly escape user content 2. Consider Content Security Policy 3. Handle rich text if needed 4. Test with malicious input ``` - + **Fixed code:** ```html @@ -35777,28 +35893,28 @@ Fix identified vulnerabilities: ``` Fix the command injection in src/utils/network.py: - + Current code: def ping_host(hostname): os.system(f"ping -c 1 {hostname}") - + Requirements: 1. Use safe subprocess calls 2. Validate input format 3. Avoid shell=True 4. Handle errors properly ``` - + **Fixed code:** ```python import subprocess import re - + def ping_host(hostname): # Validate hostname format if not re.match(r'^[a-zA-Z0-9.-]+$', hostname): raise ValueError("Invalid hostname") - + # Use subprocess without shell result = subprocess.run( ["ping", "-c", "1", hostname], @@ -35841,7 +35957,7 @@ def get_documents(user_id: int, current_user: User = Depends(get_current_user)): if current_user.id != user_id and not current_user.is_admin: logger.warning(f"Unauthorized access attempt: user {current_user.id} tried to access user {user_id}'s documents") raise HTTPException(status_code=403, detail="Not authorized") - + return db.get_documents(user_id) ``` @@ -36145,7 +36261,7 @@ Each update is streamed as it occurs, allowing you to provide real-time feedback After starting a conversation, you can check its status to monitor whether the agent has completed its task. - The examples below show basic polling patterns. For production use, add proper error handling, + The examples below show basic polling patterns. For production use, add proper error handling, exponential backoff, and handle network failures gracefully. 
@@ -36290,7 +36406,7 @@ while not conversation_id and attempts < max_attempts: ) task_response.raise_for_status() tasks = task_response.json() - + if tasks and tasks[0].get("status") == "READY": conversation_id = tasks[0].get("app_conversation_id") print(f"Conversation ready: {base_url}/conversations/{conversation_id}") @@ -36319,24 +36435,24 @@ while attempts < max_attempts: ) conv_response.raise_for_status() conversations = conv_response.json() - + if not conversations: print("Warning: Conversation not found") time.sleep(30) attempts += 1 continue - + conv = conversations[0] sandbox_status = conv.get("sandbox_status") exec_status = conv.get("execution_status") - + # Check sandbox health first if sandbox_status in ["ERROR", "MISSING"]: print(f"Sandbox failed with status: {sandbox_status}") exit(1) - + print(f"Execution status: {exec_status}") - + # Check for terminal states if exec_status in ["finished", "error", "stuck"]: print(f"Conversation completed with status: {exec_status}") @@ -36345,7 +36461,7 @@ while attempts < max_attempts: print("Agent is waiting for user confirmation in the UI") print(f"Visit: {base_url}/conversations/{conversation_id}") break - + time.sleep(30) attempts += 1 else: @@ -36402,7 +36518,7 @@ To list all your conversations, use the search endpoint: ``` - The search endpoint returns conversations in the `items` array. Use `next_page_id` + The search endpoint returns conversations in the `items` array. Use `next_page_id` for pagination if you have more conversations than the `limit`. @@ -36810,6 +36926,271 @@ Once you've connected your account, you can: - [Learn about the Cloud UI](/openhands/usage/cloud/cloud-ui). - [Install the OpenHands Slack app](/openhands/usage/cloud/slack-installation). +### Managing Members +Source: https://docs.openhands.dev/openhands/usage/cloud/organizations/managing-members.md + +## Inviting Users + +To add a new member to your organization: + +1. 
Hover over the profile icon in the lower left — an account menu will appear. +2. Click **Invite Organization Members** in the menu. +3. Enter the email address of the user you want to invite. +4. Click **Add**. + +The invited user will receive an email with instructions to accept the invitation and join your organization. +Once they accept, they will be added as a **Member** by default. + + + Invitations expire after 7 days. If the invitation expires, you'll need to send a new one. + + +## Changing User Roles + +After a user has joined your organization, an Admin or Owner can modify their role: + +1. Hover over the profile icon in the lower left — an account menu will appear. +2. Select **Org Members**. +3. Find the user whose role you want to change. +4. Click the role dropdown next to their name. +5. Select the new role: `Owner`, `Admin`, or `Member`. +6. Confirm the change. + + + Changing a user's role takes effect immediately. Be careful when demoting users, as they will lose access + to features associated with their previous role. + + +## Removing Members + +To remove a member from your organization: + +1. Hover over the profile icon in the lower left — an account menu will appear. +2. Select **Org Members**. +3. Find the user you want to remove. +4. Click the **Remove** button next to their name. +5. Confirm the removal. + +Removed members will lose access to the organization's shared resources immediately, but their private +conversations will remain intact. + +## Next Steps + +- [Roles and Permissions](/openhands/usage/cloud/organizations/roles-permissions) - Understand what each role can do. +- [Organization Settings](/openhands/usage/cloud/organizations/settings) - Configure your organization's resources. + +### Organizations Overview +Source: https://docs.openhands.dev/openhands/usage/cloud/organizations/overview.md + +## What are Organizations? 
+ +Organizations allow multiple users to collaborate within a shared workspace in OpenHands Cloud or OpenHands Enterprise. With +Organizations, teams can share a pool of credits, use consistent LLM configurations, and streamline access +to repositories. + + + Organizations is a commercial feature available with an OpenHands Cloud subscription or OpenHands Enterprise. + + +## Key Features + +Organizations provide the following capabilities: + +- **Multiple Users** - Add team members to a common organization for centralized management. +- **Shared Credits** - Pool OpenHands Cloud credits across all organization members. +- **Default LLM Configuration** - Define the default model provider and LLM that all members can use. +- **Git Organization Claiming** - Claim specific Git organizations to route OpenHands resolver requests to your organization. + +## Conversation Visibility + +By default, conversations remain **private to individual members** of an organization. Each user's conversations +are only visible to them. + +## Getting Started + +To start using Organizations: + +1. Subscribe to an OpenHands Cloud plan or OpenHands Enterprise. +2. Create a new organization from the OpenHands Cloud dashboard. +3. [Invite team members](/openhands/usage/cloud/organizations/managing-members) to join your organization. +4. Configure your organization's [LLM settings](/openhands/usage/cloud/organizations/settings) and shared resources. + +## Next Steps + +- [Managing Members](/openhands/usage/cloud/organizations/managing-members) - Learn how to invite users and manage roles. +- [Organization Settings](/openhands/usage/cloud/organizations/settings) - Configure LLM providers, credits, and Git organization claims. +- [Roles and Permissions](/openhands/usage/cloud/organizations/roles-permissions) - Understand the different permission levels. 
+ +### Roles and Permissions +Source: https://docs.openhands.dev/openhands/usage/cloud/organizations/roles-permissions.md + +## Overview + +Organizations in OpenHands support three roles, each with different levels of access and capabilities: +**Member**, **Admin**, and **Owner**. This page describes what each role can do. + +## Permissions Table + +| Permission | Member | Admin | Owner | +|------------|:------:|:-----:|:-----:| +| Create conversations | ✓ | ✓ | ✓ | +| Manage private settings | ✓ | ✓ | ✓ | +| Invite users | | ✓ | ✓ | +| Elevate users to Admin role | | ✓ | ✓ | +| Add credits | | ✓ | ✓ | +| Modify LLM settings | | ✓ | ✓ | +| Elevate users to Owner role | | | ✓ | +| Claim Git organizations | | | ✓ | +| Delete organization | | | ✓ | + +## Role Descriptions + +### Member + +Members are the default role for users joining an organization. Members can: + +- **Create conversations** - Start new conversations using the organization's shared credits. +- **Manage their own private settings** - Configure settings that are only visible to them, including: + - MCP servers + - Secrets + - API keys + - Git user settings + - Slack integration + +Members cannot invite new users, modify organization-wide settings (like LLM settings), or manage other users' roles. + +### Admin + +Admins have all the capabilities of Members, plus the ability to manage the organization's settings and users. Admins can: + +- Everything a Member can do. +- **Invite users** - Send invitations to new team members. +- **Change user roles** - Promote Members to Admin or demote Admins to Members. +- **Add credits** - Purchase and add credits to the organization's shared pool. +- **Modify LLM settings** - Configure the default LLM provider and model for the organization. + +Admins cannot delete the organization or claim Git organizations. + +### Owner + +Owners have full control over the organization. Owners can: + +- Everything an Admin can do. 
+- **Delete the organization** - Permanently remove the organization and all associated data. +- **Claim Git organizations** - Link specific Git organizations to route OpenHands resolver requests to this organization. + + + Every organization must have at least one Owner. If you need to transfer ownership, first promote another + user to Owner before changing your own role. + + +## Private Settings + +Regardless of role, all organization members have control over their own **private settings**. These settings +are personal to each user and are not visible to other organization members, including Admins and Owners. + +Private settings include: + +- **MCP Servers** - Configure Model Context Protocol servers. +- **Secrets** - Store sensitive values like API tokens and credentials. +- **API Keys** - Manage keys for programmatic access to OpenHands. +- **Git Settings** - Configure personal Git authentication and preferences. +- **Slack Integration** - Connect your personal Slack workspace. + +## Next Steps + +- [Managing Members](/openhands/usage/cloud/organizations/managing-members) - Learn how to invite and manage users. +- [Organization Settings](/openhands/usage/cloud/organizations/settings) - Configure organization-wide settings. + +### Organization Settings +Source: https://docs.openhands.dev/openhands/usage/cloud/organizations/settings.md + +## Overview + +Organization settings allow Admins and Owners to configure shared resources that all members can use. This +includes LLM configurations, credits management, and Git organization claims. + +## LLM Configuration + + + Modifying LLM settings requires Admin or Owner permissions. + + +Organizations can define a default LLM provider and model that all members will use: + +1. Hover over the profile icon in the lower left — an account menu will appear. +2. Select **Organization**. +3. Select the `LLM` tab. +4. Choose your preferred **LLM provider** from the available options. +5. 
Select the **model** you want to use as the default. +6. Click `Save Changes`. + +All organization members will use this LLM configuration for their conversations unless they have configured +personal overrides. + +## Managing Credits + + + Adding credits requires Admin or Owner permissions. + + +Organization credits are shared across all members. To add credits: + +1. Hover over the profile icon in the lower left — an account menu will appear. +2. Select **Organization**. +3. Click **+ Add**. +4. Choose the amount of credits to purchase. +5. Complete the payment process. + +Credits will be added to the organization's shared pool immediately and can be used by any member. + +### Monitoring Usage + +You can monitor credit usage from the **Organization** settings page, which shows the current credit balance. + +For detailed usage reporting at the user level, use the API endpoint: + +``` +GET /api/organizations/{org_id}/members/financial +``` + +This endpoint provides financial usage data for all members within your organization. + +## Claiming Git Organizations + + + Claiming Git organizations requires Owner permissions. + + +Claiming a Git organization links it to your OpenHands organization, allowing OpenHands resolver requests +for repositories in that Git organization to be routed to your organization. + +Owners have the ability to claim a Git organization: + +1. Hover over the profile icon in the lower left — an account menu will appear. +2. Select **Organization**. +3. In the **Git Conversation Routing** section, Git organizations from your linked GitHub/GitLab accounts are listed automatically. +4. Click **Claim** next to the Git organization you want to link to your OpenHands organization. + + + You must have admin access to the Git organization to claim it. The verification process confirms your + authorization. 
+ + +### Benefits of Claiming Git Organizations + +When a Git organization is claimed: + +- All resolver requests for repositories in that Git organization are automatically routed to your OpenHands organization. +- Organization members can work seamlessly with repositories in the claimed Git organization. +- Usage is tracked against your organization's credit pool. + +## Next Steps + +- [Managing Members](/openhands/usage/cloud/organizations/managing-members) - Invite users and manage roles. +- [Roles and Permissions](/openhands/usage/cloud/organizations/roles-permissions) - Understand permission levels. + ### Plugin Launcher Source: https://docs.openhands.dev/openhands/usage/cloud/plugin-launcher.md @@ -36932,7 +37313,7 @@ Once configured, you can request OpenHands to work on a Jira ticket by: 1. **Specify the Repository**: Include the repository location in either: - The ticket body itself, or - A comment on the ticket - + 2. **Trigger OpenHands**: Activate the agent using one of these methods: - Add an `openhands` label to the ticket - Comment with: `@openhands please review these requirements, generate a plan, and then proceed with implementation` @@ -37096,7 +37477,7 @@ Once configured, you can request OpenHands to work on a Jira ticket by: 1. **Specify the Repository**: Include the repository location in either: - The ticket body itself, or - A comment on the ticket - + 2. **Trigger OpenHands**: Activate the agent using one of these methods: - Add an `openhands` label to the ticket - Comment with: `@openhands please review these requirements, generate a plan, and then proceed with implementation` @@ -37693,7 +38074,7 @@ There are countless ways to contribute to OpenHands. 
Whether you're a seasoned d ### Frontend & UI/UX Make OpenHands more beautiful and user-friendly: - **React & TypeScript Development** - Improve the web interface -- **UI/UX Design** - Enhance user experience and accessibility +- **UI/UX Design** - Enhance user experience and accessibility - **Mobile Responsiveness** - Make OpenHands work great on all devices - **Component Libraries** - Build reusable UI components @@ -37786,7 +38167,7 @@ Don't hesitate to ask for help: --- -Thank you for considering contributing to OpenHands! Together, we're building tools that will democratize AI-powered software development and make it accessible to developers everywhere. Every contribution, no matter how small, helps us move closer to that vision. +Thank you for considering contributing to OpenHands! Together, we're building tools that will democratize AI-powered software development and make it accessible to developers everywhere. Every contribution, no matter how small, helps us move closer to that vision. Welcome to the community! 🎉 @@ -38135,6 +38516,586 @@ When OpenHands starts, it: - **For SDK integration**: See [SDK MCP Guide](/sdk/guides/mcp) - **For architecture details**: See [MCP Architecture](/sdk/arch/mcp) +### Plugins +Source: https://docs.openhands.dev/overview/plugins.md + +Plugins provide a way to package and distribute multiple agent components as a single unit. Instead of managing individual skills, hooks, and configurations separately, plugins bundle everything together for easier installation and distribution. + +## What Are Plugins? + +A plugin is a directory structure that can contain: + +- **Skills**: Specialized knowledge and workflows +- **Hooks**: Event handlers for tool lifecycle +- **MCP Config**: External tool server configurations +- **Agents**: Specialized agent definitions +- **Commands**: Slash commands + +The plugin format is compatible with the [Claude Code plugin structure](https://github.com/anthropics/claude-code/tree/main/plugins). 
Both `.plugin/` (OpenHands-native) and `.claude-plugin/` (Claude Code compatible) directory names are supported for the metadata directory. + + +## Plugins vs Skills + +Understanding the difference helps you choose the right approach: + + + + **Specialized prompts for specific tasks** + + - One skill = one specific capability + - Just a SKILL.md file (+ optional resources) + - Lightweight and focused + - Quick to create and share + + **When to use:** + - Adding single capabilities + - Simple workflows + - Domain-specific knowledge + - Quick solutions + + + + **Multi-component bundles** + + - Multiple skills + hooks + config + - Complete feature ecosystems + - Coordinated components + - Professional distribution + + **When to use:** + - Complete feature sets + - Tool integrations + - Team standards + - Commercial distributions + + + +### Comparison Table + +| Aspect | Skills | Plugins | +|--------|--------|---------| +| **Complexity** | Simple | Comprehensive | +| **Components** | Knowledge only | Skills + hooks + MCP + commands | +| **Use Case** | Single capability | Complete feature set | +| **Creation** | Few minutes | Planned development | +| **Distribution** | Copy directory | Structured package | +| **Maintenance** | Individual files | Coordinated bundle | + +### When to Use Each + +**Use a Skill when you need:** +- A single reusable prompt or workflow +- Domain-specific knowledge +- Simple automation +- Quick solutions + +**Use a Plugin when you need:** +- Multiple related skills working together +- Event handlers (hooks) for tool actions +- External tool integrations (MCP) +- Complete platform integrations +- Team or organizational standards + +**Example: Code Quality** + +*As separate skills:* +``` +.agents/skills/ +├── python-linting/ +├── code-review/ +└── pre-commit-setup/ +``` + +*As a plugin:* +``` +code-quality-plugin/ +├── .plugin/plugin.json # or .claude-plugin/plugin.json +├── skills/ +│ ├── linting/ +│ ├── review/ +│ └── setup/ +├── 
hooks/hooks.json # Post-edit linting +└── .mcp.json # Code analysis tools +``` + +The plugin version bundles all quality-related capabilities and automatically runs checks after file edits. + +## Plugin Structure + +A complete plugin follows this directory structure: + +``` +plugin-name/ +├── .plugin/ # or .claude-plugin/ +│ └── plugin.json # Required: Plugin metadata +├── skills/ +│ └── skill-name/ +│ └── SKILL.md # Individual skills +├── hooks/ +│ └── hooks.json # Tool lifecycle hooks +├── agents/ +│ └── agent-name.md # Specialized agents +├── commands/ +│ └── command-name.md # Slash commands +├── .mcp.json # MCP server config +└── README.md # Documentation +``` + +### Required Components + +Only one file is required: + +- **`plugin-name/.plugin/plugin.json`** or **`plugin-name/.claude-plugin/plugin.json`**: Plugin metadata + +All other components are optional—include only what your plugin needs. + +### Plugin Metadata + +The `plugin.json` file defines your plugin: + +```json +{ + "name": "code-quality", + "version": "1.0.0", + "description": "Code quality tools and workflows", + "author": { + "name": "Your Name", + "email": "your@email.com" + }, + "license": "MIT", + "repository": "https://github.com/example/code-quality-plugin" +} +``` + +The `author` field can also be a simple string such as `"Your Name"`. + +## Plugin Components Explained + + + + Skills in plugins work identically to standalone skills. Each skill has its own directory with a SKILL.md file: + + ``` + skills/ + ├── linting/ + │ ├── SKILL.md + │ └── scripts/ + └── testing/ + └── SKILL.md + ``` + + See [Skills Documentation](/overview/skills) for skill creation details. 
+ + + + Hooks are event handlers that run during tool lifecycle events: + + ```json + { + "hooks": { + "PostToolUse": [ + { + "matcher": "file_editor", + "hooks": [ + { + "type": "command", + "command": "ruff check $OPENHANDS_PROJECT_DIR", + "timeout": 10 + } + ] + } + ] + } + } + ``` + + Hook commands have access to these environment variables: + - `$OPENHANDS_PROJECT_DIR`: Path to the project directory + - `$OPENHANDS_SESSION_ID`: Current session identifier + - `$OPENHANDS_EVENT_TYPE`: The triggering event type + - `$OPENHANDS_TOOL_NAME`: Name of the tool that triggered the hook + + **Common use cases:** + - Run linters after file edits + - Validate tool inputs + - Log tool usage + - Trigger dependent actions + + **Available hook events:** + - `PreToolUse`: Before tool execution + - `PostToolUse`: After tool execution + - `UserPromptSubmit`: When the user submits a prompt + - `SessionStart`: When the session starts + - `SessionEnd`: When the session ends + - `Stop`: When execution stops + + + + MCP (Model Context Protocol) servers provide external tools and resources: + + ```json + { + "mcpServers": { + "fetch": { + "command": "uvx", + "args": ["mcp-server-fetch"] + }, + "github": { + "command": "uvx", + "args": ["mcp-server-github"], + "env": { + "GITHUB_TOKEN": "${GITHUB_TOKEN}" + } + } + } + } + ``` + + **Use cases:** + - Connect to external APIs + - Add specialized tools + - Integrate third-party services + + Learn more: [Model Context Protocol](/overview/model-context-protocol) + + + + Specialized agent definitions for specific tasks: + + ```markdown + --- + name: code-reviewer + description: Specialized agent for code review tasks + --- + + # Code Review Agent + + This agent specializes in reviewing code according to team standards... + ``` + + Agents in plugins can use the plugin's skills and hooks automatically. 
+ + + + Custom slash commands for plugin functionality: + + ```markdown + --- + name: /lint + description: Run linters on current file + --- + + # Lint Command + + Run configured linters on the current file... + ``` + + Commands provide quick access to plugin features. + + + +## Using Plugins + +How you use plugins depends on your platform: + + + + **Via configuration file:** + + Create `~/.openhands/config.toml`: + ```toml + [plugins] + sources = [ + "/path/to/local/plugin", + "github:org/plugin-repo", + ] + ``` + + **Via command line:** + ```bash + openhands --plugin /path/to/plugin + openhands --plugin github:org/plugin-repo + ``` + + Plugins are loaded when OpenHands starts. + + + + Load plugins programmatically: + + ```python + from openhands.sdk import LLM, Agent, Conversation + from openhands.sdk.plugin import PluginSource + from pydantic import SecretStr + + llm = LLM(model="claude-sonnet-4-20250514", api_key=SecretStr("your-api-key")) + agent = Agent(llm=llm) + + plugins = [ + PluginSource(source="/path/to/plugin"), + PluginSource(source="github:org/repo", ref="v1.0.0"), + ] + + conversation = Conversation( + agent=agent, + plugins=plugins, + ) + ``` + + See [SDK Plugins Guide](/sdk/guides/plugins) for details. + + + + **Via UI:** + 1. Open Settings + 2. Navigate to Plugins section + 3. Add plugin path or GitHub URL + 4. Restart to load + + **Via file system:** + Place plugins in `.openhands/plugins/` in your workspace. + + + + **Via Cloud UI:** + 1. Navigate to Workspace Settings + 2. Select Plugins tab + 3. Browse plugin library or add custom plugin + 4. Click "Enable" to activate + + Organization admins can publish plugins for team-wide access. + + + +## Installing Plugins + +### From a Local Directory + +1. **Verify plugin structure**: + ```bash + ls plugin-dir/.plugin/plugin.json || ls plugin-dir/.claude-plugin/plugin.json + ``` + +2. 
**Use the plugin path** in your configuration or command line + +### From GitHub + +Plugins can be loaded directly from GitHub repositories: + +``` +github:OpenHands/example-plugin +github:org/repo/path/to/plugin # For monorepos +github:org/repo#branch-name # Specific branch +github:org/repo#v1.0.0 # Specific tag +``` + +### Plugin Sources + + + + [github.com/OpenHands/extensions](https://github.com/OpenHands/extensions) + + Community-maintained plugins + + + + Your own GitHub repositories + + Organization or private plugins + + + +## Creating Plugins + +To create your own plugin: + +### 1. Plan Your Components + +Determine what your plugin needs: +- Which skills? +- What hooks for automation? +- Any MCP integrations? +- Custom commands? + +### 2. Create Directory Structure + +```bash +mkdir -p my-plugin/.plugin +mkdir -p my-plugin/skills +mkdir -p my-plugin/hooks +``` + +Use `.claude-plugin/` instead of `.plugin/` if you want Claude Code-compatible naming. + +### 3. Create Plugin Metadata + +Create `my-plugin/.plugin/plugin.json` (or `my-plugin/.claude-plugin/plugin.json`): +```json +{ + "name": "my-plugin", + "version": "0.1.0", + "description": "My custom plugin", + "author": { + "name": "Your Name" + } +} +``` + +### 4. Add Components + +Add skills, hooks, and other components as needed: + +``` +my-plugin/ +├── .plugin/plugin.json # or .claude-plugin/plugin.json +├── skills/ +│ └── my-skill/ +│ └── SKILL.md +└── hooks/ + └── hooks.json +``` + +### 5. Test Locally + +Load your plugin and verify all components work: + +```bash +openhands --plugin /path/to/my-plugin +``` + +### 6. 
Distribute + +Options for distribution: +- **GitHub repository**: Push to GitHub and share URL +- **File sharing**: Zip and share directory +- **Package registry**: Submit to official registry + +## Plugin Examples + + + + **Contains:** + - Python linting skill + - JavaScript linting skill + - Post-edit hooks for auto-linting + - Pre-commit setup + + **Use case:** Enforce code standards + + + + **Contains:** + - Kubernetes deployment skill + - Docker build skill + - CI/CD workflow skill + - kubectl MCP server + + **Use case:** Infrastructure management + + + + **Contains:** + - REST API client skill + - Authentication skill + - Rate limiting hooks + - API MCP server + + **Use case:** External service integration + + + + **Contains:** + - Unit testing skill + - Integration testing skill + - Post-code hooks for test runs + - Coverage commands + + **Use case:** Automated testing + + + +## Plugin Development Best Practices + + + + Begin by creating the core skills your plugin needs. Test them individually before bundling. + + + + Identify repetitive tasks and automate them with hooks. Example: run linters after file edits. + + + + Add MCP servers for external tool integration. This provides your skills with additional capabilities. + + + + Include a comprehensive README explaining: + - What the plugin does + - How to install it + - Configuration options + - Example usage + + + + Use semantic versioning (major.minor.patch) and document breaking changes. 
+ + + +## Troubleshooting + + + + **Check:** + - `.plugin/plugin.json` or `.claude-plugin/plugin.json` exists and is valid JSON + - Plugin path is correct + - All referenced files exist + + **Debug:** + ```bash + # Verify structure + ls -la plugin-name/.plugin/plugin.json || ls -la plugin-name/.claude-plugin/plugin.json + + # Check JSON syntax + (cat plugin-name/.plugin/plugin.json 2>/dev/null || cat plugin-name/.claude-plugin/plugin.json) | python -m json.tool + ``` + + + + **Check:** + - Skills have valid SKILL.md files + - Frontmatter includes `triggers` + - Trigger keywords match your prompts + + **Test:** + Use explicit trigger keywords from the skill's frontmatter. + + + + **Check:** + - `hooks/hooks.json` syntax is valid + - Hook matchers target the right tools + - Commands are executable + + **Debug:** + Check logs for hook execution errors. + + + +## Next Steps + +- **[Learn about Skills](/overview/skills)** - Understand the core component of plugins +- **[Explore MCP](/overview/model-context-protocol)** - Add external tool integrations +- **[SDK Plugins Guide](/sdk/guides/plugins)** - Programmatic plugin usage +- **[Browse Examples](https://github.com/OpenHands/software-agent-sdk/tree/main/examples/05_skills_and_plugins/02_loading_plugins/example_plugins)** - See complete plugin structures + +## Further Reading + +For SDK developers: +- **[SDK Plugins Documentation](/sdk/guides/plugins)** - Detailed SDK integration +- **[Hooks Guide](/sdk/guides/hooks)** - Event handler details +- **[MCP Integration](/sdk/guides/mcp)** - External tool servers + ### Quick Start Source: https://docs.openhands.dev/overview/quickstart.md @@ -38302,10 +39263,766 @@ Each skill file may include frontmatter that provides additional information. 
In ## Learn More +- **To add existing skills**: See [Adding New Skills](/overview/skills/adding) +- **To create your own skills**: See [Creating New Skills](/overview/skills/creating) +- **To monitor skill performance**: See [Monitoring and Improving Skills](/overview/skills/monitoring) +- **For bundling multiple components**: See [Plugins](/overview/plugins) - **For SDK integration**: See [SDK Skills Guide](/sdk/guides/skill) - **For architecture details**: See [Skills Architecture](/sdk/arch/skill) - **For specific skill types**: See [Repository Skills](/overview/skills/repo), [Keyword Skills](/overview/skills/keyword), [Organization Skills](/overview/skills/org), and [Global Skills](/overview/skills/public) +### Adding New Skills +Source: https://docs.openhands.dev/overview/skills/adding.md + +OpenHands makes it easy to extend your agent's capabilities by adding pre-built skills from the community or custom repositories. Skills can be added globally (available in all conversations) or to specific projects. + +## Using the Add-Skill Action + +The quickest way to add a skill is using the `/add-skill` command in your conversation with OpenHands. This command fetches skills from GitHub repositories and installs them in your workspace. + +### Basic Usage + +Provide a GitHub URL pointing to a skill: + +``` +/add-skill https://github.com/OpenHands/extensions/tree/main/skills/codereview +``` + +OpenHands will: +1. Parse the URL to identify the repository and skill path +2. Fetch the skill files from GitHub +3. Install the skill in `.agents/skills/` directory +4. Verify the installation +5. 
Make the skill immediately available + +### Supported URL Formats + +The `/add-skill` command accepts various GitHub URL formats: + +- Full GitHub tree URL: `https://github.com/OpenHands/extensions/tree/main/skills/codereview` +- Repository path: `https://github.com/OpenHands/extensions/skills/codereview` +- Short form: `github.com/OpenHands/extensions/skills/codereview` +- Shorthand: `OpenHands/extensions/skills/codereview` + +### Examples + +Add the code review skill: +``` +/add-skill https://github.com/OpenHands/extensions/tree/main/skills/codereview-roasted +``` + +Add the Kubernetes skill: +``` +/add-skill OpenHands/extensions/skills/kubernetes +``` + +Add a skill from a custom repository: +``` +/add-skill https://github.com/your-org/your-repo/tree/main/custom-skills/analytics +``` + +## Skill Storage Locations + +Skills are stored in different locations depending on the platform and scope: + + + + The CLI supports two skill locations: + + **User-level skills** (global, available in all conversations): + ``` + ~/.openhands/skills/ + ``` + + **Project-level skills** (specific to current directory): + ``` + .agents/skills/ + ``` + + Skills added via `/add-skill` are installed in `.agents/skills/` of your current workspace, making them available for that project. + + To add skills globally, manually place skill directories in `~/.openhands/skills/`. + + + + SDK users programmatically load skills: + + ```python + from openhands.sdk import Skill + + # Load from a directory + skill = Skill.load("/path/to/skill") + + # Load all skills from a directory + skills = Skill.load_all("/path/to/skills") + ``` + + See the [SDK Skills Guide](/sdk/guides/skill) for more details. + + + + Skills are stored in: + ``` + .agents/skills/ + ``` + + The GUI provides a visual interface for managing skills, but skills can also be added manually by placing them in this directory. + + + + OpenHands Cloud provides a centralized skill library accessible through the web interface. 
Skills can be: + - Added from the official registry with one click + - Imported from your connected repositories + - Shared across your team or organization + + See the [Cloud UI documentation](/openhands/usage/cloud/cloud-ui) for details. + + + +## Manual Installation + +You can also manually install skills by copying skill directories into the appropriate location. + +### For Project-Level Skills + +1. Create the skills directory if it doesn't exist: + ```bash + mkdir -p .agents/skills + ``` + +2. Copy or clone the skill directory: + ```bash + # Using git + git clone https://github.com/OpenHands/extensions temp-clone + cp -r temp-clone/skills/codereview .agents/skills/ + rm -rf temp-clone + + # Or download and extract manually + ``` + +3. Verify the skill structure: + ```bash + ls .agents/skills/codereview/SKILL.md + ``` + +### For User-Level Skills (CLI Only) + +1. Create the global skills directory: + ```bash + mkdir -p ~/.openhands/skills + ``` + +2. Add skills to this directory: + ```bash + cp -r /path/to/skill ~/.openhands/skills/ + ``` + +Skills in `~/.openhands/skills/` are available in all your conversations when using the CLI. + +## Verifying Installation + +After adding a skill, verify it's available: + +1. **Check the file exists**: The skill directory should contain at least a `SKILL.md` file + ```bash + ls .agents/skills/your-skill/SKILL.md + ``` + +2. **Test the trigger**: For keyword-triggered skills, use one of the trigger words in your prompt: + ``` + Help me set up kubernetes + ``` + +3. **Check skill loading**: OpenHands will indicate when a skill is loaded in response to your prompt + +## Skill Updates + +To update a skill to the latest version: + +1. **Remove the old version**: + ```bash + rm -rf .agents/skills/skill-name + ``` + +2. **Add the updated version**: + ``` + /add-skill https://github.com/OpenHands/extensions/tree/main/skills/skill-name + ``` + +Or manually pull updates if you cloned the skill repository. 
+ +## Authentication for Private Skills + +The `/add-skill` command automatically uses the `GITHUB_TOKEN` environment variable to access private repositories via the GitHub API. + +For manual `git clone` operations (such as when cloning directly into `.agents/skills/`), you'll need to handle authentication differently—typically using SSH keys or embedding a personal access token in the clone URL. + +**Using `/add-skill` with private repositories:** + +1. Set the `GITHUB_TOKEN` environment variable: + +```bash +export GITHUB_TOKEN=your_github_token +``` + +2. Use `/add-skill` as normal with private repository URLs + +The command will automatically use the token for authentication. + +## Skill Conflicts + +If a skill with the same name already exists, OpenHands will warn you before overwriting. To resolve conflicts: + +1. **Rename the existing skill**: Move or rename the existing skill directory +2. **Choose a different installation location**: Install at user-level vs project-level +3. **Overwrite**: Confirm the overwrite when prompted + +## Next Steps + +- **[Browse available skills](https://github.com/OpenHands/extensions)** in the official registry +- **[Create your own skills](/overview/skills/creating)** for custom workflows +- **[Learn about keyword triggers](/overview/skills/keyword)** to make skills activate automatically +- **[Understand skill structure](/sdk/guides/skill)** for the AgentSkills format + +### Creating New Skills +Source: https://docs.openhands.dev/overview/skills/creating.md + +Instead of repeating the same prompts or instructions in every conversation, create a skill that OpenHands can load automatically when needed. Skills transform one-time prompts into reusable, maintainable knowledge that improves over time. + +## Why Create Skills? 
+ +**Before (repeating yourself):** +``` +Please analyze this code using our company's Python style guide: +- Use black for formatting +- Max line length 88 +- Use type hints for all functions +- Follow PEP 8 naming conventions +... +``` + +**After (using a skill):** +``` +Review this Python code +``` + +The skill triggers automatically and applies all your style guidelines consistently. + +## When to Create a Skill + +Create a skill when you find yourself: + +- Repeating the same instructions across multiple conversations +- Working with domain-specific knowledge (company policies, API schemas, workflows) +- Using the same multi-step procedures repeatedly +- Needing consistent behavior for specific tools or frameworks +- Sharing best practices across a team + +## Quick Start + +### Automated Approach: Let OpenHands Help + +To create a skill with guided assistance, ask OpenHands to help you: + +``` +Create a skill for [your use case] +``` + +or simply: + +``` +Write a new skill +``` + +The `skill-creator` skill (from the [OpenHands public skills library](https://github.com/OpenHands/extensions/tree/main/skills/skill-creator)) will guide you through an interactive process: +- Asks questions about your use cases and requirements +- Suggests appropriate skill structure (references, scripts, assets) +- Helps you write effective trigger keywords and descriptions +- Ensures you follow best practices automatically +- Creates the complete skill structure for you + +This is the recommended approach, especially when you're starting out. + +### Manual Approach + +If you prefer to create the skill structure manually: + +1. **Create the skill directory**: + ```bash + mkdir -p .agents/skills/my-skill + ``` + +2. **Create the SKILL.md file**: + ```bash + touch .agents/skills/my-skill/SKILL.md + ``` + +3. **Add content** (see structure and guidelines below) + +4. 
**Test it** by using a trigger keyword in your prompt + +## Determining Scope + +Before writing your skill, define its scope clearly: + +### Ask These Questions + +1. **What specific task does this skill handle?** + - ❌ Too broad: "Help with coding" + - ✅ Focused: "Lint Python code using ruff with our company rules" + +2. **What knowledge is required?** + - Code style guidelines + - API documentation + - Domain-specific schemas + - Multi-step procedures + +3. **What resources are needed?** + - Scripts for deterministic tasks + - Reference documents for detailed information + - Asset files for templates or boilerplate + +4. **Who will use this skill?** + - Just you (keep it simple) + - Your team (add more documentation) + - Public sharing (comprehensive examples) + +### Scope Examples + +**Good scope (focused):** +- "Configure pre-commit hooks for Python projects" +- "Generate financial reports using our SQL schema" +- "Deploy to our Kubernetes staging environment" + +**Poor scope (too broad):** +- "Help with Python" +- "Work with databases" +- "Deploy applications" + +## Choosing Name and Triggers + +The skill name and trigger keywords determine when OpenHands loads your skill. + +### Naming Your Skill + +Choose a clear, descriptive name: + +- **Use lowercase with hyphens**: `python-linting`, `k8s-deploy`, `api-docs` +- **Be specific**: `ruff-linter` not just `linter` +- **Match common terms**: Use vocabulary your users know + +### Defining Triggers + +Triggers are keywords that automatically activate your skill. Choose words users naturally say when they need this skill. + + + + List specific words or phrases that should activate the skill: + + ```yaml + --- + name: python-linting + description: This skill should be used when the user asks to "lint Python code", "check Python style", "run ruff", or mentions Python code quality. 
+ triggers: + - lint + - linting + - ruff + - code quality + --- + ``` + + **Best practices:** + - Include 2-5 trigger keywords + - Use terms users actually say + - Include tool names (e.g., "ruff", "pytest") + - Include action words (e.g., "lint", "test", "deploy") + + + + The skill description is crucial for trigger matching. Write it in third person and include specific phrases: + + ```yaml + description: This skill should be used when the user asks to "deploy to Kubernetes", "apply K8s manifests", "check pod status", or mentions kubectl commands. Provides comprehensive Kubernetes deployment workflows. + ``` + + **Key elements:** + - Start with "This skill should be used when..." + - Quote specific user phrases: "deploy to Kubernetes" + - List concrete scenarios + - Mention related tools or frameworks + + + +### Examples of Good Triggers + +```yaml +# API integration skill +triggers: +- stripe +- payment +- checkout +``` + +```yaml +# Database skill +triggers: +- bigquery +- sql query +- data warehouse +``` + +```yaml +# Deployment skill +triggers: +- deploy +- kubernetes +- k8s +- kubectl +``` + +## Defining the Skill Body + +The skill body contains the instructions OpenHands will follow. Write in imperative form (command form) rather than second person. + +### Basic Structure + +```markdown +--- +name: skill-name +description: This skill should be used when... +triggers: +- keyword1 +- keyword2 +--- + +# Skill Title + +Brief overview of what this skill does. + +## Core Instructions + +Main procedures and guidelines. + +## Common Patterns + +Typical use cases and solutions. + +## Additional Resources + +(Optional) References to bundled files. 
+``` + +### Writing Style + +**Use imperative/infinitive form:** +✅ "Check the configuration file" +✅ "Validate input before processing" +✅ "Run tests after deployment" + +**Avoid second person:** +❌ "You should check the configuration" +❌ "You need to validate input" +❌ "You must run tests" + +### Keep It Focused + +**SKILL.md content:** +- Core concepts and workflows (1,500-2,000 words ideal) +- Essential procedures +- Quick reference information +- Pointers to additional resources + +**What NOT to include:** +- Exhaustive API documentation (use `references/` instead) +- Detailed edge cases (use `references/` instead) +- Long examples (use `references/` instead) + +## Best Practices and Tips + +### Use Numbered Step Workflows + +For multi-step procedures, use numbered lists: + +````markdown +## Deployment Workflow + +1. **Validate the configuration**: + ```bash + kubectl apply --dry-run=client -f deployment.yaml + ``` + +2. **Apply to staging**: + ```bash + kubectl apply -f deployment.yaml -n staging + ``` + +3. **Verify pod status**: + ```bash + kubectl get pods -n staging --watch + ``` + +4.
**Check logs**: + ```bash + kubectl logs -f deployment/app-name -n staging + ``` +```` + +**Benefits:** +- Clear sequence for complex workflows +- Easy to follow and verify +- Reduces errors from skipped steps + +### Add Large Files as References + +Keep SKILL.md lean by moving detailed content to `references/`: + +``` +my-skill/ +├── SKILL.md # Core instructions (< 3,000 words) +└── references/ + ├── api-docs.md # Detailed API reference + ├── examples.md # Comprehensive examples + └── troubleshooting.md # Edge cases and fixes +``` + +**In SKILL.md, reference these files:** + +```markdown +## Additional Resources + +For detailed information, see: +- **`references/api-docs.md`** - Complete API documentation +- **`references/examples.md`** - Working code examples +- **`references/troubleshooting.md`** - Common issues and solutions +``` + +**Benefits:** +- Keeps context window smaller when skill loads +- OpenHands reads references only when needed +- Easier to maintain and update specific sections + +### Create Scripts for Predictable Steps + +For tasks that are repeatedly rewritten or need deterministic behavior, create executable scripts: + +``` +my-skill/ +├── SKILL.md +└── scripts/ + ├── validate_config.py + ├── deploy.sh + └── rollback.sh +``` + +**When to use scripts:** +- Same code being rewritten repeatedly +- Deterministic reliability required +- Complex parsing or validation +- Multi-step automation + +**Reference scripts in SKILL.md:** + +```markdown +## Validation + +Run the validation script: + +\`\`\`bash +python3 scripts/validate_config.py config.yaml +\`\`\` + +This checks: +- YAML syntax +- Required fields +- Value constraints +``` + +**Benefits:** +- Token efficient (scripts can run without being read) +- Deterministic behavior +- Reusable across projects +- Can be versioned and tested + +### Include Quick Reference Tables + +Use tables for configuration options, command flags, or status codes: + +```markdown +## Configuration Options + +| Option |
Default | Description | +|--------|---------|-------------| +| `timeout` | 30s | Maximum wait time | +| `retries` | 3 | Number of retry attempts | +| `env` | production | Target environment | +``` + +### Provide Concrete Examples + +Show real examples, not abstract descriptions: + +```markdown +## Example Usage + +Deploy the web application: + +\`\`\`bash +# Build the image +docker build -t myapp:v1.0 . + +# Push to registry +docker push registry.example.com/myapp:v1.0 + +# Update Kubernetes deployment +kubectl set image deployment/web web=registry.example.com/myapp:v1.0 +\`\`\` +``` + +### Use Progressive Disclosure + +Structure information from simple to complex: + +1. **SKILL.md**: Essential workflows and core concepts +2. **references/**: Detailed patterns, advanced techniques, edge cases +3. **scripts/**: Automation for predictable tasks +4. **assets/**: Templates and boilerplate files + +## Complete Example + +Here's a complete skill for Python code review: + +``` +python-review/ +├── SKILL.md +├── references/ +│ ├── style-guide.md +│ └── common-issues.md +└── scripts/ + └── run-checks.sh +``` + +**SKILL.md:** +```markdown +--- +name: python-review +description: This skill should be used when the user asks to "review Python code", "check Python style", "lint Python", or requests code quality analysis. Provides comprehensive Python code review workflows. +triggers: +- python review +- code review +- lint python +- black +- ruff +--- + +# Python Code Review + +Review Python code using company standards and best practices. + +## Review Workflow + +1. **Run automated checks**: + \`\`\`bash + scripts/run-checks.sh + \`\`\` + +2. **Review linter output** for: + - Style violations (Black, Ruff) + - Type errors (mypy) + - Security issues (bandit) + +3. **Check code structure**: + - Function length (< 50 lines) + - Complexity (< 10 cyclomatic) + - Naming conventions + +4. 
**Verify tests**: + \`\`\`bash + pytest tests/ --cov=src --cov-report=term + \`\`\` + +## Style Guidelines + +- **Formatting**: Black with 88-character line limit +- **Linting**: Ruff with company config +- **Types**: Full type hints for public APIs +- **Docstrings**: Google style for all public functions + +## Additional Resources + +- **`references/style-guide.md`** - Complete style guide +- **`references/common-issues.md`** - Common mistakes and fixes +- **`scripts/run-checks.sh`** - Automated quality checks +``` + +## Testing Your Skill + +After creating your skill: + +1. **Verify structure**: + ```bash + ls .agents/skills/your-skill/SKILL.md + ``` + +2. **Check frontmatter**: Ensure YAML is valid with `name`, `description`, and `triggers` + +3. **Test trigger keywords**: Use a trigger word in a prompt: + ``` + Help me lint this Python code + ``` + +4. **Verify loading**: OpenHands should indicate the skill was loaded + +5. **Iterate**: Improve based on actual usage + + +For production deployments, see [Monitoring and Improving Skills](/overview/skills/monitoring) to track performance using logging, evaluation metrics, dashboarding, and automated feedback aggregation. 
+ + +## Common Mistakes to Avoid + + +**Mistake 1: Vague triggers** +❌ `description: Helps with Python` +✅ `description: This skill should be used when the user asks to "lint Python code", "run black", or mentions Python code quality` + + + +**Mistake 2: Everything in SKILL.md** +❌ Single 10,000-word SKILL.md +✅ Focused SKILL.md (2,000 words) + references/ for details + + + +**Mistake 3: Using "you" in instructions** +❌ "You should validate the config" +✅ "Validate the config" + + + +**Mistake 4: Missing examples** +❌ Abstract descriptions only +✅ Concrete examples with actual commands + + +## Next Steps + +- **[Add your skill](/overview/skills/adding)** to your workspace +- **[Monitor skill performance](/overview/skills/monitoring)** in production +- **[Share skills](https://github.com/OpenHands/extensions)** with the community +- **[Learn the AgentSkills format](/sdk/guides/skill)** for advanced features +- **[Explore example skills](https://github.com/OpenHands/extensions)** for inspiration + +## Further Reading + +For advanced skill creation techniques and SDK integration: +- **[Monitoring Skills](/overview/skills/monitoring)** - Track performance and improve skills in production +- **[Plugins](/overview/plugins)** - Bundle multiple skills with hooks and MCP config +- **[SDK Skills Guide](/sdk/guides/skill)** - Programmatic skill creation +- **[Observability & Tracing](/sdk/guides/observability)** - OpenTelemetry configuration details +- **[GitHub Workflows](/sdk/guides/github-workflows/pr-review)** - Automate skills in CI/CD pipelines +- **[Skills Architecture](/sdk/arch/skill)** - Technical details +- **[Official Skill Registry](https://github.com/OpenHands/extensions)** - Community examples + ### Keyword-Triggered Skills Source: https://docs.openhands.dev/overview/skills/keyword.md @@ -38327,18 +40044,202 @@ Enclose the frontmatter in triple dashes (---) and include the following fields: ## Example -Keyword-triggered skill file example located at 
`.agents/skills/yummy.md`: -``` +Here's a simplified example of the `github` skill located at `.agents/skills/github/SKILL.md`: + +```markdown --- +name: github +description: Interact with GitHub repositories, pull requests, issues, and workflows using the GITHUB_TOKEN environment variable and GitHub CLI. Use when working with code hosted on GitHub or managing GitHub resources. triggers: -- yummyhappy -- happyyummy +- github +- git --- -The user has said the magic word. Respond with "That was delicious!" +You have access to an environment variable, `GITHUB_TOKEN`, which allows you to interact with +the GitHub API. + + +You can use `curl` with the `GITHUB_TOKEN` to interact with GitHub's API. +ALWAYS use the GitHub API for operations instead of a web browser. +ALWAYS use the `create_pr` tool to open a pull request + + +... (additional GitHub-specific instructions) +``` + + +**Context Management with Platform Skills**: OpenHands includes specialized skills for platforms like GitHub and GitLab that are only triggered when needed (e.g., when you mention "github" or "gitlab" in your prompt). This keeps your context clean and focused, loading platform-specific guidance only when working with those services. + + +[See more examples of keyword-triggered skills in the official OpenHands Skills Registry](https://github.com/OpenHands/extensions) + +### Monitoring and Improving Skills +Source: https://docs.openhands.dev/overview/skills/monitoring.md + +After creating and deploying a skill, monitor its performance to ensure it works correctly in production. This is particularly important for skills used in automated workflows like CI/CD pipelines. + +## The Monitoring Workflow + +Production skill monitoring follows a four-part process: + +1. **Logging** - Record agent behavior during skill execution +2. **Evaluating** - Measure performance using relevant metrics +3. **Dashboarding** - Visualize metrics over time +4. 
**Aggregating** - Use feedback to improve the skill + +## Logging Agent Behavior + +OpenHands includes OpenTelemetry-compatible instrumentation via the [Laminar](https://github.com/lmnr-ai/lmnr) library. Set up logging to capture agent traces during skill execution. + +### For SDK Users + +Set the `LMNR_PROJECT_API_KEY` environment variable to send traces to Laminar, or configure any OpenTelemetry-compatible backend: + +```bash +export LMNR_PROJECT_API_KEY="your-api-key" +``` + +See the [SDK Observability Guide](/sdk/guides/observability) for detailed configuration options including Honeycomb, Jaeger, Datadog, and other OTLP-compatible backends. + +### For GitHub Actions + +When using skills in GitHub workflows, add the API key to your action configuration. See the [PR review action example](https://github.com/OpenHands/extensions/blob/main/plugins/pr-review/action.yml) for reference. + +## Evaluating Performance + +Define metrics that reflect whether your skill is working correctly. Effective metrics measure actual outcomes rather than intermediate steps. + +### Example: PR Review Skill + +For a code review skill, measure suggestion acceptance rate: + ``` +suggestion_accuracy = ai_suggestions_reflected / ai_suggestions +``` + +Track: +- Number of suggestions made by the agent +- Number of suggestions incorporated by developers + +### Implementation Approach + +1. **Create an evaluation workflow** - Run after the main task completes (e.g., after PR merge) +2. **Collect relevant data** - Agent output, human responses, final results +3. **Use LLM as judge** - Feed data into a prompt that calculates metrics + +Example evaluation prompt excerpt: + +``` +### ai_suggestions +Count items where the body contains an actionable code suggestion +(look for code blocks, "suggestion:", specific changes to make). +Do NOT count general praise or approval-only comments. + +### ai_suggestions_reflected +Count suggestions that were incorporated. A suggestion is "reflected" if: +1. 
A human response indicates the suggestion was implemented, OR +2. The suggestion appears in the final diff +``` + +See the [evaluation action example](https://github.com/OpenHands/extensions/blob/main/.github/workflows/pr-review-evaluation.yml) for a complete implementation. + +## Dashboarding Metrics + +Visualize metrics over time to identify trends. With Laminar or similar platforms, create SQL queries that aggregate evaluation results. -[See examples of keyword-triggered skills in the official OpenHands Skills Registry](https://github.com/OpenHands/extensions) +Track: +- Metric trends (improving or degrading) +- Performance across different contexts (repos, file types, etc.) +- Comparison between prompt variations or models + +## Aggregating Feedback for Improvement + +Use language models to analyze patterns in evaluation results and suggest skill improvements. + +### Process + +1. **Collect evaluation data** - Aggregate analyses from recent runs +2. **Provide current skill content** - Include the existing SKILL.md +3. **Use a reasoning model** - Feed both into a long-context model (Gemini-2-Pro, Claude 3.5 Sonnet, etc.) +4. **Extract actionable suggestions** - Review model output for concrete improvements + +### Example Output + +Example output from aggregation: + +``` +### Issue: Context-Unaware Suggestions +The agent suggests technically correct changes that conflict with +repository conventions (e.g., suggesting integration tests when the +repo uses mocks). + +Frequency: ~15% of suggestions +Recommendation: Add repo-specific testing philosophy to references/ +``` + +## Deployment in Automated Workflows + +Skills can run automatically in CI/CD pipelines. The [OpenHands Extensions repository](https://github.com/OpenHands/extensions/tree/main/plugins) includes example GitHub Actions for common automation patterns. 
+ +### Common Automation Use Cases + +- **PR review** - Run code review skills when PRs are marked "ready for review" +- **Issue triage** - Classify and label new issues +- **Code generation** - Generate boilerplate or documentation +- **Security scanning** - Check for vulnerabilities and suggest fixes + +See the [GitHub Workflows guide](/sdk/guides/github-workflows/pr-review) for SDK-based automation examples. + +## Best Practices + + + Select metrics that reflect real-world outcomes, not just intermediate steps. + + **Good metrics:** + - Suggestion acceptance rate (for code review) + - Issue classification accuracy (for triage) + - Time to resolution (for bug fixing) + + **Poor metrics:** + - Number of suggestions made + - Lines of code generated + - Tokens consumed + + + + Begin with basic logging before implementing complex evaluation pipelines. + + 1. Set up OpenTelemetry logging + 2. Review traces manually to understand agent behavior + 3. Identify patterns in successes and failures + 4. Design metrics based on observed patterns + 5. 
Automate evaluation + + + + Use evaluation results to make targeted improvements: + + - Low accuracy → Review skill instructions for clarity + - Inconsistent behavior → Add more specific examples + - Context errors → Expand references/ with domain knowledge + - Repetitive failures → Create scripts for deterministic tasks + + + + Track performance across different contexts: + + - **By repository** - Different repos may need different approaches + - **By file type** - Skills may work better on certain languages + - **By time** - Identify degradation or improvement trends + - **By model** - Compare different LLM backends + + +## Further Reading + +- **[SDK Observability Guide](/sdk/guides/observability)** - Detailed OpenTelemetry configuration +- **[GitHub Workflows](/sdk/guides/github-workflows/pr-review)** - Automate skills in CI/CD +- **[Hooks Guide](/sdk/guides/hooks)** - Event-driven skill execution +- **[Creating Skills](/overview/skills/creating)** - Skill creation fundamentals ### Organization and User Skills Source: https://docs.openhands.dev/overview/skills/org.md @@ -38537,8 +40438,8 @@ OpenHands Enterprise allows you to run AI coding agents directly on your own servers or in your private cloud. Unlike the SaaS version, the enterprise deployment gives you complete control over your AI development environment. - - Deploy OpenHands Enterprise on your own infrastructure in under an hour. + + Start your free 30-day trial and deploy OpenHands Enterprise on your own infrastructure in under an hour. No credit card required. @@ -38585,8 +40486,8 @@ agreements and API keys. 
OpenHands Enterprise integrates with your existing enterprise ecosystem: - **Identity & Access**: Enterprise SAML/SSO for centralized authentication -- **Source Control**: GitHub Enterprise, GitLab, Bitbucket -- **Project Management**: Jira and other ticketing systems +- **Source Control**: GitHub Enterprise, GitLab, and [Bitbucket Data Center](/enterprise/integrations/bitbucket-data-center) +- **Project Management**: [Jira Data Center](/enterprise/integrations/jira-data-center) and other ticketing systems - **Communication**: Slack integration for notifications and workflows ### Containerized Sandbox Runtime @@ -38642,56 +40543,176 @@ Enterprise customers receive: - [SDK Documentation](/sdk/index) — Build custom agents with the OpenHands SDK - [Pricing](https://openhands.dev/pricing) — Compare all OpenHands plans +### Analytics +Source: https://docs.openhands.dev/enterprise/analytics.md + +This guide walks you through setting up and using Laminar for Analytics in OpenHands Enterprise. +You'll opt into Analytics and configure conversations to automatically send traces to Laminar. + +## Who This Is For + +This guide is for users who want to explore analytics on their OpenHands Enterprise conversations. + +### Why Laminar? + +[Laminar](https://laminar.sh/) is an open source observability platform for AI agents like OpenHands. + +Use Laminar to view your conversation traces including prompts, tool calls, and answers. A trace is a record of what your agent did. + +Laminar can help you see where the agent went wrong. From traces, you can create signals. A signal is a natural language instruction to extract structured data from traces. Use signals to analyze recurring behavior across traces. You can then create better situations for prompting and measure them in Laminar. You can also analyze and improve your skills. + +For example, you can view all conversation traces related to a specific skill. 
+ +Laminar can help you answer the following questions: +- On a trace, did the agent do a good job using the skill? +- On another trace, did the agent do a bad job? + +For more information on evaluating skills, see [Evaluating Agent Skills](https://www.openhands.dev/blog/evaluating-agent-skills). + +### Prerequisites + +Before you begin, make sure you completed the [Quick Start guide](/enterprise/quick-start). + +## Enable Analytics + +You should see an **Analytics Configuration** section on the application configuration page. + +Check the **Enable Analytics** box to have the installer set up and configure Laminar for analytics. + +![Configure Analytics](./images/laminar-configure-analytics.png) + +## Deploy + +OpenHands will begin deploying. You can expect the deployment status to transition from +**Missing** to **Unavailable** to **Ready**. This typically takes 10-15 minutes. + +![Deployment in progress](./images/laminar-deploy-in-progress.png) + +Click **Details** next to the deployment status to monitor individual resources. Resources +shown in orange are still deploying -- wait until all resources are ready. + +![Deployment status details](./images/laminar-deployment-status-details.png) + +## Access Laminar UI + +Once the deployment status shows **Ready**, navigate to `https://analytics.app.`. + +Click the **Continue with Keycloak** button: + +![Laminar Keycloak Auth](./images/laminar-keycloak-auth.png) + +## Create a Laminar project + +![Laminar Create Project](./images/laminar-create-project.png) + +Once a project has been created, Laminar is ready to listen for traces. + +![Laminar Listen Traces](./images/laminar-listen-traces.png) + +## Create an ingest-only API Key + +Important: Always use ingest-only API keys when deploying. + +Create a key with the right permissions. Ingest-only keys are recommended because they can only write traces. They cannot be used to read data. 
+ +![Configure Laminar Ingest Only Key](./images/laminar-ingest-only-key.png) + +## Set Laminar Project API Key to enable automatic conversation traces + +Set the ingest only key as the Laminar Project API Key in the Admin Console configuration: + +![Configure Laminar Project API Key](./images/laminar-configure-key.png) + +Click **Save config**. + +## Deploy Updated Configuration + +Deploy the config change after setting the Laminar Project API Key in the Admin Console. + +![Laminar Deploy Again](./images/laminar-deploy-again.png) + +Wait for the deployment to complete. + +## Start a conversation + +Navigate to the OpenHands UI at `https://app.`. Start a new conversation and try a prompt. + +![Start a Conversation](./images/laminar-openhands-conversation.png) + +Your conversations will now automatically send a trace to Laminar. + +![Laminar Trace](./images/laminar-trace.png) + + + +## Next Steps + + + + Get the most out of your AI coding agents with effective prompting techniques. + + + Reach out to the OpenHands team for deployment assistance or questions. + + + Explore the full OpenHands documentation for usage guides and features. + + + ### Enterprise vs. Open Source Source: https://docs.openhands.dev/enterprise/enterprise-vs-oss.md -This page describes the key differences between **OpenHands Local GUI** (open source) for individual developers and small teams running the Local GUI on their own machines, and **OpenHands Enterprise** for organizations that need advanced collaboration, integrations, and management capabilities. +This page describes the key differences between **OpenHands Agent Canvas** (open source) for individual developers and small teams running the Agent Canvas on their own machines, and **OpenHands Enterprise** for organizations that need advanced collaboration, integrations, and management capabilities. 
## Feature Comparison -The table below highlights the key differences between the OpenHands Local GUI and OpenHands Enterprise offerings: +The table below highlights the key differences between the OpenHands Agent Canvas and OpenHands Enterprise offerings: -| Feature | Local GUI | OpenHands Enterprise | +| Feature | Agent Canvas | OpenHands Enterprise | |---------|-------------------|----------------------| | **Full breadth of agent functionality (sub-agents, MCP, skills, model agnosticism)** | ✅ | ✅ | -| **Where does the agent run?** | Local dev machines | Scalable, runtime sandboxes | -| **Scalability** *Run multiple concurrent agent conversations* | Limited by machine | Unlimited, on-demand | -| **'@OpenHands' in Slack and Jira** *Important for real-time resolution of bugs and feedback* | ❌ | ✅ | -| **'@OpenHands' in GitHub, GitLab, Bitbucket** *Important for real-time resolution of PR comments and failing tests* | ❌ | ✅ | -| **Multiple agent conversations in one place** *Give users visibility into all agent conversations* | ✅ | ✅ | -| **Share conversations** *Unlock collaboration use cases* | ❌ | ✅ | -| **Remote monitoring of agent conversations** *Enable Human-in-the-Loop operations for running agents* | ❌ Requires access to machine where agent is running | ✅ Monitor remotely from OpenHands Enterprise UI | -| **Multi-user management and RBAC** *Roll out to several users and teams* | ❌ | ✅ | -| **Automations** *Create scheduled and event-based workflows* | ❌ | ✅ | +| **Where does the agent run?** | Locally or on a custom backend | Scalable, Kubernetes runtimes | +| **Automations**
*Create scheduled and event-based workflows* | ✅ | ✅ | +| **'@OpenHands' in Slack and Jira**
*Important for real-time resolution of bugs and feedback* | Requires custom [Automation](/openhands/usage/automations/overview) | Native integration | +| **'@OpenHands' in GitHub, GitLab, Bitbucket**
*Important for real-time resolution of PR comments and failing tests* | Requires custom [Automation](/openhands/usage/automations/overview) | Native integration | +| **Share conversations**
*Unlock collaboration use cases* | ❌ | ✅ | +| **Multi-user Organizations and RBAC**
*Roll out to several users and teams* | ❌ | ✅ | +| **User and Organization Budgets**
*Monitor and control costs* | ❌ | ✅ | +| **Agent Observability Integrations**
*Centralized logging of conversations* | ❌ | ✅ Uses Laminar | +| **Private Plugin Marketplace**
*Publish reusable plugins for teams to use* | ❌ | ✅ | | **SAML** | ❌ | ✅ | | **REST APIs** | ❌ | ✅ | ## When to Choose Each Option -### OpenHands Local GUI +### OpenHands Agent Canvas -The OpenHands Local GUI is ideal for: +The OpenHands Agent Canvas is ideal for: - Individual developers exploring AI-assisted coding - Small teams with basic requirements - Self-hosted environments where you manage your own infrastructure -- Running OpenHands locally on your own machine +- Running OpenHands locally on your own machine using the Agent Canvas ### OpenHands Enterprise OpenHands Enterprise is the right choice when you need: -- **Team collaboration** — Share conversations and manage multiple users from a single platform +- **Multi-user RBAC** — Manage multiple users from a single platform - **Platform integrations** — Invoke OpenHands directly from Slack, Jira, GitHub, GitLab, or Bitbucket - **Scalability** — Run unlimited parallel agent conversations without local resource constraints - **Enterprise security** — SAML authentication, RBAC, and centralized audit logs -- **Remote monitoring** — Track agent progress in real-time from anywhere +- **Usage Monitoring** — Track and enforce budgets; monitor usage across all users ## Getting Started @@ -38706,6 +40727,494 @@ OpenHands Enterprise is the right choice when you need: +### External PostgreSQL +Source: https://docs.openhands.dev/enterprise/external-postgres.md + +OpenHands Enterprise can connect to an external PostgreSQL instance instead of using +the bundled database. This is useful when you have existing database infrastructure, +need specific backup/recovery procedures, or require high availability configurations. + +## PostgreSQL Version + +OpenHands Enterprise requires **PostgreSQL 16.4.0 or above**. PostgreSQL 17 is also supported. + +## Database Encoding Requirement + + + All databases used by OpenHands Enterprise **must use UTF8 encoding**. 
Using other encodings + (such as LATIN1) will cause database migrations to fail during installation or upgrades. + + +When creating databases manually or configuring your PostgreSQL instance, ensure UTF8 encoding +is set: + +```sql +-- Check current database encoding +SELECT datname, pg_encoding_to_char(encoding) AS encoding FROM pg_database; + +-- Create databases with explicit UTF8 encoding +CREATE DATABASE openhands WITH ENCODING 'UTF8'; +``` + +If your PostgreSQL server's default encoding is not UTF8, you may need to specify the encoding +explicitly when creating each database, or configure the server's default encoding. + +## Required Databases + +OpenHands Enterprise uses the following databases: + +| Database | Purpose | +|----------|---------| +| `openhands` | Core application data | +| `bitnami_keycloak` | Identity and access management | +| `litellm` | LLM proxy configuration and usage tracking | +| `runtime_api_db` | Runtime/sandbox management | +| `automations` | Scheduled tasks and automation workflows | + +## Database User Requirements + +The PostgreSQL user provided to OpenHands Enterprise needs specific privileges depending +on your preferred setup approach. + +### Option 1: Automatic Database Creation (Recommended) + +If you provide a database user with the `CREATEDB` privilege, OpenHands Enterprise will +automatically create all required databases during installation. + +```sql +-- Create user with CREATEDB privilege +CREATE USER openhands_user WITH PASSWORD 'your-secure-password' CREATEDB; +``` + +When the user creates its own databases, it will automatically have all necessary privileges +on them including the ability to manage the `public` schema. 
+ +### Option 2: Manual Database Creation + +If your security policies prevent granting `CREATEDB`, you must manually create all +databases before installation: + +```sql +-- Create the databases with UTF8 encoding +CREATE DATABASE openhands WITH ENCODING 'UTF8'; +CREATE DATABASE bitnami_keycloak WITH ENCODING 'UTF8'; +CREATE DATABASE litellm WITH ENCODING 'UTF8'; +CREATE DATABASE runtime_api_db WITH ENCODING 'UTF8'; +CREATE DATABASE automations WITH ENCODING 'UTF8'; + +-- Create user without CREATEDB +CREATE USER openhands_user WITH PASSWORD 'your-secure-password'; + +-- Grant privileges on each database +GRANT ALL PRIVILEGES ON DATABASE openhands TO openhands_user; +GRANT ALL PRIVILEGES ON DATABASE bitnami_keycloak TO openhands_user; +GRANT ALL PRIVILEGES ON DATABASE litellm TO openhands_user; +GRANT ALL PRIVILEGES ON DATABASE runtime_api_db TO openhands_user; +GRANT ALL PRIVILEGES ON DATABASE automations TO openhands_user; + +-- Connect to each database and grant schema privileges +\c openhands +GRANT USAGE, CREATE ON SCHEMA public TO openhands_user; + +\c bitnami_keycloak +GRANT USAGE, CREATE ON SCHEMA public TO openhands_user; + +\c litellm +GRANT USAGE, CREATE ON SCHEMA public TO openhands_user; + +\c runtime_api_db +GRANT USAGE, CREATE ON SCHEMA public TO openhands_user; + +\c automations +GRANT USAGE, CREATE ON SCHEMA public TO openhands_user; +``` + +## Network Requirements + +Ensure your PostgreSQL instance is accessible from: + +- The OpenHands application pods/services +- The Keycloak service +- The LiteLLM proxy service +- The Runtime API service + +If using network policies or firewalls, allow connections on the PostgreSQL port (default: 5432) +from the OpenHands deployment. 
+ +## Configuration + +When configuring OpenHands Enterprise, provide your external PostgreSQL connection details +in the Admin Console or Helm values: + +- **Host**: Your PostgreSQL server hostname or IP +- **Port**: PostgreSQL port (default: 5432) +- **Username**: The database user created above +- **Password**: The user's password + + + For production deployments, we recommend enabling SSL/TLS for database connections. + + +### Bitbucket Data Center +Source: https://docs.openhands.dev/enterprise/integrations/bitbucket-data-center.md + +This guide explains how to connect Bitbucket Data Center to an OpenHands +Enterprise Replicated installation. The integration lets users sign in with +Bitbucket Data Center, open repositories, and invoke OpenHands from pull request +comments. + +## Prerequisites + +- A Bitbucket Data Center administrator who can create an OAuth 2.0 Application + Link. +- A currently supported Bitbucket Data Center version with OAuth 2.0 Application + Links enabled. If the application link flow does not show + incoming OAuth 2.0 settings, verify your Bitbucket Data Center version and + application link settings. +- Repository administrator access for users who will install repository + webhooks from OpenHands. +- Network access from OpenHands to Bitbucket Data Center for API calls, and + from Bitbucket Data Center back to the OpenHands app URL for webhook delivery. +- If Bitbucket Data Center uses an internal or self-signed certificate, upload + the issuing CA in the OpenHands Enterprise Admin Console under **Additional + Trusted CA Certificates** before deploying. + +## Create a Bitbucket OAuth Application Link + +In Bitbucket Data Center, create an OAuth 2.0 Application Link for OpenHands. +The exact menu labels can vary by Bitbucket version, but this is usually under +**Administration > Application Links**. 
+ +![Bitbucket Data Center Application Links settings](../images/bitbucket-data-center-application-links.png) + +Use this callback URL: + +```text +https://auth.app.<base-domain>/realms/allhands/broker/bitbucket_data_center/endpoint +``` + +Replace only `<base-domain>` in the callback URL. Leave the rest of +the path unchanged. + +Use your actual auth hostname, for example: + +```text +https://auth.app.openhands.example.com/realms/allhands/broker/bitbucket_data_center/endpoint +``` + +OpenHands requests the `REPO_ADMIN` OAuth scope so it can list repositories and +install or refresh repository webhooks from the OpenHands UI. Copy the client ID +and client secret. You will paste them into the OpenHands Enterprise Admin +Console. + + + `REPO_ADMIN` is required so OpenHands can list repositories in the UI and + create or refresh the `OpenHands Resolver` repository webhook. OpenHands does + not perform other repository administration actions. + + +![Bitbucket Data Center incoming OAuth link form](../images/bitbucket-data-center-incoming-link.png) + +## Create a Bot Token + +This step is strongly recommended but technically optional. When a bot token is +configured, OpenHands posts comments and reactions as the bot account instead of +as the user. + +Create a dedicated Bitbucket Data Center user for OpenHands. For example, create +a user named `openhands` with an email address such as +`openhands-bot@company.com`. Grant this user access to all repositories where +OpenHands should post comments or reactions. Then create an HTTP access token +for that user with **Repository permissions** set to **Repository write**. Store +the token securely. You will need to paste the HTTP access token into the +OpenHands Enterprise Admin Console. + +![Bitbucket Data Center HTTP access token setup](../images/bitbucket-data-center-bot-token.png) + +## Configure the Admin Console + +Open the Replicated Admin Console for your OpenHands Enterprise installation and +go to the application configuration page. 
+ +In **Bitbucket Data Center Authentication**: + +1. Enable **Bitbucket Data Center Authentication**. +2. Enter the **Bitbucket Data Center Domain**. +3. Enter the **Bitbucket Data Center Client ID**. +4. Enter the **Bitbucket Data Center Client Secret**. +5. Enter the **Bitbucket Data Center Bot Token** if you have one. +6. Save and deploy the updated configuration. + + + The Bitbucket Data Center Domain must be a bare hostname, for example + `bitbucket.example.com`. Do not include `https://`. + + +## Sign In with Bitbucket Data Center + +After the deployment is completed, users choose **Sign in with Bitbucket Data +Center** on your app's login page. + +On first sign-in, users may be asked to accept OpenHands terms and complete an +offline access flow. After sign-in, OpenHands stores the user's Bitbucket Data +Center token so it can list repositories and run resolver jobs as that user. + +## Install Repository Webhooks + +To trigger OpenHands on Bitbucket repositories, repository administrators can +install the OpenHands bot onto a repository from **Settings > Integrations** +within the OpenHands app. For each repository that should support `@openhands` +pull request comments, click **Install**. If a webhook already exists, click +**Reinstall** to refresh it. + +OpenHands creates or updates a repository webhook named `OpenHands Resolver`. +The webhook URL is connection-specific: + +```text +https://app./integration/bitbucket-dc/connections//events +``` + +OpenHands subscribes the webhook to repository and pull request events, +including pull request comment add, edit, and delete events. The signing secret +is generated and stored by OpenHands. + +## Trigger OpenHands from Bitbucket Data Center + +Open a pull request and add a comment containing `@openhands`. Inline pull +request comments are also supported. + +OpenHands starts a resolver job when: + +- The repository webhook is installed and active. +- The webhook delivery signature is valid. 
+- The mentioning Bitbucket user has signed in to OpenHands with Bitbucket Data + Center. +- The mentioning user has access to the repository. + +The resolver context includes the pull request title, description, current +comments, and the triggering comment. OpenHands replies back to the pull request +when the job starts and when it completes. + +## Troubleshooting + +| Symptom | Check | +| --- | --- | +| The Bitbucket Data Center login option is not visible | Confirm Bitbucket Data Center Authentication is enabled in the Admin Console and the deployment has been applied. | +| OAuth redirects fail | Confirm the callback URL exactly matches `https://auth.app./realms/allhands/broker/bitbucket_data_center/endpoint`. | +| Login tries to reach an invalid `https://https://...` URL | Remove `https://` from the Bitbucket Data Center Domain field in the Admin Console. | +| Repository webhook install fails | Confirm the user has repository admin access and the OAuth app grants `REPO_ADMIN`. | +| Webhook delivery reaches OpenHands but no job starts | Confirm the comment contains `@openhands`, the webhook is installed for that repository, and the mentioning Bitbucket user has signed in to OpenHands. | +| OpenHands cannot list Bitbucket repositories or install webhooks | Confirm the OpenHands cluster can reach the Bitbucket Data Center URL. | +| Bitbucket webhook deliveries do not reach OpenHands | Confirm the Bitbucket Data Center network can reach the OpenHands app URL. | +| Bitbucket API calls fail with TLS errors | Upload the Bitbucket Data Center CA certificate in **Additional Trusted CA Certificates** and redeploy. | + +### Jira Data Center +Source: https://docs.openhands.dev/enterprise/integrations/jira-data-center.md + +This guide explains how to connect Jira Data Center to an OpenHands Enterprise +Replicated installation. The integration lets users start OpenHands from Jira +issues by commenting with `@openhands` or by adding the `openhands` label. 
+ +![Jira Data Center issue with OpenHands comments](../images/jira-data-center-openhands-comment.png) + +## Prerequisites + +- Jira Data Center administrator access to create users, personal access + tokens, OAuth applications, and webhooks. +- A currently supported Jira Data Center version with OAuth 2.0 incoming + application links enabled. If you do not see **External + application** and **Incoming** while creating the link, verify your Jira Data + Center version and application link settings. +- Network access from OpenHands to Jira Data Center for API calls, and from + Jira Data Center back to the OpenHands app URL for webhook delivery. +- If Jira Data Center uses an internal or self-signed certificate, upload the + issuing CA in the OpenHands Enterprise Admin Console under **Additional + Trusted CA Certificates** before deploying. + + + Jira Data Center setup is global for the OpenHands Enterprise installation. + Service account values are configured in the Admin Console. Webhook setup is + completed later inside OpenHands. + + +## Create a Bot Token + +Create a dedicated Jira user for OpenHands. For example, create a user named +`openhands` with an email address such as `openhands-bot@company.com`. +OpenHands uses this bot account to read issues, add comments, and add +reactions. Grant it access to all Jira projects where OpenHands should read and +comment. + +After you have granted the bot user access, sign in as the `openhands` user and +create a Jira personal access token from the user's profile. Store it securely. +You will need to paste the bot account email and PAT into the OpenHands +Enterprise Admin Console. + +![Jira Data Center personal access token permissions inherit the user's access](../images/jira-data-center-personal-access-token-permissions.png) + +## Create a Jira OAuth Application + +OAuth linking is recommended because it lets team members prove ownership of +their Jira account before using OpenHands to process their Jira events. 
+ +In Jira Data Center, open **Administration > Applications > Application links** +and create a new link. When Jira asks what type of application to connect, +choose **External application**. For the direction, choose **Incoming** because +OpenHands connects to Jira during OAuth linking. + +![Jira Data Center create incoming OAuth link dialog](../images/jira-data-center-create-incoming-link.png) + +Configure the incoming link with this callback URL: + +```text +https://app./integration/jira-dc/callback +``` + +Use your actual app hostname, for example: + +```text +https://app.openhands.example.com/integration/jira-dc/callback +``` + +When prompted for OAuth scopes, select `WRITE` (allows OpenHands to link Jira +accounts and make Jira API calls within the user's granted Jira permissions). + +![Jira Data Center incoming OAuth link form](../images/jira-data-center-incoming-link-form.png) + +Copy the OAuth client ID and client secret and store them securely. You will +paste them into the Admin Console. + +![Jira Data Center OAuth credentials](../images/jira-data-center-oauth-credentials.png) + + + If your Jira Data Center installation cannot provide an OAuth application, you + can select email matching in the Admin Console instead. In that mode, + OpenHands links Jira users by matching their Jira email address to their + OpenHands email address. + + +## Configure the Admin Console + +Open the Replicated Admin Console for your OpenHands Enterprise installation and +go to the application configuration page. + +In **Jira Data Center Integration**: + +1. Enable **Jira Data Center Integration**. +2. Select the user linking method: + - **OAuth** is recommended. + - **Email match** can be used if OAuth is not available. +3. Enter the **Jira Data Center Service Account Email**. +4. Enter the **Jira Data Center Service Account PAT**. +5. If using OAuth, enter the **Jira Data Center Base URL**, including + `https://`. +6. 
If using OAuth, enter the **Jira Data Center OAuth Client ID** and + **OAuth Client Secret**. +7. Save and deploy the updated configuration. + + + The Jira Data Center Base URL must include the scheme, for example + `https://jira.example.com`. Do not enter only `jira.example.com`. + + +## Install the Jira Webhook + +After OpenHands is deployed, sign in to OpenHands and open +**Settings > Integrations > Jira Data Center**. + +If OAuth is enabled, click **Connect** and complete the Jira OAuth flow. Then set +up the webhook using one of the options below. + +### Automatic setup + +Choose **Install automatically** and paste a short-lived Jira admin PAT. +OpenHands uses this PAT once to call Jira's webhook API and then discards it. The +PAT is never stored. The automatic setup creates or updates a Jira global +webhook named `OpenHands` that points to this OpenHands URL. + +```text +https://app./integration/jira-dc/connections//events +``` + +### Manual setup + +Choose **Set it up in Jira myself**, then click **Generate webhook details**. +OpenHands saves the connection and shows a webhook URL and signing secret. + +![Jira Data Center manual webhook setup values](../images/jira-data-center-manual-webhook.png) + +Automatic setup is recommended. If you choose manual setup, create a global +webhook using the generated URL and signing secret. Jira must include the +request body and sign deliveries with the generated secret; if your Jira admin +UI does not support those settings, use automatic setup. + +Use these events: + +- `jira:issue_created` +- `jira:issue_updated` +- `jira:issue_deleted` +- `comment_created` +- `comment_updated` +- `comment_deleted` + +After saving the webhook in Jira, return to OpenHands and click +**I created the webhook**. + +## Link Users + +Each user who wants to invoke OpenHands from Jira should sign in to OpenHands and +connect their Jira Data Center account from **Settings > Integrations > Jira Data +Center**. 
+ + + Webhook setup is global for the OpenHands Enterprise installation. Only the + user setting up the integration needs to install the webhook or provide a Jira + admin PAT. Other teammates only need to connect their own Jira Data Center + account from **Settings > Integrations** before using `@openhands` from Jira. + + +When a Jira event arrives, OpenHands resolves the Jira user to an OpenHands user. +If the Jira user has an OpenHands account but has not connected Jira Data +Center, OpenHands comments on the issue asking them to connect their account and +try again. +If no OpenHands account exists for the Jira user's email address, OpenHands +comments on the issue asking the user to sign up and try again. + +## Trigger OpenHands from Jira + +Create or update a Jira issue with clear requirements. Include the target +repository in the issue description or in a follow-up comment, for example: + +```text +Repository: Acme/web-app +``` + +OpenHands looks for a line starting with `Repository:` followed by the same +`org/repo` format configured in your connected source control provider. + +Then trigger OpenHands with either: + +- A Jira comment containing `@openhands`. +- The `openhands` label on the issue. + +The invoking OpenHands user must have access to the target repository written in +the Jira issue. If OpenHands cannot determine or access the repository, it +comments on the issue with the next step to fix the repository reference or +access. + +## Troubleshooting + +| Symptom | Check | +| --- | --- | +| The Jira Data Center card is not visible in OpenHands | Confirm Jira Data Center Integration is enabled in the Admin Console and the deployment has been applied. | +| OAuth redirects fail | Confirm the Jira OAuth callback URL exactly matches `https://app./integration/jira-dc/callback`. | +| Automatic webhook setup fails | Confirm the admin PAT belongs to a Jira user allowed to create global webhooks. 
| +| Webhook deliveries return `403` | Confirm the webhook URL and signing secret match the values generated by OpenHands. | +| Webhook deliveries reach OpenHands but no job starts | Confirm the Jira user is linked, the integration is active, the comment contains `@openhands` or the issue update added the `openhands` label, and the user has access to the repository. | +| OAuth, issue reads, or automatic webhook setup fail with connection errors | Confirm the OpenHands cluster can reach the Jira Data Center URL. | +| Jira webhook deliveries do not reach OpenHands | Confirm the Jira Data Center network can reach the OpenHands app URL. | +| Jira API calls fail with TLS errors | Upload the Jira Data Center CA certificate in **Additional Trusted CA Certificates** and redeploy. | + ### Kubernetes Installation Source: https://docs.openhands.dev/enterprise/k8s-install.md @@ -38755,6 +41264,10 @@ OpenHands Enterprise consists of several components deployed as Kubernetes workl ## Guides + + Configure OpenHands to use your own PostgreSQL database instead of the bundled instance. + + Configure memory, CPU, and storage for optimal performance. @@ -39049,7 +41562,7 @@ Source: https://docs.openhands.dev/enterprise/quick-start.md This guide walks you through trialing OpenHands Enterprise on your own infrastructure. You'll provision infrastructure (AWS Terraform or a manual VM setup), configure -GitHub for user authentication, and set up Anthropic as your LLM provider. +GitHub for user authentication, and configure your LLM provider. ## Who This Is For @@ -39065,7 +41578,12 @@ If you want to use OpenHands immediately without infrastructure setup: Before you begin, make sure you have the following ready: -- **Anthropic API key** from the [Anthropic Console](https://console.anthropic.com/) + + Sign up for a free 30-day OpenHands Enterprise trial account. You'll need this to access the installer dashboard. 
+ + +- **LLM credentials** from your chosen provider, for example an Anthropic API + key from the [Anthropic Console](https://console.anthropic.com/) - **A GitHub account** with permission to create GitHub Apps - **An AWS account** with permissions to create EC2, VPC, and Route53 resources (**if using the AWS with Terraform path**) @@ -39164,6 +41682,7 @@ You will need a VM to host OpenHands Enterprise. Choose one of the options below |--------|---------| | `` | `openhands.example.com` | | `app.` | `app.openhands.example.com` | + | `analytics.app.` | `analytics.app.openhands.example.com` | | `auth.app.` | `auth.app.openhands.example.com` | | `llm-proxy.` | `llm-proxy.openhands.example.com` | | `runtime-api.` | `runtime-api.openhands.example.com` | @@ -39191,6 +41710,7 @@ All items below must be completed before running the installer: - Inbound ports are open: `80`, `443`, and `30000` - Outbound domains are reachable from the VM - GitHub App prerequisites are prepared +- (Optional) [External PostgreSQL](/enterprise/external-postgres) instance provisioned if using your own database Do not run the installer until preflight checks pass. @@ -39210,6 +41730,7 @@ Test DNS: for h in \ "${BASE_DOMAIN}" \ "app.${BASE_DOMAIN}" \ + "analytics.app.${BASE_DOMAIN}" \ "auth.app.${BASE_DOMAIN}" \ "llm-proxy.${BASE_DOMAIN}" \ "runtime-api.${BASE_DOMAIN}"; do @@ -39371,7 +41892,20 @@ You should now see the application configuration page. ### LLM Configuration -Enter your Anthropic API key from the [Anthropic Console](https://console.anthropic.com/). +Choose an LLM provider from the LLM Configuration dropdown and enter the details +from that provider. + +![LLM Configuration provider dropdown](./images/llm-configuration-provider-dropdown.png) + +For example, if you use Anthropic, enter your API key from the +[Anthropic Console](https://console.anthropic.com/). + +### Database Configuration + +By default, OpenHands Enterprise uses a bundled PostgreSQL database. 
If you need to use your +own PostgreSQL instance (for example, to integrate with existing database infrastructure or +meet specific backup/HA requirements), see [External PostgreSQL](/enterprise/external-postgres) +for setup instructions. ### GitHub Authentication @@ -39386,6 +41920,21 @@ Run our [script](https://github.com/All-Hands-AI/OpenHands-Cloud/tree/main/scrip Go back to the Installer Admin Console in your browser and enter the values from the Create GitHub App script output. For the private key, upload the file from the `keys` directory of the script location. +### Additional Integrations + +If your team uses Jira Data Center or Bitbucket Data Center, follow these guides +to configure Admin Console values before deployment and complete webhook setup +inside OpenHands after deployment. + + + + Configure Bitbucket Data Center login, repository access, bot identity, and pull request webhooks. + + + Configure Jira issue triggers, OAuth account linking, service account credentials, and Jira webhooks. + + + After filling in all fields, click **Continue** at the bottom of the page. ## Deploy and Verify diff --git a/llms.txt b/llms.txt index 1a94acc61..722355c30 100644 --- a/llms.txt +++ b/llms.txt @@ -9,7 +9,7 @@ from the OpenHands Software Agent SDK. - [ACP Agent](https://docs.openhands.dev/sdk/guides/agent-acp.md): Delegate to an ACP-compatible server (Claude Code, Gemini CLI, etc.) instead of calling an LLM directly. - [Agent](https://docs.openhands.dev/sdk/arch/agent.md): High-level architecture of the reasoning-action loop -- [Agent Server Package](https://docs.openhands.dev/sdk/arch/agent-server.md): HTTP API server for remote agent execution with workspace isolation, container orchestration, and multi-user support. +- [Agent Server Package](https://docs.openhands.dev/sdk/arch/agent-server.md): Install, run, and secure the OpenHands Agent Server for remote agent execution. 
- [Agent Settings](https://docs.openhands.dev/sdk/guides/agent-settings.md): Configure, serialize, and recreate agents from structured settings. - [Agent Skills & Context](https://docs.openhands.dev/sdk/guides/skill.md): Skills add specialized behaviors, domain knowledge, and context-aware triggers to your agent through structured prompts. - [API-based Sandbox](https://docs.openhands.dev/sdk/guides/agent-server/api-sandbox.md): Connect to hosted API-based agent server for fully managed infrastructure. @@ -47,7 +47,7 @@ from the OpenHands Software Agent SDK. - [LLM Registry](https://docs.openhands.dev/sdk/guides/llm-registry.md): Dynamically select and configure language models using the LLM registry. - [LLM Streaming](https://docs.openhands.dev/sdk/guides/llm-streaming.md): Stream LLM responses token-by-token for real-time display and interactive user experiences. - [LLM Subscriptions](https://docs.openhands.dev/sdk/guides/llm-subscriptions.md): Use your ChatGPT Plus/Pro subscription to access Codex models without consuming API credits. -- [Local Agent Server](https://docs.openhands.dev/sdk/guides/agent-server/local-server.md): Run agents through a local HTTP server with RemoteConversation for client-server architecture. +- [Local Agent Server](https://docs.openhands.dev/sdk/guides/agent-server/local-server.md): Install and run an OpenHands Agent Server on your machine, then connect to it from the SDK. - [MCP Integration](https://docs.openhands.dev/sdk/arch/mcp.md): High-level architecture of Model Context Protocol support - [Metrics Tracking](https://docs.openhands.dev/sdk/guides/metrics.md): Track token usage, costs, and latency metrics for your agents. - [Model Context Protocol](https://docs.openhands.dev/sdk/guides/mcp.md): Model Context Protocol (MCP) enables dynamic tool integration from external servers. Agents can discover and use MCP-provided tools automatically. @@ -110,7 +110,8 @@ from the OpenHands Software Agent SDK. 
- [API Keys Settings](https://docs.openhands.dev/openhands/usage/settings/api-keys-settings.md): View your OpenHands LLM key and create API keys to work with OpenHands programmatically. - [Application Settings](https://docs.openhands.dev/openhands/usage/settings/application-settings.md): Configure application-level settings for OpenHands. - [Automated Code Review](https://docs.openhands.dev/openhands/usage/use-cases/code-review.md): Set up automated PR reviews using OpenHands and the Software Agent SDK -- [Automations Overview](https://docs.openhands.dev/openhands/usage/automations/overview.md): Create scheduled tasks that run automatically in OpenHands Cloud and Enterprise. +- [Automated QA Testing](https://docs.openhands.dev/openhands/usage/use-cases/qa-changes.md): Validate pull request changes by actually running the software — not just reading code or running tests +- [Automations Overview](https://docs.openhands.dev/openhands/usage/automations/overview.md): Create scheduled tasks that run automatically in OpenHands. - [AWS Bedrock](https://docs.openhands.dev/openhands/usage/llms/aws-bedrock.md): OpenHands uses LiteLLM to make calls to AWS Bedrock models. You can find their documentation on using Bedrock as a provider [here](https://docs.litellm.ai/docs/providers/bedrock). - [Azure](https://docs.openhands.dev/openhands/usage/llms/azure-llms.md): OpenHands uses LiteLLM to make calls to Azure's chat models. You can find their documentation on using Azure as a provider [here](https://docs.litellm.ai/docs/providers/azure). - [Backend Architecture](https://docs.openhands.dev/openhands/usage/architecture/backend.md) @@ -134,7 +135,7 @@ from the OpenHands Software Agent SDK. 
- [Incident Triage](https://docs.openhands.dev/openhands/usage/use-cases/incident-triage.md): Using OpenHands to investigate and resolve production incidents - [Integrations Settings](https://docs.openhands.dev/openhands/usage/settings/integrations-settings.md): How to setup and modify the various integrations in OpenHands. - [Key Features](https://docs.openhands.dev/openhands/usage/key-features.md) -- [Language Model (LLM) Settings](https://docs.openhands.dev/openhands/usage/settings/llm-settings.md): This page goes over how to set the LLM to use in OpenHands. As well as some additional LLM settings. +- [Language Model (LLM) Settings](https://docs.openhands.dev/openhands/usage/settings/llm-settings.md): This page goes over how to set the LLM to use in OpenHands, including LLM profiles for switching models during conversations. - [LiteLLM Proxy](https://docs.openhands.dev/openhands/usage/llms/litellm-proxy.md): OpenHands supports using the [LiteLLM proxy](https://docs.litellm.ai/docs/proxy/quick_start) to access various LLM providers. - [Local LLMs](https://docs.openhands.dev/openhands/usage/llms/local-llms.md): When using a Local LLM, OpenHands may have limited functionality. It is highly recommended that you use GPUs to serve local models for optimal experience. - [Main Agent and Capabilities](https://docs.openhands.dev/openhands/usage/agents.md) @@ -176,14 +177,20 @@ from the OpenHands Software Agent SDK. - [Jira Cloud Integration](https://docs.openhands.dev/openhands/usage/cloud/project-management/jira-integration.md): Complete guide for setting up Jira Cloud integration with OpenHands Cloud, including service account creation, API token generation, webhook configuration, and workspace integration setup. 
- [Jira Data Center Integration (Coming soon...)](https://docs.openhands.dev/openhands/usage/cloud/project-management/jira-dc-integration.md): Complete guide for setting up Jira Data Center integration with OpenHands Cloud, including service account creation, personal access token generation, webhook configuration, and workspace integration setup. - [Linear Integration (Coming soon...)](https://docs.openhands.dev/openhands/usage/cloud/project-management/linear-integration.md): Complete guide for setting up Linear integration with OpenHands Cloud, including service account creation, API key generation, webhook configuration, and workspace integration setup. +- [Managing Members](https://docs.openhands.dev/openhands/usage/cloud/organizations/managing-members.md): How to invite users and manage team members in your organization. +- [Organization Settings](https://docs.openhands.dev/openhands/usage/cloud/organizations/settings.md): Configure shared resources and settings for your OpenHands organization. +- [Organizations Overview](https://docs.openhands.dev/openhands/usage/cloud/organizations/overview.md): Manage teams and collaborate with shared resources in OpenHands Cloud or OpenHands Enterprise. - [Plugin Launcher](https://docs.openhands.dev/openhands/usage/cloud/plugin-launcher.md): Use the OpenHands Cloud `/launch` route to open a conversation with plugins or skills pre-configured from a Git repository. - [Project Management Tool Integrations (Coming soon...)](https://docs.openhands.dev/openhands/usage/cloud/project-management/overview.md): Overview of OpenHands Cloud integrations with project management platforms including Jira Cloud, Jira Data Center, and Linear. Learn about setup requirements, usage methods, and troubleshooting. +- [Roles and Permissions](https://docs.openhands.dev/openhands/usage/cloud/organizations/roles-permissions.md): Understanding the different permission levels in OpenHands Organizations. 
- [Slack Integration](https://docs.openhands.dev/openhands/usage/cloud/slack-installation.md): This guide walks you through installing the OpenHands Slack app. ## OpenHands Overview +- [Adding New Skills](https://docs.openhands.dev/overview/skills/adding.md): Learn how to add existing skills to your OpenHands workspace from the official registry or custom repositories. - [Community](https://docs.openhands.dev/overview/community.md): Learn about the OpenHands community, mission, and values - [Contributing](https://docs.openhands.dev/overview/contributing.md): Join us in building OpenHands and the future of AI. Learn how to contribute to make a meaningful impact. +- [Creating New Skills](https://docs.openhands.dev/overview/skills/creating.md): Learn how to create reusable skills instead of repeating prompts, with best practices for structure, triggers, and content organization. - [FAQs](https://docs.openhands.dev/overview/faqs.md): Frequently asked questions about OpenHands. - [First Projects](https://docs.openhands.dev/overview/first-projects.md): So you've [run OpenHands](/overview/quickstart). Now what? - [General Skills](https://docs.openhands.dev/overview/skills/repo.md): General guidelines for OpenHands to work more effectively with the repository. @@ -191,13 +198,19 @@ from the OpenHands Software Agent SDK. - [Introduction](https://docs.openhands.dev/overview/introduction.md): Welcome to OpenHands, a community focused on AI-driven development - [Keyword-Triggered Skills](https://docs.openhands.dev/overview/skills/keyword.md): Keyword-triggered skills provide OpenHands with specific instructions that are activated when certain keywords appear in the prompt. This is useful for tailoring behavior based on particular tools, languages, or frameworks. 
- [Model Context Protocol (MCP)](https://docs.openhands.dev/overview/model-context-protocol.md): Model Context Protocol support across OpenHands platforms +- [Monitoring and Improving Skills](https://docs.openhands.dev/overview/skills/monitoring.md): Monitor skill performance in production using logging, evaluation metrics, dashboarding, and automated feedback aggregation. - [Organization and User Skills](https://docs.openhands.dev/overview/skills/org.md): Organizations and users can define skills that apply to all repositories belonging to the organization or user. - [Overview](https://docs.openhands.dev/overview/skills.md): Skills are specialized prompts that enhance OpenHands with domain-specific knowledge, expert guidance, and automated task handling. +- [Plugins](https://docs.openhands.dev/overview/plugins.md): Plugins bundle multiple agent components together—skills, hooks, MCP servers, agents, and commands—into reusable packages that extend OpenHands capabilities. - [Quick Start](https://docs.openhands.dev/overview/quickstart.md): Choose how you want to run OpenHands ## Other +- [Analytics](https://docs.openhands.dev/enterprise/analytics.md): Get started with LLM observability and tracing in OpenHands Enterprise. +- [Bitbucket Data Center](https://docs.openhands.dev/enterprise/integrations/bitbucket-data-center.md): Configure Bitbucket Data Center authentication and repository webhooks for OpenHands Enterprise. - [Enterprise vs. Open Source](https://docs.openhands.dev/enterprise/enterprise-vs-oss.md): Compare OpenHands Enterprise and Open Source offerings to choose the right option for your team +- [External PostgreSQL](https://docs.openhands.dev/enterprise/external-postgres.md): Configure OpenHands Enterprise to use your own PostgreSQL database +- [Jira Data Center](https://docs.openhands.dev/enterprise/integrations/jira-data-center.md): Configure Jira Data Center for OpenHands Enterprise. 
- [Kubernetes Installation](https://docs.openhands.dev/enterprise/k8s-install.md): Deploy OpenHands Enterprise into your own Kubernetes cluster using Helm - [OpenHands Enterprise](https://docs.openhands.dev/enterprise.md): Run AI coding agents on your own infrastructure with complete control - [Quick Start](https://docs.openhands.dev/enterprise/quick-start.md): Get started with a 30-day trial of OpenHands Enterprise. diff --git a/sdk/arch/agent-server.mdx b/sdk/arch/agent-server.mdx index 6a6aa6cb0..ab25e89f8 100644 --- a/sdk/arch/agent-server.mdx +++ b/sdk/arch/agent-server.mdx @@ -1,542 +1,177 @@ --- title: Agent Server Package -description: HTTP API server for remote agent execution with workspace isolation, container orchestration, and multi-user support. +description: Install, run, and secure the OpenHands Agent Server for remote agent execution. --- -The Agent Server package (`openhands.agent_server`) provides an HTTP API server for remote agent execution. It enables building multi-user systems, SaaS products, and distributed agent platforms. - -**Source**: [`openhands/agent_server/`](https://github.com/OpenHands/software-agent-sdk/tree/main/openhands-agent-server/openhands/agent_server) - -## Purpose - -The Agent Server enables: -- **Remote execution**: Clients interact with agents via HTTP API -- **Multi-user isolation**: Each user gets isolated workspace -- **Container orchestration**: Manages Docker containers for workspaces -- **Centralized management**: Monitor and control all agents -- **Scalability**: Horizontal scaling with multiple servers - -## Architecture Overview - -```mermaid -graph TB - Client[Web/Mobile Client] -->|HTTPS| API[FastAPI Server] - - API --> Auth[Authentication] - API --> Router[API Router] - - Router --> WS[Workspace Manager] - Router --> Conv[Conversation Handler] - - WS --> Docker[Docker Manager] - Docker --> C1[Container 1
User A] - Docker --> C2[Container 2
User B] - Docker --> C3[Container 3
User C] - - Conv --> Agent[Software Agent SDK] - Agent --> C1 - Agent --> C2 - Agent --> C3 - - style Client fill:#e1f5fe - style API fill:#fff3e0 - style WS fill:#e8f5e8 - style Docker fill:#f3e5f5 - style Agent fill:#fce4ec -``` - -### Key Components - -**1. FastAPI Server** -- HTTP REST API endpoints -- Authentication and authorization -- Request validation -- WebSocket support for streaming - -**2. Workspace Manager** -- Creates and manages Docker containers -- Isolates workspaces per user -- Handles container lifecycle -- Manages resource limits - -**3. Conversation Handler** -- Routes requests to appropriate workspace -- Manages conversation state -- Handles concurrent requests -- Supports streaming responses - -**4. Docker Manager** -- Interfaces with Docker daemon -- Builds and pulls images -- Creates and destroys containers -- Monitors container health - -## Design Decisions - -### Why HTTP API? - -Alternative approaches considered: -- **gRPC**: More efficient but harder for web clients -- **WebSockets only**: Good for streaming but not RESTful -- **HTTP + WebSockets**: Best of both worlds - -**Decision**: HTTP REST for operations, WebSockets for streaming -- ✅ Works from any client (web, mobile, CLI) -- ✅ Easy to debug (curl, Postman) -- ✅ Standard authentication (API keys, OAuth) -- ✅ Streaming where needed +The Agent Server package (`openhands-agent-server`) runs the OpenHands Software Agent SDK behind an HTTP and WebSocket API. Use it when another service, such as an Agent Canvas backend, needs to start conversations, stream events, and run file or command operations in a workspace without embedding the SDK directly in the same process. -### Why Container Per User? 
+## When to Use It -Alternative approaches: -- **Shared container**: Multiple users in one container -- **Container per session**: New container each conversation -- **Container per user**: One container per user (chosen) +Use the Agent Server when you need: -**Decision**: Container per user -- ✅ Strong isolation between users -- ✅ Persistent workspace across sessions -- ✅ Better resource management -- ⚠️ More containers, but worth it for isolation +- A backend process that clients can reach over HTTP/WebSocket. +- A long-running service for conversations and workspace files. +- A server API that can be protected with a session API key. +- A clean boundary between your application backend and the agent runtime. -### Why FastAPI? - -Alternative frameworks: -- **Flask**: Simpler but less type-safe -- **Django**: Too heavyweight -- **FastAPI**: Modern, fast, type-safe (chosen) - -**Decision**: FastAPI -- ✅ Automatic API documentation (OpenAPI) -- ✅ Type validation with Pydantic -- ✅ Async support for performance -- ✅ WebSocket support built-in - -## API Design - -### Key Endpoints - -**Workspace Management** -``` -POST /workspaces Create new workspace -GET /workspaces/{id} Get workspace info -DELETE /workspaces/{id} Delete workspace -POST /workspaces/{id}/execute Execute command -``` - -**Conversation Management** -``` -POST /conversations Create conversation -GET /conversations/{id} Get conversation -POST /conversations/{id}/messages Send message -GET /conversations/{id}/stream Stream responses (WebSocket) -``` +For a single local script, the standalone SDK is usually simpler. For a backend service, web UI, automation system, or Agent Canvas-style deployment, run an Agent Server and connect to it from the client service. 
-**Health & Monitoring** -``` -GET /health Server health check -GET /metrics Prometheus metrics -``` +## Install -### Authentication +Install the SDK packages in the Python environment that will run the server: -**API Key Authentication** ```bash -curl -H "Authorization: Bearer YOUR_API_KEY" \ - https://agent-server.example.com/conversations +python -m venv .venv +source .venv/bin/activate +pip install -U openhands-sdk openhands-tools openhands-workspace openhands-agent-server ``` -**Per-user workspace isolation** -- API key → user ID mapping -- Each user gets separate workspace -- Users can't access each other's workspaces - -### Streaming Responses +If you are working from the `OpenHands/software-agent-sdk` repository, use the repository's normal `uv` setup instead: -**WebSocket for real-time updates** -```python -async with websocket_connect(url) as ws: - # Send message - await ws.send_json({"message": "Hello"}) - - # Receive events - async for event in ws: - if event["type"] == "message": - print(event["content"]) +```bash +git clone https://github.com/OpenHands/software-agent-sdk.git +cd software-agent-sdk +uv sync +uv run python -m openhands.agent_server --host 127.0.0.1 --port 8000 ``` -**Why streaming?** -- Real-time feedback to users -- Show agent thinking process -- Better UX for long-running tasks +## Start a Local Server -## Deployment Models +For local-only use, bind to `127.0.0.1`: -### 1. Local Development - -Run server locally for testing: ```bash -# Start server -openhands-agent-server --port 8000 - -# Or with Docker -docker run -p 8000:8000 \ - -v /var/run/docker.sock:/var/run/docker.sock \ - ghcr.io/all-hands-ai/agent-server:latest +python -m openhands.agent_server --host 127.0.0.1 --port 8000 ``` -**Use case**: Development and testing - -### 2. 
Single-Server Deployment +Check that the server is alive: -Deploy on one server (VPS, EC2, etc.): ```bash -# Install -pip install openhands-agent-server - -# Run with systemd/supervisor -openhands-agent-server \ - --host 0.0.0.0 \ - --port 8000 \ - --workers 4 +curl http://127.0.0.1:8000/health ``` -**Use case**: Small deployments, prototypes, MVPs - -### 3. Multi-Server Deployment - -Scale horizontally with load balancer: -``` - Load Balancer - | - +-------------+-------------+ - | | | - Server 1 Server 2 Server 3 - (Agents) (Agents) (Agents) - | | | - +-------------+-------------+ - | - Shared State Store - (Database, Redis, etc.) -``` +The interactive API docs are available at: -**Use case**: Production SaaS, high traffic, need redundancy - -### 4. Kubernetes Deployment - -Container orchestration with Kubernetes: -```yaml -apiVersion: apps/v1 -kind: Deployment -metadata: - name: agent-server -spec: - replicas: 3 - template: - spec: - containers: - - name: agent-server - image: ghcr.io/all-hands-ai/agent-server:latest - ports: - - containerPort: 8000 +```text +http://127.0.0.1:8000/docs ``` -**Use case**: Enterprise deployments, auto-scaling, high availability +## Secure the Server -## Resource Management +By default, the Agent Server starts without API authentication. Before exposing it to another process, container, host, or user, set at least one session API key. 
-### Container Limits +```bash +export OH_SESSION_API_KEYS_0="$(openssl rand -hex 32)" +export OH_SECRET_KEY="$(openssl rand -hex 32)" -Set per-workspace resource limits: -```python -# In server configuration -WORKSPACE_CONFIG = { - "resource_limits": { - "memory": "2g", # 2GB RAM - "cpus": "2", # 2 CPU cores - "disk": "10g" # 10GB disk - }, - "timeout": 300, # 5 min timeout -} +python -m openhands.agent_server --host 127.0.0.1 --port 8000 ``` -**Why limit resources?** -- Prevent one user from consuming all resources -- Fair usage across users -- Protect server from runaway processes -- Cost control - -### Cleanup & Garbage Collection - -**Container lifecycle**: -- Containers created on first use -- Kept alive between requests (warm) -- Cleaned up after inactivity timeout -- Force cleanup on server shutdown - -**Storage management**: -- Old workspaces deleted automatically -- Disk usage monitored -- Alerts when approaching limits - -## Security Considerations - -### Multi-Tenant Isolation - -**Container isolation**: -- Each user gets separate container -- Containers can't communicate -- Network isolation (optional) -- File system isolation +Clients must send the session key in the `X-Session-API-Key` header: -**API isolation**: -- API keys mapped to users -- Users can only access their workspaces -- Server validates all permissions - -### Input Validation - -**Server validates**: -- API request schemas -- Command injection attempts -- Path traversal attempts -- File size limits - -**Defense in depth**: -- API validation -- Container validation -- Docker security features -- OS-level security - -### Network Security - -**Best practices**: -- HTTPS only (TLS certificates) -- Firewall rules (only port 443/8000) -- Rate limiting -- DDoS protection - -**Container networking**: -```python -# Disable network for workspace -WORKSPACE_CONFIG = { - "network_mode": "none" # No network access -} - -# Or allow specific hosts -WORKSPACE_CONFIG = { - "allowed_hosts": 
["api.example.com"] -} +```bash +curl \ + -H "X-Session-API-Key: $OH_SESSION_API_KEYS_0" \ + http://127.0.0.1:8000/api/conversations ``` -## Monitoring & Observability - -### Health Checks +Use additional indexed variables when you need key rotation: ```bash -# Simple health check -curl https://agent-server.example.com/health - -# Response -{ - "status": "healthy", - "docker": "connected", - "workspaces": 15, - "uptime": 86400 -} -``` - -### Metrics - -**Prometheus metrics**: -- Request count and latency -- Active workspaces -- Container resource usage -- Error rates - -**Logging**: -- Structured JSON logs -- Per-request tracing -- Workspace events -- Error tracking - -### Alerting - -**Alert on**: -- Server down -- High error rate -- Resource exhaustion -- Container failures - -## Client Integration Architecture - -The SDK implements a **workspace-based dispatch pattern** for connecting to agent servers. The `Conversation` factory inspects the workspace type and returns the appropriate conversation implementation. 
- -```mermaid -flowchart LR - Conv["Conversation()"] --> Check{"Workspace Type?"} - Check -->|LocalWorkspace| Local["LocalConversation"] - Check -->|RemoteWorkspace| Remote["RemoteConversation"] - Remote -->|HTTP/WebSocket| Server["Agent Server"] - - style Conv fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px - style Remote fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px - style Server fill:#fff4df,stroke:#b7791f,stroke-width:2px +export OH_SESSION_API_KEYS_0="current-key" +export OH_SESSION_API_KEYS_1="next-key" ``` -### Workspace Types - -| Workspace | Conversation Type | Communication | -|-----------|------------------|---------------| -| [`LocalWorkspace`](https://github.com/OpenHands/software-agent-sdk/blob/main/openhands-sdk/openhands/sdk/workspace/local.py) | [`LocalConversation`](https://github.com/OpenHands/software-agent-sdk/blob/main/openhands-sdk/openhands/sdk/conversation/impl/local_conversation.py) | Direct execution | -| [`OpenHandsCloudWorkspace`](https://github.com/OpenHands/software-agent-sdk/blob/main/openhands-workspace/openhands/workspace/cloud/workspace.py) | [`RemoteConversation`](https://github.com/OpenHands/software-agent-sdk/blob/main/openhands-sdk/openhands/sdk/conversation/impl/remote_conversation.py) | HTTPS + WebSocket to OpenHands Cloud | -| [`APIRemoteWorkspace`](https://github.com/OpenHands/software-agent-sdk/blob/main/openhands-workspace/openhands/workspace/remote_api/workspace.py) | [`RemoteConversation`](https://github.com/OpenHands/software-agent-sdk/blob/main/openhands-sdk/openhands/sdk/conversation/impl/remote_conversation.py) | HTTPS + WebSocket to Runtime API | -| [`DockerWorkspace`](https://github.com/OpenHands/software-agent-sdk/blob/main/openhands-workspace/openhands/workspace/docker/workspace.py) | [`RemoteConversation`](https://github.com/OpenHands/software-agent-sdk/blob/main/openhands-sdk/openhands/sdk/conversation/impl/remote_conversation.py) | HTTP + WebSocket to local container | - -### 
[`RemoteConversation`](https://github.com/OpenHands/software-agent-sdk/blob/main/openhands-sdk/openhands/sdk/conversation/impl/remote_conversation.py) Responsibilities - -The `RemoteConversation` implementation handles all client-server concerns: - -- **Session management**: Authenticates and maintains connection to agent server -- **Event streaming**: WebSocket connection for real-time agent events -- **Request routing**: HTTP calls for conversation lifecycle operations -- **Reconnection**: Automatic retry logic for transient failures - -### Usage Examples - -For complete working examples with all required setup: - -- **[OpenHands Cloud Workspace](/sdk/guides/agent-server/cloud-workspace)** - Managed cloud infrastructure -- **[API-based Sandbox](/sdk/guides/agent-server/api-sandbox)** - Custom runtime environments -- **[Docker Sandbox](/sdk/guides/agent-server/docker-sandbox)** - Local containerized execution - -## Cost Considerations - -### Server Costs + + `OH_SECRET_KEY` encrypts sensitive values stored with conversations, including LLM API keys and secrets. Keep it stable across restarts. If it changes, previously encrypted values cannot be restored. + -**Compute**: CPU and memory for containers -- Each active workspace = 1 container -- Typically 1-2 GB RAM per workspace -- 0.5-1 CPU core per workspace +## Connect From Python -**Storage**: Workspace files and conversation state -- ~1-10 GB per workspace (depends on usage) -- Conversation history in database +Pass the server URL and API key to `Workspace`. The SDK sends the key as `X-Session-API-Key` and uses remote HTTP/WebSocket calls for workspace and conversation operations. -**Network**: API requests and responses -- Minimal (mostly text) -- Streaming adds bandwidth - -### Cost Optimization - -**1. Idle timeout**: Shutdown containers after inactivity -```python -WORKSPACE_CONFIG = { - "idle_timeout": 3600 # 1 hour -} -``` - -**2. 
Resource limits**: Don't over-provision ```python -WORKSPACE_CONFIG = { - "resource_limits": { - "memory": "1g", # Smaller limit - "cpus": "0.5" # Fractional CPU - } -} -``` - -**3. Shared resources**: Use single server for multiple low-traffic apps +import os -**4. Auto-scaling**: Scale servers based on demand +from pydantic import SecretStr -## When to Use Agent Server +from openhands.sdk import Conversation, LLM, Workspace +from openhands.tools.preset.default import get_default_agent -### Use Agent Server When: -✅ **Multi-user system**: Web app with many users -✅ **Remote clients**: Mobile app, web frontend -✅ **Centralized management**: Need to monitor all agents -✅ **Workspace isolation**: Users shouldn't interfere -✅ **SaaS product**: Building agent-as-a-service -✅ **Scaling**: Need to handle concurrent users +llm = LLM( + model=os.environ.get("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929"), + api_key=SecretStr(os.environ["LLM_API_KEY"]), +) +agent = get_default_agent(llm=llm, cli_mode=True) -**Examples**: -- Chatbot platforms -- Code assistant web apps -- Agent marketplaces -- Enterprise agent deployments +workspace = Workspace( + host="http://127.0.0.1:8000", + api_key=os.environ["OH_SESSION_API_KEYS_0"], + working_dir="workspace/project", +) -### Use Standalone SDK When: +conversation = Conversation(agent=agent, workspace=workspace) +conversation.send_message("Create a TODO.md file with three setup tasks.") +conversation.run() +conversation.close() +``` -✅ **Single-user**: Personal tool or script -✅ **Local execution**: Running on your machine -✅ **Full control**: Need programmatic access -✅ **Simpler deployment**: No server management -✅ **Lower latency**: No network overhead +## Expose It Safely -**Examples**: -- CLI tools -- Automation scripts -- Local development -- Desktop applications +If another service runs on the same machine, keep the server bound to `127.0.0.1` and let that service connect locally. 
-### Hybrid Approach +If another host must connect to the server: -Use SDK locally but RemoteAPIWorkspace for execution: -- Agent logic in your Python code -- Execution happens on remote server -- Best of both worlds +1. Set `OH_SESSION_API_KEYS_0` and `OH_SECRET_KEY`. +2. Bind the server to a reachable interface, for example `--host 0.0.0.0`. +3. Put the server behind TLS, a private network, or a trusted reverse proxy. +4. Restrict firewall access to only the services that need it. +5. Configure CORS only for browser clients that must call the server directly. -## Building Custom Agent Server +```bash +export OH_SESSION_API_KEYS_0="$(openssl rand -hex 32)" +export OH_SECRET_KEY="$(openssl rand -hex 32)" +export OH_ALLOW_CORS_ORIGINS_0="https://your-frontend.example.com" -The server is extensible for custom needs: +python -m openhands.agent_server --host 0.0.0.0 --port 8000 +``` -**Custom authentication**: -```python -from openhands.agent_server import AgentServer + + Do not expose an unauthenticated Agent Server on a public network. It can execute commands and read or write files in its configured workspace. 
+ -class CustomAgentServer(AgentServer): - async def authenticate(self, request): - # Custom auth logic - return await oauth_verify(request) -``` +## Runtime Files -**Custom workspace configuration**: -```python -server = AgentServer( - workspace_factory=lambda user: DockerWorkspace( - image=f"custom-image-{user.tier}", - resource_limits=user.resource_limits - ) -) -``` +By default, the server stores conversation and workspace data under `workspace/` relative to the process working directory: -**Custom middleware**: -```python -@server.middleware -async def logging_middleware(request, call_next): - # Custom logging - response = await call_next(request) - return response +```text +workspace/ +|-- bash_events/ +|-- conversations/ +`-- project/ ``` -## Next Steps +Run the server from a directory with enough disk space and with permissions appropriate for the files the agent should access. -### For Usage Examples +## Useful Endpoints -- [Local Agent Server](/sdk/guides/agent-server/local-server) - Run locally -- [Docker Sandboxed Server](/sdk/guides/agent-server/docker-sandbox) - Docker setup -- [API Sandboxed Server](/sdk/guides/agent-server/api-sandbox) - Remote API -- [Remote Agent Server Overview](/sdk/guides/agent-server/overview) - All options +- `GET /health` - Basic health check. +- `GET /ready` - Readiness check after startup initialization. +- `GET /server_info` - Version, uptime, and available tool information. +- `GET /docs` - Interactive OpenAPI documentation. +- `/api/*` - Authenticated conversation, workspace, file, command, and settings APIs when session API keys are configured. -### For Related Architecture +## Troubleshooting -- [Workspace Architecture](/sdk/arch/workspace) - RemoteAPIWorkspace details -- [SDK Architecture](/sdk/arch/sdk) - Core framework -- [Architecture Overview](/sdk/arch/overview) - System design +- **401 responses**: Send `X-Session-API-Key` with one of the configured `OH_SESSION_API_KEYS_*` values. 
+- **Secrets disappear after restart**: Set a stable `OH_SECRET_KEY` before starting the server. +- **Port already in use**: Change the port with `--port`. +- **Browser CORS errors**: Add the browser origin with `OH_ALLOW_CORS_ORIGINS_0`. +- **Cannot reach the server from another host**: Check `--host`, firewall rules, reverse proxy routing, and TLS configuration. -### For Implementation Details +## Next Steps -- [`openhands/agent_server/`](https://github.com/OpenHands/software-agent-sdk/tree/main/openhands-agent-server/openhands/agent_server) - Server source -- [`examples/`](https://github.com/OpenHands/software-agent-sdk/tree/main/examples) - Working examples +- [Local Agent Server](/sdk/guides/agent-server/local-server) - Run and connect to a local server. +- [Docker Sandboxed Server](/sdk/guides/agent-server/docker-sandbox) - Run the server in an isolated Docker workspace. +- [API Sandboxed Server](/sdk/guides/agent-server/api-sandbox) - Start agent servers through a hosted runtime API. +- [Agent Server API Reference](/sdk/guides/agent-server/api-reference/server-details/alive) - Browse the generated REST API docs. diff --git a/sdk/guides/agent-server/local-server.mdx b/sdk/guides/agent-server/local-server.mdx index 541c5038c..70bdab51c 100644 --- a/sdk/guides/agent-server/local-server.mdx +++ b/sdk/guides/agent-server/local-server.mdx @@ -1,368 +1,147 @@ --- title: Local Agent Server -description: Run agents through a local HTTP server with RemoteConversation for client-server architecture. +description: Install and run an OpenHands Agent Server on your machine, then connect to it from the SDK. --- import RunExampleCode from "/sdk/shared-snippets/how-to-run-example.mdx"; -> A ready-to-run example is available [here](#ready-to-run-example)! - -The Local Agent Server demonstrates how to run a remote agent server locally and connect to it using `RemoteConversation`. 
This pattern is useful for local development, testing, and scenarios where you want to separate the client code from the agent execution environment. - -## Key Concepts - -### Managed API Server - -The ready-to-run example includes a `ManagedAPIServer` context manager that handles starting and stopping the server subprocess: - -```python icon="python" focus={1, 2, 4, 5} -class ManagedAPIServer: - """Context manager for subprocess-managed OpenHands API server.""" - - def __enter__(self): - """Start the API server subprocess.""" - self.process = subprocess.Popen( - [ - "python", - "-m", - "openhands.agent_server", - "--port", - str(self.port), - "--host", - self.host, - ], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - env={"LOG_JSON": "true", **os.environ}, - ) -``` - -The server starts with `python -m openhands.agent_server` and automatically handles health checks to ensure it's ready before proceeding. +Run a local Agent Server when you want a backend process to host OpenHands conversations over HTTP and WebSocket. This is the simplest setup for testing Agent Canvas-style backends, local integrations, and client-server SDK applications. -### Remote Workspace +## Install -When connecting to a remote server, you need to provide a `Workspace` that connects to that server: +Create a Python environment and install the server package with the SDK packages it uses: -```python icon="python" -workspace = Workspace(host=server.base_url) -result = workspace.execute_command("pwd") +```bash +python -m venv .venv +source .venv/bin/activate +pip install -U openhands-sdk openhands-tools openhands-workspace openhands-agent-server ``` -When `host` is provided, the `Workspace` returns an instance of `RemoteWorkspace` ([source](https://github.com/OpenHands/software-agent-sdk/blob/main/openhands-sdk/openhands/sdk/workspace/workspace.py)). -The `Workspace` object communicates with the remote server's API to execute commands and manage files. 
- -### RemoteConversation +## Start Without Authentication -When you pass a remote `Workspace` to `Conversation`, it automatically becomes a `RemoteConversation` ([source](https://github.com/OpenHands/software-agent-sdk/blob/main/openhands-sdk/openhands/sdk/conversation/conversation.py)): +For local development on your own machine, start the server on loopback: -```python icon="python" focus={1, 3, 7} -conversation = Conversation( - agent=agent, - workspace=workspace, - callbacks=[event_callback], - visualize=True, -) -assert isinstance(conversation, RemoteConversation) +```bash +python -m openhands.agent_server --host 127.0.0.1 --port 8000 ``` -`RemoteConversation` handles communication with the remote agent server over WebSocket for real-time event streaming. - -### Event Callbacks +Verify that it is running: -Callbacks receive events in real-time as they happen on the remote server: - -```python icon="python" -def event_callback(event): - """Callback to capture events for testing.""" - event_type = type(event).__name__ - logger.info(f"🔔 Callback received event: {event_type}\n{event}") - received_events.append(event) - event_tracker["last_event_time"] = time.time() +```bash +curl http://127.0.0.1:8000/health ``` -This enables monitoring agent activity, tracking progress, and implementing custom event handling logic. +Open the API docs at `http://127.0.0.1:8000/docs`. + + + This unauthenticated mode is only appropriate for local development. Do not bind an unauthenticated server to a public or shared network interface. 
+ -### Conversation State +## Start With an API Key -The conversation state provides access to all events and status: +Set a session API key before starting the server: -```python icon="python" -# Count total events using state.events -total_events = len(conversation.state.events) -logger.info(f"📈 Total events in conversation: {total_events}") +```bash +export OH_SESSION_API_KEYS_0="$(openssl rand -hex 32)" +export OH_SECRET_KEY="$(openssl rand -hex 32)" -# Get recent events (last 5) using state.events -all_events = conversation.state.events -recent_events = all_events[-5:] if len(all_events) >= 5 else all_events +python -m openhands.agent_server --host 127.0.0.1 --port 8000 ``` -This allows you to inspect the conversation history, analyze agent behavior, and build custom monitoring tools. +Requests to `/api/*` must include the session key: -## Ready-to-run Example +```bash +curl \ + -H "X-Session-API-Key: $OH_SESSION_API_KEYS_0" \ + http://127.0.0.1:8000/api/conversations +``` - -This example is available on GitHub: [examples/02_remote_agent_server/01_convo_with_local_agent_server.py](https://github.com/OpenHands/software-agent-sdk/blob/main/examples/02_remote_agent_server/01_convo_with_local_agent_server.py) - +Use `OH_SECRET_KEY` whenever you want conversations and stored settings to survive restarts with their sensitive values intact. Keep this value stable and store it in your normal secret manager. 
+ +## Connect From the SDK -This example shows how to programmatically start a local agent server and interact with it through a `RemoteConversation`: +Use `Workspace(host=..., api_key=...)` to connect SDK code to the server: -```python icon="python" expandable examples/02_remote_agent_server/01_convo_with_local_agent_server.py +```python import os -import subprocess -import sys -import tempfile -import threading -import time from pydantic import SecretStr -from openhands.sdk import LLM, Conversation, RemoteConversation, Workspace, get_logger -from openhands.sdk.event import ConversationStateUpdateEvent +from openhands.sdk import Conversation, LLM, Workspace from openhands.tools.preset.default import get_default_agent -logger = get_logger(__name__) - - -def _stream_output(stream, prefix, target_stream): - """Stream output from subprocess to target stream with prefix.""" - try: - for line in iter(stream.readline, ""): - if line: - target_stream.write(f"[{prefix}] {line}") - target_stream.flush() - except Exception as e: - print(f"Error streaming {prefix}: {e}", file=sys.stderr) - finally: - stream.close() - - -class ManagedAPIServer: - """Context manager for subprocess-managed OpenHands API server.""" - - def __init__(self, port: int = 8000, host: str = "127.0.0.1"): - self.port: int = port - self.host: str = host - self.process: subprocess.Popen[str] | None = None - self.base_url: str = f"http://{host}:{port}" - self.stdout_thread: threading.Thread | None = None - self.stderr_thread: threading.Thread | None = None - - def __enter__(self): - """Start the API server subprocess.""" - print(f"Starting OpenHands API server on {self.base_url}...") - - # Start the server process - self.process = subprocess.Popen( - [ - "python", - "-m", - "openhands.agent_server", - "--port", - str(self.port), - "--host", - self.host, - ], - stdout=subprocess.PIPE, - stderr=subprocess.PIPE, - text=True, - env={"LOG_JSON": "true", **os.environ}, - ) - - # Start threads to stream stdout and 
stderr - assert self.process is not None - assert self.process.stdout is not None - assert self.process.stderr is not None - self.stdout_thread = threading.Thread( - target=_stream_output, - args=(self.process.stdout, "SERVER", sys.stdout), - daemon=True, - ) - self.stderr_thread = threading.Thread( - target=_stream_output, - args=(self.process.stderr, "SERVER", sys.stderr), - daemon=True, - ) - - self.stdout_thread.start() - self.stderr_thread.start() - - # Wait for server to be ready - max_retries = 30 - for i in range(max_retries): - try: - import httpx - - response = httpx.get(f"{self.base_url}/health", timeout=1.0) - if response.status_code == 200: - print(f"API server is ready at {self.base_url}") - return self - except Exception: - pass - - assert self.process is not None - if self.process.poll() is not None: - # Process has terminated - raise RuntimeError( - "Server process terminated unexpectedly. " - "Check the server logs above for details." - ) - - time.sleep(1) - - raise RuntimeError(f"Server failed to start after {max_retries} seconds") - - def __exit__(self, exc_type, exc_val, exc_tb): - """Stop the API server subprocess.""" - if self.process: - print("Stopping API server...") - self.process.terminate() - try: - self.process.wait(timeout=5) - except subprocess.TimeoutExpired: - print("Force killing API server...") - self.process.kill() - self.process.wait() - - # Wait for streaming threads to finish (they're daemon threads, - # so they'll stop automatically) - # But give them a moment to flush any remaining output - time.sleep(0.5) - print("API server stopped.") - - -api_key = os.getenv("LLM_API_KEY") -assert api_key is not None, "LLM_API_KEY environment variable is not set." 
- llm = LLM( - usage_id="agent", - model=os.getenv("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929"), - base_url=os.getenv("LLM_BASE_URL"), - api_key=SecretStr(api_key), + model=os.environ.get("LLM_MODEL", "anthropic/claude-sonnet-4-5-20250929"), + api_key=SecretStr(os.environ["LLM_API_KEY"]), ) -title_gen_llm = LLM( - usage_id="title-gen-llm", - model=os.getenv("LLM_MODEL", "openhands/gpt-5-mini-2025-08-07"), - base_url=os.getenv("LLM_BASE_URL"), - api_key=SecretStr(api_key), +agent = get_default_agent(llm=llm, cli_mode=True) + +workspace = Workspace( + host="http://127.0.0.1:8000", + api_key=os.environ.get("OH_SESSION_API_KEYS_0"), + working_dir="workspace/project", ) -# Use managed API server -with ManagedAPIServer(port=8001) as server: - # Create agent - agent = get_default_agent( - llm=llm, - cli_mode=True, # Disable browser tools for simplicity - ) - - # Define callbacks to test the WebSocket functionality - received_events = [] - event_tracker = {"last_event_time": time.time()} - - def event_callback(event): - """Callback to capture events for testing.""" - event_type = type(event).__name__ - logger.info(f"🔔 Callback received event: {event_type}\n{event}") - received_events.append(event) - event_tracker["last_event_time"] = time.time() - - # Create RemoteConversation with callbacks - # NOTE: Workspace is required for RemoteConversation - # Use a temp directory that exists and is accessible in CI environments - temp_workspace_dir = tempfile.mkdtemp(prefix="agent_server_demo_") - workspace = Workspace(host=server.base_url, working_dir=temp_workspace_dir) - result = workspace.execute_command("pwd") - logger.info( - f"Command '{result.command}' completed with exit code {result.exit_code}" - ) - logger.info(f"Output: {result.stdout}") - - conversation = Conversation( - agent=agent, - workspace=workspace, - callbacks=[event_callback], - ) - assert isinstance(conversation, RemoteConversation) - - try: - logger.info(f"\n📋 Conversation ID: 
{conversation.state.id}") - - # Send first message and run - logger.info("📝 Sending first message...") - conversation.send_message( - "Read the current repo and write 3 facts about the project into FACTS.txt." - ) - - # Generate title using a specific LLM - title = conversation.generate_title(max_length=60, llm=title_gen_llm) - logger.info(f"Generated conversation title: {title}") - - logger.info("🚀 Running conversation...") - conversation.run() - - logger.info("✅ First task completed!") - logger.info(f"Agent status: {conversation.state.execution_status}") - - # Wait for events to stop coming (no events for 2 seconds) - logger.info("⏳ Waiting for events to stop...") - while time.time() - event_tracker["last_event_time"] < 2.0: - time.sleep(0.1) - logger.info("✅ Events have stopped") - - logger.info("🚀 Running conversation again...") - conversation.send_message("Great! Now delete that file.") - conversation.run() - logger.info("✅ Second task completed!") - - # Demonstrate state.events functionality - logger.info("\n" + "=" * 50) - logger.info("📊 Demonstrating State Events API") - logger.info("=" * 50) - - # Count total events using state.events - total_events = len(conversation.state.events) - logger.info(f"📈 Total events in conversation: {total_events}") - - # Get recent events (last 5) using state.events - logger.info("\n🔍 Getting last 5 events using state.events...") - all_events = conversation.state.events - recent_events = all_events[-5:] if len(all_events) >= 5 else all_events - - for i, event in enumerate(recent_events, 1): - event_type = type(event).__name__ - timestamp = getattr(event, "timestamp", "Unknown") - logger.info(f" {i}. 
{event_type} at {timestamp}") - - # Let's see what the actual event types are - logger.info("\n🔍 Event types found:") - event_types = set() - for event in recent_events: - event_type = type(event).__name__ - event_types.add(event_type) - for event_type in sorted(event_types): - logger.info(f" - {event_type}") - - # Print all ConversationStateUpdateEvent - logger.info("\n🗂️ ConversationStateUpdateEvent events:") - for event in conversation.state.events: - if isinstance(event, ConversationStateUpdateEvent): - logger.info(f" - {event}") - - cost = conversation.conversation_stats.get_combined_metrics().accumulated_cost - print(f"EXAMPLE_COST: {cost}") - - finally: - # Clean up - print("\n🧹 Cleaning up conversation...") - conversation.close() +conversation = Conversation(agent=agent, workspace=workspace) +conversation.send_message("Create a NOTES.md file with three facts about this project.") +conversation.run() +conversation.close() +``` + +If the server was started without `OH_SESSION_API_KEYS_0`, omit `api_key`. + +## Connect From Another Service + +For a backend service, configure two values: + +- The Agent Server URL, for example `http://127.0.0.1:8000`. +- The session API key, passed as `X-Session-API-Key` on API requests or as `api_key` when using the SDK `Workspace`. + +Keep the Agent Server bound to `127.0.0.1` when the backend runs on the same machine. If the backend runs on another host, use a private network or reverse proxy, enable TLS, and restrict network access to trusted callers. + +## Configure CORS + +Most backends call the Agent Server from server-side code and do not need CORS. If browser code calls the Agent Server directly, allow that origin: + +```bash +export OH_ALLOW_CORS_ORIGINS_0="https://your-frontend.example.com" ``` +Localhost and `127.0.0.1` origins are allowed automatically. 
+ +## Run From the SDK Repository + +When developing `OpenHands/software-agent-sdk` itself: + +```bash +git clone https://github.com/OpenHands/software-agent-sdk.git +cd software-agent-sdk +uv sync +uv run python -m openhands.agent_server --host 127.0.0.1 --port 8000 +``` + +## Ready-to-Run Example + + + This example is available on GitHub: [examples/02_remote_agent_server/01_convo_with_local_agent_server.py](https://github.com/OpenHands/software-agent-sdk/blob/main/examples/02_remote_agent_server/01_convo_with_local_agent_server.py). + + +The example starts a local Agent Server subprocess, waits for it to become healthy, connects with `Workspace(host=...)`, and runs a `RemoteConversation`. + +## Troubleshooting + +- **401 Unauthorized**: Check that the client sends `X-Session-API-Key` and that it matches `OH_SESSION_API_KEYS_0`. +- **Secrets are missing after restart**: Set a stable `OH_SECRET_KEY` before starting the server. +- **The server is reachable locally but not from another machine**: Use `--host 0.0.0.0` only behind trusted network controls, then check firewall and proxy rules. +- **CORS errors in a browser**: Set `OH_ALLOW_CORS_ORIGINS_0` to the browser app origin. +- **Port conflict**: Start with another port, for example `--port 8001`. + ## Next Steps -- **[Docker Sandboxed Server](/sdk/guides/agent-server/docker-sandbox)** - Run server in Docker for isolation -- **[API Sandboxed Server](/sdk/guides/agent-server/api-sandbox)** - Connect to hosted API service -- **[Agent Server Overview](/sdk/guides/agent-server/overview)** - Architecture and implementation details -- **[Agent Server Package Architecture](/sdk/arch/agent-server)** - Remote execution architecture +- [Agent Server Package](/sdk/arch/agent-server) - Installation, security, and operational guidance. +- [Docker Sandboxed Server](/sdk/guides/agent-server/docker-sandbox) - Run the server in an isolated Docker workspace. 
+- [API Sandboxed Server](/sdk/guides/agent-server/api-sandbox) - Start hosted runtime workspaces. +- [Agent Server API Reference](/sdk/guides/agent-server/api-reference/server-details/alive) - Browse generated endpoint docs. From 0c6a38ccf5fc01ba4cfdca7c743b3271600f33c1 Mon Sep 17 00:00:00 2001 From: openhands Date: Thu, 28 May 2026 22:18:53 +0000 Subject: [PATCH 2/3] docs: validate agent server setup instructions Co-authored-by: openhands --- llms-full.txt | 1034 +++++++++++----------- sdk/arch/agent-server.mdx | 15 +- sdk/guides/agent-server/local-server.mdx | 15 +- 3 files changed, 546 insertions(+), 518 deletions(-) diff --git a/llms-full.txt b/llms-full.txt index d9f13d765..15355495e 100644 --- a/llms-full.txt +++ b/llms-full.txt @@ -924,7 +924,7 @@ Initialize the conversation. * `hook_config` – Optional hook configuration to auto-wire session hooks. If plugins are loaded, their hooks are combined with this config. * `max_iteration_per_run` – Maximum number of iterations per run - * `visualizer` – + * `visualizer` – Visualization configuration. Can be: - ConversationVisualizerBase subclass: Class to instantiate @@ -1107,7 +1107,7 @@ Remote conversation proxy that talks to an agent server. ‘monologue’, ‘alternating_pattern’. Values are integers representing the number of repetitions before triggering. * `hook_config` – Optional hook configuration for session hooks - * `visualizer` – + * `visualizer` – Visualization configuration. Can be: - ConversationVisualizerBase subclass: Class to instantiate @@ -3243,7 +3243,7 @@ Features: Simple tool with no parameters: : class FinishTool(ToolDefinition[FinishAction, FinishObservation]): : @classmethod - def create(cls, conv_state=None, + def create(cls, conv_state=None, `
` ``` ** @@ -3257,7 +3257,7 @@ Complex tool with initialization parameters: : class TerminalTool(ToolDefinition[TerminalAction, : TerminalObservation]): @classmethod - def create(cls, conv_state, + def create(cls, conv_state, `
` ``` ** @@ -3918,18 +3918,18 @@ flowchart TB Events["Event History"] Context["Agent Context
Skills + Prompts"] end - + subgraph Core["Agent Core"] Condense["Condenser
History compression"] Reason["LLM Query
Generate actions"] Security["Security Analyzer
Risk assessment"] end - + subgraph Execution[" "] Tools["Tool Executor
Action → Observation"] Results["Observation Events"] end - + Events --> Condense Context -.->|Skills| Reason Condense --> Reason @@ -3937,11 +3937,11 @@ flowchart TB Security --> Tools Tools --> Results Results -.->|Feedback| Events - + classDef primary fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px classDef secondary fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px classDef tertiary fill:#fff4df,stroke:#b7791f,stroke-width:2px - + class Reason primary class Condense,Security secondary class Tools tertiary @@ -3967,55 +3967,55 @@ flowchart TB Start["step() called"] Pending{"Pending
actions?"} ExecutePending["Execute pending actions"] - + HasCondenser{"Has
condenser?"} Condense["Call condenser.condense()"] CondenseResult{"Result
type?"} EmitCondensation["Emit Condensation event"] UseView["Use View events"] UseRaw["Use raw events"] - + Query["Query LLM with messages"] ContextExceeded{"Context
window
exceeded?"} EmitRequest["Emit CondensationRequest"] - + Parse{"Response
type?"} CreateActions["Create ActionEvents"] CreateMessage["Create MessageEvent"] - + Confirmation{"Need
confirmation?"} SetWaiting["Set WAITING_FOR_CONFIRMATION"] - + Execute["Execute actions"] Observe["Create ObservationEvents"] - + Return["Return"] - + Start --> Pending Pending -->|Yes| ExecutePending --> Return Pending -->|No| HasCondenser - + HasCondenser -->|Yes| Condense HasCondenser -->|No| UseRaw Condense --> CondenseResult CondenseResult -->|Condensation| EmitCondensation --> Return CondenseResult -->|View| UseView --> Query UseRaw --> Query - + Query --> ContextExceeded ContextExceeded -->|Yes| EmitRequest --> Return ContextExceeded -->|No| Parse - + Parse -->|Tool calls| CreateActions Parse -->|Message| CreateMessage --> Return - + CreateActions --> Confirmation Confirmation -->|Yes| SetWaiting --> Return Confirmation -->|No| Execute - + Execute --> Observe Observe --> Return - + style Query fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Condense fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Confirmation fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -4050,26 +4050,26 @@ The agent applies `AgentContext` which includes **skills** and **prompts** to sh %%{init: {"theme": "default", "flowchart": {"nodeSpacing": 30}} }%% flowchart LR Context["AgentContext"] - + subgraph Skills["Skills"] Repo["repo
Always active"] Knowledge["knowledge
Trigger-based"] end SystemAug["System prompt prefix/suffix
Per-conversation"] System["Prompt template
Per-conversation"] - + subgraph Application["Applied to LLM"] SysPrompt["System Prompt"] UserMsg["User Messages"] end - + Context --> Skills Context --> SystemAug Repo --> SysPrompt Knowledge -.->|When triggered| UserMsg System --> SysPrompt SystemAug --> SysPrompt - + style Context fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Repo fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Knowledge fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -4092,26 +4092,26 @@ Tools follow a **strict action-observation pattern**: flowchart TB LLM["LLM generates tool_call"] Convert["Convert to ActionEvent"] - + Decision{"Confirmation
mode?"} Defer["Store as pending"] - + Execute["Execute tool"] Success{"Success?"} - + Obs["ObservationEvent
with result"] Error["ObservationEvent
with error"] - + LLM --> Convert Convert --> Decision - + Decision -->|Yes| Defer Decision -->|No| Execute - + Execute --> Success Success -->|Yes| Obs Success -->|No| Error - + style Convert fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Execute fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Decision fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -4143,14 +4143,14 @@ flowchart LR LLM["LLM"] Tools["Tools"] Context["AgentContext"] - + Conv -->|.step calls| Agent Agent -->|Reads events| Conv Agent -->|Query| LLM Agent -->|Execute| Tools Context -.->|Skills and Context| Agent Agent -.->|New events| Conv - + style Agent fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Conv fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style LLM fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -4189,12 +4189,17 @@ For a single local script, the standalone SDK is usually simpler. For a backend ## Install -Install the SDK packages in the Python environment that will run the server: +Install the same release of the SDK packages in the Python environment that will run the server: ```bash python -m venv .venv source .venv/bin/activate -pip install -U openhands-sdk openhands-tools openhands-workspace openhands-agent-server +export OPENHANDS_VERSION="1.24.0" +pip install -U \ + "openhands-sdk==$OPENHANDS_VERSION" \ + "openhands-tools==$OPENHANDS_VERSION" \ + "openhands-workspace==$OPENHANDS_VERSION" \ + "openhands-agent-server==$OPENHANDS_VERSION" ``` If you are working from the `OpenHands/software-agent-sdk` repository, use the repository's normal `uv` setup instead: @@ -4226,6 +4231,8 @@ The interactive API docs are available at: http://127.0.0.1:8000/docs ``` +If `SESSION_API_KEY` or `OH_SESSION_API_KEYS_*` is already set in your shell, the server will require that key for `/api/*` requests. Unset those variables for unauthenticated local-only testing. + ## Secure the Server By default, the Agent Server starts without API authentication. 
Before exposing it to another process, container, host, or user, set at least one session API key. @@ -4237,12 +4244,12 @@ export OH_SECRET_KEY="$(openssl rand -hex 32)" python -m openhands.agent_server --host 127.0.0.1 --port 8000 ``` -Clients must send the session key in the `X-Session-API-Key` header: +Clients must send the session key in the `X-Session-API-Key` header. This request returns the conversation count when the key is accepted: ```bash curl \ -H "X-Session-API-Key: $OH_SESSION_API_KEYS_0" \ - http://127.0.0.1:8000/api/conversations + http://127.0.0.1:8000/api/conversations/count ``` Use additional indexed variables when you need key rotation: @@ -4371,40 +4378,40 @@ flowchart TB subgraph Interface["Abstract Interface"] Base["CondenserBase
Abstract base"] end - + subgraph Implementations["Concrete Implementations"] NoOp["NoOpCondenser
No compression"] LLM["LLMSummarizingCondenser
LLM-based"] Pipeline["PipelineCondenser
Multi-stage"] end - + subgraph Process["Condensation Process"] View["View
Event history"] Check["should_condense()?"] Condense["get_condensation()"] Result["View | Condensation"] end - + subgraph Output["Condensation Output"] CondEvent["Condensation Event
Summary metadata"] NewView["Condensed View
Reduced tokens"] end - + Base --> NoOp Base --> LLM Base --> Pipeline - + View --> Check Check -->|Yes| Condense Check -->|No| Result Condense --> CondEvent CondEvent --> NewView NewView --> Result - + classDef primary fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px classDef secondary fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px classDef tertiary fill:#fff4df,stroke:#b7791f,stroke-width:2px - + class Base primary class LLM,Pipeline secondary class Check,Condense tertiary @@ -4434,9 +4441,9 @@ flowchart LR View["View"] NoOp["NoOpCondenser"] Same["Same View"] - + View --> NoOp --> Same - + style NoOp fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px ``` @@ -4455,7 +4462,7 @@ flowchart LR AddToHistory["Add to History"] NextStep["Next Step: View.from_events()"] NewView["Condensed View"] - + View --> Check Check -->|Yes| Summarize Summarize --> Summary @@ -4463,7 +4470,7 @@ flowchart LR Metadata --> AddToHistory AddToHistory --> NextStep NextStep --> NewView - + style Check fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Summarize fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style NewView fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -4494,9 +4501,9 @@ flowchart LR C2["Condenser 2"] C3["Condenser 3"] Final["Final View"] - + View --> C1 --> C2 --> C3 --> Final - + style C1 fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style C2 fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style C3 fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -4519,9 +4526,9 @@ flowchart TB Check1["condenser.condense(view)"] Trigger1["should_condense()?"] end - + Agent1 --> Build1 --> Check1 --> Trigger1 - + style Check1 fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px ``` @@ -4540,9 +4547,9 @@ flowchart TB NextStep["Next Agent Step"] Trigger2["condense() detects request"] end - + Error --> Request --> NextStep --> Trigger2 - + style Request fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px ``` **Manual Trigger:** @@ -4556,11 +4563,11 @@ flowchart TB %%{init: {"theme": "default", "flowchart": {"nodeSpacing": 30, 
"rankSpacing": 40}} }%% flowchart TB Start["Agent calls condense(view)"] - + Decision{"should_condense?"} - + ReturnView["Return View
Agent proceeds"] - + Extract["Select Events to Keep/Forget"] Generate["LLM Generates Summary"] Create["Create Condensation Event"] @@ -4570,7 +4577,7 @@ flowchart TB FilterEvents["Filter forgotten events"] InsertSummary["Insert summary at offset"] NewView["New condensed view"] - + Start --> Decision Decision -->|No| ReturnView Decision -->|Yes| Extract @@ -4582,7 +4589,7 @@ flowchart TB NextStep --> FilterEvents FilterEvents --> InsertSummary InsertSummary --> NewView - + style Decision fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Generate fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Create fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -4614,14 +4621,14 @@ flowchart LR View["View
LLMConvertibleEvents"] Convert["events_to_messages()"] LLM["LLM Input"] - + Events --> FromEvents FromEvents --> Filter Filter --> Insert Insert --> View View --> Convert Convert --> LLM - + style View fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style FromEvents fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px ``` @@ -4644,12 +4651,12 @@ flowchart LR Event["Condensation Event
forgotten_event_ids"] Applied["View.from_events()"] New["New View
~60 events + summary"] - + Old -.->|Summarized| Summary Summary --> Event Event --> Applied Applied --> New - + style Event fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Summary fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px ``` @@ -4669,33 +4676,33 @@ flowchart LR flowchart TB View["Current View
120+ events"] Check["Count Events"] - + Compare{"Count >
max_size?"} - + Keep["Keep All Events"] - + Split["Split Events"] Head["Head
First 4 events"] Middle["Middle
~56 events"] Tail["Tail
~56 events"] Summarize["LLM Summarizes Middle"] Result["Head + Summary + Tail
~60 events total"] - + View --> Check Check --> Compare - + Compare -->|Under| Keep Compare -->|Over| Split - + Split --> Head Split --> Middle Split --> Tail - + Middle --> Summarize Head --> Result Summarize --> Result Tail --> Result - + style Compare fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Split fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Summarize fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -4718,13 +4725,13 @@ flowchart LR Condenser["Condenser"] State["Conversation State"] Events["Event Log"] - + Agent -->|"View.from_events()"| State State -->|View| Agent Agent -->|"condense(view)"| Condenser Condenser -->|"View | Condensation"| Agent Agent -->|Adds Condensation| Events - + style Condenser fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Agent fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Events fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -4765,7 +4772,7 @@ The Conversation system has four primary responsibilities: %%{init: {"theme": "default", "flowchart": {"nodeSpacing": 25, "rankSpacing": 35}} }%% flowchart LR User["User Code"] - + subgraph Factory[" "] Entry["Conversation()"] end @@ -4774,26 +4781,26 @@ flowchart LR Local["LocalConversation
Direct execution"] Remote["RemoteConversation
Via agent-server API"] end - + subgraph Core[" "] State["ConversationState
• agent
workspace • stats • ..."] EventLog["ConversationState.events
Event storage"] end - + User --> Entry Entry -.->|LocalWorkspace| Local Entry -.->|RemoteWorkspace| Remote - + Local --> State Remote --> State - + State --> EventLog - + classDef factory fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px classDef impl fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px classDef core fill:#fff4df,stroke:#b7791f,stroke-width:2px classDef service fill:#e9f9ef,stroke:#2f855a,stroke-width:1.5px - + class Entry factory class Local,Remote impl class State,EventLog core @@ -4821,11 +4828,11 @@ flowchart LR Check{Workspace Type?} Local["LocalConversation
Agent runs in-process"] Remote["RemoteConversation
Agent runs via API"] - + Input --> Check Check -->|str or LocalWorkspace| Local Check -->|RemoteWorkspace| Remote - + style Input fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Local fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Remote fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px @@ -4847,13 +4854,13 @@ flowchart TB Start["State Update Request"] Lock["Acquire FIFO Lock"] Decision{New Event?} - + StateOnly["Update State Fields
stats, status, metadata"] EventPath["Append to Event Log
messages, actions, observations"] - + Callback["Trigger Callbacks"] Release["Release Lock"] - + Start --> Lock Lock --> Decision Decision -->|No| StateOnly @@ -4861,7 +4868,7 @@ flowchart TB StateOnly --> Callback EventPath --> Callback Callback --> Release - + style Decision fill:#fff4df,stroke:#b7791f,stroke-width:2px style EventPath fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style StateOnly fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px @@ -4948,13 +4955,13 @@ flowchart LR WS["Workspace"] Tools["Tools"] LLM["LLM"] - + Conv -->|Delegates to| Agent Conv -->|Configures| WS Agent -.->|Updates| Conv Agent -->|Uses| Tools Agent -->|Queries| LLM - + style Conv fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Agent fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style WS fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -4982,52 +4989,52 @@ The **OpenHands Software Agent SDK** is part of the [OpenHands V1](https://openh ## Optional Isolation over Mandatory Sandboxing -**V0 Challenge:** -Every tool call in V0 executed in a sandboxed Docker container by default. While this guaranteed reproducibility and security, it also created friction — the agent and sandbox ran as separate processes, states diverged easily, and multi-tenant workloads could crash each other. +**V0 Challenge:** +Every tool call in V0 executed in a sandboxed Docker container by default. While this guaranteed reproducibility and security, it also created friction — the agent and sandbox ran as separate processes, states diverged easily, and multi-tenant workloads could crash each other. Moreover, with the rise of the Model Context Protocol (MCP), which assumes local execution and direct access to user environments, V0's rigid isolation model became incompatible. -**V1 Principle:** -**Sandboxing should be opt-in, not universal.** -V1 unifies agent and tool execution within a single process by default, aligning with MCP's local-execution model. 
+**V1 Principle:** +**Sandboxing should be opt-in, not universal.** +V1 unifies agent and tool execution within a single process by default, aligning with MCP's local-execution model. When isolation is needed, the same stack can be transparently containerized, maintaining flexibility without complexity. ## Stateless by Default, One Source of Truth for State -**V0 Challenge:** +**V0 Challenge:** V0 relied on mutable Python objects and dynamic typing, which led to silent inconsistencies — failed session restores, version drift, and non-deterministic behavior. Each subsystem tracked its own transient state, making debugging and recovery painful. -**V1 Principle:** -**Keep everything stateless, with exactly one mutable state.** -All components (agents, tools, LLMs, and configurations) are immutable Pydantic models validated at construction. +**V1 Principle:** +**Keep everything stateless, with exactly one mutable state.** +All components (agents, tools, LLMs, and configurations) are immutable Pydantic models validated at construction. The only mutable entity is the [conversation state](https://github.com/OpenHands/software-agent-sdk/blob/main/openhands-sdk/openhands/sdk/event/conversation_state.py), a single source of truth that enables deterministic replay and robust persistence across sessions or distributed systems. ## Clear Boundaries between Agent and Applications -**V0 Challenge:** -The same codebase powered the CLI, web interface, and integrations (e.g., Github, Gitlab, etc). Over time, application-specific conditionals and prompts polluted the agent core, making it brittle. +**V0 Challenge:** +The same codebase powered the CLI, web interface, and integrations (e.g., GitHub, GitLab, etc.). Over time, application-specific conditionals and prompts polluted the agent core, making it brittle. Heavy research dependencies and benchmark integrations further bloated production builds.
-**V1 Principle:** -**Maintain strict separation of concerns.** -V1 divides the system into stable, isolated layers: the [SDK (agent core)](/sdk/arch/overview#1-sdk-%E2%80%93-openhands-sdk), [tools (set of tools)](/sdk/arch/overview#2-tools-%E2%80%93-openhands-tools), [workspace (sandbox)](/sdk/arch/overview#3-workspace-%E2%80%93-openhands-workspace), and [agent server (server that runs inside sandbox)](/sdk/arch/overview#4-agent-server-%E2%80%93-openhands-agent-server). +**V1 Principle:** +**Maintain strict separation of concerns.** +V1 divides the system into stable, isolated layers: the [SDK (agent core)](/sdk/arch/overview#1-sdk-%E2%80%93-openhands-sdk), [tools (set of tools)](/sdk/arch/overview#2-tools-%E2%80%93-openhands-tools), [workspace (sandbox)](/sdk/arch/overview#3-workspace-%E2%80%93-openhands-workspace), and [agent server (server that runs inside sandbox)](/sdk/arch/overview#4-agent-server-%E2%80%93-openhands-agent-server). Applications communicate with the agent via APIs rather than embedding it directly, ensuring research and production can evolve independently. ## Composable Components for Extensibility -**V0 Challenge:** +**V0 Challenge:** Because agent logic was hard-coded into the core application, extending behavior (e.g., adding new tools or entry points) required branching logic for different entrypoints. This rigidity limited experimentation and discouraged contributions. -**V1 Principle:** -**Everything should be composable and safe to extend.** -Agents are defined as graphs of interchangeable components—tools, prompts, LLMs, and contexts—each described declaratively with strong typing. +**V1 Principle:** +**Everything should be composable and safe to extend.** +Agents are defined as graphs of interchangeable components—tools, prompts, LLMs, and contexts—each described declaratively with strong typing. 
Developers can reconfigure capabilities (e.g., swap toolsets, override prompts, add delegation logic) without modifying core code, preserving stability while fostering rapid innovation. ### Events @@ -5053,37 +5060,37 @@ The Event System has four primary responsibilities: flowchart TB Base["Event
Base class"] LLMBase["LLMConvertibleEvent
Abstract base"] - + subgraph LLMTypes["LLM-Convertible Events
Visible to the LLM"] Message["MessageEvent
User/assistant text"] Action["ActionEvent
Tool calls"] System["SystemPromptEvent
Initial system prompt"] CondSummary["CondensationSummaryEvent
Condenser summary"] - + ObsBase["ObservationBaseEvent
Base for tool responses"] Observation["ObservationEvent
Tool results"] UserReject["UserRejectObservation
User rejected action"] AgentError["AgentErrorEvent
Agent error"] end - + subgraph Internals["Internal Events
NOT visible to the LLM"] ConvState["ConversationStateUpdateEvent
State updates"] CondReq["CondensationRequest
Request compression"] Cond["Condensation
Compression result"] Pause["PauseEvent
User pause"] end - + Base --> LLMBase Base --> Internals LLMBase --> LLMTypes ObsBase --> Observation ObsBase --> UserReject ObsBase --> AgentError - + classDef primary fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px classDef secondary fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px classDef tertiary fill:#fff4df,stroke:#b7791f,stroke-width:2px - + class Base,LLMBase,Message,Action,SystemPromptEvent primary class ObsBase,Observation,UserReject,AgentError secondary class ConvState,CondReq,Cond,Pause tertiary @@ -5136,12 +5143,12 @@ flowchart LR Group["Group ActionEvents
by llm_response_id"] Convert["Convert to Messages"] LLM["LLM Input"] - + Events --> Filter Filter --> Group Group --> Convert Convert --> LLM - + style Filter fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Group fill:#fff4df,stroke:#b7791f,stroke-width:2px style Convert fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px @@ -5208,13 +5215,13 @@ flowchart LR Conversation["Conversation"] Tools["Tools"] Services["Auxiliary Services"] - + Agent -->|Reads| Events Agent -->|Writes| Events Conversation -->|Manages| Events Tools -->|Creates| Events Events -.->|Stream| Services - + style Events fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Agent fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Conversation fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -5280,37 +5287,37 @@ flowchart TB JSON["JSON Files
config/llm.json"] Code["Programmatic
LLM(...)"] end - + subgraph Core["Core LLM"] Model["LLM Model
Pydantic configuration"] Pipeline["Request Pipeline
Retry, timeout, telemetry"] end - + subgraph Backend["LiteLLM Backend"] Providers["100+ Providers
OpenAI, Anthropic, etc."] end - + subgraph Output["Telemetry"] Usage["Token Usage"] Cost["Cost Tracking"] Latency["Latency Metrics"] end - + Env --> Model JSON --> Model Code --> Model - + Model --> Pipeline Pipeline --> Providers - + Pipeline --> Usage Pipeline --> Cost Pipeline --> Latency - + classDef primary fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px classDef secondary fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px classDef tertiary fill:#fff4df,stroke:#b7791f,stroke-width:2px - + class Model primary class Pipeline secondary class LiteLLM tertiary @@ -5341,10 +5348,10 @@ flowchart LR Code["Python Code"] LLM["LLM(model=...)"] Agent["Agent"] - + Code --> LLM LLM --> Agent - + style LLM fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px ``` @@ -5405,30 +5412,30 @@ If you need to include secrets in JSON, use `llm.model_dump_json(exclude_none=Tr flowchart TB Request["completion() or responses() call"] Validate["Validate Config"] - + Attempt["LiteLLM Request"] Success{"Success?"} - + Retry{"Retries
remaining?"} Wait["Exponential Backoff"] - + Telemetry["Record Telemetry"] Response["Return Response"] Error["Raise Error"] - + Request --> Validate Validate --> Attempt Attempt --> Success - + Success -->|Yes| Telemetry Success -->|No| Retry - + Retry -->|Yes| Wait Retry -->|No| Error - + Wait --> Attempt Telemetry --> Response - + style Attempt fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Retry fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Telemetry fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -5452,37 +5459,37 @@ In addition to the standard chat completion API, the LLM system supports [OpenAI %%{init: {"theme": "default", "flowchart": {"nodeSpacing": 30, "rankSpacing": 40}} }%% flowchart TB Check{"Model supports
Responses API?"} - + subgraph Standard["Standard Path"] ChatFormat["Format as
Chat Messages"] ChatCall["litellm.completion()"] end - + subgraph ResponsesPath["Responses Path"] RespFormat["Format as
instructions + input[]"] RespCall["litellm.responses()"] end - + ChatResponse["ModelResponse"] RespResponse["ResponsesAPIResponse"] - + Parse["Parse to Message"] Return["LLMResponse"] - + Check -->|No| ChatFormat Check -->|Yes| RespFormat - + ChatFormat --> ChatCall RespFormat --> RespCall - + ChatCall --> ChatResponse RespCall --> RespResponse - + ChatResponse --> Parse RespResponse --> Parse - + Parse --> Return - + style RespFormat fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style RespCall fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px ``` @@ -5509,7 +5516,7 @@ Software Agent SDK uses LiteLLM for provider abstraction: flowchart TB SDK["Software Agent SDK"] LiteLLM["LiteLLM"] - + subgraph Providers["100+ Providers"] OpenAI["OpenAI"] Anthropic["Anthropic"] @@ -5517,14 +5524,14 @@ flowchart TB Azure["Azure"] Others["..."] end - + SDK --> LiteLLM LiteLLM --> OpenAI LiteLLM --> Anthropic LiteLLM --> Google LiteLLM --> Azure LiteLLM --> Others - + style LiteLLM fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style SDK fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px ``` @@ -5571,23 +5578,23 @@ LLM requests automatically collect metrics: %%{init: {"theme": "default", "flowchart": {"nodeSpacing": 30}} }%% flowchart LR Request["LLM Request"] - + subgraph Metrics Tokens["Token Counts
Input/Output"] Cost["Cost
USD"] Latency["Latency
ms"] end - + Events["Event Log"] - + Request --> Tokens Request --> Cost Request --> Latency - + Tokens --> Events Cost --> Events Latency --> Events - + style Metrics fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Events fill:#fff4df,stroke:#b7791f,stroke-width:2px ``` @@ -5630,13 +5637,13 @@ flowchart LR Events["Events"] Security["Security Analyzer"] Condenser["Context Condenser"] - + Agent -->|Uses| LLM LLM -->|Records| Events Security -.->|Optional| LLM Condenser -.->|Optional| LLM Conversation -->|Provides context| Agent - + style LLM fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Agent fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Events fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -5682,38 +5689,38 @@ flowchart TB Sync["MCPClient
Sync/Async bridge"] Async["AsyncMCPClient
FastMCP base"] end - + subgraph Bridge["Tool Bridge"] Def["MCPToolDefinition
Schema conversion"] Exec["MCPToolExecutor
Execution handler"] end - + subgraph Integration["Agent Integration"] Action["MCPToolAction
Dynamic model"] Obs["MCPToolObservation
Result wrapper"] end - + subgraph External["External"] Server["MCP Server
stdio/HTTP"] Tools["External Tools"] end - + Sync --> Async Async --> Server - + Server --> Def Def --> Exec - + Exec --> Action Action --> Server Server --> Obs - + Server -.->|Spawns| Tools - + classDef primary fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px classDef secondary fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px classDef tertiary fill:#fff4df,stroke:#b7791f,stroke-width:2px - + class Sync,Async primary class Def,Exec secondary class Action,Obs tertiary @@ -5744,14 +5751,14 @@ flowchart TB Async["Async MCP Call"] Server["MCP Server"] Result["Result"] - + Sync --> Bridge Bridge --> Executor Executor --> Async Async --> Server Server --> Result Result --> Sync - + style Bridge fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Executor fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Async fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -5802,23 +5809,23 @@ flowchart TB Config["MCP Config"] Spawn["Spawn Server"] List["List Tools"] - + subgraph Convert["Convert Each Tool"] Schema["MCP Schema"] Action["Generate Action Model"] Def["Create ToolDefinition"] end - + Register["Register in ToolRegistry"] - + Config --> Spawn Spawn --> List List --> Schema - + Schema --> Action Action --> Def Def --> Register - + style Spawn fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Action fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Register fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -5844,11 +5851,11 @@ flowchart LR Parse["Parse Parameters"] Model["Dynamic Pydantic Model
MCPToolAction"] Def["ToolDefinition
SDK format"] - + MCP --> Parse Parse --> Model Model --> Def - + style Parse fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Model fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px ``` @@ -5897,15 +5904,15 @@ flowchart TB Agent["Agent generates action"] Action["MCPToolAction"] Executor["MCPToolExecutor"] - + Convert["Convert to MCP format"] Call["MCP call_tool"] Server["MCP Server"] - + Result["MCP Result"] Obs["MCPToolObservation"] Return["Return to Agent"] - + Agent --> Action Action --> Executor Executor --> Convert @@ -5914,7 +5921,7 @@ flowchart TB Server --> Result Result --> Obs Obs --> Return - + style Executor fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Call fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Obs fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -5940,10 +5947,10 @@ flowchart LR Executor["MCPToolExecutor"] Client["MCP Client"] Name["tool_name"] - + Executor -->|Uses| Client Executor -->|Knows| Name - + style Executor fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Client fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px ``` @@ -5967,32 +5974,32 @@ flowchart TB Spawn["Spawn MCP Servers"] Discover["Discover Tools"] Register["Register Tools"] - + Ready["Agent Ready"] - + Step["Agent Step"] LLM["LLM Tool Call"] Execute["Execute MCP Tool"] Result["Return Observation"] - + End["End Conversation"] Cleanup["Close MCP Clients"] - + Load --> Start Start --> Spawn Spawn --> Discover Discover --> Register Register --> Ready - + Ready --> Step Step --> LLM LLM --> Execute Execute --> Result Result --> Step - + Step --> End End --> Cleanup - + style Spawn fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Execute fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Cleanup fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -6015,22 +6022,22 @@ MCP tools can include metadata hints for agents: %%{init: {"theme": "default", "flowchart": {"nodeSpacing": 30}} }%% flowchart LR Tool["MCP Tool"] - + subgraph Annotations ReadOnly["readOnlyHint"] 
Destructive["destructiveHint"] Progress["progressEnabled"] end - + Security["Security Analysis"] - + Tool --> ReadOnly Tool --> Destructive Tool --> Progress - + ReadOnly --> Security Destructive --> Security - + style Destructive fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Security fill:#fff4df,stroke:#b7791f,stroke-width:2px ``` @@ -6057,12 +6064,12 @@ flowchart LR Tools["Tool Registry"] Agent["Agent"] Security["Security"] - + Skills -->|Configures| MCP MCP -->|Registers| Tools Agent -->|Uses| Tools MCP -->|Provides hints| Security - + style MCP fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Skills fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Agent fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -6121,7 +6128,7 @@ graph TB end SDK[Software Agent SDK
openhands.sdk + tools + workspace] - + subgraph External["External Services"] LLM[LLM Providers
OpenAI, Anthropic, etc.] Runtime[Runtime Services
Docker, Remote API, etc.] @@ -6130,14 +6137,14 @@ graph TB UI --> SDK CLI --> SDK Custom --> SDK - + SDK --> LLM SDK --> Runtime - + classDef interface fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px classDef sdk fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px classDef external fill:#fff4df,stroke:#b7791f,stroke-width:2px - + class UI,CLI,Custom interface class SDK sdk class LLM,Runtime external @@ -6174,9 +6181,9 @@ pip install openhands-sdk openhands-tools flowchart LR SDK["openhands.sdk
Agent · LLM · Conversation
+ LocalWorkspace"]:::sdk Tools["openhands.tools
BashTool · FileEditor · GrepTool · …"]:::tools - + SDK -->|uses| Tools - + classDef sdk fill:#e8f3ff,stroke:#2b6cb0,color:#0f2a45,stroke-width:2px,rx:8,ry:8 classDef tools fill:#e9f9ef,stroke:#2f855a,color:#14532d,stroke-width:2px,rx:8,ry:8 ``` @@ -6199,31 +6206,31 @@ pip install openhands-sdk openhands-tools openhands-workspace openhands-agent-se ```mermaid %%{init: {"theme": "default", "flowchart": {"nodeSpacing": 20, "rankSpacing": 30}} }%% flowchart LR - + WSBase["openhands.sdk
Base Classes:
Workspace · Local · Remote"]:::sdk - + subgraph WS[" "] direction LR Docker["openhands.workspace DockerWorkspace
extends RemoteWorkspace"]:::ws Remote["openhands.workspace RemoteAPIWorkspace
extends RemoteWorkspace"]:::ws end - + Server["openhands.agent_server
FastAPI + WebSocket"]:::server Agent["openhands.sdk
Agent · LLM · Conversation"]:::sdk Tools["openhands.tools
BashTool · FileEditor · …"]:::tools - + WSBase -.->|extended by| Docker WSBase -.->|extended by| Remote Docker -->|spawns container with| Server Remote -->|connects via HTTP to| Server Server -->|runs| Agent Agent -->|uses| Tools - + classDef sdk fill:#e8f3ff,stroke:#2b6cb0,color:#0f2a45,stroke-width:1.1px,rx:8,ry:8 classDef ws fill:#fff4df,stroke:#b7791f,color:#5b3410,stroke-width:1.1px,rx:8,ry:8 classDef server fill:#f3e8ff,stroke:#7c3aed,color:#3b2370,stroke-width:1.1px,rx:8,ry:8 classDef tools fill:#e9f9ef,stroke:#2f855a,color:#14532d,stroke-width:1.1px,rx:8,ry:8 - + style WS stroke:#b7791f,stroke-width:1.5px,stroke-dasharray: 4 3,rx:8,ry:8,fill:none ``` @@ -6317,7 +6324,7 @@ sequenceDiagram participant Agent participant LLM participant Tool - + You->>Conversation: "Create hello.txt" Conversation->>Agent: Process message Agent->>LLM: What should I do? @@ -6342,17 +6349,17 @@ graph TB subgraph "Your Code (Unchanged)" Code["Agent + Tools + LLM"] end - + subgraph "Deployment Options" Local["Local
Direct execution"] Docker["Docker
Containerized"] Remote["Remote
Multi-user server"] end - + Code -->|LocalWorkspace| Local Code -->|DockerWorkspace| Docker Code -->|RemoteAPIWorkspace| Remote - + style Code fill:#e1f5fe style Local fill:#e8f5e8 style Docker fill:#e8f5e8 @@ -6419,18 +6426,18 @@ The SDK package handles: ```mermaid graph TB Conv[Conversation
Lifecycle Manager] --> Agent[Agent
Reasoning Loop] - + Agent --> LLM[LLM
Language Model] Agent --> Tools[Tool System
Capabilities] Agent --> Micro[Skills
Behavior Modules] Agent --> Cond[Condenser
Memory Manager] - + Tools --> Workspace[Workspace
Execution] - + Conv --> Events[Events
Communication] Tools --> MCP[MCP
External Tools] Workspace --> Security[Security
Validation] - + style Conv fill:#e1f5fe style Agent fill:#f3e5f5 style LLM fill:#e8f5e8 @@ -6567,7 +6574,7 @@ graph TB 2. **Observation**: Output schema (what the tool returns) 3. **ToolExecutor**: Logic that transforms Action → Observation -**Why this pattern?** +**Why this pattern?** - Type safety catches errors early - LLMs get accurate schemas for tool calling - Tools are testable in isolation @@ -6885,46 +6892,46 @@ flowchart TB subgraph Interface["Abstract Interface"] Base["SecurityAnalyzerBase
Abstract analyzer"] end - + subgraph Implementations["Concrete Analyzers"] LLM["LLMSecurityAnalyzer
Inline risk prediction"] NoOp["NoOpSecurityAnalyzer
No analysis"] end - + subgraph Risk["Risk Levels"] Low["LOW
Safe operations"] Medium["MEDIUM
Moderate risk"] High["HIGH
Dangerous ops"] Unknown["UNKNOWN
Unanalyzed"] end - + subgraph Policy["Confirmation Policy"] Check["should_require_confirmation()"] Mode["Confirmation Mode"] Decision["Require / Allow"] end - + Base --> LLM Base --> NoOp - + Implementations --> Low Implementations --> Medium Implementations --> High Implementations --> Unknown - + Low --> Check Medium --> Check High --> Check Unknown --> Check - + Check --> Mode Mode --> Decision - + classDef primary fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px classDef secondary fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px classDef tertiary fill:#fff4df,stroke:#b7791f,stroke-width:2px classDef danger fill:#ffe8e8,stroke:#dc2626,stroke-width:2px - + class Base primary class LLM secondary class High danger @@ -6950,20 +6957,20 @@ Security analyzers return one of four risk levels: flowchart TB Action["ActionEvent"] Analyze["Security Analyzer"] - + subgraph Levels["Risk Levels"] Low["LOW
Read-only, safe"] Medium["MEDIUM
Modify files"] High["HIGH
Delete, execute"] Unknown["UNKNOWN
Not analyzed"] end - + Action --> Analyze Analyze --> Low Analyze --> Medium Analyze --> High Analyze --> Unknown - + style Low fill:#d1fae5,stroke:#10b981,stroke-width:2px style Medium fill:#fef3c7,stroke:#f59e0b,stroke-width:2px style High fill:#ffe8e8,stroke:#dc2626,stroke-width:2px @@ -6994,13 +7001,13 @@ flowchart TB Extract["Extract security_risk
from arguments"] ActionEvent["ActionEvent
with security_risk set"] Analyzer["LLMSecurityAnalyzer
returns security_risk"] - + Schema --> LLM LLM --> ToolCall ToolCall --> Extract Extract --> ActionEvent ActionEvent --> Analyzer - + style Schema fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Extract fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Analyzer fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -7043,9 +7050,9 @@ flowchart LR Action["ActionEvent"] NoOp["NoOpSecurityAnalyzer"] Unknown["SecurityRisk.UNKNOWN"] - + Action --> NoOp --> Unknown - + style NoOp fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px ``` @@ -7076,20 +7083,20 @@ flowchart TB CheckUnknown{"Risk ==
UNKNOWN?"} UseConfirmUnknown{"confirm_unknown
setting?"} CheckThreshold{"risk.is_riskier
(threshold)?"} - + Confirm["Require Confirmation"] Allow["Allow Execution"] - + Risk --> CheckUnknown CheckUnknown -->|Yes| UseConfirmUnknown CheckUnknown -->|No| CheckThreshold - + UseConfirmUnknown -->|True| Confirm UseConfirmUnknown -->|False| Allow - + CheckThreshold -->|Yes| Confirm CheckThreshold -->|No| Allow - + style CheckUnknown fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Confirm fill:#ffe8e8,stroke:#dc2626,stroke-width:2px style Allow fill:#d1fae5,stroke:#10b981,stroke-width:2px @@ -7146,12 +7153,12 @@ flowchart LR Conversation["Conversation"] Tools["Tools"] MCP["MCP Tools"] - + Agent -->|Validates actions| Security Security -->|Checks| Tools Security -->|Uses hints| MCP Conversation -->|Pauses for confirmation| Agent - + style Security fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Agent fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Conversation fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -7197,47 +7204,47 @@ flowchart TB Knowledge["Knowledge Skill
trigger: KeywordTrigger"] Task["Task Skill
trigger: TaskTrigger"] end - + subgraph Triggers["Trigger Evaluation"] Always["Always Active
Repository guidelines"] Keyword["Keyword Match
String matching on user messages"] TaskMatch["Keyword Match + Inputs
Same as KeywordTrigger + user inputs"] end - + subgraph Content["Skill Content"] Markdown["Markdown with Frontmatter"] Dynamic["Dynamic Commands
!`command` execution"] MCPTools["MCP Tools Config
Repo skills only"] Inputs["Input Metadata
Task skills only"] end - + subgraph Integration["Agent Integration"] Context["Agent Context"] Prompt["System Prompt"] end - + Repo --> Always Knowledge --> Keyword Task --> TaskMatch - + Always --> Markdown Keyword --> Markdown TaskMatch --> Markdown - + Markdown -.->|Optional| Dynamic Repo -.->|Optional| MCPTools Task -.->|Requires| Inputs - + Markdown --> Context Dynamic --> Context MCPTools --> Context Context --> Prompt - + classDef primary fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px classDef secondary fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px classDef tertiary fill:#fff4df,stroke:#b7791f,stroke-width:2px classDef dynamic fill:#e9f9ef,stroke:#2f855a,stroke-width:2px - + class Repo,Knowledge,Task primary class Always,Keyword,TaskMatch secondary class Context tertiary @@ -7270,11 +7277,11 @@ flowchart LR Parse["Parse Frontmatter"] Skill["Skill(trigger=None)"] Context["Always in Context"] - + File --> Parse Parse --> Skill Skill --> Context - + style Skill fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Context fill:#fff4df,stroke:#b7791f,stroke-width:2px ``` @@ -7307,13 +7314,13 @@ flowchart TB Activate["Activate Skill"] Skip["Skip Skill"] Context["Add to Context"] - + User --> Check Check --> Match Match -->|Yes| Activate Match -->|No| Skip Activate --> Context - + style Check fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Activate fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px ``` @@ -7347,13 +7354,13 @@ flowchart TB Template["Apply Template"] Context["Add to Context"] Skip["Skip Skill"] - + User --> Match Match -->|Yes| Inputs Match -->|No| Skip Inputs --> Template Template --> Context - + style Match fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Template fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px ``` @@ -7387,35 +7394,35 @@ Skills are evaluated at different points in the agent lifecycle: %%{init: {"theme": "default", "flowchart": {"nodeSpacing": 30, "rankSpacing": 40}} }%% flowchart TB Start["Agent Step Start"] - + Repo["Check Repository Skills
trigger: None"] AddRepo["Always Add to Context"] - + Message["Check User Message"] Keyword["Match Keyword Triggers"] AddKeyword["Add Matched Skills"] - + TaskType["Check Task Type"] TaskMatch["Match Task Triggers"] AddTask["Add Task Skill"] - + Build["Build Agent Context"] - + Start --> Repo Repo --> AddRepo - + Start --> Message Message --> Keyword Keyword --> AddKeyword - + Start --> TaskType TaskType --> TaskMatch TaskMatch --> AddTask - + AddRepo --> Build AddKeyword --> Build AddTask --> Build - + style Repo fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Keyword fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style TaskMatch fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -7442,11 +7449,11 @@ flowchart LR MCPConfig["mcp_tools Config"] Client["MCP Client"] Tools["Tool Registry"] - + Skill -->|Contains| MCPConfig MCPConfig -->|Spawns| Client Client -->|Registers| Tools - + style Skill fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style MCPConfig fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Tools fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -7535,12 +7542,12 @@ flowchart LR Context["Agent Context"] Agent["Agent"] MCP["MCP Client"] - + Skills -->|Injects content| Context Skills -.->|Spawns tools| MCP Context -->|System prompt| Agent MCP -->|Tool| Agent - + style Skills fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Context fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Agent fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -7586,7 +7593,7 @@ flowchart TB Observation["Observation
Output schema"] Executor["Executor
Business logic"] end - + subgraph Framework["Tool Framework"] Base["ToolBase
Abstract base"] Impl["Tool Implementation
Concrete tool"] @@ -7598,7 +7605,7 @@ flowchart TB ToolSpec["Tool Spec
name + params"] Base -.->|Extends| Impl - + ToolSpec -->|resolve_tool| Registry Registry -->|Create instances| Impl Impl -->|Available in| Agent @@ -7607,11 +7614,11 @@ flowchart TB Agent -->|Parse & validate| Action Agent -->|Execute via Tool.\_\_call\_\_| Executor Executor -->|Return| Observation - + classDef primary fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px classDef secondary fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px classDef tertiary fill:#fff4df,stroke:#b7791f,stroke-width:2px - + class Base primary class Action,Observation,Executor secondary class Registry tertiary @@ -7645,14 +7652,14 @@ flowchart TB WrapObs["ObservationEvent
wraps Observation"] Error["AgentErrorEvent"] end - + subgraph ToolSystem["Tool System"] ActionType["Action
Pydantic model"] ToolCall2["tool.\_\_call\_\_(action)
type-safe execution"] Execute["ToolExecutor
business logic"] ObsType["Observation
Pydantic model"] end - + ToolCall --> ParseJSON ParseJSON -->|Valid JSON| CreateAction ParseJSON -->|Invalid JSON| Error @@ -7663,7 +7670,7 @@ flowchart TB ToolCall2 --> Execute Execute --> ObsType ObsType --> WrapObs - + style ToolSystem fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Agent fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style ActionType fill:#ddd6fe,stroke:#7c3aed,stroke-width:2px @@ -7690,11 +7697,11 @@ flowchart LR Obs["Define Observation
with to_llm_content"] Exec["Define Executor
stateless logic"] Tool["ToolDefinition(...,
executor=Executor())"] - + Action --> Tool Obs --> Tool Exec --> Tool - + style Tool fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px ``` @@ -7716,12 +7723,12 @@ flowchart LR Exec["Define Executor
with \_\_init\_\_ and state"] Subclass["class MyTool(ToolDefinition)
with create() method"] Instance["Return [MyTool(...,
executor=instance)]"] - + Action --> Subclass Obs --> Subclass Exec --> Subclass Subclass --> Instance - + style Instance fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px ``` @@ -7739,21 +7746,21 @@ flowchart TB P1E["Define ToolExecutor
with \_\_call\_\_()"] P1T["ToolDefinition(...,
executor=Executor())"] end - + subgraph Pattern2["Pattern 2: Subclass with Factory"] P2A["Define Action/Observation
with visualize/to_llm_content"] P2E["Define Stateful ToolExecutor
with \_\_init\_\_() and \_\_call\_\_()"] P2C["class MyTool(ToolDefinition)
@classmethod create()"] P2I["Return [MyTool(...,
executor=instance)]"] end - + P1A --> P1E P1E --> P1T - + P2A --> P2E P2E --> P2C P2C --> P2I - + style P1T fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style P2I fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px ``` @@ -7797,20 +7804,20 @@ The registry enables **dynamic tool discovery** and instantiation from tool spec %%{init: {"theme": "default", "flowchart": {"nodeSpacing": 30}} }%% flowchart LR ToolSpec["Tool Spec
name + params"] - + subgraph Registry["Tool Registry"] Resolver["Resolver
name → factory"] Factory["Factory
create(params)"] end - + Instance["Tool Instance
with executor"] Agent["Agent"] - + ToolSpec -->|"resolve_tool(spec)"| Resolver Resolver -->|Lookup factory| Factory Factory -->|"create(**params)"| Instance Instance -->|Used by| Agent - + style Registry fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Factory fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px ``` @@ -7874,18 +7881,18 @@ flowchart TB Server["MCP Server
stdio/HTTP"] ExtTools["External Tools"] end - + subgraph Bridge["MCP Integration Layer"] MCPClient["MCPClient
Sync/Async bridge"] Convert["Schema Conversion
MCP → MCPToolDefinition"] MCPExec["MCPToolExecutor
Bridges to MCP calls"] end - + subgraph Agent["Agent System"] ToolsMap["tools_map
str -> ToolDefinition"] AgentLogic["Agent Execution"] end - + Server -.->|Spawns| ExtTools MCPClient --> Server Server --> Convert @@ -7894,11 +7901,11 @@ flowchart TB ToolsMap --> AgentLogic AgentLogic -->|Tool call| MCPExec MCPExec --> MCPClient - + classDef primary fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px classDef secondary fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px classDef external fill:#fff4df,stroke:#b7791f,stroke-width:2px - + class MCPClient primary class Convert,MCPExec secondary class Server,ExtTools external @@ -7927,13 +7934,13 @@ flowchart LR Loop["Background Event Loop"] Async["Async MCP Call"] Result["Return Result"] - + Sync --> Bridge Bridge --> Loop Loop --> Async Async --> Result Result --> Sync - + style Bridge fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Loop fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px ``` @@ -7954,16 +7961,16 @@ flowchart TB Config["MCP Server Config
command + args"] Spawn["Spawn Server Process
MCPClient"] List["List Available Tools
client.list_tools()"] - + subgraph Convert["For Each MCP Tool"] Store["Store MCP metadata
name, description, inputSchema"] CreateExec["Create MCPToolExecutor
bound to tool + client"] Def["Create MCPToolDefinition
generic MCPToolAction type"] end - + Register["Add to Agent's tools_map
bypasses ToolRegistry"] Ready["Tools Available
Dynamic models created on-demand"] - + Config --> Spawn Spawn --> List List --> Store @@ -7971,7 +7978,7 @@ flowchart TB CreateExec --> Def Def --> Register Register --> Ready - + style Spawn fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Def fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Register fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -8032,18 +8039,18 @@ flowchart TB Native["Native Tools"] MCP["MCP Tools"] end - + Registry["Tool Registry
resolve_tool"] ToolsMap["Agent.tools_map
Merged tool dict"] - + subgraph AgentSystem["Agent System"] Agent["Agent Logic"] LLM["LLM"] end - + Security["Security Analyzer"] Conversation["Conversation State"] - + Native -->|register_tool| Registry Registry --> ToolsMap MCP -->|create_mcp_tools| ToolsMap @@ -8051,7 +8058,7 @@ flowchart TB Agent -->|Execute tools| ToolsMap ToolsMap -.->|Action risk| Security ToolsMap -.->|Read state| Conversation - + style ToolsMap fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Agent fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style Security fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -8098,38 +8105,38 @@ flowchart TB subgraph Interface["Abstract Interface"] Base["BaseWorkspace
Abstract base class"] end - + subgraph Implementations["Concrete Implementations"] Local["LocalWorkspace
Direct subprocess"] Remote["RemoteWorkspace
HTTP API calls"] end - + subgraph Operations["Core Operations"] Command["execute_command()"] Upload["file_upload()"] Download["file_download()"] Context["__enter__ / __exit__"] end - + subgraph Targets["Execution Targets"] Process["Local Process"] Container["Docker Container"] Server["Remote Server"] end - + Base --> Local Base --> Remote - + Base -.->|Defines| Operations - + Local --> Process Remote --> Container Remote --> Server - + classDef primary fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px classDef secondary fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px classDef tertiary fill:#fff4df,stroke:#b7791f,stroke-width:2px - + class Base primary class Local,Remote secondary class Command,Upload tertiary @@ -8166,21 +8173,21 @@ flowchart TB %%{init: {"theme": "default", "flowchart": {"nodeSpacing": 30, "rankSpacing": 40}} }%% flowchart LR Tool["Tool invokes
execute_command()"] - + Decision{"Workspace
type?"} - + LocalExec["subprocess.run()
Direct execution"] RemoteExec["POST /command
HTTP API"] - + Result["CommandResult
stdout, stderr, exit_code"] - + Tool --> Decision Decision -->|Local| LocalExec Decision -->|Remote| RemoteExec - + LocalExec --> Result RemoteExec --> Result - + style Decision fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style LocalExec fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style RemoteExec fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -8224,16 +8231,16 @@ The SDK provides remote workspace implementations in `openhands-workspace` packa %%{init: {"theme": "default", "flowchart": {"nodeSpacing": 50}} }%% flowchart TB Base["RemoteWorkspace
SDK base class"] - + Docker["DockerWorkspace
Auto-spawn containers"] API["RemoteAPIWorkspace
Connect to existing server"] - + Base -.->|Extended by| Docker Base -.->|Extended by| API - + Docker -->|Creates| Container["Docker Container
with agent-server"] API -->|Connects| Server["Remote Agent Server"] - + style Base fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Docker fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px style API fill:#fff4df,stroke:#b7791f,stroke-width:2px @@ -8247,7 +8254,7 @@ flowchart TB | **DockerWorkspace** | Spawn container | Container | Multi-user, untrusted code | | **RemoteAPIWorkspace** | Connect to URL | Remote server | Distributed systems, cloud | -**Source:** +**Source:** - **DockerWorkspace**: [`openhands-workspace/openhands/workspace/docker`](https://github.com/OpenHands/software-agent-sdk/tree/main/openhands-workspace/openhands/workspace/docker) - **RemoteAPIWorkspace**: [`openhands-workspace/openhands/workspace/remote_api`](https://github.com/OpenHands/software-agent-sdk/tree/main/openhands-workspace/openhands/workspace/remote_api) @@ -8261,10 +8268,10 @@ flowchart LR Workspace["Workspace"] Conversation["Conversation"] AgentServer["Agent Server"] - + Conversation -->|Configures| Workspace Workspace -.->|Remote type| AgentServer - + style Workspace fill:#f3e8ff,stroke:#7c3aed,stroke-width:2px style Conversation fill:#e8f3ff,stroke:#2b6cb0,stroke-width:2px ``` @@ -9453,7 +9460,7 @@ def get_planning_tools() -> list[Tool]: The planning agent uses: - **GlobTool**: For discovering files and directories matching patterns -- **GrepTool**: For searching specific content across files +- **GrepTool**: For searching specific content across files - **PlanningFileEditorTool**: For writing structured plans to `PLAN.md` only This read-only approach (except for `PLAN.md`) keeps the agent focused on analysis without implementation distractions. @@ -11951,7 +11958,7 @@ The docker sandboxed agent server demonstrates how to run agents in isolated Doc This provides complete isolation from the host system, making it ideal for production deployments, testing, and executing untrusted code safely. -Use `DockerWorkspace` with a pre-built agent server image for the fastest startup. 
When you need to build your own image from a base image, switch to `DockerDevWorkspace`. +Use `DockerWorkspace` with a pre-built agent server image for the fastest startup. When you need to build your own image from a base image, switch to `DockerDevWorkspace`. the Docker sandbox image ships with features configured in the [Dockerfile](https://github.com/OpenHands/software-agent-sdk/blob/main/openhands-agent-server/openhands/agent_server/docker/Dockerfile) (e.g., secure defaults and services like VSCode and VNC exposed behind well-defined ports), which are not available in the local (non-Docker) agent server. @@ -12438,7 +12445,7 @@ agent = get_default_agent( When `cli_mode=False`, the agent gains access to browser automation tools for web interaction. -When VNC is available and `extra_ports=True`, the browser will be opened in the VNC desktop to visualize agent's work. You can watch the browser in real-time via VNC. Demo video: +When VNC is available and `extra_ports=True`, the browser will be opened in the VNC desktop to visualize agent's work. You can watch the browser in real-time via VNC. Demo video:
@@ -12641,12 +12655,12 @@ export OH_SECRET_KEY="$(openssl rand -hex 32)" python -m openhands.agent_server --host 127.0.0.1 --port 8000 ``` -Requests to `/api/*` must include the session key: +Requests to `/api/*` must include the session key. This request returns the conversation count when the key is accepted: ```bash curl \ -H "X-Session-API-Key: $OH_SESSION_API_KEYS_0" \ - http://127.0.0.1:8000/api/conversations + http://127.0.0.1:8000/api/conversations/count ``` Use `OH_SECRET_KEY` whenever you want conversations and stored settings to survive restarts with their sensitive values intact. Keep this value stable and store it in your normal secret manager. @@ -12786,11 +12800,11 @@ A Remote Agent Server is an HTTP/WebSocket server that: Think of it as the "backend" for your agent, while your Python code acts as the "frontend" client. -{/* -Same interfaces as local: -[BaseConversation](https://github.com/OpenHands/software-agent-sdk/blob/main/openhands-sdk/openhands/sdk/conversation/base.py), -[ConversationStateProtocol](https://github.com/OpenHands/software-agent-sdk/blob/main/openhands-sdk/openhands/sdk/conversation/base.py), -[EventsListBase](https://github.com/OpenHands/software-agent-sdk/blob/main/openhands-sdk/openhands/sdk/conversation/events_list_base.py). Server-backed impl: +{/* +Same interfaces as local: +[BaseConversation](https://github.com/OpenHands/software-agent-sdk/blob/main/openhands-sdk/openhands/sdk/conversation/base.py), +[ConversationStateProtocol](https://github.com/OpenHands/software-agent-sdk/blob/main/openhands-sdk/openhands/sdk/conversation/base.py), +[EventsListBase](https://github.com/OpenHands/software-agent-sdk/blob/main/openhands-sdk/openhands/sdk/conversation/events_list_base.py). Server-backed impl: [RemoteConversation](https://github.com/OpenHands/software-agent-sdk/blob/main/openhands-sdk/openhands/sdk/conversation/impl/remote_conversation.py). */} @@ -12819,8 +12833,8 @@ graph TD style Workspace fill:#e8f5e8 ``` -1. 
**Client (Python SDK)** — Your application creates and controls conversations using the SDK. -2. **Agent Server** — A lightweight HTTP/WebSocket service that runs the agent and manages workspace execution. +1. **Client (Python SDK)** — Your application creates and controls conversations using the SDK. +2. **Agent Server** — A lightweight HTTP/WebSocket service that runs the agent and manages workspace execution. 3. **Workspace** — An isolated environment (local, Docker, or remote VM) where the agent code runs. The same SDK API works across all three workspace types—you just switch which workspace the conversation connects to. @@ -13095,7 +13109,7 @@ The Stuck Detector automatically identifies when an agent enters unproductive pa 4. **Alternating Patterns**: Two different action-observation pairs alternate in a ping-pong pattern (6+ cycles) 5. **Context Window Errors**: Repeated context window errors that indicate memory management issues -When enabled (which is the default), the stuck detector monitors the conversation in real-time and can automatically halt execution when stuck patterns are detected, preventing infinite loops and wasted resources. +When enabled (which is the default), the stuck detector monitors the conversation in real-time and can automatically halt execution when stuck patterns are detected, preventing infinite loops and wasted resources. For more information about the detection algorithms and how pattern matching works, refer to the [StuckDetector source code](https://github.com/OpenHands/software-agent-sdk/blob/main/openhands-sdk/openhands/sdk/conversation/stuck_detector.py). @@ -13799,7 +13813,7 @@ This architecture allows you to implement custom condensation logic tailored to ### Setting Up Condensing -Create a `LLMSummarizingCondenser` to manage the context. +Create a `LLMSummarizingCondenser` to manage the context. 
The condenser will automatically truncate conversation history when it exceeds max_size, and replaces the dropped events with an LLM-generated summary. This condenser triggers when there are more than `max_context_length` events in @@ -14422,21 +14436,21 @@ from openhands.sdk.event import ActionEvent, ObservationEvent, AgentErrorEvent, class MinimalVisualizer(ConversationVisualizerBase): """A minimal visualizer that prints raw event information.""" - + def __init__(self, name: str | None = None): super().__init__(name=name) self.step_count = 0 - + def on_event(self, event: Event) -> None: """Handle each event.""" if isinstance(event, ActionEvent): self.step_count += 1 tool_name = event.tool_name or "unknown" print(f"Step {self.step_count}: {tool_name}") - + elif isinstance(event, ObservationEvent): print(f" → Result received") - + elif isinstance(event, AgentErrorEvent): print(f"❌ Error: {event.error}") @@ -21263,7 +21277,7 @@ The `conversation.conversation_stats` object provides cost tracking across all L #### Key Methods and Properties - **`usage_to_metrics`**: A dictionary mapping usage IDs to their respective `Metrics` objects. This allows you to track costs separately for each LLM used in the conversation. - + - **`get_combined_metrics()`**: Returns a single `Metrics` object that aggregates costs across all LLMs used in the conversation. This gives you the total cost of the entire conversation. - **`get_metrics_for_usage(usage_id: str)`**: Retrieves the `Metrics` object for a specific usage ID, allowing you to inspect costs for individual LLMs. @@ -23103,13 +23117,13 @@ from openhands.sdk.event.llm_convertible import ActionEvent class CustomSecurityAnalyzer(SecurityAnalyzerBase): """Custom security analyzer with domain-specific rules.""" - + def security_risk(self, action: ActionEvent) -> SecurityRisk: """Evaluate security risk based on custom rules. 
- + Args: action: The ActionEvent to analyze - + Returns: SecurityRisk level (LOW, MEDIUM, HIGH, or UNKNOWN) """ @@ -23119,11 +23133,11 @@ class CustomSecurityAnalyzer(SecurityAnalyzerBase): # High-risk patterns if any(pattern in action_str for pattern in ['rm -rf', 'sudo', 'chmod 777']): return SecurityRisk.HIGH - + # Medium-risk patterns if any(pattern in action_str for pattern in ['curl', 'wget', 'git clone']): return SecurityRisk.MEDIUM - + # Default to low risk return SecurityRisk.LOW @@ -27146,7 +27160,7 @@ The agent will automatically have access to tools provided by enabled MCP server ```bash # For stdio servers python -m my_mcp_server - + # For HTTP servers, check the URL is reachable curl https://api.example.com/mcp ``` @@ -28230,8 +28244,8 @@ There are two types of automations: Most automations are prompt-based. Just describe the task in natural language: ``` - Create an automation called "Daily Standup Summary" that runs every weekday - at 9 AM Eastern. It should check our GitHub repo for PRs merged yesterday + Create an automation called "Daily Standup Summary" that runs every weekday + at 9 AM Eastern. It should check our GitHub repo for PRs merged yesterday and post a summary to #engineering on Slack. ``` @@ -28241,7 +28255,7 @@ There are two types of automations: For specialized capabilities, include one or more plugins from the [OpenHands extensions repository](https://github.com/OpenHands/extensions): ``` - Create an automation using the code-review plugin that runs every weekday + Create an automation using the code-review plugin that runs every weekday at 9 AM. It should review any Python files changed in the last 24 hours. ``` @@ -28308,7 +28322,7 @@ For monitoring tasks, explain what should happen when things go wrong: ``` Check the health endpoint at https://api.example.com/health. 
-If it returns anything other than 200 OK, send an alert to #ops +If it returns anything other than 200 OK, send an alert to #ops with the status code and response body. If it's healthy, just log success without alerting. ``` @@ -28693,14 +28707,14 @@ Both types are created the same way—just describe what you want and OpenHands Just ask OpenHands to create one: ``` -Create an automation that runs every Monday at 9 AM and summarizes +Create an automation that runs every Monday at 9 AM and summarizes our open GitHub issues, then posts the summary to #engineering on Slack. ``` For plugin-based automations, mention the plugin: ``` -Create an automation using the code-review plugin that runs daily +Create an automation using the code-review plugin that runs daily and reviews any Python files changed in the last 24 hours. ``` @@ -28734,7 +28748,7 @@ Your automation has access to everything a normal OpenHands conversation does: t Open a new conversation in OpenHands and ask it to create an automation: ``` -Create an automation that runs every Monday at 9 AM and summarizes +Create an automation that runs every Monday at 9 AM and summarizes our open GitHub issues, then posts to #engineering on Slack. ``` @@ -30258,7 +30272,7 @@ Fix the TypeError in src/api/users.py line 45. Error message: TypeError: 'NoneType' object has no attribute 'get' -Expected behavior: The get_user_preferences() function should return +Expected behavior: The get_user_preferences() function should return default preferences when the user has no saved preferences. Actual behavior: It crashes with the error above when user.preferences is None. @@ -30301,7 +30315,7 @@ Requirements: Follow the existing patterns in src/api/routes.js for route structure. Use the existing db.query() helper in src/db/index.js for database access. 
-Success criteria: I can call the endpoint with valid credentials +Success criteria: I can call the endpoint with valid credentials and receive a JWT token that works with our existing auth middleware. ``` @@ -30425,7 +30439,7 @@ Context: - We use Redis (already available in the project) - Our API follows the controller pattern in src/controllers/ -Requirement: Limit each API key to 100 requests per minute with +Requirement: Limit each API key to 100 requests per minute with appropriate 429 responses and Retry-After headers. ``` @@ -30481,15 +30495,15 @@ Constraints to specify:
``` - The dashboard takes 5 seconds to load. - + The dashboard takes 5 seconds to load. + Profile it and optimize to load in under 1 second. - + Likely issues: - N+1 queries in getWidgetData() - Uncompressed images - Missing database indexes - + Focus on the biggest wins first. ``` @@ -30506,13 +30520,13 @@ Constraints to specify: ``` Add caching to the product catalog API. - + Context: - 95% of requests are for the same 1000 products - Product data changes only via admin panel (rare) - We already have Redis running for sessions - Current response time is 200ms, target is <50ms - + Cache strategy: Cache product data in Redis with 5-minute TTL, invalidate on product update. ``` @@ -30529,15 +30543,15 @@ Constraints to specify: ``` - Create a Go microservice for the image processing currently in + Create a Go microservice for the image processing currently in src/php/ImageProcessor.php. - - This is the first step in our gradual migration. + + This is the first step in our gradual migration. The Go service should: 1. Expose the same API endpoints 2. Be deployable alongside the existing PHP app 3. Include a feature flag to route traffic - + Start with just the resize and crop functions. ``` @@ -30554,15 +30568,15 @@ Constraints to specify: ``` Users can't log in since yesterday's deployment. - + Symptoms: - Login form submits but returns 500 error - Server logs show: "Redis connection refused" - Redis was moved to a new host yesterday - - The issue is likely in src/config/redis.js which may + + The issue is likely in src/config/redis.js which may have the old host hardcoded. - + Expected: Login should work with the new Redis at redis.internal:6380 ``` @@ -30635,7 +30649,7 @@ This task is complete when: Build on previous work: ``` -In our last session, you added the login endpoint. +In our last session, you added the login endpoint. Now add the logout functionality: 1. 
POST /api/auth/logout endpoint @@ -30882,7 +30896,7 @@ review: - performance - test_coverage - documentation - + severity_levels: block_merge: - critical @@ -30892,7 +30906,7 @@ review: informational: - minor - suggestion - + ignore_patterns: - "*.generated.*" - "vendor/*" @@ -30918,15 +30932,15 @@ quality_gates: - name: test_coverage threshold: 80% action: block_merge - + - name: security_issues threshold: 0 critical action: block_merge - + - name: code_review_score threshold: 7/10 action: require_review - + - name: documentation requirement: all_public_apis action: warn @@ -31053,8 +31067,8 @@ OpenHands excels at many development tasks, but knowing when to use it—and whe **Example prompt:** ``` -Add a calculateDiscount() function to src/utils/pricing.js that takes -a price and discount percentage, returns the discounted price. +Add a calculateDiscount() function to src/utils/pricing.js that takes +a price and discount percentage, returns the discounted price. Add unit tests. ``` @@ -31096,13 +31110,13 @@ Add a user profile endpoint to our API: ``` Break large tasks into phases: -Phase 1: "Analyze the current authentication system and document +Phase 1: "Analyze the current authentication system and document all touch points that need to change for OAuth2 migration." -Phase 2: "Implement the OAuth2 provider configuration and basic +Phase 2: "Implement the OAuth2 provider configuration and basic token flow, keeping existing auth working in parallel." -Phase 3: "Migrate the user login flow to use OAuth2, maintaining +Phase 3: "Migrate the user login flow to use OAuth2, maintaining backwards compatibility." 
``` @@ -31222,7 +31236,7 @@ Prepare your repository: ## AGENTS.md Checklist - [ ] Build commands documented -- [ ] Test commands documented +- [ ] Test commands documented - [ ] Code style guidelines noted - [ ] Architecture overview included - [ ] Common patterns described @@ -34151,10 +34165,10 @@ After the GitHub organization rename from `All-Hands-AI` to `OpenHands`, you may ```bash # Check current remote git remote get-url origin - + # Update SSH remote git remote set-url origin git@github.com:OpenHands/OpenHands.git - + # Or update HTTPS remote git remote set-url origin https://github.com/OpenHands/OpenHands.git ``` @@ -34269,11 +34283,11 @@ while current_score < QUALITY_THRESHOLD and iteration < MAX_ITERATIONS: # Migrating agent converts COBOL to Java migration_conversation.send_message(migration_prompt) migration_conversation.run() - + # Critiquing agent evaluates the conversion critique_conversation.send_message(critique_prompt) critique_conversation.run() - + # Parse the score and decide whether to continue current_score = parse_critique_score(critique_file) ``` @@ -34687,13 +34701,13 @@ See real automated reviews in action on the OpenHands Software Agent SDK reposit - Verify the workflow file is in `.github/workflows/` - Check the Actions tab for workflow run errors - + - Ensure `GITHUB_TOKEN` has `pull-requests: write` permission - Check the workflow logs for API errors - Verify the PR is not from a fork with restricted permissions - + - Large PRs may take longer to analyze - Consider splitting large PRs into smaller ones @@ -34809,7 +34823,7 @@ Perform straightforward version updates: ``` Update all patch and minor versions in package.json: - + 1. Review each update for changelog notes 2. Update package.json with new versions 3. Update package-lock.json @@ -34820,7 +34834,7 @@ Perform straightforward version updates: ``` Update dependencies in requirements.txt: - + 1. Check each package for updates 2. Update requirements.txt with compatible versions 3. 
Update requirements-dev.txt similarly @@ -34831,7 +34845,7 @@ Perform straightforward version updates: ``` Update dependencies in pom.xml: - + 1. Check for newer versions of each dependency 2. Update version numbers in pom.xml 3. Run mvn dependency:tree to check conflicts @@ -35155,12 +35169,12 @@ Analyze this stack trace from our production error: ``` Analyze this Java exception: - + java.lang.OutOfMemoryError: Java heap space at java.util.Arrays.copyOf(Arrays.java:3210) at java.util.ArrayList.grow(ArrayList.java:265) at com.myapp.DataProcessor.loadAllRecords(DataProcessor.java:142) - + Identify: 1. What operation is consuming memory? 2. Is there a memory leak or just too much data? @@ -35170,25 +35184,25 @@ Analyze this stack trace from our production error: ``` Analyze this Python traceback: - + Traceback (most recent call last): File "app/api/orders.py", line 45, in create_order order = OrderService.create(data) File "app/services/order.py", line 89, in create inventory.reserve(item_id, quantity) AttributeError: 'NoneType' object has no attribute 'reserve' - + What's None and why? ``` ``` Analyze this Node.js error: - + TypeError: Cannot read property 'map' of undefined at processItems (/app/src/handlers/items.js:23:15) at async handleRequest (/app/src/api/router.js:45:12) - + What's undefined and how should we handle it? ``` @@ -35851,18 +35865,18 @@ Fix identified vulnerabilities: ``` Fix the SQL injection vulnerability in src/api/users.py: - + Current code: query = f"SELECT * FROM users WHERE id = {user_id}" cursor.execute(query) - + Requirements: 1. Use parameterized queries 2. Add input validation 3. Maintain the same functionality 4. Add a test case for the fix ``` - + **Fixed code:** ```python # Using parameterized query @@ -35873,17 +35887,17 @@ Fix identified vulnerabilities: ``` Fix the XSS vulnerability in src/templates/profile.html: - + Current code:
${user.bio}
- + Requirements: 1. Properly escape user content 2. Consider Content Security Policy 3. Handle rich text if needed 4. Test with malicious input ``` - + **Fixed code:** ```html @@ -35893,28 +35907,28 @@ Fix identified vulnerabilities: ``` Fix the command injection in src/utils/network.py: - + Current code: def ping_host(hostname): os.system(f"ping -c 1 {hostname}") - + Requirements: 1. Use safe subprocess calls 2. Validate input format 3. Avoid shell=True 4. Handle errors properly ``` - + **Fixed code:** ```python import subprocess import re - + def ping_host(hostname): # Validate hostname format if not re.match(r'^[a-zA-Z0-9.-]+$', hostname): raise ValueError("Invalid hostname") - + # Use subprocess without shell result = subprocess.run( ["ping", "-c", "1", hostname], @@ -35957,7 +35971,7 @@ def get_documents(user_id: int, current_user: User = Depends(get_current_user)): if current_user.id != user_id and not current_user.is_admin: logger.warning(f"Unauthorized access attempt: user {current_user.id} tried to access user {user_id}'s documents") raise HTTPException(status_code=403, detail="Not authorized") - + return db.get_documents(user_id) ``` @@ -36261,7 +36275,7 @@ Each update is streamed as it occurs, allowing you to provide real-time feedback After starting a conversation, you can check its status to monitor whether the agent has completed its task. - The examples below show basic polling patterns. For production use, add proper error handling, + The examples below show basic polling patterns. For production use, add proper error handling, exponential backoff, and handle network failures gracefully. 
@@ -36406,7 +36420,7 @@ while not conversation_id and attempts < max_attempts: ) task_response.raise_for_status() tasks = task_response.json() - + if tasks and tasks[0].get("status") == "READY": conversation_id = tasks[0].get("app_conversation_id") print(f"Conversation ready: {base_url}/conversations/{conversation_id}") @@ -36435,24 +36449,24 @@ while attempts < max_attempts: ) conv_response.raise_for_status() conversations = conv_response.json() - + if not conversations: print("Warning: Conversation not found") time.sleep(30) attempts += 1 continue - + conv = conversations[0] sandbox_status = conv.get("sandbox_status") exec_status = conv.get("execution_status") - + # Check sandbox health first if sandbox_status in ["ERROR", "MISSING"]: print(f"Sandbox failed with status: {sandbox_status}") exit(1) - + print(f"Execution status: {exec_status}") - + # Check for terminal states if exec_status in ["finished", "error", "stuck"]: print(f"Conversation completed with status: {exec_status}") @@ -36461,7 +36475,7 @@ while attempts < max_attempts: print("Agent is waiting for user confirmation in the UI") print(f"Visit: {base_url}/conversations/{conversation_id}") break - + time.sleep(30) attempts += 1 else: @@ -36518,7 +36532,7 @@ To list all your conversations, use the search endpoint: ``` - The search endpoint returns conversations in the `items` array. Use `next_page_id` + The search endpoint returns conversations in the `items` array. Use `next_page_id` for pagination if you have more conversations than the `limit`. @@ -37004,7 +37018,7 @@ Organizations provide the following capabilities: ## Conversation Visibility By default, conversations remain **private to individual members** of an organization. Each user's conversations -are only visible to them. +are only visible to them. ## Getting Started @@ -37313,7 +37327,7 @@ Once configured, you can request OpenHands to work on a Jira ticket by: 1. 
**Specify the Repository**: Include the repository location in either: - The ticket body itself, or - A comment on the ticket - + 2. **Trigger OpenHands**: Activate the agent using one of these methods: - Add an `openhands` label to the ticket - Comment with: `@openhands please review these requirements, generate a plan, and then proceed with implementation` @@ -37477,7 +37491,7 @@ Once configured, you can request OpenHands to work on a Jira ticket by: 1. **Specify the Repository**: Include the repository location in either: - The ticket body itself, or - A comment on the ticket - + 2. **Trigger OpenHands**: Activate the agent using one of these methods: - Add an `openhands` label to the ticket - Comment with: `@openhands please review these requirements, generate a plan, and then proceed with implementation` @@ -38074,7 +38088,7 @@ There are countless ways to contribute to OpenHands. Whether you're a seasoned d ### Frontend & UI/UX Make OpenHands more beautiful and user-friendly: - **React & TypeScript Development** - Improve the web interface -- **UI/UX Design** - Enhance user experience and accessibility +- **UI/UX Design** - Enhance user experience and accessibility - **Mobile Responsiveness** - Make OpenHands work great on all devices - **Component Libraries** - Build reusable UI components @@ -38167,7 +38181,7 @@ Don't hesitate to ask for help: --- -Thank you for considering contributing to OpenHands! Together, we're building tools that will democratize AI-powered software development and make it accessible to developers everywhere. Every contribution, no matter how small, helps us move closer to that vision. +Thank you for considering contributing to OpenHands! Together, we're building tools that will democratize AI-powered software development and make it accessible to developers everywhere. Every contribution, no matter how small, helps us move closer to that vision. Welcome to the community! 
🎉 @@ -38527,7 +38541,7 @@ A plugin is a directory structure that can contain: - **Skills**: Specialized knowledge and workflows - **Hooks**: Event handlers for tool lifecycle -- **MCP Config**: External tool server configurations +- **MCP Config**: External tool server configurations - **Agents**: Specialized agent definitions - **Commands**: Slash commands @@ -38541,27 +38555,27 @@ Understanding the difference helps you choose the right approach: **Specialized prompts for specific tasks** - + - One skill = one specific capability - Just a SKILL.md file (+ optional resources) - Lightweight and focused - Quick to create and share - + **When to use:** - Adding single capabilities - Simple workflows - Domain-specific knowledge - Quick solutions - + **Multi-component bundles** - + - Multiple skills + hooks + config - Complete feature ecosystems - Coordinated components - Professional distribution - + **When to use:** - Complete feature sets - Tool integrations @@ -38674,7 +38688,7 @@ The `author` field can also be a simple string such as `"Your Name"`. Skills in plugins work identically to standalone skills. Each skill has its own directory with a SKILL.md file: - + ``` skills/ ├── linting/ @@ -38683,13 +38697,13 @@ The `author` field can also be a simple string such as `"Your Name"`. └── testing/ └── SKILL.md ``` - + See [Skills Documentation](/overview/skills) for skill creation details. - + Hooks are event handlers that run during tool lifecycle events: - + ```json { "hooks": { @@ -38708,19 +38722,19 @@ The `author` field can also be a simple string such as `"Your Name"`. 
} } ``` - + Hook commands have access to these environment variables: - `$OPENHANDS_PROJECT_DIR`: Path to the project directory - `$OPENHANDS_SESSION_ID`: Current session identifier - `$OPENHANDS_EVENT_TYPE`: The triggering event type - `$OPENHANDS_TOOL_NAME`: Name of the tool that triggered the hook - + **Common use cases:** - Run linters after file edits - Validate tool inputs - Log tool usage - Trigger dependent actions - + **Available hook events:** - `PreToolUse`: Before tool execution - `PostToolUse`: After tool execution @@ -38729,10 +38743,10 @@ The `author` field can also be a simple string such as `"Your Name"`. - `SessionEnd`: When the session ends - `Stop`: When execution stops - + MCP (Model Context Protocol) servers provide external tools and resources: - + ```json { "mcpServers": { @@ -38750,46 +38764,46 @@ The `author` field can also be a simple string such as `"Your Name"`. } } ``` - + **Use cases:** - Connect to external APIs - Add specialized tools - Integrate third-party services - + Learn more: [Model Context Protocol](/overview/model-context-protocol) - + Specialized agent definitions for specific tasks: - + ```markdown --- name: code-reviewer description: Specialized agent for code review tasks --- - + # Code Review Agent - + This agent specializes in reviewing code according to team standards... ``` - + Agents in plugins can use the plugin's skills and hooks automatically. - + Custom slash commands for plugin functionality: - + ```markdown --- name: /lint description: Run linters on current file --- - + # Lint Command - + Run configured linters on the current file... ``` - + Commands provide quick access to plugin features. 
@@ -38801,7 +38815,7 @@ How you use plugins depends on your platform: **Via configuration file:** - + Create `~/.openhands/config.toml`: ```toml [plugins] @@ -38810,59 +38824,59 @@ How you use plugins depends on your platform: "github:org/plugin-repo", ] ``` - + **Via command line:** ```bash openhands --plugin /path/to/plugin openhands --plugin github:org/plugin-repo ``` - + Plugins are loaded when OpenHands starts. - + Load plugins programmatically: - + ```python from openhands.sdk import LLM, Agent, Conversation from openhands.sdk.plugin import PluginSource from pydantic import SecretStr - + llm = LLM(model="claude-sonnet-4-20250514", api_key=SecretStr("your-api-key")) agent = Agent(llm=llm) - + plugins = [ PluginSource(source="/path/to/plugin"), PluginSource(source="github:org/repo", ref="v1.0.0"), ] - + conversation = Conversation( agent=agent, plugins=plugins, ) ``` - + See [SDK Plugins Guide](/sdk/guides/plugins) for details. - + **Via UI:** 1. Open Settings 2. Navigate to Plugins section 3. Add plugin path or GitHub URL 4. Restart to load - + **Via file system:** Place plugins in `.openhands/plugins/` in your workspace. - + **Via Cloud UI:** 1. Navigate to Workspace Settings 2. Select Plugins tab 3. Browse plugin library or add custom plugin 4. Click "Enable" to activate - + Organization admins can publish plugins for team-wide access. 
@@ -38894,13 +38908,13 @@ github:org/repo#v1.0.0 # Specific tag [github.com/OpenHands/extensions](https://github.com/OpenHands/extensions) - + Community-maintained plugins - + Your own GitHub repositories - + Organization or private plugins @@ -38979,37 +38993,37 @@ Options for distribution: - JavaScript linting skill - Post-edit hooks for auto-linting - Pre-commit setup - + **Use case:** Enforce code standards - + **Contains:** - Kubernetes deployment skill - Docker build skill - CI/CD workflow skill - kubectl MCP server - + **Use case:** Infrastructure management - + **Contains:** - REST API client skill - Authentication skill - Rate limiting hooks - API MCP server - + **Use case:** External service integration - + **Contains:** - Unit testing skill - Integration testing skill - Post-code hooks for test runs - Coverage commands - + **Use case:** Automated testing @@ -39020,15 +39034,15 @@ Options for distribution: Begin by creating the core skills your plugin needs. Test them individually before bundling. - + Identify repetitive tasks and automate them with hooks. Example: run linters after file edits. - + Add MCP servers for external tool integration. This provides your skills with additional capabilities. - + Include a comprehensive README explaining: - What the plugin does @@ -39036,7 +39050,7 @@ Options for distribution: - Configuration options - Example usage - + Use semantic versioning (major.minor.patch) and document breaking changes. @@ -39050,33 +39064,33 @@ Options for distribution: - `.plugin/plugin.json` or `.claude-plugin/plugin.json` exists and is valid JSON - Plugin path is correct - All referenced files exist - + **Debug:** ```bash # Verify structure ls -la plugin-name/.plugin/plugin.json || ls -la plugin-name/.claude-plugin/plugin.json - + # Check JSON syntax (cat plugin-name/.plugin/plugin.json 2>/dev/null || cat plugin-name/.claude-plugin/plugin.json) | python -m json.tool ```
- + **Check:** - Skills have valid SKILL.md files - Frontmatter includes `triggers` - Trigger keywords match your prompts - + **Test:** Use explicit trigger keywords from the skill's frontmatter. - + **Check:** - `hooks/hooks.json` syntax is valid - Hook matchers target the right tools - Commands are executable - + **Debug:** Check logs for hook execution errors. @@ -39328,53 +39342,53 @@ Skills are stored in different locations depending on the platform and scope: The CLI supports two skill locations: - + **User-level skills** (global, available in all conversations): ``` ~/.openhands/skills/ ``` - + **Project-level skills** (specific to current directory): ``` .agents/skills/ ``` - + Skills added via `/add-skill` are installed in `.agents/skills/` of your current workspace, making them available for that project. - + To add skills globally, manually place skill directories in `~/.openhands/skills/`. - + SDK users programmatically load skills: - + ```python from openhands.sdk import Skill - + # Load from a directory skill = Skill.load("/path/to/skill") - + # Load all skills from a directory skills = Skill.load_all("/path/to/skills") ``` - + See the [SDK Skills Guide](/sdk/guides/skill) for more details. - + Skills are stored in: ``` .agents/skills/ ``` - + The GUI provides a visual interface for managing skills, but skills can also be added manually by placing them in this directory. - + OpenHands Cloud provides a centralized skill library accessible through the web interface. Skills can be: - Added from the official registry with one click - Imported from your connected repositories - Shared across your team or organization - + See the [Cloud UI documentation](/openhands/usage/cloud/cloud-ui) for details. 
@@ -39396,7 +39410,7 @@ You can also manually install skills by copying skill directories into the appro git clone https://github.com/OpenHands/extensions temp-clone cp -r temp-clone/skills/codereview .agents/skills/ rm -rf temp-clone - + # Or download and extract manually ``` @@ -39618,7 +39632,7 @@ Triggers are keywords that automatically activate your skill. Choose words users List specific words or phrases that should activate the skill: - + ```yaml --- name: python-linting @@ -39630,21 +39644,21 @@ Triggers are keywords that automatically activate your skill. Choose words users - code quality --- ``` - + **Best practices:** - Include 2-5 trigger keywords - Use terms users actually say - Include tool names (e.g., "ruff", "pytest") - Include action words (e.g., "lint", "test", "deploy") - + The skill description is crucial for trigger matching. Write it in third person and include specific phrases: - + ```yaml description: This skill should be used when the user asks to "deploy to Kubernetes", "apply K8s manifests", "check pod status", or mentions kubectl commands. Provides comprehensive Kubernetes deployment workflows. ``` - + **Key elements:** - Start with "This skill should be used when..." - Quote specific user phrases: "deploy to Kubernetes" @@ -40194,12 +40208,12 @@ See the [GitHub Workflows guide](/sdk/guides/github-workflows/pr-review) for SDK Select metrics that reflect real-world outcomes, not just intermediate steps. - + **Good metrics:** - Suggestion acceptance rate (for code review) - Issue classification accuracy (for triage) - Time to resolution (for bug fixing) - + **Poor metrics:** - Number of suggestions made - Lines of code generated @@ -40208,7 +40222,7 @@ See the [GitHub Workflows guide](/sdk/guides/github-workflows/pr-review) for SDK Begin with basic logging before implementing complex evaluation pipelines. - + 1. Set up OpenTelemetry logging 2. Review traces manually to understand agent behavior 3. 
Identify patterns in successes and failures @@ -40218,7 +40232,7 @@ See the [GitHub Workflows guide](/sdk/guides/github-workflows/pr-review) for SDK Use evaluation results to make targeted improvements: - + - Low accuracy → Review skill instructions for clarity - Inconsistent behavior → Add more specific examples - Context errors → Expand references/ with domain knowledge @@ -40227,7 +40241,7 @@ See the [GitHub Workflows guide](/sdk/guides/github-workflows/pr-review) for SDK Track performance across different contexts: - + - **By repository** - Different repos may need different approaches - **By file type** - Skills may work better on certain languages - **By time** - Identify degradation or improvement trends diff --git a/sdk/arch/agent-server.mdx b/sdk/arch/agent-server.mdx index ab25e89f8..166415e6d 100644 --- a/sdk/arch/agent-server.mdx +++ b/sdk/arch/agent-server.mdx @@ -18,12 +18,17 @@ For a single local script, the standalone SDK is usually simpler. For a backend ## Install -Install the SDK packages in the Python environment that will run the server: +Install the same release of the SDK packages in the Python environment that will run the server: ```bash python -m venv .venv source .venv/bin/activate -pip install -U openhands-sdk openhands-tools openhands-workspace openhands-agent-server +export OPENHANDS_VERSION="1.24.0" +pip install -U \ + "openhands-sdk==$OPENHANDS_VERSION" \ + "openhands-tools==$OPENHANDS_VERSION" \ + "openhands-workspace==$OPENHANDS_VERSION" \ + "openhands-agent-server==$OPENHANDS_VERSION" ``` If you are working from the `OpenHands/software-agent-sdk` repository, use the repository's normal `uv` setup instead: @@ -55,6 +60,8 @@ The interactive API docs are available at: http://127.0.0.1:8000/docs ``` +If `SESSION_API_KEY` or `OH_SESSION_API_KEYS_*` is already set in your shell, the server will require that key for `/api/*` requests. Unset those variables for unauthenticated local-only testing. 
+ ## Secure the Server By default, the Agent Server starts without API authentication. Before exposing it to another process, container, host, or user, set at least one session API key. @@ -66,12 +73,12 @@ export OH_SECRET_KEY="$(openssl rand -hex 32)" python -m openhands.agent_server --host 127.0.0.1 --port 8000 ``` -Clients must send the session key in the `X-Session-API-Key` header: +Clients must send the session key in the `X-Session-API-Key` header. This request returns the conversation count when the key is accepted: ```bash curl \ -H "X-Session-API-Key: $OH_SESSION_API_KEYS_0" \ - http://127.0.0.1:8000/api/conversations + http://127.0.0.1:8000/api/conversations/count ``` Use additional indexed variables when you need key rotation: diff --git a/sdk/guides/agent-server/local-server.mdx b/sdk/guides/agent-server/local-server.mdx index 70bdab51c..04042576b 100644 --- a/sdk/guides/agent-server/local-server.mdx +++ b/sdk/guides/agent-server/local-server.mdx @@ -9,12 +9,17 @@ Run a local Agent Server when you want a backend process to host OpenHands conve ## Install -Create a Python environment and install the server package with the SDK packages it uses: +Create a Python environment and install the same release of the server package and SDK packages it uses: ```bash python -m venv .venv source .venv/bin/activate -pip install -U openhands-sdk openhands-tools openhands-workspace openhands-agent-server +export OPENHANDS_VERSION="1.24.0" +pip install -U \ + "openhands-sdk==$OPENHANDS_VERSION" \ + "openhands-tools==$OPENHANDS_VERSION" \ + "openhands-workspace==$OPENHANDS_VERSION" \ + "openhands-agent-server==$OPENHANDS_VERSION" ``` ## Start Without Authentication @@ -33,6 +38,8 @@ curl http://127.0.0.1:8000/health Open the API docs at `http://127.0.0.1:8000/docs`. +If `SESSION_API_KEY` or `OH_SESSION_API_KEYS_*` is already set in your shell, the server will require that key for `/api/*` requests. Unset those variables for unauthenticated local-only testing. 
+ This unauthenticated mode is only appropriate for local development. Do not bind an unauthenticated server to a public or shared network interface. @@ -48,12 +55,12 @@ export OH_SECRET_KEY="$(openssl rand -hex 32)" python -m openhands.agent_server --host 127.0.0.1 --port 8000 ``` -Requests to `/api/*` must include the session key: +Requests to `/api/*` must include the session key. This request returns the conversation count when the key is accepted: ```bash curl \ -H "X-Session-API-Key: $OH_SESSION_API_KEYS_0" \ - http://127.0.0.1:8000/api/conversations + http://127.0.0.1:8000/api/conversations/count ``` Use `OH_SECRET_KEY` whenever you want conversations and stored settings to survive restarts with their sensitive values intact. Keep this value stable and store it in your normal secret manager. From e3b22f7ccc05e7eb3fd996824de2cd6a2bc0d449 Mon Sep 17 00:00:00 2001 From: openhands Date: Sat, 30 May 2026 04:29:58 +0000 Subject: [PATCH 3/3] chore: address PR review feedback (#534) - Drop hardcoded openhands version pin from install commands in both sdk/arch/agent-server.mdx and sdk/guides/agent-server/local-server.mdx - Fix api_key pattern in local-server.mdx to use os.environ[...] (fail loudly when key is absent) for consistency with arch page - Add a callout for OH_SECRET_KEY to local-server.mdx matching arch page (values cannot be restored if key changes) - Add curl example and working_dir explanation to local-server.mdx - Regenerate llms-full.txt Co-authored-by: openhands --- llms-full.txt | 42 ++++++++++++++---------- sdk/arch/agent-server.mdx | 11 +++---- sdk/guides/agent-server/local-server.mdx | 31 ++++++++++------- 3 files changed, 48 insertions(+), 36 deletions(-) diff --git a/llms-full.txt b/llms-full.txt index 15355495e..f33f18417 100644 --- a/llms-full.txt +++ b/llms-full.txt @@ -4189,17 +4189,16 @@ For a single local script, the standalone SDK is usually simpler.
For a backend ## Install -Install the same release of the SDK packages in the Python environment that will run the server: +Install the server package and its SDK dependencies into a Python environment: ```bash python -m venv .venv source .venv/bin/activate -export OPENHANDS_VERSION="1.24.0" pip install -U \ - "openhands-sdk==$OPENHANDS_VERSION" \ - "openhands-tools==$OPENHANDS_VERSION" \ - "openhands-workspace==$OPENHANDS_VERSION" \ - "openhands-agent-server==$OPENHANDS_VERSION" + openhands-sdk \ + openhands-tools \ + openhands-workspace \ + openhands-agent-server ``` If you are working from the `OpenHands/software-agent-sdk` repository, use the repository's normal `uv` setup instead: @@ -12609,17 +12608,16 @@ Run a local Agent Server when you want a backend process to host OpenHands conve ## Install -Create a Python environment and install the same release of the server package and SDK packages it uses: +Create a Python environment and install the server package and its SDK dependencies: ```bash python -m venv .venv source .venv/bin/activate -export OPENHANDS_VERSION="1.24.0" pip install -U \ - "openhands-sdk==$OPENHANDS_VERSION" \ - "openhands-tools==$OPENHANDS_VERSION" \ - "openhands-workspace==$OPENHANDS_VERSION" \ - "openhands-agent-server==$OPENHANDS_VERSION" + openhands-sdk \ + openhands-tools \ + openhands-workspace \ + openhands-agent-server ``` ## Start Without Authentication @@ -12663,7 +12661,9 @@ curl \ http://127.0.0.1:8000/api/conversations/count ``` -Use `OH_SECRET_KEY` whenever you want conversations and stored settings to survive restarts with their sensitive values intact. Keep this value stable and store it in your normal secret manager. + + `OH_SECRET_KEY` encrypts sensitive values stored with conversations, including LLM API keys and secrets. Keep it stable across restarts. If it changes, previously encrypted values cannot be restored. 
+ ## Connect From the SDK @@ -12686,7 +12686,7 @@ agent = get_default_agent(llm=llm, cli_mode=True) workspace = Workspace( host="http://127.0.0.1:8000", - api_key=os.environ.get("OH_SESSION_API_KEYS_0"), + api_key=os.environ["OH_SESSION_API_KEYS_0"], working_dir="workspace/project", ) @@ -12696,14 +12696,20 @@ conversation.run() conversation.close() ``` -If the server was started without `OH_SESSION_API_KEYS_0`, omit `api_key`. +If the server was started without `OH_SESSION_API_KEYS_0`, remove the `api_key=...` argument. + +The `working_dir` value is a path relative to the server's working directory. The default layout places conversation files under `workspace/bash_events/`, `workspace/conversations/`, and agent task files under `workspace/project/`. ## Connect From Another Service -For a backend service, configure two values: +For a non-SDK backend service, pass the session API key as `X-Session-API-Key`: -- The Agent Server URL, for example `http://127.0.0.1:8000`. -- The session API key, passed as `X-Session-API-Key` on API requests or as `api_key` when using the SDK `Workspace`. +```bash +curl \ + -H "X-Session-API-Key: $OH_SESSION_API_KEYS_0" \ + -H "Content-Type: application/json" \ + http://127.0.0.1:8000/api/conversations/count +``` Keep the Agent Server bound to `127.0.0.1` when the backend runs on the same machine. If the backend runs on another host, use a private network or reverse proxy, enable TLS, and restrict network access to trusted callers. diff --git a/sdk/arch/agent-server.mdx b/sdk/arch/agent-server.mdx index 166415e6d..4798b97e2 100644 --- a/sdk/arch/agent-server.mdx +++ b/sdk/arch/agent-server.mdx @@ -18,17 +18,16 @@ For a single local script, the standalone SDK is usually simpler. 
For a backend ## Install -Install the same release of the SDK packages in the Python environment that will run the server: +Install the server package and its SDK dependencies into a Python environment: ```bash python -m venv .venv source .venv/bin/activate -export OPENHANDS_VERSION="1.24.0" pip install -U \ - "openhands-sdk==$OPENHANDS_VERSION" \ - "openhands-tools==$OPENHANDS_VERSION" \ - "openhands-workspace==$OPENHANDS_VERSION" \ - "openhands-agent-server==$OPENHANDS_VERSION" + openhands-sdk \ + openhands-tools \ + openhands-workspace \ + openhands-agent-server ``` If you are working from the `OpenHands/software-agent-sdk` repository, use the repository's normal `uv` setup instead: diff --git a/sdk/guides/agent-server/local-server.mdx b/sdk/guides/agent-server/local-server.mdx index 04042576b..c83dca526 100644 --- a/sdk/guides/agent-server/local-server.mdx +++ b/sdk/guides/agent-server/local-server.mdx @@ -9,17 +9,16 @@ Run a local Agent Server when you want a backend process to host OpenHands conve ## Install -Create a Python environment and install the same release of the server package and SDK packages it uses: +Create a Python environment and install the server package and its SDK dependencies: ```bash python -m venv .venv source .venv/bin/activate -export OPENHANDS_VERSION="1.24.0" pip install -U \ - "openhands-sdk==$OPENHANDS_VERSION" \ - "openhands-tools==$OPENHANDS_VERSION" \ - "openhands-workspace==$OPENHANDS_VERSION" \ - "openhands-agent-server==$OPENHANDS_VERSION" + openhands-sdk \ + openhands-tools \ + openhands-workspace \ + openhands-agent-server ``` ## Start Without Authentication @@ -63,7 +62,9 @@ curl \ http://127.0.0.1:8000/api/conversations/count ``` -Use `OH_SECRET_KEY` whenever you want conversations and stored settings to survive restarts with their sensitive values intact. Keep this value stable and store it in your normal secret manager. 
+ + `OH_SECRET_KEY` encrypts sensitive values stored with conversations, including LLM API keys and secrets. Keep it stable across restarts. If it changes, previously encrypted values cannot be restored. + ## Connect From the SDK @@ -86,7 +87,7 @@ agent = get_default_agent(llm=llm, cli_mode=True) workspace = Workspace( host="http://127.0.0.1:8000", - api_key=os.environ.get("OH_SESSION_API_KEYS_0"), + api_key=os.environ["OH_SESSION_API_KEYS_0"], working_dir="workspace/project", ) @@ -96,14 +97,20 @@ conversation.run() conversation.close() ``` -If the server was started without `OH_SESSION_API_KEYS_0`, omit `api_key`. +If the server was started without `OH_SESSION_API_KEYS_0`, remove the `api_key=...` argument. + +The `working_dir` value is a path relative to the server's working directory. The default layout places conversation files under `workspace/bash_events/`, `workspace/conversations/`, and agent task files under `workspace/project/`. ## Connect From Another Service -For a backend service, configure two values: +For a non-SDK backend service, pass the session API key as `X-Session-API-Key`: -- The Agent Server URL, for example `http://127.0.0.1:8000`. -- The session API key, passed as `X-Session-API-Key` on API requests or as `api_key` when using the SDK `Workspace`. +```bash +curl \ + -H "X-Session-API-Key: $OH_SESSION_API_KEYS_0" \ + -H "Content-Type: application/json" \ + http://127.0.0.1:8000/api/conversations/count +``` Keep the Agent Server bound to `127.0.0.1` when the backend runs on the same machine. If the backend runs on another host, use a private network or reverse proxy, enable TLS, and restrict network access to trusted callers.