Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@ When reviewing code, provide constructive feedback:
- Agent-server Docker publish tags are defined centrally in `openhands-agent-server/openhands/agent_server/docker/build.py`; keep `server.yml` manifest publication derived from the emitted per-arch tags so SHA/branch/git-tag aliases stay in sync, while preserving the legacy `latest-<variant>` alias used by workspace defaults.
- The published agent-server Docker images in `.github/workflows/server.yml` must pass `OPENHANDS_BUILD_GIT_SHA` and `OPENHANDS_BUILD_GIT_REF` as explicit `docker/build-push-action` build args; the workflow only uses `docker/build.py` for context/tag generation, so those runtime env vars are otherwise left at the Dockerfile `unknown` defaults.
- The PyInstaller agent-server binary should copy OpenHands distribution metadata (`openhands-agent-server`, `openhands-sdk`, `openhands-tools`, `openhands-workspace`) in `agent-server.spec`, otherwise `/server_info` version lookups via `importlib.metadata` can fall back to `unknown` inside published binary images.
- Agent-server deferred init (warm-pool / dormant mode) is driven by `Config.deferred_init` (env `OH_DEFERRED_INIT`). The `InitService` in `openhands-agent-server/openhands/agent_server/init_router.py` owns the dormant→initializing→ready transition and is registered on `app.state.init_service` only when `deferred_init=True`; the `require_initialized` dependency, added to the `/api/*` router, returns 503 while not `ready`. Bootstrap auth for `POST /api/init` is a separate `OH_INIT_API_KEY` (`X-Init-API-Key` header), distinct from `session_api_keys`, because session keys are part of the per-user payload that arrives *inside* the init body. The agent-server's 5xx exception handler rewrites `detail` on 503s, so warm-pool orchestrators should rely on the HTTP status code (not the body) when probing dormant state.


- Auto-title generation should not re-read `ConversationState.events` from a background task triggered by a freshly received `MessageEvent`; extract message text synchronously from the incoming event and then reuse shared title helpers (`extract_message_text`, `generate_title_from_message`) to avoid persistence-order races.
Expand Down
84 changes: 60 additions & 24 deletions openhands-agent-server/openhands/agent_server/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,11 @@
from openhands.agent_server.file_router import file_router
from openhands.agent_server.git_router import git_router
from openhands.agent_server.hooks_router import hooks_router
from openhands.agent_server.init_router import (
InitService,
init_router,
require_initialized,
)
from openhands.agent_server.llm_router import llm_router
from openhands.agent_server.mcp_router import mcp_router
from openhands.agent_server.middleware import CORSDispatcher
Expand Down Expand Up @@ -120,7 +125,8 @@ async def api_lifespan(api: FastAPI) -> AsyncIterator[None]:
# Clean up stale tmux sessions from previous server runs
_cleanup_stale_tmux_sessions()

service = get_default_conversation_service()
config: Config = api.state.config
deferred = config.deferred_init
vscode_service = get_vscode_service()
desktop_service = get_desktop_service()
tool_preload_service = get_tool_preload_service()
Expand Down Expand Up @@ -181,13 +187,50 @@ async def start_tool_preload_service():
f"Server initialization failed with {len(exceptions)} exception(s)"
) from exceptions[0]

# Mark initialization as complete - now the /ready endpoint will return 200
# and Kubernetes readiness probes will pass
async def stop_stateless_services():
    """Stop the VSCode, desktop and tool-preload services concurrently.

    Services that are ``None`` are skipped. A failure while stopping one
    service never prevents the others from being stopped and is never
    re-raised; it is logged so shutdown problems are not silently lost.
    """

    async def _stop(service):
        # Wrapping the call in a coroutine lets ``gather`` capture any
        # exception from ``stop()`` instead of it propagating.
        if service is not None:
            await service.stop()

    labels = ("vscode", "desktop", "tool preload")
    outcomes = await asyncio.gather(
        _stop(vscode_service),
        _stop(desktop_service),
        _stop(tool_preload_service),
        return_exceptions=True,
    )

    # ``return_exceptions=True`` hands failures back as values; surface them
    # in the log rather than discarding them.
    for label, outcome in zip(labels, outcomes):
        if isinstance(outcome, BaseException):
            logger.warning("Failed to stop %s service: %r", label, outcome)

# In deferred-init mode the conversation service is *not* entered
# here — that happens later, when POST /api/init delivers the runtime
# config. We still mark the /ready endpoint as ready so a warm-pool
# orchestrator can tell the pod has finished booting and is
# available to receive its /api/init payload.
if deferred:
init_service = InitService(api, base_config=config)
api.state.init_service = init_service
mark_initialization_complete()
logger.info("Server started in deferred-init mode; awaiting POST /api/init")
try:
yield
finally:
await init_service.teardown()
await stop_stateless_services()
return

# Non-deferred (legacy) path: build and enter the conversation
# service as part of the lifespan, exactly as before.
service = get_default_conversation_service()
mark_initialization_complete()
logger.info("Server initialization complete - ready to serve requests")

async with service:
# Store the initialized service in app state for dependency injection
api.state.conversation_service = service

config = api.state.config
Expand All @@ -211,26 +254,7 @@ async def start_tool_preload_service():
with suppress(asyncio.CancelledError):
await retention_task

# Define async functions for stopping each service
async def stop_vscode_service():
if vscode_service is not None:
await vscode_service.stop()

async def stop_desktop_service():
if desktop_service is not None:
await desktop_service.stop()

async def stop_tool_preload_service():
if tool_preload_service is not None:
await tool_preload_service.stop()

# Stop all services concurrently
await asyncio.gather(
stop_vscode_service(),
stop_desktop_service(),
stop_tool_preload_service(),
return_exceptions=True,
)
await stop_stateless_services()
finally:
if tmux_tmpdir_was_defaulted and os.environ.get("TMUX_TMPDIR") == str(
tmux_tmpdir
Expand Down Expand Up @@ -290,12 +314,24 @@ def _add_api_routes(app: FastAPI, config: Config) -> None:
"""
app.include_router(server_details_router)

# The /api/init endpoint bypasses both the session-key auth and the
# dormant gate. It has its own X-Init-API-Key auth. When
# ``deferred_init`` is False the endpoints are still mounted but return
# 404 because no InitService is registered on app.state — see
# ``get_init_service``.
init_api_router = APIRouter(prefix="/api")
init_api_router.include_router(init_router)
app.include_router(init_api_router)

# Header-only auth: applied to every /api/* route EXCEPT the workspace
# static-file routes (handled separately below). Cookies are NOT honored
# here so that we don't expand the CSRF surface across the whole API.
dependencies = []
if config.session_api_keys:
dependencies.append(Depends(create_session_api_key_dependency(config)))
# Dormant gate: when ``deferred_init`` is True this 503s every /api/*
# route until POST /api/init completes. No-op for non-deferred deployments.
dependencies.append(Depends(require_initialized))

api_router = APIRouter(prefix="/api", dependencies=dependencies)
api_router.include_router(event_router)
Expand Down
44 changes: 44 additions & 0 deletions openhands-agent-server/openhands/agent_server/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,27 @@ def _default_web_url() -> str | None:
return None


def _default_deferred_init() -> bool:
    """Return the fallback default for ``deferred_init`` from ``OH_DEFERRED_INIT``.

    Used as a default factory so that constructing ``Config()`` directly,
    outside the env-parser pipeline, still honours the environment variable.
    The accepted truthy spellings are intended to mirror ``BoolEnvParser``:
    ``"1"`` or ``"true"`` in any letter case. An unset variable, or any other
    value, yields ``False``.
    """
    value = os.environ.get("OH_DEFERRED_INIT")
    return value is not None and value.upper() in {"1", "TRUE"}


def _default_init_api_key() -> SecretStr | None:
    """Return the default for ``init_api_key`` from ``OH_INIT_API_KEY``.

    The variable's value is wrapped in ``SecretStr``. ``None`` is returned
    when the variable is unset or set to an empty string.
    """
    key = os.environ.get("OH_INIT_API_KEY")
    # An empty string is treated the same as an unset variable.
    return SecretStr(key) if key else None


class WebhookSpec(BaseModel):
"""Spec to create a webhook. All webhook requests use POST method."""

Expand Down Expand Up @@ -212,6 +233,29 @@ class Config(BaseModel):
"The URL where this agent server instance is available externally"
),
)
deferred_init: bool = Field(
default_factory=_default_deferred_init,
description=(
"When True, the server starts in dormant mode. Stateless services "
"(VSCode, tool preload, etc.) start as usual, but the conversation, "
"event, and bash routers return 503 until POST /api/init is called with "
"the runtime configuration. This is intended for warm-pool deployments "
"where pods are pre-warmed before a user is matched and per-user "
"configuration is delivered later."
),
)
init_api_key: SecretStr | None = Field(
default_factory=_default_init_api_key,
description=(
"API key required to call POST /api/init when ``deferred_init`` is True. "
"Sent via the ``X-Init-API-Key`` header. Distinct from "
"``session_api_keys`` because the session key is part of the per-user "
"config that arrives at /api/init time; the init key is the pool-bootstrap "
"credential held by the orchestrator. When unset, /api/init is "
"unauthenticated, which is acceptable for development but not for "
"production warm pools."
),
)
model_config: ClassVar[ConfigDict] = {"frozen": True}

@property
Expand Down
Loading
Loading