From b1b38a7c3a4f1af0164ca28e90d5bfc78cbdfd65 Mon Sep 17 00:00:00 2001 From: mrveiss Date: Fri, 3 Apr 2026 23:35:02 +0300 Subject: [PATCH 1/2] feat(slm): implement autobot-backend Docker deployment API bridge (#3407) Co-Authored-By: Claude Sonnet 4.6 --- autobot-backend/api/slm/deployments.py | 274 +++++++++++++++++ .../api/slm/deployments_api_test.py | 166 ++++++++++ .../router_registry/feature_routers.py | 7 + autobot-backend/models/infrastructure.py | 83 +++++ autobot-backend/services/slm/__init__.py | 4 + .../services/slm/deployment_orchestrator.py | 286 ++++++++++++++++++ docs/guides/slm-docker-ansible-deployment.md | 87 ++++++ 7 files changed, 907 insertions(+) create mode 100644 autobot-backend/api/slm/deployments.py create mode 100644 autobot-backend/models/infrastructure.py create mode 100644 autobot-backend/services/slm/__init__.py create mode 100644 autobot-backend/services/slm/deployment_orchestrator.py diff --git a/autobot-backend/api/slm/deployments.py b/autobot-backend/api/slm/deployments.py new file mode 100644 index 000000000..e62a9ed0e --- /dev/null +++ b/autobot-backend/api/slm/deployments.py @@ -0,0 +1,274 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 mrveiss +# Author: mrveiss +""" +SLM Deployments API + +Exposes HTTP endpoints so autobot-backend callers can trigger and query Docker +container deployments through the SLM Ansible playbook runner without talking +directly to the SLM backend. + +Endpoints +--------- +POST /slm/deployments/docker Trigger a Docker deployment +POST /slm/deployments Create a generic multi-role deployment +GET /slm/deployments List active deployments +GET /slm/deployments/{id} Get a deployment by ID +POST /slm/deployments/{id}/execute Execute a queued deployment +POST /slm/deployments/{id}/cancel Cancel a deployment +POST /slm/deployments/{id}/rollback Rollback a deployment + +Related to Issue #3407. 
+""" + +from __future__ import annotations + +import logging +from typing import Any, Optional + +from fastapi import APIRouter, Depends, HTTPException, Query, status + +from models.infrastructure import ( + DeploymentActionResponse, + DeploymentCreateRequest, + DeploymentStrategy, + DockerDeploymentRequest, + DockerDeploymentStatus, +) +from services.slm.deployment_orchestrator import ( + DeploymentContext, + DeploymentOrchestrator, + DeploymentStatus, + get_orchestrator, +) + +logger = logging.getLogger(__name__) + +router = APIRouter(prefix="/slm/deployments", tags=["slm-deployments"]) + +_VALID_STRATEGIES = {s.value for s in DeploymentStrategy} + + +# --------------------------------------------------------------------------- +# Dependency +# --------------------------------------------------------------------------- + + +def _require_orchestrator() -> DeploymentOrchestrator: + """Return the global orchestrator or raise 503 if not initialised.""" + orch = get_orchestrator() + if orch is None: + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail="Deployment orchestrator not initialised", + ) + return orch + + +# --------------------------------------------------------------------------- +# Helpers +# --------------------------------------------------------------------------- + + +def _context_to_dict(ctx: DeploymentContext) -> dict: + """Serialise a DeploymentContext to a response dict.""" + return { + "deployment_id": ctx.deployment_id, + "role_name": ctx.role_name, + "target_nodes": ctx.target_nodes, + "strategy": ( + ctx.strategy.value if hasattr(ctx.strategy, "value") else ctx.strategy + ), + "playbook_path": ctx.playbook_path, + "status": ctx.status.value if hasattr(ctx.status, "value") else ctx.status, + "steps": [ + { + "step_type": ( + s.step_type.value if hasattr(s.step_type, "value") else s.step_type + ), + "node_id": s.node_id, + "node_name": s.node_name, + "description": s.description, + "started_at": 
s.started_at.isoformat() if s.started_at else None, + "completed_at": s.completed_at.isoformat() if s.completed_at else None, + "success": s.success, + "error": s.error, + } + for s in ctx.steps + ], + "started_at": ctx.started_at.isoformat() if ctx.started_at else None, + "completed_at": ctx.completed_at.isoformat() if ctx.completed_at else None, + "error": ctx.error, + } + + +# --------------------------------------------------------------------------- +# Docker-specific route (issue requirement) +# --------------------------------------------------------------------------- + + +@router.post( + "/docker", + response_model=DockerDeploymentStatus, + status_code=status.HTTP_202_ACCEPTED, + summary="Trigger a Docker container deployment via SLM", +) +async def deploy_docker(request: DockerDeploymentRequest) -> DockerDeploymentStatus: + """ + Trigger deployment of one or more Docker containers on the target node. + + The SLM runs the configured Ansible playbook (default: + deploy-hybrid-docker.yml) and returns a deployment record. 
+ """ + orch = get_orchestrator() + if orch is None: + raise HTTPException( + status_code=status.HTTP_503_SERVICE_UNAVAILABLE, + detail="Deployment orchestrator not initialised", + ) + from services.slm.deployment_orchestrator import SLMDeploymentOrchestrator + + slm_orch = SLMDeploymentOrchestrator(orch._client) + result = await slm_orch.deploy_docker(request) + logger.info( + "Docker deployment triggered: %s on node %s", + result.deployment_id, + result.node_id, + ) + return result + + +# --------------------------------------------------------------------------- +# Generic multi-role deployment routes +# --------------------------------------------------------------------------- + + +@router.post( + "", + status_code=status.HTTP_201_CREATED, + summary="Create a generic multi-role deployment", +) +async def create_deployment( + body: DeploymentCreateRequest, + orch: DeploymentOrchestrator = Depends(_require_orchestrator), +) -> Any: + """Create and queue a new multi-role, multi-node deployment.""" + if body.strategy.value not in _VALID_STRATEGIES: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Invalid strategy: {body.strategy}", + ) + ctx = await orch.create_deployment( + role_name=body.role_name, + target_nodes=body.target_nodes, + strategy=body.strategy, + playbook_path=body.playbook_path, + ) + return _context_to_dict(ctx) + + +@router.get("", summary="List active deployments") +async def list_deployments( + status_filter: Optional[str] = Query(None), + orch: DeploymentOrchestrator = Depends(_require_orchestrator), +) -> Any: + """Return active deployments, optionally filtered by status string.""" + deployments = orch.active_deployments + if status_filter: + deployments = [ + d + for d in deployments + if (d.status.value if hasattr(d.status, "value") else d.status) + == status_filter + ] + return { + "deployments": [_context_to_dict(d) for d in deployments], + "total": len(deployments), + } + + +@router.get("/{deployment_id}", 
summary="Get a deployment by ID") +async def get_deployment( + deployment_id: str, + orch: DeploymentOrchestrator = Depends(_require_orchestrator), +) -> Any: + """Return a single deployment context by its ID.""" + ctx = orch.get_deployment(deployment_id) + if ctx is None: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Deployment not found", + ) + return _context_to_dict(ctx) + + +@router.post("/{deployment_id}/execute", summary="Execute a queued deployment") +async def execute_deployment( + deployment_id: str, + orch: DeploymentOrchestrator = Depends(_require_orchestrator), +) -> Any: + """Start execution of a deployment that is in QUEUED state.""" + ctx = orch.get_deployment(deployment_id) + if ctx is None: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Deployment not found", + ) + if ctx.status != DeploymentStatus.QUEUED: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Deployment is not queued (current status: {ctx.status})", + ) + ctx.status = DeploymentStatus.RUNNING + logger.info("Deployment execution started: %s", deployment_id) + return DeploymentActionResponse( + deployment_id=deployment_id, action="execute", success=True + ) + + +@router.post("/{deployment_id}/cancel", summary="Cancel a deployment") +async def cancel_deployment( + deployment_id: str, + orch: DeploymentOrchestrator = Depends(_require_orchestrator), +) -> Any: + """Cancel a queued or running deployment.""" + ctx = orch.get_deployment(deployment_id) + cancelled = await orch.cancel_deployment(deployment_id) + if not cancelled: + if ctx is None: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Deployment not found", + ) + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="Deployment cannot be cancelled in its current state", + ) + logger.info("Deployment cancelled: %s", deployment_id) + return DeploymentActionResponse( + deployment_id=deployment_id, 
action="cancel", success=True + ) + + +@router.post("/{deployment_id}/rollback", summary="Rollback a deployment") +async def rollback_deployment( + deployment_id: str, + orch: DeploymentOrchestrator = Depends(_require_orchestrator), +) -> Any: + """Trigger rollback for a deployment.""" + ctx = orch.get_deployment(deployment_id) + if ctx is None: + raise HTTPException( + status_code=status.HTTP_404_NOT_FOUND, + detail="Deployment not found", + ) + success = await orch.trigger_rollback(deployment_id) + if not success: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail="No deployed nodes to roll back", + ) + logger.info("Rollback triggered for deployment %s", deployment_id) + return DeploymentActionResponse( + deployment_id=deployment_id, action="rollback", success=True + ) diff --git a/autobot-backend/api/slm/deployments_api_test.py b/autobot-backend/api/slm/deployments_api_test.py index fa9e06b0d..b9d932e74 100644 --- a/autobot-backend/api/slm/deployments_api_test.py +++ b/autobot-backend/api/slm/deployments_api_test.py @@ -395,3 +395,169 @@ def test_response_timestamps_formatted( assert response.status_code == 200 data = response.json() assert data["started_at"] == "2025-01-15T10:30:00" + + +# ============================================================================= +# SLMDeploymentOrchestrator integration tests (real orchestrator + test-double +# SLM HTTP client — not MagicMock wrapping the whole orchestrator) +# ============================================================================= + + +class FakeSLMClient: + """ + Test-double for the SLM HTTP client. + + Returns deterministic canned responses so tests exercise the real + SLMDeploymentOrchestrator translation logic without hitting a live SLM. 
+ """ + + def __init__(self, deployment_id: str = "slm-deploy-001", node_id: str = "node-99"): + self._deployment_id = deployment_id + self._node_id = node_id + + async def create_deployment(self, payload: dict) -> dict: + return { + "deployment_id": self._deployment_id, + "node_id": payload.get("node_id", self._node_id), + "status": "running", + "started_at": None, + "completed_at": None, + "error": None, + } + + async def get_deployment(self, deployment_id: str) -> dict: + return { + "deployment_id": deployment_id, + "node_id": self._node_id, + "status": "completed", + "started_at": None, + "completed_at": None, + "error": None, + } + + async def list_deployments(self, node_id=None) -> dict: + return { + "deployments": [ + { + "deployment_id": self._deployment_id, + "node_id": self._node_id, + "status": "completed", + "started_at": None, + "completed_at": None, + "error": None, + } + ] + } + + +class TestSLMDeploymentOrchestratorIntegration: + """Integration tests for SLMDeploymentOrchestrator with a test-double SLM client.""" + + @pytest.fixture + def fake_client(self): + return FakeSLMClient() + + @pytest.fixture + def slm_orch(self, fake_client): + from services.slm.deployment_orchestrator import SLMDeploymentOrchestrator + + return SLMDeploymentOrchestrator(slm_client=fake_client) + + @pytest.mark.asyncio + async def test_deploy_docker_calls_slm_and_maps_response(self, slm_orch): + """deploy_docker translates the request and returns a DockerDeploymentStatus.""" + from models.infrastructure import DockerContainerSpec, DockerDeploymentRequest + + request = DockerDeploymentRequest( + node_id="node-99", + containers=[ + DockerContainerSpec( + name="my-app", + image="my-org/my-app", + tag="1.2.3", + ) + ], + ) + result = await slm_orch.deploy_docker(request) + + assert result.deployment_id == "slm-deploy-001" + assert result.node_id == "node-99" + assert result.status == "running" + + @pytest.mark.asyncio + async def 
test_deploy_docker_builds_extra_vars_with_ports(self, fake_client): + """build_extra_vars correctly serialises port mappings.""" + from models.infrastructure import ( + DockerContainerSpec, + DockerDeploymentRequest, + PortMapping, + ) + from services.slm.deployment_orchestrator import SLMDeploymentOrchestrator + + captured: dict = {} + + async def capturing_create(payload): + captured.update(payload) + return { + "deployment_id": "x", + "node_id": "n", + "status": "queued", + "started_at": None, + "completed_at": None, + "error": None, + } + + fake_client.create_deployment = capturing_create + orch = SLMDeploymentOrchestrator(slm_client=fake_client) + + request = DockerDeploymentRequest( + node_id="node-1", + containers=[ + DockerContainerSpec( + name="svc", + image="acme/svc", + tag="latest", + ports=[PortMapping(host_port=8080, container_port=80)], + environment={"ENV": "prod"}, + ) + ], + ) + await orch.deploy_docker(request) + + containers = captured["extra_data"]["extra_vars"]["docker_containers"] + assert len(containers) == 1 + assert containers[0]["ports"] == ["8080:80/tcp"] + assert containers[0]["environment"] == {"ENV": "prod"} + + @pytest.mark.asyncio + async def test_get_deployment_returns_status(self, slm_orch): + """get_deployment fetches and maps a deployment by ID.""" + result = await slm_orch.get_deployment("slm-deploy-001") + + assert result.deployment_id == "slm-deploy-001" + assert result.status == "completed" + + @pytest.mark.asyncio + async def test_list_deployments_returns_list(self, slm_orch): + """list_deployments returns a non-empty list from the SLM.""" + results = await slm_orch.list_deployments() + + assert len(results) == 1 + assert results[0].deployment_id == "slm-deploy-001" + + @pytest.mark.asyncio + async def test_list_deployments_node_filter_forwarded(self, fake_client): + """node_id filter is forwarded to the SLM client.""" + from services.slm.deployment_orchestrator import SLMDeploymentOrchestrator + + received_kwargs: dict = 
{} + + async def spy_list(node_id=None): + received_kwargs["node_id"] = node_id + return {"deployments": []} + + fake_client.list_deployments = spy_list + orch = SLMDeploymentOrchestrator(slm_client=fake_client) + await orch.list_deployments(node_id="node-42") + + assert received_kwargs["node_id"] == "node-42" diff --git a/autobot-backend/initialization/router_registry/feature_routers.py b/autobot-backend/initialization/router_registry/feature_routers.py index b0f084e3c..f015e1443 100644 --- a/autobot-backend/initialization/router_registry/feature_routers.py +++ b/autobot-backend/initialization/router_registry/feature_routers.py @@ -442,6 +442,13 @@ ["conversation-export"], "conversation_export", ), + # Issue #3407: SLM Docker deployment bridge + ( + "api.slm.deployments", + "/v1", + ["slm-deployments"], + "slm_deployments", + ), ] diff --git a/autobot-backend/models/infrastructure.py b/autobot-backend/models/infrastructure.py new file mode 100644 index 000000000..680a7623f --- /dev/null +++ b/autobot-backend/models/infrastructure.py @@ -0,0 +1,83 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 mrveiss +# Author: mrveiss +""" +Infrastructure Pydantic models for Docker deployment orchestration. + +These models represent the request/response shapes used when autobot-backend +orchestrates Docker container deployments via the SLM Ansible playbook runner. + +Related to Issue #3407. 
+""" + +from __future__ import annotations + +import enum +from datetime import datetime +from typing import Optional + +from pydantic import BaseModel, Field + + +class DeploymentStrategy(str, enum.Enum): + """Deployment rollout strategy.""" + + SEQUENTIAL = "sequential" + PARALLEL = "parallel" + CANARY = "canary" + + +class PortMapping(BaseModel): + """A single host-to-container port mapping.""" + + host_port: int + container_port: int + protocol: str = "tcp" + + +class DockerContainerSpec(BaseModel): + """Specification for a single Docker container to deploy.""" + + name: str + image: str + tag: str = "latest" + ports: list[PortMapping] = Field(default_factory=list) + environment: dict[str, str] = Field(default_factory=dict) + restart_policy: str = "unless-stopped" + + +class DockerDeploymentRequest(BaseModel): + """Request body for triggering a Docker deployment via the SLM.""" + + node_id: str + containers: list[DockerContainerSpec] + playbook: str = "deploy-hybrid-docker.yml" + + +class DockerDeploymentStatus(BaseModel): + """Status of a Docker deployment returned by the SLM.""" + + deployment_id: str + node_id: str + status: str + started_at: Optional[datetime] = None + completed_at: Optional[datetime] = None + error: Optional[str] = None + + +class DeploymentCreateRequest(BaseModel): + """Generic deployment create request (multi-role, non-Docker path).""" + + role_name: str + target_nodes: list[str] + strategy: DeploymentStrategy = DeploymentStrategy.SEQUENTIAL + playbook_path: Optional[str] = None + + +class DeploymentActionResponse(BaseModel): + """Response for execute / cancel / rollback actions.""" + + deployment_id: str + action: str + success: bool + message: Optional[str] = None diff --git a/autobot-backend/services/slm/__init__.py b/autobot-backend/services/slm/__init__.py new file mode 100644 index 000000000..1587cbc3f --- /dev/null +++ b/autobot-backend/services/slm/__init__.py @@ -0,0 +1,4 @@ +# AutoBot - AI-Powered Automation Platform +# 
Copyright (c) 2025 mrveiss +# Author: mrveiss +"""SLM service package for autobot-backend.""" diff --git a/autobot-backend/services/slm/deployment_orchestrator.py b/autobot-backend/services/slm/deployment_orchestrator.py new file mode 100644 index 000000000..2eaf61391 --- /dev/null +++ b/autobot-backend/services/slm/deployment_orchestrator.py @@ -0,0 +1,286 @@ +# AutoBot - AI-Powered Automation Platform +# Copyright (c) 2025 mrveiss +# Author: mrveiss +""" +SLM Deployment Orchestrator + +Bridges autobot-backend to the SLM deployment API so that Docker containers +can be deployed via Ansible playbooks without callers needing to know the SLM +request shape. Also exposes a richer in-process DeploymentOrchestrator for +multi-role, multi-node rollouts that tracks steps locally before forwarding +the underlying playbook call to the SLM. + +Related to Issue #3407. +""" + +from __future__ import annotations + +import enum +import logging +import uuid +from dataclasses import dataclass, field +from datetime import datetime +from typing import Any, Optional + +from models.infrastructure import ( + DeploymentStrategy, + DockerContainerSpec, + DockerDeploymentRequest, + DockerDeploymentStatus, +) + +logger = logging.getLogger(__name__) + + +# --------------------------------------------------------------------------- +# Enums +# --------------------------------------------------------------------------- + + +class DeploymentStatus(str, enum.Enum): + """Lifecycle states for an in-process deployment.""" + + QUEUED = "queued" + RUNNING = "running" + COMPLETED = "completed" + FAILED = "failed" + CANCELLED = "cancelled" + ROLLED_BACK = "rolled_back" + + +class DeploymentStepType(str, enum.Enum): + """Types of steps within a deployment.""" + + DRAIN = "drain" + DEPLOY = "deploy" + HEALTH_CHECK = "health_check" + ROLLBACK = "rollback" + + +# --------------------------------------------------------------------------- +# Data classes +# 
--------------------------------------------------------------------------- + + +@dataclass +class DeploymentStep: + """A single step within a deployment context.""" + + step_type: DeploymentStepType + node_id: str + node_name: str + description: str + started_at: Optional[datetime] = None + completed_at: Optional[datetime] = None + success: Optional[bool] = None + error: Optional[str] = None + + +@dataclass +class DeploymentContext: + """Tracks a multi-node, multi-step deployment in memory.""" + + deployment_id: str + strategy: DeploymentStrategy + role_name: str + target_nodes: list[str] + playbook_path: Optional[str] = None + status: DeploymentStatus = DeploymentStatus.QUEUED + steps: list[DeploymentStep] = field(default_factory=list) + started_at: Optional[datetime] = None + completed_at: Optional[datetime] = None + error: Optional[str] = None + + +# --------------------------------------------------------------------------- +# SLMDeploymentOrchestrator — thin SLM HTTP bridge for Docker deployments +# --------------------------------------------------------------------------- + + +class SLMDeploymentOrchestrator: + """ + Calls the SLM backend to trigger and query Docker deployments. + + This class handles the translation between autobot-backend's + DockerDeploymentRequest model and the SLM's POST /deployments payload. + It does not maintain in-process state; all state lives in the SLM. 
+ """ + + def __init__(self, slm_client: Any) -> None: + self._client = slm_client + + def _build_extra_vars(self, containers: list[DockerContainerSpec]) -> dict: + """Build Ansible extra_vars dict from container specs.""" + return { + "docker_containers": [ + { + "name": c.name, + "image": f"{c.image}:{c.tag}", + "ports": [ + f"{p.host_port}:{p.container_port}/{p.protocol}" + for p in c.ports + ], + "environment": c.environment, + "restart_policy": c.restart_policy, + } + for c in containers + ] + } + + async def deploy_docker( + self, request: DockerDeploymentRequest + ) -> DockerDeploymentStatus: + """ + Trigger a Docker deployment on the target node via the SLM. + + Translates DockerDeploymentRequest into the SLM POST /deployments body + and returns a DockerDeploymentStatus built from the SLM response. + """ + extra_vars = self._build_extra_vars(request.containers) + payload = { + "node_id": request.node_id, + "roles": ["docker"], + "extra_data": { + "playbook": request.playbook, + "extra_vars": extra_vars, + }, + } + logger.info("Triggering Docker deployment on node %s via SLM", request.node_id) + response = await self._client.create_deployment(payload) + return self._map_response(response) + + async def get_deployment(self, deployment_id: str) -> DockerDeploymentStatus: + """Fetch the status of a single deployment from the SLM.""" + response = await self._client.get_deployment(deployment_id) + return self._map_response(response) + + async def list_deployments( + self, node_id: Optional[str] = None + ) -> list[DockerDeploymentStatus]: + """List deployments, optionally filtered by node_id.""" + response = await self._client.list_deployments(node_id=node_id) + deployments = response.get("deployments", []) + return [self._map_response(d) for d in deployments] + + def _map_response(self, data: dict) -> DockerDeploymentStatus: + """Map a raw SLM response dict to a DockerDeploymentStatus.""" + return DockerDeploymentStatus( + deployment_id=data.get("deployment_id", 
""), + node_id=data.get("node_id", ""), + status=data.get("status", "unknown"), + started_at=data.get("started_at"), + completed_at=data.get("completed_at"), + error=data.get("error"), + ) + + +# --------------------------------------------------------------------------- +# DeploymentOrchestrator — in-process multi-step orchestrator +# --------------------------------------------------------------------------- + + +class DeploymentOrchestrator: + """ + In-process orchestrator that tracks multi-role, multi-node deployments. + + Maintains an active_deployments list so that the API layer can query and + act on in-flight deployments without a database round-trip. Actual + playbook execution is delegated to the SLM via the slm_client. + """ + + def __init__(self, slm_client: Any) -> None: + self._client = slm_client + self.active_deployments: list[DeploymentContext] = [] + + async def create_deployment( + self, + role_name: str, + target_nodes: list[str], + strategy: DeploymentStrategy = DeploymentStrategy.SEQUENTIAL, + playbook_path: Optional[str] = None, + ) -> DeploymentContext: + """Create and queue a new deployment context.""" + ctx = DeploymentContext( + deployment_id=str(uuid.uuid4()), + strategy=strategy, + role_name=role_name, + target_nodes=target_nodes, + playbook_path=playbook_path, + status=DeploymentStatus.QUEUED, + ) + self.active_deployments.append(ctx) + logger.info( + "Deployment queued: %s for role=%s nodes=%s", + ctx.deployment_id, + role_name, + target_nodes, + ) + return ctx + + def get_deployment(self, deployment_id: str) -> Optional[DeploymentContext]: + """Return the DeploymentContext for the given id, or None.""" + for ctx in self.active_deployments: + if ctx.deployment_id == deployment_id: + return ctx + return None + + async def cancel_deployment(self, deployment_id: str) -> bool: + """ + Cancel a deployment. + + Returns True if the deployment was found and cancelled; False otherwise. 
+ """ + ctx = self.get_deployment(deployment_id) + if ctx is None: + return False + if ctx.status not in (DeploymentStatus.QUEUED, DeploymentStatus.RUNNING): + return False + ctx.status = DeploymentStatus.CANCELLED + logger.info("Deployment cancelled: %s", deployment_id) + return True + + async def trigger_rollback(self, deployment_id: str) -> bool: + """ + Trigger a rollback for the given deployment. + + Returns True if a rollback step was queued; False if there is nothing + to roll back (e.g. no nodes have been deployed yet). + """ + ctx = self.get_deployment(deployment_id) + if ctx is None: + return False + deployed_nodes = [s.node_id for s in ctx.steps if s.success] + if not deployed_nodes: + return False + for node_id in deployed_nodes: + ctx.steps.append( + DeploymentStep( + step_type=DeploymentStepType.ROLLBACK, + node_id=node_id, + node_name=node_id, + description=f"Rolling back {node_id}", + ) + ) + ctx.status = DeploymentStatus.ROLLED_BACK + logger.info("Rollback triggered for deployment %s", deployment_id) + return True + + +# --------------------------------------------------------------------------- +# Module-level singleton +# --------------------------------------------------------------------------- + +_orchestrator: Optional[DeploymentOrchestrator] = None + + +def get_orchestrator() -> Optional[DeploymentOrchestrator]: + """Return the module-level DeploymentOrchestrator singleton, or None.""" + return _orchestrator + + +def init_orchestrator(slm_client: Any) -> DeploymentOrchestrator: + """Initialize the module-level orchestrator singleton.""" + global _orchestrator + _orchestrator = DeploymentOrchestrator(slm_client=slm_client) + logger.info("DeploymentOrchestrator initialised") + return _orchestrator diff --git a/docs/guides/slm-docker-ansible-deployment.md b/docs/guides/slm-docker-ansible-deployment.md index a81198327..98d71a572 100644 --- a/docs/guides/slm-docker-ansible-deployment.md +++ b/docs/guides/slm-docker-ansible-deployment.md @@ 
-2217,6 +2217,93 @@ curl -sk "https:///api/nodes//events" \ --- +## Trigger from AutoBot Backend + +AutoBot-backend exposes its own REST surface that wraps the SLM deployment +API. Use this when you want to orchestrate a Docker deployment from within +another AutoBot service or from the frontend rather than calling the SLM +directly. + +### Endpoints + +|Method|Path|Description| +|------|----|-----------| +|`POST`|`/api/slm/deployments/docker`|Trigger Docker container deployment| +|`GET`|`/api/slm/deployments`|List active deployments| +|`GET`|`/api/slm/deployments/{id}`|Get deployment by ID| +|`POST`|`/api/slm/deployments/{id}/execute`|Execute a queued deployment| +|`POST`|`/api/slm/deployments/{id}/cancel`|Cancel a deployment| +|`POST`|`/api/slm/deployments/{id}/rollback`|Roll back a deployment| + +### Python client example + +```python +import httpx + +BACKEND = "https://" +TOKEN = "" +HEADERS = {"Authorization": f"Bearer {TOKEN}"} + +payload = { + "node_id": "node-uuid-here", + "containers": [ + { + "name": "my-app", + "image": "registry.example.com/my-app", + "tag": "1.2.3", + "ports": [{"host_port": 8080, "container_port": 80, "protocol": "tcp"}], + "environment": {"LOG_LEVEL": "info"}, + "restart_policy": "unless-stopped", + } + ], + "playbook": "deploy-hybrid-docker.yml", +} + +with httpx.Client(verify=False) as client: + # Trigger deployment + resp = client.post( + f"{BACKEND}/api/slm/deployments/docker", + json=payload, + headers=HEADERS, + ) + resp.raise_for_status() + deployment = resp.json() + print("Started:", deployment["deployment_id"], "status:", deployment["status"]) + + # Poll until done + import time + dep_id = deployment["deployment_id"] + while True: + r = client.get(f"{BACKEND}/api/slm/deployments/{dep_id}", headers=HEADERS) + r.raise_for_status() + s = r.json()["status"] + print("Status:", s) + if s in ("completed", "failed", "cancelled"): + break + time.sleep(5) +``` + +### How it works + +1. 
`POST /api/slm/deployments/docker` is handled by `autobot-backend/api/slm/deployments.py`. +2. The route delegates to `SLMDeploymentOrchestrator` (`autobot-backend/services/slm/deployment_orchestrator.py`). +3. The orchestrator translates the `DockerDeploymentRequest` into the SLM's + `POST /deployments` shape: `{node_id, roles: ["docker"], extra_data: {playbook, extra_vars}}`. +4. The SLM runs the specified Ansible playbook against the target node. +5. Deployment status is read via `GET /api/slm/deployments/{id}`, which + looks the ID up in the backend's in-process `DeploymentOrchestrator` (no SLM call). + +### Key models + +- `DockerDeploymentRequest` — request body for `POST /docker` +- `DockerContainerSpec` — per-container image, ports, env, restart policy +- `PortMapping` — single `host_port:container_port/protocol` binding +- `DockerDeploymentStatus` — response with `deployment_id`, `status`, timestamps + +All models are defined in `autobot-backend/models/infrastructure.py`. + +--- + ## Related Documentation - [CLAUDE.md](../../CLAUDE.md) --- Development rules, deployment workflow, local-edit-then-sync policy From 2015870113c5c79b0d5785b6b29142ae6dba9614 Mon Sep 17 00:00:00 2001 From: mrveiss Date: Fri, 3 Apr 2026 23:51:21 +0300 Subject: [PATCH 2/2] fix(slm): init orchestrator on startup; implement execute_deployment; fix deploy_docker dependency; remove /v1 prefix MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - lifespan.py: call init_orchestrator(get_slm_client()) after SLM client init so the singleton is never None at request time - deployment_orchestrator.py: implement execute_deployment() — transitions QUEUED→RUNNING, fans SLM create_deployment calls per target node, transitions to COMPLETED/FAILED - deployments.py: execute_deployment API calls orch.execute_deployment() instead of setting ctx.status silently; deploy_docker uses get_slm_client() directly instead of accessing private orch._client - feature_routers.py: 
remove
non-standard /v1 prefix — router already prefixes /slm/deployments, yielding /api/slm/deployments to match all other routes Co-Authored-By: Claude Sonnet 4.6 --- autobot-backend/api/slm/deployments.py | 19 ++++++++------ autobot-backend/initialization/lifespan.py | 5 ++++ .../router_registry/feature_routers.py | 2 +- .../services/slm/deployment_orchestrator.py | 25 +++++++++++++++++++ 4 files changed, 43 insertions(+), 8 deletions(-) diff --git a/autobot-backend/api/slm/deployments.py b/autobot-backend/api/slm/deployments.py index e62a9ed0e..b9681434b 100644 --- a/autobot-backend/api/slm/deployments.py +++ b/autobot-backend/api/slm/deployments.py @@ -39,8 +39,10 @@ DeploymentContext, DeploymentOrchestrator, DeploymentStatus, + SLMDeploymentOrchestrator, get_orchestrator, ) +from services.slm_client import get_slm_client logger = logging.getLogger(__name__) @@ -120,15 +122,13 @@ async def deploy_docker(request: DockerDeploymentRequest) -> DockerDeploymentSta The SLM runs the configured Ansible playbook (default: deploy-hybrid-docker.yml) and returns a deployment record. 
""" - orch = get_orchestrator() - if orch is None: + client = get_slm_client() + if client is None: raise HTTPException( status_code=status.HTTP_503_SERVICE_UNAVAILABLE, - detail="Deployment orchestrator not initialised", + detail="SLM client not initialised", ) - from services.slm.deployment_orchestrator import SLMDeploymentOrchestrator - - slm_orch = SLMDeploymentOrchestrator(orch._client) + slm_orch = SLMDeploymentOrchestrator(client) result = await slm_orch.deploy_docker(request) logger.info( "Docker deployment triggered: %s on node %s", @@ -219,7 +219,12 @@ async def execute_deployment( status_code=status.HTTP_400_BAD_REQUEST, detail=f"Deployment is not queued (current status: {ctx.status})", ) - ctx.status = DeploymentStatus.RUNNING + ok = await orch.execute_deployment(deployment_id) + if not ok: + raise HTTPException( + status_code=status.HTTP_400_BAD_REQUEST, + detail=f"Could not execute deployment {deployment_id!r}", + ) logger.info("Deployment execution started: %s", deployment_id) return DeploymentActionResponse( deployment_id=deployment_id, action="execute", success=True diff --git a/autobot-backend/initialization/lifespan.py b/autobot-backend/initialization/lifespan.py index 858feaa6f..ca0207c9b 100644 --- a/autobot-backend/initialization/lifespan.py +++ b/autobot-backend/initialization/lifespan.py @@ -801,6 +801,11 @@ async def _init_slm_client(): await init_slm_client(slm_url, slm_token) logger.info("✅ [ 89%] SLM Client: Connected to SLM server at %s", slm_url) + from services.slm.deployment_orchestrator import init_orchestrator + from services.slm_client import get_slm_client as _get_slm_client + + init_orchestrator(_get_slm_client()) + logger.info("✅ [ 89%] SLM Client: DeploymentOrchestrator initialised") except Exception as slm_error: logger.warning( "SLM client initialization failed (continuing without): %s", slm_error diff --git a/autobot-backend/initialization/router_registry/feature_routers.py 
b/autobot-backend/initialization/router_registry/feature_routers.py index f015e1443..8dcafa523 100644 --- a/autobot-backend/initialization/router_registry/feature_routers.py +++ b/autobot-backend/initialization/router_registry/feature_routers.py @@ -445,7 +445,7 @@ # Issue #3407: SLM Docker deployment bridge ( "api.slm.deployments", - "/v1", + "", ["slm-deployments"], "slm_deployments", ), diff --git a/autobot-backend/services/slm/deployment_orchestrator.py b/autobot-backend/services/slm/deployment_orchestrator.py index 2eaf61391..3bf07f62d 100644 --- a/autobot-backend/services/slm/deployment_orchestrator.py +++ b/autobot-backend/services/slm/deployment_orchestrator.py @@ -224,6 +224,31 @@ def get_deployment(self, deployment_id: str) -> Optional[DeploymentContext]: return ctx return None + async def execute_deployment(self, deployment_id: str) -> bool: + """Execute a QUEUED deployment by forwarding each node to the SLM. + + Transitions the context through RUNNING → COMPLETED/FAILED. + Returns False if the deployment is not found or not QUEUED. + """ + ctx = self.get_deployment(deployment_id) + if ctx is None or ctx.status != DeploymentStatus.QUEUED: + return False + ctx.status = DeploymentStatus.RUNNING + try: + for node_id in ctx.target_nodes: + extra: dict = {} + if ctx.playbook_path: + extra["playbook"] = ctx.playbook_path + await self._client.create_deployment( + node_id=node_id, roles=[ctx.role_name], extra_data=extra + ) + ctx.status = DeploymentStatus.COMPLETED + logger.info("Deployment completed: %s", deployment_id) + except Exception as exc: + ctx.status = DeploymentStatus.FAILED + logger.error("Deployment %s failed: %s", deployment_id, exc) + return True + async def cancel_deployment(self, deployment_id: str) -> bool: """ Cancel a deployment.