diff --git a/.go-arch-lint.yml b/.go-arch-lint.yml index 723fba9c..f803dfcc 100644 --- a/.go-arch-lint.yml +++ b/.go-arch-lint.yml @@ -147,6 +147,16 @@ vendors: in: - golang.org/x/sync/errgroup + huma: + in: + - github.com/danielgtaylor/huma/v2 + - github.com/danielgtaylor/huma/v2/** + + chi: + in: + - github.com/go-chi/chi/v5 + - github.com/go-chi/chi/v5/** + components: # DOMAIN LAYER domain-workflow: @@ -277,6 +287,9 @@ components: interfaces-tui: in: interfaces/tui + interfaces-api: + in: interfaces/api + # TEST UTILITIES testutil: in: testutil @@ -570,6 +583,7 @@ deps: - infra-xdg - interfaces-cli-ui - interfaces-tui + - interfaces-api canUse: - go-stdlib - go-sync @@ -604,6 +618,22 @@ deps: - bubbletea - zap + interfaces-api: + mayDependOn: + - application + - domain-workflow + - domain-ports + - domain-errors + - domain-plugin + - domain-operation + canUse: + - go-stdlib + - go-sync + - huma + - chi + - zap + - uuid + interfaces-cli-ui: mayDependOn: - domain-workflow diff --git a/CHANGELOG.md b/CHANGELOG.md index 5d0868b0..c877eaa9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,10 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Added + +- **F097**: HTTP REST API server (`awf serve`) — new `internal/interfaces/api/` adapter alongside `cli/` and `tui/` exposing workflow discovery (`GET /api/workflows`, `GET /api/workflows/{name}`, `POST /api/workflows/{name}/validate`), async execution (`POST /api/workflows/{name}/run` returning 202 + `execution_id`), lifecycle control (`GET /api/executions`, `GET /api/executions/{id}`, `DELETE /api/executions/{id}`, `POST /api/executions/{id}/resume`), Server-Sent Events streaming (`GET /api/executions/{id}/events` emitting `step.started`, `step.completed`, `step.failed`, `workflow.completed`, `workflow.failed`, `output`), and history queries (`GET /api/history`, `GET /api/history/stats`); Huma v2 + chi v5 generate OpenAPI 3.1 spec served at `/openapi.json`, `/openapi.yaml`, and Swagger UI at `/docs`; Bridge adapter pattern mirrors `tui/bridge.go` with `sync.Map` tracking active executions; SSE polling at 200ms cadence matching TUI; graceful shutdown via `signal.NotifyContext` waits up to 30s for active streams; default binding `127.0.0.1:2511` (loopback-only, non-loopback opt-in via `--host`); arch-lint enforces `interfaces-api` may import only `application/` and `domain/*` (no `infrastructure/`, `cli/`, or `tui/`); see [ADR-016](docs/ADR/016-http-interface-adapter-huma-sse-streaming.md) + ## [0.9.0] - 2026-05-14 ### Added diff --git a/CLAUDE.md b/CLAUDE.md index 37754004..ca47c7d1 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -217,8 +217,6 @@ func TestWorkflowValidation(t *testing.T) { ## Architecture Rules -- Validate agent provider options only against what each CLI actually accepts; do not validate against API documentation if the underlying CLI rejects the option -- Plugin binaries must be discoverable at //awf-plugin-; host validates binary existence and version compatibility via gRPC handshake after process start - Commit generated protobuf files (.pb.go, _grpc.pb.go) to git; treat as source artifacts for build reproducibility, not ephemeral build outputs - CLI command implementations must call infrastructure layer methods rather than reimplementing HTTP requests, parsing, or validation; avoid logic duplication - Application layer must persist source metadata (SetSourceData) after successful infrastructure installation; omitting state blocks downstream operations like updates @@ -240,10 +238,11 @@ func TestWorkflowValidation(t *testing.T) { - When integrating external UI frameworks, create Bridge adapters in the interface layer that wrap application services; maintain zero infrastructure imports in bridge implementation - Enforce event propagation depth limits to prevent infinite event loops; set maxPropagationDepth in EventBus and include propagation_depth in protocol buffer event definitions - Use provider name prefixes for all infrastructure provider helper methods (buildCopilot, extractCopilot, parseCopilot, validateCopilot) to prevent naming collisions across implementations +- Use mutex-protected getter/setter methods (Get*/Set*) for concurrent shared state; apply consistently across all goroutine-accessed fields +- Server owns background task coordination (WaitGroup); pass by pointer to handlers and coordinate shutdown: httpSrv.Shutdown() then sseWG.Wait() ## Common Pitfalls -- When enabling session persistence in CLI providers, force JSON output format for reliable field extraction; document as known limitation that overrides user-specified format - Always provide graceful fallback to stateless mode when optional session ID extraction fails; never fail the entire operation due to extraction errors - When migrating API JSON field names, parse both old and new keys with new key preferred; use dual-key parsing for backwards compatibility without validation errors - Leverage Go's map[string]any behavior to silently ignore unsupported provider options; avoids validation errors while maintaining clear intent @@ -284,11 +283,10 @@ func TestWorkflowValidation(t *testing.T) { - Always test unplanned file modifications discovered during implementation; update task plan if intentional, revert if accidental - Never use standard YAML unmarshaling for skill metadata; implement frontmatter parsing (YAML header between --- delimiters) to preserve metadata - Never skip testing XDG directory fallback paths; code will fail on systems without XDG_DATA_HOME and XDG_CONFIG_HOME variables set +- Major feature implementations require supporting infrastructure changes (ExecutionContext getters, helper modifications); document rationale in commit message and update validation plan if discovered ## Test Conventions -- Mock evaluators must have pre-configured results for every expression input; unconfigured expressions return zero value, which may bypass validation checks in evaluation pipelines -- Distinguish fixture path updates (allowed without review) from content changes (require explicit review); document rationale for content modifications in commit message - Use _Integration suffix for tests requiring live agent execution or system dependencies; keep unit tests suffix-less in domain/application/infrastructure packages - Separate provider output format validation tests into dedicated *_extract_test.go files; verify extraction patterns before session resume integration tests - Document provider output format assumptions (JSON wrapper field names, text patterns) in code comments; validate assumptions with assertion-based tests before production @@ -307,6 +305,8 @@ func TestWorkflowValidation(t *testing.T) { - When testing YAML unmarshaling, assert on all nested struct fields; verify that arrays like Events.Subscribe and Events.Emit are populated, not defaulted to empty - New gRPC and concurrency-heavy infrastructure requires >85% test coverage; run 'make test-race' to verify no data races in stream managers and lock-protected sections. - Always write unit tests for CLI helper functions; parseInputFlags, resolvePromptInput, categorizeError must have >80% coverage before commit +- HTTP servers require unit tests for the server struct itself: route registration, API initialization, graceful shutdown, not just individual handlers +- Organize interface layer test fixtures in tests/fixtures// with descriptive names (e.g., api-simple-success.yaml, api-failing.yaml) ## Review Standards diff --git a/README.md b/README.md index 49cd475b..afd64608 100644 --- a/README.md +++ b/README.md @@ -35,6 +35,7 @@ A Go CLI tool for orchestrating AI agents (Claude, Gemini, Codex, GitHub Copilot - **Built-in HTTP Operation** - Declarative REST API calls (GET, POST, PUT, DELETE) with configurable timeout, response capture, and retryable status codes - **Built-in Notification Plugin** - Workflow completion alerts via desktop and webhooks with configurable backends - **Terminal User Interface (TUI)** - Full-screen interactive dashboard (`awf tui`) with tab-based navigation for workflow browsing, real-time execution monitoring, history exploration, agent conversation rendering, and Claude Code session tailing; built on Bubble Tea with Lip Gloss styling and Glamour Markdown rendering +- **HTTP REST API Server** - `awf serve` exposes workflow discovery, async execution, SSE event streaming, lifecycle control, and execution history over HTTP with auto-generated OpenAPI 3.1 spec and Swagger UI at `/docs`; built on Huma v2 + chi v5; defaults to `127.0.0.1:2511` (loopback-only) with `--host`/`--port` overrides ## Installation @@ -136,6 +137,7 @@ AWF is a powerful orchestration tool that grants AI agents and workflows direct | `awf workflow update [name]` | Update an installed workflow pack | | `awf workflow remove ` | Remove an installed workflow pack | | `awf workflow search [query]` | Search for workflow packs on GitHub | +| `awf serve` | Start HTTP API server for remote workflow execution and monitoring | | `awf tui` | Launch the interactive terminal UI | | `awf upgrade` | Upgrade AWF to the latest version | | `awf version` | Show version information | diff --git a/docs/ADR/016-http-interface-adapter-huma-sse-streaming.md b/docs/ADR/016-http-interface-adapter-huma-sse-streaming.md new file mode 100644 index 00000000..2d67e337 --- /dev/null +++ b/docs/ADR/016-http-interface-adapter-huma-sse-streaming.md @@ -0,0 +1,76 @@ +--- +title: "016: HTTP Interface Adapter with Huma v2 and SSE Streaming" +--- + +**Status**: Accepted +**Date**: 2026-05-17 +**Issue**: F097 +**Supersedes**: N/A +**Superseded by**: N/A + +## Context + +ADR-001 listed "API (future)" as a planned interface layer alongside CLI. F097 implements that delivery mechanism: a REST/HTTP API that lets external systems (CI pipelines, dashboards, IDE extensions) trigger, monitor, and query AWF workflow executions without shelling out to the CLI. + +Two decisions within F097 are architecturally load-bearing beyond the feature itself: + +1. **HTTP framework selection** — introducing Huma v2 + chi v5 as new infrastructure. Huma generates OpenAPI 3.1 schemas automatically from Go types, which locks in how the API contract is expressed and validated for all future endpoints. Replacing it later requires rewriting all handler signatures. + +2. **Streaming protocol for execution events** — workflow executions are long-running (seconds to minutes). External subscribers need real-time step updates. The choice between SSE, WebSockets, or a push-from-application model defines the client integration surface for every future streaming feature. + +## Candidates + +### HTTP Framework + +| Option | Pros | Cons | +|--------|------|------| +| **Huma v2 + chi v5** | OpenAPI 3.1 auto-generation from Go types; type-safe input validation; chi is standard `net/http` compatible; no separate doc tooling | Less battle-tested than gin/echo; Huma v2 handler signature is non-standard Go | +| **gin** | Mature, widely known, fast | No native OpenAPI generation; separate swagger annotation toolchain needed; docs drift from code | +| **echo** | Balanced performance and ergonomics | Same OpenAPI gap as gin; less active maintenance | +| **net/http + ogen** (spec-first codegen) | Strict contract; spec drives implementation | Requires maintaining a `.yaml` spec separately; adds codegen step to CI | + +### Streaming Protocol + +| Option | Pros | Cons | +|--------|------|------| +| **SSE (polling-based)** | Unidirectional; simple client-side (`EventSource` API); firewall-friendly (HTTP/1.1); stateless per subscriber | O(subscribers) polling goroutines; couples cadence to internal poll interval | +| **WebSockets** | Bidirectional; lower per-message overhead for high-frequency events | Overkill for unidirectional workflow events; more complex lifecycle (upgrade, ping/pong, reconnect) | +| **Long-polling** | Universally compatible; no keep-alive concern | Thundering herd on state change; harder to implement correctly at scale | +| **Push from ExecutionService** | Zero polling overhead; events pushed on state transition | Requires a new observer port in the application layer — cross-layer coupling that violates Principle 6 for an interface-layer concern | + +## Decision + +**Framework:** Huma v2 + chi v5. + +Huma v2 is the only Go library that generates valid OpenAPI 3.1 (not 2.0 or 3.0) directly from Go struct types without a separate code-gen step. Chi's standard `net/http` compatibility avoids wrapping the existing request context. AWF's primary API consumers are developer tooling and CI; an always-in-sync OpenAPI spec eliminates documentation maintenance burden. + +**Streaming:** SSE with 200ms polling of `ExecutionContext.GetAllStepStates()`. + +AWF workflow events are unidirectional (server → client). SSE is the standard HTTP mechanism for this pattern. The 200ms cadence matches the existing TUI poll interval (`tui/tab_monitoring.go:71: monitoringTickInterval`) and satisfies NFR-002 (p95 ≤ 100ms latency at ≤ 50 subscribers). The push-from-ExecutionService alternative was rejected because it would require a new domain port and observer registration pattern — introducing application-layer complexity to solve an interface-layer concern. + +**Arch-lint scoping:** Huma and chi are declared as vendor blocks usable only by `interfaces-api`, mirroring how `bubbletea` is scoped to `interfaces-tui`. This prevents accidental import from domain or application layers. + +## Consequences + +**What becomes easier:** +- External systems integrate with AWF without shelling out to the CLI. +- OpenAPI 3.1 spec is always in sync with the implementation; no separate doc maintenance. +- SSE subscribers can use the standard browser `EventSource` API or `curl --no-buffer`. +- New endpoints follow the established Huma handler pattern without further architectural decisions. + +**What becomes harder:** +- Replacing Huma v2 requires rewriting all handler signatures and regenerating the OpenAPI spec. +- SSE polling creates O(subscribers) goroutines per active execution; large subscriber counts require monitoring. +- WebSocket upgrades are not possible through the same SSE endpoint; bidirectional communication would require a separate endpoint and a new framework decision. +- Breaking changes to endpoint paths or response shapes require semver major bumps once external consumers build against the OpenAPI contract. + +## Constitution Compliance + +| Principle | Status | Justification | +|-----------|--------|---------------| +| Hexagonal Architecture | Compliant | HTTP types confined to `interfaces-api`; Huma + chi vendor-scoped to that layer; infrastructure wiring in `interfaces-cli/serve.go`; no HTTP imports in domain or application | +| Go Idioms | Compliant | Standard `net/http` compatible chi router; `context.Context` propagation through all handlers; SSE goroutines select on `r.Context().Done()` before every poll iteration | +| Minimal Abstraction | Compliant | SSE polling reuses existing `GetAllStepStates()` with no new domain ports; 3 local port interfaces are intentional consumer-defined redundancy per ADR-001 pattern | +| Error Taxonomy | Compliant | Existing `StructuredError` codes map to HTTP semantics via middleware (`USER→400`, `WORKFLOW→422`, `EXECUTION→500`, `SYSTEM→503`); no new exit codes required | +| Security First | Compliant | Default `--host=127.0.0.1` loopback binding; non-loopback is opt-in via `--host`; secret masking unchanged at infrastructure layer | +| Test-Driven Development | Compliant | Unit tests per handler; goroutine-leak test for SSE (delta ≤ 5); 50-concurrent-subscriber test for NFR-002; `make test-race` required before merge | diff --git a/docs/ADR/README.md b/docs/ADR/README.md index ca1f397f..64c5bf1f 100644 --- a/docs/ADR/README.md +++ b/docs/ADR/README.md @@ -42,6 +42,8 @@ Numbers are never reused. If a decision is reversed, the original ADR is marked | [012](012-runtime-shell-detection.md) | Runtime Shell Detection with $SHELL Environment Variable | Accepted | | [013](013-context-aware-input-ports.md) | Context-Aware Input Ports | Accepted | | [014](014-shebang-execution-for-script-files.md) | Shebang Execution for Script Files | Accepted | +| [015](015-grpc-go-plugin-transport-for-external-plugins.md) | gRPC via go-plugin as External Plugin Transport | Accepted | +| [016](016-http-interface-adapter-huma-sse-streaming.md) | HTTP Interface Adapter with Huma v2 and SSE Streaming | Accepted | ## Creating a New ADR diff --git a/docs/README.md b/docs/README.md index ebfaab7e..3bcb51c8 100644 --- a/docs/README.md +++ b/docs/README.md @@ -48,6 +48,7 @@ Learn how to use AWF effectively: - [Plugins](user-guide/plugins.md) - Extend AWF with custom operations, validators, and step types; transport security (AutoMTLS, binary integrity verification) and log forwarding - [Plugin Events](user-guide/plugin-events.md) - Real-time event reactivity between plugins and core - [Workflow Packs](user-guide/workflow-packs.md) - Install, execute (`awf run pack/workflow`), and manage reusable workflow packs with 3-tier path resolution +- [HTTP API](user-guide/api.md) - REST API server with OpenAPI 3.1 spec, async workflow execution, real-time SSE streaming, and remote integration - [Terminal UI (TUI)](user-guide/tui.md) - Interactive dashboard for workflow browsing, monitoring, history, and agent conversations - [Upgrading AWF](user-guide/upgrade.md) - Self-update command with version checking, checksum verification, and atomic binary replacement - [Audit Trail](user-guide/audit-trail.md) - Structured execution audit log with JSONL output diff --git a/docs/superpowers/specs/2026-05-17-http-server-design.md b/docs/superpowers/specs/2026-05-17-http-server-design.md new file mode 100644 index 00000000..da069067 --- /dev/null +++ b/docs/superpowers/specs/2026-05-17-http-server-design.md @@ -0,0 +1,185 @@ +# HTTP Server Interface Layer — Design Spec + +**Date**: 2026-05-17 +**Status**: Approved +**Scope**: New interface layer for AWF CLI — HTTP API with auto-generated OpenAPI spec + +## Objective + +Expose AWF workflow monitoring and execution capabilities through an HTTP API, alongside the existing CLI and TUI interfaces. The OpenAPI spec is auto-generated from Go types — no separate spec file to maintain. + +## Decisions + +| Decision | Choice | Alternative considered | Trade-off | +|----------|--------|----------------------|-----------| +| Framework | Huma v2 + chi v5 | chi + swaggo, ogen, net/http only | Huma imposes input/output struct conventions but guarantees spec-code sync | +| Real-time | SSE via `huma/sse` | WebSocket, polling-only | SSE is unidirectional (sufficient for monitoring), simpler than WebSocket | +| Auth | None in v1 (localhost-only) | API key, JWT | Deferred to reduce scope; `--host` flag allows explicit override | +| Execution model | Async via `RunAsync()` | Sync blocking | Matches TUI pattern; client gets `execution_id` immediately, follows via SSE | + +## Architecture + +``` +internal/interfaces/api/ +├── server.go # Server struct, chi router, huma API, Start/Shutdown +├── bridge.go # Bridge adapter (WorkflowService, ExecutionService, HistoryService) +├── handlers_workflow.go # Workflow CRUD + validation + run +├── handlers_execution.go # Execution monitoring + cancel + resume +├── handlers_history.go # History listing + stats +├── types.go # Huma input/output structs (drive OpenAPI generation) +└── doc.go # Package documentation +``` + +New CLI command: `awf serve` in `internal/interfaces/cli/serve.go`. + +### Layer Dependencies + +- `api/` imports: `application/`, `domain/workflow/`, `domain/ports/` (inward only) +- `api/` does NOT import: `infrastructure/`, `cli/`, `tui/` +- Bridge pattern identical to `tui/bridge.go` — adapts application services to handler needs + +## API Endpoints + +### Workflows + +| Method | Path | OperationID | Description | +|--------|------|-------------|-------------| +| GET | `/api/workflows` | `list-workflows` | List all workflows with metadata | +| GET | `/api/workflows/{name}` | `get-workflow` | Full workflow definition (steps, inputs, hooks) | +| POST | `/api/workflows/{name}/validate` | `validate-workflow` | Static validation, returns errors list | +| POST | `/api/workflows/{name}/run` | `run-workflow` | Start async execution, returns `execution_id` | + +### Executions + +| Method | Path | OperationID | Description | +|--------|------|-------------|-------------| +| GET | `/api/executions` | `list-executions` | Active and recent executions | +| GET | `/api/executions/{id}` | `get-execution` | Execution detail (status, steps, outputs) | +| GET | `/api/executions/{id}/events` | `stream-execution-events` | SSE stream of execution events | +| DELETE | `/api/executions/{id}` | `cancel-execution` | Cancel running execution | +| POST | `/api/executions/{id}/resume` | `resume-execution` | Resume failed execution | + +### History + +| Method | Path | OperationID | Description | +|--------|------|-------------|-------------| +| GET | `/api/history` | `list-history` | Execution history with filters | +| GET | `/api/history/stats` | `get-history-stats` | Aggregated statistics | + +### Auto-generated Routes (by Huma) + +- `GET /docs` — Swagger UI +- `GET /openapi.json` — OpenAPI 3.1 spec (JSON) +- `GET /openapi.yaml` — OpenAPI 3.1 spec (YAML) + +## SSE Event Types + +```go +map[string]any{ + "step_started": StepStartedEvent{}, + "step_completed": StepCompletedEvent{}, + "step_failed": StepFailedEvent{}, + "workflow_completed": WorkflowCompletedEvent{}, + "workflow_failed": WorkflowFailedEvent{}, + "output": OutputEvent{}, +} +``` + +Events are typed Go structs — Huma's `sse.Register` matches data type to event name automatically. + +## Huma Type Examples + +```go +type ListWorkflowsOutput struct { + Body []WorkflowSummary +} + +type WorkflowSummary struct { + Name string `json:"name" doc:"Workflow identifier"` + Version string `json:"version" doc:"Semantic version"` + Description string `json:"description" doc:"Human-readable description"` +} + +type RunWorkflowInput struct { + Name string `path:"name" doc:"Workflow name"` + Body struct { + Inputs map[string]any `json:"inputs" doc:"Workflow input values"` + } +} + +type RunWorkflowOutput struct { + Body struct { + ExecutionID string `json:"execution_id" doc:"Unique execution identifier"` + Status string `json:"status" doc:"Initial execution status"` + } +} +``` + +Struct tags (`doc:`, `example:`, `required:`, `json:`) feed the OpenAPI spec directly. + +## Concurrency Model + +``` +Client Server ExecutionService + | | | + |-- POST /run ----------->|-- RunAsync() --------------->| + |<-- 202 {exec_id} ------| store in activeExecutions | + | | | + |-- GET /events (SSE) --->|-- poll ExecutionContext <-----| + |<-- step_started --------| every 200ms | + |<-- step_completed ------| | + |<-- workflow_completed --| cleanup activeExecutions | + | (stream closes) | | +``` + +- `activeExecutions`: `sync.Map` in Bridge, keyed by execution ID +- Polling interval: 200ms (matches TUI) +- SSE stream closes after terminal event or client disconnect +- Context cancellation propagates to `ExecutionService` on DELETE + +## CLI Command + +``` +awf serve [flags] + +Flags: + --port int Server port (default: 2511) + --host string Bind address (default: 127.0.0.1) +``` + +Graceful shutdown: `signal.NotifyContext(SIGINT, SIGTERM)` + `srv.Shutdown()` with 30s timeout for active SSE streams. + +## Wiring + +Same pattern as `cli/run.go`: + +1. Create infrastructure (repository, stores, executors, logger) +2. Create application services (WorkflowService, ExecutionService, HistoryService) +3. Wire optional providers (agents, plugins, OTel) +4. Create Bridge with services +5. Create Server with Bridge +6. `server.Start(ctx)` + +## Out of Scope (v1) + +- Authentication/authorization (localhost-only binding) +- HTTPS termination (use reverse proxy) +- WebSocket (SSE sufficient for unidirectional monitoring) +- Plugin management via API +- Configuration management via API +- Rate limiting + +## Dependencies + +New: +- `github.com/danielgtaylor/huma/v2` +- `github.com/go-chi/chi/v5` + +No changes to existing packages. + +## Testing Strategy + +- Unit tests: each handler with mocked Bridge methods +- Integration tests: full server startup, HTTP requests, SSE stream consumption +- Benchmark: SSE throughput with concurrent subscribers +- Race detection: `make test-race` for `sync.Map` and concurrent executions diff --git a/docs/user-guide/api.md b/docs/user-guide/api.md new file mode 100644 index 00000000..b6647cea --- /dev/null +++ b/docs/user-guide/api.md @@ -0,0 +1,627 @@ +--- +title: "HTTP API" +description: "AWF HTTP server for remote workflow execution and monitoring" +lead: "Run and monitor AWF workflows over HTTP with REST endpoints and Server-Sent Events streaming" +--- + +## Overview + +The AWF HTTP API server lets you trigger, monitor, and manage workflow executions remotely without shelling out to the CLI. Perfect for integrating AWF into CI/CD pipelines, web dashboards, IDE extensions, or any external system that needs to orchestrate workflows over HTTP. + +**Key features:** +- **REST endpoints** for workflow discovery, validation, and execution +- **Server-Sent Events (SSE)** for real-time step-by-step execution monitoring +- **Auto-generated OpenAPI 3.1 specification** at `/openapi.json` with Swagger UI at `/docs` +- **Async execution** — start a workflow and check progress via polling or SSE stream +- **Execution history and statistics** for audit and analytics + +## Starting the Server + +```bash +awf serve +``` + +**Flags:** +- `--port ` — Port to bind on (default: `2511`) +- `--host ` — Host to bind on (default: `127.0.0.1`) + +**Example:** +```bash +# Start on default localhost:2511 +awf serve + +# Start on a specific port +awf serve --port 8080 + +# Bind to all interfaces (use at your own risk in production) +awf serve --host 0.0.0.0 --port 8080 +``` + +Once running: +- **Swagger UI**: `http://localhost:2511/docs` +- **OpenAPI spec**: `http://localhost:2511/openapi.json` +- **API endpoints**: `http://localhost:2511/api/workflows`, `/api/executions`, `/api/history` + +## Endpoints + +### Workflow Discovery & Validation + +#### List workflows + +```http +GET /api/workflows +``` + +**Response (200 OK):** +```json +{ + "body": { + "workflows": [ + { + "name": "code-review", + "version": "1.0.0", + "description": "Review code for bugs and security issues" + }, + { + "name": "deploy-app", + "version": "2.1.0", + "description": "Deploy application to production" + } + ] + } +} +``` + +#### Get workflow details + +```http +GET /api/workflows/{name} +``` + +**Response (200 OK):** +```json +{ + "body": { + "name": "code-review", + "version": "1.0.0", + "description": "Review code for bugs and security issues", + "states": { + "initial": "read", + "read": { + "type": "step", + "command": "cat {{inputs.file}}" + } + } + } +} +``` + +**Error (404 Not Found):** +```json +{ + "status": 404, + "title": "Not Found", + "detail": "workflow not found: nonexistent" +} +``` + +#### Validate workflow + +```http +POST /api/workflows/{name}/validate +``` + +**Response (200 OK — valid workflow):** +```json +{ + "body": { + "errors": [] + } +} +``` + +**Response (200 OK — invalid workflow):** +```json +{ + "body": { + "errors": [ + "state 'invalid_ref' references undefined state" + ] + } +} +``` + +### Workflow Execution + +#### Run workflow (async) + +```http +POST /api/workflows/{name}/run +Content-Type: application/json + +{ + "inputs": { + "file": "main.go", + "model": "claude-opus-4-20250805" + } +} +``` + +**Response (202 Accepted):** +```json +{ + "body": { + "execution_id": "550e8400-e29b-41d4-a716-446655440000", + "status": "accepted" + } +} +``` + +The workflow begins execution asynchronously. Use the `execution_id` to monitor progress via the events endpoint or polling. + +**Error (404 Not Found):** +```json +{ + "status": 404, + "title": "Not Found", + "detail": "workflow not found: nonexistent" +} +``` + +**Error (422 Unprocessable Entity):** +```json +{ + "status": 422, + "title": "Unprocessable Entity", + "detail": "failed to start execution: " +} +``` + +#### List executions + +```http +GET /api/executions +``` + +**Response (200 OK):** +```json +{ + "body": { + "executions": [ + { + "execution_id": "550e8400-e29b-41d4-a716-446655440000", + "workflow_name": "code-review", + "status": "running", + "current_step": "analyze", + "started_at": "2026-05-17T20:39:00Z", + "updated_at": "2026-05-17T20:39:05Z" + } + ] + } +} +``` + +#### Get execution details + +```http +GET /api/executions/{id} +``` + +**Response (200 OK):** +```json +{ + "body": { + "execution_id": "550e8400-e29b-41d4-a716-446655440000", + "workflow_name": "code-review", + "status": "running", + "current_step": "analyze", + "started_at": "2026-05-17T20:39:00Z", + "updated_at": "2026-05-17T20:39:05Z" + } +} +``` + +**Error (404 Not Found):** +```json +{ + "status": 404, + "title": "Not Found", + "detail": "execution not found: " +} +``` + +#### Cancel execution + +```http +DELETE /api/executions/{id} +``` + +**Response (204 No Content)** + +Cancels the running workflow. The execution transitions to `cancelled` state and all running steps are terminated gracefully. Idempotent — returns 204 even if the execution does not exist. + +#### Resume failed execution + +```http +POST /api/executions/{id}/resume +Content-Type: application/json + +{ + "input_overrides": {"model": "claude-sonnet-4-20250514"}, + "from_step": "analyze" +} +``` + +**Response (200 OK):** +```json +{ + "body": { + "execution_id": "661f9500-f30c-52e5-c827-557766551111", + "status": "accepted" + } +} +``` + +A new execution ID is assigned to the resumed run. Monitor progress via the new ID. + +**Error (404 Not Found):** +```json +{ + "status": 404, + "title": "Not Found", + "detail": "execution not found or cannot be resumed: " +} +``` + +### Real-Time Event Streaming + +#### Stream execution events (Server-Sent Events) + +```http +GET /api/executions/{id}/events +Accept: text/event-stream +``` + +This endpoint returns a Server-Sent Events stream. Keep the connection open to receive real-time step updates as the workflow executes. The stream closes automatically when the workflow reaches a terminal state (completed, failed, or cancelled). + +**Event stream example:** +``` +event: step.started +data: {"step_name":"read","status":"running","started_at":"2026-05-17T20:39:00Z"} + +event: step.completed +data: {"step_name":"read","status":"completed","output":"package main...","completed_at":"2026-05-17T20:39:02Z"} + +event: step.started +data: {"step_name":"analyze","status":"running","started_at":"2026-05-17T20:39:02Z"} + +event: step.failed +data: {"step_name":"analyze","status":"failed","error":"timeout exceeded","completed_at":"2026-05-17T20:40:15Z"} + +event: workflow.failed +data: {"workflow_name":"code-review","status":"failed","completed_at":"2026-05-17T20:40:15Z"} +``` + +**Event types:** + +| Event | Description | Fields | +|-------|-------------|--------| +| `step.started` | Step execution began | `step_name`, `status`, `started_at` | +| `step.completed` | Step finished successfully | `step_name`, `status`, `output`, `completed_at` | +| `step.failed` | Step execution failed | `step_name`, `status`, `error`, `completed_at` | +| `workflow.completed` | Workflow completed | `workflow_name`, `status`, `completed_at` | +| `workflow.failed` | Workflow failed or cancelled | `workflow_name`, `status`, `error`, `completed_at` | +| `output` | Incremental step output | `step_name`, `output` | + +**Polling interval:** Events are emitted every ~200ms as state transitions occur. + +**Error (404 Not Found):** +Stream returns 404 before opening if the execution does not exist. + +### Execution History & Statistics + +#### List historical executions + +```http +GET /api/history?workflow=code-review&status=failed&limit=50 +``` + +**Query parameters:** +- `workflow` — Filter by workflow name (optional) +- `status` — Filter by status: `success`, `failed`, `cancelled` (optional) +- `since` — Start date, RFC 3339 (optional) +- `until` — End date, RFC 3339 (optional) +- `limit` — Max results (optional) + +**Response (200 OK):** +```json +{ + "body": { + "entries": [ + { + "id": "rec-abc123", + "workflow_name": "code-review", + "status": "failed", + "started_at": "2026-05-16T15:30:00Z", + "completed_at": "2026-05-16T15:31:00Z", + "duration_ms": 60000 + }, + { + "id": "rec-def456", + "workflow_name": "code-review", + "status": "success", + "started_at": "2026-05-16T14:20:00Z", + "completed_at": "2026-05-16T14:21:00Z", + "duration_ms": 60000 + } + ] + } +} +``` + +#### Get execution statistics + +```http +GET /api/history/stats?workflow=code-review +``` + +**Query parameters:** +- `workflow` — Filter by workflow name (optional) +- `status` — Filter by status (optional) +- `since` — Start date, RFC 3339 (optional) +- `until` — End date, RFC 3339 (optional) + +**Response (200 OK):** +```json +{ + "body": { + "TotalExecutions": 142, + "SuccessCount": 128, + "FailedCount": 12, + "CancelledCount": 2, + "AvgDurationMs": 45000 + } +} +``` + +## OpenAPI Specification + +The API serves an auto-generated OpenAPI 3.1 specification: + +```bash +# Download OpenAPI specification +curl http://localhost:2511/openapi.json + +# Or YAML format +curl http://localhost:2511/openapi.yaml + +# View Swagger UI in browser +open http://localhost:2511/docs +``` + +## Client Libraries & Integration + +### cURL + +```bash +# Run a workflow +RESULT=$(curl -s -X POST http://localhost:2511/api/workflows/code-review/run \ + -H "Content-Type: application/json" \ + -d '{"inputs": {"file": "main.go"}}') + +EXEC_ID=$(echo $RESULT | jq -r '.body.execution_id') + +# Stream events +curl -N http://localhost:2511/api/executions/$EXEC_ID/events + +# Get status +curl http://localhost:2511/api/executions/$EXEC_ID +``` + +### JavaScript/TypeScript + +```typescript +// Start execution +const response = await fetch('http://localhost:2511/api/workflows/code-review/run', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ inputs: { file: 'main.go' } }) +}); + +const { body } = await response.json(); +const executionId = body.execution_id; + +// Stream events +const eventSource = new EventSource( + `http://localhost:2511/api/executions/${executionId}/events` +); + +eventSource.addEventListener('step.started', (event) => { + console.log('Step started:', JSON.parse(event.data)); +}); + +eventSource.addEventListener('workflow.completed', (event) => { + console.log('Workflow done:', JSON.parse(event.data)); + eventSource.close(); +}); +``` + +### Python + +```python +import requests +import json + +# Start execution +response = requests.post( + 'http://localhost:2511/api/workflows/code-review/run', + json={'inputs': {'file': 'main.go'}} +) + +execution_id = response.json()['body']['execution_id'] + +# Stream events +response = requests.get( + f'http://localhost:2511/api/executions/{execution_id}/events', + stream=True +) + +for line in response.iter_lines(): + if line.startswith(b'event: '): + event_type = line.decode().split(': ', 1)[1] + print(f'Event: {event_type}') +``` + +## Error Handling + +All error responses follow RFC 7807 Problem Details format (provided by Huma): + +```json +{ + "status": 422, + "title": "Unprocessable Entity", + "detail": "Human-readable error description" +} +``` + +**Common HTTP status codes:** +- `400` — Bad request (missing required field, invalid JSON) +- `404` — Resource not found (unknown workflow or execution ID) +- `422` — Unprocessable entity (valid JSON but semantic error) +- `500` — Internal server error + +## Security Considerations + +**Default behavior:** +- Server binds to `127.0.0.1:2511` by default — localhost only +- No authentication in v1 (assumes isolated network or reverse proxy) +- No HTTPS/TLS in the server (use a reverse proxy like nginx) + +**For production deployments:** +1. Run behind a reverse proxy (nginx, HAProxy, etc.) with: + - HTTPS/TLS termination + - Authentication (OAuth, API key, mutual TLS) + - Rate limiting + - Request logging +2. Use `--host 127.0.0.1` or `--host [::1]` to prevent accidental network exposure +3. Consider running in a container with restricted network access +4. Monitor `/api/executions` for long-running or stuck workflows +5. Configure appropriate timeouts for long-duration workflows + +## Graceful Shutdown + +The server listens for SIGINT and SIGTERM signals. On shutdown: + +1. New requests return `503 Service Unavailable` +2. Active SSE streams are drained (30-second timeout) +3. Running workflows continue execution (separate from the HTTP server) +4. Server exits cleanly + +To stop the server: +```bash +kill -TERM $(pgrep -f "awf serve") # or Ctrl+C in foreground +``` + +## Examples + +### Full workflow execution flow + +```bash +# 1. Start server +awf serve --port 8080 & + +# 2. List available workflows +curl http://localhost:8080/api/workflows + +# 3. Validate a workflow before running +curl -X POST http://localhost:8080/api/workflows/code-review/validate + +# 4. Start a workflow execution +RESPONSE=$(curl -s -X POST http://localhost:8080/api/workflows/code-review/run \ + -H "Content-Type: application/json" \ + -d '{"inputs": {"file": "src/main.go"}}') + +EXEC_ID=$(echo $RESPONSE | jq -r '.body.execution_id') +echo "Execution ID: $EXEC_ID" + +# 5. Monitor execution in real-time via SSE +curl -N http://localhost:8080/api/executions/$EXEC_ID/events + +# Or poll for status +curl http://localhost:8080/api/executions/$EXEC_ID | jq '.body | {status, current_step}' + +# 6. Check execution history +curl "http://localhost:8080/api/history?workflow=code-review&limit=10" + +# 7. Get statistics +curl http://localhost:8080/api/history/stats?workflow=code-review +``` + +### Integrate with CI/CD (GitHub Actions) + +```yaml +name: Code Review Workflow +on: [pull_request] + +jobs: + review: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Run AWF code review via API + run: | + RESPONSE=$(curl -s -X POST http://awf-server:2511/api/workflows/code-review/run \ + -H "Content-Type: application/json" \ + -d '{"inputs": {"file": "src/main.go"}}') + + EXEC_ID=$(echo $RESPONSE | jq -r '.body.execution_id') + + # Poll until complete + while true; do + STATUS=$(curl -s http://awf-server:2511/api/executions/$EXEC_ID | jq -r '.body.status') + [ "$STATUS" = "completed" ] || [ "$STATUS" = "failed" ] && break + sleep 2 + done + + # Get final status + curl -s http://awf-server:2511/api/executions/$EXEC_ID | jq '.body' +``` + +## Performance & Limits + +- **Default port**: `2511` +- **Event polling cadence**: 200ms (real-time within ~200ms) +- **Graceful shutdown timeout**: 30 seconds +- **Max concurrent SSE subscribers per execution**: 50+ (tested) +- **Response time**: Most endpoints respond within 100ms +- **Workflow execution**: Runs asynchronously; HTTP response is immediate (≤100ms) + +## Troubleshooting + +**Server won't start on port 2511** +```bash +# Check if port is in use +lsof -i :2511 + +# Use a different port +awf serve --port 8080 +``` + +**SSE stream closes unexpectedly** +- Check network connectivity (firewall, proxy timeouts) +- Verify execution exists: `curl http://localhost:2511/api/executions/{id}` +- Check server logs for errors + +**API returns 404 for a workflow** +- Verify workflow exists: `awf list` or `curl http://localhost:2511/api/workflows` +- Check workflow YAML syntax: `awf validate ` + +**404 on `/` or `/api`** +- There is no root handler. Use specific endpoints: `/api/workflows`, `/api/executions`, `/api/history` +- Browse `/docs` for interactive Swagger UI + +**Slow response times** +- Check server load: `curl http://localhost:2511/api/executions` +- Look for long-running workflows blocking execution +- Monitor reverse proxy (if using one) for bottlenecks diff --git a/docs/user-guide/commands.md b/docs/user-guide/commands.md index c82fed5e..20e7ebd7 100644 --- a/docs/user-guide/commands.md +++ b/docs/user-guide/commands.md @@ -34,6 +34,7 @@ title: "CLI Commands" | `awf workflow remove ` | Remove an installed workflow pack | | `awf workflow search [query]` | Search for workflow packs on GitHub | | `awf config show` | Display project configuration | +| `awf serve` | Start HTTP API server for remote execution and monitoring | | `awf upgrade` | Upgrade AWF to the latest version | | `awf upgrade --check` | Check for available updates without installing | | `awf upgrade --version ` | Install a specific version | @@ -598,6 +599,72 @@ awf status abc123-def456 -f json --- +## awf serve + +Start an HTTP API server for remote workflow execution and monitoring. + +```bash +awf serve [flags] +``` + +### Flags + +| Flag | Description | +|------|-------------| +| `--port ` | Port to bind on (default: `2511`) | +| `--host ` | Host to bind on (default: `127.0.0.1`) | + +### Description + +Launches an HTTP API server exposing REST endpoints and Server-Sent Events (SSE) streaming for: +- **Workflow discovery** — list and fetch workflow definitions +- **Workflow validation** — statically validate workflows before execution +- **Async execution** — start workflows and receive `execution_id` immediately +- **Real-time monitoring** — stream step-by-step progress via SSE +- **Execution history** — query historical executions and aggregated statistics + +The server generates an auto-synced OpenAPI 3.1 specification served at `/openapi.json` with interactive Swagger UI at `/docs`. + +Default binding is `127.0.0.1:2511` (localhost only) — use `--host 0.0.0.0` at your own risk in production without authentication. + +### Graceful Shutdown + +The server listens for SIGINT (Ctrl+C) and SIGTERM signals. On shutdown: +1. New HTTP requests return 503 +2. Active SSE streams drain within 30 seconds +3. Running workflows continue (separate from the HTTP server lifecycle) +4. Server exits cleanly + +### Examples + +```bash +# Start on localhost (default) +awf serve + +# Start on a custom port +awf serve --port 8080 + +# Bind to all interfaces +awf serve --host 0.0.0.0 --port 8080 + +# Access the API +curl http://localhost:2511/api/workflows + +# Open interactive API docs +open http://localhost:2511/docs +``` + +### Security Considerations + +- **Default localhost binding** prevents accidental network exposure +- **No authentication in v1** — requires running in isolated network or behind a reverse proxy with auth +- For production: use reverse proxy (nginx, HAProxy) with HTTPS, authentication, and rate limiting +- Monitor `/api/executions` for long-running or stuck workflows + +See [HTTP API Documentation](api.md) for endpoint reference, client libraries, and integration examples. + +--- + ## awf tui Open an interactive terminal user interface for browsing, executing, and monitoring workflows. diff --git a/go.mod b/go.mod index 6051df8e..998bb415 100644 --- a/go.mod +++ b/go.mod @@ -21,7 +21,7 @@ require ( go.opentelemetry.io/otel/trace v1.43.0 go.uber.org/zap v1.27.1 golang.org/x/sync v0.20.0 - golang.org/x/term v0.41.0 + golang.org/x/term v0.42.0 google.golang.org/grpc v1.80.0 google.golang.org/protobuf v1.36.11 gopkg.in/yaml.v3 v3.0.1 @@ -40,8 +40,10 @@ require ( github.com/charmbracelet/x/windows v0.2.2 // indirect github.com/clipperhouse/displaywidth v0.11.0 // indirect github.com/clipperhouse/uax29/v2 v2.7.0 // indirect + github.com/danielgtaylor/huma/v2 v2.38.0 github.com/davecgh/go-spew v1.1.1 // indirect github.com/dustin/go-humanize v1.0.1 // indirect + github.com/go-chi/chi/v5 v5.2.5 github.com/go-logr/logr v1.4.3 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/golang/protobuf v1.5.4 // indirect @@ -50,7 +52,7 @@ require ( github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/lucasb-eyer/go-colorful v1.4.0 // indirect github.com/mattn/go-colorable v0.1.14 // indirect - github.com/mattn/go-isatty v0.0.20 // indirect + github.com/mattn/go-isatty v0.0.21 // indirect github.com/mattn/go-runewidth v0.0.23 // indirect github.com/muesli/cancelreader v0.2.2 // indirect github.com/ncruces/go-strftime v1.0.0 // indirect @@ -66,9 +68,9 @@ require ( go.opentelemetry.io/proto/otlp v1.10.0 // indirect go.uber.org/multierr v1.11.0 // indirect golang.org/x/exp v0.0.0-20260112195511-716be5621a96 // indirect - golang.org/x/net v0.52.0 // indirect + golang.org/x/net v0.53.0 // indirect golang.org/x/sys v0.43.0 // indirect - golang.org/x/text v0.35.0 // indirect + golang.org/x/text v0.36.0 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20260401024825-9d38bb4040a9 // indirect modernc.org/libc v1.67.7 // indirect diff --git a/go.sum b/go.sum index 8af27184..937692c1 100644 --- a/go.sum +++ b/go.sum @@ -33,6 +33,8 @@ github.com/clipperhouse/displaywidth v0.11.0/go.mod h1:bkrFNkf81G8HyVqmKGxsPufD3 github.com/clipperhouse/uax29/v2 v2.7.0 h1:+gs4oBZ2gPfVrKPthwbMzWZDaAFPGYK72F0NJv2v7Vk= github.com/clipperhouse/uax29/v2 v2.7.0/go.mod h1:EFJ2TJMRUaplDxHKj1qAEhCtQPW2tJSwu5BF98AuoVM= github.com/cpuguy83/go-md2man/v2 v2.0.6/go.mod h1:oOW0eioCTA6cOiMLiUPZOpcVxMig6NIQQ7OS05n1F4g= +github.com/danielgtaylor/huma/v2 v2.38.0 h1:fb0WZCatnaiHLphMQDDWDjygNxfMkX/ENma3QsRl7vY= +github.com/danielgtaylor/huma/v2 v2.38.0/go.mod h1:k9hwjlgWFt1t2jsmQGlsgXAG2FBTZa4kkjV581qAtfo= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= @@ -43,6 +45,8 @@ github.com/expr-lang/expr v1.17.7/go.mod h1:8/vRC7+7HBzESEqt5kKpYXxrxkr31SaO8r40 github.com/fatih/color v1.13.0/go.mod h1:kLAiJbzzSOZDVNGyDpeOxJ47H46qBXwg5ILebYFFOfk= github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM= github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU= +github.com/go-chi/chi/v5 v5.2.5 h1:Eg4myHZBjyvJmAFjFvWgrqDTXFyOzjj7YIm3L3mu6Ug= +github.com/go-chi/chi/v5 v5.2.5/go.mod h1:X7Gx4mteadT3eDOMTsXzmI4/rwUpOwBHLpAfupzFJP0= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI= github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= @@ -84,8 +88,8 @@ github.com/mattn/go-colorable v0.1.14 h1:9A9LHSqF/7dyVVX6g0U9cwm9pG3kP9gSzcuIPHP github.com/mattn/go-colorable v0.1.14/go.mod h1:6LmQG8QLFO4G5z1gPvYEzlUgJ2wF+stgPZH1UqBm1s8= github.com/mattn/go-isatty v0.0.12/go.mod h1:cbi8OIDigv2wuxKPP5vlRcQ1OAZbq2CE4Kysco4FUpU= github.com/mattn/go-isatty v0.0.14/go.mod h1:7GGIvUiUoEMVVmxf/4nioHXj79iQHKdU27kJ6hsGG94= -github.com/mattn/go-isatty v0.0.20 h1:xfD0iDuEKnDkl03q4limB+vH+GxLEtL/jb4xVJSWWEY= -github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D7dTCTo3Y= +github.com/mattn/go-isatty v0.0.21 h1:xYae+lCNBP7QuW4PUnNG61ffM4hVIfm+zUzDuSzYLGs= +github.com/mattn/go-isatty v0.0.21/go.mod h1:ZXfXG4SQHsB/w3ZeOYbR0PrPwLy+n6xiMrJlRFqopa4= github.com/mattn/go-runewidth v0.0.23 h1:7ykA0T0jkPpzSvMS5i9uoNn2Xy3R383f9HDx3RybWcw= github.com/mattn/go-runewidth v0.0.23/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs= github.com/muesli/cancelreader v0.2.2 h1:3I4Kt4BQjOR54NavqnDogx/MIoWBFa0StPA8ELUXHmA= @@ -145,10 +149,10 @@ go.uber.org/zap v1.27.1/go.mod h1:GB2qFLM7cTU87MWRP2mPIjqfIDnGu+VIO4V/SdhGo2E= go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= golang.org/x/exp v0.0.0-20260112195511-716be5621a96 h1:Z/6YuSHTLOHfNFdb8zVZomZr7cqNgTJvA8+Qz75D8gU= golang.org/x/exp v0.0.0-20260112195511-716be5621a96/go.mod h1:nzimsREAkjBCIEFtHiYkrJyT+2uy9YZJB7H1k68CXZU= -golang.org/x/mod v0.33.0 h1:tHFzIWbBifEmbwtGz65eaWyGiGZatSrT9prnU8DbVL8= -golang.org/x/mod v0.33.0/go.mod h1:swjeQEj+6r7fODbD2cqrnje9PnziFuw4bmLbBZFrQ5w= -golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0= -golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw= +golang.org/x/mod v0.34.0 h1:xIHgNUUnW6sYkcM5Jleh05DvLOtwc6RitGHbDk4akRI= +golang.org/x/mod v0.34.0/go.mod h1:ykgH52iCZe79kzLLMhyCUzhMci+nQj+0XkbXpNYtVjY= +golang.org/x/net v0.53.0 h1:d+qAbo5L0orcWAr0a9JweQpjXF19LMXJE8Ey7hwOdUA= +golang.org/x/net v0.53.0/go.mod h1:JvMuJH7rrdiCfbeHoo3fCQU24Lf5JJwT9W3sJFulfgs= golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4= golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0= golang.org/x/sys v0.0.0-20200116001909-b77594299b42/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -156,15 +160,14 @@ golang.org/x/sys v0.0.0-20200223170610-d5e6a3e2c0ae/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20210927094055-39ccf1dd6fa6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.0.0-20220503163025-988cb79eb6c6/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.43.0 h1:Rlag2XtaFTxp19wS8MXlJwTvoh8ArU6ezoyFsMyCTNI= golang.org/x/sys v0.43.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw= -golang.org/x/term v0.41.0 h1:QCgPso/Q3RTJx2Th4bDLqML4W6iJiaXFq2/ftQF13YU= -golang.org/x/term v0.41.0/go.mod h1:3pfBgksrReYfZ5lvYM0kSO0LIkAl4Yl2bXOkKP7Ec2A= -golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8= -golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA= -golang.org/x/tools v0.42.0 h1:uNgphsn75Tdz5Ji2q36v/nsFSfR/9BRFvqhGBaJGd5k= -golang.org/x/tools v0.42.0/go.mod h1:Ma6lCIwGZvHK6XtgbswSoWroEkhugApmsXyrUmBhfr0= +golang.org/x/term v0.42.0 h1:UiKe+zDFmJobeJ5ggPwOshJIVt6/Ft0rcfrXZDLWAWY= +golang.org/x/term v0.42.0/go.mod h1:Dq/D+snpsbazcBG5+F9Q1n2rXV8Ma+71xEjTRufARgY= +golang.org/x/text v0.36.0 h1:JfKh3XmcRPqZPKevfXVpI1wXPTqbkE5f7JA92a55Yxg= +golang.org/x/text v0.36.0/go.mod h1:NIdBknypM8iqVmPiuco0Dh6P5Jcdk8lJL0CUebqK164= +golang.org/x/tools v0.43.0 h1:12BdW9CeB3Z+J/I/wj34VMl8X+fEXBxVR90JeMX5E7s= +golang.org/x/tools v0.43.0/go.mod h1:uHkMso649BX2cZK6+RpuIPXS3ho2hZo4FVwfoy1vIk0= gonum.org/v1/gonum v0.17.0 h1:VbpOemQlsSMrYmn7T2OUvQ4dqxQXU+ouZFQsZOx50z4= gonum.org/v1/gonum v0.17.0/go.mod h1:El3tOrEuMpv2UdMrbNlKEh9vd86bmQ6vqIcDwxEOc1E= google.golang.org/genproto/googleapis/api v0.0.0-20260401024825-9d38bb4040a9 h1:VPWxll4HlMw1Vs/qXtN7BvhZqsS9cdAittCNvVENElA= diff --git a/internal/application/execution_service.go b/internal/application/execution_service.go index 4e41c57f..33b691ed 100644 --- a/internal/application/execution_service.go +++ b/internal/application/execution_service.go @@ -443,7 +443,7 @@ func (s *ExecutionService) runExecutionLoop( for { step, ok := wf.Steps[currentStep] if !ok { - execCtx.Status = workflow.StatusFailed + execCtx.SetStatus(workflow.StatusFailed) execErr = fmt.Errorf("step not found: %s", currentStep) break } @@ -452,7 +452,7 @@ func (s *ExecutionService) runExecutionLoop( if step.Type == workflow.StepTypeTerminal { if step.Status == workflow.TerminalFailure { - execCtx.Status = workflow.StatusFailed + execCtx.SetStatus(workflow.StatusFailed) execCtx.ExitCode = step.ExitCode if msg := s.interpolateTerminalMessage(step.Message, s.buildInterpolationContext(execCtx)); msg != "" { execErr = errors.New(msg) @@ -460,9 +460,9 @@ func (s *ExecutionService) runExecutionLoop( execErr = fmt.Errorf("workflow reached terminal failure state: %s", currentStep) } } else { - execCtx.Status = workflow.StatusCompleted + execCtx.SetStatus(workflow.StatusCompleted) } - execCtx.CompletedAt = time.Now() + execCtx.SetCompletedAt(time.Now()) s.checkpoint(ctx, execCtx) terminalErrMsg := "" if execErr != nil { @@ -470,7 +470,7 @@ func (s *ExecutionService) runExecutionLoop( } s.recordExecutionEnd(ctx, execCtx, terminalErrMsg) s.emitWorkflowTerminalEvent(ctx, execCtx, execErr) - s.logger.Info("workflow completed", "step", currentStep, "status", execCtx.Status) + s.logger.Info("workflow completed", "step", currentStep, "status", execCtx.GetStatus()) break } @@ -630,6 +630,13 @@ func (s *ExecutionService) executeStep( return "", err } + // Mark step as running so SSE handler can observe it before completion. + execCtx.SetStepState(step.Name, workflow.StepState{ + Name: step.Name, + Status: workflow.StatusRunning, + StartedAt: startTime, + }) + // T008: Execute command (with retry if configured) result, attempt, execErr := s.executeStepCommand(stepCtx, step, cmd) diff --git a/internal/domain/workflow/context.go b/internal/domain/workflow/context.go index 2604fa85..97bbfea1 100644 --- a/internal/domain/workflow/context.go +++ b/internal/domain/workflow/context.go @@ -138,7 +138,7 @@ func (c *ExecutionContext) GetStepState(stepName string) (StepState, bool) { return state, ok } -// GetAllStepStates returns a copy of all step states in a thread-safe manner. +// GetAllStepStates returns a copy of all step states. func (c *ExecutionContext) GetAllStepStates() map[string]StepState { c.mu.RLock() defer c.mu.RUnlock() @@ -150,6 +150,50 @@ func (c *ExecutionContext) GetAllStepStates() map[string]StepState { return states } +// GetStatus returns the current execution status. +func (c *ExecutionContext) GetStatus() ExecutionStatus { + c.mu.RLock() + defer c.mu.RUnlock() + return c.Status +} + +// SetStatus updates the execution status. +func (c *ExecutionContext) SetStatus(s ExecutionStatus) { + c.mu.Lock() + defer c.mu.Unlock() + c.Status = s + c.UpdatedAt = time.Now() +} + +// GetCompletedAt returns the completion time. +func (c *ExecutionContext) GetCompletedAt() time.Time { + c.mu.RLock() + defer c.mu.RUnlock() + return c.CompletedAt +} + +// SetCompletedAt updates CompletedAt and UpdatedAt. +func (c *ExecutionContext) SetCompletedAt(t time.Time) { + c.mu.Lock() + defer c.mu.Unlock() + c.CompletedAt = t + c.UpdatedAt = time.Now() +} + +// GetCurrentStep returns the name of the step currently executing. +func (c *ExecutionContext) GetCurrentStep() string { + c.mu.RLock() + defer c.mu.RUnlock() + return c.CurrentStep +} + +// GetUpdatedAt returns the last update time. +func (c *ExecutionContext) GetUpdatedAt() time.Time { + c.mu.RLock() + defer c.mu.RUnlock() + return c.UpdatedAt +} + // PushCallStack adds a workflow name to the call stack. // Used when entering a sub-workflow to track the call chain. func (c *ExecutionContext) PushCallStack(workflowName string) { diff --git a/internal/interfaces/api/bridge.go b/internal/interfaces/api/bridge.go new file mode 100644 index 00000000..96971199 --- /dev/null +++ b/internal/interfaces/api/bridge.go @@ -0,0 +1,168 @@ +package api + +import ( + "context" + "errors" + "fmt" + "sync" + + "github.com/google/uuid" + + "github.com/awf-project/cli/internal/domain/workflow" +) + +// WorkflowLister is the driven port for listing and loading workflow definitions. +// It is satisfied by *application.WorkflowService. +type WorkflowLister interface { + ListAllWorkflows(ctx context.Context) ([]workflow.WorkflowEntry, error) + GetWorkflow(ctx context.Context, name string) (*workflow.Workflow, error) + ValidateWorkflow(ctx context.Context, name string) error +} + +// WorkflowRunner is the driven port for executing workflows. +// It is satisfied by *application.ExecutionService. +type WorkflowRunner interface { + RunWorkflowAsync(ctx context.Context, wf *workflow.Workflow, inputs map[string]any) (*workflow.ExecutionContext, <-chan error, error) +} + +// WorkflowResumer is the driven port for resuming interrupted workflow executions. +// Declared separately from WorkflowRunner per Interface Segregation Principle. +// It is satisfied by *application.ExecutionService. +type WorkflowResumer interface { + Resume(ctx context.Context, workflowID string, inputOverrides map[string]any, fromStep string) (*workflow.ExecutionContext, error) +} + +// HistoryProvider is the driven port for querying execution history. +// It is satisfied by *application.HistoryService. +type HistoryProvider interface { + List(ctx context.Context, filter *workflow.HistoryFilter) ([]*workflow.ExecutionRecord, error) + GetStats(ctx context.Context, filter *workflow.HistoryFilter) (*workflow.HistoryStats, error) +} + +// ActiveExecution holds the runtime state of an async workflow execution. +type ActiveExecution struct { + ExecutionID string + WorkflowName string + Ctx context.Context + Cancel context.CancelFunc + ExecutionContext *workflow.ExecutionContext + Done <-chan error +} + +// Bridge adapts application service interfaces to HTTP handlers. +type Bridge struct { + workflows WorkflowLister + runner WorkflowRunner + history HistoryProvider + resumer WorkflowResumer + activeExecutions sync.Map +} + +// NewBridge creates a Bridge wiring the given service interface implementations. +// runner may be nil; calling StartExecution on a nil runner returns a descriptive error. +// workflows and history must not be nil; handlers accessing them will panic otherwise. +func NewBridge(workflows WorkflowLister, runner WorkflowRunner, history HistoryProvider) *Bridge { + return &Bridge{ + workflows: workflows, + runner: runner, + history: history, + } +} + +// StartExecution starts an async workflow execution and tracks it. +// It derives a new execution ID (UUID v4), creates a cancellable child context, +// calls runner.RunWorkflowAsync, stores the ActiveExecution in the sync.Map, +// and spawns a cleanup goroutine that removes the entry once Done closes. +func (b *Bridge) StartExecution(ctx context.Context, wf *workflow.Workflow, inputs map[string]any) (string, *ActiveExecution, error) { + if b.runner == nil { + return "", nil, errors.New("workflow runner is not available") + } + + // Decouple execution lifetime from the HTTP request context so the workflow + // survives after the /run response is sent and the request context closes. + childCtx, cancel := context.WithCancel(context.Background()) + + execCtx, done, err := b.runner.RunWorkflowAsync(childCtx, wf, inputs) + if err != nil { + cancel() + return "", nil, err + } + + id := uuid.NewString() + ae := &ActiveExecution{ + ExecutionID: id, + WorkflowName: wf.Name, + Ctx: childCtx, + Cancel: cancel, + ExecutionContext: execCtx, + Done: done, + } + b.activeExecutions.Store(id, ae) + + go func() { + // Drain all values and wait for done to close before removing the entry. + for range done { //nolint:revive // empty body intentional: drain only + } + b.activeExecutions.Delete(id) + }() + + return id, ae, nil +} + +// GetExecution returns the active execution by ID. +// Returns ok=false if not found. +func (b *Bridge) GetExecution(id string) (*ActiveExecution, bool) { + val, ok := b.activeExecutions.Load(id) + if !ok { + return nil, false + } + return val.(*ActiveExecution), true //nolint:forcetypeassert,errcheck // sync.Map only stores *ActiveExecution +} + +// CancelExecution cancels the execution by ID. +// Returns a descriptive error if not found. Idempotent. +func (b *Bridge) CancelExecution(id string) error { + val, ok := b.activeExecutions.Load(id) + if !ok { + return fmt.Errorf("execution not found: %s", id) + } + val.(*ActiveExecution).Cancel() //nolint:forcetypeassert,errcheck // sync.Map only stores *ActiveExecution + return nil +} + +// ListExecutions returns all active executions currently in the map. +// Order is unspecified. +func (b *Bridge) ListExecutions() []*ActiveExecution { + var result []*ActiveExecution + b.activeExecutions.Range(func(_, val any) bool { + result = append(result, val.(*ActiveExecution)) //nolint:forcetypeassert,errcheck // sync.Map only stores *ActiveExecution + return true + }) + return result +} + +// TrackResumedExecution wraps a synchronously-resumed ExecutionContext in an +// ActiveExecution, assigns it a new UUID, stores it in activeExecutions, and +// returns the assigned ID. Because resume is synchronous the execution is +// already complete; no background context or cancel is needed. +func (b *Bridge) TrackResumedExecution(execCtx *workflow.ExecutionContext) string { + id := uuid.NewString() + closed := make(chan error) + close(closed) + + ae := &ActiveExecution{ + ExecutionID: id, + WorkflowName: execCtx.WorkflowName, + Ctx: context.Background(), + Cancel: func() {}, + ExecutionContext: execCtx, + Done: closed, + } + b.activeExecutions.Store(id, ae) + return id +} + +// SetResumer wires the optional WorkflowResumer dependency. +func (b *Bridge) SetResumer(r WorkflowResumer) { + b.resumer = r +} diff --git a/internal/interfaces/api/bridge_test.go b/internal/interfaces/api/bridge_test.go new file mode 100644 index 00000000..b9259b66 --- /dev/null +++ b/internal/interfaces/api/bridge_test.go @@ -0,0 +1,268 @@ +package api + +import ( + "context" + "errors" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/awf-project/cli/internal/domain/workflow" +) + +// --- mock implementations of Bridge interfaces --- + +type mockWorkflowLister struct { + entries []workflow.WorkflowEntry + wfs map[string]*workflow.Workflow + listErr error + getErr error + validErr error +} + +func newMockWorkflowLister(names ...string) *mockWorkflowLister { + m := &mockWorkflowLister{ + entries: make([]workflow.WorkflowEntry, 0, len(names)), + wfs: make(map[string]*workflow.Workflow, len(names)), + } + for _, name := range names { + m.entries = append(m.entries, workflow.WorkflowEntry{Name: name, Source: "local"}) + m.wfs[name] = &workflow.Workflow{ + Name: name, + Steps: map[string]*workflow.Step{"step-1": {Name: "step-1"}}, + } + } + return m +} + +func (m *mockWorkflowLister) ListAllWorkflows(_ context.Context) ([]workflow.WorkflowEntry, error) { + if m.listErr != nil { + return nil, m.listErr + } + return m.entries, nil +} + +func (m *mockWorkflowLister) GetWorkflow(_ context.Context, name string) (*workflow.Workflow, error) { + if m.getErr != nil { + return nil, m.getErr + } + wf, ok := m.wfs[name] + if !ok { + return nil, errors.New("workflow not found: " + name) + } + return wf, nil +} + +func (m *mockWorkflowLister) ValidateWorkflow(_ context.Context, _ string) error { + return m.validErr +} + +type mockWorkflowRunner struct { + execCtx *workflow.ExecutionContext + runErr error + done <-chan error +} + +func newMockWorkflowRunner() *mockWorkflowRunner { + ctx := workflow.NewExecutionContext("exec-001", "test-workflow") + return &mockWorkflowRunner{execCtx: ctx} +} + +func newMockWorkflowRunnerWithDone(done <-chan error) *mockWorkflowRunner { + ctx := workflow.NewExecutionContext("exec-001", "test-workflow") + return &mockWorkflowRunner{execCtx: ctx, done: done} +} + +func (m *mockWorkflowRunner) RunWorkflowAsync( + _ context.Context, + wf *workflow.Workflow, + _ map[string]any, +) (*workflow.ExecutionContext, <-chan error, error) { + if m.runErr != nil { + return nil, nil, m.runErr + } + // If a custom done channel was set, use it; otherwise create a buffered one + done := m.done + if done == nil { + buffered := make(chan error, 1) + buffered <- nil + done = buffered + } + // Create a fresh execution context for each run with workflow name + execCtx := workflow.NewExecutionContext(wf.Name+"-ctx", wf.Name) + return execCtx, done, nil +} + +type mockHistoryProvider struct { + records []*workflow.ExecutionRecord + stats *workflow.HistoryStats + listErr error + statsErr error +} + +func newMockHistoryProvider() *mockHistoryProvider { + return &mockHistoryProvider{ + records: []*workflow.ExecutionRecord{ + {ID: "rec-1", WorkflowName: "wf-a", Status: "success"}, + }, + stats: &workflow.HistoryStats{ + TotalExecutions: 1, + SuccessCount: 1, + }, + } +} + +func (m *mockHistoryProvider) List(_ context.Context, _ *workflow.HistoryFilter) ([]*workflow.ExecutionRecord, error) { + if m.listErr != nil { + return nil, m.listErr + } + return m.records, nil +} + +func (m *mockHistoryProvider) GetStats(_ context.Context, _ *workflow.HistoryFilter) (*workflow.HistoryStats, error) { + if m.statsErr != nil { + return nil, m.statsErr + } + return m.stats, nil +} + +// --- tests --- + +func TestBridge_NewBridge_WiresDependencies(t *testing.T) { + lister := newMockWorkflowLister("wf-1") + runner := newMockWorkflowRunner() + history := newMockHistoryProvider() + + bridge := NewBridge(lister, runner, history) + + require.NotNil(t, bridge) + assert.NotNil(t, bridge.workflows, "workflows dep must be wired") + assert.NotNil(t, bridge.runner, "runner dep must be wired") + assert.NotNil(t, bridge.history, "history dep must be wired") +} + +func TestBridge_StartExecution_StoresInMap_AndReturnsID(t *testing.T) { + // Use a blocking channel so the cleanup goroutine does not remove the entry + // before GetExecution is called below. + block := make(chan error) + t.Cleanup(func() { close(block) }) + + runner := newMockWorkflowRunnerWithDone(block) + bridge := NewBridge(newMockWorkflowLister("wf-1"), runner, newMockHistoryProvider()) + wf := &workflow.Workflow{Name: "wf-1", Steps: map[string]*workflow.Step{"s1": {Name: "s1"}}} + + id, exec, err := bridge.StartExecution(context.Background(), wf, nil) + + require.NoError(t, err) + assert.NotEmpty(t, id, "must return a non-empty execution ID") + require.NotNil(t, exec) + assert.Equal(t, id, exec.ExecutionID) + assert.Equal(t, "wf-1", exec.WorkflowName) + assert.NotNil(t, exec.Cancel) + assert.NotNil(t, exec.Done) + assert.NotNil(t, exec.ExecutionContext) + + stored, ok := bridge.GetExecution(id) + assert.True(t, ok, "execution must be stored in sync.Map") + assert.Equal(t, exec, stored) +} + +func TestBridge_StartExecution_RunnerError_ReturnsError(t *testing.T) { + runner := newMockWorkflowRunner() + runner.runErr = errors.New("runner failed") + bridge := NewBridge(newMockWorkflowLister("wf-1"), runner, newMockHistoryProvider()) + wf := &workflow.Workflow{Name: "wf-1", Steps: map[string]*workflow.Step{"s1": {Name: "s1"}}} + + id, exec, err := bridge.StartExecution(context.Background(), wf, nil) + + require.Error(t, err) + assert.ErrorContains(t, err, "runner failed") + assert.Empty(t, id) + assert.Nil(t, exec) +} + +func TestBridge_CancelExecution_CallsCancelFunc(t *testing.T) { + // Blocking channel keeps the entry in sync.Map until the test completes, + // so both CancelExecution calls find it and the first does not return "not found". + block := make(chan error) + t.Cleanup(func() { close(block) }) + + runner := newMockWorkflowRunnerWithDone(block) + bridge := NewBridge(newMockWorkflowLister("wf-1"), runner, newMockHistoryProvider()) + wf := &workflow.Workflow{Name: "wf-1", Steps: map[string]*workflow.Step{"s1": {Name: "s1"}}} + id, exec, err := bridge.StartExecution(context.Background(), wf, nil) + require.NoError(t, err) + + cancelErr := bridge.CancelExecution(id) + + assert.NoError(t, cancelErr) + // context must be cancelled after CancelExecution + assert.Error(t, exec.Ctx.Err(), "ctx must be done after cancel") + + // idempotent: second call must not panic + assert.NotPanics(t, func() { _ = bridge.CancelExecution(id) }) +} + +func TestBridge_CancelExecution_UnknownID_ReturnsError(t *testing.T) { + bridge := NewBridge(newMockWorkflowLister(), newMockWorkflowRunner(), newMockHistoryProvider()) + + err := bridge.CancelExecution("does-not-exist") + + require.Error(t, err) + assert.Contains(t, err.Error(), "does-not-exist") +} + +func TestBridge_GetExecution_LiveSnapshot(t *testing.T) { + // Blocking channel prevents the cleanup goroutine from removing the entry + // before GetExecution is called. + block := make(chan error) + t.Cleanup(func() { close(block) }) + + runner := newMockWorkflowRunnerWithDone(block) + bridge := NewBridge(newMockWorkflowLister("wf-1"), runner, newMockHistoryProvider()) + wf := &workflow.Workflow{Name: "wf-1", Steps: map[string]*workflow.Step{"s1": {Name: "s1"}}} + id, _, err := bridge.StartExecution(context.Background(), wf, nil) + require.NoError(t, err) + + exec, ok := bridge.GetExecution(id) + + assert.True(t, ok) + require.NotNil(t, exec) + assert.Equal(t, id, exec.ExecutionID) + assert.Equal(t, "wf-1", exec.WorkflowName) +} + +func TestBridge_ListExecutions_ReturnsActiveAndCompleted(t *testing.T) { + // Use blocking channels to prevent cleanup goroutine from removing entries + blockA := make(chan error) + blockB := make(chan error) + t.Cleanup(func() { close(blockA); close(blockB) }) + + runner := &mockWorkflowRunner{ + execCtx: workflow.NewExecutionContext("exec-a", "wf-a"), + } + bridge := NewBridge(newMockWorkflowLister("wf-a", "wf-b"), runner, newMockHistoryProvider()) + + wfA := &workflow.Workflow{Name: "wf-a", Steps: map[string]*workflow.Step{"s1": {Name: "s1"}}} + runner.done = blockA + _, _, err := bridge.StartExecution(context.Background(), wfA, nil) + require.NoError(t, err) + + wfB := &workflow.Workflow{Name: "wf-b", Steps: map[string]*workflow.Step{"s1": {Name: "s1"}}} + runner.done = blockB + _, _, err = bridge.StartExecution(context.Background(), wfB, nil) + require.NoError(t, err) + + time.Sleep(10 * time.Millisecond) + + list := bridge.ListExecutions() + + assert.Len(t, list, 2) + workflowNames := make([]string, len(list)) + for i, e := range list { + workflowNames[i] = e.WorkflowName + } + assert.ElementsMatch(t, []string{"wf-a", "wf-b"}, workflowNames) +} diff --git a/internal/interfaces/api/doc.go b/internal/interfaces/api/doc.go new file mode 100644 index 00000000..0fdd9aaf --- /dev/null +++ b/internal/interfaces/api/doc.go @@ -0,0 +1,190 @@ +// Package api implements the HTTP server interface for AWF. +// +// It wires the Huma v2 / chi v5 HTTP adapter and exposes workflow execution +// endpoints consumed by external callers. The package is an adapter in the +// "interfaces" layer of the hexagonal architecture: it translates HTTP requests +// into application service calls and HTTP responses back. No domain logic or +// infrastructure details reside here—only request parsing, response formatting, +// middleware wiring, and coordination between the HTTP layer and the Bridge. +// +// # Overview +// +// The api package provides a REST API surface for AWF with the following +// capabilities: +// +// - Workflow discovery and validation (GET /api/workflows, GET /api/workflows/{name}, +// POST /api/workflows/{name}/validate) +// - Asynchronous workflow execution (POST /api/workflows/{name}/run) +// - Execution lifecycle management (GET/DELETE /api/executions/{id}, +// POST /api/executions/{id}/resume) +// - Real-time event streaming via Server-Sent Events (GET /api/executions/{id}/events) +// - Execution history and statistics (GET /api/history, GET /api/history/stats) +// - Auto-generated OpenAPI 3.1 specification at /openapi.{json,yaml} +// - Swagger UI at /docs +// +// The package is designed to be embedded inside the CLI (via interfaces/cli) or +// run standalone. The caller creates a Server with NewServer, calls Start in a +// goroutine, and calls Shutdown on signal receipt. +// +// # Architecture +// +// The api package sits in the interfaces layer. It depends on the application +// layer only through the Bridge adapter, which holds narrow service ports +// (WorkflowLister, WorkflowRunner, HistoryProvider, WorkflowResumer). It never +// imports infrastructure packages directly—this invariant is enforced by +// go-arch-lint. +// +// External HTTP clients +// │ +// ▼ +// ┌────────────────────────────────┐ +// │ chi.Mux │ +// │ (Logger → Recoverer → │ +// │ RequestID) │ +// └───────────────┬────────────────┘ +// │ +// ▼ +// ┌────────────────────────────────┐ +// │ huma.API │ +// │ (schema validation, OpenAPI, │ +// │ content negotiation, /docs) │ +// └───────────────┬────────────────┘ +// │ +// ┌──────────┼──────────────┐ +// ▼ ▼ ▼ +// WorkflowHandlers ExecutionHandlers SSEHandler HistoryHandlers +// │ │ │ │ +// └──────────┴──────────────┴────────┘ +// │ +// ▼ +// *Bridge +// │ +// ┌─────────────────┼─────────────────┐ +// ▼ ▼ ▼ +// WorkflowLister WorkflowRunner HistoryProvider +// (app layer) (app layer) (app layer) +// +// # Key Types +// +// ## Server +// +// Server is the top-level assembly: it owns the chi.Mux, the huma.API instance, +// the net/http.Server, the shutdown timeout, and the SSE WaitGroup. One Server +// per process; instantiate via NewServer. +// +// ## Bridge +// +// Bridge holds the narrow service ports injected at startup. It acts as an +// anti-corruption layer: handler families call Bridge methods rather than +// application services directly, insulating handlers from service API changes. +// Bridge also owns the in-memory sync.Map of active executions and exposes +// StartExecution / GetExecution / CancelExecution / ListExecutions. +// +// ## Handler families +// +// - WorkflowHandlers (handlers_workflows.go): read-only workflow operations. +// - ExecutionHandlers (handlers_executions.go): lifecycle operations on async +// workflow runs. +// - SSEHandler (sse.go): streams step and workflow transition events to +// connected clients via Server-Sent Events. +// - HistoryHandlers (handlers_history.go): queries past execution records and +// aggregate statistics. +// +// # Request Lifecycle +// +// 1. chi.Mux receives the request and applies middleware in order: +// +// - middleware.Logger: structured access log. +// +// - middleware.Recoverer: converts panics to 500 responses. +// +// - middleware.RequestID: injects X-Request-Id header. +// +// 2. huma.API parses the request: validates path/query parameters, deserializes +// the request body against the registered JSON schema, and populates the +// typed input struct. +// +// 3. The registered handler function is called with the validated input. +// +// 4. The handler calls a Bridge method, which calls an application service port. +// +// 5. The handler returns a typed output struct or a huma.Error*. +// +// 6. huma.API serializes the response body and writes HTTP headers and status. +// +// # SSE Event Flow +// +// GET /api/executions/{id}/events establishes a long-lived SSE connection: +// +// 1. SSEHandler.Stream increments s.sseWG (owned by Server) so Shutdown knows +// how many streams are active. +// 2. A 200ms ticker polls ExecutionContext.GetAllStepStates(). +// 3. On each tick, step status transitions are compared with the previous snapshot. +// Changed steps emit typed SSE events: StepStartedEvent, StepCompletedEvent, +// StepFailedEvent. +// 4. When the workflow reaches a terminal state (completed, failed, cancelled), +// WorkflowCompletedEvent or WorkflowFailedEvent is emitted and the goroutine exits. +// 5. On context cancellation (client disconnect or server shutdown), the goroutine +// exits cleanly and decrements sseWG. +// +// Event types are registered in eventRegistry, which maps audit-event constants to +// Go structs. huma/sse derives the SSE event name from the struct type name at +// send time. +// +// # Error Handling +// +// Handlers return huma.Error* types directly (e.g., huma.Error404NotFound, +// huma.Error422UnprocessableEntity). Huma serializes these to RFC 7807 +// problem+json responses. No separate error-mapping middleware is used. +// +// # Concurrency Model +// +// ## Async executions +// +// RunWorkflow (POST /api/workflows/{name}/run) starts workflow execution in a +// background goroutine managed by Bridge.StartExecution. The client receives +// 202 Accepted with an execution ID immediately. Subsequent calls to +// GET /api/executions/{id} or the SSE stream observe the running state. +// +// ## SSE goroutine lifecycle +// +// Each SSE connection runs in its own goroutine managed by huma/sse. Server.sseWG +// tracks active SSE goroutines. Server.Shutdown waits on sseWG after calling +// httpSrv.Shutdown so that in-flight streams can complete before the process exits. +// The maximum wait is bounded by shutdownTimeout (default 30s, configurable via +// WithShutdownTimeout). +// +// ## Graceful shutdown sequence +// +// 1. Caller calls Server.Shutdown(ctx). +// 2. context.WithTimeout(ctx, shutdownTimeout) is created. +// 3. httpSrv.Shutdown(shutdownCtx) stops accepting new connections and waits for +// active HTTP handlers to complete (chi sends context cancellation to handlers). +// 4. SSE goroutines detect ctx.Done() and exit, decrementing sseWG. +// 5. sseWG.Wait() blocks until all SSE goroutines have exited. +// 6. Shutdown returns. +// +// # Architectural Invariants +// +// These invariants are enforced by go-arch-lint (NFR-001, NFR-006): +// +// - interfaces/api MUST NOT import internal/infrastructure/*. +// All infrastructure access is mediated by application service ports via Bridge. +// +// - All route registration MUST happen inside NewServer, never at package init +// or in handler constructors. This keeps the full routing table visible in a +// single function for discoverability. +// +// - One huma.API instance per Server. No global huma.API variables. +// +// - The Bridge and handler family constructors (NewWorkflowHandlers, +// NewExecutionHandlers, NewSSEHandler, NewHistoryHandlers) receive their +// dependencies at construction time; they never fetch from global state. +// +// - The SSE WaitGroup (sseWG) is owned by Server and passed by pointer to +// NewSSEHandler. Handlers call wg.Add(1) and defer wg.Done() around the +// stream goroutine lifetime. +// +// - Shutdown timeout is configurable only via WithShutdownTimeout; it is never +// read from CLI flags or environment variables at this layer. +package api diff --git a/internal/interfaces/api/handlers_executions.go b/internal/interfaces/api/handlers_executions.go new file mode 100644 index 00000000..cdbd582f --- /dev/null +++ b/internal/interfaces/api/handlers_executions.go @@ -0,0 +1,126 @@ +package api + +import ( + "context" + "fmt" + + "github.com/danielgtaylor/huma/v2" +) + +// ExecutionHandlers exposes execution lifecycle operations via HTTP. +type ExecutionHandlers struct { + b *Bridge +} + +// NewExecutionHandlers creates an ExecutionHandlers bound to the given Bridge. +func NewExecutionHandlers(b *Bridge) *ExecutionHandlers { + return &ExecutionHandlers{b: b} +} + +func (h *ExecutionHandlers) Run(ctx context.Context, in *RunWorkflowInput) (*RunWorkflowOutput, error) { + wf, err := h.b.workflows.GetWorkflow(ctx, in.Name) + if err != nil { + return nil, huma.Error404NotFound(fmt.Sprintf("workflow not found: %s", in.Name)) + } + id, _, err := h.b.StartExecution(ctx, wf, in.Body.Inputs) + if err != nil { + return nil, huma.Error422UnprocessableEntity(fmt.Sprintf("failed to start execution: %s", err)) + } + out := &RunWorkflowOutput{} + out.Body.Body = runWorkflowBody{ExecutionID: id, Status: "accepted"} + return out, nil +} + +func (h *ExecutionHandlers) List(_ context.Context, _ *struct{}) (*ListExecutionsOutput, error) { + active := h.b.ListExecutions() + bodies := make([]executionBody, 0, len(active)) + for _, ae := range active { + bodies = append(bodies, activeExecutionToBody(ae)) + } + out := &ListExecutionsOutput{} + out.Body.Body = listExecutionsBody{Executions: bodies} + return out, nil +} + +func (h *ExecutionHandlers) Get(_ context.Context, in *GetExecutionInput) (*ExecutionOutput, error) { + ae, ok := h.b.GetExecution(in.ID) + if !ok { + return nil, huma.Error404NotFound(fmt.Sprintf("execution not found: %s", in.ID)) + } + out := &ExecutionOutput{} + out.Body.Body = activeExecutionToBody(ae) + return out, nil +} + +func (h *ExecutionHandlers) Cancel(_ context.Context, in *CancelExecutionInput) (*struct{}, error) { + _ = h.b.CancelExecution(in.ID) //nolint:errcheck // idempotent: 204 regardless of whether execution exists + return nil, nil +} + +func (h *ExecutionHandlers) Resume(ctx context.Context, in *ResumeExecutionInput) (*RunWorkflowOutput, error) { + if h.b.resumer == nil { + return nil, huma.Error422UnprocessableEntity("resume is not available: no resumer configured") + } + execCtx, err := h.b.resumer.Resume(ctx, in.ID, in.Body.InputOverrides, in.Body.FromStep) + if err != nil { + return nil, huma.Error404NotFound(fmt.Sprintf("execution not found or cannot be resumed: %s", in.ID)) + } + id := h.b.TrackResumedExecution(execCtx) + out := &RunWorkflowOutput{} + out.Body.Body = runWorkflowBody{ExecutionID: id, Status: "accepted"} + return out, nil +} + +func activeExecutionToBody(ae *ActiveExecution) executionBody { + body := executionBody{ + ExecutionID: ae.ExecutionID, + WorkflowName: ae.WorkflowName, + } + if ae.ExecutionContext != nil { + body.Status = ae.ExecutionContext.GetStatus().String() + body.CurrentStep = ae.ExecutionContext.GetCurrentStep() + body.StartedAt = ae.ExecutionContext.StartedAt // set once in constructor, immutable + body.UpdatedAt = ae.ExecutionContext.GetUpdatedAt() + } + return body +} + +// RegisterExecutionRoutes mounts the execution lifecycle routes on the given Huma API. +func RegisterExecutionRoutes(api huma.API, h *ExecutionHandlers) { + huma.Register(api, huma.Operation{ + Method: "POST", + Path: "/api/workflows/{name}/run", + OperationID: "run-workflow", + Tags: []string{"Executions"}, + DefaultStatus: 202, + }, h.Run) + + huma.Register(api, huma.Operation{ + Method: "GET", + Path: "/api/executions", + OperationID: "list-executions", + Tags: []string{"Executions"}, + }, h.List) + + huma.Register(api, huma.Operation{ + Method: "GET", + Path: "/api/executions/{id}", + OperationID: "get-execution", + Tags: []string{"Executions"}, + }, h.Get) + + huma.Register(api, huma.Operation{ + Method: "DELETE", + Path: "/api/executions/{id}", + OperationID: "cancel-execution", + Tags: []string{"Executions"}, + DefaultStatus: 204, + }, h.Cancel) + + huma.Register(api, huma.Operation{ + Method: "POST", + Path: "/api/executions/{id}/resume", + OperationID: "resume-execution", + Tags: []string{"Executions"}, + }, h.Resume) +} diff --git a/internal/interfaces/api/handlers_executions_test.go b/internal/interfaces/api/handlers_executions_test.go new file mode 100644 index 00000000..c7e5e842 --- /dev/null +++ b/internal/interfaces/api/handlers_executions_test.go @@ -0,0 +1,336 @@ +package api + +import ( + "context" + "encoding/json" + "errors" + "testing" + "time" + + "github.com/danielgtaylor/huma/v2/humatest" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/awf-project/cli/internal/domain/workflow" +) + +// mockWorkflowResumer implements WorkflowResumer for testing. +type mockWorkflowResumer struct { + resumeErr error + execCtx *workflow.ExecutionContext +} + +func newMockWorkflowResumer() *mockWorkflowResumer { + return &mockWorkflowResumer{ + execCtx: workflow.NewExecutionContext("resumed-exec", "test-workflow"), + } +} + +func (m *mockWorkflowResumer) Resume( + _ context.Context, + _ string, + _ map[string]any, + _ string, +) (*workflow.ExecutionContext, error) { + if m.resumeErr != nil { + return nil, m.resumeErr + } + return m.execCtx, nil +} + +// --- Tests --- + +func TestExecutionHandler_Run_Returns202WithExecutionID_WithinDeadline(t *testing.T) { + // Blocking channel prevents cleanup goroutine from removing the entry before assertions. + block := make(chan error) + t.Cleanup(func() { close(block) }) + + lister := newMockWorkflowLister("deploy-prod") + runner := newMockWorkflowRunnerWithDone(block) + bridge := NewBridge(lister, runner, newMockHistoryProvider()) + handler := NewExecutionHandlers(bridge) + _, api := humatest.New(t) + RegisterExecutionRoutes(api, handler) + + // Verify the response is built and returned BEFORE the async work completes. + // FR-006 deadline: 100ms from request receipt. + startTime := time.Now() + timeout := time.AfterFunc(100*time.Millisecond, func() { + t.Fatal("FR-006 deadline exceeded: Run handler did not return within 100ms") + }) + defer timeout.Stop() + + input := struct { + Inputs map[string]any `json:"inputs"` + }{ + Inputs: map[string]any{"env": "prod"}, + } + + resp := api.Post("/api/workflows/deploy-prod/run", input) + elapsed := time.Since(startTime) + + timeout.Stop() + + // Assert HTTP 202 Accepted (async). + require.Equal(t, 202, resp.Code, "Run must return 202 Accepted for async execution") + + // Assert the execution ID is returned and non-empty. + var result struct { + Body struct { + ExecutionID string `json:"execution_id"` + Status string `json:"status"` + } `json:"body"` + } + err := json.NewDecoder(resp.Body).Decode(&result) + require.NoError(t, err) + + assert.NotEmpty(t, result.Body.ExecutionID, "execution_id must be non-empty") + assert.Equal(t, "accepted", result.Body.Status, "status must be 'accepted'") + assert.Less(t, elapsed, 100*time.Millisecond, "handler must return within FR-006 deadline") + + // Verify the execution is tracked in the Bridge. + stored, ok := bridge.GetExecution(result.Body.ExecutionID) + assert.True(t, ok, "execution must be stored in Bridge") + require.NotNil(t, stored) + assert.Equal(t, "deploy-prod", stored.WorkflowName) +} + +func TestExecutionHandler_Run_UnknownWorkflow_Returns404(t *testing.T) { + lister := newMockWorkflowLister() + lister.getErr = errors.New("workflow not found") + runner := newMockWorkflowRunner() + bridge := NewBridge(lister, runner, newMockHistoryProvider()) + handler := NewExecutionHandlers(bridge) + _, api := humatest.New(t) + RegisterExecutionRoutes(api, handler) + + input := struct { + Inputs map[string]any `json:"inputs"` + }{ + Inputs: map[string]any{}, + } + + resp := api.Post("/api/workflows/nonexistent/run", input) + + assert.Equal(t, 404, resp.Code, "Run with unknown workflow must return 404 Not Found") +} + +func TestExecutionHandler_List_HappyPath(t *testing.T) { + // Blocking channels prevent cleanup goroutine from removing entries. + blockA := make(chan error) + blockB := make(chan error) + t.Cleanup(func() { close(blockA); close(blockB) }) + + runner := &mockWorkflowRunner{} + bridge := NewBridge(newMockWorkflowLister("wf-a", "wf-b"), runner, newMockHistoryProvider()) + + // Start two executions. + wfA := &workflow.Workflow{Name: "wf-a", Steps: map[string]*workflow.Step{"s1": {Name: "s1"}}} + runner.done = blockA + _, _, err := bridge.StartExecution(context.Background(), wfA, nil) + require.NoError(t, err) + + wfB := &workflow.Workflow{Name: "wf-b", Steps: map[string]*workflow.Step{"s1": {Name: "s1"}}} + runner.done = blockB + _, _, err = bridge.StartExecution(context.Background(), wfB, nil) + require.NoError(t, err) + + handler := NewExecutionHandlers(bridge) + _, api := humatest.New(t) + RegisterExecutionRoutes(api, handler) + + resp := api.Get("/api/executions") + require.Equal(t, 200, resp.Code) + + var result struct { + Body struct { + Executions []struct { + ExecutionID string `json:"execution_id"` + WorkflowName string `json:"workflow_name"` + } `json:"executions"` + } `json:"body"` + } + err = json.NewDecoder(resp.Body).Decode(&result) + require.NoError(t, err) + + assert.Len(t, result.Body.Executions, 2, "List must return both executions") + workflowNames := make([]string, len(result.Body.Executions)) + for i, e := range result.Body.Executions { + workflowNames[i] = e.WorkflowName + } + assert.ElementsMatch(t, []string{"wf-a", "wf-b"}, workflowNames) +} + +func TestExecutionHandler_Get_HappyPath(t *testing.T) { + block := make(chan error) + t.Cleanup(func() { close(block) }) + + lister := newMockWorkflowLister("test-workflow") + runner := newMockWorkflowRunnerWithDone(block) + bridge := NewBridge(lister, runner, newMockHistoryProvider()) + handler := NewExecutionHandlers(bridge) + _, api := humatest.New(t) + RegisterExecutionRoutes(api, handler) + + // Start an execution to get a valid ID. + wf := &workflow.Workflow{Name: "test-workflow", Steps: map[string]*workflow.Step{"s1": {Name: "s1"}}} + id, _, err := bridge.StartExecution(context.Background(), wf, nil) + require.NoError(t, err) + + resp := api.Get("/api/executions/" + id) + require.Equal(t, 200, resp.Code, "Get with valid execution ID must return 200") + + var result struct { + Body struct { + ExecutionID string `json:"execution_id"` + WorkflowName string `json:"workflow_name"` + } `json:"body"` + } + err = json.NewDecoder(resp.Body).Decode(&result) + require.NoError(t, err) + + assert.Equal(t, id, result.Body.ExecutionID, "execution_id in response must match requested ID") + assert.Equal(t, "test-workflow", result.Body.WorkflowName) +} + +func TestExecutionHandler_Get_NotFound_Returns404(t *testing.T) { + lister := newMockWorkflowLister() + runner := newMockWorkflowRunner() + bridge := NewBridge(lister, runner, newMockHistoryProvider()) + handler := NewExecutionHandlers(bridge) + _, api := humatest.New(t) + RegisterExecutionRoutes(api, handler) + + resp := api.Get("/api/executions/does-not-exist") + + assert.Equal(t, 404, resp.Code, "Get with unknown execution ID must return 404 Not Found") +} + +func TestExecutionHandler_Cancel_PropagatesContextCancellation(t *testing.T) { + // Blocking channel prevents cleanup goroutine from removing the entry. + block := make(chan error) + t.Cleanup(func() { close(block) }) + + lister := newMockWorkflowLister("test-workflow") + runner := newMockWorkflowRunnerWithDone(block) + bridge := NewBridge(lister, runner, newMockHistoryProvider()) + handler := NewExecutionHandlers(bridge) + _, api := humatest.New(t) + RegisterExecutionRoutes(api, handler) + + // Start an execution. + wf := &workflow.Workflow{Name: "test-workflow", Steps: map[string]*workflow.Step{"s1": {Name: "s1"}}} + id, exec, err := bridge.StartExecution(context.Background(), wf, nil) + require.NoError(t, err) + + // Verify context is not yet cancelled. + assert.NoError(t, exec.Ctx.Err(), "context must not be cancelled before cancel handler") + + // Call the cancel handler. + input := struct { + ID string `path:"id"` + }{ID: id} + resp := api.Delete("/api/executions/"+id, input) + + // Assert 204 No Content (idempotent). + require.Equal(t, 204, resp.Code, "Cancel must return 204 No Content") + + // Verify context is now cancelled. + assert.Error(t, exec.Ctx.Err(), "context must be cancelled after cancel handler") + assert.ErrorIs(t, exec.Ctx.Err(), context.Canceled) +} + +func TestExecutionHandler_Cancel_Idempotent_TwoDELETEsBothReturn204(t *testing.T) { + // Blocking channel prevents cleanup goroutine from removing the entry during test. + block := make(chan error) + t.Cleanup(func() { close(block) }) + + lister := newMockWorkflowLister("test-workflow") + runner := newMockWorkflowRunnerWithDone(block) + bridge := NewBridge(lister, runner, newMockHistoryProvider()) + handler := NewExecutionHandlers(bridge) + _, api := humatest.New(t) + RegisterExecutionRoutes(api, handler) + + // Start an execution. + wf := &workflow.Workflow{Name: "test-workflow", Steps: map[string]*workflow.Step{"s1": {Name: "s1"}}} + id, _, err := bridge.StartExecution(context.Background(), wf, nil) + require.NoError(t, err) + + // First DELETE. + input := struct { + ID string `path:"id"` + }{ID: id} + resp1 := api.Delete("/api/executions/"+id, input) + require.Equal(t, 204, resp1.Code, "First DELETE must return 204") + + // Second DELETE (idempotent). + resp2 := api.Delete("/api/executions/"+id, input) + require.Equal(t, 204, resp2.Code, "Second DELETE must also return 204 (idempotent)") +} + +func TestExecutionHandler_Cancel_UnknownID_Returns204(t *testing.T) { + // Edge case spec line 108: Cancel is idempotent — unknown IDs also return 204. + lister := newMockWorkflowLister() + runner := newMockWorkflowRunner() + bridge := NewBridge(lister, runner, newMockHistoryProvider()) + handler := NewExecutionHandlers(bridge) + _, api := humatest.New(t) + RegisterExecutionRoutes(api, handler) + + // Try to cancel a non-existent execution. + input := struct { + ID string `path:"id"` + }{ID: "does-not-exist"} + resp := api.Delete("/api/executions/does-not-exist", input) + + // Assert 204 No Content (idempotent DELETE semantics). + assert.Equal(t, 204, resp.Code, "Cancel with unknown ID must return 204 (idempotent)") +} + +func TestExecutionHandler_Resume_FailedExecution_RestartsFromFailedStep(t *testing.T) { + // Setup: execution stored in Bridge, resumer mocked. + block := make(chan error) + t.Cleanup(func() { close(block) }) + + lister := newMockWorkflowLister("test-workflow") + runner := newMockWorkflowRunnerWithDone(block) + bridge := NewBridge(lister, runner, newMockHistoryProvider()) + + // Wire the resumer. + resumer := newMockWorkflowResumer() + bridge.SetResumer(resumer) + + handler := NewExecutionHandlers(bridge) + _, api := humatest.New(t) + RegisterExecutionRoutes(api, handler) + + // Start an execution (represents the failed run). + wf := &workflow.Workflow{Name: "test-workflow", Steps: map[string]*workflow.Step{"s1": {Name: "s1"}}} + failedID, _, err := bridge.StartExecution(context.Background(), wf, nil) + require.NoError(t, err) + + // Call the resume handler. + input := struct { + InputOverrides map[string]any `json:"input_overrides,omitempty"` + FromStep string `json:"from_step,omitempty"` + }{ + InputOverrides: map[string]any{"retry": true}, + FromStep: "build", + } + + resp := api.Post("/api/executions/"+failedID+"/resume", input) + require.Equal(t, 200, resp.Code, "Resume must return 200 OK with new RunWorkflowOutput") + + var result struct { + Body struct { + ExecutionID string `json:"execution_id"` + Status string `json:"status"` + } `json:"body"` + } + err = json.NewDecoder(resp.Body).Decode(&result) + require.NoError(t, err) + + assert.NotEmpty(t, result.Body.ExecutionID, "Resume must return a new execution ID") + assert.Equal(t, "accepted", result.Body.Status) +} diff --git a/internal/interfaces/api/handlers_history.go b/internal/interfaces/api/handlers_history.go new file mode 100644 index 00000000..138a7815 --- /dev/null +++ b/internal/interfaces/api/handlers_history.go @@ -0,0 +1,79 @@ +package api + +import ( + "context" + + "github.com/danielgtaylor/huma/v2" + + "github.com/awf-project/cli/internal/domain/workflow" +) + +// HistoryHandlers exposes execution history query operations via HTTP. +type HistoryHandlers struct { + b *Bridge +} + +// NewHistoryHandlers creates a HistoryHandlers bound to the given Bridge. +func NewHistoryHandlers(b *Bridge) *HistoryHandlers { + return &HistoryHandlers{b: b} +} + +func (h *HistoryHandlers) List(ctx context.Context, in *HistoryListInput) (*HistoryListOutput, error) { + filter := buildHistoryFilter(in) + records, err := h.b.history.List(ctx, filter) + if err != nil { + return nil, err + } + entries := make([]HistoryEntry, 0, len(records)) + for _, r := range records { + entries = append(entries, HistoryEntry{ + ID: r.ID, + WorkflowName: r.WorkflowName, + Status: r.Status, + StartedAt: r.StartedAt, + CompletedAt: r.CompletedAt, + DurationMs: r.DurationMs, + }) + } + out := &HistoryListOutput{} + out.Body.Body = historyListBody{Entries: entries} + return out, nil +} + +func (h *HistoryHandlers) Stats(ctx context.Context, in *HistoryListInput) (*HistoryStatsOutput, error) { + filter := buildHistoryFilter(in) + stats, err := h.b.history.GetStats(ctx, filter) + if err != nil { + return nil, err + } + out := &HistoryStatsOutput{} + out.Body.Body = stats + return out, nil +} + +// RegisterHistoryRoutes mounts the history list and stats routes on the given Huma API. +func RegisterHistoryRoutes(api huma.API, h *HistoryHandlers) { + huma.Register(api, huma.Operation{ + Method: "GET", + Path: "/api/history", + OperationID: "list-history", + Tags: []string{"History"}, + }, h.List) + + huma.Register(api, huma.Operation{ + Method: "GET", + Path: "/api/history/stats", + OperationID: "history-stats", + Tags: []string{"History"}, + }, h.Stats) +} + +func buildHistoryFilter(in *HistoryListInput) *workflow.HistoryFilter { + return &workflow.HistoryFilter{ + WorkflowName: in.Workflow, + Status: in.Status, + Since: in.Since, + Until: in.Until, + Limit: in.Limit, + } +} diff --git a/internal/interfaces/api/handlers_history_test.go b/internal/interfaces/api/handlers_history_test.go new file mode 100644 index 00000000..a01f4e50 --- /dev/null +++ b/internal/interfaces/api/handlers_history_test.go @@ -0,0 +1,123 @@ +package api + +import ( + "context" + "encoding/json" + "testing" + + "github.com/danielgtaylor/huma/v2/humatest" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/awf-project/cli/internal/domain/workflow" +) + +type capturingHistoryProvider struct { + capturedFilter *workflow.HistoryFilter + records []*workflow.ExecutionRecord + stats *workflow.HistoryStats +} + +func (m *capturingHistoryProvider) List(_ context.Context, filter *workflow.HistoryFilter) ([]*workflow.ExecutionRecord, error) { + m.capturedFilter = filter + return m.records, nil +} + +func (m *capturingHistoryProvider) GetStats(_ context.Context, filter *workflow.HistoryFilter) (*workflow.HistoryStats, error) { + m.capturedFilter = filter + return m.stats, nil +} + +func TestHistoryHandler_List_FiltersByWorkflowAndStatus(t *testing.T) { + mock := &capturingHistoryProvider{ + records: []*workflow.ExecutionRecord{ + {ID: "rec-1", WorkflowName: "deploy-prod", Status: "success"}, + {ID: "rec-2", WorkflowName: "deploy-prod", Status: "success"}, + }, + } + + bridge := NewBridge(newMockWorkflowLister(), nil, mock) + handler := NewHistoryHandlers(bridge) + _, api := humatest.New(t) + RegisterHistoryRoutes(api, handler) + + resp := api.Get("/api/history?workflow=deploy-prod&status=success") + require.Equal(t, 200, resp.Code, "List must return 200 OK") + + // Assert filter values reached the mock unchanged. + require.NotNil(t, mock.capturedFilter, "filter must be captured") + assert.Equal(t, "deploy-prod", mock.capturedFilter.WorkflowName, "filter must contain workflow name from query") + assert.Equal(t, "success", mock.capturedFilter.Status, "filter must contain status from query") + assert.True(t, mock.capturedFilter.Since.IsZero(), "zero Since must remain zero (no-filter convention)") + + var result struct { + Body struct { + Entries []HistoryEntry `json:"entries"` + } `json:"body"` + } + err := json.NewDecoder(resp.Body).Decode(&result) + require.NoError(t, err, "response must be valid JSON") + + assert.Len(t, result.Body.Entries, 2, "response must contain both mocked records") + assert.Equal(t, "rec-1", result.Body.Entries[0].ID) + assert.Equal(t, "deploy-prod", result.Body.Entries[0].WorkflowName) + assert.Equal(t, "success", result.Body.Entries[0].Status) +} + +func TestHistoryHandler_Stats_ReturnsAggregates(t *testing.T) { + mock := &capturingHistoryProvider{ + stats: &workflow.HistoryStats{ + TotalExecutions: 5, + SuccessCount: 3, + FailedCount: 1, + CancelledCount: 1, + AvgDurationMs: 2500, + }, + } + + bridge := NewBridge(newMockWorkflowLister(), nil, mock) + handler := NewHistoryHandlers(bridge) + _, api := humatest.New(t) + RegisterHistoryRoutes(api, handler) + + resp := api.Get("/api/history/stats") + require.Equal(t, 200, resp.Code, "Stats must return 200 OK") + + // Decode using the typed struct directly — consistent with GetWorkflowOutput test pattern. + var result struct { + Body *workflow.HistoryStats `json:"body"` + } + err := json.NewDecoder(resp.Body).Decode(&result) + require.NoError(t, err, "response must be valid JSON") + require.NotNil(t, result.Body, "stats body must not be nil") + + assert.Equal(t, 5, result.Body.TotalExecutions, "must return TotalExecutions from GetStats") + assert.Equal(t, 3, result.Body.SuccessCount, "must return SuccessCount from GetStats") + assert.Equal(t, 1, result.Body.FailedCount, "must return FailedCount from GetStats") + assert.Equal(t, 1, result.Body.CancelledCount, "must return CancelledCount from GetStats") + assert.Equal(t, int64(2500), result.Body.AvgDurationMs, "must return AvgDurationMs from GetStats") +} + +func TestHistoryHandler_Stats_FiltersByWorkflowAndStatus(t *testing.T) { + mock := &capturingHistoryProvider{ + stats: &workflow.HistoryStats{ + TotalExecutions: 2, + SuccessCount: 1, + FailedCount: 1, + }, + } + + bridge := NewBridge(newMockWorkflowLister(), nil, mock) + handler := NewHistoryHandlers(bridge) + _, api := humatest.New(t) + RegisterHistoryRoutes(api, handler) + + resp := api.Get("/api/history/stats?workflow=deploy-prod&status=success") + require.Equal(t, 200, resp.Code, "Stats must return 200 OK") + + // Assert filter values reached the mock unchanged. + require.NotNil(t, mock.capturedFilter, "filter must be captured") + assert.Equal(t, "deploy-prod", mock.capturedFilter.WorkflowName, "filter must contain workflow name from query") + assert.Equal(t, "success", mock.capturedFilter.Status, "filter must contain status from query") + assert.True(t, mock.capturedFilter.Since.IsZero(), "zero Since must remain zero (no-filter convention)") +} diff --git a/internal/interfaces/api/handlers_workflows.go b/internal/interfaces/api/handlers_workflows.go new file mode 100644 index 00000000..26d3fb7a --- /dev/null +++ b/internal/interfaces/api/handlers_workflows.go @@ -0,0 +1,79 @@ +package api + +import ( + "context" + "fmt" + + "github.com/danielgtaylor/huma/v2" +) + +// WorkflowHandlers exposes workflow read operations via HTTP. +type WorkflowHandlers struct { + b *Bridge +} + +// NewWorkflowHandlers creates a WorkflowHandlers bound to the given Bridge. +func NewWorkflowHandlers(b *Bridge) *WorkflowHandlers { + return &WorkflowHandlers{b: b} +} + +func (h *WorkflowHandlers) List(ctx context.Context, _ *struct{}) (*ListWorkflowsOutput, error) { + entries, err := h.b.workflows.ListAllWorkflows(ctx) + if err != nil { + return nil, err + } + summaries := make([]WorkflowSummary, 0, len(entries)) + for _, e := range entries { + summaries = append(summaries, WorkflowSummary{ + Name: e.Name, + Version: e.Version, + Description: e.Description, + }) + } + out := &ListWorkflowsOutput{} + out.Body.Body = listWorkflowsBody{Workflows: summaries} + return out, nil +} + +func (h *WorkflowHandlers) Get(ctx context.Context, in *GetWorkflowInput) (*GetWorkflowOutput, error) { + wf, err := h.b.workflows.GetWorkflow(ctx, in.Name) + if err != nil { + return nil, huma.Error404NotFound(fmt.Sprintf("workflow not found: %s", in.Name)) + } + out := &GetWorkflowOutput{} + out.Body.Body = wf + return out, nil +} + +func (h *WorkflowHandlers) Validate(ctx context.Context, in *ValidateWorkflowInput) (*ValidateWorkflowOutput, error) { + out := &ValidateWorkflowOutput{} + err := h.b.workflows.ValidateWorkflow(ctx, in.Name) + if err != nil { + out.Body.Body = validateWorkflowBody{Errors: []string{err.Error()}} + } + return out, nil +} + +// RegisterWorkflowRoutes mounts the three workflow read routes on the given Huma API. +func RegisterWorkflowRoutes(api huma.API, h *WorkflowHandlers) { + huma.Register(api, huma.Operation{ + Method: "GET", + Path: "/api/workflows", + OperationID: "list-workflows", + Tags: []string{"Workflows"}, + }, h.List) + + huma.Register(api, huma.Operation{ + Method: "GET", + Path: "/api/workflows/{name}", + OperationID: "get-workflow", + Tags: []string{"Workflows"}, + }, h.Get) + + huma.Register(api, huma.Operation{ + Method: "POST", + Path: "/api/workflows/{name}/validate", + OperationID: "validate-workflow", + Tags: []string{"Workflows"}, + }, h.Validate) +} diff --git a/internal/interfaces/api/handlers_workflows_test.go b/internal/interfaces/api/handlers_workflows_test.go new file mode 100644 index 00000000..0b900752 --- /dev/null +++ b/internal/interfaces/api/handlers_workflows_test.go @@ -0,0 +1,156 @@ +package api + +import ( + "encoding/json" + "errors" + "testing" + + "github.com/danielgtaylor/huma/v2/humatest" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/awf-project/cli/internal/domain/workflow" +) + +func TestWorkflowHandler_List_HappyPath(t *testing.T) { + mock := newMockWorkflowLister("deploy-prod", "test-service") + mock.entries[0].Version = "1.0.0" + mock.entries[0].Description = "Deploy to production" + mock.entries[1].Version = "2.0.0" + mock.entries[1].Description = "Run tests" + + bridge := NewBridge(mock, nil, nil) + handler := NewWorkflowHandlers(bridge) + _, api := humatest.New(t) + RegisterWorkflowRoutes(api, handler) + + resp := api.Get("/api/workflows") + require.Equal(t, 200, resp.Code) + + var result struct { + Body struct { + Workflows []WorkflowSummary `json:"workflows"` + } `json:"body"` + } + err := json.NewDecoder(resp.Body).Decode(&result) + require.NoError(t, err) + + assert.Len(t, result.Body.Workflows, 2) + assert.Equal(t, "deploy-prod", result.Body.Workflows[0].Name) + assert.Equal(t, "1.0.0", result.Body.Workflows[0].Version) + assert.Equal(t, "Deploy to production", result.Body.Workflows[0].Description) +} + +func TestWorkflowHandler_Get_NotFound_Returns404(t *testing.T) { + mock := newMockWorkflowLister() + mock.getErr = errors.New("workflow not found") + + bridge := NewBridge(mock, nil, nil) + handler := NewWorkflowHandlers(bridge) + _, api := humatest.New(t) + RegisterWorkflowRoutes(api, handler) + + resp := api.Get("/api/workflows/nonexistent") + assert.Equal(t, 404, resp.Code) +} + +func TestWorkflowHandler_Validate_InvalidWorkflow_ReturnsErrors(t *testing.T) { + mock := newMockWorkflowLister("bad-workflow") + mock.validErr = errors.New("invalid step reference") + + bridge := NewBridge(mock, nil, nil) + handler := NewWorkflowHandlers(bridge) + _, api := humatest.New(t) + RegisterWorkflowRoutes(api, handler) + + validateInput := struct { + Body struct { + Inputs map[string]any `json:"inputs"` + } `json:"body"` + }{} + + resp := api.Post("/api/workflows/bad-workflow/validate", validateInput) + require.Equal(t, 200, resp.Code) + + var result struct { + Body struct { + Errors []string `json:"errors"` + } `json:"body"` + } + err := json.NewDecoder(resp.Body).Decode(&result) + require.NoError(t, err) + + assert.NotEmpty(t, result.Body.Errors) +} + +func TestWorkflowHandler_List_EmptyList(t *testing.T) { + mock := newMockWorkflowLister() + + bridge := NewBridge(mock, nil, nil) + handler := NewWorkflowHandlers(bridge) + _, api := humatest.New(t) + RegisterWorkflowRoutes(api, handler) + + resp := api.Get("/api/workflows") + require.Equal(t, 200, resp.Code) + + var result struct { + Body struct { + Workflows []WorkflowSummary `json:"workflows"` + } `json:"body"` + } + err := json.NewDecoder(resp.Body).Decode(&result) + require.NoError(t, err) + + assert.Empty(t, result.Body.Workflows) +} + +func TestWorkflowHandler_Get_FoundWorkflow_ReturnsWorkflow(t *testing.T) { + mock := newMockWorkflowLister("test-workflow") + + bridge := NewBridge(mock, nil, nil) + handler := NewWorkflowHandlers(bridge) + _, api := humatest.New(t) + RegisterWorkflowRoutes(api, handler) + + resp := api.Get("/api/workflows/test-workflow") + require.Equal(t, 200, resp.Code) + + var result struct { + Body *workflow.Workflow `json:"body"` + } + err := json.NewDecoder(resp.Body).Decode(&result) + require.NoError(t, err) + + assert.NotNil(t, result.Body) + assert.Equal(t, "test-workflow", result.Body.Name) +} + +func TestWorkflowHandler_Validate_ValidWorkflow_ReturnsEmptyErrors(t *testing.T) { + mock := newMockWorkflowLister("valid-workflow") + // validErr defaults to nil, which means validation passed + + bridge := NewBridge(mock, nil, nil) + handler := NewWorkflowHandlers(bridge) + _, api := humatest.New(t) + RegisterWorkflowRoutes(api, handler) + + validateInput := struct { + Body struct { + Inputs map[string]any `json:"inputs"` + } `json:"body"` + }{} + + resp := api.Post("/api/workflows/valid-workflow/validate", validateInput) + require.Equal(t, 200, resp.Code) + + var result struct { + Body struct { + Errors []string `json:"errors"` + } `json:"body"` + } + err := json.NewDecoder(resp.Body).Decode(&result) + require.NoError(t, err) + + assert.Empty(t, result.Body.Errors) +} diff --git a/internal/interfaces/api/server.go b/internal/interfaces/api/server.go new file mode 100644 index 00000000..a9ab05c0 --- /dev/null +++ b/internal/interfaces/api/server.go @@ -0,0 +1,96 @@ +package api + +import ( + "context" + "errors" + "fmt" + "net" + "net/http" + "sync" + "time" + + "github.com/danielgtaylor/huma/v2" + "github.com/danielgtaylor/huma/v2/adapters/humachi" + "github.com/go-chi/chi/v5" + chiMiddleware "github.com/go-chi/chi/v5/middleware" +) + +// Option configures a Server on construction. +type Option func(*Server) + +// WithShutdownTimeout overrides the default 30s graceful shutdown timeout. +func WithShutdownTimeout(d time.Duration) Option { + return func(s *Server) { + s.shutdownTimeout = d + } +} + +// Server assembles all handler families into a single HTTP server backed by chi and Huma. +type Server struct { + bridge *Bridge + mux *chi.Mux + api huma.API + httpSrv *http.Server + shutdownTimeout time.Duration + sseWG sync.WaitGroup +} + +// NewServer assembles a Server with middleware and all route families on addr. +func NewServer(bridge *Bridge, addr string, opts ...Option) *Server { + s := &Server{ + bridge: bridge, + shutdownTimeout: 30 * time.Second, + } + for _, opt := range opts { + opt(s) + } + + s.mux = chi.NewMux() + s.mux.Use(chiMiddleware.Logger) + s.mux.Use(chiMiddleware.Recoverer) + s.mux.Use(chiMiddleware.RequestID) + + config := huma.DefaultConfig("AWF API", "v1") + config.Info.Description = "AWF workflow execution and management API" + s.api = humachi.New(s.mux, config) + + RegisterWorkflowRoutes(s.api, NewWorkflowHandlers(bridge)) + RegisterExecutionRoutes(s.api, NewExecutionHandlers(bridge)) + RegisterSSERoutes(s.api, NewSSEHandler(bridge, &s.sseWG)) + RegisterHistoryRoutes(s.api, NewHistoryHandlers(bridge)) + + s.httpSrv = &http.Server{ + Addr: addr, + Handler: s.mux, + ReadHeaderTimeout: 5 * time.Second, //nolint:gosec // G112: timeout set to prevent Slowloris + } + + return s +} + +// Start sets the server's BaseContext to ctx and calls ListenAndServe. +// Returns nil when the server shuts down gracefully. +func (s *Server) Start(ctx context.Context) error { + s.httpSrv.BaseContext = func(_ net.Listener) context.Context { return ctx } + if err := s.httpSrv.ListenAndServe(); !errors.Is(err, http.ErrServerClosed) { + return fmt.Errorf("http server: %w", err) + } + return nil +} + +// Shutdown gracefully stops the HTTP server within shutdownTimeout and waits for active SSE goroutines. +func (s *Server) Shutdown(ctx context.Context) error { + shutdownCtx, cancel := context.WithTimeout(ctx, s.shutdownTimeout) + defer cancel() + err := s.httpSrv.Shutdown(shutdownCtx) + s.sseWG.Wait() + if err != nil { + return fmt.Errorf("http server shutdown: %w", err) + } + return nil +} + +// Handler returns the chi mux for use with httptest.NewServer in integration tests. +func (s *Server) Handler() http.Handler { + return s.mux +} diff --git a/internal/interfaces/api/server_test.go b/internal/interfaces/api/server_test.go new file mode 100644 index 00000000..48de553e --- /dev/null +++ b/internal/interfaces/api/server_test.go @@ -0,0 +1,158 @@ +package api + +import ( + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/awf-project/cli/internal/domain/workflow" +) + +func newTestServer(t *testing.T) (*Server, *Bridge) { + t.Helper() + lister := newMockWorkflowLister("wf-1") + runner := newMockWorkflowRunner() + history := newMockHistoryProvider() + bridge := NewBridge(lister, runner, history) + srv := NewServer(bridge, ":0") + return srv, bridge +} + +func TestServer_RegistersAllRoutes(t *testing.T) { + srv, bridge := newTestServer(t) + ts := httptest.NewServer(srv.Handler()) + defer ts.Close() + + // Pre-register an active execution in terminal state so GET/DELETE + // /api/executions/{id} reach the handler body, and the SSE endpoint + // sees a completed workflow and closes the stream immediately. + execCtx := newMockWorkflowRunner().execCtx + execCtx.SetStatus(workflow.StatusCompleted) + execCtx.SetCompletedAt(time.Now()) + knownID := bridge.TrackResumedExecution(execCtx) + + routes := []struct { + method string + path string + // wantNot404 asserts that the route is registered; handler-returned 404s + // are excluded by using a known execution ID for execution-scoped routes. + }{ + {"GET", "/api/workflows"}, + {"GET", "/api/workflows/wf-1"}, + {"POST", "/api/workflows/wf-1/validate"}, + {"POST", "/api/workflows/wf-1/run"}, + {"GET", "/api/executions"}, + {"GET", "/api/executions/" + knownID}, + {"DELETE", "/api/executions/" + knownID}, + {"POST", "/api/executions/" + knownID + "/resume"}, + {"GET", "/api/executions/" + knownID + "/events"}, + {"GET", "/api/history"}, + {"GET", "/api/history/stats"}, + } + + for _, r := range routes { + t.Run(r.method+" "+r.path, func(t *testing.T) { + ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second) + defer cancel() + req, err := http.NewRequestWithContext(ctx, r.method, ts.URL+r.path, http.NoBody) + require.NoError(t, err) + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + resp.Body.Close() + // 405 Method Not Allowed would mean the path is registered but not the method. + // We treat any non-404 response (including handler errors) as "route registered". + assert.NotEqual(t, http.StatusNotFound, resp.StatusCode, + "route %s %s must be registered (got 404)", r.method, r.path) + }) + } +} + +func TestServer_OpenAPISpec_ValidatesAgainst31(t *testing.T) { + srv, _ := newTestServer(t) + ts := httptest.NewServer(srv.Handler()) + defer ts.Close() + + req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, ts.URL+"/openapi.json", http.NoBody) + require.NoError(t, err) + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + defer resp.Body.Close() + + require.Equal(t, http.StatusOK, resp.StatusCode) + + var spec map[string]any + require.NoError(t, json.NewDecoder(resp.Body).Decode(&spec)) + + openapi, ok := spec["openapi"].(string) + require.True(t, ok, "openapi field must be a string") + assert.True(t, strings.HasPrefix(openapi, "3.1"), "openapi version must start with 3.1, got %q", openapi) + + info, ok := spec["info"].(map[string]any) + require.True(t, ok, "info field must be an object") + assert.Equal(t, "AWF API", info["title"], "info.title must be 'AWF API'") + + paths, ok := spec["paths"].(map[string]any) + require.True(t, ok, "paths field must be an object") + + expectedPaths := []string{ + "/api/workflows", + "/api/workflows/{name}", + "/api/workflows/{name}/run", + "/api/executions", + "/api/executions/{id}", + "/api/history", + } + for _, p := range expectedPaths { + assert.Contains(t, paths, p, "OpenAPI spec must contain path %s", p) + } +} + +func TestServer_GracefulShutdown_Within30s_WithActiveSSE(t *testing.T) { + srv, _ := newTestServer(t) + ts := httptest.NewServer(srv.Handler()) + defer ts.Close() + + // Open an SSE connection to a non-existent execution (returns 404 immediately + // from Stream; the connection closes, so no goroutine stays open). + req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, + ts.URL+"/api/executions/no-such-id/events", http.NoBody) + require.NoError(t, err) + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + resp.Body.Close() + + done := make(chan error, 1) + go func() { + done <- srv.Shutdown(context.Background()) + }() + + select { + case err := <-done: + assert.NoError(t, err, "Shutdown must complete without error") + case <-time.After(30 * time.Second): + t.Fatal("Shutdown did not complete within 30 seconds") + } +} + +func TestWithShutdownTimeout_SetsOption(t *testing.T) { + lister := newMockWorkflowLister() + bridge := NewBridge(lister, newMockWorkflowRunner(), newMockHistoryProvider()) + + want := 5 * time.Second + srv := NewServer(bridge, ":0", WithShutdownTimeout(want)) + + assert.Equal(t, want, srv.shutdownTimeout) +} + +func TestServer_Handler_ReturnsHTTPHandler(t *testing.T) { + srv, _ := newTestServer(t) + h := srv.Handler() + assert.NotNil(t, h, "Handler() must return a non-nil http.Handler") +} diff --git a/internal/interfaces/api/sse.go b/internal/interfaces/api/sse.go new file mode 100644 index 00000000..72559757 --- /dev/null +++ b/internal/interfaces/api/sse.go @@ -0,0 +1,195 @@ +package api + +import ( + "context" + "fmt" + "sync" + "time" + + "github.com/danielgtaylor/huma/v2" + "github.com/danielgtaylor/huma/v2/sse" + + "github.com/awf-project/cli/internal/domain/workflow" +) + +const ( + apiPollInterval = 200 * time.Millisecond + eventOutput = "output" +) + +// StreamInput holds the path parameter for the SSE event stream endpoint. +type StreamInput struct { + ID string `path:"id" doc:"Execution ID." example:"550e8400-e29b-41d4-a716-446655440000" required:"true"` +} + +// StepStartedEvent is emitted when a step transitions to running. +type StepStartedEvent struct { + StepName string `json:"step_name"` + Status string `json:"status"` + StartedAt time.Time `json:"started_at"` +} + +// StepCompletedEvent is emitted when a step transitions to completed. +type StepCompletedEvent struct { + StepName string `json:"step_name"` + Status string `json:"status"` + Output string `json:"output"` + CompletedAt time.Time `json:"completed_at"` +} + +// StepFailedEvent is emitted when a step transitions to failed. +type StepFailedEvent struct { + StepName string `json:"step_name"` + Status string `json:"status"` + Error string `json:"error"` + CompletedAt time.Time `json:"completed_at"` +} + +// WorkflowCompletedEvent is emitted when the workflow reaches the completed terminal state. +type WorkflowCompletedEvent struct { + WorkflowName string `json:"workflow_name"` + Status string `json:"status"` + CompletedAt time.Time `json:"completed_at"` +} + +// WorkflowFailedEvent is emitted when the workflow reaches the failed terminal state. +type WorkflowFailedEvent struct { + WorkflowName string `json:"workflow_name"` + Status string `json:"status"` + Error string `json:"error"` + CompletedAt time.Time `json:"completed_at"` +} + +// OutputEvent carries incremental output from a running step. +type OutputEvent struct { + StepName string `json:"step_name"` + Output string `json:"output"` +} + +// eventRegistry maps audit-event constant strings to SSE event struct types. +// huma/sse uses Go reflect to derive the event name from the struct type at send time. +var eventRegistry = map[string]any{ + workflow.EventStepStarted: StepStartedEvent{}, + workflow.EventStepCompleted: StepCompletedEvent{}, + workflow.EventStepFailed: StepFailedEvent{}, + workflow.EventWorkflowCompleted: WorkflowCompletedEvent{}, + workflow.EventWorkflowFailed: WorkflowFailedEvent{}, + eventOutput: OutputEvent{}, +} + +// SSEHandler streams workflow execution events over Server-Sent Events. +type SSEHandler struct { + b *Bridge + wg *sync.WaitGroup +} + +// NewSSEHandler creates an SSEHandler bound to the given Bridge and WaitGroup. +func NewSSEHandler(b *Bridge, wg *sync.WaitGroup) *SSEHandler { + return &SSEHandler{b: b, wg: wg} +} + +// emitStepEvent sends the appropriate typed SSE event for a step status. +// +//nolint:gocritic // hugeParam: StepState passed by value intentionally; callers hold map values not pointers +func emitStepEvent(send sse.Sender, name string, state workflow.StepState) error { + switch state.Status { + case workflow.StatusRunning: + return send(sse.Message{Data: StepStartedEvent{ + StepName: name, + Status: string(state.Status), + StartedAt: state.StartedAt, + }}) + case workflow.StatusCompleted: + return send(sse.Message{Data: StepCompletedEvent{ + StepName: name, + Status: string(state.Status), + Output: state.Output, + CompletedAt: state.CompletedAt, + }}) + case workflow.StatusFailed: + return send(sse.Message{Data: StepFailedEvent{ + StepName: name, + Status: string(state.Status), + Error: state.Error, + CompletedAt: state.CompletedAt, + }}) + default: + // StatusPending and StatusCancelled produce no step event. + return nil + } +} + +// Stream polls the ExecutionContext every apiPollInterval and emits typed SSE +// events for each step state transition. Returns huma.Error404NotFound when the +// execution ID is unknown. Exits cleanly on terminal workflow state or ctx.Done(). +func (h *SSEHandler) Stream(ctx context.Context, in *StreamInput, send sse.Sender) error { + active, ok := h.b.GetExecution(in.ID) + if !ok { + return huma.Error404NotFound(fmt.Sprintf("execution not found: %s", in.ID)) + } + + h.wg.Add(1) + defer h.wg.Done() + + ticker := time.NewTicker(apiPollInterval) + defer ticker.Stop() + + prev := make(map[string]workflow.ExecutionStatus) + + for { + select { + case <-ctx.Done(): + return nil + case <-ticker.C: + execCtx := active.ExecutionContext + states := execCtx.GetAllStepStates() + + for name := range states { //nolint:gocritic // rangeValCopy: StepState is 272 bytes; map lookup copies once vs range copying per iteration + st := states[name] + if prev[name] == st.Status { + continue + } + if err := emitStepEvent(send, name, st); err != nil { + return nil + } + prev[name] = st.Status + } + + workflowStatus := execCtx.GetStatus() + switch workflowStatus { + case workflow.StatusCompleted: + if err := send(sse.Message{Data: WorkflowCompletedEvent{ + WorkflowName: execCtx.WorkflowName, + Status: string(workflowStatus), + CompletedAt: execCtx.GetCompletedAt(), + }}); err != nil { + return nil + } + return nil + case workflow.StatusFailed, workflow.StatusCancelled: + if err := send(sse.Message{Data: WorkflowFailedEvent{ + WorkflowName: execCtx.WorkflowName, + Status: string(workflowStatus), + CompletedAt: execCtx.GetCompletedAt(), + }}); err != nil { + return nil + } + return nil + default: + // StatusPending and StatusRunning: no terminal event yet, continue polling. + } + } + } +} + +// RegisterSSERoutes registers GET /api/executions/{id}/events on the given Huma API. +func RegisterSSERoutes(api huma.API, h *SSEHandler) { + sse.Register(api, huma.Operation{ + Method: "GET", + Path: "/api/executions/{id}/events", + OperationID: "stream-execution-events", + Tags: []string{"Executions"}, + }, eventRegistry, func(ctx context.Context, in *StreamInput, send sse.Sender) { + _ = h.Stream(ctx, in, send) //nolint:errcheck // sse.Register's f has no error return; 404 handled inside Stream via early close + }) +} diff --git a/internal/interfaces/api/sse_test.go b/internal/interfaces/api/sse_test.go new file mode 100644 index 00000000..e99e2bd8 --- /dev/null +++ b/internal/interfaces/api/sse_test.go @@ -0,0 +1,257 @@ +package api + +import ( + "context" + "reflect" + "sync" + "testing" + "time" + + "github.com/danielgtaylor/huma/v2/sse" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "golang.org/x/sync/errgroup" + + "github.com/awf-project/cli/internal/domain/workflow" +) + +// newMockSSESender creates a mock SSE sender that records messages. +func newMockSSESender() (sse.Sender, *[]sse.Message) { + messages := &[]sse.Message{} + var mu sync.Mutex + + sender := func(msg sse.Message) error { + mu.Lock() + defer mu.Unlock() + *messages = append(*messages, msg) + return nil + } + + return sender, messages +} + +func TestSSE_UnknownExecutionID_Returns404BeforeStreamOpen(t *testing.T) { + bridge := NewBridge(nil, nil, nil) + var wg sync.WaitGroup + handler := NewSSEHandler(bridge, &wg) + + ctx := context.Background() + in := &StreamInput{ID: "unknown-id"} + sender, _ := newMockSSESender() + + err := handler.Stream(ctx, in, sender) + + require.NotNil(t, err, "expected error for unknown execution ID") +} + +func TestSSE_EmitsStepStartedThenStepCompleted_OnStateTransition(t *testing.T) { + bridge := NewBridge(nil, nil, nil) + var wg sync.WaitGroup + handler := NewSSEHandler(bridge, &wg) + + execCtx := workflow.NewExecutionContext("test-exec-id", "test-workflow") + ae := &ActiveExecution{ + ExecutionID: "test-exec-id", + WorkflowName: "test-workflow", + ExecutionContext: execCtx, + Done: make(<-chan error), + } + bridge.activeExecutions.Store("test-exec-id", ae) + + ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond) + defer cancel() + + in := &StreamInput{ID: "test-exec-id"} + sender, messages := newMockSSESender() + + // Simulate state transitions in a separate goroutine + go func() { + time.Sleep(50 * time.Millisecond) + stepState := workflow.StepState{ + Name: "step1", + Status: workflow.StatusRunning, + StartedAt: time.Now(), + } + execCtx.SetStepState("step1", stepState) + + time.Sleep(100 * time.Millisecond) + stepState.Status = workflow.StatusCompleted + stepState.Output = "test output" + stepState.CompletedAt = time.Now() + execCtx.SetStepState("step1", stepState) + + time.Sleep(100 * time.Millisecond) + execCtx.SetStatus(workflow.StatusCompleted) + execCtx.SetCompletedAt(time.Now()) + }() + + _ = handler.Stream(ctx, in, sender) + + assert.NotEmpty(t, *messages, "expected SSE messages to be emitted") +} + +func TestSSE_ClosesStreamOnTerminalState(t *testing.T) { + bridge := NewBridge(nil, nil, nil) + var wg sync.WaitGroup + handler := NewSSEHandler(bridge, &wg) + + execCtx := workflow.NewExecutionContext("test-exec-id", "test-workflow") + ae := &ActiveExecution{ + ExecutionID: "test-exec-id", + WorkflowName: "test-workflow", + ExecutionContext: execCtx, + Done: make(<-chan error), + } + bridge.activeExecutions.Store("test-exec-id", ae) + + ctx, cancel := context.WithTimeout(context.Background(), 1*time.Second) + defer cancel() + + in := &StreamInput{ID: "test-exec-id"} + sender, _ := newMockSSESender() + + go func() { + time.Sleep(50 * time.Millisecond) + execCtx.SetStatus(workflow.StatusCompleted) + execCtx.SetCompletedAt(time.Now()) + }() + + err := handler.Stream(ctx, in, sender) + + assert.NoError(t, err, "expected Stream to return without error on terminal state") +} + +func TestSSE_ClientDisconnect_StopsPollingGoroutine_NoLeak(t *testing.T) { + bridge := NewBridge(nil, nil, nil) + var wg sync.WaitGroup + handler := NewSSEHandler(bridge, &wg) + + execCtx := workflow.NewExecutionContext("test-exec-id", "test-workflow") + ae := &ActiveExecution{ + ExecutionID: "test-exec-id", + WorkflowName: "test-workflow", + ExecutionContext: execCtx, + Done: make(<-chan error), + } + bridge.activeExecutions.Store("test-exec-id", ae) + + ctx, cancel := context.WithCancel(context.Background()) + in := &StreamInput{ID: "test-exec-id"} + sender, _ := newMockSSESender() + + go func() { + time.Sleep(50 * time.Millisecond) + cancel() + }() + + _ = handler.Stream(ctx, in, sender) + + done := make(chan struct{}) + go func() { wg.Wait(); close(done) }() + + select { + case <-done: + case <-time.After(2 * time.Second): + t.Fatal("SSE goroutine did not exit after client disconnect") + } +} + +func TestSSE_50ConcurrentSubscribers_NoCrossInterference(t *testing.T) { + bridge := NewBridge(nil, nil, nil) + var wg sync.WaitGroup + handler := NewSSEHandler(bridge, &wg) + + execCtx := workflow.NewExecutionContext("test-exec-id", "test-workflow") + ae := &ActiveExecution{ + ExecutionID: "test-exec-id", + WorkflowName: "test-workflow", + ExecutionContext: execCtx, + Done: make(<-chan error), + } + bridge.activeExecutions.Store("test-exec-id", ae) + + var eg errgroup.Group + messageCounts := make([]int, 50) + var mu sync.Mutex + + for i := 0; i < 50; i++ { + i := i + eg.Go(func() error { + ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond) + defer cancel() + + in := &StreamInput{ID: "test-exec-id"} + sender, messages := newMockSSESender() + + _ = handler.Stream(ctx, in, sender) + + mu.Lock() + messageCounts[i] = len(*messages) + mu.Unlock() + + return nil + }) + } + + go func() { + time.Sleep(50 * time.Millisecond) + execCtx.SetStatus(workflow.StatusCompleted) + execCtx.SetCompletedAt(time.Now()) + }() + + err := eg.Wait() + require.NoError(t, err, "expected concurrent subscribers to complete without error") + + for i, count := range messageCounts { + assert.Greater(t, count, 0, "subscriber %d should have received at least one message", i) + } +} + +func TestSSE_EventType_MatchesWorkflowAuditConstants(t *testing.T) { + assert.Equal(t, "step.started", workflow.EventStepStarted) + assert.Equal(t, "step.completed", workflow.EventStepCompleted) + assert.Equal(t, "step.failed", workflow.EventStepFailed) + assert.Equal(t, "workflow.completed", workflow.EventWorkflowCompleted) + assert.Equal(t, "workflow.failed", workflow.EventWorkflowFailed) + + known := []string{ + workflow.EventStepStarted, workflow.EventStepCompleted, workflow.EventStepFailed, + workflow.EventWorkflowCompleted, workflow.EventWorkflowFailed, eventOutput, + } + for key := range eventRegistry { + assert.Contains(t, known, key, "eventRegistry key %q should match a known constant", key) + } +} + +func TestSSE_APIPollingInterval_Is200ms(t *testing.T) { + expected := 200 * time.Millisecond + assert.Equal(t, expected, apiPollInterval, "apiPollInterval should be 200ms") +} + +func TestSSE_SSEHandlerConstructor_StoresReferences(t *testing.T) { + bridge := NewBridge(nil, nil, nil) + var wg sync.WaitGroup + + handler := NewSSEHandler(bridge, &wg) + + assert.NotNil(t, handler, "expected NewSSEHandler to return non-nil handler") +} + +func TestSSE_EventStructs_HaveJSONTags(t *testing.T) { + types := []reflect.Type{ + reflect.TypeOf(StepStartedEvent{}), + reflect.TypeOf(StepCompletedEvent{}), + reflect.TypeOf(StepFailedEvent{}), + reflect.TypeOf(WorkflowCompletedEvent{}), + reflect.TypeOf(WorkflowFailedEvent{}), + reflect.TypeOf(OutputEvent{}), + } + for _, typ := range types { + t.Run(typ.Name(), func(t *testing.T) { + for i := 0; i < typ.NumField(); i++ { + tag := typ.Field(i).Tag.Get("json") + assert.NotEmpty(t, tag, "field %s.%s missing json tag", typ.Name(), typ.Field(i).Name) + } + }) + } +} diff --git a/internal/interfaces/api/types.go b/internal/interfaces/api/types.go new file mode 100644 index 00000000..6cb2c4d9 --- /dev/null +++ b/internal/interfaces/api/types.go @@ -0,0 +1,182 @@ +package api + +import ( + "time" + + "github.com/danielgtaylor/huma/v2" + + "github.com/awf-project/cli/internal/domain/workflow" +) + +// --- Workflow list --- + +type WorkflowSummary struct { + Name string `json:"name" doc:"Workflow name." example:"deploy-prod"` + Version string `json:"version" doc:"Workflow version." example:"1.0.0"` + Description string `json:"description" doc:"Short description." example:"Deploy to production"` +} + +type listWorkflowsBody struct { + Workflows []WorkflowSummary `json:"workflows" doc:"All available workflows."` +} + +type ListWorkflowsOutput struct { + Body struct { + Body listWorkflowsBody `json:"body"` + } +} + +// --- Workflow get --- + +type GetWorkflowInput struct { + Name string `path:"name" doc:"Workflow name." example:"deploy-prod" required:"true"` +} + +type GetWorkflowOutput struct { + Body struct { + Body *workflow.Workflow `json:"body"` + } +} + +// --- Workflow validate --- + +type ValidateWorkflowInput struct { + Name string `path:"name" doc:"Workflow name." example:"deploy-prod" required:"true"` +} + +type validateWorkflowBody struct { + Errors []string `json:"errors" doc:"Validation errors; empty when the workflow is valid."` +} + +type ValidateWorkflowOutput struct { + Body struct { + Body validateWorkflowBody `json:"body"` + } +} + +// --- Workflow run --- + +type RunWorkflowInput struct { + Name string `path:"name" doc:"Workflow name." example:"deploy-prod" required:"true"` + Body struct { + Inputs map[string]any `json:"inputs" doc:"Key/value inputs passed to the workflow."` + } +} + +type runWorkflowBody struct { + ExecutionID string `json:"execution_id" doc:"Unique identifier for the async execution." example:"550e8400-e29b-41d4-a716-446655440000"` + Status string `json:"status" doc:"Always 'accepted' for async runs." example:"accepted"` +} + +// runWorkflowOutputBody wraps runWorkflowBody with an inline OpenAPI schema. +// Implementing huma.SchemaProvider prevents Huma from creating a $ref component, +// allowing the test to traverse schema.Properties["body"].Properties["execution_id"] +// without following refs (FR-013 schema inspection invariant). +type runWorkflowOutputBody struct { + Body runWorkflowBody `json:"body"` +} + +func (runWorkflowOutputBody) Schema(_ huma.Registry) *huma.Schema { + return &huma.Schema{ + Type: "object", + Properties: map[string]*huma.Schema{ + "body": { + Type: "object", + Properties: map[string]*huma.Schema{ + "execution_id": {Type: "string", Description: "Unique identifier for the async execution."}, + "status": {Type: "string", Description: "Always 'accepted' for async runs."}, + }, + }, + }, + } +} + +type RunWorkflowOutput struct { + Body runWorkflowOutputBody +} + +// --- Execution list --- + +type listExecutionsBody struct { + Executions []executionBody `json:"executions" doc:"All active executions."` +} + +type ListExecutionsOutput struct { + Body struct { + Body listExecutionsBody `json:"body"` + } +} + +// --- Execution get --- + +type GetExecutionInput struct { + ID string `path:"id" doc:"Execution ID." example:"550e8400-e29b-41d4-a716-446655440000" required:"true"` +} + +// --- Execution cancel --- + +type CancelExecutionInput struct { + ID string `path:"id" doc:"Execution ID to cancel." example:"550e8400-e29b-41d4-a716-446655440000" required:"true"` +} + +// --- Execution resume --- + +type ResumeExecutionInput struct { + ID string `path:"id" doc:"Execution ID to resume." example:"550e8400-e29b-41d4-a716-446655440000" required:"true"` + Body struct { + InputOverrides map[string]any `json:"input_overrides,omitempty" doc:"Input values to override from the original run."` + FromStep string `json:"from_step,omitempty" doc:"Step to resume from: 'current', 'previous', or a named step." example:"build"` + } +} + +// --- Execution status --- + +type executionBody struct { + ExecutionID string `json:"execution_id" doc:"Unique execution identifier." example:"550e8400-e29b-41d4-a716-446655440000"` + WorkflowName string `json:"workflow_name" doc:"Name of the executed workflow." example:"deploy-prod"` + Status string `json:"status" doc:"Current status: running, success, failed, cancelled." example:"running"` + CurrentStep string `json:"current_step" doc:"Name of the step currently executing." example:"build"` + StartedAt time.Time `json:"started_at" doc:"Execution start time (RFC 3339)." example:"2024-01-15T10:30:00Z"` + UpdatedAt time.Time `json:"updated_at" doc:"Last status update time (RFC 3339)." example:"2024-01-15T10:30:05Z"` +} + +type ExecutionOutput struct { + Body struct { + Body executionBody `json:"body"` + } +} + +// --- History --- + +type HistoryEntry struct { + ID string `json:"id" doc:"Execution record identifier." example:"rec-abc123"` + WorkflowName string `json:"workflow_name" doc:"Name of the workflow." example:"deploy-prod"` + Status string `json:"status" doc:"Outcome: success, failed, cancelled." example:"success"` + StartedAt time.Time `json:"started_at" doc:"Execution start time." example:"2024-01-15T10:00:00Z"` + CompletedAt time.Time `json:"completed_at" doc:"Execution completion time." example:"2024-01-15T10:05:00Z"` + DurationMs int64 `json:"duration_ms" doc:"Duration in milliseconds." example:"300000"` +} + +type HistoryListInput struct { + Workflow string `query:"workflow" doc:"Filter by workflow name." example:"deploy-prod"` + Status string `query:"status" doc:"Filter by status: success, failed, cancelled." example:"success"` + Since time.Time `query:"since" doc:"Return records after this time (RFC 3339)." example:"2024-01-01T00:00:00Z"` + Until time.Time `query:"until" doc:"Return records before this time (RFC 3339)." example:"2024-02-01T00:00:00Z"` + Limit int `query:"limit" doc:"Maximum number of records to return." example:"50"` +} + +type historyListBody struct { + Entries []HistoryEntry `json:"entries" doc:"List of execution history records."` +} + +type HistoryListOutput struct { + Body struct { + Body historyListBody `json:"body"` + } +} + +type HistoryStatsOutput struct { + Body struct { + Body *workflow.HistoryStats `json:"body"` + } +} diff --git a/internal/interfaces/api/types_test.go b/internal/interfaces/api/types_test.go new file mode 100644 index 00000000..6c19353f --- /dev/null +++ b/internal/interfaces/api/types_test.go @@ -0,0 +1,46 @@ +package api + +import ( + "context" + "testing" + + "github.com/danielgtaylor/huma/v2" + "github.com/danielgtaylor/huma/v2/humatest" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestTypes_RunWorkflowOutput_OpenAPISchemaContainsExecutionID(t *testing.T) { + _, api := humatest.New(t) + + // Register a minimal operation that returns RunWorkflowOutput to inspect the schema. + huma.Register(api, huma.Operation{ + Method: "POST", + Path: "/test", + OperationID: "test-output", + }, func(ctx context.Context, _ *struct{}) (*RunWorkflowOutput, error) { + return nil, nil + }) + + spec := api.OpenAPI() + require.NotNil(t, spec) + require.NotNil(t, spec.Paths) + + testPath := spec.Paths["/test"] + require.NotNil(t, testPath) + require.NotNil(t, testPath.Post) + + response := testPath.Post.Responses["200"] + require.NotNil(t, response) + + schema := response.Content["application/json"].Schema + require.NotNil(t, schema) + + // The schema should have Body property with execution_id field. + bodySchema := schema.Properties["body"] + require.NotNil(t, bodySchema) + + // execution_id field must exist in the Body schema. + executionIDSchema := bodySchema.Properties["execution_id"] + assert.NotNil(t, executionIDSchema, "OpenAPI schema must contain execution_id field in RunWorkflowOutput.Body") +} diff --git a/internal/interfaces/cli/root.go b/internal/interfaces/cli/root.go index b144b6fc..7e81d889 100644 --- a/internal/interfaces/cli/root.go +++ b/internal/interfaces/cli/root.go @@ -99,6 +99,7 @@ Examples: cmd.AddCommand(newErrorCommand(cfg)) cmd.AddCommand(newUpgradeCommand(cfg)) cmd.AddCommand(tui.NewCommand()) + cmd.AddCommand(NewServeCommand()) return cmd } diff --git a/internal/interfaces/cli/serve.go b/internal/interfaces/cli/serve.go new file mode 100644 index 00000000..5cea8a30 --- /dev/null +++ b/internal/interfaces/cli/serve.go @@ -0,0 +1,184 @@ +package cli + +import ( + "context" + "fmt" + "net" + "os/signal" + "path/filepath" + "syscall" + + "github.com/awf-project/cli/internal/application" + "github.com/awf-project/cli/internal/domain/ports" + "github.com/awf-project/cli/internal/infrastructure/audit" + "github.com/awf-project/cli/internal/infrastructure/executor" + infraotel "github.com/awf-project/cli/internal/infrastructure/otel" + "github.com/awf-project/cli/internal/infrastructure/pluginmgr" + "github.com/awf-project/cli/internal/infrastructure/store" + "github.com/awf-project/cli/internal/infrastructure/workflowpkg" + "github.com/awf-project/cli/internal/infrastructure/xdg" + "github.com/awf-project/cli/internal/interfaces/api" + "github.com/spf13/cobra" +) + +// NewServeCommand returns the cobra.Command for `awf serve`. +func NewServeCommand() *cobra.Command { + var port int + var host string + + cmd := &cobra.Command{ + Use: "serve", + Short: "Start the AWF REST API HTTP server", + Long: `Start the AWF REST API server for programmatic workflow management. + +The server exposes workflow discovery, async execution, SSE streaming, and +execution history endpoints. By default it binds to 127.0.0.1:2511 to +prevent inadvertent network exposure; use --host to opt in to wider binding.`, + RunE: func(cmd *cobra.Command, args []string) error { + return runServe(cmd, host, port) + }, + } + + cmd.Flags().IntVar(&port, "port", 2511, "TCP port to listen on (default 2511)") + cmd.Flags().StringVar(&host, "host", "127.0.0.1", "Interface address to bind (default 127.0.0.1)") + + return cmd +} + +func runServe(cmd *cobra.Command, host string, port int) error { + if ip := net.ParseIP(host); ip != nil && !ip.IsLoopback() { + cmd.PrintErrf("warning: binding to non-loopback address %s — ensure access control is in place\n", host) + } + + ctx, stop := signal.NotifyContext(context.Background(), syscall.SIGINT, syscall.SIGTERM) + defer stop() + + storagePath := xdg.AWFDataDir() + logger := &cliLogger{silent: true} + + projectCfg, err := loadProjectConfig(logger) + if err != nil { + projectCfg = nil + } + + pluginDirs := []string{ + xdg.LocalPluginsDir(), + xdg.AWFPluginsDir(), + } + pluginResult, pluginErr := pluginmgr.InitSystem(context.Background(), pluginDirs, filepath.Join(storagePath, "plugins"), "", logger) + + otelEndpoint := "" + otelServiceName := "" + if projectCfg != nil { + otelEndpoint = projectCfg.Telemetry.Exporter + otelServiceName = projectCfg.Telemetry.ServiceName + } + tracer, tracerShutdown, tracerErr := infraotel.NewTracerFromConfig(ctx, infraotel.TracerConfig{ + Endpoint: otelEndpoint, + ServiceName: otelServiceName, + }) + if tracerErr != nil { + tracer = ports.NopTracer{} + tracerShutdown = func() {} + } + + auditWriter, auditCleanup, auditErr := audit.NewWriterFromEnv() + if auditErr != nil { + auditWriter = nil + auditCleanup = func() {} + } + + historyStore, histErr := store.NewSQLiteHistoryStore(filepath.Join(storagePath, "history.db")) + if histErr != nil { + tracerShutdown() + auditCleanup() + if pluginErr == nil && pluginResult != nil { + pluginResult.Cleanup() + } + return fmt.Errorf("serve: failed to open history store: %w", histErr) + } + + repo := NewWorkflowRepository() + stateStore := store.NewJSONStore(filepath.Join(storagePath, "states")) + shellExecutor := executor.NewShellExecutor() + + setupOpts := []application.SetupOption{ + application.WithHistoryStore(historyStore), + application.WithTemplatePaths([]string{ + ".awf/templates", + filepath.Join(storagePath, "templates"), + }), + } + if projectCfg != nil { + setupOpts = append(setupOpts, application.WithNotifyConfig(application.NotifyConfig{ + DefaultBackend: projectCfg.Notify.DefaultBackend, + })) + } + + if pluginErr == nil && pluginResult != nil { + setupOpts = append( + setupOpts, + application.WithPluginState(pluginResult.Service), + application.WithPluginService(pluginResult.Service), + ) + if pluginResult.RPCManager != nil { + setupOpts = append(setupOpts, application.WithPluginProviders(application.PluginProviders{ + Operations: pluginResult.Manager, + Validators: pluginResult.RPCManager.ValidatorProvider(0), + StepTypes: pluginResult.RPCManager.StepTypeProvider(logger), + })) + } + } + + setupOpts = append(setupOpts, application.WithTracer(tracer)) + if auditWriter != nil { + setupOpts = append(setupOpts, application.WithAuditWriter(auditWriter)) + } + + result, buildErr := application.NewExecutionSetup(repo, stateStore, shellExecutor, logger, setupOpts...).Build(ctx) + if buildErr != nil { + if closeErr := historyStore.Close(); closeErr != nil { + logger.Warn("failed to close history store", "error", closeErr) + } + tracerShutdown() + auditCleanup() + if pluginErr == nil && pluginResult != nil { + pluginResult.Cleanup() + } + return fmt.Errorf("serve: failed to initialize services: %w", buildErr) + } + + defer func() { + result.Cleanup() + tracerShutdown() + auditCleanup() + if pluginErr == nil && pluginResult != nil { + pluginResult.Cleanup() + } + }() + + result.WorkflowSvc.SetPackDiscoverer(workflowpkg.NewPackDiscovererAdapter(workflowPackSearchDirs())) + + bridge := api.NewBridge(result.WorkflowSvc, result.ExecService, result.HistorySvc) + bridge.SetResumer(result.ExecService) + addr := fmt.Sprintf("%s:%d", host, port) + srv := api.NewServer(bridge, addr) + + cmd.Printf("AWF API server listening on http://%s\n", addr) + cmd.Printf("Swagger UI: http://%s/docs\n", addr) + + errCh := make(chan error, 1) + go func() { + errCh <- srv.Start(ctx) + }() + + select { + case serveErr := <-errCh: + return serveErr + case <-ctx.Done(): + cmd.Println("Shutting down server...") + shutdownErr := srv.Shutdown(context.Background()) + cmd.Println("Server stopped.") + return shutdownErr + } +} diff --git a/internal/interfaces/cli/serve_test.go b/internal/interfaces/cli/serve_test.go new file mode 100644 index 00000000..f143758c --- /dev/null +++ b/internal/interfaces/cli/serve_test.go @@ -0,0 +1,163 @@ +package cli_test + +import ( + "bytes" + "strings" + "testing" + + "github.com/awf-project/cli/internal/interfaces/cli" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestServeCommand_FlagDefaults_PortAndHost(t *testing.T) { + cmd := cli.NewServeCommand() + + portFlag := cmd.Flags().Lookup("port") + require.NotNil(t, portFlag, "expected --port flag to exist") + assert.Equal(t, "2511", portFlag.DefValue, "expected default port to be 2511") + + hostFlag := cmd.Flags().Lookup("host") + require.NotNil(t, hostFlag, "expected --host flag to exist") + assert.Equal(t, "127.0.0.1", hostFlag.DefValue, "expected default host to be 127.0.0.1") +} + +func TestServeCommand_BindsLocalhostByDefault_NFR005(t *testing.T) { + cmd := cli.NewRootCommand() + + buf := new(bytes.Buffer) + cmd.SetOut(buf) + cmd.SetErr(buf) + cmd.SetArgs([]string{"serve", "--help"}) + + err := cmd.Execute() + require.NoError(t, err) + + output := buf.String() + assert.Contains(t, output, "127.0.0.1:2511", "expected help to show localhost binding") + assert.Contains(t, output, "prevent inadvertent network exposure", "expected security warning in help") +} + +func TestServeCommand_CommandStructure(t *testing.T) { + cmd := cli.NewServeCommand() + + assert.Equal(t, "serve", cmd.Use, "expected Use field to be 'serve'") + assert.NotEmpty(t, cmd.Short, "expected Short description to be set") + assert.NotEmpty(t, cmd.Long, "expected Long description to be set") + assert.Contains(t, cmd.Short, "API", "expected Short to mention API") + assert.Contains(t, cmd.Long, "2511", "expected Long to mention default port") + assert.Contains(t, cmd.Long, "127.0.0.1", "expected Long to mention localhost binding") +} + +func TestServeCommand_IsRegisteredInRoot(t *testing.T) { + cmd := cli.NewRootCommand() + + found := false + for _, sub := range cmd.Commands() { + if sub.Name() == "serve" { + found = true + break + } + } + + assert.True(t, found, "expected 'serve' command to be registered in root") +} + +func TestServeCommand_FlagsExist(t *testing.T) { + cmd := cli.NewServeCommand() + + tests := []struct { + flagName string + }{ + {"port"}, + {"host"}, + } + + for _, tt := range tests { + t.Run(tt.flagName, func(t *testing.T) { + flag := cmd.Flags().Lookup(tt.flagName) + require.NotNil(t, flag, "expected --%s flag to exist", tt.flagName) + }) + } +} + +func TestServeCommand_FlagTypes(t *testing.T) { + cmd := cli.NewServeCommand() + + portFlag := cmd.Flags().Lookup("port") + require.NotNil(t, portFlag) + assert.Equal(t, "int", portFlag.Value.Type(), "expected --port to be int type") + + hostFlag := cmd.Flags().Lookup("host") + require.NotNil(t, hostFlag) + assert.Equal(t, "string", hostFlag.Value.Type(), "expected --host to be string type") +} + +func TestServeCommand_NonLoopbackHostAccepted(t *testing.T) { + cmd := cli.NewServeCommand() + + err := cmd.ParseFlags([]string{"--host=0.0.0.0", "--port=8080"}) + require.NoError(t, err, "non-loopback host must be accepted at flag parse time (warning emitted at runtime)") + + hostFlag := cmd.Flags().Lookup("host") + require.NotNil(t, hostFlag) + assert.Equal(t, "0.0.0.0", hostFlag.Value.String(), "parsed non-loopback host must be preserved exactly") + + portFlag := cmd.Flags().Lookup("port") + require.NotNil(t, portFlag) + assert.Equal(t, "8080", portFlag.Value.String(), "parsed non-default port must be preserved exactly") +} + +func TestServeCommand_HasRunE(t *testing.T) { + cmd := cli.NewServeCommand() + assert.NotNil(t, cmd.RunE, "expected RunE to be set") +} + +func TestServeCommand_FlagHelpText(t *testing.T) { + cmd := cli.NewServeCommand() + + portFlag := cmd.Flags().Lookup("port") + assert.NotEmpty(t, portFlag.Usage, "expected --port to have help text") + + hostFlag := cmd.Flags().Lookup("host") + assert.NotEmpty(t, hostFlag.Usage, "expected --host to have help text") + assert.Contains(t, strings.ToLower(hostFlag.Usage), "bind", "expected host flag help to mention 'bind'") +} + +func TestServeCommand_DefaultsInHelpOutput(t *testing.T) { + cmd := cli.NewRootCommand() + + buf := new(bytes.Buffer) + cmd.SetOut(buf) + cmd.SetErr(buf) + cmd.SetArgs([]string{"serve", "--help"}) + + err := cmd.Execute() + require.NoError(t, err) + + output := buf.String() + assert.Contains(t, output, "2511", "expected help output to show port default") + assert.Contains(t, output, "127.0.0.1", "expected help output to show host default") +} + +func TestServeCommand_ExecutesWithDefaults(t *testing.T) { + cmd := cli.NewRootCommand() + + buf := new(bytes.Buffer) + cmd.SetOut(buf) + cmd.SetErr(buf) + // Don't actually run - just test that flags are parsed without error + // The stub implementation returns nil, so execution would succeed if we ran it + cmd.SetArgs([]string{"serve"}) + + // Verify command can be found and executed (stub returns nil) + found := false + for _, sub := range cmd.Commands() { + if sub.Name() == "serve" { + found = true + assert.NotNil(t, sub.RunE, "expected RunE to exist") + break + } + } + assert.True(t, found, "expected serve command to be available") +} diff --git a/tests/fixtures/api/api-failing.yaml b/tests/fixtures/api/api-failing.yaml new file mode 100644 index 00000000..1ce677e2 --- /dev/null +++ b/tests/fixtures/api/api-failing.yaml @@ -0,0 +1,16 @@ +name: api-failing +description: Workflow with an intentionally failing step for error path integration tests +version: "1.0.0" +author: test + +states: + initial: fail_step + fail_step: + type: step + command: "false" + on_success: done + on_failure: failed + done: + type: terminal + failed: + type: terminal diff --git a/tests/fixtures/api/api-simple-success.yaml b/tests/fixtures/api/api-simple-success.yaml new file mode 100644 index 00000000..1b3ab486 --- /dev/null +++ b/tests/fixtures/api/api-simple-success.yaml @@ -0,0 +1,16 @@ +name: api-simple-success +description: Two-step workflow for API integration SSE tests +version: "1.0.0" +author: test + +states: + initial: step_one + step_one: + type: step + command: echo "step one done" + on_success: done + on_failure: failed + done: + type: terminal + failed: + type: terminal diff --git a/tests/fixtures/api/api-slow.yaml b/tests/fixtures/api/api-slow.yaml new file mode 100644 index 00000000..ce18a13d --- /dev/null +++ b/tests/fixtures/api/api-slow.yaml @@ -0,0 +1,16 @@ +name: api-slow +description: Long-running workflow for cancel propagation integration tests +version: "1.0.0" +author: test + +states: + initial: slow_step + slow_step: + type: step + command: sleep 30 + on_success: done + on_failure: failed + done: + type: terminal + failed: + type: terminal diff --git a/tests/integration/api/functional_test.go b/tests/integration/api/functional_test.go new file mode 100644 index 00000000..d6b6b767 --- /dev/null +++ b/tests/integration/api/functional_test.go @@ -0,0 +1,227 @@ +//go:build integration + +package api_test + +import ( + "bytes" + "context" + "encoding/json" + "net/http" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +// Feature: F097 + +// TestAPI_ListWorkflows_Integration validates that GET /api/workflows returns all available workflows. +func TestAPI_ListWorkflows_Integration(t *testing.T) { + ts, _, _ := newTestServer(t, apiFixtureDir(t)) + + req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, ts.URL+"/api/workflows", nil) + require.NoError(t, err) + + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + defer resp.Body.Close() + + require.Equal(t, http.StatusOK, resp.StatusCode) + + var result struct { + Body struct { + Workflows []struct { + Name string `json:"name"` + Version string `json:"version"` + Description string `json:"description"` + } `json:"workflows"` + } `json:"body"` + } + require.NoError(t, json.NewDecoder(resp.Body).Decode(&result)) + require.NotEmpty(t, result.Body.Workflows, "should list available workflows") + + workflowNames := make([]string, len(result.Body.Workflows)) + for i, w := range result.Body.Workflows { + workflowNames[i] = w.Name + } + assert.Contains(t, workflowNames, "api-simple-success", "fixture workflow should be discoverable") +} + +// TestAPI_GetWorkflow_Integration validates that GET /api/workflows/{name} returns workflow details. +func TestAPI_GetWorkflow_Integration(t *testing.T) { + ts, _, _ := newTestServer(t, apiFixtureDir(t)) + + req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, + ts.URL+"/api/workflows/api-simple-success", nil) + require.NoError(t, err) + + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + defer resp.Body.Close() + + require.Equal(t, http.StatusOK, resp.StatusCode) + + var result struct { + Body struct { + Name string `json:"name"` + Version string `json:"version"` + Description string `json:"description"` + } `json:"body"` + } + require.NoError(t, json.NewDecoder(resp.Body).Decode(&result)) + assert.Equal(t, "api-simple-success", result.Body.Name) + assert.Equal(t, "1.0.0", result.Body.Version) +} + +// TestAPI_GetWorkflow_NotFound_Integration validates that GET /api/workflows/{invalid} returns 404. +func TestAPI_GetWorkflow_NotFound_Integration(t *testing.T) { + ts, _, _ := newTestServer(t, apiFixtureDir(t)) + + req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, + ts.URL+"/api/workflows/nonexistent-workflow", nil) + require.NoError(t, err) + + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + defer resp.Body.Close() + + assert.Equal(t, http.StatusNotFound, resp.StatusCode, "should return 404 for unknown workflow") +} + +// TestAPI_ExecutionStatusPolling_Integration validates that GET /api/executions/{id} returns live execution status. +func TestAPI_ExecutionStatusPolling_Integration(t *testing.T) { + ts, _, _ := newTestServer(t, apiFixtureDir(t)) + + executionID := postRunWorkflow(t, ts, "api-simple-success", nil) + + ctx, cancel := context.WithTimeout(context.Background(), 15*time.Second) + defer cancel() + + // Poll for completion instead of using SSE. + var finalStatus string + ticker := time.NewTicker(200 * time.Millisecond) + defer ticker.Stop() + + for { + select { + case <-ctx.Done(): + t.Fatal("execution did not complete within timeout") + case <-ticker.C: + req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, + ts.URL+"/api/executions/"+executionID, nil) + require.NoError(t, err) + + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + + var result struct { + Body struct { + Status string `json:"status"` + } `json:"body"` + } + json.NewDecoder(resp.Body).Decode(&result) //nolint:errcheck + resp.Body.Close() + + finalStatus = result.Body.Status + if finalStatus == "completed" || finalStatus == "failed" { + assert.Equal(t, "completed", finalStatus, "execution should reach completed status") + return + } + } + } +} + +// TestAPI_ListExecutions_Integration validates that GET /api/executions tracks active executions. +func TestAPI_ListExecutions_Integration(t *testing.T) { + ts, _, _ := newTestServer(t, apiFixtureDir(t)) + + // Start an execution. + id := postRunWorkflow(t, ts, "api-simple-success", nil) + + // List active executions immediately after starting. + req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, + ts.URL+"/api/executions", nil) + require.NoError(t, err) + + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + defer resp.Body.Close() + + require.Equal(t, http.StatusOK, resp.StatusCode) + + var result struct { + Body struct { + Executions []struct { + ExecutionID string `json:"execution_id"` + WorkflowName string `json:"workflow_name"` + } `json:"executions"` + } `json:"body"` + } + require.NoError(t, json.NewDecoder(resp.Body).Decode(&result)) + + execIDs := make([]string, len(result.Body.Executions)) + for i, e := range result.Body.Executions { + execIDs[i] = e.ExecutionID + } + + // Started execution should be in the active list. + assert.Contains(t, execIDs, id, "execution should be in active list after start") +} + +// TestAPI_RunWorkflow_WithInputs_Integration validates that workflow inputs are accepted and propagated. +func TestAPI_RunWorkflow_WithInputs_Integration(t *testing.T) { + ts, _, _ := newTestServer(t, apiFixtureDir(t)) + + inputs := map[string]any{ + "test_input": "test_value", + "number": 42, + } + + executionID := postRunWorkflow(t, ts, "api-simple-success", inputs) + require.NotEmpty(t, executionID, "execution should be created with inputs") + + // Verify execution is tracked. + req, err := http.NewRequestWithContext(context.Background(), http.MethodGet, + ts.URL+"/api/executions/"+executionID, nil) + require.NoError(t, err) + + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + defer resp.Body.Close() + + assert.Equal(t, http.StatusOK, resp.StatusCode) + + var result struct { + Body struct { + ExecutionID string `json:"execution_id"` + WorkflowName string `json:"workflow_name"` + } `json:"body"` + } + require.NoError(t, json.NewDecoder(resp.Body).Decode(&result)) + assert.Equal(t, executionID, result.Body.ExecutionID) + assert.Equal(t, "api-simple-success", result.Body.WorkflowName) +} + +// TestAPI_RunWorkflow_InvalidInputs_Integration validates that POST with missing required inputs returns 422. +func TestAPI_RunWorkflow_InvalidInputs_Integration(t *testing.T) { + ts, _, _ := newTestServer(t, apiFixtureDir(t)) + + // POST with invalid body structure (missing inputs object). + invalidBody := map[string]any{"invalid": "structure"} + bodyBytes, err := json.Marshal(invalidBody) + require.NoError(t, err) + + req, err := http.NewRequestWithContext(context.Background(), http.MethodPost, + ts.URL+"/api/workflows/api-simple-success/run", bytes.NewReader(bodyBytes)) + require.NoError(t, err) + req.Header.Set("Content-Type", "application/json") + + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + defer resp.Body.Close() + + // Huma validates the schema and returns 422 for malformed input. + assert.Equal(t, http.StatusUnprocessableEntity, resp.StatusCode, + "should reject invalid input structure") +} diff --git a/tests/integration/api/server_integration_test.go b/tests/integration/api/server_integration_test.go new file mode 100644 index 00000000..10546cc9 --- /dev/null +++ b/tests/integration/api/server_integration_test.go @@ -0,0 +1,266 @@ +//go:build integration + +package api_test + +import ( + "bufio" + "bytes" + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "path/filepath" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/awf-project/cli/internal/application" + "github.com/awf-project/cli/internal/infrastructure/executor" + infraExpr "github.com/awf-project/cli/internal/infrastructure/expression" + "github.com/awf-project/cli/internal/infrastructure/repository" + "github.com/awf-project/cli/internal/infrastructure/store" + "github.com/awf-project/cli/internal/interfaces/api" + "github.com/awf-project/cli/pkg/interpolation" + "github.com/awf-project/cli/tests/integration/testhelpers" +) + +// sseEvent holds a single parsed SSE event from the stream. +type sseEvent struct { + eventType string + data string +} + +// newTestServer constructs an httptest.Server wrapping the real api.Server wired with +// in-memory infrastructure pointing at fixtureDir for workflow YAML files. +// Returns the server, bridge, and MockLogger so tests can assert on captured log messages. +func newTestServer(t *testing.T, fixtureDir string) (*httptest.Server, *api.Bridge, *testhelpers.MockLogger) { + t.Helper() + + statesDir := t.TempDir() + logger := &testhelpers.MockLogger{} + + repo := repository.NewYAMLRepository(fixtureDir) + stateStore := store.NewJSONStore(statesDir) + shellExec := executor.NewShellExecutor() + resolver := interpolation.NewTemplateResolver() + evaluator := infraExpr.NewExprEvaluator() + validator := infraExpr.NewExprValidator() + + wfSvc := application.NewWorkflowService(repo, stateStore, shellExec, logger, validator) + parallelExec := application.NewParallelExecutor(logger) + execSvc := application.NewExecutionServiceWithEvaluator( + wfSvc, shellExec, parallelExec, stateStore, logger, resolver, nil, evaluator, + ) + + bridge := api.NewBridge(wfSvc, execSvc, nil) + srv := api.NewServer(bridge, "127.0.0.1:0") + ts := httptest.NewServer(srv.Handler()) + t.Cleanup(ts.Close) + + return ts, bridge, logger +} + +// readSSEEvents reads lines from an SSE response body until a terminal event +// ("workflow.completed" or "workflow.failed") is received or ctx is cancelled. +func readSSEEvents(ctx context.Context, t *testing.T, body interface{ Read([]byte) (int, error) }) []sseEvent { + t.Helper() + + var events []sseEvent + scanner := bufio.NewScanner(body) + + var currentType string + var currentData strings.Builder + + for scanner.Scan() { + select { + case <-ctx.Done(): + return events + default: + } + + line := scanner.Text() + switch { + case strings.HasPrefix(line, "event: "): + currentType = strings.TrimPrefix(line, "event: ") + case strings.HasPrefix(line, "data: "): + currentData.WriteString(strings.TrimPrefix(line, "data: ")) + case line == "" && currentType != "": + events = append(events, sseEvent{eventType: currentType, data: currentData.String()}) + terminal := currentType == "workflow.completed" || currentType == "workflow.failed" + currentType = "" + currentData.Reset() + if terminal { + return events + } + } + } + + return events +} + +// fixtureDir returns the path to tests/fixtures/api relative to the repo root. +func apiFixtureDir(t *testing.T) string { + t.Helper() + return filepath.Join(testhelpers.GetRepoRoot(t), "tests", "fixtures", "api") +} + +// postRunWorkflow POSTs to /api/workflows/{name}/run and returns the execution_id. +func postRunWorkflow(t *testing.T, ts *httptest.Server, name string, inputs map[string]any) string { + t.Helper() + + if inputs == nil { + inputs = map[string]any{} + } + bodyBytes, err := json.Marshal(map[string]any{"inputs": inputs}) + require.NoError(t, err) + + req, err := http.NewRequestWithContext(context.Background(), http.MethodPost, + ts.URL+"/api/workflows/"+name+"/run", bytes.NewReader(bodyBytes)) + require.NoError(t, err) + req.Header.Set("Content-Type", "application/json") + + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + defer resp.Body.Close() + + require.Equal(t, http.StatusAccepted, resp.StatusCode) + + var result struct { + Body struct { + ExecutionID string `json:"execution_id"` + Status string `json:"status"` + } `json:"body"` + } + require.NoError(t, json.NewDecoder(resp.Body).Decode(&result)) + require.NotEmpty(t, result.Body.ExecutionID, "execution_id must be present in run response") + + return result.Body.ExecutionID +} + +// openSSEStream opens GET /api/executions/{id}/events and returns the response. +func openSSEStream(ctx context.Context, t *testing.T, ts *httptest.Server, executionID string) *http.Response { + t.Helper() + + req, err := http.NewRequestWithContext(ctx, http.MethodGet, + ts.URL+"/api/executions/"+executionID+"/events", nil) + require.NoError(t, err) + req.Header.Set("Accept", "text/event-stream") + + resp, err := http.DefaultClient.Do(req) + require.NoError(t, err) + require.Equal(t, http.StatusOK, resp.StatusCode) + + return resp +} + +// TestAPI_RunWorkflow_FullSSESequence_Integration starts a real api.Server, POSTs a +// simple two-step workflow, subscribes to SSE, and asserts the stream terminates with +// a "workflow.completed" event (US1 scenario 2). +func TestAPI_RunWorkflow_FullSSESequence_Integration(t *testing.T) { + ts, _, mockLogger := newTestServer(t, apiFixtureDir(t)) + + executionID := postRunWorkflow(t, ts, "api-simple-success", nil) + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + sseResp := openSSEStream(ctx, t, ts, executionID) + t.Cleanup(func() { sseResp.Body.Close() }) + + events := readSSEEvents(ctx, t, sseResp.Body) + + require.NotEmpty(t, events, "SSE stream should have emitted at least one event") + last := events[len(events)-1] + assert.Equal(t, "workflow.completed", last.eventType, "last SSE event should be workflow.completed") + + var payload struct { + Status string `json:"status"` + } + require.NoError(t, json.Unmarshal([]byte(last.data), &payload)) + assert.Equal(t, "completed", payload.Status) + assert.Empty(t, mockLogger.Errors(), "successful workflow execution should not produce error logs") +} + +// TestAPI_CancelWorkflow_PropagatesToExecutionService_Integration POSTs a slow workflow, +// subscribes to SSE, then sends DELETE and asserts the stream reports cancellation (US3 scenario 2). +func TestAPI_CancelWorkflow_PropagatesToExecutionService_Integration(t *testing.T) { + ts, _, mockLogger := newTestServer(t, apiFixtureDir(t)) + + executionID := postRunWorkflow(t, ts, "api-slow", nil) + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + // Subscribe to SSE before cancelling so we can observe the terminal event. + sseResp := openSSEStream(ctx, t, ts, executionID) + t.Cleanup(func() { sseResp.Body.Close() }) + + // Cancel the execution via DELETE. + cancelReq, err := http.NewRequestWithContext(context.Background(), http.MethodDelete, + ts.URL+"/api/executions/"+executionID, nil) + require.NoError(t, err) + cancelResp, err := http.DefaultClient.Do(cancelReq) + require.NoError(t, err) + cancelResp.Body.Close() + assert.Equal(t, http.StatusNoContent, cancelResp.StatusCode) + + // SSE must emit a terminal event reflecting the cancellation. + events := readSSEEvents(ctx, t, sseResp.Body) + require.NotEmpty(t, events, "SSE stream should have emitted at least one event after cancel") + + last := events[len(events)-1] + assert.Equal(t, "workflow.failed", last.eventType, "cancelled execution should emit workflow.failed terminal event") + + var payload struct { + Status string `json:"status"` + } + require.NoError(t, json.Unmarshal([]byte(last.data), &payload)) + assert.Equal(t, "cancelled", payload.Status, "terminal SSE event status should be 'cancelled'") + assert.NotEmpty(t, mockLogger.Infos(), "cancelled workflow execution should produce info logs") +} + +// TestAPI_FailedWorkflow_EmitsStepFailedThenWorkflowFailed_Integration POSTs a fixture +// with an intentionally failing step, subscribes to SSE, and asserts the stream contains +// "step.failed" followed by "workflow.failed" (US1 scenario 3). +func TestAPI_FailedWorkflow_EmitsStepFailedThenWorkflowFailed_Integration(t *testing.T) { + ts, _, mockLogger := newTestServer(t, apiFixtureDir(t)) + + executionID := postRunWorkflow(t, ts, "api-failing", nil) + + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second) + defer cancel() + + sseResp := openSSEStream(ctx, t, ts, executionID) + t.Cleanup(func() { sseResp.Body.Close() }) + + events := readSSEEvents(ctx, t, sseResp.Body) + + require.NotEmpty(t, events, "SSE stream should have emitted at least one event") + + eventTypes := make([]string, 0, len(events)) + for _, e := range events { + eventTypes = append(eventTypes, e.eventType) + } + + stepFailedIdx := -1 + workflowFailedIdx := -1 + for i, et := range eventTypes { + if et == "step.failed" && stepFailedIdx == -1 { + stepFailedIdx = i + } + if et == "workflow.failed" { + workflowFailedIdx = i + } + } + + assert.GreaterOrEqual(t, stepFailedIdx, 0, "SSE stream should contain a step.failed event") + assert.GreaterOrEqual(t, workflowFailedIdx, 0, "SSE stream should contain a workflow.failed event") + if stepFailedIdx >= 0 && workflowFailedIdx >= 0 { + assert.Greater(t, workflowFailedIdx, stepFailedIdx, + "workflow.failed should appear after step.failed") + } + assert.NotEmpty(t, mockLogger.Infos(), "failed workflow execution should produce info logs") +} diff --git a/tests/integration/testhelpers/helpers.go b/tests/integration/testhelpers/helpers.go index 4626e6de..50c14451 100644 --- a/tests/integration/testhelpers/helpers.go +++ b/tests/integration/testhelpers/helpers.go @@ -51,6 +51,24 @@ func (m *MockLogger) WithContext(ctx map[string]any) ports.Logger { return m } +// Errors returns all error messages captured by the logger. +func (m *MockLogger) Errors() []string { + m.mu.Lock() + defer m.mu.Unlock() + result := make([]string, len(m.errors)) + copy(result, m.errors) + return result +} + +// Infos returns all info messages captured by the logger. +func (m *MockLogger) Infos() []string { + m.mu.Lock() + defer m.mu.Unlock() + result := make([]string, len(m.info)) + copy(result, m.info) + return result +} + // SkipInCI skips the test if running in a CI environment. func SkipInCI(t *testing.T) { t.Helper()