From 0d2720d46149682dd1dbc9eb7374720f40e37c84 Mon Sep 17 00:00:00 2001
From: Cursor Agent <cursoragent@cursor.com>
Date: Mon, 15 Jun 2026 16:04:35 +0000
Subject: [PATCH] docs: add developer guides for recent subsystems

Add in-repo docs/ with guides for configuration, runners, executors,
workflows, task pagination, and HA cluster operations. Update
CONTRIBUTING and README to link the new index. Refresh api-docs.yml
for keyset pagination on GET .../tasks/last.

Co-authored-by: Denis Gukov <fiftin@outlook.com>
---
 CONTRIBUTING.md                 |   4 +
 README.md                       |   1 +
 api-docs.yml                    |  17 +++-
 docs/README.md                  |  14 ++++
 docs/cluster-dashboard.md       | 109 ++++++++++++++++++++++++
 docs/configuration.md           | 113 +++++++++++++++++++++++++
 docs/runner-executors.md        | 127 ++++++++++++++++++++++++++++
 docs/runners-and-tags.md        | 105 +++++++++++++++++++++++
 docs/tasks-api-pagination.md    |  88 ++++++++++++++++++++
 docs/workflows.md               | 142 ++++++++++++++++++++++++++++++++
 web/public/swagger/api-docs.yml |  17 +++-
 11 files changed, 733 insertions(+), 4 deletions(-)
 create mode 100644 docs/README.md
 create mode 100644 docs/cluster-dashboard.md
 create mode 100644 docs/configuration.md
 create mode 100644 docs/runner-executors.md
 create mode 100644 docs/runners-and-tags.md
 create mode 100644 docs/tasks-api-pagination.md
 create mode 100644 docs/workflows.md

diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index dda817e34..ded408a48 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -1,3 +1,7 @@
+## Developer documentation
+
+In-repo guides for contributors and operators live in [`docs/`](docs/README.md): configuration, runners, executors, workflows, API pagination, and HA cluster operations.
+
 ## Pull Requests
 
 When creating a pull-request you should:
diff --git a/README.md b/README.md
index 979063db9..4052bb083 100644
--- a/README.md
+++ b/README.md
@@ -92,6 +92,7 @@ For more installation options, visit our [Installation page](https://semaphoreui
 
 * [User Guide](https://docs.semaphoreui.com)
 * [API Reference](https://semaphoreui.com/api-docs)
+* [Developer docs](docs/README.md) — configuration, runners, workflows, and operational guides (in-repo)
 * [Postman Collection](https://www.postman.com/semaphoreui)
 
 ## Awesome Semaphore
diff --git a/api-docs.yml b/api-docs.yml
index 61ccb4e69..b80cc738c 100644
--- a/api-docs.yml
+++ b/api-docs.yml
@@ -3449,10 +3449,23 @@ paths:
     get:
       tags:
         - task
-      summary: Get last 200 Tasks related to current project
+      summary: Get recent tasks for the current project (keyset pagination)
+      parameters:
+        - name: count
+          in: query
+          type: integer
+          description: Page size (default and max 200). Legacy alias `limit` is also accepted.
+        - name: before
+          in: query
+          type: integer
+          description: Cursor — return tasks with id strictly less than this value (older page).
       responses:
         200:
-          description: Array of tasks in chronological order
+          description: Array of tasks ordered by id descending (newest first). Check X-Has-Next header for more pages.
+          headers:
+            X-Has-Next:
+              type: string
+              description: "true if older tasks exist beyond this page; false otherwise"
           schema:
             type: array
             items:
diff --git a/docs/README.md b/docs/README.md
new file mode 100644
index 000000000..d9e105414
--- /dev/null
+++ b/docs/README.md
@@ -0,0 +1,14 @@
+# Developer documentation
+
+Internal guides for contributors and operators. User-facing product docs live at [docs.semaphoreui.com](https://docs.semaphoreui.com).
+
+| Guide | Audience | Covers |
+|-------|----------|--------|
+| [Configuration](configuration.md) | Developers, operators | `config.json` / `config.yaml`, env vars, JSON Schema |
+| [Runners and tags](runners-and-tags.md) | Developers, operators | Remote runners, tag routing, webhooks, fleet timeouts |
+| [Runner executors](runner-executors.md) | Operators | Local, Docker, and Kubernetes task execution on runners |
+| [Workflows](workflows.md) | Developers (Pro) | DAG workflows, approvals, artifacts, API overview |
+| [Tasks API pagination](tasks-api-pagination.md) | Developers, API consumers | Keyset pagination for project task history |
+| [Cluster dashboard](cluster-dashboard.md) | Operators (HA) | Admin cluster API, task state inspection, recovery |
+
+Implementation plans for upcoming work are under [`AGENTS/plans/`](../AGENTS/plans/).
diff --git a/docs/cluster-dashboard.md b/docs/cluster-dashboard.md
new file mode 100644
index 000000000..c9b0549f7
--- /dev/null
+++ b/docs/cluster-dashboard.md
@@ -0,0 +1,109 @@
+# Cluster dashboard (HA)
+
+The cluster dashboard is an **admin-only** UI and API for inspecting high-availability (HA) deployments and the shared task state backend. It requires the enterprise HA feature (`features.high_availability`).
+
+## When it applies
+
+| `ha.enabled` | Dashboard |
+|--------------|-----------|
+| `false` | UI shows HA disabled; `GET /api/cluster` returns `{"ha_enabled": false}` only |
+| `true` | Full node list, Redis stats, task snapshot, maintenance clear |
+
+Configure HA in the server config:
+
+```yaml
+ha:
+  enabled: true
+  node_id: semaphore-1   # optional; auto-generated if empty
+  redis:
+    addr: redis.example.com:6379
+    pass: "<secret>"
+```
+
+`util.HAEnabled()` is true when `ha` is set and `ha.enabled` is true.
+
+## Admin API
+
+All routes require an authenticated **admin** session (same as other `/api/...` admin routes).
+
+### `GET /api/cluster`
+
+Returns cluster status:
+
+- `ha_enabled` (boolean) — always present
+- `node_id` (string) — this instance, when HA config exists
+- `nodes` (array) — peer nodes, heartbeats, versions (when HA overlay is active)
+- `redis` (object) — connection, memory, key groups (when inspector available)
+
+When HA is enabled but the cluster inspector is unavailable, the handler responds with **503** and a short error message. When HA is disabled, the response is **200** with only `ha_enabled: false` (no error).
+
+### `GET /api/cluster/tasks`
+
+Returns a **task state snapshot** from the task pool store:
+
+| Field | Meaning |
+|-------|---------|
+| `queue` | Tasks waiting to start |
+| `running` | Tasks currently executing |
+| `active_by_project` | Per-project active task records |
+| `aliases` | Alias string → task ID |
+| `claims` | Task IDs claimed for distributed coordination |
+
+Works in non-HA mode too (in-memory store); fields may be empty arrays/objects if the store does not implement introspection.
+
+### `DELETE /api/cluster/tasks`
+
+Maintenance: clear selected record groups from the backend (Redis in HA). Body:
+
+```json
+{
+  "scope": {
+    "queue": true,
+    "running": false,
+    "active": false,
+    "aliases": false,
+    "claims": false,
+    "runtime_fields": false
+  }
+}
+```
+
+At least one scope flag must be `true`. Use only when recovering from a stuck cluster state (orphaned queue entries, stale claims). Clearing **running** or **active** while real tasks execute can cause inconsistent behavior.
+
+The UI exposes the same scope checkboxes under **Clear tasks from Redis** (enabled only when `ha_enabled` is true).
+
+## UI entry
+
+**Admin → Cluster dashboard** (`web/src/views/Cluster.vue`):
+
+- Node table and Redis memory chart when HA is active
+- Live task tables from `/api/cluster/tasks`
+- Upgrade prompt when `features.high_availability` is false
+
+## Architecture sketch
+
+```mermaid
+flowchart LR
+  subgraph nodes [Semaphore nodes]
+    N1[Node A]
+    N2[Node B]
+  end
+  Redis[(Redis task state)]
+  N1 --> Redis
+  N2 --> Redis
+  Admin[Admin UI] --> API["/api/cluster*"]
+  API --> N1
+```
+
+`TaskStateStore` implementations may expose `TaskStateInspector` for snapshots and `ClearTasks`. See `services/tasks/task_state_store.go`.
+
+## OpenAPI
+
+Cluster endpoints are documented in `api-docs.yml` under the `cluster` tag (they may be commented out until Dredd hooks cover them). Regenerate the public Swagger bundle when enabling them in CI.
+
+## Related code
+
+- `api/cluster.go` — handlers
+- `api/router.go` — route registration
+- `pro_interfaces` — `ClusterInspector` for nodes/Redis
+- `services/tasks/task_state_store.go` — snapshot and clear types
diff --git a/docs/configuration.md b/docs/configuration.md
new file mode 100644
index 000000000..8bc435d44
--- /dev/null
+++ b/docs/configuration.md
@@ -0,0 +1,113 @@
+# Configuration
+
+Semaphore reads settings from a config file, then applies environment-variable overrides and built-in defaults. The canonical field list is maintained in [`config.schema.yaml`](../config.schema.yaml) (JSON Schema draft 2020-12), generated from `util.ConfigType` in Go.
+
+## File format and discovery
+
+Supported formats: **JSON** (`.json`) and **YAML** (`.yaml`, `.yml`). Keys use `snake_case` and match the `json` struct tags in `util/config.go`.
+
+### Search order
+
+When `--config` is not passed and `SEMAPHORE_CONFIG_PATH` is unset, the server looks for the first existing file among:
+
+1. `./config.json`, `./config.yaml`, `./config.yml` (current working directory)
+2. `/usr/local/etc/semaphore/config.{json,yaml,yml}`
+3. `/etc/semaphore/config.{json,yaml,yml}`
+
+Explicit path:
+
+```bash
+./bin/semaphore server --config /etc/semaphore/config.yaml
+# or
+export SEMAPHORE_CONFIG_PATH=/etc/semaphore/config.yaml
+```
+
+Interactive setup (`semaphore setup`) still writes `config.json` by default; YAML is fully supported for hand-written or GitOps-managed installs.
+
+### Load order
+
+`util.ConfigInit` applies settings in this order:
+
+1. Config file (if present and not disabled with `--no-config`)
+2. Environment variables (`SEMAPHORE_*`, see `env:` tags on struct fields), which override values from the config file
+3. Defaults from struct `default:` tags, which fill in only the fields still unset after steps 1 and 2
+
+Sensitive values can be loaded from companion files (for example `runner.token_file`, `subscription.key_file`) after the main file is parsed.
+
+## Schema validation
+
+Use `config.schema.yaml` in your editor (YAML language server with JSON Schema) or in CI to validate configs before deploy. The schema `$id` is `https://semaphoreui.com/schemas/config.schema.json`.
+
+To regenerate the schema after changing `util.ConfigType`, follow [`.claude/skills/semaphore-config-schema/SKILL.md`](../.claude/skills/semaphore-config-schema/SKILL.md).
+
+## Common options (quick reference)
+
+| Area | Keys | Notes |
+|------|------|-------|
+| Database | `dialect`, `mysql` / `postgres` / `sqlite` | BoltDB was removed in 2.19; use `sqlite` for embedded DB |
+| HTTP | `port`, `interface`, `web_host` | `web_host` is the public URL used in links and emails |
+| TLS | `tls.enabled`, `tls.cert_file`, `tls.key_file` | Optional HTTP→HTTPS redirect via `tls.http_redirect_addr` **or** `tls.http_redirect_port` (mutually exclusive) |
+| Auth | `mfa.totp`, `mfa.email` | Former top-level `auth` was renamed to `mfa` |
+| Runners | `use_remote_runner`, `runner_registration_token`, `runner`, `runners` | `runner` configures a runner process; `runners` configures server-side fleet timeouts |
+| HA | `ha.enabled`, `ha.node_id`, `ha.redis` | Requires enterprise overlay; see [Cluster dashboard](cluster-dashboard.md) |
+| Concurrency | `max_parallel_tasks` | Server-wide cap; per-runner limit is `runner.max_parallel_tasks` |
+
+Environment variable names mirror keys: `port` → `SEMAPHORE_PORT`, nested fields use underscores (`SEMAPHORE_TLS_ENABLED`, `SEMAPHORE_HA_REDIS_ADDR`). Fields tagged `sensitive` are cleared from the process environment after load so secrets do not leak to child processes.
+
+## Examples
+
+### Minimal development (SQLite)
+
+```yaml
+dialect: sqlite
+sqlite:
+  host: /tmp/semaphore.db
+port: ":3000"
+tmp_path: /tmp/semaphore
+cookie_hash: <base64-32-bytes>
+cookie_encryption: <base64-32-bytes>
+access_key_encryption: <base64-32-bytes>
+```
+
+Generate secrets with `semaphore setup` or `openssl rand -base64 32`.
+
+### TLS with HTTP redirect
+
+```yaml
+tls:
+  enabled: true
+  cert_file: /etc/semaphore/tls.crt
+  key_file: /etc/semaphore/tls.key
+  http_redirect_port: 8080
+```
+
+A second listener on port `8080` redirects clients to HTTPS. Use `http_redirect_addr` instead when you need to specify the full bind address (for example `:8080` for all interfaces or `127.0.0.1:8080` for loopback only).
+
+### Remote runner (server side)
+
+```yaml
+use_remote_runner: true
+runner_registration_token: "<admin-generated-token>"
+runners:
+  offline_timeout_sec: 120
+  task_fail_timeout_sec: 420
+  reconcile_interval_sec: 30
+```
+
+Runners register with that token; task routing uses project/global runners and optional tags (see [Runners and tags](runners-and-tags.md)).
+
+## Troubleshooting
+
+| Symptom | Check |
+|---------|--------|
+| Server exits on start | Run with explicit `--config`; validate against `config.schema.yaml` |
+| Wrong database | `dialect` and the matching `mysql`/`postgres`/`sqlite` block |
+| Broken login cookies after config change | `cookie_hash` / `cookie_encryption` must stay stable or all sessions invalidate |
+| Runner never picks up jobs | `use_remote_runner`, runner `active`, tag match on template/inventory |
+| HA features missing in UI | `ha.enabled` and enterprise subscription; cluster API returns `ha_enabled: false` when disabled |
+
+## Related code
+
+- `util/config.go`, `util/config_auth.go` — struct definitions and loading
+- `util/config_test.go` — YAML/JSON load tests
+- `cli/cmd/root.go` — `--config`, `--no-config` flags
diff --git a/docs/runner-executors.md b/docs/runner-executors.md
new file mode 100644
index 000000000..f9fb0b786
--- /dev/null
+++ b/docs/runner-executors.md
@@ -0,0 +1,127 @@
+# Runner executors
+
+A **runner** process can execute each assigned task using one of three executor strategies. The strategy is selected in the runner config block (`runner.executor.type`).
+
+| Type | Config value | Availability | Behavior |
+|------|--------------|--------------|----------|
+| Local | `local` (default) | Open source | Runs the task as a subprocess on the runner host |
+| Docker | `docker` | Pro build | Runs each task in an ephemeral container |
+| Kubernetes | `k8s` | Pro build | Runs each task in an ephemeral Pod |
+
+The open-source build ships stubs for Docker and Kubernetes executors. If you set `type: docker` or `type: k8s` without the Pro module, the runner logs an initialization error and refuses jobs until restarted with a valid config.
+
+## Configuration
+
+Executor settings live under `runner.executor` in the runner's config file (or the `runner` section of a shared config used by `semaphore runner`):
+
+```yaml
+runner:
+  enabled: true
+  token: "<runner-auth-token>"
+  executor:
+    type: local   # local | docker | k8s
+```
+
+Environment variables follow the `SEMAPHORE_RUNNER_*` prefix. Nested executor fields use `SEMAPHORE_RUNNER_DOCKER_*` or `SEMAPHORE_RUNNER_K8S_*`.
+
+## Local executor
+
+The default. The runner clones the repository, prepares inventory and secrets, and invokes Ansible/Terraform/etc. directly on the runner host. No extra infrastructure is required.
+
+Use when the runner VM or container already has the required toolchains installed (or when using the `semaphoreui/job` image as the runner base).
+
+## Docker executor
+
+Each task runs in a short-lived container against a local or remote Docker daemon. Field shapes mirror the GitLab Docker executor for familiarity.
+
+```yaml
+runner:
+  executor:
+    type: docker
+    docker:
+      host: unix:///var/run/docker.sock
+      image: semaphoreui/job:latest
+      helper_image: semaphoreui/job:latest
+      network: bridge
+      pull_policy: if-not-present
+      cpu_limit: 2
+      memory_limit: 4g
+      poll_interval_seconds: 2
+      cleanup_grace_seconds: 30
+      privileged: false
+```
+
+| Field | Default | Purpose |
+|-------|---------|---------|
+| `host` | platform default / `DOCKER_HOST` | Daemon URL (`unix://`, `tcp://`, `npipe://`) |
+| `image` | `semaphoreui/job:latest` | Build container image |
+| `helper_image` | `semaphoreui/job:latest` | Git-clone helper container |
+| `network` | `bridge` | Docker network for the build container |
+| `pull_policy` | `if-not-present` | `always`, `if-not-present`, or `never` |
+| `cpu_limit` | none | CPU cap (`--cpus`) when > 0 |
+| `memory_limit` | none | Memory cap (e.g. `2g`) |
+| `privileged` | `false` | Run with `--privileged` (dangerous) |
+
+## Kubernetes executor
+
+Each task runs in an ephemeral Pod. Field shapes mirror the GitLab Kubernetes executor.
+
+```yaml
+runner:
+  executor:
+    type: k8s
+    k8s:
+      kubeconfig: /path/to/kubeconfig   # omit for in-cluster config
+      namespace: semaphore
+      image: alpine:latest
+      helper_image: alpine/git:latest
+      service_account: default
+      pull_secrets: regcred
+      poll_interval_seconds: 3
+      cleanup_grace_seconds: 30
+```
+
+| Field | Default | Purpose |
+|-------|---------|---------|
+| `kubeconfig` | in-cluster | Path to kubeconfig file |
+| `namespace` | `semaphore` | Namespace for task Pods |
+| `image` | `alpine:latest` | Default build container image |
+| `helper_image` | `alpine/git:latest` | Git-clone init container image |
+| `service_account` | `default` | Pod service account |
+| `pull_secrets` | none | Comma-separated `imagePullSecrets` |
+| `poll_interval_seconds` | 3 | Pod status poll interval |
+| `cleanup_grace_seconds` | 30 | Pod deletion grace period |
+
+### Kubernetes prerequisites
+
+1. A namespace (create if it does not exist).
+2. RBAC allowing the runner service account to create/delete Pods in that namespace.
+3. Network access from Pods to the Semaphore server (for log streaming and status).
+4. Container images that include the toolchains your templates need (or use `semaphoreui/job`).
+
+When `kubeconfig` is empty, the executor uses in-cluster configuration (ServiceAccount token and CA mounted by Kubernetes).
+
+## How executors plug in
+
+At runner startup, `services/runners/executor_factory.go` reads `runner.executor.type` and constructs an `ExecutorProvider`. The job pool uses this provider for every assigned task; switching executor type does not require changes to task routing or the Semaphore server.
+
+```
+JobPool → newExecutorProvider(config) → ExecutorProvider
+       → newExecutor(job, accessKeys, provider) → Executor (runs task)
+```
+
+## Troubleshooting
+
+| Symptom | Check |
+|---------|--------|
+| Runner starts but rejects all jobs | Logs for `failed to initialise executor provider`; verify `type` and Pro build for docker/k8s |
+| Docker: cannot connect to daemon | `host` / `DOCKER_HOST`, socket permissions, TLS certs in `cert_path` |
+| K8s: Pod stuck pending | Namespace, RBAC, image pull secrets, resource quotas |
+| K8s: in-cluster auth fails | ServiceAccount, automountServiceAccountToken, cluster DNS |
+
+## Related code
+
+- `util/config.go` — `ExecutorConfig`, `RunnerK8sConfig`, `RunnerDockerConfig`
+- `services/runners/executor_factory.go` — provider selection
+- `pro/services/tasks/k8s/` — K8s provider (Pro)
+- `pro/services/tasks/docker/` — Docker provider (Pro)
diff --git a/docs/runners-and-tags.md b/docs/runners-and-tags.md
new file mode 100644
index 000000000..bb5c43ce9
--- /dev/null
+++ b/docs/runners-and-tags.md
@@ -0,0 +1,105 @@
+# Runners and tags
+
+Semaphore can execute tasks on the server process or on **remote runners** (separate `semaphore runner` processes). Tags restrict which runner may execute a task.
+
+## Modes
+
+| Mode | Config | Behavior |
+|------|--------|----------|
+| Local | `use_remote_runner: false` (default) | Task pool runs jobs on the Semaphore server |
+| Remote | `use_remote_runner: true` | Tasks are assigned to registered runners via `RemoteJob` |
+
+Runners are **project-scoped** (bound to one project) or **global** (any project). Registration uses `runner_registration_token` on the server and `semaphore runner register` on the runner host.
+
+## Registration modes
+
+| Mode | How to create | Behavior |
+|------|---------------|----------|
+| Registered | `semaphore runner register` with the server token | Runner receives an auth token and polls for jobs |
+| Unregistered | Create in the UI with **Registered** unchecked, or API with `"registered": false` | Runner appears in the fleet but cannot pick up tasks until registered |
+
+A runner is *registered* when it has an auth token stored in the database. Unregistered runners are useful for pre-provisioning capacity before handing out per-runner tokens. See [deployment/compose/README.md](../deployment/compose/README.md) for Docker Compose examples.
+
+## Tags
+
+### Data model
+
+- Each runner has zero or more string **tags** (`db.Runner.Tags`).
+- Templates and inventories may set an optional `runner_tag`. When a task runs, the inventory's tag **overrides** the template's tag if the inventory defines one; otherwise the template's tag is used.
+
+### Routing rules
+
+When `use_remote_runner` is true and a task needs a runner (`TaskPool` / `RemoteJob`):
+
+1. If `runner_tag` is set → select **active** runners whose tags include that value (`RunnerFilterTagCompleteMatch`).
+2. If `runner_tag` is empty → select runners marked **default** (`RunnerFilterIsDefault`).
+3. Project runners are tried before global runners; order within each group is shuffled (`crypto/rand`) for load spreading.
+4. A runner is preferred if it sent a heartbeat within the configured offline window or has a **webhook** configured (webhook-only runners are treated as always reachable).
+5. Among eligible runners, the first with `running_tasks < max_parallel_tasks` wins.
+
+If no runner matches, the task stays in **waiting** state with error `no runners available`.
+
+### UI and API
+
+- **Admin → Runners**: edit tags on global runners.
+- **Project → Runners**: project-scoped runners and tags (requires `project_runners` feature).
+- Template form: **Runner tag** dropdown populated from `GET /api/project/{id}/runner_tags`.
+- Inventory form: optional **Runner tag** (overrides template).
+- Tag catalog: `GET /api/runner_tags` (global), `GET /api/project/{id}/runner_tags` (project).
+
+CLI registration:
+
+```bash
+semaphore runner register --tags linux,amd64
+```
+
+## Server-side fleet timeouts
+
+The `runners` config block (not to be confused with `runner`, which configures a runner process) controls how the server treats runner heartbeats:
+
+| Key | Default | Effect |
+|-----|---------|--------|
+| `offline_timeout_sec` | 120 | Runner marked offline; no new tasks; **starting** tasks reassigned |
+| `task_fail_timeout_sec` | 420 | **Running** tasks failed if runner stays offline past this |
+| `reconcile_interval_sec` | 30 | How often dispatched tasks are checked against runner liveness |
+
+`task_fail_timeout_sec` must be ≥ `offline_timeout_sec` (smaller values are clamped up to `offline_timeout_sec`). Set `offline_timeout_sec` comfortably above the runner poll interval so healthy-but-slow runners are not marked offline prematurely.
+
+## Webhooks
+
+Runners may define a `webhook` URL. Semaphore POSTs JSON when a task is assigned:
+
+```json
+{
+  "action": "start",
+  "project_id": 1,
+  "task_id": 42,
+  "template_id": 3,
+  "runner_id": 7
+}
+```
+
+Use webhooks to spawn **one-off** runners (`runner.one_off` in config) in autoscaling environments.
+
+## Executor types
+
+Runners can execute tasks locally on the host, in Docker containers, or in Kubernetes Pods. See [Runner executors](runner-executors.md).
+
+## Operational checklist
+
+1. Enable `use_remote_runner` and set `runner_registration_token`.
+2. Register runners; confirm **Active** and recent **Last seen**.
+3. Set template or inventory `runner_tag` when you need dedicated capacity.
+4. Mark exactly one default runner per scope if you rely on untagged templates.
+5. For stuck waiting tasks, verify tag spelling and that at least one active runner carries the tag.
+6. Tune `runners.offline_timeout_sec` / `task_fail_timeout_sec` for your network and job durations.
+
+Manual test case: [test/test-cases/TC-028-runner-tags.md](../test/test-cases/TC-028-runner-tags.md).
+
+## Related code
+
+- `services/tasks/RemoteJob.go` — runner selection
+- `services/tasks/TaskPool.go` — when remote jobs are created
+- `services/tasks/runner_reconciler.go` — offline/fail timeouts
+- `db/Runner.go` — tag filter modes
+- `api/runners.go`, `pro/api/projects/runners.go` — HTTP handlers
diff --git a/docs/tasks-api-pagination.md b/docs/tasks-api-pagination.md
new file mode 100644
index 000000000..42f1c728a
--- /dev/null
+++ b/docs/tasks-api-pagination.md
@@ -0,0 +1,88 @@
+# Tasks API pagination
+
+The project History page and the `GET /api/project/{project_id}/tasks/last` endpoint use **keyset (cursor) pagination** to page through task history without expensive `COUNT(*)` queries or deep `OFFSET` scans.
+
+## Why keyset
+
+Projects can accumulate millions of tasks. Offset pagination (`LIMIT n OFFSET m`) scans and discards every skipped row, so deep pages get slower. Counting all rows on every request is equally expensive.
+
+Keyset pagination walks backward through the primary-key index using a cursor (`before=<task_id>`), so each page costs the same regardless of depth. There is intentionally **no total page count**.
+
+## Request
+
+```
+GET /api/project/{project_id}/tasks/last?count=20
+GET /api/project/{project_id}/tasks/last?count=20&before=842
+```
+
+| Parameter | Meaning |
+|-----------|---------|
+| `count` | Page size (default and max: 200) |
+| `limit` | Legacy alias for `count` (still accepted) |
+| `before` | Cursor: return tasks with `id` strictly less than this value |
+
+Tasks are ordered by `id DESC` (newest first). The first page omits `before`; each subsequent page passes the smallest `id` from the previous page as `before`.
+
+### Template-scoped variant
+
+When the route is mounted under a template context, the same parameters apply but results are scoped to that template's tasks.
+
+## Response
+
+- **Body**: JSON array of `Task` objects (a plain array, so the response shape is unchanged for legacy clients).
+- **Header**: `X-Has-Next: true` or `X-Has-Next: false` — whether older tasks exist beyond this page.
+
+The server fetches `count + 1` rows internally. If the extra row exists, it is trimmed and `X-Has-Next` is `true`.
+
+Example:
+
+```http
+GET /api/project/1/tasks/last?count=20 HTTP/1.1
+
+HTTP/1.1 200 OK
+X-Has-Next: true
+Content-Type: application/json
+
+[{ "id": 900, ... }, { "id": 899, ... }, ...]
+```
+
+Next page:
+
+```http
+GET /api/project/1/tasks/last?count=20&before=881
+```
+
+## Backward compatibility
+
+| Endpoint / caller | Behavior |
+|-------------------|----------|
+| `GET .../tasks/last?limit=200` | Returns up to 200 tasks; `X-Has-Next` ignored by legacy callers |
+| `GET .../tasks` (`GetAllTasks`) | Unchanged: up to 1000 tasks, no cursor, no `X-Has-Next` |
+| `TaskList.vue` (per-template list) | Still uses `limit=200`; header ignored |
+
+## Frontend (History page)
+
+`web/src/views/project/History.vue` maintains a cursor stack:
+
+```
+cursors: [null]     // cursors[i] = before id for page i (null = first page)
+pageIndex: 0
+hasNext: false      // from X-Has-Next
+```
+
+- **Next**: push the last row's `id` as the next cursor, increment `pageIndex`, reload.
+- **Prev**: decrement `pageIndex`, reload from the stored cursor.
+- Footer shows `‹  N  ›` with no total page count.
+
+WebSocket live updates reload the current page by its cursor, so concurrent task creation only affects page 1.
+
+## Cursor stability
+
+Keyset cursors are stable under inserts: a new task gets a higher `id` and only appears on page 1. Navigating older pages is unaffected by concurrent activity.
+
+## Related code
+
+- `api/projects/tasks.go` — `parseTasksPageParams`, `writeTasksList`, `GetLastTasks`
+- `db/sql/task.go` — `WHERE task.id < ?` keyset filter
+- `db/Store.go` — `RetrieveQueryParams.BeforeID`
+- [`AGENTS/plans/2_19/tasks-history-keyset-pagination.md`](../AGENTS/plans/2_19/tasks-history-keyset-pagination.md) — design notes
diff --git a/docs/workflows.md b/docs/workflows.md
new file mode 100644
index 000000000..4fb95daa9
--- /dev/null
+++ b/docs/workflows.md
@@ -0,0 +1,142 @@
+# Workflows
+
+Workflows chain multiple task templates into a **directed acyclic graph (DAG)** with conditional edges, manual approvals, and shared artifacts between steps. They are a **Pro feature** (`features.workflows`).
+
+## Overview
+
+A workflow consists of:
+
+- **Workflow template** — the graph definition (nodes + edges).
+- **Workflow run** — one execution of that graph.
+- **Tasks** — each `task` node launches a normal Semaphore task linked to the run via `workflow_run_id` and `workflow_node_id`.
+
+The UI provides a graphical editor (`WorkflowEditor.vue`), a list page (`Workflows.vue`), and a full-screen run view (`WorkflowRun.vue`) with live status on the graph.
+
+## Node kinds
+
+| Kind | Executes | Purpose |
+|------|----------|---------|
+| `task` | Yes | Runs a project template (playbook, Terraform, etc.) |
+| `approval` | Gates | Pauses the run until a user approves or rejects |
+| `note` | No | Canvas annotation only; excluded from validation and execution |
+
+Task nodes can override inventory, environment, and Ansible limit per node.
+
+## Edge conditions
+
+Edges connect nodes and control when downstream nodes start:
+
+| Condition | Fires when |
+|-----------|------------|
+| `on_success` | Source task succeeds (default) |
+| `on_failure` | Source task fails |
+| `always` | Source task reaches any terminal state |
+
+Approval nodes use `convergence_mode`: `all` (every inbound edge satisfied) or `any` (first satisfied edge).
+
+## Run lifecycle
+
+### Statuses
+
+| Status | Terminal | Meaning |
+|--------|----------|---------|
+| `running` | No | Tasks executing or waiting to start |
+| `approval` | No | Waiting on a manual approval |
+| `success` | Yes | All terminal nodes succeeded |
+| `failed` | Yes | A task failed with no matching failure edge, or approval rejected |
+| `stopped` | Yes | User stopped the run |
+
+### Progression
+
+Run progression is **server-driven**. When any workflow task finishes (success or failure), `TaskRunner.finishRun` calls `HandleWorkflowTaskCompletion`, which:
+
+1. Evaluates which downstream nodes are ready.
+2. Launches ready task nodes via the task pool.
+3. Creates approval records for approval nodes.
+4. Updates the run status.
+
+The run view polls every 5 seconds as a backstop, but progression does not depend on the UI being open.
+
+### Stopping a run
+
+`POST /api/project/{project_id}/workflows/{workflow_id}/runs/{run_id}/stop` (requires `run_project_tasks`):
+
+1. Force-stops all non-finished tasks of the run.
+2. Rejects pending approvals.
+3. Marks the run `stopped`.
+
+Stopped runs are never revived by later progression.
+
+### Versioning
+
+`start_version` on the template seeds build-style versioning. Each run gets a `version` derived from prior runs (same mechanism as build templates).
+
+## Artifacts
+
+Tasks can publish workflow artifacts (Ansible `set_stats` parity). Downstream nodes in the same run receive merged artifacts as extra variables (`semaphore_workflow_artifacts`). See [`AGENTS/plans/2_19/workflow-artifacts.md`](../AGENTS/plans/2_19/workflow-artifacts.md).
+
+`GET /api/project/{project_id}/workflows/{workflow_id}/runs/{run_id}/artifacts` returns the merged artifact map.
+
+## API endpoints
+
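+All routes are under `/api/project/{project_id}`; the paths in the table below are relative to that prefix, and `{id}` is the workflow ID (`{workflow_id}` elsewhere in this guide). Documented in `api-docs.yml` under the `workflow` tag.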
+
+| Method | Path | Purpose |
+|--------|------|---------|
+| GET | `/workflows` | List workflow templates |
+| POST | `/workflows` | Create workflow |
+| GET | `/workflows/{id}` | Get workflow with nodes and edges |
+| PUT | `/workflows/{id}` | Update workflow (returns 204) |
+| DELETE | `/workflows/{id}` | Delete workflow |
+| POST | `/workflows/{id}/run` | Start a run |
+| GET | `/workflows/{id}/runs` | List runs |
+| GET | `/workflows/{id}/runs/{run_id}` | Run details (node statuses) |
+| POST | `/workflows/{id}/runs/{run_id}/stop` | Stop run |
+| GET | `/workflows/{id}/runs/{run_id}/approvals` | Pending approvals |
+| POST | `/workflows/{id}/runs/{run_id}/approvals/{node_id}` | Approve or reject |
+| GET | `/workflows/{id}/runs/{run_id}/artifacts` | Merged artifacts |
+
+### Validation rules
+
+`db.ValidateWorkflowTemplate` enforces the following rules before every write:
+
+- Non-empty name, at least one node.
+- Exactly one root (zero in-degree node).
+- Graph is a DAG (no cycles).
+- Task nodes require `template_id`; approval nodes require positive `approval_timeout` when set.
+- No self-edges or dangling edge references.
+
+## Architecture (open vs Pro)
+
+Workflows use the same Pro gating pattern as `terraform_backend` and `project_runners`:
+
+| Layer | Open source | Pro (`pro_impl`) |
+|-------|-------------|------------------|
+| DB schema + CRUD | `db/sql/workflow.go` | same |
+| HTTP handlers | stubs return `[]` or `404` | `pro_impl/api/projects/workflows.go` |
+| Orchestration | no-op `WorkflowService` | `pro_impl/services/server/workflow_svc.go` |
+| Feature flag | `features.workflows: false` | `true` when `IsPro()` |
+
+The open `TaskPool` delegates `HandleWorkflowTaskCompletion` and `GetWorkflowRunArtifacts` to the injected service (no-ops in open builds).
+
+Wiring in `cli/cmd/root.go`:
+
+```
+taskPool := NewTaskPool(...)
+workflowService := proServer.NewWorkflowService(store, &taskPool)
+taskPool.SetWorkflowService(workflowService)
+```
+
+## Persistence notes
+
+- Nodes and edges are stored in child tables; the template row holds metadata only.
+- **Writes are delete-and-reinsert**: node database IDs change on every save. The editor re-fetches after save to rebind IDs. Historical runs may reference node IDs that no longer exist in the current template (the run view tolerates missing nodes).
+- Node positions (`position_x`, `position_y`) persist per node and survive the reinsert.
+
+## Related code and plans
+
+- `db/Workflow.go` — data model
+- `pro_interfaces/workflow_ctl.go`, `workflow_svc.go` — interfaces
+- `web/src/components/WorkflowGraph.vue` — shared Drawflow renderer
+- [`AGENTS/plans/2_19/graphical-workflow-editor.md`](../AGENTS/plans/2_19/graphical-workflow-editor.md) — implementation details
+- [`AGENTS/plans/2_19/workflow-artifacts.md`](../AGENTS/plans/2_19/workflow-artifacts.md) — artifact design
diff --git a/web/public/swagger/api-docs.yml b/web/public/swagger/api-docs.yml
index 5aad742cc..0823e9fe0 100644
--- a/web/public/swagger/api-docs.yml
+++ b/web/public/swagger/api-docs.yml
@@ -2778,10 +2778,23 @@ paths:
     get:
       tags:
         - task
-      summary: Get last 200 Tasks related to current project
+      summary: Get recent tasks for the current project (keyset pagination)
+      parameters:
+        - name: count
+          in: query
+          type: integer
+          description: Page size (default and max 200). Legacy alias `limit` is also accepted.
+        - name: before
+          in: query
+          type: integer
+          description: Cursor — return tasks with id strictly less than this value (older page).
       responses:
         200:
-          description: Array of tasks in chronological order
+          description: Array of tasks ordered by id descending (newest first). Check X-Has-Next header for more pages.
+          headers:
+            X-Has-Next:
+              type: string
+              description: "true if older tasks exist beyond this page; false otherwise"
           schema:
             type: array
             items: