From f2ea86cf607f003715a03bfa3bab8a70280a68a4 Mon Sep 17 00:00:00 2001 From: Marco D'Alia Date: Sun, 7 Jun 2026 09:48:44 +0100 Subject: [PATCH 01/12] feat(agentbox.yaml): idempotent tasks + replacement engine (render CLI + carry) - tasks: idempotent: true (command-hash marker) | { check } (probe) - shared pure replacement engine in @agentbox/core (env placeholders + rules) - agentbox-ctl render CLI (declarative sed alternative) - carry: replaceEnvs / replace / rules (host-side, file-only) - top-level replacements: reusable named rule-sets - schema + drift fixtures + unit tests --- apps/cli/src/lib/carry-gate.ts | 4 +- apps/cli/src/lib/carry-resolve.ts | 34 +- apps/cli/test/carry-resolve.test.ts | 39 ++ docs/plans/seed-claude-memory-into-box.md | 127 ------ .../user-defined-shims-auto-classifier.md | 376 ++++++++++++++++++ packages/core/src/index.ts | 17 + packages/core/src/provider.ts | 12 + packages/core/src/replace.ts | 226 +++++++++++ packages/core/test/replace.test.ts | 134 +++++++ packages/ctl/schema/agentbox.schema.json | 72 +++- packages/ctl/src/bin.ts | 2 + packages/ctl/src/carry.ts | 58 ++- packages/ctl/src/commands/daemon.ts | 10 +- packages/ctl/src/commands/render.ts | 72 ++++ packages/ctl/src/config.ts | 61 ++- packages/ctl/src/index.ts | 19 + packages/ctl/src/replace.ts | 51 +++ packages/ctl/src/supervisor.ts | 145 ++++++- packages/ctl/src/types.ts | 4 + packages/ctl/test/carry.test.ts | 35 ++ packages/ctl/test/config.test.ts | 4 +- packages/ctl/test/schema-drift.test.ts | 75 ++++ packages/ctl/test/socket.test.ts | 1 + .../ctl/test/supervisor-idempotent.test.ts | 128 ++++++ packages/ctl/test/supervisor.test.ts | 2 +- packages/sandbox-cloud/src/cloud-provider.ts | 14 +- packages/sandbox-core/src/carry-render.ts | 73 ++++ packages/sandbox-core/src/index.ts | 5 + .../sandbox-core/test/carry-render.test.ts | 63 +++ packages/sandbox-docker/src/create.ts | 8 +- 30 files changed, 1711 insertions(+), 160 deletions(-) delete mode 100644 
docs/plans/seed-claude-memory-into-box.md create mode 100644 docs/plans/user-defined-shims-auto-classifier.md create mode 100644 packages/core/src/replace.ts create mode 100644 packages/core/test/replace.test.ts create mode 100644 packages/ctl/src/commands/render.ts create mode 100644 packages/ctl/src/replace.ts create mode 100644 packages/ctl/test/supervisor-idempotent.test.ts create mode 100644 packages/sandbox-core/src/carry-render.ts create mode 100644 packages/sandbox-core/test/carry-render.test.ts diff --git a/apps/cli/src/lib/carry-gate.ts b/apps/cli/src/lib/carry-gate.ts index 693e4008..c2115b9f 100644 --- a/apps/cli/src/lib/carry-gate.ts +++ b/apps/cli/src/lib/carry-gate.ts @@ -1,7 +1,7 @@ import { join } from 'node:path'; import { log } from '@clack/prompts'; import { loadEffectiveConfig } from '@agentbox/config'; -import { loadCarrySection } from '@agentbox/ctl'; +import { loadCarrySection, loadReplacementsSection } from '@agentbox/ctl'; import type { ResolvedCarryEntry } from '@agentbox/core'; import { promptForCarry } from '../carry-prompt.js'; import { resolveCarry } from './carry-resolve.js'; @@ -41,9 +41,11 @@ export async function runCarryGate(args: CarryGateArgs): Promise 0) { const msg = ['carry: refused to proceed:', ...resolved.errors.map((e) => ` - ${e}`)].join('\n'); diff --git a/apps/cli/src/lib/carry-resolve.ts b/apps/cli/src/lib/carry-resolve.ts index b8ae4434..37d6b436 100644 --- a/apps/cli/src/lib/carry-resolve.ts +++ b/apps/cli/src/lib/carry-resolve.ts @@ -2,7 +2,7 @@ import { realpath, stat } from 'node:fs/promises'; import { homedir } from 'node:os'; import { isAbsolute, join, normalize, relative, resolve } from 'node:path'; import { BUILT_IN_DEFAULTS } from '@agentbox/config'; -import type { CarryItem } from '@agentbox/ctl'; +import { resolveRuleRefs, type CarryItem, type ReplaceRule } from '@agentbox/ctl'; import { effectiveExcludes, isPathExcluded, toTarExcludes } from './dir-breakdown.js'; /** @@ -31,6 +31,10 @@ export interface 
ResolvedCarryEntry { symlinkInfo?: 'safe' | 'outside-home'; /** tar `--exclude` patterns applied when packing a dir entry. */ exclude?: string[]; + /** Substitute `{{AGENTBOX_*}}` placeholders host-side before copy (file only). */ + replaceEnvs?: boolean; + /** Final replacement rules (named refs already expanded). File only. */ + replace?: ReplaceRule[]; } export interface ResolveOptions { @@ -44,6 +48,8 @@ export interface ResolveOptions { * built-in `box.cpMaxBytes` when omitted. */ maxBytes?: number; + /** Top-level `replacements:` rule-sets, for expanding carry `rules:` refs. */ + replacements?: Record; } export interface ResolveResult { @@ -61,6 +67,7 @@ export async function resolveCarry( const home = opts.homeDir ?? homedir(); const cap = opts.maxBytes ?? BUILT_IN_DEFAULTS.box.cpMaxBytes; const projectRoot = opts.projectRoot; + const replacements = opts.replacements ?? {}; const entries: ResolvedCarryEntry[] = []; const errors: string[] = []; @@ -68,7 +75,7 @@ export async function resolveCarry( for (const [i, item] of items.entries()) { const where = `carry[${String(i)}]`; try { - const entry = await resolveOne(item, { projectRoot, home, cap, where }); + const entry = await resolveOne(item, { projectRoot, home, cap, where, replacements }); entries.push(entry); } catch (err) { errors.push(err instanceof Error ? err.message : String(err)); @@ -83,6 +90,7 @@ interface OneCtx { home: string; cap: number; where: string; + replacements: Record; } async function resolveOne(item: CarryItem, ctx: OneCtx): Promise { @@ -98,12 +106,28 @@ async function resolveOne(item: CarryItem, ctx: OneCtx): Promise 0) + ? { + ...(item.replaceEnvs ? { replaceEnvs: true } : {}), + ...(replaceRules.length > 0 ? 
{ replace: replaceRules } : {}), + } + : {}; + let st: Awaited>; try { st = await stat(absSrc); } catch (err) { if ((err as NodeJS.ErrnoException).code === 'ENOENT') { if (optional) { + // A missing optional entry is skipped at transfer time, so replace + // options are moot — don't carry them onto the tombstone. return { rawSrc, rawDest, @@ -142,6 +166,11 @@ async function resolveOne(item: CarryItem, ctx: OneCtx): Promise { ); expect(res.entries[0]?.mode).toBe(0o600); }); + + it('expands named rule-set refs + inline rules onto a file entry', async () => { + await writeFile(join(home, 'e'), 'x'); + const res = await resolveCarry( + [item('~/e', '/workspace/e', { replaceEnvs: true, rules: ['host'], replace: [{ from: 'a', to: 'b' }] })], + { + projectRoot: workspace, + homeDir: home, + replacements: { host: [{ from: 'optima', to: '{{AGENTBOX_BOX_NAME}}' }] }, + }, + ); + expect(res.errors).toEqual([]); + expect(res.entries[0]?.replaceEnvs).toBe(true); + expect(res.entries[0]?.replace).toEqual([ + { from: 'optima', to: '{{AGENTBOX_BOX_NAME}}' }, + { from: 'a', to: 'b' }, + ]); + }); + + it('rejects replace options on a directory entry', async () => { + await mkdir(join(home, 'd')); + await writeFile(join(home, 'd', 'f'), 'x'); + const res = await resolveCarry([item('~/d', '/workspace/d', { replaceEnvs: true })], { + projectRoot: workspace, + homeDir: home, + }); + expect(res.entries).toHaveLength(0); + expect(res.errors[0]).toMatch(/file-only/); + }); + + it('errors on an unknown rule-set ref', async () => { + await writeFile(join(home, 'e'), 'x'); + const res = await resolveCarry([item('~/e', '/workspace/e', { rules: ['ghost'] })], { + projectRoot: workspace, + homeDir: home, + }); + expect(res.entries).toHaveLength(0); + expect(res.errors[0]).toMatch(/unknown replacements rule-set/); + }); }); diff --git a/docs/plans/seed-claude-memory-into-box.md b/docs/plans/seed-claude-memory-into-box.md deleted file mode 100644 index f59faa29..00000000 --- 
a/docs/plans/seed-claude-memory-into-box.md +++ /dev/null @@ -1,127 +0,0 @@ -# Seed the current project's Claude memory into every box (rekeyed to /workspace) - -## Context - -Verification found that Claude Code's per-project memory never reaches a box: -the cloud snapshot excludes `projects/` entirely, and docker copies `projects/` -under the **host** path key while the in-box Claude (cwd `/workspace`) reads the -`-workspace` key — so the keys never line up. Codex global memories already -sync; OpenCode has no file memory. (Full findings retained below.) - -Fix (this plan): at box create, migrate **only the current project's `memory/` -directory** — not sessions — into the box, **rekeyed/rehashed** from the host -cwd to `/workspace`. This mirrors the rekey we already do for the trust-workspace -alias in `_claude.json` (`addProjectAlias(working, hostWorkspace, '/workspace')`). -Implemented symmetrically across docker + cloud (daytona/hetzner). - -Host: `~/.claude/projects//memory/` -Box: `/home/vscode/.claude/projects/-workspace/memory/` -where `encode(p) = p.replace(/[^a-zA-Z0-9]/g, '-')` and `/workspace` → `-workspace`. - -## Implementation - -### 1. Shared resolver — `packages/sandbox-docker/src/host-stage.ts` - -Add (host-stage already imports `homedir`, `join`, `pathExists`, `readdir`): - -- `const BOX_CLAUDE_PROJECT_DIR = '/home/vscode/.claude/projects/-workspace';` - (the `-workspace` encoding is fixed for every box, matching the existing - hardcoded `CLOUD_WORKSPACE = '/workspace'` at line 53.) -- `export function encodeClaudeProjectsKey(absPath: string): string` → - `absPath.replace(/[^a-zA-Z0-9]/g, '-')` (same rule as - `apps/cli/src/session-teleport/cwd-encoding.ts`; duplicated with a comment - because host-stage must not depend on `apps/cli`). -- `export async function resolveClaudeMemoryDir(hostWorkspace: string, hostHome = homedir()): Promise` - — returns the host `…/projects//memory` - dir, or `null` when it's absent or empty (so callers no-op). 
- -Export all three from the package index so `@agentbox/sandbox-cloud` can import them. - -### 2. Docker — `packages/sandbox-docker/src/claude.ts` (`ensureClaudeVolume`) - -The box container isn't running at create time; the volume is populated by the -throwaway helper container that mounts host `~/.claude` at `/src-claude` and -rsyncs into `/dst`. Two edits inside that helper's `sh -c` (around lines 315–362): - -- **Stop leaking host-keyed projects/sessions into the shared volume**: add - `--exclude=projects` to `rsyncExcludes` (line 315), matching cloud's - `CLAUDE_RUNTIME_EXCLUDES`. Box-written `-workspace` sessions are untouched - (rsync has no `--delete`); session-teleport (`-c`/`--resume`) still uploads - its single jsonl directly to the running box, so continuity is preserved. -- **Re-add only the rekeyed memory**: when `opts.hostWorkspace` is set, compute - `const key = encodeClaudeProjectsKey(opts.hostWorkspace)` in JS (result is - `[A-Za-z0-9-]` only → shell-safe) and append a step after the rsync: - ``` - && { [ -d "/src-claude/projects//memory" ] \ - && mkdir -p /dst/projects/-workspace \ - && rm -rf /dst/projects/-workspace/memory \ - && cp -a "/src-claude/projects//memory" /dst/projects/-workspace/memory; true; } - ``` - The existing trailing `chown -R 1000:1000 /dst` (line 362) covers it. Log a - line (e.g. `seeded claude memory for -> /workspace`) when the dir - was present, via a flag returned from `ensureClaudeVolume`. - -### 3. Cloud — `packages/sandbox-cloud/src/cloud-provider.ts` (`create`) - -Cloud's static snapshot excludes `projects/` and is built per-org at prepare -time, so memory must be uploaded per-box at create. 
After -`seedOpencodeModelState(...)` (~line 381), in the **non-snapshot** branch only -(`if (!snapshotName)` — a checkpoint/snapshot boot already carries the source -box's memory and must not be clobbered), add a best-effort block: -```ts -const memDir = await resolveClaudeMemoryDir(req.workspacePath); -if (memDir) { - try { - await uploadToCloudBox(backend, handle, memDir, `${BOX_CLAUDE_PROJECT_DIR}/`); - log(`seeded claude memory for ${req.workspacePath} -> ${BOX_CLAUDE_PROJECT_DIR}/memory`); - } catch (err) { - log(`claude memory seed skipped (non-fatal): ${err instanceof Error ? err.message : String(err)}`); - } -} -``` -The trailing slash makes `uploadToCloudBox` land the source under its basename → -`…/projects/-workspace/memory/…`, chowned to uid 1000 (same as the docker side). - -### Notes / scope - -- **Only `memory/` is migrated; sessions are not.** Under the box's `-workspace` - key, the only project state seeded from the host is `memory/` (incl. - `MEMORY.md`). Sessions arrive only via the existing teleport on `-c`/`--resume`. -- **Codex / OpenCode: no change** (verified). Codex `~/.codex/memories/` is - global and already synced on both docker and cloud; OpenCode has no file memory. -- **Edge cases**: no host memory dir or empty dir → silent no-op; box already has - memory → `cp -a` after `rm -rf` (docker) / `tar -xf` overwrite (cloud) makes the - host authoritative; failures are best-effort and never fail box creation. -- **Docker shared volume**: still last-writer-wins for the `-workspace` key (same - semantics as the existing `_claude.json` project alias); `--isolate-claude-config` - gives per-box memory with zero extra code. - -## Verification - -1. Build: `pnpm -w build` (or the affected packages). -2. Docker: `node apps/cli/dist/index.js create -y -n memtest` in this repo, then - - `docker exec agentbox-memtest ls -la /home/vscode/.claude/projects/-workspace/memory/` - → lists `MEMORY.md` + the `.md` files, owned by `vscode`. 
- - `docker exec agentbox-memtest ls /home/vscode/.claude/projects/ | grep -i users` - → host-keyed dir is **absent** (sessions no longer leaked). -3. Negative: create a box for a project with no memory → no seed log line, in-box - memory dir absent, create still succeeds. -4. Codex unchanged: `docker exec agentbox-memtest ls /home/vscode/.codex/memories/`. -5. Cloud (optional, if testing daytona/hetzner): `agentbox create --provider hetzner -y -n memtest`, - then check `/home/vscode/.claude/projects/-workspace/memory/` over the box shell. -6. `pnpm lint` + any affected `vitest`. - ---- - -## Original verification findings (reference) - -| Agent | Memory location (host) | Reached box before fix? | -|-------|------------------------|-------------------------| -| Claude | `~/.claude/projects//memory/` | No — cloud excluded `projects/`; docker keyed to host path, not `/workspace` | -| Codex | `~/.codex/memories/` (global) | Yes — not in any exclude list | -| OpenCode | none (SQLite `opencode.db` / `storage/`) | N/A — no file memory | - -Key files: `packages/sandbox-docker/src/host-stage.ts` (`CLAUDE_RUNTIME_EXCLUDES` -line 159), `packages/sandbox-docker/src/claude.ts` (`ensureClaudeVolume` rsync -line 315), `packages/sandbox-cloud/src/cloud-provider.ts` (`create`), -`apps/cli/src/session-teleport/cwd-encoding.ts` (the encoding precedent). diff --git a/docs/plans/user-defined-shims-auto-classifier.md b/docs/plans/user-defined-shims-auto-classifier.md new file mode 100644 index 00000000..beb8d099 --- /dev/null +++ b/docs/plans/user-defined-shims-auto-classifier.md @@ -0,0 +1,376 @@ +# Plan: User-defined shims — a generic, relay-gated CLI bridge with an auto-classifier + +## Context + +AgentBox already proxies four host CLIs into boxes the safe way — `git`, `gh`, +`ntn` (Notion), `linear` — via the pattern: in-box **shim** → `agentbox-ctl` → +host **relay** classifies read/write → gates writes with `askPrompt` → shells +out to the host's authenticated CLI. 
Tokens never enter the box. + +Today every one of these is **hand-built**: a bespoke bash shim, a TypeScript +connector descriptor, hardcoded op tables, and per-CLI guard functions. Adding a +fifth CLI is a code change + release. The user wants to invert this: a box user +runs `agentbox shims add <name>` and the system **auto-generates** a profile so the +CLI is bridged into the next boxes — no AgentBox release, no first-party connector. + +The proposed simple model is "a list of allowed commands + whitelisted args." The +core engineering question — and the reason this plan leads with analysis — is +**where that simple model breaks**, measured against the four shims we already +ship. The answer drives the architecture: a declarative ruleset for the easy +~80%, plus a **library of named predicate primitives** (lifted from the existing +guard functions) for the hard ~20%, plus a JS escape hatch. Auto-generation is a +**skill** that drives claude/codex to author the profile from the CLI's own +`--help`, because reliably parsing arbitrary CLI surfaces in code is itself one of +the failure modes. + +Decisions locked with the user: +- **Deliverable**: design + an implementable v1 plan (this file). +- **Auto-gen**: scaffold + review, implemented as a **skill** so claude/codex + builds the rules (not a brittle in-code `--help` parser). +- **Ruleset storage**: profiles dir (`~/.agentbox/shims/<name>.yaml`) + a predicate + library; plus a `shims.<name>.enabled` config flag honoring the existing + global/project/workspace/CLI override precedence. + +--- + +## Part 1 — Where "command allowlist + arg whitelist" fails (the analysis) + +Each row below is a **real guard in today's code** that a flat ruleset cannot +express, with the file it lives in and the predicate primitive (Part 2) that +rescues it. This catalog is the justification for the architecture. + +### A. 
Read/write is not in the command name — it's in flag *interactions* +`gh api` / `ntn api` infer the HTTP method from a *combination* of args: an +explicit `-X/--method` (in space, glued `-XPOST`, or `=` forms) **OR** the mere +presence of a field flag `-f/-F/--field` which implicitly switches GET→POST. +- Proof: `refuseGhApiCall` (`packages/relay/src/gh.ts:135-192`), `refuseApiNonGet` + (`packages/integrations/src/connectors/notion.ts:70-117`). +- Why a whitelist fails: it checks each arg independently; it cannot compute + `method = explicitMethod ?? (anyFieldFlag ? POST : GET)` across the whole argv. +- Rescued by: **`httpApi` predicate**. + +### B. The allowed value space is hierarchical, not enumerable +`gh api ` is allowed only for specific **REST path templates** with +wildcards (`repos/:owner/:repo/pulls/:n/comments`), and GET is allowed on any +allowlisted path while POST is allowed only on the comment subset. +- Proof: `GH_API_ALLOWED_ENDPOINTS` / `isAllowedGhApiEndpoint` (regex over a + normalized path, `gh.ts:66-95`). +- Why a whitelist fails: you can't enumerate every `:owner/:repo`; you need glob/ + template matching, *and* method↔path-subset correlation. +- Rescued by: **`httpApi` predicate** (endpoint globs + per-method subsets). + +### C. An allowed-looking READ leaks a secret (output sensitivity) +`linear auth token` prints the raw API key to stdout. It has no write side-effect, +so a naive read/write classifier labels it a safe read — exactly wrong. Same +shape: `gh api /user/keys`, or exfil *channels* embedded in otherwise-fine args: +`--input @file` (stdin/file body), `--variable key=@/etc/passwd` (host-file load). +- Proof: linear-shim hard-rejects `auth token` (`packages/sandbox-docker/scripts/linear-shim`); + `refuseGraphqlNonQuery` refuses `@` and `--input` + (`connectors/linear.ts:123-212`); `refuseGhApiCall` refuses `--input` (`gh.ts:161-164`). 
+- Why a whitelist fails: it has no concept of "this command's *output* is the + credential" or "this arg *value* opens a file-read channel." +- Rescued by: **`denySecretOutput`** + **`refuseFileLoadArgs`** predicates. + +### D. The gate depends on live HOST state at call time +`gh pr checkout` is refused if the host working tree is dirty, or if the host +HEAD is currently on a registered box branch (`agentbox/*`) — it would corrupt the +bind-mounted box. `git push` is *ungated* for `agentbox/*` branches but *prompts* +for any other branch — a decision made from the box's resolved worktree at +runtime. +- Proof: `checkoutGuards` (`gh.ts:397-441`, probes `git status --porcelain` + HEAD + vs the registered-branch set); branch-prefix gate (`server.ts:~420`, + `isAgentboxBranch`). +- Why a whitelist fails: the verdict is a function of host filesystem/git state + and the live set of box branches, not of the argv. +- Rescued by: **`hostStateGuard` predicate**. + +### E. The argv must be REWRITTEN, not just allowed/denied +`gh pr create` has `--head ` *injected* so the PR targets the box's +work (recognizing 3 spellings to avoid double-inject), and refuses outright if no +head can be resolved. `gh repo clone` argv is *reordered* (positionals first) for +the commander parser. +- Proof: `injectPrCreateHead` / `prCreateNeedsHead` (`gh.ts:194-242`); clone + reorder (`gh-shim:254-289`). +- Why a whitelist fails: allow/deny/prompt has no "transform/inject" verb; the + correct call literally differs from what the box typed. +- Rescued by: **`argvInject` / `argvReorder` transforms** in a rule. + +### F. Some writes need a higher tier than "prompt" +With `AGENTBOX_PROMPT=off` (auto-approve), `gh pr merge` *still* refuses unless +`AGENTBOX_GH_FORCE=1` — irreversibility warrants an extra interlock. `gh pr +checkout` is disabled entirely unless an opt-in env is set. +- Proof: `refuseMergeBypass` (`gh.ts:463-479`), `refuseCheckoutByDefault` + (`gh.ts:481-495`). 
+- Why a whitelist fails: gating is not binary (read|write); there's a third + "never silently auto-approve / opt-in only" tier. +- Rescued by: per-rule **`tier: irreversible | opt-in`** flag. + +### G. Interactive / streaming / no-TTY commands break the request/response model +`gh run watch` is deliberately excluded (blocks until CI finishes). `gh run view` +with no run-id would spawn an interactive picker that hangs with no TTY, so the +shim *requires* a positional. Any wrapped CLI that paginates, prompts, or streams +will hang the relay round-trip. +- Proof: `GH_RUN_OPS` omits `watch` (`gh.ts:48-64`); `gh-shim:196-202` requires a + run-id. +- Why a whitelist fails: it has no notion of "this subcommand is interactive/ + unbounded." The auto-generator can't infer this from `--help` either. +- Rescued by: per-rule **`requirePositional`** + a `deny`/`needsManualReview` + marker the skill sets; documented as a residual limit. + +### H. The CLI surface itself is unreliable to discover in code +Read/write semantics can't be inferred from a subcommand name alone, help formats +differ per CLI, and names drift (`linear issue comment add`, not `create` — the +exact mistake made in the Linear brief and caught only at runtime). +- Why an in-code auto-parser fails: brittle, and unsafe when wrong. +- Rescued by: **auto-gen is a skill**, not code — claude/codex reads `--help`, + reasons about read/write + danger, and writes the profile for human review. + +### I. Auth mechanism is heterogeneous (provisioning) +`ntn` needs `NOTION_KEYRING=0`; `linear` reads a plaintext TOML; `gh` uses its own +store; Trello uses `TRELLO_API_KEY/_TOKEN`. A connector may only inject env in its +own `_*` namespace. +- Proof: `mergeConnectorEnv` (`packages/relay/src/integrations.ts:176-189`) throws + on out-of-namespace keys; `env: { NOTION_KEYRING: '0' }` (notion connector). +- Why a whitelist fails: it has no env model. 
The generator can't know the auth + mechanism — it must be a reviewable field, namespace-enforced. +- Rescued by: profile `env` (namespace-checked by the existing `mergeConnectorEnv`) + + `detect.authArgs`. + +### J. A user-generated shim can't be baked into the image +All four current shims are COPY'd into `Dockerfile.box` and listed across five +provider arrays in `stage-runtime.mjs` (`gh-shim`, `git-shim`, `ntn-shim`, +`linear-shim`). A user adding a shim post-build cannot rebuild the image. +- Why the current provisioning fails: it's build-time + static. +- Rescued by: **one generic dispatcher baked once**, plus **create-time injection** + of a tiny per-CLI forwarder (just the binary name); the *ruleset stays host-side* + and never enters the box. + +**Conclusion.** The flat model cleanly handles a CLI whose read/write split is +per-subcommand with enumerable args and no embedded API/secret semantics +(Trello-like). It cannot handle raw API/GraphQL passthroughs (A/B), output-secret +reads (C), host-state-contextual gates (D), argv rewriting (E), irreversibility +tiers (F), or interactive commands (G) — and those are precisely the load-bearing +parts of `git`/`gh`/`ntn`/`linear`. So the system = **declarative core for the +common case + a predicate library for the hard cases + an LLM skill to author +both**, with the relay (not the box) holding all intelligence. + +--- + +## Part 2 — Architecture + +### Central move: dumb box-side forwarder, smart host-side relay +Unlike today's per-CLI bash shims (which embed dispatch logic), the box side +becomes a **single generic forwarder**. ALL classification lives host-side in a +profile the relay loads — richer than bash, editable without an image rebuild, +and never exposed to the box. + +``` +box: `linear issue create …` + └─ /usr/local/bin/linear (create-time wrapper) -> exec agentbox-ctl shim linear -- "$@" + └─ ctl POST /rpc method="shim.linear", params={ argv:[…], path:cwd, hostInitiated? 
} + └─ relay: load profile linear.yaml -> ROUTE argv to a rule -> + enable-gate (shims.linear.enabled) -> + rule.access: read|write|deny + predicate guard + argv transform -> + write? askPrompt (reuse) / host-initiated token (reuse) -> + spawn host `linear <argv>` with namespaced env -> + {exitCode,stdout,stderr} back to box +``` + +This reuses, unchanged: `askPrompt`/`PromptSubscribers` (`prompts.ts`), +`HostInitiatedTokens`/`hashRpcParams` (`host-initiated.ts`), `mergeConnectorEnv` +and the `runHostBinary` spawn (`relay/src/integrations.ts`), `postRpcAndExit` +(`ctl/relay-rpc.ts`), and the `HostActionQueue`/`CloudBoxPoller` cloud round-trip +(method-prefix-agnostic). + +### Profile schema (`~/.agentbox/shims/<name>.yaml`) — the easy 80% +```yaml +name: linear # wire name + default bin +bin: linear # host binary the relay execs (PATH-resolved) +detect: + versionArgs: [--version] + authArgs: [auth, whoami] + installHint: "npm i -g @schpet/linear-cli" + loginHint: "linear auth login" +env: {} # only LINEAR_* keys allowed (mergeConnectorEnv enforces) +rules: # ordered; first match wins + - match: [auth, whoami] # subcommand path prefix + access: read + - match: [issue, list] + access: read + - match: [issue, create] + access: write # -> askPrompt + - match: [issue, comment, add] + access: write + - match: [auth, token] + access: deny + reason: "prints the raw API key" + guard: denySecretOutput + - match: [api] + access: read + guard: { graphql: { queriesOnly: true } } # predicate (Cat A/C) + - default: prompt # safe default for unmatched argv +# Optional per-rule arg constraints: +# allowFlags: [--json, --state] +# denyFlags: [--input] +# denyArgValues: ['@'] # refuse @-shaped values anywhere +# requirePositional: true # avoid no-TTY interactive picker (Cat G) +# tier: irreversible # never silent-auto-approve (Cat F) +# transform: { argvInject: { flag: --head, value: "$boxBranch", when: [pr, create] } } +``` + +### Predicate library (the hard 20%) — 
`packages/integrations/src/predicates.ts` +Each is a parameterized factory returning the existing `(args) => Refusal | null` +shape (so it slots into the current `refuseCall` path). Implementations are +**lifted from today's hand-written guards** so behavior is identical and tested: +- `httpApi({ methods, endpoints, writeEndpoints })` — generalizes + `refuseGhApiCall` + `refuseApiNonGet`: argv method inference (`-X/--method`, + field-flag→POST), endpoint glob/template match, method↔subset correlation, + `--input` refusal. (Cat A, B) +- `graphql({ queriesOnly })` — generalizes `refuseGraphqlNonQuery`: parse the + GraphQL positional, refuse `mutation`/`subscription`, refuse `--variable @` + / `--input`, comment/whitespace/BOM tolerant. (Cat A, C) +- `denySecretOutput({ reason })` — unconditional refusal for secret-printing ops + like `auth token`. (Cat C) +- `refuseFileLoadArgs()` — refuse `@` / `--input` value shapes anywhere. (Cat C) +- `hostStateGuard({ requireCleanTree, refuseOnBoxBranch })` — generalizes + `checkoutGuards`: probe host `git status`/HEAD vs the registered-branch set. (Cat D) +- argv transforms `argvInject` / `argvReorder` — generalize `injectPrCreateHead` / + clone reorder; resolvers like `$boxBranch` filled from the registered worktree. (Cat E) +- escape hatch `guard: ./guard.mjs#fn` — load a user JS module exporting the + `refuseCall` signature, for anything bespoke. +`tier: irreversible|opt-in` on a rule generalizes `refuseMergeBypass` / +`refuseCheckoutByDefault` (Cat F) — enforced in the dispatch, not a predicate. + +### Registry integration +A profile **compiles to the existing `IntegrationConnector` shape** +(`bin`→`hostBin`, `rules`→`ops` + a routing table, `env`, `detect`) so it flows +through the relay's existing dispatch. Extend the static `ALL_CONNECTORS` +(`packages/integrations/src/registry.ts`) with a dynamic loader that reads +`~/.agentbox/shims/*.yaml`. 
The one genuinely new piece: today the *shim* resolves +subcommand→op; here the **relay routes raw argv→rule** from the profile's ordered +`rules` (a small matcher in `relay/src/shims.ts`). + +### Provisioning (resolves Cat J) +- Bake **one** generic dispatcher `packages/sandbox-docker/scripts/agentbox-shim` + (infers wire-name from `argv[0]`, `exec agentbox-ctl shim "$name" -- "$@"`). + Register it once in `stage-runtime.mjs` (the `execBitFiles`/`contextFiles` + + `hetznerFiles`/`vercelFiles`/`e2bFiles` arrays) + `Dockerfile.box` COPY + + `install-box.sh` mirror — same five-place dance as existing shims, but only once. +- At **box-create time**, the CLI reads the enabled-shim list (`shims.*.enabled` + effective config) and, for each, drops `/usr/local/bin/` as a 1-line + `exec agentbox-shim` symlink/wrapper via the provider's existing file-injection + path (docker cp/exec, hetzner scp, vercel/e2b upload). **Profiles never enter + the box** — only the bin name + the generic forwarder. + +--- + +## Part 3 — Auto-generation as a skill (`agentbox shims add `) + +`agentbox shims add linear [--bin linear]`: +1. Scaffolds `~/.agentbox/shims/linear.yaml` with `name/bin/detect` stubbed and an + empty `rules` + `default: deny`. +2. Launches the **`shim-author` skill** (new, shipped under + `apps/cli/share/shim-author/SKILL.md`; invokable by claude/codex). 
The skill + instructs the agent to, **host-side**: + - run ` --help` and each subcommand `--help`, enumerate the surface; + - classify each subcommand read/write by semantics (get/list/view/show→read; + create/update/delete/add/move→write), defaulting uncertain ones to `prompt`; + - **flag and `deny` (with a TODO) every dangerous surface it must not auto-enable**: + any `api`/`graphql` passthrough (wire a `httpApi`/`graphql` predicate with + endpoints left for the human), any `token`/secret-printing op + (`denySecretOutput`), any `delete`/destructive op, anything interactive/ + streaming (Cat G); + - write the profile YAML and summarize what it could/couldn't safely classify. +3. The user reviews/edits, runs `agentbox shims test linear -- ` (host-side + dry-run: prints the matched rule + read/write/deny verdict, **no execution**) to + validate, then `agentbox shims enable linear`. + +`agentbox shims` subcommands: `add | list | show | edit | test | enable | disable +| remove`. `agentbox doctor` reports each loaded profile (extend the existing +`ALL_CONNECTORS` iteration in `apps/cli/src/lib/doctor-checks.ts` to include +profiles). + +**Residual limits to document honestly** (the skill marks these `deny`+TODO, never +auto-enables): exact `httpApi` endpoint globs need human confirmation (B); +output-secrecy must be guessed (C); interactivity can't be inferred from help (G). + +--- + +## Part 4 — Config + +Add a `shims` block mirroring `integrations`, in `packages/config/src/types.ts`: +```ts +// UserConfig: shims?: Record; +// EffectiveConfig: shims: Record; // default {} +``` +- Precedence is the existing global.enabled` at the chosen layer. +- The relay enable-gate generalizes `refuseIfIntegrationDisabled` + (`relay/src/integrations.ts:291-315`) to a `refuseIfShimDisabled(name, cwd)` + reading `shims..enabled`. +- **New wrinkle**: `KEY_REGISTRY` is a static list (`config/src/types.ts:~875`); + it can't enumerate dynamic shim names. 
Add wildcard handling so + `shims.<name>.enabled` validates as a `bool` key (a `KEY_PATTERNS` entry + alongside the static registry). This is the one config-system change. + +--- + +## Critical files + +- **New** `packages/integrations/src/profile.ts` (schema + `compileProfile`), + `packages/integrations/src/predicates.ts` (predicate library — lift from + `gh.ts`/`notion.ts`/`linear.ts` guards), `packages/integrations/src/profile-loader.ts` + (read `~/.agentbox/shims/*.yaml`). +- **New** `packages/relay/src/shims.ts` — argv→rule router, `shim.<name>` dispatch, + enable-gate, tier enforcement. Wire the `shim.` prefix into + `packages/relay/src/server.ts` (POST /rpc) **and** `packages/relay/src/host-actions.ts` + (cloud), beside the `integration.` branch. +- **New** `packages/ctl/src/commands/shim.ts` — `agentbox-ctl shim <name> -- <argv>`. +- **New** `apps/cli/src/commands/shims.ts` — the `agentbox shims …` surface; + register in `apps/cli/src/index.ts`. +- **New** `packages/sandbox-docker/scripts/agentbox-shim` (generic dispatcher) + + create-time per-bin injection in each provider's create path. +- **New** `apps/cli/share/shim-author/SKILL.md` (the auto-gen skill). +- **Edit** `apps/cli/scripts/stage-runtime.mjs` (+`Dockerfile.box`, + `install-box.sh`) — register `agentbox-shim` once; **edit** `packages/config/src/types.ts` + (`shims` block + wildcard KEY validation); **edit** `apps/cli/src/lib/doctor-checks.ts`. +- **Reference (copy/lift from)** `packages/relay/src/gh.ts`, `connectors/notion.ts`, + `connectors/linear.ts`, `relay/src/integrations.ts`, `host-initiated.ts`, + `prompts.ts`, `sandbox-docker/scripts/{gh,git,ntn,linear}-shim`. +- **Docs (same change)** new `docs/shims.md`; public `.mdx` + `meta.json` under + `apps/web/content/docs/`; mention in `docs/host-relay.md` (new `shim.*` method) + + `docs/features.md`. + +## Implementation phases (each its own box/PR, mirroring the integrations cadence) +1. 
**Predicate library + profile schema/compile** (pure `@agentbox/integrations`, + unit-tested by porting the existing `refuse*` tests). No box wiring yet. +2. **Relay `shim.*` dispatch + profile loader + enable-gate**, wired into + `server.ts` + `host-actions.ts`; ctl `shim` command. Unit tests for argv→rule + routing + write-gate + tier. +3. **Provisioning**: generic `agentbox-shim` baked once + create-time per-bin + injection across providers; `shims` config block + wildcard key validation. +4. **`agentbox shims` CLI** (`add/list/show/test/enable/disable/remove`) + the + `shim-author` skill + `doctor` integration + docs. +5. **Live e2e**: `agentbox shims add` a real CLI, skill authors the profile, + review, enable, box read (no prompt) + write (prompt) + denied `auth token`, + no-token assertion. Then optionally re-express one built-in (e.g. `linear`) as a + profile to prove parity and retire its bespoke connector. + +## Verification +- **Unit**: predicate parity tests (the ported `refuseGhApiCall` / `refuseApiNonGet` + / `refuseGraphqlNonQuery` / `checkoutGuards` cases must pass against the lifted + predicates); profile compile + argv→rule routing; `shims test` dry-run golden + output; enable-gate refuses when `shims.<name>.enabled` is false. +- **e2e** (docker first, then one cloud per "fix across all providers"): the phase-5 + flow, asserting reads skip the prompt, writes gate via `askPrompt`, `deny`/secret + ops refuse, argv-inject/host-state guards fire, and `printenv` in the box shows + only `AGENTBOX_RELAY_TOKEN`. Ground-truth every write (don't trust exit codes). + +## Out of scope / follow-ups +- Migrating the four built-in connectors onto profiles (parity proof in phase 5 is + optional; full migration is a later cleanup — no deprecation churn now). +- Per-rule rate limits / audit log of proxied calls. +- A shared community profile registry (`agentbox shims add --from <url>`) — + note the trust implications; out of scope for v1. 
+ diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index 761d0923..dfbc7f84 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -39,6 +39,23 @@ export type { ResolvedCarryEntry, ResyncResult, } from './provider.js'; +export { + applyReplacements, + substitutePlaceholders, + placeholderContextFromEnv, + parseReplaceRule, + parseReplaceRules, + parseReplacements, + resolveRuleRefs, + parseRuleArg, + PLACEHOLDER_KEYS, + ReplaceError, +} from './replace.js'; +export type { + ReplaceRule, + ApplyReplacementsOptions, + PlaceholderKey, +} from './replace.js'; export type { CloudBackend, CloudExecOptions, diff --git a/packages/core/src/provider.ts b/packages/core/src/provider.ts index abd6a8b7..856776c2 100644 --- a/packages/core/src/provider.ts +++ b/packages/core/src/provider.ts @@ -8,6 +8,7 @@ import type { BoxRecord, ProviderName } from './box-record.js'; import type { BoxEndpoints } from './endpoints.js'; +import type { ReplaceRule } from './replace.js'; import type { BoxResourceStats } from './types.js'; /** Coarse lifecycle state, identical across providers. */ @@ -50,6 +51,17 @@ export interface ResolvedCarryEntry { * along. Set by the host resolver; ignored for `file`/`missing` entries. */ exclude?: string[]; + /** + * Substitute `{{AGENTBOX_*}}` whitelist placeholders in the file content + * before copying (host-side). File entries only. + */ + replaceEnvs?: boolean; + /** + * Custom replacement rules applied (in order) to the file content before + * copying. Named `replacements:` rule-sets are already expanded into this + * list by the host resolver. File entries only. + */ + replace?: ReplaceRule[]; } export interface CreateBoxRequest { diff --git a/packages/core/src/replace.ts b/packages/core/src/replace.ts new file mode 100644 index 00000000..db7c7d9c --- /dev/null +++ b/packages/core/src/replace.ts @@ -0,0 +1,226 @@ +/** + * Provider-neutral text-replacement engine. 
Pure (no fs / no yaml) so it can be + * shared by the host carry path (`renderCarryEntries`) and the in-box + * `agentbox-ctl render` CLI without a dependency cycle. The yaml/fs loaders that + * read a `replacements:` block live in `@agentbox/ctl` (which has those deps). + */ + +/** + * The fixed set of `{{NAME}}` placeholders that `replaceEnvs` / `--env` + * substitution recognizes. Deliberately a whitelist (not "any env var") so a + * rendered file is predictable: a stray `{{FOO}}` is left untouched rather than + * silently clobbered, and secrets/tokens are never substitutable. + */ +export const PLACEHOLDER_KEYS = [ + 'AGENTBOX_BOX_NAME', + 'AGENTBOX_BOX_ID', + 'AGENTBOX_BOX_KIND', + 'AGENTBOX_HOST_WORKSPACE', + 'AGENTBOX_PROJECT_ROOT', + // Convenience: the portless host this box publishes (`.localhost`). + // Derived from AGENTBOX_BOX_NAME when not set explicitly. + 'AGENTBOX_BOX_HOST', +] as const; + +export type PlaceholderKey = (typeof PLACEHOLDER_KEYS)[number]; + +const PLACEHOLDER_SET = new Set(PLACEHOLDER_KEYS); + +/** A single regex/literal substitution. `to` may itself contain placeholders. */ +export interface ReplaceRule { + from: string; + to: string; + /** Treat `from` as a JS regex (with `flags`); otherwise a literal string. */ + regex?: boolean; + /** Regex flags (default `g`). Ignored unless `regex` is true. */ + flags?: string; +} + +export class ReplaceError extends Error { + constructor(message: string) { + super(message); + this.name = 'ReplaceError'; + } +} + +const PLACEHOLDER_RE = /\{\{\s*([A-Z0-9_]+)\s*\}\}/g; + +/** + * Replace `{{NAME}}` placeholders whose NAME is in {@link PLACEHOLDER_KEYS} + * with the matching value from `context`. Unknown names (not whitelisted) are + * left as-is. Whitelisted names with no value in `context` are also left as-is + * (and reported via `onWarn`). 
+ */ +export function substitutePlaceholders( + text: string, + context: Record, + onWarn?: (msg: string) => void, +): string { + return text.replace(PLACEHOLDER_RE, (match, name: string) => { + if (!PLACEHOLDER_SET.has(name)) return match; + const value = context[name]; + if (value === undefined) { + onWarn?.(`placeholder {{${name}}} has no value in this context — left untouched`); + return match; + } + return value; + }); +} + +export interface ApplyReplacementsOptions { + /** When true, substitute `{{NAME}}` whitelist placeholders across the file. */ + env?: boolean; + /** Ordered custom rules applied after (or instead of) placeholder env subst. */ + rules?: ReplaceRule[]; + /** Placeholder values (used by both `env` subst and rule `to` strings). */ + context: Record; + onWarn?: (msg: string) => void; +} + +/** + * Apply env-placeholder substitution and/or custom rules to file content. + * Rules run in declaration order; each rule's `to` string is itself run through + * placeholder substitution so `to: '.{{AGENTBOX_BOX_NAME}}.localhost'` works + * regardless of `env`. + */ +export function applyReplacements(content: string, opts: ApplyReplacementsOptions): string { + let out = content; + if (opts.env) { + out = substitutePlaceholders(out, opts.context, opts.onWarn); + } + for (const rule of opts.rules ?? []) { + const to = substitutePlaceholders(rule.to, opts.context, opts.onWarn); + if (rule.regex) { + let re: RegExp; + try { + re = new RegExp(rule.from, rule.flags ?? 'g'); + } catch (err) { + throw new ReplaceError( + `invalid regex "${rule.from}": ${err instanceof Error ? err.message : String(err)}`, + ); + } + out = out.replace(re, to); + } else { + // Literal: split/join so `$`/special chars in either side stay literal. + out = out.split(rule.from).join(to); + } + } + return out; +} + +/** Build the whitelist placeholder context from a process environment. 
*/ +export function placeholderContextFromEnv( + env: NodeJS.ProcessEnv = process.env, +): Record { + const ctx: Record = {}; + for (const key of PLACEHOLDER_KEYS) { + const v = env[key]; + if (typeof v === 'string' && v.length > 0) ctx[key] = v; + } + if (ctx.AGENTBOX_BOX_HOST === undefined && ctx.AGENTBOX_BOX_NAME !== undefined) { + ctx.AGENTBOX_BOX_HOST = `${ctx.AGENTBOX_BOX_NAME}.localhost`; + } + return ctx; +} + +// --- rule parsing (shared by config top-level `replacements:` and the CLI) --- + +function isPlainObject(v: unknown): v is Record { + return typeof v === 'object' && v !== null && !Array.isArray(v); +} + +const RULE_KEYS = new Set(['from', 'to', 'regex', 'flags']); + +/** Parse one rule mapping (from `replacements:` blocks or carry `replace:`). */ +export function parseReplaceRule(raw: unknown, where: string): ReplaceRule { + if (!isPlainObject(raw)) { + throw new ReplaceError(`${where} must be a mapping with at least { from, to }`); + } + for (const key of Object.keys(raw)) { + if (!RULE_KEYS.has(key)) throw new ReplaceError(`${where} has unknown key "${key}"`); + } + if (typeof raw.from !== 'string' || raw.from.length === 0) { + throw new ReplaceError(`${where}.from must be a non-empty string`); + } + if (typeof raw.to !== 'string') { + throw new ReplaceError(`${where}.to must be a string`); + } + const rule: ReplaceRule = { from: raw.from, to: raw.to }; + if (raw.regex !== undefined && raw.regex !== null) { + if (typeof raw.regex !== 'boolean') throw new ReplaceError(`${where}.regex must be a boolean`); + rule.regex = raw.regex; + } + if (raw.flags !== undefined && raw.flags !== null) { + if (typeof raw.flags !== 'string') throw new ReplaceError(`${where}.flags must be a string`); + rule.flags = raw.flags; + } + if (rule.regex) { + try { + new RegExp(rule.from, rule.flags ?? 'g'); + } catch (err) { + throw new ReplaceError( + `${where}.from is not a valid regex: ${err instanceof Error ? 
err.message : String(err)}`, + ); + } + } + return rule; +} + +/** Parse a list of rules (carry `replace:` or a `replacements:` named set). */ +export function parseReplaceRules(raw: unknown, where: string): ReplaceRule[] { + if (raw === undefined || raw === null) return []; + if (!Array.isArray(raw)) throw new ReplaceError(`${where} must be a list of rules`); + return raw.map((r, i) => parseReplaceRule(r, `${where}[${String(i)}]`)); +} + +/** Parse the top-level `replacements:` block: name → rule list. */ +export function parseReplacements(raw: unknown): Record { + if (raw === undefined || raw === null) return {}; + if (!isPlainObject(raw)) { + throw new ReplaceError('replacements must be a mapping of name → rule list'); + } + const out: Record = {}; + for (const [name, rules] of Object.entries(raw)) { + if (!/^[A-Za-z0-9_-]+$/.test(name)) { + throw new ReplaceError(`replacements.${name}: name must match [A-Za-z0-9_-]+`); + } + out[name] = parseReplaceRules(rules, `replacements.${name}`); + } + return out; +} + +/** + * Resolve a list of named rule-set references against a `replacements:` map, + * concatenating their rules in reference order. Throws on an unknown name. + */ +export function resolveRuleRefs( + refs: string[], + replacements: Record, + where: string, +): ReplaceRule[] { + const out: ReplaceRule[] = []; + for (const name of refs) { + const set = replacements[name]; + if (set === undefined) { + const known = Object.keys(replacements); + throw new ReplaceError( + `${where}: unknown replacements rule-set "${name}"` + + (known.length > 0 ? ` (known: ${known.join(', ')})` : ' (none declared)'), + ); + } + out.push(...set); + } + return out; +} + +/** Parse a CLI `--rule 'from=>to'` argument into a rule. `regex` opt-in. 
*/ +export function parseRuleArg(arg: string, regex: boolean): ReplaceRule { + const idx = arg.indexOf('=>'); + if (idx === -1) { + throw new ReplaceError(`--rule "${arg}" must be of the form 'from=>to'`); + } + const from = arg.slice(0, idx); + const to = arg.slice(idx + 2); + if (from.length === 0) throw new ReplaceError(`--rule "${arg}" has an empty 'from'`); + return parseReplaceRule({ from, to, ...(regex ? { regex: true } : {}) }, `--rule "${arg}"`); +} diff --git a/packages/core/test/replace.test.ts b/packages/core/test/replace.test.ts new file mode 100644 index 00000000..1e714394 --- /dev/null +++ b/packages/core/test/replace.test.ts @@ -0,0 +1,134 @@ +import { describe, expect, it } from 'vitest'; +import { + applyReplacements, + parseReplaceRule, + parseReplacements, + parseRuleArg, + placeholderContextFromEnv, + ReplaceError, + resolveRuleRefs, + substitutePlaceholders, +} from '../src/replace.js'; + +const ctx = { + AGENTBOX_BOX_NAME: 'optima-abc123', + AGENTBOX_BOX_HOST: 'optima-abc123.localhost', +}; + +describe('substitutePlaceholders', () => { + it('substitutes whitelisted placeholders', () => { + expect(substitutePlaceholders('https://{{AGENTBOX_BOX_HOST}}/x', ctx)).toBe( + 'https://optima-abc123.localhost/x', + ); + }); + + it('tolerates inner whitespace', () => { + expect(substitutePlaceholders('{{ AGENTBOX_BOX_NAME }}', ctx)).toBe('optima-abc123'); + }); + + it('leaves non-whitelisted placeholders untouched', () => { + expect(substitutePlaceholders('{{NOT_ALLOWED}} {{HOME}}', ctx)).toBe('{{NOT_ALLOWED}} {{HOME}}'); + }); + + it('leaves whitelisted-but-missing values untouched and warns', () => { + const warnings: string[] = []; + const out = substitutePlaceholders('{{AGENTBOX_BOX_ID}}', ctx, (m) => warnings.push(m)); + expect(out).toBe('{{AGENTBOX_BOX_ID}}'); + expect(warnings).toHaveLength(1); + }); +}); + +describe('applyReplacements', () => { + it('applies env substitution when env:true', () => { + expect( + 
applyReplacements('host={{AGENTBOX_BOX_HOST}}', { env: true, context: ctx }), + ).toBe('host=optima-abc123.localhost'); + }); + + it('does not substitute placeholders when env is false', () => { + expect(applyReplacements('{{AGENTBOX_BOX_HOST}}', { context: ctx })).toBe( + '{{AGENTBOX_BOX_HOST}}', + ); + }); + + it('applies literal rules and substitutes placeholders in the replacement', () => { + const out = applyReplacements('see optima.localhost here', { + context: ctx, + rules: [{ from: 'optima.localhost', to: '{{AGENTBOX_BOX_HOST}}' }], + }); + expect(out).toBe('see optima-abc123.localhost here'); + }); + + it('applies regex rules with capture groups', () => { + const out = applyReplacements('a1 b2', { + context: ctx, + rules: [{ from: '([a-z])(\\d)', to: '$2$1', regex: true }], + }); + expect(out).toBe('1a 2b'); + }); + + it('treats $ literally in literal rules', () => { + const out = applyReplacements('price', { + context: ctx, + rules: [{ from: 'price', to: '$1.00' }], + }); + expect(out).toBe('$1.00'); + }); + + it('throws ReplaceError on an invalid regex rule', () => { + expect(() => + applyReplacements('x', { context: ctx, rules: [{ from: '(', to: 'y', regex: true }] }), + ).toThrow(ReplaceError); + }); +}); + +describe('placeholderContextFromEnv', () => { + it('derives AGENTBOX_BOX_HOST from the box name', () => { + const c = placeholderContextFromEnv({ AGENTBOX_BOX_NAME: 'foo' } as NodeJS.ProcessEnv); + expect(c.AGENTBOX_BOX_HOST).toBe('foo.localhost'); + }); + + it('ignores non-whitelisted env vars', () => { + const c = placeholderContextFromEnv({ + AGENTBOX_BOX_NAME: 'foo', + SECRET: 'nope', + } as NodeJS.ProcessEnv); + expect(c).not.toHaveProperty('SECRET'); + }); +}); + +describe('rule parsing', () => { + it('parses a top-level replacements block', () => { + const r = parseReplacements({ host: [{ from: 'a', to: 'b' }] }); + expect(r.host).toEqual([{ from: 'a', to: 'b' }]); + }); + + it('rejects an unknown rule key', () => { + expect(() => 
parseReplaceRule({ from: 'a', to: 'b', bogus: 1 }, 'x')).toThrow(ReplaceError); + }); + + it('rejects an invalid regex at parse time', () => { + expect(() => parseReplaceRule({ from: '(', to: 'b', regex: true }, 'x')).toThrow(ReplaceError); + }); + + it('resolveRuleRefs concatenates named sets in order', () => { + const sets = { a: [{ from: '1', to: 'one' }], b: [{ from: '2', to: 'two' }] }; + expect(resolveRuleRefs(['a', 'b'], sets, 'x')).toEqual([ + { from: '1', to: 'one' }, + { from: '2', to: 'two' }, + ]); + }); + + it('resolveRuleRefs throws on an unknown name', () => { + expect(() => resolveRuleRefs(['ghost'], {}, 'carry[0].rules')).toThrow(/unknown/); + }); + + it('parseRuleArg parses from=>to', () => { + expect(parseRuleArg('a=>b', false)).toEqual({ from: 'a', to: 'b' }); + expect(parseRuleArg('a=>b', true)).toEqual({ from: 'a', to: 'b', regex: true }); + }); + + it('parseRuleArg rejects a missing arrow', () => { + expect(() => parseRuleArg('noarrow', false)).toThrow(ReplaceError); + }); +}); diff --git a/packages/ctl/schema/agentbox.schema.json b/packages/ctl/schema/agentbox.schema.json index 6846d01d..2687cbf1 100644 --- a/packages/ctl/schema/agentbox.schema.json +++ b/packages/ctl/schema/agentbox.schema.json @@ -32,7 +32,18 @@ "description": "Host-side AgentBox layered-config defaults for this project. Same shape as ~/.agentbox/config.yaml. The supervisor ignores this; @agentbox/config validates it strictly when loaded. See https://agent-box.sh/schema/user-config.schema.json for the full key set.", "additionalProperties": true }, - "carry": { "$ref": "#/$defs/carry" } + "carry": { "$ref": "#/$defs/carry" }, + "replacements": { + "type": "object", + "description": "Named reusable replacement rule-sets. Reference them by name from `carry[].rules` (host-side) and `agentbox-ctl render --rules` (in-box). 
Each value is an ordered list of {from,to,regex?} rules; `to` may contain {{AGENTBOX_*}} placeholders.", + "additionalProperties": false, + "patternProperties": { + "^[A-Za-z0-9_-]+$": { + "type": "array", + "items": { "$ref": "#/$defs/replaceRule" } + } + } + } }, "$defs": { "carry": { @@ -83,11 +94,50 @@ "optional": { "type": "boolean", "description": "true skips a missing src silently instead of erroring." + }, + "replaceEnvs": { + "type": "boolean", + "description": "Substitute {{AGENTBOX_*}} whitelist placeholders in the file content host-side before copying. File entries only." + }, + "replace": { + "type": "array", + "description": "Inline replacement rules applied (in order) before copying. File entries only.", + "items": { "$ref": "#/$defs/replaceRule" } + }, + "rules": { + "type": "array", + "description": "Names of top-level `replacements:` rule-sets to apply before copying. File entries only.", + "items": { "type": "string", "minLength": 1 } } } } ] }, + "replaceRule": { + "type": "object", + "additionalProperties": false, + "required": ["from", "to"], + "description": "A single text substitution. `to` may contain {{AGENTBOX_*}} placeholders.", + "properties": { + "from": { + "type": "string", + "minLength": 1, + "description": "Literal substring to replace, or a JS regex source when `regex: true`." + }, + "to": { + "type": "string", + "description": "Replacement text. Supports {{AGENTBOX_*}} placeholders (and $1 backrefs when `regex: true`)." + }, + "regex": { + "type": "boolean", + "description": "Treat `from` as a JS regex (with `flags`). Default false (literal)." + }, + "flags": { + "type": "string", + "description": "Regex flags (default `g`). Ignored unless `regex` is true." + } + } + }, "command": { "description": "Shell string (run via `bash -c`) or argv array. 
Non-empty.", "oneOf": [ @@ -173,7 +223,25 @@ "command": { "$ref": "#/$defs/command" }, "cwd": { "type": "string" }, "env": { "$ref": "#/$defs/env" }, - "needs": { "$ref": "#/$defs/needs" } + "needs": { "$ref": "#/$defs/needs" }, + "idempotent": { + "description": "Skip the task when already satisfied. `true` stores a marker keyed by a hash of the command (re-runs when the command changes). `{ check: }` runs a probe first and skips when it exits 0 (right for state outside the checkpointed filesystem, e.g. a containerized DB).", + "oneOf": [ + { "type": "boolean" }, + { + "type": "object", + "additionalProperties": false, + "required": ["check"], + "properties": { + "check": { + "type": "string", + "minLength": 1, + "description": "Probe command (run via `bash -c`). Exit 0 = already satisfied, skip the task." + } + } + } + ] + } } }, "backoff": { diff --git a/packages/ctl/src/bin.ts b/packages/ctl/src/bin.ts index 6da36853..5c5f0f4f 100644 --- a/packages/ctl/src/bin.ts +++ b/packages/ctl/src/bin.ts @@ -11,6 +11,7 @@ import { ghCommand } from './commands/gh.js'; import { gitCommand } from './commands/git.js'; import { notifyCommand } from './commands/notify.js'; import { openCommand } from './commands/open.js'; +import { renderCommand } from './commands/render.js'; import { statusCommand } from './commands/status.js'; import { logsCommand } from './commands/logs.js'; import { validateCommand } from './commands/validate.js'; @@ -48,6 +49,7 @@ program.addCommand(gitCommand); program.addCommand(ghCommand); program.addCommand(checkpointCommand); program.addCommand(cpCommand); +program.addCommand(renderCommand); program.addCommand(downloadCommand); program.addCommand(notifyCommand); program.addCommand(openCommand); diff --git a/packages/ctl/src/carry.ts b/packages/ctl/src/carry.ts index e85dd74f..8136d3e2 100644 --- a/packages/ctl/src/carry.ts +++ b/packages/ctl/src/carry.ts @@ -1,5 +1,6 @@ import { readFile } from 'node:fs/promises'; import { parse as parseYaml } from 
'yaml'; +import { parseReplaceRules, type ReplaceRule } from './replace.js'; /** * One entry from the host-side `carry:` block in `agentbox.yaml`. @@ -26,6 +27,15 @@ export interface CarryItem { */ exclude?: string[]; optional: boolean; + /** + * Substitute `{{AGENTBOX_*}}` whitelist placeholders in the file content + * host-side before copying. File entries only. + */ + replaceEnvs?: boolean; + /** Inline replacement rules applied (in order) before copying. File only. */ + replace?: ReplaceRule[]; + /** Names of top-level `replacements:` rule-sets to apply. File only. */ + rules?: string[]; } export class CarryConfigError extends Error { @@ -35,7 +45,32 @@ export class CarryConfigError extends Error { } } -const ITEM_KEYS = new Set(['src', 'dest', 'mode', 'user', 'exclude', 'optional']); +const ITEM_KEYS = new Set([ + 'src', + 'dest', + 'mode', + 'user', + 'exclude', + 'optional', + 'replaceEnvs', + 'replace', + 'rules', +]); + +function parseRulesRefs(raw: unknown, where: string): string[] | undefined { + if (raw === undefined || raw === null) return undefined; + if (!Array.isArray(raw)) { + throw new CarryConfigError(`${where}.rules must be a list of replacements rule-set names`); + } + const out: string[] = []; + for (const [i, v] of raw.entries()) { + if (typeof v !== 'string' || v.trim().length === 0) { + throw new CarryConfigError(`${where}.rules[${String(i)}] must be a non-empty string`); + } + out.push(v.trim()); + } + return out.length > 0 ? 
out : undefined; +} function parseExclude(raw: unknown, where: string): string[] | undefined { if (raw === undefined || raw === null) return undefined; @@ -202,10 +237,31 @@ function parseMapping(raw: Record, where: string): CarryItem { optional = raw.optional; } + let replaceEnvs: boolean | undefined; + if (raw.replaceEnvs !== undefined && raw.replaceEnvs !== null) { + if (typeof raw.replaceEnvs !== 'boolean') { + throw new CarryConfigError(`${where}.replaceEnvs must be a boolean`); + } + replaceEnvs = raw.replaceEnvs; + } + let replace: ReplaceRule[] | undefined; + if (raw.replace !== undefined && raw.replace !== null) { + try { + const rules = parseReplaceRules(raw.replace, `${where}.replace`); + if (rules.length > 0) replace = rules; + } catch (err) { + throw new CarryConfigError(err instanceof Error ? err.message : String(err)); + } + } + const rules = parseRulesRefs(raw.rules, where); + const out: CarryItem = { src, dest, optional }; if (mode !== undefined) out.mode = mode; if (user !== undefined) out.user = user; if (exclude !== undefined) out.exclude = exclude; + if (replaceEnvs !== undefined) out.replaceEnvs = replaceEnvs; + if (replace !== undefined) out.replace = replace; + if (rules !== undefined) out.rules = rules; return out; } diff --git a/packages/ctl/src/commands/daemon.ts b/packages/ctl/src/commands/daemon.ts index 65ad65b6..cd57af9c 100644 --- a/packages/ctl/src/commands/daemon.ts +++ b/packages/ctl/src/commands/daemon.ts @@ -18,6 +18,7 @@ import { DEFAULT_CLAUDE_SESSION_NAME, DEFAULT_CONFIG_PATH, DEFAULT_LOG_DIR, + DEFAULT_STATE_DIR, DEFAULT_SOCKET_PATH, } from '../types.js'; @@ -38,6 +39,7 @@ interface DaemonOptions { socket: string; config: string; logDir: string; + stateDir: string; workspace: string; } @@ -46,13 +48,19 @@ export const daemonCommand = new Command('daemon') .option('--socket ', 'unix socket path', DEFAULT_SOCKET_PATH) .option('--config ', 'path to agentbox.yaml', DEFAULT_CONFIG_PATH) .option('--log-dir ', 'where per-service 
log files are written', DEFAULT_LOG_DIR) + .option('--state-dir ', 'where idempotent-task markers are written', DEFAULT_STATE_DIR) .option('--workspace ', 'cwd for service processes', '/workspace') .action(async (opts: DaemonOptions) => { const cfg = await loadConfig(opts.config); // Cloud backends that can't expose port 80 (Vercel) set AGENTBOX_WEB_PROXY_PORT // so the WebProxy binds a reachable non-privileged port. Unset → default 80. const webProxyPort = Number(process.env.AGENTBOX_WEB_PROXY_PORT) || undefined; - const sup = new Supervisor({ workspace: opts.workspace, logDir: opts.logDir, webProxyPort }); + const sup = new Supervisor({ + workspace: opts.workspace, + logDir: opts.logDir, + stateDir: opts.stateDir, + webProxyPort, + }); await sup.init(cfg); const reporter = new StatusReporter({ supervisor: sup, diff --git a/packages/ctl/src/commands/render.ts b/packages/ctl/src/commands/render.ts new file mode 100644 index 00000000..24092022 --- /dev/null +++ b/packages/ctl/src/commands/render.ts @@ -0,0 +1,72 @@ +import { readFile, writeFile } from 'node:fs/promises'; +import { Command } from 'commander'; +import { DEFAULT_CONFIG_PATH } from '../types.js'; +import { + applyReplacements, + loadReplacementsSection, + parseRuleArg, + placeholderContextFromEnv, + resolveRuleRefs, + type ReplaceRule, +} from '../replace.js'; + +interface RenderOptions { + out?: string; + inPlace?: boolean; + env?: boolean; + rule: string[]; + ruleRegex: string[]; + rules?: string; + config: string; +} + +function collect(value: string, prev: string[]): string[] { + prev.push(value); + return prev; +} + +export const renderCommand = new Command('render') + .description( + 'Render a file by substituting {{AGENTBOX_*}} placeholders and/or applying ' + + 'replacement rules (a declarative alternative to sed).', + ) + .argument('', 'file to read') + .option('--out ', 'write the result here (default: stdout)') + .option('--in-place', 'overwrite with the result') + .option('--env', 
'substitute {{AGENTBOX_*}} whitelist placeholders') + .option('--rule to>', 'literal replacement (repeatable)', collect, []) + .option('--rule-regex repl>', 'regex replacement (repeatable)', collect, []) + .option('--rules ', 'comma-separated replacements: rule-set names to apply') + .option('--config ', 'agentbox.yaml to read replacements: from', DEFAULT_CONFIG_PATH) + .action(async (src: string, opts: RenderOptions) => { + const content = await readFile(src, 'utf8'); + + const rules: ReplaceRule[] = []; + if (opts.rules) { + const refs = opts.rules + .split(',') + .map((s) => s.trim()) + .filter((s) => s.length > 0); + if (refs.length > 0) { + const replacements = await loadReplacementsSection(opts.config); + rules.push(...resolveRuleRefs(refs, replacements, '--rules')); + } + } + for (const arg of opts.rule) rules.push(parseRuleArg(arg, false)); + for (const arg of opts.ruleRegex) rules.push(parseRuleArg(arg, true)); + + const result = applyReplacements(content, { + env: opts.env, + rules, + context: placeholderContextFromEnv(), + onWarn: (msg) => process.stderr.write(`agentbox-ctl render: ${msg}\n`), + }); + + if (opts.inPlace) { + await writeFile(src, result, 'utf8'); + } else if (opts.out) { + await writeFile(opts.out, result, 'utf8'); + } else { + process.stdout.write(result); + } + }); diff --git a/packages/ctl/src/config.ts b/packages/ctl/src/config.ts index 6750af56..3696489c 100644 --- a/packages/ctl/src/config.ts +++ b/packages/ctl/src/config.ts @@ -1,5 +1,6 @@ import { readFile } from 'node:fs/promises'; import { parse as parseYaml } from 'yaml'; +import { parseReplacements, type ReplaceRule } from './replace.js'; export type RestartPolicy = 'always' | 'on-failure' | 'never'; export type ProbeOnTimeout = 'kill' | 'mark_unhealthy'; @@ -46,12 +47,28 @@ export interface ExposeSpec { as: number; } +/** + * Declarative idempotence for a task. 
The supervisor re-runs every task from + * `pending` on each box start; `idempotent` lets it skip a task that has + * already succeeded. + * + * - `{ kind: 'marker' }` (from `idempotent: true`) — the supervisor stores a + * marker keyed by a hash of the resolved command; a warm boot skips while the + * hash matches, and editing the command re-runs. + * - `{ kind: 'check', command }` (from `idempotent: { check: ... }`) — run the + * probe before launching; exit 0 means already satisfied (skip). No marker: + * the probe is the source of truth (right for data that lives outside the + * checkpointed filesystem, e.g. a containerized DB). + */ +export type TaskIdempotent = { kind: 'marker' } | { kind: 'check'; command: string }; + export interface TaskSpec { name: string; command: string | string[]; cwd?: string; env?: Record; needs: string[]; + idempotent?: TaskIdempotent; } export interface ServiceSpec { @@ -71,6 +88,8 @@ export interface ServiceSpec { export interface CtlConfig { services: ServiceSpec[]; tasks: TaskSpec[]; + /** Named reusable replacement rule-sets (top-level `replacements:` block). 
*/ + replacements: Record; } export const DEFAULT_BACKOFF: BackoffSpec = { @@ -401,7 +420,24 @@ function parseService(name: string, raw: unknown): ServiceSpec { return { name, command, cwd, env, autostart, restart, backoff, needs, readyWhen, expose }; } -const TASK_KEYS = new Set(['command', 'cwd', 'env', 'needs']); +const TASK_KEYS = new Set(['command', 'cwd', 'env', 'needs', 'idempotent']); + +function parseIdempotent(raw: unknown, where: string): TaskIdempotent | undefined { + if (raw === undefined || raw === null || raw === false) return undefined; + if (raw === true) return { kind: 'marker' }; + if (isPlainObject(raw)) { + const keys = Object.keys(raw); + if (keys.length !== 1 || keys[0] !== 'check') { + throw new ConfigError(`${where}.idempotent object form must be exactly { check: }`); + } + const check = raw.check; + if (typeof check !== 'string' || check.trim().length === 0) { + throw new ConfigError(`${where}.idempotent.check must be a non-empty command string`); + } + return { kind: 'check', command: check }; + } + throw new ConfigError(`${where}.idempotent must be true or { check: }`); +} function parseTask(name: string, raw: unknown): TaskSpec { const where = `tasks.${name}`; @@ -413,7 +449,10 @@ function parseTask(name: string, raw: unknown): TaskSpec { const cwd = raw.cwd === undefined ? undefined : assertString(raw.cwd, `${where}.cwd`); const env = parseEnv(raw.env, where); const needs = parseNeeds(raw.needs, `${where}.needs`); - return { name, command, cwd, env, needs }; + const idempotent = parseIdempotent(raw.idempotent, where); + const spec: TaskSpec = { name, command, cwd, env, needs }; + if (idempotent !== undefined) spec.idempotent = idempotent; + return spec; } function assertString(raw: unknown, where: string): string { @@ -434,7 +473,10 @@ function assertBool(raw: unknown, where: string): boolean { // layer via @agentbox/ctl/carry, applied at create time). 
The supervisor never // reads it — listing it here only suppresses the unknown-key error so a project // yaml that declares `carry:` still parses cleanly inside the box. -const TOP_LEVEL_KEYS = new Set(['services', 'tasks', 'ide', 'defaults', 'carry']); +// `replacements` is the top-level reusable replacement-rule block, consumed by +// the in-box `agentbox-ctl render` CLI and host-side `carry:` rule refs. We +// parse + validate it here (regex compile-check) so a typo fails loud in-box. +const TOP_LEVEL_KEYS = new Set(['services', 'tasks', 'ide', 'defaults', 'carry', 'replacements']); function validateUnitGraph(tasks: TaskSpec[], services: ServiceSpec[]): void { const names = new Set(); @@ -500,7 +542,7 @@ export function parseConfig(text: string): CtlConfig { } catch (err) { throw new ConfigError(`yaml parse error: ${err instanceof Error ? err.message : String(err)}`); } - if (doc === null || doc === undefined) return { services: [], tasks: [] }; + if (doc === null || doc === undefined) return { services: [], tasks: [], replacements: {} }; if (!isPlainObject(doc)) { throw new ConfigError('top-level config must be a mapping'); } @@ -557,7 +599,14 @@ export function parseConfig(text: string): CtlConfig { ); } - return { services, tasks }; + let replacements: Record; + try { + replacements = parseReplacements(doc.replacements); + } catch (err) { + throw new ConfigError(err instanceof Error ? 
err.message : String(err)); + } + + return { services, tasks, replacements }; } export async function loadConfig(path: string): Promise { @@ -566,7 +615,7 @@ export async function loadConfig(path: string): Promise { text = await readFile(path, 'utf8'); } catch (err) { if ((err as NodeJS.ErrnoException).code === 'ENOENT') { - return { services: [], tasks: [] }; + return { services: [], tasks: [], replacements: {} }; } throw err; } diff --git a/packages/ctl/src/index.ts b/packages/ctl/src/index.ts index 8447a2ac..de157bc2 100644 --- a/packages/ctl/src/index.ts +++ b/packages/ctl/src/index.ts @@ -30,6 +30,7 @@ export { DEFAULT_CLAUDE_SESSION_NAME, DEFAULT_CONFIG_PATH, DEFAULT_LOG_DIR, + DEFAULT_STATE_DIR, DEFAULT_SOCKET_PATH, } from './types.js'; export { @@ -67,6 +68,7 @@ export { type RestartPolicy, type ServiceSpec, type TaskSpec, + type TaskIdempotent, } from './config.js'; export { parseCarryRaw, @@ -75,3 +77,20 @@ export { CarryConfigError, type CarryItem, } from './carry.js'; +export { + applyReplacements, + substitutePlaceholders, + placeholderContextFromEnv, + parseReplaceRule, + parseReplaceRules, + parseReplacements, + parseReplacementsSection, + loadReplacementsSection, + resolveRuleRefs, + parseRuleArg, + PLACEHOLDER_KEYS, + ReplaceError, + type ReplaceRule, + type ApplyReplacementsOptions, + type PlaceholderKey, +} from './replace.js'; diff --git a/packages/ctl/src/replace.ts b/packages/ctl/src/replace.ts new file mode 100644 index 00000000..21a00548 --- /dev/null +++ b/packages/ctl/src/replace.ts @@ -0,0 +1,51 @@ +import { readFile } from 'node:fs/promises'; +import { parse as parseYaml } from 'yaml'; +import { parseReplacements, ReplaceError, type ReplaceRule } from '@agentbox/core'; + +// Re-export the pure engine (defined in @agentbox/core so the host carry path +// can share it without a dependency cycle) so in-box code keeps importing from +// a single `./replace.js` surface. 
+export { + applyReplacements, + substitutePlaceholders, + placeholderContextFromEnv, + parseReplaceRule, + parseReplaceRules, + parseReplacements, + resolveRuleRefs, + parseRuleArg, + PLACEHOLDER_KEYS, + ReplaceError, +} from '@agentbox/core'; +export type { ReplaceRule, ApplyReplacementsOptions, PlaceholderKey } from '@agentbox/core'; + +function isPlainObject(v: unknown): v is Record { + return typeof v === 'object' && v !== null && !Array.isArray(v); +} + +/** Parse the top-level `replacements:` block out of raw agentbox.yaml text. */ +export function parseReplacementsSection(text: string): Record { + let doc: unknown; + try { + doc = parseYaml(text); + } catch (err) { + throw new ReplaceError(`yaml parse error: ${err instanceof Error ? err.message : String(err)}`); + } + if (doc === null || doc === undefined) return {}; + if (!isPlainObject(doc)) throw new ReplaceError('top-level config must be a mapping'); + return parseReplacements(doc.replacements); +} + +/** Load the `replacements:` block from an agentbox.yaml path (missing → {}). 
*/ +export async function loadReplacementsSection( + path: string, +): Promise> { + let text: string; + try { + text = await readFile(path, 'utf8'); + } catch (err) { + if ((err as NodeJS.ErrnoException).code === 'ENOENT') return {}; + throw err; + } + return parseReplacementsSection(text); +} diff --git a/packages/ctl/src/supervisor.ts b/packages/ctl/src/supervisor.ts index c872b7b2..70e33d67 100644 --- a/packages/ctl/src/supervisor.ts +++ b/packages/ctl/src/supervisor.ts @@ -1,7 +1,8 @@ import { execFileSync, spawn, type ChildProcess } from 'node:child_process'; +import { createHash } from 'node:crypto'; import { EventEmitter } from 'node:events'; import { createWriteStream, type WriteStream } from 'node:fs'; -import { mkdir, readFile } from 'node:fs/promises'; +import { mkdir, readFile, writeFile } from 'node:fs/promises'; import { join } from 'node:path'; import { describeCommand, @@ -10,6 +11,7 @@ import { type ServiceSpec, type TaskSpec, } from './config.js'; +import { DEFAULT_STATE_DIR } from './types.js'; import { startProbe, type ProbeHandle } from './probe.js'; import { RelayClient } from './relay-client.js'; import { WebProxy } from './web-proxy.js'; @@ -41,6 +43,8 @@ class Ring { export interface RunnerOptions { logDir: string; cwd: string; + /** Directory for idempotent-task completion markers. */ + stateDir: string; spawn?: typeof spawn; setTimer?: (fn: () => void, ms: number) => NodeJS.Timeout; clearTimer?: (h: NodeJS.Timeout) => void; @@ -374,6 +378,11 @@ export class TaskRunner extends EventEmitter implements Unit { private logStream: WriteStream | null = null; private readonly ring = new Ring(RING_SIZE); private readonly spawnFn: typeof spawn; + // True while the async idempotency gate is evaluating, so the scheduler won't + // launch the task twice before its first state transition lands. + private evaluating = false; + // Set by a forced re-run (run-task --force) to bypass the idempotency gate. 
+ private forceNext = false; constructor( public readonly spec: TaskSpec, @@ -417,15 +426,18 @@ export class TaskRunner extends EventEmitter implements Unit { start(): void { if (this.state !== 'pending' && this.state !== 'waiting') return; - this.launch(); + if (this.evaluating) return; + void this.launch(); } /** * Force the task back to pending so the scheduler can re-run it. Used by - * reload when the spec changed, and (PR 5) by the run-task wire op. + * reload when the spec changed, and by the run-task wire op. `force` bypasses + * the idempotency gate on the next launch (run-task --force). */ - resetForRerun(): void { - if (this.state === 'running') return; + resetForRerun(force = false): void { + if (this.state === 'running' || this.evaluating) return; + this.forceNext = force; this.state = 'pending'; this.startedAt = null; this.finishedAt = null; @@ -439,19 +451,114 @@ export class TaskRunner extends EventEmitter implements Unit { this.emit('state', next); } - private launch(): void { + private ensureLogStream(): void { + if (this.logStream) return; + this.logStream = createWriteStream(join(this.opts.logDir, `${this.spec.name}.log`), { + flags: 'a', + }); + this.logStream.on('error', (err) => { + this.appendEvent('stderr', `[ctl] log write error: ${err.message}`); + }); + } + + /** SHA-256 of the resolved command + cwd + env — invalidates on any change. */ + private commandHash(cwd: string): string { + const payload = JSON.stringify({ command: this.spec.command, cwd, env: this.spec.env ?? null }); + return createHash('sha256').update(payload).digest('hex'); + } + + private markerPath(): string { + return join(this.opts.stateDir, 'tasks', this.spec.name); + } + + /** Run the `idempotent.check` probe. Resolves true when it exits 0. 
*/ + private runCheck(command: string, cwd: string): Promise { + return new Promise((resolve) => { + let child: ChildProcess; + try { + child = this.spawnFn('bash', ['-c', command], { + cwd, + env: { ...process.env, PATH: loginShellPath(), ...(this.spec.env ?? {}) }, + stdio: ['ignore', 'pipe', 'pipe'], + }); + } catch (err) { + this.appendEvent( + 'stderr', + `[ctl] idempotent check spawn failed: ${err instanceof Error ? err.message : String(err)}`, + ); + resolve(false); + return; + } + const onLine = (stream: 'stdout' | 'stderr') => (chunk: Buffer) => { + for (const line of chunk.toString('utf8').split(/\r?\n/)) { + if (line.length > 0) this.appendEvent(stream, `[check] ${line}`); + } + }; + child.stdout?.on('data', onLine('stdout')); + child.stderr?.on('data', onLine('stderr')); + child.on('error', () => resolve(false)); + child.on('exit', (code) => resolve(code === 0)); + }); + } + + /** Returns a human reason if the task is already satisfied (skip), else null. */ + private async idempotentSkipReason(cwd: string): Promise { + const idem = this.spec.idempotent; + if (!idem) return null; + if (idem.kind === 'check') { + return (await this.runCheck(idem.command, cwd)) ? 'check passed' : null; + } + try { + const have = (await readFile(this.markerPath(), 'utf8')).trim(); + if (have === this.commandHash(cwd)) return 'marker matches'; + } catch { + // missing/unreadable marker → run the task + } + return null; + } + + private async writeMarker(cwd: string): Promise { + const marker = this.markerPath(); + try { + await mkdir(join(this.opts.stateDir, 'tasks'), { recursive: true }); + await writeFile(marker, `${this.commandHash(cwd)}\n`, 'utf8'); + } catch (err) { + this.appendEvent( + 'stderr', + `[ctl] could not write idempotent marker: ${err instanceof Error ? 
err.message : String(err)}`, + ); + } + } + + private async launch(): Promise { const spec = this.spec; const cwd = resolveCwd(spec.cwd, this.opts.cwd); - - if (!this.logStream) { - this.logStream = createWriteStream(join(this.opts.logDir, `${spec.name}.log`), { - flags: 'a', - }); - this.logStream.on('error', (err) => { - this.appendEvent('stderr', `[ctl] log write error: ${err.message}`); - }); + const force = this.forceNext; + this.forceNext = false; + + // Idempotency gate: skip the command entirely if already satisfied. `force` + // (run-task --force) bypasses it. Note: for non-idempotent tasks there is no + // await here, so launch stays synchronous through to `setState('running')`. + if (spec.idempotent && !force) { + this.evaluating = true; + try { + const reason = await this.idempotentSkipReason(cwd); + if (reason) { + this.ensureLogStream(); + this.appendEvent('stdout', `[ctl] idempotent: ${reason} — skip`); + this.startedAt = new Date(); + this.finishedAt = new Date(); + this.lastExitCode = 0; + this.setState('done'); + return; + } + } finally { + this.evaluating = false; + } } + this.ensureLogStream(); + const { bin, args } = spawnArgs(spec.command); let child: ChildProcess; @@ -491,6 +598,9 @@ export class TaskRunner extends EventEmitter implements Unit { this.finishedAt = new Date(); this.child = null; this.appendEvent('stderr', `[ctl] exited code=${String(code)} signal=${signal ?? 'none'}`); + if (code === 0 && spec.idempotent?.kind === 'marker') { + void this.writeMarker(cwd); + } this.setState(code === 0 ? 'done' : 'failed'); }); child.on('error', (err) => { @@ -514,6 +624,8 @@ export class TaskRunner extends EventEmitter implements Unit { export interface SupervisorOptions { workspace: string; logDir: string; + /** Directory for idempotent-task markers (default {@link DEFAULT_STATE_DIR}). */ + stateDir?: string; spawn?: typeof spawn; /** * Port the in-box WebProxy binds (forwarding to the `expose:` service). 
@@ -636,6 +748,7 @@ export class Supervisor extends EventEmitter { const runner = new ServiceRunner(spec, { logDir: this.opts.logDir, cwd: this.opts.workspace, + stateDir: this.opts.stateDir ?? DEFAULT_STATE_DIR, spawn: this.opts.spawn, }); runner.on('log', (ev) => this.emit('log', ev)); @@ -649,6 +762,7 @@ export class Supervisor extends EventEmitter { const runner = new TaskRunner(spec, { logDir: this.opts.logDir, cwd: this.opts.workspace, + stateDir: this.opts.stateDir ?? DEFAULT_STATE_DIR, spawn: this.opts.spawn, }); runner.on('log', (ev) => this.emit('log', ev)); @@ -721,7 +835,7 @@ export class Supervisor extends EventEmitter { if (!t) throw new Error(`unknown task: ${name}`); if (t.getState() === 'done' && !force) return t.getStatus(); if (t.getState() === 'running') return t.getStatus(); - t.resetForRerun(); + t.resetForRerun(force); this.schedule(); return t.getStatus(); } @@ -993,6 +1107,7 @@ function normalizeTask(t: TaskSpec): unknown { cwd: t.cwd ?? null, env: t.env ?? null, needs: [...t.needs].sort(), + idempotent: t.idempotent ?? null, }; } diff --git a/packages/ctl/src/types.ts b/packages/ctl/src/types.ts index d41a2cb6..7b82c933 100644 --- a/packages/ctl/src/types.ts +++ b/packages/ctl/src/types.ts @@ -307,6 +307,10 @@ export interface ClaudeSessionStatus { export const DEFAULT_SOCKET_PATH = '/run/agentbox/ctl.sock'; export const DEFAULT_CONFIG_PATH = '/workspace/agentbox.yaml'; export const DEFAULT_LOG_DIR = '/var/log/agentbox'; +// Where idempotent-task completion markers live. On the box rootfs (survives +// pause/stop/start and is captured by `docker commit` checkpoints) but NOT under +// /workspace, so markers never show up as untracked git changes. 
+export const DEFAULT_STATE_DIR = '/var/lib/agentbox'; export const DEFAULT_CLAUDE_SESSION_NAME = 'claude'; export const DEFAULT_CODEX_SESSION_NAME = 'codex'; export const DEFAULT_OPENCODE_SESSION_NAME = 'opencode'; diff --git a/packages/ctl/test/carry.test.ts b/packages/ctl/test/carry.test.ts index 59c593c0..3e260c0e 100644 --- a/packages/ctl/test/carry.test.ts +++ b/packages/ctl/test/carry.test.ts @@ -157,6 +157,41 @@ describe('parseCarryRaw', () => { }); }); +describe('carry replace options', () => { + it('parses replaceEnvs + inline replace + rules refs', () => { + const [item] = parseCarryRaw([ + { + src: '~/secrets/.env.prod', + dest: '/workspace/apps/saas/.env', + replaceEnvs: true, + rules: ['box-host'], + replace: [{ from: 'optima.localhost', to: '{{AGENTBOX_BOX_HOST}}' }], + }, + ]); + expect(item?.replaceEnvs).toBe(true); + expect(item?.rules).toEqual(['box-host']); + expect(item?.replace).toEqual([{ from: 'optima.localhost', to: '{{AGENTBOX_BOX_HOST}}' }]); + }); + + it('rejects replaceEnvs of the wrong type', () => { + expect(() => parseCarryRaw([{ src: './a', dest: '~/a', replaceEnvs: 'yes' }])).toThrow( + CarryConfigError, + ); + }); + + it('rejects a replace rule missing to', () => { + expect(() => parseCarryRaw([{ src: './a', dest: '~/a', replace: [{ from: 'x' }] }])).toThrow( + CarryConfigError, + ); + }); + + it('rejects an invalid regex in a replace rule', () => { + expect(() => + parseCarryRaw([{ src: './a', dest: '~/a', replace: [{ from: '(', to: 'y', regex: true }] }]), + ).toThrow(CarryConfigError); + }); +}); + describe('config schema drift', () => { // Ensures `carry` is tolerated as a top-level key in the supervisor's // parseConfig — the supervisor MUST not reject yaml that declares it, diff --git a/packages/ctl/test/config.test.ts b/packages/ctl/test/config.test.ts index 4f9e772f..34969aaf 100644 --- a/packages/ctl/test/config.test.ts +++ b/packages/ctl/test/config.test.ts @@ -3,8 +3,8 @@ import { ConfigError, parseConfig } from 
'../src/config.js'; describe('parseConfig', () => { it('returns empty services for empty or absent doc', () => { - expect(parseConfig('')).toEqual({ services: [], tasks: [] }); - expect(parseConfig('services: {}')).toEqual({ services: [], tasks: [] }); + expect(parseConfig('')).toEqual({ services: [], tasks: [], replacements: {} }); + expect(parseConfig('services: {}')).toEqual({ services: [], tasks: [], replacements: {} }); }); it('parses a minimal service with shell-string command', () => { diff --git a/packages/ctl/test/schema-drift.test.ts b/packages/ctl/test/schema-drift.test.ts index 9aba54c5..6bb0eb48 100644 --- a/packages/ctl/test/schema-drift.test.ts +++ b/packages/ctl/test/schema-drift.test.ts @@ -221,6 +221,50 @@ carry: dest: ~/.agentbox/secrets.env mode: "0600" user: 1000 +`, + }, + { + name: 'task with idempotent: true', + yaml: `tasks:\n install:\n command: pnpm install\n idempotent: true\n`, + }, + { + name: 'task with idempotent check', + yaml: ` +tasks: + seed: + command: pnpm db:seed + idempotent: + check: "psql -tAc 'select 1' | grep -q 1" +`, + }, + { + name: 'top-level replacements block', + yaml: ` +replacements: + box-host: + - from: '\\.optima\\.localhost' + to: '.{{AGENTBOX_BOX_NAME}}.localhost' + regex: true +services: + web: + command: pnpm dev +`, + }, + { + name: 'carry mapping with replaceEnvs + replace + rules', + yaml: ` +replacements: + box-host: + - from: optima.localhost + to: '{{AGENTBOX_BOX_HOST}}' +carry: + - src: ~/secrets/.env.prod + dest: /workspace/apps/saas/.env + replaceEnvs: true + rules: [box-host] + replace: + - from: PLACEHOLDER + to: '{{AGENTBOX_BOX_NAME}}' `, }, ]; @@ -538,6 +582,37 @@ services: yaml: `carry: 42\n`, schemaOnly: true, }, + { + name: 'idempotent as a string', + yaml: `tasks:\n build:\n command: pnpm build\n idempotent: "yes"\n`, + }, + { + name: 'idempotent object with unknown key', + yaml: `tasks:\n build:\n command: pnpm build\n idempotent:\n probe: foo\n`, + }, + { + name: 'replacements rule 
missing to', + yaml: `replacements:\n r:\n - from: a\n`, + }, + { + name: 'replacements rule unknown key', + yaml: `replacements:\n r:\n - from: a\n to: b\n bogus: 1\n`, + }, + { + name: 'replacements invalid regex (validator-only)', + yaml: `replacements:\n r:\n - from: "(unclosed"\n to: b\n regex: true\n`, + runtimeOnly: true, + }, + { + name: 'carry replace rule missing to (schema-only)', + yaml: `carry:\n - src: ./a\n dest: ~/a\n replace:\n - from: x\n`, + schemaOnly: true, + }, + { + name: 'carry replaceEnvs wrong type (schema-only)', + yaml: `carry:\n - src: ./a\n dest: ~/a\n replaceEnvs: "yes"\n`, + schemaOnly: true, + }, ]; function runtimeAccepts(yaml: string): boolean { diff --git a/packages/ctl/test/socket.test.ts b/packages/ctl/test/socket.test.ts index a627e0ba..3a101372 100644 --- a/packages/ctl/test/socket.test.ts +++ b/packages/ctl/test/socket.test.ts @@ -35,6 +35,7 @@ describe('socket protocol', () => { sup = new Supervisor({ workspace: dir, logDir: dir }); await sup.init({ tasks: [], + replacements: {}, services: [ spec({ name: 'svc', diff --git a/packages/ctl/test/supervisor-idempotent.test.ts b/packages/ctl/test/supervisor-idempotent.test.ts new file mode 100644 index 00000000..6ae93d2a --- /dev/null +++ b/packages/ctl/test/supervisor-idempotent.test.ts @@ -0,0 +1,128 @@ +import { existsSync, readFileSync } from 'node:fs'; +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { Supervisor } from '../src/supervisor.js'; +import type { CtlConfig, TaskSpec } from '../src/config.js'; + +function taskCfg(task: TaskSpec): CtlConfig { + return { services: [], tasks: [task], replacements: {} }; +} + +async function waitForTaskDone(sup: Supervisor, name: string, timeoutMs = 3000): Promise { + const start = Date.now(); + while (Date.now() - start < timeoutMs) { + const t = sup.listTasks().find((x) => x.name === 
name); + if (t && (t.state === 'done' || t.state === 'failed')) return; + await new Promise((r) => setTimeout(r, 10)); + } + throw new Error(`task ${name} did not finish`); +} + +function lineCount(path: string): number { + if (!existsSync(path)) return 0; + return readFileSync(path, 'utf8').split('\n').filter((l) => l.length > 0).length; +} + +describe('idempotent tasks', () => { + let dir: string; + let stateDir: string; + + beforeEach(async () => { + dir = await mkdtemp(join(tmpdir(), 'ctl-idem-')); + stateDir = join(dir, 'state'); + }); + + afterEach(async () => { + await rm(dir, { recursive: true, force: true }); + }); + + const mk = () => new Supervisor({ workspace: dir, logDir: dir, stateDir }); + + it('marker form: skips on a warm boot, leaving the command unrun', async () => { + const ran = join(dir, 'ran'); + const task = { name: 't', command: `: > '${ran}'`, needs: [], idempotent: { kind: 'marker' } as const }; + + const sup1 = mk(); + await sup1.init(taskCfg(task)); + await waitForTaskDone(sup1, 't'); + expect(existsSync(ran)).toBe(true); + await sup1.stopAll(); + + // Remove the side-effect file; a second boot must NOT recreate it (skipped). 
+ await rm(ran); + const sup2 = mk(); + await sup2.init(taskCfg(task)); + await waitForTaskDone(sup2, 't'); + expect(existsSync(ran)).toBe(false); + await sup2.stopAll(); + }); + + it('marker form: re-runs when the command changes', async () => { + const sup1 = mk(); + await sup1.init( + taskCfg({ name: 't', command: 'true', needs: [], idempotent: { kind: 'marker' } }), + ); + await waitForTaskDone(sup1, 't'); + await sup1.stopAll(); + + const ran2 = join(dir, 'ran2'); + const sup2 = mk(); + await sup2.init( + taskCfg({ name: 't', command: `: > '${ran2}'`, needs: [], idempotent: { kind: 'marker' } }), + ); + await waitForTaskDone(sup2, 't'); + expect(existsSync(ran2)).toBe(true); // changed command invalidated the marker + await sup2.stopAll(); + }); + + it('check form: skips when the probe exits 0, runs when it fails', async () => { + const satisfied = join(dir, 'satisfied'); + const runs = join(dir, 'runs'); + const task = { + name: 't', + command: `echo x >> '${runs}'`, + needs: [], + idempotent: { kind: 'check', command: `test -f '${satisfied}'` } as const, + }; + + // Probe fails (no satisfied file) → task runs. + const sup1 = mk(); + await sup1.init(taskCfg(task)); + await waitForTaskDone(sup1, 't'); + expect(lineCount(runs)).toBe(1); + await sup1.stopAll(); + + // Probe now passes → task skips (runs file unchanged). No marker is written. 
+ await import('node:fs/promises').then((fs) => fs.writeFile(satisfied, '')); + expect(existsSync(join(stateDir, 'tasks', 't'))).toBe(false); + const sup2 = mk(); + await sup2.init(taskCfg(task)); + await waitForTaskDone(sup2, 't'); + expect(lineCount(runs)).toBe(1); + await sup2.stopAll(); + }); + + it('run-task --force bypasses the marker and re-runs', async () => { + const runs = join(dir, 'runs'); + const task = { + name: 't', + command: `echo x >> '${runs}'`, + needs: [], + idempotent: { kind: 'marker' } as const, + }; + const sup = mk(); + await sup.init(taskCfg(task)); + await waitForTaskDone(sup, 't'); + expect(lineCount(runs)).toBe(1); + + await sup.runTask('t', true); + const start = Date.now(); + while (lineCount(runs) < 2 && Date.now() - start < 3000) { + await new Promise((r) => setTimeout(r, 10)); + } + expect(lineCount(runs)).toBe(2); + await sup.stopAll(); + }); +}); diff --git a/packages/ctl/test/supervisor.test.ts b/packages/ctl/test/supervisor.test.ts index f99fe933..cf598cf3 100644 --- a/packages/ctl/test/supervisor.test.ts +++ b/packages/ctl/test/supervisor.test.ts @@ -24,7 +24,7 @@ function taskSpec(over: Partial & { name: string; command: string | st } function cfg(services: ServiceSpec[], tasks: TaskSpec[] = []): CtlConfig { - return { services, tasks }; + return { services, tasks, replacements: {} }; } async function waitFor(fn: () => T | null | undefined, timeoutMs = 2000): Promise { diff --git a/packages/sandbox-cloud/src/cloud-provider.ts b/packages/sandbox-cloud/src/cloud-provider.ts index 2b693029..508465a8 100644 --- a/packages/sandbox-cloud/src/cloud-provider.ts +++ b/packages/sandbox-cloud/src/cloud-provider.ts @@ -36,6 +36,7 @@ import { readState, recordBox, removeBoxRecord, + renderCarryEntries, } from '@agentbox/sandbox-core'; import { buildTmuxConfigShellSnippet, @@ -726,10 +727,21 @@ export function createCloudProvider( | undefined; if (req.carry && req.carry.length > 0) { log(`carry: copying ${String(req.carry.length)} host 
path(s) into the box`); + const entries = await renderCarryEntries( + req.carry, + { + name, + id, + kind: 'cloud', + hostWorkspace: req.workspacePath, + projectRoot: req.projectRoot, + }, + log, + ); const result = await uploadCarryPaths({ backend, handle, - entries: req.carry, + entries, onLog: log, }); log(`carry: copied ${String(result.copied)}/${String(req.carry.length)} entry/entries`); diff --git a/packages/sandbox-core/src/carry-render.ts b/packages/sandbox-core/src/carry-render.ts new file mode 100644 index 00000000..a557847f --- /dev/null +++ b/packages/sandbox-core/src/carry-render.ts @@ -0,0 +1,73 @@ +import { mkdtemp, readFile, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { basename, join } from 'node:path'; +import { applyReplacements, type ResolvedCarryEntry } from '@agentbox/core'; + +/** + * Box facts used to fill `{{AGENTBOX_*}}` placeholders in carried files. The + * placeholder *names* match the in-box `agentbox-ctl render` whitelist; here the + * values come from the create context (the box is named but not yet booted). + */ +export interface CarryBoxContext { + name?: string; + id?: string; + kind?: string; + /** Host workspace path (mirrors in-box AGENTBOX_HOST_WORKSPACE). */ + hostWorkspace?: string; + projectRoot?: string; +} + +/** Build the whitelist placeholder context (and derive AGENTBOX_BOX_HOST). 
*/ +export function carryPlaceholderContext(ctx: CarryBoxContext): Record { + const out: Record = {}; + if (ctx.name) out.AGENTBOX_BOX_NAME = ctx.name; + if (ctx.id) out.AGENTBOX_BOX_ID = ctx.id; + if (ctx.kind) out.AGENTBOX_BOX_KIND = ctx.kind; + if (ctx.hostWorkspace) out.AGENTBOX_HOST_WORKSPACE = ctx.hostWorkspace; + if (ctx.projectRoot) out.AGENTBOX_PROJECT_ROOT = ctx.projectRoot; + if (ctx.name) out.AGENTBOX_BOX_HOST = `${ctx.name}.localhost`; + return out; +} + +/** + * Render carry entries that opt into `replaceEnvs`/`replace`: read each file + * host-side, apply the substitutions, write the result to a temp file, and + * repoint `absSrc` at it so the existing per-provider tar/copy step transfers + * the rendered content. Entries without replace options pass through unchanged. + * Returns a new array (inputs are not mutated). + */ +export async function renderCarryEntries( + entries: ResolvedCarryEntry[], + ctx: CarryBoxContext, + onLog?: (line: string) => void, +): Promise { + const needsRender = entries.some( + (e) => e.kind === 'file' && (e.replaceEnvs || (e.replace && e.replace.length > 0)), + ); + if (!needsRender) return entries; + + const context = carryPlaceholderContext(ctx); + const stage = await mkdtemp(join(tmpdir(), 'agentbox-carry-render-')); + const out: ResolvedCarryEntry[] = []; + for (const [i, entry] of entries.entries()) { + const wants = entry.kind === 'file' && (entry.replaceEnvs || (entry.replace?.length ?? 
0) > 0); + if (!wants) { + out.push(entry); + continue; + } + const content = await readFile(entry.absSrc, 'utf8'); + const rendered = applyReplacements(content, { + env: entry.replaceEnvs, + rules: entry.replace, + context, + onWarn: (msg) => onLog?.(`carry: ${entry.rawSrc}: ${msg}`), + }); + const tmp = join(stage, `${String(i)}-${basename(entry.absSrc)}`); + await writeFile(tmp, rendered, 'utf8'); + out.push({ ...entry, absSrc: tmp, bytes: Buffer.byteLength(rendered) }); + onLog?.(`carry: rendered ${entry.rawSrc} (${entry.replaceEnvs ? 'env' : ''}${ + entry.replace?.length ? `${entry.replaceEnvs ? '+' : ''}${String(entry.replace.length)} rule(s)` : '' + })`); + } + return out; +} diff --git a/packages/sandbox-core/src/index.ts b/packages/sandbox-core/src/index.ts index fa072c72..b5158237 100644 --- a/packages/sandbox-core/src/index.ts +++ b/packages/sandbox-core/src/index.ts @@ -19,6 +19,11 @@ export { type DetectedGitRepo, } from './git-detect.js'; export { hostOpenCommand } from './host-open.js'; +export { + carryPlaceholderContext, + renderCarryEntries, + type CarryBoxContext, +} from './carry-render.js'; export { computeContextSha256, DOCKER_CONTEXT_FILE_MAP, diff --git a/packages/sandbox-core/test/carry-render.test.ts b/packages/sandbox-core/test/carry-render.test.ts new file mode 100644 index 00000000..94da910a --- /dev/null +++ b/packages/sandbox-core/test/carry-render.test.ts @@ -0,0 +1,63 @@ +import { mkdtemp, readFile, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import type { ResolvedCarryEntry } from '@agentbox/core'; +import { carryPlaceholderContext, renderCarryEntries } from '../src/carry-render.js'; + +const BOX = { name: 'optima-abc', id: 'box_abc', kind: 'docker', hostWorkspace: '/w', projectRoot: '/w' }; + +function fileEntry(absSrc: string, over: Partial = {}): ResolvedCarryEntry { + return { + rawSrc: 
absSrc, + rawDest: 'apps/.env', + absSrc, + absDest: 'apps/.env', + kind: 'file', + optional: false, + ...over, + }; +} + +describe('carryPlaceholderContext', () => { + it('derives AGENTBOX_BOX_HOST from the name', () => { + expect(carryPlaceholderContext(BOX).AGENTBOX_BOX_HOST).toBe('optima-abc.localhost'); + }); +}); + +describe('renderCarryEntries', () => { + let dir: string; + beforeEach(async () => { + dir = await mkdtemp(join(tmpdir(), 'carry-render-')); + }); + afterEach(async () => { + await rm(dir, { recursive: true, force: true }); + }); + + it('passes entries through untouched when none opt in', async () => { + const src = join(dir, 'a'); + await writeFile(src, 'plain'); + const entries = [fileEntry(src)]; + const out = await renderCarryEntries(entries, BOX); + expect(out).toBe(entries); // same array reference — no work done + }); + + it('renders env placeholders + rules into a temp file, repointing absSrc', async () => { + const src = join(dir, '.env.prod'); + await writeFile(src, 'URL=https://optima.localhost\nBOX={{AGENTBOX_BOX_NAME}}\n'); + const [out] = await renderCarryEntries( + [ + fileEntry(src, { + replaceEnvs: true, + replace: [{ from: 'optima.localhost', to: '{{AGENTBOX_BOX_HOST}}' }], + }), + ], + BOX, + ); + expect(out?.absSrc).not.toBe(src); // repointed at the rendered temp + const rendered = await readFile(out!.absSrc, 'utf8'); + expect(rendered).toBe('URL=https://optima-abc.localhost\nBOX=optima-abc\n'); + // The original host file is untouched. 
+ expect(await readFile(src, 'utf8')).toContain('optima.localhost'); + }); +}); diff --git a/packages/sandbox-docker/src/create.ts b/packages/sandbox-docker/src/create.ts index d901f95e..92668ace 100644 --- a/packages/sandbox-docker/src/create.ts +++ b/packages/sandbox-docker/src/create.ts @@ -3,6 +3,7 @@ import { homedir } from 'node:os'; import { basename, join, resolve } from 'node:path'; import { execa } from 'execa'; import { ConfigError, loadConfig } from '@agentbox/ctl'; +import { renderCarryEntries } from '@agentbox/sandbox-core'; import { loadEffectiveConfig } from '@agentbox/config'; import { buildClaudeMounts, @@ -1114,9 +1115,14 @@ export async function createBox(opts: CreateBoxOptions): Promise { let carrySummary: BoxRecord['carry'] | undefined; if (opts.carry && opts.carry.length > 0) { log(`carry: copying ${String(opts.carry.length)} host path(s) into the box`); + const entries = await renderCarryEntries( + opts.carry, + { name, id, kind: 'docker', hostWorkspace: workspace, projectRoot: opts.projectRoot }, + log, + ); const result = await copyCarryPathsToBox({ container: containerName, - entries: opts.carry, + entries, onLog: log, }); log(`carry: copied ${String(result.copied)}/${String(opts.carry.length)} entry/entries`); From c3b0d6722243ed280dba9efe5e7cdf74ab48cfc0 Mon Sep 17 00:00:00 2001 From: Marco D'Alia Date: Sun, 7 Jun 2026 09:54:29 +0100 Subject: [PATCH 02/12] docs(agentbox.yaml): document idempotent tasks + replacement engine - agentbox-yaml.mdx: idempotent field, replacements section, carry replace fields, placeholder table - in-box-supervisor.md + features.md: implementation notes - agentbox-setup skill: teach idempotent:/render/replaceEnvs over manual markers + sed - agentbox-info skill: one-line pointer to the new declarative fields --- apps/cli/share/agentbox-setup/SKILL.md | 81 +++++++++----- .../share/host-skills/agentbox-info/SKILL.md | 2 +- apps/web/content/docs/agentbox-yaml.mdx | 100 +++++++++++++++--- docs/features.md | 3 +- 
docs/in-box-supervisor.md | 4 +- 5 files changed, 143 insertions(+), 47 deletions(-) diff --git a/apps/cli/share/agentbox-setup/SKILL.md b/apps/cli/share/agentbox-setup/SKILL.md index d5cd13cc..57b17dc4 100644 --- a/apps/cli/share/agentbox-setup/SKILL.md +++ b/apps/cli/share/agentbox-setup/SKILL.md @@ -46,35 +46,30 @@ Look at `/workspace`: - **Tasks** = one-shot. `pnpm install`, DB migrations, codegen, fixture loaders, install apt packages. Wire dependent services with `needs:` so they wait for the task to finish successfully. - Names: must match `[A-Za-z0-9_-]+`. Task names and service names share a namespace — no collisions. - No cycles in `needs:`. -- **Always generate a dependency-install task** and make it the root of the `needs:` graph (every service that needs deps gets `needs: [install, …]`). Future boxes start from a snapshot of the final filesystem so they won't need this, but updates or moving to a cloud provider might need to rebuild the container from scratch. The filesystem can be then later captured by `agentbox-ctl checkpoint --set-default`. The task must be **idempotent and self-healing**: `agentbox-ctl` re-runs pending tasks on every box stop/start (the daemon dies with the container and is relaunched), so a plain `rm -rf node_modules && install` would wipe + reinstall on every start. Guard the rebuild with a marker file *inside* `node_modules` (the `.agentbox-installed` convention AgentBox uses internally): rebuild only when the marker is absent (fresh box), and be a fast no-op once it exists. Detect the package manager from the lockfile — never hardcode `pnpm`. See the worked example below. +- **Always generate a dependency-install task** and make it the root of the `needs:` graph (every service that needs deps gets `needs: [install, …]`). Future boxes start from a snapshot of the final filesystem so they won't need this, but updates or moving to a cloud provider might need to rebuild the container from scratch. 
The filesystem can then be captured later by `agentbox-ctl checkpoint --set-default`. The task must be **idempotent**: `agentbox-ctl` re-runs pending tasks on every box stop/start (the daemon dies with the container and is relaunched), so an unguarded install would reinstall on every start. The clean way is the **`idempotent: true`** field — the supervisor stores a marker keyed by a hash of the command and skips the task on warm boots automatically (the marker lives at `/var/lib/agentbox/tasks/`, on the box rootfs, captured by checkpoints, never polluting `/workspace`). Editing the command re-runs it. Detect the package manager from the lockfile — never hardcode `pnpm`. See the worked example below. - **Add a comment to the beginning** of the file to explain what you did and what issues you encountered, so that future run might use this information in case the project evolves and you need to update the agentbox.yaml file. ### Stateful services: data persistence & re-seeding (read this for databases) **A checkpoint does NOT capture docker-in-docker data.** `agentbox checkpoint` is a `docker commit` of the box's writable filesystem (the system + `/workspace`). The in-box `dockerd` keeps its storage in a *separate* per-box volume (`/var/lib/docker`), which is **not** part of that image — it's fresh on every new box and wiped on `agentbox destroy`. So a database or cache you run as a **docker container** (e.g. `docker run … postgres`) starts **empty on every new box** created from a checkpoint (every `agentbox claude` / `agentbox create`), even though `/workspace` and any marker files you wrote were restored. (A DB run as a **native process** with its data dir on the box filesystem — e.g. `postgres -D /var/lib/postgresql/data` — *is* captured by the checkpoint, since it lives in the writable layer.)
-**Consequence for migrate/seed tasks of a containerized DB: do not gate them on a filesystem marker.** A marker like `node_modules/.agentbox-installed` is correct for deps (they live in `/workspace`, which the checkpoint captures), but **wrong** for DB data living in a docker volume: the marker is restored from the checkpoint while the DB is empty, so a marker-guarded seed wrongly skips and the app boots against an empty database. Instead, **gate on the actual data** — connect to the DB and check whether a sentinel table/row exists, and seed only when it's missing: +**Consequence for migrate/seed tasks of a containerized DB: do NOT use `idempotent: true` (the marker form).** A command-hash marker is correct for deps (they live in `/workspace`, which the checkpoint captures), but **wrong** for DB data living in a docker volume: the marker is restored from the checkpoint while the DB is empty, so a marker-guarded seed wrongly skips and the app boots against an empty database. Instead use the **`idempotent: { check: <command> }`** form — the probe runs first and the seed runs unless the probe exits 0, and **no marker is written** (the DB is the source of truth). Gate on the actual data: ```yaml seed: - # Re-seed when the DB is empty. The postgres data lives in the in-box - # docker volume, which is NOT captured by `agentbox checkpoint` — so a box - # started from a checkpoint has the workspace warm but an empty DB. We can't - # use a filesystem marker here (it would be restored while the DB is blank); - # instead probe the DB and seed only if the data is absent. Fast no-op once + # Re-seed when the DB is empty. The postgres data lives in the in-box docker + # volume, which is NOT captured by `agentbox checkpoint` — so a box started + # from a checkpoint has the workspace warm but an empty DB. The marker form + # would be restored while the DB is blank and wrongly skip; the `check` probe + # gates on the data itself. Exit 0 = already seeded, skip.
Fast no-op once # the data is present. - command: | - set -e - export PGPASSWORD=postgres - # Probe for existing data. If the table is missing the query errors, - # stderr is suppressed, stdout is empty, the grep fails — so we seed. - if psql -h 127.0.0.1 -p 5432 -U postgres -d app -tAc \ - "SELECT EXISTS (SELECT 1 FROM users LIMIT 1)" 2>/dev/null | grep -q t; then - echo "data present — skip seed" - exit 0 - fi - pnpm db:seed + command: pnpm db:seed needs: [install, migrate] + idempotent: + check: | + export PGPASSWORD=postgres + psql -h 127.0.0.1 -p 5432 -U postgres -d app -tAc \ + "SELECT EXISTS (SELECT 1 FROM users LIMIT 1)" 2>/dev/null | grep -q t ``` **Lifecycle nuance (this is why the data check, not a marker, is right):** @@ -148,22 +143,19 @@ tasks: # Idempotent install. /workspace is the container's writable filesystem, so # node_modules persists across pause/stop/start and is captured by # `agentbox checkpoint`. The host's node_modules is macOS-native and is - # never copied in, so force a clean Linux build the first time — but skip - # on every subsequent box start (agentbox-ctl re-runs pending tasks after - # stop/start). Adjust the lockfile detection to the project's package - # manager. + # never copied in, so the first Linux install runs; `idempotent: true` then + # skips it on every subsequent box start (the supervisor stores a marker + # keyed by a hash of the command). Adjust the lockfile detection to the + # project's package manager. 
install: command: | set -e - MARKER=node_modules/.agentbox-installed - [ -f "$MARKER" ] && { echo "deps installed (marker present) — skip"; exit 0; } - apt-get update && apt-get install -y postgresql-client - rm -rf node_modules + sudo apt-get update && sudo apt-get install -y postgresql-client if [ -f pnpm-lock.yaml ]; then corepack enable >/dev/null 2>&1 || true pnpm install --frozen-lockfile || pnpm install fi - touch "$MARKER" + idempotent: true migrate: command: pnpm db:migrate @@ -258,6 +250,37 @@ On Vercel: this actually STOPS the sandbox, so warn the user about it. Also the - For Nextjs/Vite/Tasnstack projects, makes sure to forward also websocket for hot reload. -- Service like flask, nextjs, BETTER_AUTH_URL, NEXT_PUBLIC_APP_URL should use the .localhost url for the local development so that on the host it will use the same url as the box. +- Services and URL settings such as flask, nextjs, `BETTER_AUTH_URL`, and `NEXT_PUBLIC_APP_URL` should use the box's `<box-name>.localhost` URL for local development, so that the host resolves the app at the same URL as the box. Render this automatically instead of hand-writing `sed` — see section 11 below. + +- The `install` task above uses `idempotent: true`, so it is a no-op on warm boots. Do **not** wrap it in a manual marker check too. To force a one-off rebuild, run `agentbox-ctl run-task install --force` (which bypasses the idempotent marker), or edit the command (a changed command invalidates the hash and re-runs). + +## 11. Pin URLs / render config files (env, secrets) + +Many apps hard-code a hostname (e.g. `optima.localhost`) or read a gitignored `.env`. Instead of long `sed` commands in a task, use the built-ins: + +- **`agentbox-ctl render <file>`** — a declarative `sed` for files already in the workspace. `--env` substitutes `{{AGENTBOX_*}}` placeholders; `--rules <names>` applies a named rule-set from the top-level `replacements:` block; `--rule 'from=>to'` / `--rule-regex 'pat=>repl'` are inline. Write to `--out <path>` (or `--in-place`).
The whitelist placeholders are `{{AGENTBOX_BOX_NAME}}`, `{{AGENTBOX_BOX_HOST}}` (= `<box-name>.localhost`), `{{AGENTBOX_BOX_ID}}`, `{{AGENTBOX_BOX_KIND}}`, `{{AGENTBOX_HOST_WORKSPACE}}`, `{{AGENTBOX_PROJECT_ROOT}}`. + + Render a gitignored `.env` from a committed `env.example` on every boot, pinning the URLs to this box: + + ```yaml + replacements: + box-host: + - { from: '\.optima\.localhost', to: '.{{AGENTBOX_BOX_NAME}}.localhost', regex: true } + + tasks: + env: + command: agentbox-ctl render apps/saas/env.example --out apps/saas/.env --env --rules box-host + # Re-run when the rendered .env doesn't yet point at this box. + idempotent: + check: "grep -q '{{AGENTBOX_BOX_HOST}}' apps/saas/.env" + ``` + +- **`carry:` + `replaceEnvs`/`replace`/`rules`** — for a host-only file (e.g. a real `.env` with secrets that never lives in the repo), carry it in and render it host-side in one step (file entries only): -- The `install` task is intentionally a no-op once `node_modules/.agentbox-installed` exists. Do **not** remove the marker guard to "force a fresh install" — that reinstalls on every box start. To force a one-off rebuild, delete `node_modules` (or just the marker) then run `agentbox-ctl reload`. + ```yaml + carry: + - src: ~/secrets/optima.env + dest: /workspace/apps/saas/.env + replaceEnvs: true + rules: [box-host] + ``` diff --git a/apps/cli/share/host-skills/agentbox-info/SKILL.md b/apps/cli/share/host-skills/agentbox-info/SKILL.md index c33c1f5d..13db2df4 100644 --- a/apps/cli/share/host-skills/agentbox-info/SKILL.md +++ b/apps/cli/share/host-skills/agentbox-info/SKILL.md @@ -237,7 +237,7 @@ Per-project numeric index (`1`, `2`, …) and friendly name (`review`, `smoke`) 2. **Use `-i` whenever the user asks for parallel agent work** rather than spawning multiple foreground sessions. Then point them at `agentbox dashboard` to watch progress. 3. **Pick the provider deliberately.** `docker` is the fast default.
`--provider hetzner` gives a real VPS (heavier, isolated, requires `agentbox prepare --provider hetzner` once). `--provider vercel` is the managed cloud option. 4. **Cross-check before recommending a command.** If a flag isn't listed here, run `agentbox --help` (it's safe and read-only) before suggesting it to the user. -5. **`/agentbox-setup` is a different skill.** It runs *inside* a box to generate `/workspace/agentbox.yaml`. Don't conflate it with `/agentbox` (host-side fork) or this reference skill. +5. **`/agentbox-setup` is a different skill.** It runs *inside* a box to generate `/workspace/agentbox.yaml`. Don't conflate it with `/agentbox` (host-side fork) or this reference skill. When authoring `agentbox.yaml`, prefer the declarative `idempotent: true` / `idempotent: { check }` task field over hand-rolled marker/probe guards, and `agentbox-ctl render` / carry `replaceEnvs` over `sed` for pinning env URLs to `{{AGENTBOX_BOX_HOST}}`. ## Reference diff --git a/apps/web/content/docs/agentbox-yaml.mdx b/apps/web/content/docs/agentbox-yaml.mdx index 68250214..060d94f7 100644 --- a/apps/web/content/docs/agentbox-yaml.mdx +++ b/apps/web/content/docs/agentbox-yaml.mdx @@ -19,12 +19,13 @@ In-repo examples use a local relative path instead; published projects use the U ## Top-level keys -All four top-level keys are optional, and unknown keys are rejected (`additionalProperties: false`). A missing or empty `agentbox.yaml` is completely fine — `create` does not fail without one. +All top-level keys are optional, and unknown keys are rejected (`additionalProperties: false`). A missing or empty `agentbox.yaml` is completely fine — `create` does not fail without one. | Key | Read by | Purpose | | --- | --- | --- | | `services` | supervisor | Long-running processes (map of name → spec). | | `tasks` | supervisor | One-shot units that run to completion before dependents. 
| +| `replacements` | supervisor + host | Named reusable text-substitution rule-sets — see [replacements](#replacements). | | `ide` | host (`agentbox code`) | VS Code attach customizations; the supervisor ignores it. | | `defaults` | host (`@agentbox/config`) | Project-level AgentBox config defaults. | @@ -93,31 +94,41 @@ Use the array form of `command` to avoid shell quoting; use the string form (`ba ## Tasks -A task is a one-shot unit that runs to completion. It accepts **only** four fields: `command`, `cwd`, `env`, and `needs`. Tasks cannot have `restart`, `autostart`, `backoff`, or `ready_when` — the schema rejects them. That is the key distinction from services. +A task is a one-shot unit that runs to completion. It accepts **only** five fields: `command`, `cwd`, `env`, `needs`, and `idempotent`. Tasks cannot have `restart`, `autostart`, `backoff`, or `ready_when` — the schema rejects them. That is the key distinction from services. A task moves through `pending → waiting → running → done`, and can land in `failed` or `skipped`. Tasks run before dependent services via `needs:`. Typical use: install deps, build, seed a database. -Make tasks **idempotent** — they re-run on every supervisor restart (which happens on box start, not just create). Guard slow paths with a marker file, as in the repo-root `agentbox.yaml`: +Tasks **re-run on every supervisor restart** (which happens on box start, not just create). So a task must be idempotent. 
The `idempotent` field makes the supervisor skip an already-satisfied task for you — no more hand-rolled marker checks: ```yaml tasks: install: - command: | - set -e - MARKER=node_modules/.agentbox-installed - if [ -f "$MARKER" ]; then - echo "deps already installed (marker present) — skipping" - exit 0 - fi - corepack enable >/dev/null 2>&1 || true - pnpm install --frozen-lockfile || pnpm install - touch "$MARKER" + command: pnpm install --frozen-lockfile + idempotent: true # skip while the command is unchanged build: command: pnpm build needs: [install] ``` +`idempotent` takes two forms: + +| Form | Behavior | +| --- | --- | +| `idempotent: true` | The supervisor stores a marker keyed by a **hash of the resolved command**. A warm boot skips the task while the hash matches; editing the command invalidates it and re-runs. The marker lives at `/var/lib/agentbox/tasks/` (box rootfs — captured by checkpoints, never under `/workspace`). | +| `idempotent: { check: <command> }` | Run the probe before launching; **exit 0 means already satisfied** (skip). No marker is written — the probe is the source of truth. Use this when the thing you'd guard on lives **outside** the checkpointed filesystem (e.g. a containerized database, whose data is in the in-box docker volume, not the checkpoint). | + +```yaml +tasks: + seed: + command: pnpm db:seed + needs: [migrate] + # Probe the DB itself — a file marker would be restored from the checkpoint + # while the containerized DB starts empty, wrongly skipping the seed. + idempotent: + check: "psql -tAc \"select 1 from \\\"user\\\" limit 1\" | grep -q 1" +``` + Re-run a task in-box: ```console @@ -125,10 +136,10 @@ $ agentbox-ctl run-task install $ agentbox-ctl run-task install --force ``` -`run-task` resets the task to pending so the scheduler reruns it; it is a no-op on an already-`done` task unless you pass `--force`. +`run-task` resets the task to pending so the scheduler reruns it; it is a no-op on an already-`done` task unless you pass `--force`.
`--force` also bypasses the `idempotent` skip (marker or check) and, for the marker form, rewrites the marker. -Tasks re-run on every daemon start, not just at create. A non-idempotent task (an unguarded `git init`, a destructive migration) will fire repeatedly — guard it with a marker file. +Tasks re-run on every daemon start, not just at create. A non-idempotent task (an unguarded `git init`, a destructive migration) will fire repeatedly — declare `idempotent:` (or guard it yourself). Prefer the `{ check }` form for state that a checkpoint does not capture (containerized DB data), where a filesystem marker would desync. ## ready_when @@ -265,6 +276,9 @@ See [configuration](/docs/configuration) for the full key set and precedence. No | `user` | Numeric uid that owns the file in-box (default `1000` = `vscode`; `0` keeps it root-owned). | | `exclude` | List of tar globs / bare dir names to drop when copying a **directory** (additive on top of the defaults below). | | `optional` | `true` skips a missing `src` silently instead of erroring. | +| `replaceEnvs` | `true` substitutes `{{AGENTBOX_*}}` placeholders in the file content host-side before copying (**file entries only**). See [replacements](#replacements). | +| `replace` | Inline replacement rules applied (in order) before copying (**file only**). | +| `rules` | Names of top-level [`replacements:`](#replacements) rule-sets to apply (**file only**). | A shorthand string form `"src=dest"` (or just `"src"` to mirror) is also accepted. @@ -300,6 +314,62 @@ You can also set `AGENTBOX_CARRY_YES=1` or `AGENTBOX_CARRY=skip`. Note that `-y` Mark credential entries `optional: true` so a box still comes up when a given file or agent isn't installed on the host. For how non-carry env files reach the box, see [environment](/docs/environment); for how git state is seeded, see [sync and git](/docs/sync-and-git) and [teleport a project](/docs/teleport-a-project). 
+## replacements + +`replacements` declares **reusable, named text-substitution rule-sets** that both the `carry:` block (host-side) and the in-box `agentbox-ctl render` CLI can reference by name. Each rule is `{ from, to }` with optional `regex` / `flags`; `to` may contain `{{AGENTBOX_*}}` placeholders. + +```yaml +replacements: + box-host: + # Repoint a hard-coded hostname at this box's published URL. + - from: '\.optima\.localhost' + to: '.{{AGENTBOX_BOX_NAME}}.localhost' + regex: true +``` + +Two ways to apply substitutions: + +**1. Carry-time (host→box files).** A `carry:` file entry can opt into `replaceEnvs: true` (placeholder substitution) and/or `replace:` (inline rules) and/or `rules:` (named refs). The file is rendered host-side into a temporary file before it is copied into the box — the original host file is never modified, and the box name is known by then: + +```yaml +carry: + - src: ~/secrets/.env.prod + dest: /workspace/apps/saas/.env + replaceEnvs: true + rules: [box-host] +``` + +**2. In-box (files already in the workspace).** `agentbox-ctl render` is a declarative `sed` replacement — handy for rendering a gitignored `.env` from a committed `env.example` on every boot: + +```yaml +tasks: + env: + command: agentbox-ctl render apps/saas/env.example --out apps/saas/.env --env --rules box-host + idempotent: + check: "grep -q '{{AGENTBOX_BOX_HOST}}' apps/saas/.env" +``` + +### Placeholders + +`replaceEnvs` / `--env` substitute a fixed **whitelist** of `{{...}}` placeholders (a stray `{{FOO}}` is left untouched, and secrets are never substitutable): + +| Placeholder | Value | +| --- | --- | +| `{{AGENTBOX_BOX_NAME}}` | The box name. | +| `{{AGENTBOX_BOX_HOST}}` | The published portless host, `<box-name>.localhost`. | +| `{{AGENTBOX_BOX_ID}}` | The box id. | +| `{{AGENTBOX_BOX_KIND}}` | `docker` or `cloud`. | +| `{{AGENTBOX_HOST_WORKSPACE}}` | Host workspace path. | +| `{{AGENTBOX_PROJECT_ROOT}}` | Project root.
| + +Arbitrary substitutions go through explicit `replace:` / `rules:` rules, not the placeholder whitelist. + +`agentbox-ctl render <file>` flags: `--out <path>` (or `--in-place`, else stdout), `--env` (placeholder substitution), `--rules <names>` (comma-separated `replacements:` refs), `--rule 'from=>to'` (literal, repeatable), and `--rule-regex 'pat=>repl'` (regex, repeatable). + + +Reach for `replaceEnvs`/`render` instead of hand-written `sed`: an `env` task that pins `BETTER_AUTH_URL`/`NEXT_PUBLIC_APP_URL` to `https://{{AGENTBOX_BOX_HOST}}` becomes a one-liner, and the box and host browser then resolve the app at the same URL. + + ## Validating the file Editor validation is automatic via the schema modeline. The host CLI also pre-validates on `agentbox create` before any docker work — a config error aborts with a formatted message — and the in-box daemon re-validates on start. You can validate by hand inside the box without starting the daemon: diff --git a/docs/features.md b/docs/features.md index 9f8f7a9e..c9235e51 100644 --- a/docs/features.md +++ b/docs/features.md @@ -7,7 +7,8 @@ Full local-Docker lifecycle (plus parity-tested for cloud via `--provider daytona` — see cloud-providers.md): - `agentbox create` — builds the image on first run (or resolves a checkpoint image when `--snapshot ` is given), detects git repos (root + 1st-level subdirs), collects host-side carry-over (`git stash create` + untracked `ls-files`), spins up the container, then seeds `/workspace` via either `seedWorkspace` (in-container `git worktree add` against the bind-mounted `.git/` + stash/untracked replay) or `seedWorkspaceFromDir` (tar-pipe from host workspace / APFS clone for the no-git case). Checkpoint restore skips both — the image already has `/workspace`. Mounts the `agentbox-claude-config` named volume at `/home/vscode/.claude` and rsyncs host's `~/.claude` into it (additive, host-authoritative).
Bind-mounts each main repo's `.git/` at its identical absolute host path inside the container so worktree pointer files resolve symmetrically on both sides. `--with-env` (also on `agentbox claude`; config key `box.withEnv`) copies the host's `DEFAULT_ENV_PATTERNS` files (`.env*`, `.envrc`, `.dev.vars`, `secrets.toml`, `local.settings.json`, `appsettings.*.json`, `agentbox.yaml`) into `/workspace` after seeding — the host→box reverse of `agentbox download env` (gitignored files are otherwise excluded by the worktree carry-over's `git ls-files --others --exclude-standard`). One-shot at create time, lands in the container's writable layer (persists across stop/start), best-effort (warn-not-throw), recorded as `BoxRecord.withEnv` and surfaced in `agentbox status --inspect`. Implemented by `copyHostEnvFilesToBox` / `buildHostEnvFindArgs` in `packages/sandbox-docker/src/host-export.ts` (host `find . -print0 | tar` → `docker exec -i --user 1000:1000 tar -x`). -- `carry:` in `agentbox.yaml` — declarative host→box file copy that bypasses `.gitignore`. Each entry maps a host path (`/abs`, `~/...`, or `./relative-to-project-root`) to an explicit in-box destination (`/abs` or `~/...` — `~/` expands to `/home/vscode`); accepts a `mode:` (octal), `user:` (uid), `exclude:` (tar globs / bare dir names), and `optional: true`. When copying a directory, heavy regenerable dirs (`.git`, `node_modules`, `bin`, `obj`, `packages`, `dist`, `.next`, `target` — `DEFAULT_CP_EXCLUDES` in `apps/cli/src/lib/dir-breakdown.ts`) are dropped by default and `exclude:` is additive. The resolver enforces no-`..`-traversal, denies `/proc|/sys|/dev|/etc/passwd|/etc/shadow`, caps per-entry size **after excludes** at `box.cpMaxBytes` (default 100 MiB — the same limit `agentbox cp` uses; carry callers pass the effective value into `resolveCarry`), and flags symlinks whose target leaves `$HOME` *and* the project root. 
On `agentbox create` / `claude` / `codex` / `opencode`, the host CLI prompts ONCE (`@clack/prompts.select` — `yes` / `skip just for this box` / `cancel create`) listing every src→dest with size + mode + symlink warnings, then threads the approved set into `provider.create` as `req.carry`. Auto-approve with `--carry-yes` (or `AGENTBOX_CARRY_YES=1` for CI); skip with `--carry skip` (or `AGENTBOX_CARRY=skip`). `agentbox fork` is the exception: it **sends** the carry: block by default (it forwards `--carry-yes`), because the host is trusted and the box is the untrusted side, so a host→box copy is safe — opt out with `agentbox fork --carry skip`. `-y` / `--yes` does NOT auto-approve carry — non-TTY use of `-y` with non-empty entries fails loud, asking for the explicit env var (auditable in CI). The `-i` (queued background) path runs the same gate on the host **at submit time** (`runQueuedCarryGate`), serializes the approved `ResolvedCarryEntry[]` onto the queue job (`QueueJobCreateOpts.carry`), and the host-side worker applies them at box-create time — so `--carry-yes` / `--carry skip` work identically for `-i`. Docker injects via `copyCarryPathsToBox` (`docker cp` for files, host-tar + `docker exec tar -x` for dirs); cloud (Hetzner + Daytona) injects via `uploadCarryPaths` (host-tar + `backend.uploadFile` + `backend.exec(tar -x)`), per-entry isolated. Files land owned by `vscode:vscode` (uid 1000) when under `/home/vscode`; an audit summary (`{count, entries: [{src, dest, bytes}]}`) is recorded on `BoxRecord.carry`. Use case: develop AgentBox itself inside an AgentBox — carry `~/.agentbox/secrets.env` + `~/.agentbox/claude-credentials.json` so the in-box `agentbox` CLI is fully authenticated. Schema: `packages/ctl/src/carry.ts`. Resolver / prompt / gate: `apps/cli/src/lib/carry-resolve.ts`, `apps/cli/src/carry-prompt.ts`, `apps/cli/src/lib/carry-gate.ts`. 
Copiers: `packages/sandbox-docker/src/host-export.ts:copyCarryPathsToBox`, `packages/sandbox-cloud/src/carry.ts:uploadCarryPaths`. +- `carry:` in `agentbox.yaml` — declarative host→box file copy that bypasses `.gitignore`. Each entry maps a host path (`/abs`, `~/...`, or `./relative-to-project-root`) to an explicit in-box destination (`/abs` or `~/...` — `~/` expands to `/home/vscode`); accepts a `mode:` (octal), `user:` (uid), `exclude:` (tar globs / bare dir names), and `optional: true`. When copying a directory, heavy regenerable dirs (`.git`, `node_modules`, `bin`, `obj`, `packages`, `dist`, `.next`, `target` — `DEFAULT_CP_EXCLUDES` in `apps/cli/src/lib/dir-breakdown.ts`) are dropped by default and `exclude:` is additive. The resolver enforces no-`..`-traversal, denies `/proc|/sys|/dev|/etc/passwd|/etc/shadow`, caps per-entry size **after excludes** at `box.cpMaxBytes` (default 100 MiB — the same limit `agentbox cp` uses; carry callers pass the effective value into `resolveCarry`), and flags symlinks whose target leaves `$HOME` *and* the project root. On `agentbox create` / `claude` / `codex` / `opencode`, the host CLI prompts ONCE (`@clack/prompts.select` — `yes` / `skip just for this box` / `cancel create`) listing every src→dest with size + mode + symlink warnings, then threads the approved set into `provider.create` as `req.carry`. Auto-approve with `--carry-yes` (or `AGENTBOX_CARRY_YES=1` for CI); skip with `--carry skip` (or `AGENTBOX_CARRY=skip`). `agentbox fork` is the exception: it **sends** the carry: block by default (it forwards `--carry-yes`), because the host is trusted and the box is the untrusted side, so a host→box copy is safe — opt out with `agentbox fork --carry skip`. `-y` / `--yes` does NOT auto-approve carry — non-TTY use of `-y` with non-empty entries fails loud, asking for the explicit env var (auditable in CI). 
The `-i` (queued background) path runs the same gate on the host **at submit time** (`runQueuedCarryGate`), serializes the approved `ResolvedCarryEntry[]` onto the queue job (`QueueJobCreateOpts.carry`), and the host-side worker applies them at box-create time — so `--carry-yes` / `--carry skip` work identically for `-i`. Docker injects via `copyCarryPathsToBox` (`docker cp` for files, host-tar + `docker exec tar -x` for dirs); cloud (Hetzner + Daytona) injects via `uploadCarryPaths` (host-tar + `backend.uploadFile` + `backend.exec(tar -x)`), per-entry isolated. Files land owned by `vscode:vscode` (uid 1000) when under `/home/vscode`; an audit summary (`{count, entries: [{src, dest, bytes}]}`) is recorded on `BoxRecord.carry`. Use case: develop AgentBox itself inside an AgentBox — carry `~/.agentbox/secrets.env` + `~/.agentbox/claude-credentials.json` so the in-box `agentbox` CLI is fully authenticated. Schema: `packages/ctl/src/carry.ts`. Resolver / prompt / gate: `apps/cli/src/lib/carry-resolve.ts`, `apps/cli/src/carry-prompt.ts`, `apps/cli/src/lib/carry-gate.ts`. Copiers: `packages/sandbox-docker/src/host-export.ts:copyCarryPathsToBox`, `packages/sandbox-cloud/src/carry.ts:uploadCarryPaths`. A **file** carry entry may also set `replaceEnvs: true` (substitute `{{AGENTBOX_*}}` whitelist placeholders), `replace:` (inline `{from,to,regex?}` rules), and/or `rules:` (named refs into the top-level `replacements:` block) — the file is rendered host-side to a temp by `renderCarryEntries` (`@agentbox/sandbox-core/src/carry-render.ts`) before the copy (the host source is never modified; the box name is known by then). Named refs are expanded in `resolveCarry`; replace options are file-only (a dir entry errors). 
+- **Idempotent tasks + the replacement engine** — a task may declare `idempotent: true` (the supervisor skips it while a SHA-256 of the resolved command matches a marker at `/tasks/`, default `stateDir=/var/lib/agentbox` — box rootfs, captured by checkpoints, off `/workspace`) or `idempotent: { check: }` (run the probe first; exit 0 = skip, no marker — for state outside the checkpoint like a containerized DB). `run-task --force` bypasses both. Handled in `TaskRunner.launch` (`packages/ctl/src/supervisor.ts`). The shared, pure replacement engine lives in `@agentbox/core` (`replace.ts`: `applyReplacements` = `{{AGENTBOX_*}}` whitelist substitution + ordered rules; re-exported by `@agentbox/ctl` which adds the yaml/fs loaders — kept in core to avoid the `sandbox-core → ctl → relay → sandbox-core` build cycle). Surfaced three ways: the top-level `replacements:` block (named rule-sets, parsed in `config.ts`), `agentbox-ctl render [--out|--in-place] [--env] [--rules|--rule|--rule-regex]` (in-box declarative `sed`, `packages/ctl/src/commands/render.ts`), and the carry `replaceEnvs`/`replace`/`rules` above. - `agentbox claude [-- ...]` — does everything `create` does, then starts Claude Code in a detached tmux session inside the box and attaches the user's terminal to it. `Ctrl+a d` detaches; the claude process keeps running. Reattach with `agentbox claude attach `. Forwards `ANTHROPIC_API_KEY` / `CLAUDE_CODE_OAUTH_TOKEN` / `CLAUDE_EFFORT` / `ANTHROPIC_MODEL` from host env when set. `--isolate-claude-config` opts into a per-box `agentbox-claude-config-` volume. - `agentbox claude start [box] [-- ...]` — start a Claude session in an **existing** box (vs `agentbox claude` which creates one). Resolves `[box]` via the usual auto-pick / index / name / id-prefix chain. Auto-unpauses/starts the container if needed (mirrors `shell`/`code`). Re-syncs `~/.claude` into the box volume by default (skip with `--no-sync-config` for speed). 
Re-runs `rebuildPluginNativeDeps` (idempotent — gated by per-plugin marker). If a tmux session with the configured name already exists, just attaches; otherwise starts a fresh one. Post-`--` args are forwarded to claude only when starting a fresh session. - `agentbox codex [-- ...]` — the Codex parity of `agentbox claude`: does everything `create` does, then launches OpenAI Codex in a detachable tmux session (`codex` session name; `--session-name` / config `codex.sessionName` override). Forwards `OPENAI_API_KEY` from host env. `--isolate-codex-config` opts into a per-box `agentbox-codex-config-` volume. Subcommands mirror claude: `agentbox codex start [box] [-- ...]` (start a session in an existing box, auto-unpause/start, `--no-sync-config` to skip the `~/.codex` resync), `agentbox codex attach [box]` (attach/start without resyncing), `agentbox codex login [-- ]` (sign in via a throwaway container — defaults to `codex login --device-auth`, the headless device-code flow; pass `-- --api-key` for the API-key path). Skips the claude-only steps (setup wizard, plugin rebuild). `apps/cli/src/commands/codex.ts`. Codex is baked into the base image, but a box built from a **checkpoint captured before Codex support** (or an older base image) won't have the binary — `ensureCodexInstalled` (`codex.ts`) detects that and `npm install -g @openai/codex`s it into the box's writable layer at create/start time (mirrors `--with-playwright`; fast `command -v` no-op when codex is already present). diff --git a/docs/in-box-supervisor.md b/docs/in-box-supervisor.md index 99e479fa..3b7370e8 100644 --- a/docs/in-box-supervisor.md +++ b/docs/in-box-supervisor.md @@ -6,7 +6,9 @@ - `needs:` on any unit forms a DAG (cycles + unknown refs rejected at config load). Independent units launch in parallel. - `ready_when:` declares a readiness probe per service: `port` (TCP connect to `127.0.0.1:` by default), `log_match` (regex over stdout/stderr), or `http` (GET; expects 2xx by default). 
Probe lives in `packages/ctl/src/probe.ts`. `on_timeout: kill` (default) re-enters the restart policy; `on_timeout: mark_unhealthy` leaves the process running but flags the service — the escape hatch for legitimately slow cold starts. - `expose: { port: , as: 80 }` on a service marks it as **the** web service (at most one; `as` must be `80` — the only container port AgentBox reserves; `RESERVED_WEB_PORT` in `config.ts` / `WEB_CONTAINER_PORT` in `@agentbox/sandbox-docker`). The supervisor owns an in-process Node TCP forwarder (`WebProxy`, `packages/ctl/src/web-proxy.ts`) that binds container `:80` → `127.0.0.1:`, (re)pointed by `applyWebProxy()` on `init`/`reload` and torn down in `stopAll` — so the wizard writing `agentbox.yaml` post-create + `agentbox-ctl reload` activates it with no box restart. Binding `:80` as non-root `vscode` works because the image grants the node binary `cap_net_bind_service` (`setcap` in `Dockerfile.box`). The `expose` mapping rides in the status snapshot (`BoxStatusServiceEntry.expose`) so the host knows the web service even when `agentbox.yaml` lives only in the box. -- Wire ops: `status` returns `{ services, tasks }`; `task-status` returns task list; `wait-ready { timeoutMs?, units? }` blocks daemon-side until all autostart units reach their satisfying state, then resolves `{ ready: true }` or `{ ready: false, timedOut, failed }`; `run-task { name, force? }` resets a task back to pending so the scheduler reruns it. +- Wire ops: `status` returns `{ services, tasks }`; `task-status` returns task list; `wait-ready { timeoutMs?, units? }` blocks daemon-side until all autostart units reach their satisfying state, then resolves `{ ready: true }` or `{ ready: false, timedOut, failed }`; `run-task { name, force? }` resets a task back to pending so the scheduler reruns it (`force` also bypasses the `idempotent` skip). +- **`idempotent:` on a task** (handled in `TaskRunner.launch`, `supervisor.ts`) makes a re-run a no-op when already satisfied. 
`idempotent: true` → marker keyed by a SHA-256 of the resolved command (+cwd+env) at `<stateDir>/tasks/<name>` (`stateDir` defaults to `DEFAULT_STATE_DIR = /var/lib/agentbox`, the box rootfs — captured by checkpoints, never under `/workspace`); editing the command invalidates it. `idempotent: { check: <cmd> }` → run the probe first; exit 0 = skip, no marker written (right for state outside the checkpoint, e.g. a containerized DB). Marker writes happen in the child `exit` handler on code 0. +- **Replacement engine** (`@agentbox/core`'s `replace.ts`, re-exported by `@agentbox/ctl`'s `replace.ts` which adds the yaml/fs loaders — kept in core so the host carry path can share it without the `sandbox-core → ctl → relay → sandbox-core` cycle): `applyReplacements` does `{{AGENTBOX_*}}` whitelist substitution (`PLACEHOLDER_KEYS`) + ordered `{from,to,regex?}` rules. Surfaced three ways: the top-level `replacements:` block (named rule-sets, parsed in `config.ts`), `agentbox-ctl render` (in-box CLI, `commands/render.ts`), and carry `replaceEnvs`/`replace`/`rules` (host-side, file-only, rendered to a temp by `renderCarryEntries` in `@agentbox/sandbox-core` before the per-provider copy — wired in `sandbox-docker/create.ts` and `sandbox-cloud/cloud-provider.ts`). - Listens on `/run/agentbox/ctl.sock` (UNIX socket, newline-delimited JSON). Both the in-box `agentbox-ctl` client and host commands talk to the same socket — but the **host commands shell in via `docker exec`**, not the bind-mounted socket: Docker Desktop / OrbStack's VM boundary breaks `connect()` from the mac side, even though the file is visible. - Launched by `launchCtlDaemon()` in `sandbox-docker/src/ctl.ts` (best-effort; missing/empty `agentbox.yaml` is fine and doesn't fail `create`). Same call is repeated in `startBox()` because the daemon dies with the container.
**Ordering invariant: the in-box `dockerd` is launched and awaited ready *before* the ctl daemon** — on create (`create.ts`), on docker restart (`startBox()`), and on cloud create/resume (`reEnsureCloudBox()` / create in `sandbox-cloud/src/cloud-provider.ts`). The supervisor starts services the moment it's up, so a `docker`-based service (`docker run`, `docker compose up`) would otherwise race a not-yet-ready `/var/run/docker.sock`. `launchDockerdDaemon` / `launchCloudDockerdDaemon` block until the socket is accept()-able (best-effort: a dockerd timeout still proceeds to launch the supervisor). Providers with no DinD (vercel, e2b) set `launchDockerd: false` and just launch the supervisor. - **In-box relay**: the daemon also binds an in-box endpoint on `127.0.0.1:8788` (`DEFAULT_BOX_RELAY_PORT`; override `AGENTBOX_BOX_RELAY_PORT`) so the in-box ctl client has a symmetric `AGENTBOX_RELAY_URL` across providers. For **cloud** boxes that endpoint is a full `mode: 'box'` relay the host's `CloudBoxPoller` long-polls; for **docker** boxes it's a thin reverse proxy (`packages/ctl/src/box-relay-forwarder.ts`) that whitelists `POST /rpc` + `POST /events` and forwards to `AGENTBOX_HOST_RELAY_URL` (default `http://host.docker.internal:8787`). Keeping :8787 unbound inside the box lets a nested `agentbox` run (developing agentbox-from-inside-agentbox) claim its own host relay there. See [`host-relay.md`](./host-relay.md). From 9be65f2eaa8d796963db991658084fcfe10001a5 Mon Sep 17 00:00:00 2001 From: Marco D'Alia Date: Sun, 7 Jun 2026 09:56:00 +0100 Subject: [PATCH 03/12] docs(agentbox.yaml): fix replacements example to match bare hostnames The \.optima\.localhost (leading-dot) regex wouldn't match a bare optima.localhost; use optima\.localhost -> {{AGENTBOX_BOX_HOST}}. 
--- apps/cli/share/agentbox-setup/SKILL.md | 2 +- apps/web/content/docs/agentbox-yaml.mdx | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/apps/cli/share/agentbox-setup/SKILL.md b/apps/cli/share/agentbox-setup/SKILL.md index 57b17dc4..59507093 100644 --- a/apps/cli/share/agentbox-setup/SKILL.md +++ b/apps/cli/share/agentbox-setup/SKILL.md @@ -265,7 +265,7 @@ Many apps hard-code a hostname (e.g. `optima.localhost`) or read a gitignored `. ```yaml replacements: box-host: - - { from: '\.optima\.localhost', to: '.{{AGENTBOX_BOX_NAME}}.localhost', regex: true } + - { from: 'optima\.localhost', to: '{{AGENTBOX_BOX_HOST}}', regex: true } # {{AGENTBOX_BOX_HOST}} = <box-name>.localhost tasks: env: diff --git a/apps/web/content/docs/agentbox-yaml.mdx b/apps/web/content/docs/agentbox-yaml.mdx index 060d94f7..6a169e99 100644 --- a/apps/web/content/docs/agentbox-yaml.mdx +++ b/apps/web/content/docs/agentbox-yaml.mdx @@ -322,8 +322,8 @@ Mark credential entries `optional: true` so a box still comes up when a given fi replacements: box-host: # Repoint a hard-coded hostname at this box's published URL. - - from: '\.optima\.localhost' - to: '.{{AGENTBOX_BOX_NAME}}.localhost' + - from: 'optima\.localhost' + to: '{{AGENTBOX_BOX_HOST}}' # = <box-name>.localhost regex: true ``` From 6ca1198ef8c7fcd8cc13a3f2143b06fffae874ec Mon Sep 17 00:00:00 2001 From: Marco D'Alia Date: Sun, 7 Jun 2026 10:33:48 +0100 Subject: [PATCH 04/12] fix(ctl): idempotent marker dir must be writable by the non-root daemon MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit E2E on a real box surfaced EACCES writing /var/lib/agentbox (root-owned, daemon runs as vscode), so idempotent: true silently re-ran every boot.
- supervisor: resolve a writable stateDir at init, falling back to /state (always daemon-writable, on rootfs/checkpointed, off /workspace) when the configured dir isn't creatable — works on every provider without an image bake - Dockerfile.box: mkdir+chown /var/lib/agentbox to vscode so docker uses the clean default path - test: fallback path covered --- packages/ctl/src/supervisor.ts | 35 +++++++++++++++++-- .../ctl/test/supervisor-idempotent.test.ts | 18 ++++++++++ packages/sandbox-docker/Dockerfile.box | 4 +-- 3 files changed, 53 insertions(+), 4 deletions(-) diff --git a/packages/ctl/src/supervisor.ts b/packages/ctl/src/supervisor.ts index 70e33d67..f62e8f43 100644 --- a/packages/ctl/src/supervisor.ts +++ b/packages/ctl/src/supervisor.ts @@ -668,6 +668,10 @@ export class Supervisor extends EventEmitter { private rescheduleDirty = false; private readonly relay: RelayClient; private readonly webProxy: WebProxy; + // Resolved at init: the configured stateDir, or a writable fallback under + // logDir when it isn't creatable (the daemon runs as a non-root user and the + // default /var/lib/agentbox is root-owned on stock images). + private resolvedStateDir: string = DEFAULT_STATE_DIR; constructor(private readonly opts: SupervisorOptions) { super(); @@ -734,12 +738,39 @@ export class Supervisor extends EventEmitter { async init(cfg: CtlConfig): Promise { await mkdir(this.opts.logDir, { recursive: true }); + this.resolvedStateDir = await this.ensureStateDir(); for (const t of cfg.tasks) this.addTaskUnit(t); for (const s of cfg.services) this.addServiceUnit(s); this.applyWebProxy(); this.schedule(); } + /** + * Pick a writable directory for idempotent-task markers. Prefer the configured + * stateDir (default /var/lib/agentbox), but the daemon runs as a non-root user + * and that path is root-owned on stock images, so fall back to a dir under + * logDir — always daemon-writable, on the box rootfs (captured by checkpoints), + * and off /workspace (no git noise). 
+ */ + private async ensureStateDir(): Promise { + const want = this.opts.stateDir ?? DEFAULT_STATE_DIR; + try { + await mkdir(join(want, 'tasks'), { recursive: true }); + return want; + } catch { + const fallback = join(this.opts.logDir, 'state'); + try { + await mkdir(join(fallback, 'tasks'), { recursive: true }); + process.stderr.write( + `[ctl] idempotent markers: ${want} not writable, using ${fallback}\n`, + ); + return fallback; + } catch { + return want; // give up; per-task marker writes will warn + } + } + } + private emitChange(): void { this.emit('change'); } @@ -748,7 +779,7 @@ export class Supervisor extends EventEmitter { const runner = new ServiceRunner(spec, { logDir: this.opts.logDir, cwd: this.opts.workspace, - stateDir: this.opts.stateDir ?? DEFAULT_STATE_DIR, + stateDir: this.resolvedStateDir, spawn: this.opts.spawn, }); runner.on('log', (ev) => this.emit('log', ev)); @@ -762,7 +793,7 @@ export class Supervisor extends EventEmitter { const runner = new TaskRunner(spec, { logDir: this.opts.logDir, cwd: this.opts.workspace, - stateDir: this.opts.stateDir ?? DEFAULT_STATE_DIR, + stateDir: this.resolvedStateDir, spawn: this.opts.spawn, }); runner.on('log', (ev) => this.emit('log', ev)); diff --git a/packages/ctl/test/supervisor-idempotent.test.ts b/packages/ctl/test/supervisor-idempotent.test.ts index 6ae93d2a..d0ebe353 100644 --- a/packages/ctl/test/supervisor-idempotent.test.ts +++ b/packages/ctl/test/supervisor-idempotent.test.ts @@ -104,6 +104,24 @@ describe('idempotent tasks', () => { await sup2.stopAll(); }); + it('falls back to a writable dir under logDir when stateDir is not creatable', async () => { + const ran = join(dir, 'ran'); + const task = { name: 't', command: `: > '${ran}'`, needs: [], idempotent: { kind: 'marker' } as const }; + // /proc/... is not creatable — the supervisor must fall back to /state. 
+ const sup1 = new Supervisor({ workspace: dir, logDir: dir, stateDir: '/proc/nope/agentbox' }); + await sup1.init(taskCfg(task)); + await waitForTaskDone(sup1, 't'); + expect(existsSync(join(dir, 'state', 'tasks', 't'))).toBe(true); // marker in fallback + await sup1.stopAll(); + + await rm(ran); + const sup2 = new Supervisor({ workspace: dir, logDir: dir, stateDir: '/proc/nope/agentbox' }); + await sup2.init(taskCfg(task)); + await waitForTaskDone(sup2, 't'); + expect(existsSync(ran)).toBe(false); // skipped via the fallback marker + await sup2.stopAll(); + }); + it('run-task --force bypasses the marker and re-runs', async () => { const runs = join(dir, 'runs'); const task = { diff --git a/packages/sandbox-docker/Dockerfile.box b/packages/sandbox-docker/Dockerfile.box index 6331b52f..667850ec 100644 --- a/packages/sandbox-docker/Dockerfile.box +++ b/packages/sandbox-docker/Dockerfile.box @@ -72,9 +72,9 @@ RUN apt-get update \ vim \ libcap2-bin \ && rm -rf /var/lib/apt/lists/* \ - && mkdir -p /workspace /run/agentbox /var/log/agentbox \ + && mkdir -p /workspace /run/agentbox /var/log/agentbox /var/lib/agentbox \ && chmod 755 /workspace \ - && chown vscode:vscode /workspace /run/agentbox /var/log/agentbox + && chown vscode:vscode /workspace /run/agentbox /var/log/agentbox /var/lib/agentbox # The in-box supervisor (runs as non-root `vscode`) owns a TCP forwarder that # binds container :80 -> the `expose:`-flagged service (see WebProxy / From 5b30be4411782e5966143572336a216b35ce89b7 Mon Sep 17 00:00:00 2001 From: Marco D'Alia Date: Sun, 7 Jun 2026 10:45:12 +0100 Subject: [PATCH 05/12] refactor: /simplify cleanups + cross-provider state dir - bake /var/lib/agentbox on hetzner/vercel/e2b base images (was docker-only), so idempotent markers use the clean path on every provider, not the fallback - checkpoint-cleanup: exclude /var/log/agentbox/state from truncation so the marker fallback survives a checkpoint - core: single deriveBoxHost() shared by both 
placeholder-context builders - carry-render: dedup the wantsRender predicate; build the log suffix with join - carry.ts: fold parseRulesRefs/parseExclude into one parseStringList - carry-resolve: drop redundant replaceFields guard - carry-gate: read agentbox.yaml once, parse carry + replacements from one text --- apps/cli/src/lib/carry-gate.ts | 15 ++++++++-- apps/cli/src/lib/carry-resolve.ts | 11 +++----- packages/core/src/index.ts | 1 + packages/core/src/replace.ts | 18 +++++++++--- packages/ctl/src/carry.ts | 28 ++++++------------- packages/sandbox-core/src/carry-render.ts | 26 +++++++++-------- .../scripts/agentbox-checkpoint-cleanup | 7 +++-- .../sandbox-e2b/scripts/build-template.sh | 4 +-- .../sandbox-hetzner/scripts/install-box.sh | 4 +-- packages/sandbox-vercel/scripts/provision.sh | 4 +-- 10 files changed, 64 insertions(+), 54 deletions(-) diff --git a/apps/cli/src/lib/carry-gate.ts b/apps/cli/src/lib/carry-gate.ts index c2115b9f..d6a37749 100644 --- a/apps/cli/src/lib/carry-gate.ts +++ b/apps/cli/src/lib/carry-gate.ts @@ -1,7 +1,8 @@ +import { readFile } from 'node:fs/promises'; import { join } from 'node:path'; import { log } from '@clack/prompts'; import { loadEffectiveConfig } from '@agentbox/config'; -import { loadCarrySection, loadReplacementsSection } from '@agentbox/ctl'; +import { parseCarrySection, parseReplacementsSection } from '@agentbox/ctl'; import type { ResolvedCarryEntry } from '@agentbox/core'; import { promptForCarry } from '../carry-prompt.js'; import { resolveCarry } from './carry-resolve.js'; @@ -37,11 +38,19 @@ export async function runCarryGate(args: CarryGateArgs): Promise {}); const yamlPath = join(args.projectRoot, 'agentbox.yaml'); - const items = await loadCarrySection(yamlPath); + // Read agentbox.yaml once; parse both the carry and replacements sections + // from the same text (a single readFile + parse). 
+ let yamlText = ''; + try { + yamlText = await readFile(yamlPath, 'utf8'); + } catch (err) { + if ((err as NodeJS.ErrnoException).code !== 'ENOENT') throw err; + } + const items = parseCarrySection(yamlText); if (items.length === 0) return { decision: 'approve', entries: [] }; const cfg = await loadEffectiveConfig(args.projectRoot); - const replacements = await loadReplacementsSection(yamlPath); + const replacements = parseReplacementsSection(yamlText); const resolved = await resolveCarry(items, { projectRoot: args.projectRoot, maxBytes: cfg.effective.box.cpMaxBytes, diff --git a/apps/cli/src/lib/carry-resolve.ts b/apps/cli/src/lib/carry-resolve.ts index 37d6b436..22c21659 100644 --- a/apps/cli/src/lib/carry-resolve.ts +++ b/apps/cli/src/lib/carry-resolve.ts @@ -112,13 +112,10 @@ async function resolveOne(item: CarryItem, ctx: OneCtx): Promise 0) - ? { - ...(item.replaceEnvs ? { replaceEnvs: true } : {}), - ...(replaceRules.length > 0 ? { replace: replaceRules } : {}), - } - : {}; + const replaceFields = { + ...(item.replaceEnvs ? { replaceEnvs: true } : {}), + ...(replaceRules.length > 0 ? { replace: replaceRules } : {}), + }; let st: Awaited>; try { diff --git a/packages/core/src/index.ts b/packages/core/src/index.ts index dfbc7f84..7de9053e 100644 --- a/packages/core/src/index.ts +++ b/packages/core/src/index.ts @@ -43,6 +43,7 @@ export { applyReplacements, substitutePlaceholders, placeholderContextFromEnv, + deriveBoxHost, parseReplaceRule, parseReplaceRules, parseReplacements, diff --git a/packages/core/src/replace.ts b/packages/core/src/replace.ts index db7c7d9c..d0550e59 100644 --- a/packages/core/src/replace.ts +++ b/packages/core/src/replace.ts @@ -108,6 +108,19 @@ export function applyReplacements(content: string, opts: ApplyReplacementsOption return out; } +/** + * Fill in `AGENTBOX_BOX_HOST` (the published portless host, `.localhost`) + * from `AGENTBOX_BOX_NAME` when it isn't already set. 
Single source of truth for + * the host-derivation rule, shared by every placeholder-context builder. Mutates + * and returns `ctx`. + */ +export function deriveBoxHost(ctx: Record): Record { + if (ctx.AGENTBOX_BOX_HOST === undefined && ctx.AGENTBOX_BOX_NAME !== undefined) { + ctx.AGENTBOX_BOX_HOST = `${ctx.AGENTBOX_BOX_NAME}.localhost`; + } + return ctx; +} + /** Build the whitelist placeholder context from a process environment. */ export function placeholderContextFromEnv( env: NodeJS.ProcessEnv = process.env, @@ -117,10 +130,7 @@ export function placeholderContextFromEnv( const v = env[key]; if (typeof v === 'string' && v.length > 0) ctx[key] = v; } - if (ctx.AGENTBOX_BOX_HOST === undefined && ctx.AGENTBOX_BOX_NAME !== undefined) { - ctx.AGENTBOX_BOX_HOST = `${ctx.AGENTBOX_BOX_NAME}.localhost`; - } - return ctx; + return deriveBoxHost(ctx); } // --- rule parsing (shared by config top-level `replacements:` and the CLI) --- diff --git a/packages/ctl/src/carry.ts b/packages/ctl/src/carry.ts index 8136d3e2..8ab19cd4 100644 --- a/packages/ctl/src/carry.ts +++ b/packages/ctl/src/carry.ts @@ -57,30 +57,18 @@ const ITEM_KEYS = new Set([ 'rules', ]); -function parseRulesRefs(raw: unknown, where: string): string[] | undefined { +// Parse a YAML field into a list of trimmed non-empty strings (undefined when +// empty/absent). Shared by `exclude` and `rules`; `desc` names the element kind +// in the error message. 
+function parseStringList(raw: unknown, where: string, desc: string): string[] | undefined { if (raw === undefined || raw === null) return undefined; if (!Array.isArray(raw)) { - throw new CarryConfigError(`${where}.rules must be a list of replacements rule-set names`); + throw new CarryConfigError(`${where} must be a list of ${desc}`); } const out: string[] = []; for (const [i, v] of raw.entries()) { if (typeof v !== 'string' || v.trim().length === 0) { - throw new CarryConfigError(`${where}.rules[${String(i)}] must be a non-empty string`); - } - out.push(v.trim()); - } - return out.length > 0 ? out : undefined; -} - -function parseExclude(raw: unknown, where: string): string[] | undefined { - if (raw === undefined || raw === null) return undefined; - if (!Array.isArray(raw)) { - throw new CarryConfigError(`${where}.exclude must be a list of glob/name strings`); - } - const out: string[] = []; - for (const [i, v] of raw.entries()) { - if (typeof v !== 'string' || v.trim().length === 0) { - throw new CarryConfigError(`${where}.exclude[${String(i)}] must be a non-empty string`); + throw new CarryConfigError(`${where}[${String(i)}] must be a non-empty string`); } out.push(v.trim()); } @@ -227,7 +215,7 @@ function parseMapping(raw: Record, where: string): CarryItem { const mode = parseMode(raw.mode, where); const user = parseUser(raw.user, where); - const exclude = parseExclude(raw.exclude, where); + const exclude = parseStringList(raw.exclude, `${where}.exclude`, 'glob/name strings'); let optional = false; if (raw.optional !== undefined && raw.optional !== null) { @@ -253,7 +241,7 @@ function parseMapping(raw: Record, where: string): CarryItem { throw new CarryConfigError(err instanceof Error ? 
err.message : String(err)); } } - const rules = parseRulesRefs(raw.rules, where); + const rules = parseStringList(raw.rules, `${where}.rules`, 'replacements rule-set names'); const out: CarryItem = { src, dest, optional }; if (mode !== undefined) out.mode = mode; diff --git a/packages/sandbox-core/src/carry-render.ts b/packages/sandbox-core/src/carry-render.ts index a557847f..d7f4bd8f 100644 --- a/packages/sandbox-core/src/carry-render.ts +++ b/packages/sandbox-core/src/carry-render.ts @@ -1,7 +1,7 @@ import { mkdtemp, readFile, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { basename, join } from 'node:path'; -import { applyReplacements, type ResolvedCarryEntry } from '@agentbox/core'; +import { applyReplacements, deriveBoxHost, type ResolvedCarryEntry } from '@agentbox/core'; /** * Box facts used to fill `{{AGENTBOX_*}}` placeholders in carried files. The @@ -25,8 +25,12 @@ export function carryPlaceholderContext(ctx: CarryBoxContext): Record 0); } /** @@ -41,17 +45,13 @@ export async function renderCarryEntries( ctx: CarryBoxContext, onLog?: (line: string) => void, ): Promise { - const needsRender = entries.some( - (e) => e.kind === 'file' && (e.replaceEnvs || (e.replace && e.replace.length > 0)), - ); - if (!needsRender) return entries; + if (!entries.some(wantsRender)) return entries; const context = carryPlaceholderContext(ctx); const stage = await mkdtemp(join(tmpdir(), 'agentbox-carry-render-')); const out: ResolvedCarryEntry[] = []; for (const [i, entry] of entries.entries()) { - const wants = entry.kind === 'file' && (entry.replaceEnvs || (entry.replace?.length ?? 
0) > 0); - if (!wants) { + if (!wantsRender(entry)) { out.push(entry); continue; } @@ -65,9 +65,11 @@ export async function renderCarryEntries( const tmp = join(stage, `${String(i)}-${basename(entry.absSrc)}`); await writeFile(tmp, rendered, 'utf8'); out.push({ ...entry, absSrc: tmp, bytes: Buffer.byteLength(rendered) }); - onLog?.(`carry: rendered ${entry.rawSrc} (${entry.replaceEnvs ? 'env' : ''}${ - entry.replace?.length ? `${entry.replaceEnvs ? '+' : ''}${String(entry.replace.length)} rule(s)` : '' - })`); + const what = [ + ...(entry.replaceEnvs ? ['env'] : []), + ...(entry.replace?.length ? [`${String(entry.replace.length)} rule(s)`] : []), + ].join('+'); + onLog?.(`carry: rendered ${entry.rawSrc} (${what})`); } return out; } diff --git a/packages/sandbox-docker/scripts/agentbox-checkpoint-cleanup b/packages/sandbox-docker/scripts/agentbox-checkpoint-cleanup index 1cadfd4c..e01dd31b 100644 --- a/packages/sandbox-docker/scripts/agentbox-checkpoint-cleanup +++ b/packages/sandbox-docker/scripts/agentbox-checkpoint-cleanup @@ -32,10 +32,13 @@ rm -rf /var/lib/apt/lists/* 2>/dev/null find /tmp /var/tmp -mindepth 1 -maxdepth 1 ! -name 'claude-*' -exec rm -rf {} + 2>/dev/null # Logs: truncate (don't delete) so the original file modes / ownerships stay -# intact for the next run. Targets common rotated archives too. +# intact for the next run. Targets common rotated archives too. Exclude the +# `state/` subdir — it holds the idempotent-task marker fallback (used when +# /var/lib/agentbox isn't writable) and must survive the checkpoint. find /var/log -type f \( -name '*.log' -o -name '*.gz' -o -name '*.1' \) \ + -not -path '/var/log/agentbox/state/*' -exec truncate -s0 {} + 2>/dev/null +find /var/log/agentbox -type f -not -path '/var/log/agentbox/state/*' \ -exec truncate -s0 {} + 2>/dev/null -find /var/log/agentbox -type f -exec truncate -s0 {} + 2>/dev/null # Bash history (root + vscode). 
Re-assert vscode ownership: `: >` run as root # (re)creates the file root-owned 0644 when it didn't exist, which the uid-1000 diff --git a/packages/sandbox-e2b/scripts/build-template.sh b/packages/sandbox-e2b/scripts/build-template.sh index 53399a8b..ff734ce8 100755 --- a/packages/sandbox-e2b/scripts/build-template.sh +++ b/packages/sandbox-e2b/scripts/build-template.sh @@ -88,10 +88,10 @@ visudo -cf /etc/sudoers >/dev/null done_ "vscode user + sudoers" step "agentbox base dirs + /workspace ownership" -mkdir -p /workspace /run/agentbox /var/log/agentbox /etc/agentbox /etc/claude-code \ +mkdir -p /workspace /run/agentbox /var/log/agentbox /var/lib/agentbox /etc/agentbox /etc/claude-code \ /usr/local/share/agentbox chmod 755 /workspace -chown vscode:vscode /workspace /run/agentbox /var/log/agentbox +chown vscode:vscode /workspace /run/agentbox /var/log/agentbox /var/lib/agentbox done_ "agentbox base dirs + /workspace ownership" step "node setcap (bind <1024 without root)" diff --git a/packages/sandbox-hetzner/scripts/install-box.sh b/packages/sandbox-hetzner/scripts/install-box.sh index 1ea26b5b..31917536 100644 --- a/packages/sandbox-hetzner/scripts/install-box.sh +++ b/packages/sandbox-hetzner/scripts/install-box.sh @@ -108,10 +108,10 @@ chmod 0440 /etc/sudoers.d/90-agentbox-vscode done_ "vscode user (UID 1000) + sudoers" step "agentbox base dirs + /workspace ownership" -mkdir -p /workspace /run/agentbox /var/log/agentbox /etc/agentbox /etc/claude-code \ +mkdir -p /workspace /run/agentbox /var/log/agentbox /var/lib/agentbox /etc/agentbox /etc/claude-code \ /usr/local/share/agentbox chmod 755 /workspace -chown vscode:vscode /workspace /run/agentbox /var/log/agentbox +chown vscode:vscode /workspace /run/agentbox /var/log/agentbox /var/lib/agentbox done_ "agentbox base dirs + /workspace ownership" step "node setcap (port <1024 bind without root)" diff --git a/packages/sandbox-vercel/scripts/provision.sh b/packages/sandbox-vercel/scripts/provision.sh index 
edd5705e..2f30bba1 100644 --- a/packages/sandbox-vercel/scripts/provision.sh +++ b/packages/sandbox-vercel/scripts/provision.sh @@ -96,10 +96,10 @@ visudo -cf /etc/sudoers >/dev/null done_ "vscode user + sudoers" step "agentbox base dirs + /workspace ownership" -mkdir -p /workspace /run/agentbox /var/log/agentbox /etc/agentbox /etc/claude-code \ +mkdir -p /workspace /run/agentbox /var/log/agentbox /var/lib/agentbox /etc/agentbox /etc/claude-code \ /usr/local/share/agentbox chmod 755 /workspace -chown vscode:vscode /workspace /run/agentbox /var/log/agentbox +chown vscode:vscode /workspace /run/agentbox /var/log/agentbox /var/lib/agentbox done_ "agentbox base dirs + /workspace ownership" step "node setcap (bind <1024 without root)" From 1749f781a81a54bc441e040e99d88f1bcf2ac2ae Mon Sep 17 00:00:00 2001 From: Marco D'Alia Date: Sun, 7 Jun 2026 10:49:56 +0100 Subject: [PATCH 06/12] docs: idempotent check probes are plain shell, not {{...}} (bugbot) Cursor Bugbot: example checks used grep -q '{{AGENTBOX_BOX_HOST}}', but the supervisor runs the probe verbatim via bash -c and never expands {{...}} (render-only), so it matched literal braces and re-ran every boot. Drop the unnecessary check from the naturally-idempotent render example, and document that check probes use shell vars ($AGENTBOX_BOX_NAME). --- apps/cli/share/agentbox-setup/SKILL.md | 8 +++++--- apps/web/content/docs/agentbox-yaml.mdx | 6 ++---- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/apps/cli/share/agentbox-setup/SKILL.md b/apps/cli/share/agentbox-setup/SKILL.md index 59507093..3f61f7be 100644 --- a/apps/cli/share/agentbox-setup/SKILL.md +++ b/apps/cli/share/agentbox-setup/SKILL.md @@ -269,12 +269,14 @@ Many apps hard-code a hostname (e.g. `optima.localhost`) or read a gitignored `. 
tasks: env: + # The render is idempotent (the rules re-pin the same lines every boot), so + # no `idempotent:` guard is needed — it self-corrects on a checkpoint-started + # box that carries a different box's host in .env. command: agentbox-ctl render apps/saas/env.example --out apps/saas/.env --env --rules box-host - # Re-run when the rendered .env doesn't yet point at this box. - idempotent: - check: "grep -q '{{AGENTBOX_BOX_HOST}}' apps/saas/.env" ``` + Note: an `idempotent: { check: <cmd> }` probe runs verbatim via `bash -c` with the box env — use shell vars like `$AGENTBOX_BOX_NAME`, NOT `{{…}}` placeholders (those are only expanded by `render`/carry, never by the supervisor). + - **`carry:` + `replaceEnvs`/`replace`/`rules`** — for a host-only file (e.g. a real `.env` with secrets that never lives in the repo), carry it in and render it host-side in one step (file entries only): ```yaml diff --git a/apps/web/content/docs/agentbox-yaml.mdx b/apps/web/content/docs/agentbox-yaml.mdx index 6a169e99..227fe4dd 100644 --- a/apps/web/content/docs/agentbox-yaml.mdx +++ b/apps/web/content/docs/agentbox-yaml.mdx @@ -116,7 +116,7 @@ tasks: | Form | Behavior | | --- | --- | | `idempotent: true` | The supervisor stores a marker keyed by a **hash of the resolved command**. A warm boot skips while the hash matches; editing the command invalidates it and re-runs. The marker lives at `/var/lib/agentbox/tasks/<name>` (box rootfs — captured by checkpoints, never under `/workspace`). | -| `idempotent: { check: <cmd> }` | Run the probe before launching; **exit 0 means already satisfied** (skip). No marker is written — the probe is the source of truth.
Use this when the thing you'd guard on lives **outside** the checkpointed filesystem (e.g. a containerized database, whose data is in the in-box docker volume, not the checkpoint). The probe runs verbatim via `bash -c` with the box env, so use shell vars like `$AGENTBOX_BOX_NAME`; it does **not** expand `{{…}}` placeholders (those are render-only). | ```yaml tasks: @@ -339,14 +339,12 @@ carry: rules: [box-host] ``` -**2. In-box (files already in the workspace).** `agentbox-ctl render` is a declarative `sed` replacement — handy for rendering a gitignored `.env` from a committed `env.example` on every boot: +**2. In-box (files already in the workspace).** `agentbox-ctl render` is a declarative `sed` replacement — handy for rendering a gitignored `.env` from a committed `env.example` on every boot. The render is itself idempotent (the regex rules re-pin the same lines on every boot), so this task needs no `idempotent:` guard: ```yaml tasks: env: command: agentbox-ctl render apps/saas/env.example --out apps/saas/.env --env --rules box-host - idempotent: - check: "grep -q '{{AGENTBOX_BOX_HOST}}' apps/saas/.env" ``` ### Placeholders From c438b04d72b2f730fb5a954dd347121d1356e310 Mon Sep 17 00:00:00 2001 From: Marco D'Alia Date: Sun, 7 Jun 2026 11:04:04 +0100 Subject: [PATCH 07/12] test(ctl): fix fallback test flakiness in CI The stateDir-fallback test used /proc/nope as the unwritable path; mkdir under /proc behaves differently on Linux (slow) than macOS and timed out in CI. Use a regular file as the parent dir (deterministic ENOTDIR, instant, cross-platform) and add a 20s testTimeout cushion for the two-cycle idempotent tests. 
--- .../ctl/test/supervisor-idempotent.test.ts | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/packages/ctl/test/supervisor-idempotent.test.ts b/packages/ctl/test/supervisor-idempotent.test.ts index d0ebe353..184ab53c 100644 --- a/packages/ctl/test/supervisor-idempotent.test.ts +++ b/packages/ctl/test/supervisor-idempotent.test.ts @@ -1,11 +1,15 @@ import { existsSync, readFileSync } from 'node:fs'; -import { mkdtemp, rm } from 'node:fs/promises'; +import { mkdtemp, rm, writeFile } from 'node:fs/promises'; import { tmpdir } from 'node:os'; import { join } from 'node:path'; -import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; import { Supervisor } from '../src/supervisor.js'; import type { CtlConfig, TaskSpec } from '../src/config.js'; +// Each test runs two full supervisor init/launch/stop cycles (spawning bash); +// give CI runners headroom over vitest's 5s default. +vi.setConfig({ testTimeout: 20000 }); + function taskCfg(task: TaskSpec): CtlConfig { return { services: [], tasks: [task], replacements: {} }; } @@ -106,16 +110,21 @@ describe('idempotent tasks', () => { it('falls back to a writable dir under logDir when stateDir is not creatable', async () => { const ran = join(dir, 'ran'); + // A regular file used as a parent dir → mkdir fails fast with ENOTDIR + // (deterministic + cross-platform; /proc behaves differently per-OS). + const blocker = join(dir, 'blocker'); + await writeFile(blocker, ''); + const badStateDir = join(blocker, 'agentbox'); const task = { name: 't', command: `: > '${ran}'`, needs: [], idempotent: { kind: 'marker' } as const }; - // /proc/... is not creatable — the supervisor must fall back to /state. 
- const sup1 = new Supervisor({ workspace: dir, logDir: dir, stateDir: '/proc/nope/agentbox' }); + + const sup1 = new Supervisor({ workspace: dir, logDir: dir, stateDir: badStateDir }); await sup1.init(taskCfg(task)); await waitForTaskDone(sup1, 't'); expect(existsSync(join(dir, 'state', 'tasks', 't'))).toBe(true); // marker in fallback await sup1.stopAll(); await rm(ran); - const sup2 = new Supervisor({ workspace: dir, logDir: dir, stateDir: '/proc/nope/agentbox' }); + const sup2 = new Supervisor({ workspace: dir, logDir: dir, stateDir: badStateDir }); await sup2.init(taskCfg(task)); await waitForTaskDone(sup2, 't'); expect(existsSync(ran)).toBe(false); // skipped via the fallback marker From 987bcb4e1be37062080bddbca16c151da2eb4bc8 Mon Sep 17 00:00:00 2001 From: Marco D'Alia Date: Sun, 7 Jun 2026 12:45:17 +0100 Subject: [PATCH 08/12] feat(agentbox.yaml): docker image: services + {{AGENTBOX_AUTO_SECRET}} MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - services: image: form (ports/env/args/container_name) synthesizes the docker start-or-run shell; command|image mutually exclusive; reused by name - render: {{AGENTBOX_AUTO_SECRET}} (fresh per render) and :name (persisted at /secrets/, reused) — replaces openssl rand in env tasks - shared resolveWritableStateDir (state-dir.ts) backs markers + secrets - schema (oneOf command/image + dependentRequired) + drift fixtures - unit tests (config image synth/xor, secret per-render vs persisted) - docs + agentbox-setup skill --- apps/cli/share/agentbox-setup/SKILL.md | 27 +++++ apps/web/content/docs/agentbox-yaml.mdx | 52 +++++++- docs/features.md | 3 +- docs/in-box-supervisor.md | 2 + packages/ctl/schema/agentbox.schema.json | 29 ++++- packages/ctl/src/commands/render.ts | 14 ++- packages/ctl/src/config.ts | 145 ++++++++++++++++++++++- packages/ctl/src/secret.ts | 71 +++++++++++ packages/ctl/src/state-dir.ts | 34 ++++++ packages/ctl/src/supervisor.ts | 23 ++-- 
packages/ctl/test/config.test.ts | 59 +++++++++ packages/ctl/test/schema-drift.test.ts | 50 ++++++++ packages/ctl/test/secret.test.ts | 54 +++++++++ 13 files changed, 537 insertions(+), 26 deletions(-) create mode 100644 packages/ctl/src/secret.ts create mode 100644 packages/ctl/src/state-dir.ts create mode 100644 packages/ctl/test/secret.test.ts diff --git a/apps/cli/share/agentbox-setup/SKILL.md b/apps/cli/share/agentbox-setup/SKILL.md index 3f61f7be..3358b8f9 100644 --- a/apps/cli/share/agentbox-setup/SKILL.md +++ b/apps/cli/share/agentbox-setup/SKILL.md @@ -51,6 +51,31 @@ Look at `/workspace`: ### Stateful services: data persistence & re-seeding (read this for databases) +**Declare a containerized dependency with the `image:` service form** — AgentBox +generates the `docker start`-or-`run` shell (no hand-written `docker run … || docker +start …`). The container runs in the box's dockerd; a published port is reachable +from other in-box services at `127.0.0.1:<hostPort>`: + +```yaml +services: + postgres: + image: postgres:17-alpine + ports: ["5432:5432"] + env: + POSTGRES_PASSWORD: postgres + POSTGRES_DB: app + args: "-c max_connections=200" # string or ["-c","max_connections=200"] + container_name: app_db # optional; default = service name + ready_when: { port: 5432 } + restart: always +``` + +The container is reused by name across box stop/start. (Changing `image`/`env` +reuses the existing container as-is; `docker rm <container_name>` + `agentbox-ctl +reload` to apply.) Install the DB client the migrate/seed tasks need (e.g. +`postgresql-client`) in the `install` task and reach the DB over TCP — don't +`docker exec` the container (nested exec fails with a `setns` error in a box). + **A checkpoint does NOT capture docker-in-docker data.** `agentbox checkpoint` is a `docker commit` of the box's writable filesystem (the system + `/workspace`).
The in-box `dockerd` keeps its storage in a *separate* per-box volume (`/var/lib/docker`), which is **not** part of that image — it's fresh on every new box and wiped on `agentbox destroy`. So a database or cache you run as a **docker container** (e.g. `docker run … postgres`) starts **empty on every new box** created from a checkpoint (every `agentbox claude` / `agentbox create`), even though `/workspace` and any marker files you wrote were restored. (A DB run as a **native process** with its data dir on the box filesystem — e.g. `postgres -D /var/lib/postgresql/data` — *is* captured by the checkpoint, since it lives in the writable layer.) **Consequence for migrate/seed tasks of a containerized DB: do NOT use `idempotent: true` (the marker form).** A command-hash marker is correct for deps (they live in `/workspace`, which the checkpoint captures), but **wrong** for DB data living in a docker volume: the marker is restored from the checkpoint while the DB is empty, so a marker-guarded seed wrongly skips and the app boots against an empty database. Instead use the **`idempotent: { check: }`** form — the probe runs first and the seed runs unless the probe exits 0, and **no marker is written** (the DB is the source of truth). Gate on the actual data: @@ -277,6 +302,8 @@ Many apps hard-code a hostname (e.g. `optima.localhost`) or read a gitignored `. Note: an `idempotent: { check: }` probe runs verbatim via `bash -c` with the box env — use shell vars like `$AGENTBOX_BOX_NAME`, NOT `{{…}}` placeholders (those are only expanded by `render`/carry, never by the supervisor). + **Generated secrets:** put `{{AGENTBOX_AUTO_SECRET}}` in the template for a value like `BETTER_AUTH_SECRET` instead of shelling out to `openssl rand`. Unnamed → a fresh 32-byte base64url secret each render (stable when you render the template→`.env` once). 
`{{AGENTBOX_AUTO_SECRET:better-auth}}` → generated once, persisted at `/var/lib/agentbox/secrets/`, reused on every render (stable even if you render every boot). Example `env.example` line: `BETTER_AUTH_SECRET="{{AGENTBOX_AUTO_SECRET:better-auth}}"`. + - **`carry:` + `replaceEnvs`/`replace`/`rules`** — for a host-only file (e.g. a real `.env` with secrets that never lives in the repo), carry it in and render it host-side in one step (file entries only): ```yaml diff --git a/apps/web/content/docs/agentbox-yaml.mdx b/apps/web/content/docs/agentbox-yaml.mdx index 227fe4dd..813d871b 100644 --- a/apps/web/content/docs/agentbox-yaml.mdx +++ b/apps/web/content/docs/agentbox-yaml.mdx @@ -42,13 +42,14 @@ tasks: ## Services -A service is a long-running process. The spec requires `command`; unknown fields are rejected. +A service is a long-running process. It needs **either** `command` **or** [`image`](#docker-image-services) (not both); unknown fields are rejected. | Field | Default | Meaning | | --- | --- | --- | -| `command` | (required) | Shell string (run via `bash -c`) or an argv array. | +| `command` | (required, or `image`) | Shell string (run via `bash -c`) or an argv array. | +| `image` | (required, or `command`) | Run a docker container instead — see [docker image services](#docker-image-services). | | `cwd` | `/workspace` | Working directory; relative paths resolve against `/workspace`. | -| `env` | — | Extra env vars; scalar values, coerced to strings. | +| `env` | — | Extra env vars; scalar values, coerced to strings. (Container `-e` env for an `image` service.) | | `autostart` | `true` | Start automatically when the daemon boots. | | `restart` | `on-failure` | Restart policy — see [needs and restart](#needs-and-restart). | | `backoff` | — | Exponential backoff between restarts. | @@ -56,6 +57,7 @@ A service is a long-running process. The spec requires `command`; unknown fields | `ready_when` | — | Readiness probe — see [ready_when](#ready_when). 
| | `expose` | — | Mark the one web service — see [expose](#expose). | | `ide` | — | Per-service VS Code hints (host-side only). | +| `ports`, `args`, `container_name` | — | `image`-service only — see [docker image services](#docker-image-services). | A service moves through `pending → waiting → starting → running → ready`, and can land in `unhealthy`, `crashed`, `backoff`, or `stopped`. Logs land at `/var/log/agentbox/.log` inside the box. @@ -92,6 +94,32 @@ $ agentbox-ctl start web Use the array form of `command` to avoid shell quoting; use the string form (`bash -c`) when you need shell features like pipes, `&&`, or sourcing env files. +### Docker image services + +For a containerized dependency (a database, cache, …) set `image:` instead of `command:` and AgentBox generates the `docker start`-or-`run` shell for you — no hand-written `docker run … || docker start …` block. It runs in the box's own dockerd, so a published port like `5437:5432` is reachable from other in-box services at `127.0.0.1:5437`. + +```yaml +services: + postgres: + image: postgres:17-alpine + ports: ["5437:5432"] # ":" (or "") + env: # the container's -e env + POSTGRES_USER: optima + POSTGRES_PASSWORD: changeme + POSTGRES_DB: optima + args: "-c max_connections=200" # string OR ["-c", "max_connections=200"]; shell-tokenized + container_name: optima_db # optional; default = service name + ready_when: + port: 5437 + restart: always +``` + +All the usual service fields (`ready_when`, `restart`, `backoff`, `needs`, `expose`, `autostart`) still apply. The container is **reused by name** across box stop/start (its data lives in the per-box docker volume, which a checkpoint does not capture — see the database note in [services and tasks](/docs/services-and-tasks)). + + +A change to `image`/`ports`/`env` reuses the existing container as-is — AgentBox never auto-`docker rm`s it (that would wipe its data). To apply the change, `docker rm ` inside the box, then `agentbox-ctl reload`. 
+ + ## Tasks A task is a one-shot unit that runs to completion. It accepts **only** five fields: `command`, `cwd`, `env`, `needs`, and `idempotent`. Tasks cannot have `restart`, `autostart`, `backoff`, or `ready_when` — the schema rejects them. That is the key distinction from services. @@ -362,7 +390,23 @@ tasks: Arbitrary substitutions go through explicit `replace:` / `rules:` rules, not the placeholder whitelist. -`agentbox-ctl render ` flags: `--out ` (or `--in-place`, else stdout), `--env` (placeholder substitution), `--rules ` (comma-separated `replacements:` refs), `--rule 'from=>to'` (literal, repeatable), and `--rule-regex 'pat=>repl'` (regex, repeatable). +### Generated secrets + +`render` also expands a secret-generator token, so you can stop shelling out to `openssl rand`: + +| Token | Behavior | +| --- | --- | +| `{{AGENTBOX_AUTO_SECRET}}` | A fresh 32-byte base64url secret each render. Stable in practice because you render the template→file once (guarded). | +| `{{AGENTBOX_AUTO_SECRET:<name>}}` | Generated once, persisted at `/var/lib/agentbox/secrets/`, **reused** on every later render — stable even if you render every boot. | + +```yaml +# env.example +BETTER_AUTH_SECRET="{{AGENTBOX_AUTO_SECRET:better-auth}}" +``` + +`AGENTBOX_AUTO_SECRET` is a `render`-only token (the persistent store lives in the box); it isn't a `replaceEnvs` whitelist placeholder and isn't expanded by `carry:`. + +`agentbox-ctl render ` flags: `--out ` (or `--in-place`, else stdout), `--env` (placeholder substitution), `--rules ` (comma-separated `replacements:` refs), `--rule 'from=>to'` (literal, repeatable), `--rule-regex 'pat=>repl'` (regex, repeatable), and `--state-dir ` (where named secrets persist). Reach for `replaceEnvs`/`render` instead of hand-written `sed`: an `env` task that pins `BETTER_AUTH_URL`/`NEXT_PUBLIC_APP_URL` to `https://{{AGENTBOX_BOX_HOST}}` becomes a one-liner, and the box and host browser then resolve the app at the same URL.
diff --git a/docs/features.md b/docs/features.md index c9235e51..190d7725 100644 --- a/docs/features.md +++ b/docs/features.md @@ -8,7 +8,8 @@ Full local-Docker lifecycle (plus parity-tested for cloud via `--provider dayton - `agentbox create` — builds the image on first run (or resolves a checkpoint image when `--snapshot ` is given), detects git repos (root + 1st-level subdirs), collects host-side carry-over (`git stash create` + untracked `ls-files`), spins up the container, then seeds `/workspace` via either `seedWorkspace` (in-container `git worktree add` against the bind-mounted `.git/` + stash/untracked replay) or `seedWorkspaceFromDir` (tar-pipe from host workspace / APFS clone for the no-git case). Checkpoint restore skips both — the image already has `/workspace`. Mounts the `agentbox-claude-config` named volume at `/home/vscode/.claude` and rsyncs host's `~/.claude` into it (additive, host-authoritative). Bind-mounts each main repo's `.git/` at its identical absolute host path inside the container so worktree pointer files resolve symmetrically on both sides. `--with-env` (also on `agentbox claude`; config key `box.withEnv`) copies the host's `DEFAULT_ENV_PATTERNS` files (`.env*`, `.envrc`, `.dev.vars`, `secrets.toml`, `local.settings.json`, `appsettings.*.json`, `agentbox.yaml`) into `/workspace` after seeding — the host→box reverse of `agentbox download env` (gitignored files are otherwise excluded by the worktree carry-over's `git ls-files --others --exclude-standard`). One-shot at create time, lands in the container's writable layer (persists across stop/start), best-effort (warn-not-throw), recorded as `BoxRecord.withEnv` and surfaced in `agentbox status --inspect`. Implemented by `copyHostEnvFilesToBox` / `buildHostEnvFindArgs` in `packages/sandbox-docker/src/host-export.ts` (host `find . -print0 | tar` → `docker exec -i --user 1000:1000 tar -x`). - `carry:` in `agentbox.yaml` — declarative host→box file copy that bypasses `.gitignore`. 
Each entry maps a host path (`/abs`, `~/...`, or `./relative-to-project-root`) to an explicit in-box destination (`/abs` or `~/...` — `~/` expands to `/home/vscode`); accepts a `mode:` (octal), `user:` (uid), `exclude:` (tar globs / bare dir names), and `optional: true`. When copying a directory, heavy regenerable dirs (`.git`, `node_modules`, `bin`, `obj`, `packages`, `dist`, `.next`, `target` — `DEFAULT_CP_EXCLUDES` in `apps/cli/src/lib/dir-breakdown.ts`) are dropped by default and `exclude:` is additive. The resolver enforces no-`..`-traversal, denies `/proc|/sys|/dev|/etc/passwd|/etc/shadow`, caps per-entry size **after excludes** at `box.cpMaxBytes` (default 100 MiB — the same limit `agentbox cp` uses; carry callers pass the effective value into `resolveCarry`), and flags symlinks whose target leaves `$HOME` *and* the project root. On `agentbox create` / `claude` / `codex` / `opencode`, the host CLI prompts ONCE (`@clack/prompts.select` — `yes` / `skip just for this box` / `cancel create`) listing every src→dest with size + mode + symlink warnings, then threads the approved set into `provider.create` as `req.carry`. Auto-approve with `--carry-yes` (or `AGENTBOX_CARRY_YES=1` for CI); skip with `--carry skip` (or `AGENTBOX_CARRY=skip`). `agentbox fork` is the exception: it **sends** the carry: block by default (it forwards `--carry-yes`), because the host is trusted and the box is the untrusted side, so a host→box copy is safe — opt out with `agentbox fork --carry skip`. `-y` / `--yes` does NOT auto-approve carry — non-TTY use of `-y` with non-empty entries fails loud, asking for the explicit env var (auditable in CI). The `-i` (queued background) path runs the same gate on the host **at submit time** (`runQueuedCarryGate`), serializes the approved `ResolvedCarryEntry[]` onto the queue job (`QueueJobCreateOpts.carry`), and the host-side worker applies them at box-create time — so `--carry-yes` / `--carry skip` work identically for `-i`. 
Docker injects via `copyCarryPathsToBox` (`docker cp` for files, host-tar + `docker exec tar -x` for dirs); cloud (Hetzner + Daytona) injects via `uploadCarryPaths` (host-tar + `backend.uploadFile` + `backend.exec(tar -x)`), per-entry isolated. Files land owned by `vscode:vscode` (uid 1000) when under `/home/vscode`; an audit summary (`{count, entries: [{src, dest, bytes}]}`) is recorded on `BoxRecord.carry`. Use case: develop AgentBox itself inside an AgentBox — carry `~/.agentbox/secrets.env` + `~/.agentbox/claude-credentials.json` so the in-box `agentbox` CLI is fully authenticated. Schema: `packages/ctl/src/carry.ts`. Resolver / prompt / gate: `apps/cli/src/lib/carry-resolve.ts`, `apps/cli/src/carry-prompt.ts`, `apps/cli/src/lib/carry-gate.ts`. Copiers: `packages/sandbox-docker/src/host-export.ts:copyCarryPathsToBox`, `packages/sandbox-cloud/src/carry.ts:uploadCarryPaths`. A **file** carry entry may also set `replaceEnvs: true` (substitute `{{AGENTBOX_*}}` whitelist placeholders), `replace:` (inline `{from,to,regex?}` rules), and/or `rules:` (named refs into the top-level `replacements:` block) — the file is rendered host-side to a temp by `renderCarryEntries` (`@agentbox/sandbox-core/src/carry-render.ts`) before the copy (the host source is never modified; the box name is known by then). Named refs are expanded in `resolveCarry`; replace options are file-only (a dir entry errors). -- **Idempotent tasks + the replacement engine** — a task may declare `idempotent: true` (the supervisor skips it while a SHA-256 of the resolved command matches a marker at `/tasks/`, default `stateDir=/var/lib/agentbox` — box rootfs, captured by checkpoints, off `/workspace`) or `idempotent: { check: }` (run the probe first; exit 0 = skip, no marker — for state outside the checkpoint like a containerized DB). `run-task --force` bypasses both. Handled in `TaskRunner.launch` (`packages/ctl/src/supervisor.ts`). 
The shared, pure replacement engine lives in `@agentbox/core` (`replace.ts`: `applyReplacements` = `{{AGENTBOX_*}}` whitelist substitution + ordered rules; re-exported by `@agentbox/ctl` which adds the yaml/fs loaders — kept in core to avoid the `sandbox-core → ctl → relay → sandbox-core` build cycle). Surfaced three ways: the top-level `replacements:` block (named rule-sets, parsed in `config.ts`), `agentbox-ctl render [--out|--in-place] [--env] [--rules|--rule|--rule-regex]` (in-box declarative `sed`, `packages/ctl/src/commands/render.ts`), and the carry `replaceEnvs`/`replace`/`rules` above. +- **Idempotent tasks + the replacement engine** — a task may declare `idempotent: true` (the supervisor skips it while a SHA-256 of the resolved command matches a marker at `/tasks/`, default `stateDir=/var/lib/agentbox` — box rootfs, captured by checkpoints, off `/workspace`) or `idempotent: { check: }` (run the probe first; exit 0 = skip, no marker — for state outside the checkpoint like a containerized DB). `run-task --force` bypasses both. Handled in `TaskRunner.launch` (`packages/ctl/src/supervisor.ts`). The shared, pure replacement engine lives in `@agentbox/core` (`replace.ts`: `applyReplacements` = `{{AGENTBOX_*}}` whitelist substitution + ordered rules; re-exported by `@agentbox/ctl` which adds the yaml/fs loaders — kept in core to avoid the `sandbox-core → ctl → relay → sandbox-core` build cycle). Surfaced three ways: the top-level `replacements:` block (named rule-sets, parsed in `config.ts`), `agentbox-ctl render [--out|--in-place] [--env] [--rules|--rule|--rule-regex]` (in-box declarative `sed`, `packages/ctl/src/commands/render.ts`), and the carry `replaceEnvs`/`replace`/`rules` above. `render` also expands `{{AGENTBOX_AUTO_SECRET}}` (fresh 32-byte base64url per render) / `{{AGENTBOX_AUTO_SECRET:}}` (generated once, persisted at `/secrets/`, reused) — `packages/ctl/src/secret.ts`, replacing `openssl rand` in env tasks. 
+- **Declarative docker `image:` services** — a service may set `image: postgres:17-alpine` (+ `ports`/`env`/`args`/`container_name`) instead of `command:`; `parseService` (`packages/ctl/src/config.ts`) synthesizes the `docker start`-or-`run` shell (the proven `examples/express-ready` / optima pattern), reused by name across restarts (env baked into `-e`, no auto-`rm`). `command`/`image` are mutually exclusive; the runner/`ready_when`/`restart`/`expose` machinery is unchanged. The shared writable-state-dir resolver (`packages/ctl/src/state-dir.ts`) backs both idempotent markers and persisted secrets. - `agentbox claude [-- ...]` — does everything `create` does, then starts Claude Code in a detached tmux session inside the box and attaches the user's terminal to it. `Ctrl+a d` detaches; the claude process keeps running. Reattach with `agentbox claude attach `. Forwards `ANTHROPIC_API_KEY` / `CLAUDE_CODE_OAUTH_TOKEN` / `CLAUDE_EFFORT` / `ANTHROPIC_MODEL` from host env when set. `--isolate-claude-config` opts into a per-box `agentbox-claude-config-` volume. - `agentbox claude start [box] [-- ...]` — start a Claude session in an **existing** box (vs `agentbox claude` which creates one). Resolves `[box]` via the usual auto-pick / index / name / id-prefix chain. Auto-unpauses/starts the container if needed (mirrors `shell`/`code`). Re-syncs `~/.claude` into the box volume by default (skip with `--no-sync-config` for speed). Re-runs `rebuildPluginNativeDeps` (idempotent — gated by per-plugin marker). If a tmux session with the configured name already exists, just attaches; otherwise starts a fresh one. Post-`--` args are forwarded to claude only when starting a fresh session. - `agentbox codex [-- ...]` — the Codex parity of `agentbox claude`: does everything `create` does, then launches OpenAI Codex in a detachable tmux session (`codex` session name; `--session-name` / config `codex.sessionName` override). Forwards `OPENAI_API_KEY` from host env. 
`--isolate-codex-config` opts into a per-box `agentbox-codex-config-` volume. Subcommands mirror claude: `agentbox codex start [box] [-- ...]` (start a session in an existing box, auto-unpause/start, `--no-sync-config` to skip the `~/.codex` resync), `agentbox codex attach [box]` (attach/start without resyncing), `agentbox codex login [-- ]` (sign in via a throwaway container — defaults to `codex login --device-auth`, the headless device-code flow; pass `-- --api-key` for the API-key path). Skips the claude-only steps (setup wizard, plugin rebuild). `apps/cli/src/commands/codex.ts`. Codex is baked into the base image, but a box built from a **checkpoint captured before Codex support** (or an older base image) won't have the binary — `ensureCodexInstalled` (`codex.ts`) detects that and `npm install -g @openai/codex`s it into the box's writable layer at create/start time (mirrors `--with-playwright`; fast `command -v` no-op when codex is already present). diff --git a/docs/in-box-supervisor.md b/docs/in-box-supervisor.md index 3b7370e8..bb5f7c48 100644 --- a/docs/in-box-supervisor.md +++ b/docs/in-box-supervisor.md @@ -9,6 +9,8 @@ - Wire ops: `status` returns `{ services, tasks }`; `task-status` returns task list; `wait-ready { timeoutMs?, units? }` blocks daemon-side until all autostart units reach their satisfying state, then resolves `{ ready: true }` or `{ ready: false, timedOut, failed }`; `run-task { name, force? }` resets a task back to pending so the scheduler reruns it (`force` also bypasses the `idempotent` skip). - **`idempotent:` on a task** (handled in `TaskRunner.launch`, `supervisor.ts`) makes a re-run a no-op when already satisfied. `idempotent: true` → marker keyed by a SHA-256 of the resolved command (+cwd+env) at `/tasks/` (`stateDir` defaults to `DEFAULT_STATE_DIR = /var/lib/agentbox`, the box rootfs — captured by checkpoints, never under `/workspace`); editing the command invalidates it. 
`idempotent: { check: }` → run the probe first; exit 0 = skip, no marker written (right for state outside the checkpoint, e.g. a containerized DB). Marker writes happen in the child `exit` handler on code 0. - **Replacement engine** (`@agentbox/core`'s `replace.ts`, re-exported by `@agentbox/ctl`'s `replace.ts` which adds the yaml/fs loaders — kept in core so the host carry path can share it without the `sandbox-core → ctl → relay → sandbox-core` cycle): `applyReplacements` does `{{AGENTBOX_*}}` whitelist substitution (`PLACEHOLDER_KEYS`) + ordered `{from,to,regex?}` rules. Surfaced three ways: the top-level `replacements:` block (named rule-sets, parsed in `config.ts`), `agentbox-ctl render` (in-box CLI, `commands/render.ts`), and carry `replaceEnvs`/`replace`/`rules` (host-side, file-only, rendered to a temp by `renderCarryEntries` in `@agentbox/sandbox-core` before the per-provider copy — wired in `sandbox-docker/create.ts` and `sandbox-cloud/cloud-provider.ts`). +- **`{{AGENTBOX_AUTO_SECRET}}` render generator** (`commands/render.ts` → `secret.ts`, not the pure engine — needs crypto + fs): a render-time pass before `applyReplacements`. Unnamed → fresh `randomBytes(32).toString('base64url')` per occurrence; `:` → generated once and persisted at `/secrets/` (0600), reused across renders. State dir resolved via the shared `resolveWritableStateDir` (`state-dir.ts`, extracted from the supervisor's marker-dir logic — try `/var/lib/agentbox`, fall back to `/state`). +- **Declarative docker `image:` services** (`config.ts` `parseService` → `synthesizeImageCommand`): a service sets `image:` (+ `ports`/`env`/`args`/`container_name`) instead of `command:`; the parser synthesizes the start-or-run shell (`docker container inspect` → `docker start` + `logs -f`, else `docker run` with `-p`/`-e`/args), so the runner/DAG/`ready_when`/`restart` machinery is unchanged. Container reused by name across restarts (no auto-`rm`; `env` baked into `-e`, `spec.env` left unset). 
`command` and `image` are mutually exclusive (one required). - Listens on `/run/agentbox/ctl.sock` (UNIX socket, newline-delimited JSON). Both the in-box `agentbox-ctl` client and host commands talk to the same socket — but the **host commands shell in via `docker exec`**, not the bind-mounted socket: Docker Desktop / OrbStack's VM boundary breaks `connect()` from the mac side, even though the file is visible. - Launched by `launchCtlDaemon()` in `sandbox-docker/src/ctl.ts` (best-effort; missing/empty `agentbox.yaml` is fine and doesn't fail `create`). Same call is repeated in `startBox()` because the daemon dies with the container. **Ordering invariant: the in-box `dockerd` is launched and awaited ready *before* the ctl daemon** — on create (`create.ts`), on docker restart (`startBox()`), and on cloud create/resume (`reEnsureCloudBox()` / create in `sandbox-cloud/src/cloud-provider.ts`). The supervisor starts services the moment it's up, so a `docker`-based service (`docker run`, `docker compose up`) would otherwise race a not-yet-ready `/var/run/docker.sock`. `launchDockerdDaemon` / `launchCloudDockerdDaemon` block until the socket is accept()-able (best-effort: a dockerd timeout still proceeds to launch the supervisor). Providers with no DinD (vercel, e2b) set `launchDockerd: false` and just launch the supervisor. - **In-box relay**: the daemon also binds an in-box endpoint on `127.0.0.1:8788` (`DEFAULT_BOX_RELAY_PORT`; override `AGENTBOX_BOX_RELAY_PORT`) so the in-box ctl client has a symmetric `AGENTBOX_RELAY_URL` across providers. For **cloud** boxes that endpoint is a full `mode: 'box'` relay the host's `CloudBoxPoller` long-polls; for **docker** boxes it's a thin reverse proxy (`packages/ctl/src/box-relay-forwarder.ts`) that whitelists `POST /rpc` + `POST /events` and forwards to `AGENTBOX_HOST_RELAY_URL` (default `http://host.docker.internal:8787`). 
Keeping :8787 unbound inside the box lets a nested `agentbox` run (developing agentbox-from-inside-agentbox) claim its own host relay there. See [`host-relay.md`](./host-relay.md). diff --git a/packages/ctl/schema/agentbox.schema.json b/packages/ctl/schema/agentbox.schema.json index 2687cbf1..e1f8208c 100644 --- a/packages/ctl/schema/agentbox.schema.json +++ b/packages/ctl/schema/agentbox.schema.json @@ -165,9 +165,36 @@ "service": { "type": "object", "additionalProperties": false, - "required": ["command"], + "oneOf": [{ "required": ["command"] }, { "required": ["image"] }], + "dependentRequired": { + "ports": ["image"], + "args": ["image"], + "container_name": ["image"] + }, "properties": { "command": { "$ref": "#/$defs/command" }, + "image": { + "type": "string", + "minLength": 1, + "description": "Run a docker container instead of a command. AgentBox synthesizes a start-or-run shell (the in-box container is reused by name across restarts). Mutually exclusive with command. Pair with ports/env/args/container_name." + }, + "ports": { + "type": "array", + "description": "Container port publishes for an image service: \":\" or \"\". Reachable from other in-box services at 127.0.0.1:.", + "items": { "type": "string", "pattern": "^[0-9]+(:[0-9]+)?$" } + }, + "args": { + "description": "Extra args appended after the image (image services only). A string or a list of strings; shell word-split.", + "oneOf": [ + { "type": "string" }, + { "type": "array", "items": { "type": "string" } } + ] + }, + "container_name": { + "type": "string", + "pattern": "^[A-Za-z0-9][A-Za-z0-9_.-]*$", + "description": "Container name for an image service (default: the service name)." + }, "cwd": { "type": "string", "description": "Working directory. Relative paths resolve against /workspace." 
diff --git a/packages/ctl/src/commands/render.ts b/packages/ctl/src/commands/render.ts index 24092022..0f84a505 100644 --- a/packages/ctl/src/commands/render.ts +++ b/packages/ctl/src/commands/render.ts @@ -1,6 +1,6 @@ import { readFile, writeFile } from 'node:fs/promises'; import { Command } from 'commander'; -import { DEFAULT_CONFIG_PATH } from '../types.js'; +import { DEFAULT_CONFIG_PATH, DEFAULT_STATE_DIR } from '../types.js'; import { applyReplacements, loadReplacementsSection, @@ -9,6 +9,7 @@ import { resolveRuleRefs, type ReplaceRule, } from '../replace.js'; +import { resolveAutoSecrets } from '../secret.js'; interface RenderOptions { out?: string; @@ -18,6 +19,7 @@ interface RenderOptions { ruleRegex: string[]; rules?: string; config: string; + stateDir: string; } function collect(value: string, prev: string[]): string[] { @@ -38,8 +40,16 @@ export const renderCommand = new Command('render') .option('--rule-regex repl>', 'regex replacement (repeatable)', collect, []) .option('--rules ', 'comma-separated replacements: rule-set names to apply') .option('--config ', 'agentbox.yaml to read replacements: from', DEFAULT_CONFIG_PATH) + .option('--state-dir ', 'where named {{AGENTBOX_AUTO_SECRET:x}} secrets persist', DEFAULT_STATE_DIR) .action(async (src: string, opts: RenderOptions) => { - const content = await readFile(src, 'utf8'); + const raw = await readFile(src, 'utf8'); + + // Resolve {{AGENTBOX_AUTO_SECRET}} tokens first (generate / persist), then + // the placeholder + rule substitutions. 
+ const content = await resolveAutoSecrets(raw, { + stateDir: opts.stateDir, + onLog: (msg) => process.stderr.write(`agentbox-ctl render: ${msg}\n`), + }); const rules: ReplaceRule[] = []; if (opts.rules) { diff --git a/packages/ctl/src/config.ts b/packages/ctl/src/config.ts index 3696489c..5975a3c7 100644 --- a/packages/ctl/src/config.ts +++ b/packages/ctl/src/config.ts @@ -83,6 +83,20 @@ export interface ServiceSpec { readyWhen?: ReadyProbe; /** When set, container port `expose.as` forwards to `127.0.0.1:expose.port`. */ expose?: ExposeSpec; + /** + * Declarative docker sidecar. When set, `command` is synthesized into a + * `docker start`-or-`run` shell (the in-box dockerd container is reused by + * name across restarts). Mutually exclusive with a user `command`. The other + * `*image*` fields below are kept for introspection; `env` is baked into the + * container's `-e` flags and not set as the process env. + */ + image?: string; + /** Port publishes (":" or ""); image services only. */ + ports?: string[]; + /** Extra args appended after the image (shell-tokenized); image services only. */ + args?: string; + /** Container name (default = service name); image services only. */ + containerName?: string; } export interface CtlConfig { @@ -371,8 +385,84 @@ const SERVICE_KEYS = new Set([ 'ready_when', 'expose', 'ide', + 'image', + 'ports', + 'args', + 'container_name', ]); +// Minimal POSIX single-quote escaping for values baked into a generated +// `bash -c` docker command. (sandbox-cloud has an equivalent quoteShellArg, but +// ctl can't depend on it — wrong direction.) 
+function shQuote(s: string): string { + if (/^[A-Za-z0-9_@%+=:,./-]+$/.test(s)) return s; + return `'${s.replace(/'/g, `'\\''`)}'`; +} + +function parsePorts(raw: unknown, where: string): string[] | undefined { + if (raw === undefined || raw === null) return undefined; + if (!Array.isArray(raw)) { + throw new ConfigError(`${where}.ports must be a list of ":" strings`); + } + const out: string[] = []; + for (const [i, v] of raw.entries()) { + const s = typeof v === 'number' ? String(v) : v; + if (typeof s !== 'string' || !/^\d+(:\d+)?$/.test(s.trim())) { + throw new ConfigError( + `${where}.ports[${String(i)}] must be "" or ":" (got ${JSON.stringify(v)})`, + ); + } + out.push(s.trim()); + } + return out.length > 0 ? out : undefined; +} + +// `args` is a string (appended raw, bash word-splits) or a list of strings +// (joined with spaces, then bash word-splits) — so both `args: "-c x=1"` and +// `args: ["-c", "x=1"]` produce the same docker invocation. +function parseArgs(raw: unknown, where: string): string | undefined { + if (raw === undefined || raw === null) return undefined; + if (typeof raw === 'string') return raw.trim().length > 0 ? raw : undefined; + if (Array.isArray(raw)) { + const parts: string[] = []; + for (const [i, v] of raw.entries()) { + if (typeof v !== 'string') throw new ConfigError(`${where}.args[${String(i)}] must be a string`); + parts.push(v); + } + const joined = parts.join(' ').trim(); + return joined.length > 0 ? joined : undefined; + } + throw new ConfigError(`${where}.args must be a string or a list of strings`); +} + +// Build the start-or-run shell for an `image:` service. Reuses the existing +// container by name across restarts (data lives in the per-box /var/lib/docker); +// a config change needs a manual `docker rm `. 
+function synthesizeImageCommand(opts: { + image: string; + name: string; + ports?: string[]; + env?: Record; + args?: string; +}): string { + const name = shQuote(opts.name); + const run = ['docker', 'run', '--name', name]; + for (const p of opts.ports ?? []) run.push('-p', shQuote(p)); + for (const [k, v] of Object.entries(opts.env ?? {})) run.push('-e', `${k}=${shQuote(v)}`); + run.push(shQuote(opts.image)); + let runLine = run.join(' '); + if (opts.args) runLine += ` ${opts.args}`; // raw — bash word-splits + return [ + 'set -e', + `if docker container inspect ${name} >/dev/null 2>&1; then`, + ` docker start ${name} >/dev/null`, + ` exec docker logs -f ${name}`, + 'else', + ` exec ${runLine}`, + 'fi', + ].join('\n'); +} + const EXPOSE_KEYS = new Set(['port', 'as']); function parseExpose(raw: unknown, where: string): ExposeSpec | undefined { @@ -407,9 +497,17 @@ function parseService(name: string, raw: unknown): ServiceSpec { throw new ConfigError(`${where} must be a mapping`); } rejectUnknownKeys(raw, SERVICE_KEYS, where); - const command = parseCommand(raw.command, where); + + const hasImage = raw.image !== undefined && raw.image !== null; + const hasCommand = raw.command !== undefined && raw.command !== null; + if (hasImage && hasCommand) { + throw new ConfigError(`${where} sets both command and image — use exactly one`); + } + if (!hasImage && !hasCommand) { + throw new ConfigError(`${where} must set either command or image`); + } + const cwd = raw.cwd === undefined ? undefined : assertString(raw.cwd, `${where}.cwd`); - const env = parseEnv(raw.env, where); const autostart = raw.autostart === undefined ? 
true : assertBool(raw.autostart, `${where}.autostart`); const restart = parseRestart(raw.restart, where); @@ -417,6 +515,49 @@ function parseService(name: string, raw: unknown): ServiceSpec { const needs = parseNeeds(raw.needs, `${where}.needs`); const readyWhen = parseReadyWhen(raw.ready_when, where); const expose = parseExpose(raw.expose, where); + + if (hasImage) { + const image = assertString(raw.image, `${where}.image`).trim(); + if (image.length === 0) throw new ConfigError(`${where}.image must not be empty`); + const ports = parsePorts(raw.ports, where); + const args = parseArgs(raw.args, where); + const env = parseEnv(raw.env, where); // container -e env + const containerName = + raw.container_name === undefined + ? name + : assertString(raw.container_name, `${where}.container_name`).trim(); + if (!/^[A-Za-z0-9][A-Za-z0-9_.-]*$/.test(containerName)) { + throw new ConfigError( + `${where}.container_name "${containerName}" is not a valid docker container name`, + ); + } + const command = synthesizeImageCommand({ image, name: containerName, ports, env, args }); + const spec: ServiceSpec = { + name, + command, + cwd, + autostart, + restart, + backoff, + needs, + readyWhen, + expose, + image, + containerName, + }; + if (ports !== undefined) spec.ports = ports; + if (args !== undefined) spec.args = args; + return spec; + } + + // command service — the image-only keys are rejected. 
+ for (const k of ['ports', 'args', 'container_name']) { + if (raw[k] !== undefined) { + throw new ConfigError(`${where}.${k} is only valid alongside image:`); + } + } + const command = parseCommand(raw.command, where); + const env = parseEnv(raw.env, where); return { name, command, cwd, env, autostart, restart, backoff, needs, readyWhen, expose }; } diff --git a/packages/ctl/src/secret.ts b/packages/ctl/src/secret.ts new file mode 100644 index 00000000..901eeaa1 --- /dev/null +++ b/packages/ctl/src/secret.ts @@ -0,0 +1,71 @@ +import { randomBytes } from 'node:crypto'; +import { mkdir, readFile, writeFile } from 'node:fs/promises'; +import { join } from 'node:path'; +import { DEFAULT_LOG_DIR } from './types.js'; +import { resolveWritableStateDir } from './state-dir.js'; + +// {{AGENTBOX_AUTO_SECRET}} or {{AGENTBOX_AUTO_SECRET:}}. Its own grammar +// (allows `:` + lowercase names), separate from the [A-Z0-9_] placeholder +// whitelist in replace.ts. +const SECRET_RE = /\{\{\s*AGENTBOX_AUTO_SECRET(?::([A-Za-z0-9_-]+))?\s*\}\}/g; +const SECRET_BYTES = 32; // 32 bytes → 43-char base64url, matches `openssl rand -base64 32` + +function generateSecret(): string { + return randomBytes(SECRET_BYTES).toString('base64url'); +} + +/** + * Replace `{{AGENTBOX_AUTO_SECRET}}` tokens in `content`: + * - unnamed → a fresh random secret per occurrence (stable in practice because + * the template→output render is guarded to run once). + * - `:` → generated once and persisted at `/secrets/`, + * reused on every later render so it's stable even if rendered every boot. + * + * No tokens → returns `content` untouched without touching the state dir. + */ +export async function resolveAutoSecrets( + content: string, + opts: { stateDir?: string; logDir?: string; onLog?: (msg: string) => void } = {}, +): Promise { + // Cheap bail-out that doesn't disturb the shared regex's lastIndex. 
+ if (!content.includes('AGENTBOX_AUTO_SECRET')) return content; + + const names = new Set(); + SECRET_RE.lastIndex = 0; + for (const m of content.matchAll(SECRET_RE)) if (m[1]) names.add(m[1]); + + const named = new Map(); + if (names.size > 0) { + const base = await resolveWritableStateDir( + opts.stateDir, + opts.logDir ?? DEFAULT_LOG_DIR, + 'secrets', + (msg) => opts.onLog?.(msg), + ); + const dir = join(base, 'secrets'); + for (const name of names) named.set(name, await loadOrCreateSecret(dir, name, opts.onLog)); + } + + return content.replace(SECRET_RE, (_match, name?: string) => + name ? named.get(name)! : generateSecret(), + ); +} + +async function loadOrCreateSecret( + dir: string, + name: string, + onLog?: (msg: string) => void, +): Promise { + const file = join(dir, name); + try { + const existing = (await readFile(file, 'utf8')).trim(); + if (existing.length > 0) return existing; + } catch { + // missing/unreadable → create below + } + const secret = generateSecret(); + await mkdir(dir, { recursive: true }); + await writeFile(file, `${secret}\n`, { mode: 0o600 }); + onLog?.(`generated persisted secret "${name}"`); + return secret; +} diff --git a/packages/ctl/src/state-dir.ts b/packages/ctl/src/state-dir.ts new file mode 100644 index 00000000..c474f9c0 --- /dev/null +++ b/packages/ctl/src/state-dir.ts @@ -0,0 +1,34 @@ +import { mkdir } from 'node:fs/promises'; +import { join } from 'node:path'; +import { DEFAULT_LOG_DIR, DEFAULT_STATE_DIR } from './types.js'; + +/** + * Resolve a writable base directory for supervisor/render state (idempotent-task + * markers, generated secrets). Prefer `want` (default /var/lib/agentbox — box + * rootfs, checkpoint-captured, off /workspace), but the daemon runs as a + * non-root user and that dir is root-owned on stock images, so fall back to + * `/state` (always daemon-writable, also on rootfs). 
Writability is + * probed by creating `/` (a no-op mkdir on an existing but + * unwritable root-owned dir would otherwise look like success). Returns the + * resolved base; the caller uses its own subdir under it. + */ +export async function resolveWritableStateDir( + want: string = DEFAULT_STATE_DIR, + logDir: string = DEFAULT_LOG_DIR, + ensureSubdir = 'tasks', + onNotice?: (msg: string) => void, +): Promise { + try { + await mkdir(join(want, ensureSubdir), { recursive: true }); + return want; + } catch { + const fallback = join(logDir, 'state'); + try { + await mkdir(join(fallback, ensureSubdir), { recursive: true }); + onNotice?.(`${want} not writable, using ${fallback}`); + return fallback; + } catch { + return want; // give up; the caller's write will surface the error + } + } +} diff --git a/packages/ctl/src/supervisor.ts b/packages/ctl/src/supervisor.ts index f62e8f43..e400b753 100644 --- a/packages/ctl/src/supervisor.ts +++ b/packages/ctl/src/supervisor.ts @@ -12,6 +12,7 @@ import { type TaskSpec, } from './config.js'; import { DEFAULT_STATE_DIR } from './types.js'; +import { resolveWritableStateDir } from './state-dir.js'; import { startProbe, type ProbeHandle } from './probe.js'; import { RelayClient } from './relay-client.js'; import { WebProxy } from './web-proxy.js'; @@ -753,22 +754,12 @@ export class Supervisor extends EventEmitter { * and off /workspace (no git noise). */ private async ensureStateDir(): Promise { - const want = this.opts.stateDir ?? DEFAULT_STATE_DIR; - try { - await mkdir(join(want, 'tasks'), { recursive: true }); - return want; - } catch { - const fallback = join(this.opts.logDir, 'state'); - try { - await mkdir(join(fallback, 'tasks'), { recursive: true }); - process.stderr.write( - `[ctl] idempotent markers: ${want} not writable, using ${fallback}\n`, - ); - return fallback; - } catch { - return want; // give up; per-task marker writes will warn - } - } + return resolveWritableStateDir( + this.opts.stateDir ?? 
DEFAULT_STATE_DIR, + this.opts.logDir, + 'tasks', + (m) => process.stderr.write(`[ctl] idempotent markers: ${m}\n`), + ); } private emitChange(): void { diff --git a/packages/ctl/test/config.test.ts b/packages/ctl/test/config.test.ts index 34969aaf..5e3366bb 100644 --- a/packages/ctl/test/config.test.ts +++ b/packages/ctl/test/config.test.ts @@ -76,3 +76,62 @@ services: expect(() => parseConfig(`services:\n "bad name":\n command: foo\n`)).toThrow(/must match/); }); }); + +describe('image services', () => { + function svc(yaml: string) { + return parseConfig(yaml).services[0]!; + } + + it('synthesizes a start-or-run command with ports/env/args', () => { + const s = svc(` +services: + postgres: + image: postgres:17-alpine + ports: ["5437:5432"] + env: + POSTGRES_USER: optima + POSTGRES_PASSWORD: "with space" + args: "-c max_connections=200" + container_name: optima_db +`); + expect(s.image).toBe('postgres:17-alpine'); + expect(s.containerName).toBe('optima_db'); + expect(s.ports).toEqual(['5437:5432']); + const cmd = s.command as string; + expect(cmd).toContain('docker container inspect optima_db'); + expect(cmd).toContain('docker start optima_db'); + expect(cmd).toContain('docker run --name optima_db -p 5437:5432'); + expect(cmd).toContain('-e POSTGRES_USER=optima'); + expect(cmd).toContain("-e POSTGRES_PASSWORD='with space'"); // shell-quoted value + expect(cmd).toContain('postgres:17-alpine -c max_connections=200'); + expect(s.env).toBeUndefined(); // baked into -e, not the process env + }); + + it('defaults container name to the service name and joins args lists', () => { + const s = svc(`services:\n cache:\n image: redis:7\n args: ["--save", "60 1"]\n`); + expect(s.containerName).toBe('cache'); + expect(s.command as string).toContain('redis:7 --save 60 1'); + }); + + it('rejects command + image together', () => { + expect(() => svc(`services:\n db:\n command: x\n image: postgres\n`)).toThrow( + ConfigError, + ); + }); + + it('rejects neither command nor image', 
() => { + expect(() => svc(`services:\n db:\n restart: always\n`)).toThrow(/command or image/); + }); + + it('rejects ports without image', () => { + expect(() => svc(`services:\n web:\n command: x\n ports: ["3000:3000"]\n`)).toThrow( + /only valid alongside image/, + ); + }); + + it('rejects a bad container_name', () => { + expect(() => + svc(`services:\n db:\n image: postgres\n container_name: "bad name"\n`), + ).toThrow(/not a valid docker container name/); + }); +}); diff --git a/packages/ctl/test/schema-drift.test.ts b/packages/ctl/test/schema-drift.test.ts index 6bb0eb48..39d10d3a 100644 --- a/packages/ctl/test/schema-drift.test.ts +++ b/packages/ctl/test/schema-drift.test.ts @@ -235,6 +235,32 @@ tasks: command: pnpm db:seed idempotent: check: "psql -tAc 'select 1' | grep -q 1" +`, + }, + { + name: 'docker image service (ports/env/args/container_name)', + yaml: ` +services: + postgres: + image: postgres:17-alpine + ports: ["5437:5432"] + env: + POSTGRES_USER: optima + POSTGRES_PASSWORD: changeme + args: "-c max_connections=200" + container_name: optima_db + ready_when: + port: 5437 + restart: always +`, + }, + { + name: 'docker image service minimal + args list', + yaml: ` +services: + cache: + image: redis:7 + args: ["--save", "60 1"] `, }, { @@ -582,6 +608,30 @@ services: yaml: `carry: 42\n`, schemaOnly: true, }, + { + name: 'service with both command and image', + yaml: `services:\n db:\n command: postgres\n image: postgres:17-alpine\n`, + }, + { + name: 'service with neither command nor image', + yaml: `services:\n db:\n ready_when:\n port: 5432\n`, + }, + { + name: 'ports without image', + yaml: `services:\n web:\n command: pnpm dev\n ports: ["3000:3000"]\n`, + }, + { + name: 'container_name without image', + yaml: `services:\n web:\n command: pnpm dev\n container_name: web1\n`, + }, + { + name: 'image service with non-numeric port', + yaml: `services:\n db:\n image: postgres:17-alpine\n ports: ["abc"]\n`, + }, + { + name: 'image service with invalid 
container_name', + yaml: `services:\n db:\n image: postgres:17-alpine\n container_name: "bad name"\n`, + }, { name: 'idempotent as a string', yaml: `tasks:\n build:\n command: pnpm build\n idempotent: "yes"\n`, diff --git a/packages/ctl/test/secret.test.ts b/packages/ctl/test/secret.test.ts new file mode 100644 index 00000000..2733a05c --- /dev/null +++ b/packages/ctl/test/secret.test.ts @@ -0,0 +1,54 @@ +import { existsSync, readFileSync } from 'node:fs'; +import { mkdtemp, rm } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { resolveAutoSecrets } from '../src/secret.js'; + +describe('resolveAutoSecrets', () => { + let dir: string; + beforeEach(async () => { + dir = await mkdtemp(join(tmpdir(), 'ctl-secret-')); + }); + afterEach(async () => { + await rm(dir, { recursive: true, force: true }); + }); + + const opts = () => ({ stateDir: dir, logDir: dir }); + + it('leaves content without the token untouched (no state-dir write)', async () => { + const out = await resolveAutoSecrets('PORT=3000\n', opts()); + expect(out).toBe('PORT=3000\n'); + expect(existsSync(join(dir, 'secrets'))).toBe(false); + }); + + it('replaces an unnamed token with a 43-char base64url secret', async () => { + const out = await resolveAutoSecrets('S="{{AGENTBOX_AUTO_SECRET}}"\n', opts()); + const m = out.match(/^S="([A-Za-z0-9_-]+)"$/m); + expect(m).toBeTruthy(); + expect(m![1]!.length).toBe(43); // 32 bytes base64url + }); + + it('regenerates a fresh secret each render for unnamed tokens', async () => { + const a = await resolveAutoSecrets('{{AGENTBOX_AUTO_SECRET}}', opts()); + const b = await resolveAutoSecrets('{{AGENTBOX_AUTO_SECRET}}', opts()); + expect(a).not.toBe(b); + }); + + it('persists and reuses a named secret across renders', async () => { + const first = await resolveAutoSecrets('{{AGENTBOX_AUTO_SECRET:better-auth}}', opts()); + expect(existsSync(join(dir, 
'secrets', 'better-auth'))).toBe(true); + const second = await resolveAutoSecrets('{{AGENTBOX_AUTO_SECRET:better-auth}}', opts()); + expect(second).toBe(first); // reused, not regenerated + expect(readFileSync(join(dir, 'secrets', 'better-auth'), 'utf8').trim()).toBe(first); + }); + + it('uses the same value for repeated occurrences of one named token', async () => { + const out = await resolveAutoSecrets( + 'A={{AGENTBOX_AUTO_SECRET:k}} B={{AGENTBOX_AUTO_SECRET:k}}', + opts(), + ); + const [, a, b] = out.match(/^A=(\S+) B=(\S+)$/)!; + expect(a).toBe(b); + }); +}); From f4a1e24f97b36821c4307baf9551d57fd2573101 Mon Sep 17 00:00:00 2001 From: Marco D'Alia Date: Sun, 7 Jun 2026 12:58:33 +0100 Subject: [PATCH 09/12] fix(ctl): render expands AGENTBOX_AUTO_SECRET after rules So a replacement rule can emit an {{AGENTBOX_AUTO_SECRET}} token that the secret pass then resolves in a single render (e.g. 'your-secret-here=>{{...}}'). Verified e2e: optima's env task renders env.example with a box-host rule + a persisted secret in one pass. --- packages/ctl/src/commands/render.ts | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/packages/ctl/src/commands/render.ts b/packages/ctl/src/commands/render.ts index 0f84a505..3f0e63a6 100644 --- a/packages/ctl/src/commands/render.ts +++ b/packages/ctl/src/commands/render.ts @@ -42,14 +42,7 @@ export const renderCommand = new Command('render') .option('--config ', 'agentbox.yaml to read replacements: from', DEFAULT_CONFIG_PATH) .option('--state-dir ', 'where named {{AGENTBOX_AUTO_SECRET:x}} secrets persist', DEFAULT_STATE_DIR) .action(async (src: string, opts: RenderOptions) => { - const raw = await readFile(src, 'utf8'); - - // Resolve {{AGENTBOX_AUTO_SECRET}} tokens first (generate / persist), then - // the placeholder + rule substitutions. 
- const content = await resolveAutoSecrets(raw, { - stateDir: opts.stateDir, - onLog: (msg) => process.stderr.write(`agentbox-ctl render: ${msg}\n`), - }); + const content = await readFile(src, 'utf8'); const rules: ReplaceRule[] = []; if (opts.rules) { @@ -65,13 +58,20 @@ export const renderCommand = new Command('render') for (const arg of opts.rule) rules.push(parseRuleArg(arg, false)); for (const arg of opts.ruleRegex) rules.push(parseRuleArg(arg, true)); - const result = applyReplacements(content, { + const replaced = applyReplacements(content, { env: opts.env, rules, context: placeholderContextFromEnv(), onWarn: (msg) => process.stderr.write(`agentbox-ctl render: ${msg}\n`), }); + // Secret expansion runs last, so a rule may emit an + // {{AGENTBOX_AUTO_SECRET}} token that this pass then resolves. + const result = await resolveAutoSecrets(replaced, { + stateDir: opts.stateDir, + onLog: (msg) => process.stderr.write(`agentbox-ctl render: ${msg}\n`), + }); + if (opts.inPlace) { await writeFile(src, result, 'utf8'); } else if (opts.out) { From 49dc3fd1d5b3c013d41f5fa87d599d99503a35ec Mon Sep 17 00:00:00 2001 From: Marco D'Alia Date: Sun, 7 Jun 2026 15:40:08 +0100 Subject: [PATCH 10/12] refactor(agentbox.yaml): rename task idempotent: -> run_once: MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Clearer, less jargon (AgentBox is unreleased, so a clean rename — no alias). Renames the YAML key, schema property, TS types (RunOnceSpec/parseRunOnce/ TaskSpec.runOnce), supervisor skip logic + log lines, docs, skills, and the services-and-tasks guide (now recommends run_once over hand-rolled markers). 
--- apps/cli/share/agentbox-setup/SKILL.md | 16 ++++---- .../share/host-skills/agentbox-info/SKILL.md | 2 +- apps/web/content/docs/agentbox-yaml.mdx | 20 +++++----- apps/web/content/docs/services-and-tasks.mdx | 18 ++++----- docs/features.md | 4 +- docs/in-box-supervisor.md | 4 +- packages/ctl/schema/agentbox.schema.json | 2 +- packages/ctl/src/commands/daemon.ts | 2 +- packages/ctl/src/config.ts | 28 +++++++------- packages/ctl/src/index.ts | 2 +- packages/ctl/src/state-dir.ts | 2 +- packages/ctl/src/supervisor.ts | 38 +++++++++---------- packages/ctl/src/types.ts | 2 +- packages/ctl/test/schema-drift.test.ts | 16 ++++---- ...nt.test.ts => supervisor-run-once.test.ts} | 14 +++---- 15 files changed, 84 insertions(+), 86 deletions(-) rename packages/ctl/test/{supervisor-idempotent.test.ts => supervisor-run-once.test.ts} (93%) diff --git a/apps/cli/share/agentbox-setup/SKILL.md b/apps/cli/share/agentbox-setup/SKILL.md index 3358b8f9..67a3ff82 100644 --- a/apps/cli/share/agentbox-setup/SKILL.md +++ b/apps/cli/share/agentbox-setup/SKILL.md @@ -46,7 +46,7 @@ Look at `/workspace`: - **Tasks** = one-shot. `pnpm install`, DB migrations, codegen, fixture loaders, install apt packages. Wire dependent services with `needs:` so they wait for the task to finish successfully. - Names: must match `[A-Za-z0-9_-]+`. Task names and service names share a namespace — no collisions. - No cycles in `needs:`. -- **Always generate a dependency-install task** and make it the root of the `needs:` graph (every service that needs deps gets `needs: [install, …]`). Future boxes start from a snapshot of the final filesystem so they won't need this, but updates or moving to a cloud provider might need to rebuild the container from scratch. The filesystem can be then later captured by `agentbox-ctl checkpoint --set-default`. 
The task must be **idempotent**: `agentbox-ctl` re-runs pending tasks on every box stop/start (the daemon dies with the container and is relaunched), so an unguarded install would reinstall on every start. The clean way is the **`idempotent: true`** field — the supervisor stores a marker keyed by a hash of the command and skips warm boots automatically (the marker lives at `/var/lib/agentbox/tasks/`, on the box rootfs, captured by checkpoints, never polluting `/workspace`). Editing the command re-runs it. Detect the package manager from the lockfile — never hardcode `pnpm`. See the worked example below. +- **Always generate a dependency-install task** and make it the root of the `needs:` graph (every service that needs deps gets `needs: [install, …]`). Future boxes start from a snapshot of the final filesystem so they won't need this, but updates or moving to a cloud provider might need to rebuild the container from scratch. The filesystem can be then later captured by `agentbox-ctl checkpoint --set-default`. The task must be **idempotent**: `agentbox-ctl` re-runs pending tasks on every box stop/start (the daemon dies with the container and is relaunched), so an unguarded install would reinstall on every start. The clean way is the **`run_once: true`** field — the supervisor stores a marker keyed by a hash of the command and skips warm boots automatically (the marker lives at `/var/lib/agentbox/tasks/`, on the box rootfs, captured by checkpoints, never polluting `/workspace`). Editing the command re-runs it. Detect the package manager from the lockfile — never hardcode `pnpm`. See the worked example below. - **Add a comment to the beginning** of the file to explain what you did and what issues you encountered, so that future run might use this information in case the project evolves and you need to update the agentbox.yaml file. ### Stateful services: data persistence & re-seeding (read this for databases) @@ -78,7 +78,7 @@ reload` to apply.) 
Install the DB client the migrate/seed tasks need (e.g. **A checkpoint does NOT capture docker-in-docker data.** `agentbox checkpoint` is a `docker commit` of the box's writable filesystem (the system + `/workspace`). The in-box `dockerd` keeps its storage in a *separate* per-box volume (`/var/lib/docker`), which is **not** part of that image — it's fresh on every new box and wiped on `agentbox destroy`. So a database or cache you run as a **docker container** (e.g. `docker run … postgres`) starts **empty on every new box** created from a checkpoint (every `agentbox claude` / `agentbox create`), even though `/workspace` and any marker files you wrote were restored. (A DB run as a **native process** with its data dir on the box filesystem — e.g. `postgres -D /var/lib/postgresql/data` — *is* captured by the checkpoint, since it lives in the writable layer.) -**Consequence for migrate/seed tasks of a containerized DB: do NOT use `idempotent: true` (the marker form).** A command-hash marker is correct for deps (they live in `/workspace`, which the checkpoint captures), but **wrong** for DB data living in a docker volume: the marker is restored from the checkpoint while the DB is empty, so a marker-guarded seed wrongly skips and the app boots against an empty database. Instead use the **`idempotent: { check: }`** form — the probe runs first and the seed runs unless the probe exits 0, and **no marker is written** (the DB is the source of truth). Gate on the actual data: +**Consequence for migrate/seed tasks of a containerized DB: do NOT use `run_once: true` (the marker form).** A command-hash marker is correct for deps (they live in `/workspace`, which the checkpoint captures), but **wrong** for DB data living in a docker volume: the marker is restored from the checkpoint while the DB is empty, so a marker-guarded seed wrongly skips and the app boots against an empty database. 
Instead use the **`run_once: { check: }`** form — the probe runs first and the seed runs unless the probe exits 0, and **no marker is written** (the DB is the source of truth). Gate on the actual data: ```yaml seed: @@ -90,7 +90,7 @@ reload` to apply.) Install the DB client the migrate/seed tasks need (e.g. # the data is present. command: pnpm db:seed needs: [install, migrate] - idempotent: + run_once: check: | export PGPASSWORD=postgres psql -h 127.0.0.1 -p 5432 -U postgres -d app -tAc \ @@ -168,7 +168,7 @@ tasks: # Idempotent install. /workspace is the container's writable filesystem, so # node_modules persists across pause/stop/start and is captured by # `agentbox checkpoint`. The host's node_modules is macOS-native and is - # never copied in, so the first Linux install runs; `idempotent: true` then + # never copied in, so the first Linux install runs; `run_once: true` then # skips it on every subsequent box start (the supervisor stores a marker # keyed by a hash of the command). Adjust the lockfile detection to the # project's package manager. @@ -180,7 +180,7 @@ tasks: corepack enable >/dev/null 2>&1 || true pnpm install --frozen-lockfile || pnpm install fi - idempotent: true + run_once: true migrate: command: pnpm db:migrate @@ -277,7 +277,7 @@ On Vercel: this actually STOPS the sandbox, so warn the user about it. Also the - Service like flask, nextjs, BETTER_AUTH_URL, NEXT_PUBLIC_APP_URL should use the `.localhost` url for the local development so that on the host it will use the same url as the box. Render this automatically instead of hand-writing `sed` — see section 6c. -- The `install` task above uses `idempotent: true`, so it is a no-op on warm boots. Do **not** wrap it in a manual marker check too. To force a one-off rebuild, run `agentbox-ctl run-task install --force` (which bypasses the idempotent marker), or edit the command (a changed command invalidates the hash and re-runs). 
+- The `install` task above uses `run_once: true`, so it is a no-op on warm boots. Do **not** wrap it in a manual marker check too. To force a one-off rebuild, run `agentbox-ctl run-task install --force` (which bypasses the run_once marker), or edit the command (a changed command invalidates the hash and re-runs). ## 11. Pin URLs / render config files (env, secrets) @@ -295,12 +295,12 @@ Many apps hard-code a hostname (e.g. `optima.localhost`) or read a gitignored `. tasks: env: # The render is idempotent (the rules re-pin the same lines every boot), so - # no `idempotent:` guard is needed — it self-corrects on a checkpoint-started + # no `run_once:` guard is needed — it self-corrects on a checkpoint-started # box that carries a different box's host in .env. command: agentbox-ctl render apps/saas/env.example --out apps/saas/.env --env --rules box-host ``` - Note: an `idempotent: { check: }` probe runs verbatim via `bash -c` with the box env — use shell vars like `$AGENTBOX_BOX_NAME`, NOT `{{…}}` placeholders (those are only expanded by `render`/carry, never by the supervisor). + Note: a `run_once: { check: }` probe runs verbatim via `bash -c` with the box env — use shell vars like `$AGENTBOX_BOX_NAME`, NOT `{{…}}` placeholders (those are only expanded by `render`/carry, never by the supervisor). **Generated secrets:** put `{{AGENTBOX_AUTO_SECRET}}` in the template for a value like `BETTER_AUTH_SECRET` instead of shelling out to `openssl rand`. Unnamed → a fresh 32-byte base64url secret each render (stable when you render the template→`.env` once). `{{AGENTBOX_AUTO_SECRET:better-auth}}` → generated once, persisted at `/var/lib/agentbox/secrets/`, reused on every render (stable even if you render every boot). Example `env.example` line: `BETTER_AUTH_SECRET="{{AGENTBOX_AUTO_SECRET:better-auth}}"`.
diff --git a/apps/cli/share/host-skills/agentbox-info/SKILL.md b/apps/cli/share/host-skills/agentbox-info/SKILL.md index 13db2df4..55b75b2e 100644 --- a/apps/cli/share/host-skills/agentbox-info/SKILL.md +++ b/apps/cli/share/host-skills/agentbox-info/SKILL.md @@ -237,7 +237,7 @@ Per-project numeric index (`1`, `2`, …) and friendly name (`review`, `smoke`) 2. **Use `-i` whenever the user asks for parallel agent work** rather than spawning multiple foreground sessions. Then point them at `agentbox dashboard` to watch progress. 3. **Pick the provider deliberately.** `docker` is the fast default. `--provider hetzner` gives a real VPS (heavier, isolated, requires `agentbox prepare --provider hetzner` once). `--provider vercel` is the managed cloud option. 4. **Cross-check before recommending a command.** If a flag isn't listed here, run `agentbox --help` (it's safe and read-only) before suggesting it to the user. -5. **`/agentbox-setup` is a different skill.** It runs *inside* a box to generate `/workspace/agentbox.yaml`. Don't conflate it with `/agentbox` (host-side fork) or this reference skill. When authoring `agentbox.yaml`, prefer the declarative `idempotent: true` / `idempotent: { check }` task field over hand-rolled marker/probe guards, and `agentbox-ctl render` / carry `replaceEnvs` over `sed` for pinning env URLs to `{{AGENTBOX_BOX_HOST}}`. +5. **`/agentbox-setup` is a different skill.** It runs *inside* a box to generate `/workspace/agentbox.yaml`. Don't conflate it with `/agentbox` (host-side fork) or this reference skill. When authoring `agentbox.yaml`, prefer the declarative `run_once: true` / `run_once: { check }` task field over hand-rolled marker/probe guards, and `agentbox-ctl render` / carry `replaceEnvs` over `sed` for pinning env URLs to `{{AGENTBOX_BOX_HOST}}`. 
## Reference diff --git a/apps/web/content/docs/agentbox-yaml.mdx b/apps/web/content/docs/agentbox-yaml.mdx index 813d871b..cdb30df8 100644 --- a/apps/web/content/docs/agentbox-yaml.mdx +++ b/apps/web/content/docs/agentbox-yaml.mdx @@ -122,29 +122,29 @@ A change to `image`/`ports`/`env` reuses the existing container as-is — AgentB ## Tasks -A task is a one-shot unit that runs to completion. It accepts **only** five fields: `command`, `cwd`, `env`, `needs`, and `idempotent`. Tasks cannot have `restart`, `autostart`, `backoff`, or `ready_when` — the schema rejects them. That is the key distinction from services. +A task is a one-shot unit that runs to completion. It accepts **only** five fields: `command`, `cwd`, `env`, `needs`, and `run_once`. Tasks cannot have `restart`, `autostart`, `backoff`, or `ready_when` — the schema rejects them. That is the key distinction from services. A task moves through `pending → waiting → running → done`, and can land in `failed` or `skipped`. Tasks run before dependent services via `needs:`. Typical use: install deps, build, seed a database. -Tasks **re-run on every supervisor restart** (which happens on box start, not just create). So a task must be idempotent. The `idempotent` field makes the supervisor skip an already-satisfied task for you — no more hand-rolled marker checks: +Tasks **re-run on every supervisor restart** (which happens on box start, not just create). So a task must be idempotent. The `run_once` field makes the supervisor skip an already-satisfied task for you — no more hand-rolled marker checks: ```yaml tasks: install: command: pnpm install --frozen-lockfile - idempotent: true # skip while the command is unchanged + run_once: true # skip while the command is unchanged build: command: pnpm build needs: [install] ``` -`idempotent` takes two forms: +`run_once` takes two forms: | Form | Behavior | | --- | --- | -| `idempotent: true` | The supervisor stores a marker keyed by a **hash of the resolved command**. 
A warm boot skips while the hash matches; editing the command invalidates it and re-runs. The marker lives at `/var/lib/agentbox/tasks/` (box rootfs — captured by checkpoints, never under `/workspace`). | -| `idempotent: { check: }` | Run the probe before launching; **exit 0 means already satisfied** (skip). No marker is written — the probe is the source of truth. Use this when the thing you'd guard on lives **outside** the checkpointed filesystem (e.g. a containerized database, whose data is in the in-box docker volume, not the checkpoint). The probe runs verbatim via `bash -c` with the box env, so use shell vars like `$AGENTBOX_BOX_NAME`; it does **not** expand `{{…}}` placeholders (those are render-only). | +| `run_once: true` | The supervisor stores a marker keyed by a **hash of the resolved command**. A warm boot skips while the hash matches; editing the command invalidates it and re-runs. The marker lives at `/var/lib/agentbox/tasks/<task-name>` (box rootfs — captured by checkpoints, never under `/workspace`). | +| `run_once: { check: <command> }` | Run the probe before launching; **exit 0 means already satisfied** (skip). No marker is written — the probe is the source of truth. Use this when the thing you'd guard on lives **outside** the checkpointed filesystem (e.g. a containerized database, whose data is in the in-box docker volume, not the checkpoint). The probe runs verbatim via `bash -c` with the box env, so use shell vars like `$AGENTBOX_BOX_NAME`; it does **not** expand `{{…}}` placeholders (those are render-only). | ```yaml tasks: @@ -153,7 +153,7 @@ tasks: needs: [migrate] # Probe the DB itself — a file marker would be restored from the checkpoint # while the containerized DB starts empty, wrongly skipping the seed.
- idempotent: + run_once: check: "psql -tAc \"select 1 from \\\"user\\\" limit 1\" | grep -q 1" ``` @@ -164,10 +164,10 @@ $ agentbox-ctl run-task install $ agentbox-ctl run-task install --force ``` -`run-task` resets the task to pending so the scheduler reruns it; it is a no-op on an already-`done` task unless you pass `--force`. `--force` also bypasses the `idempotent` skip (marker or check) and, for the marker form, rewrites the marker. +`run-task` resets the task to pending so the scheduler reruns it; it is a no-op on an already-`done` task unless you pass `--force`. `--force` also bypasses the `run_once` skip (marker or check) and, for the marker form, rewrites the marker. -Tasks re-run on every daemon start, not just at create. A non-idempotent task (an unguarded `git init`, a destructive migration) will fire repeatedly — declare `idempotent:` (or guard it yourself). Prefer the `{ check }` form for state that a checkpoint does not capture (containerized DB data), where a filesystem marker would desync. +Tasks re-run on every daemon start, not just at create. A non-idempotent task (an unguarded `git init`, a destructive migration) will fire repeatedly — declare `run_once:` (or guard it yourself). Prefer the `{ check }` form for state that a checkpoint does not capture (containerized DB data), where a filesystem marker would desync. ## ready_when @@ -367,7 +367,7 @@ carry: rules: [box-host] ``` -**2. In-box (files already in the workspace).** `agentbox-ctl render` is a declarative `sed` replacement — handy for rendering a gitignored `.env` from a committed `env.example` on every boot. The render is itself idempotent (the regex rules re-pin the same lines on every boot), so this task needs no `idempotent:` guard: +**2. In-box (files already in the workspace).** `agentbox-ctl render` is a declarative `sed` replacement — handy for rendering a gitignored `.env` from a committed `env.example` on every boot. 
The render is itself idempotent (the regex rules re-pin the same lines on every boot), so this task needs no `run_once:` guard: ```yaml tasks: diff --git a/apps/web/content/docs/services-and-tasks.mdx b/apps/web/content/docs/services-and-tasks.mdx index 04b0fab9..85260f40 100644 --- a/apps/web/content/docs/services-and-tasks.mdx +++ b/apps/web/content/docs/services-and-tasks.mdx @@ -87,22 +87,20 @@ Tasks must be **idempotent**, because they re-run more often than you'd expect: - On a new box from a [checkpoint](/docs/checkpoints-and-pausing) the tasks run again on the warm filesystem. - Only `pause`/`unpause` skips them — the daemon is frozen and thawed, not restarted. -For deps, guard the install task with a marker file in `/workspace` (e.g. `node_modules/.agentbox-installed`) so warm boots are a fast no-op — `/workspace` is part of the writable layer, so the marker is honest there. +For deps, declare [`run_once: true`](/docs/agentbox-yaml#tasks) — the supervisor stores a command-hash marker on the box rootfs and skips warm boots automatically (no hand-written marker file). -A **marker is the wrong guard for a containerized database's seed**, though. A [docker-in-docker](/docs/docker-in-docker) DB keeps its data in a per-box volume that checkpoints don't capture, so a box launched from a checkpoint has the marker (restored from `/workspace`) but an empty DB — and the seed wrongly skips. Gate the seed on the **actual data** instead: query the DB for a sentinel table/row and seed only when it's absent. That's a no-op once data exists (warm `stop`/`start`) and correctly re-seeds an empty DB (fresh checkpoint box). +A **command-hash marker is the wrong guard for a containerized database's seed**, though. A [docker-in-docker](/docs/docker-in-docker) DB keeps its data in a per-box volume that checkpoints don't capture, so a box launched from a checkpoint has the marker but an empty DB — and the seed wrongly skips. 
Use the `run_once: { check }` form instead, gating on the **actual data**: the probe queries the DB for a sentinel row and the seed runs unless it exits 0. That's a no-op once data exists (warm `stop`/`start`) and correctly re-seeds an empty DB (fresh checkpoint box). ```yaml tasks: seed: - command: | - set -e - export PGPASSWORD=postgres - if psql -h 127.0.0.1 -p 5432 -U postgres -d app -tAc \ - "SELECT EXISTS (SELECT 1 FROM users LIMIT 1)" 2>/dev/null | grep -q t; then - echo "data present — skip"; exit 0 - fi - pnpm db:seed + command: pnpm db:seed needs: [install, migrate] + run_once: + check: | + export PGPASSWORD=postgres + psql -h 127.0.0.1 -p 5432 -U postgres -d app -tAc \ + "SELECT EXISTS (SELECT 1 FROM users LIMIT 1)" 2>/dev/null | grep -q t ``` ## Status & logs diff --git a/docs/features.md b/docs/features.md index 190d7725..034bf3ab 100644 --- a/docs/features.md +++ b/docs/features.md @@ -8,8 +8,8 @@ Full local-Docker lifecycle (plus parity-tested for cloud via `--provider dayton - `agentbox create` — builds the image on first run (or resolves a checkpoint image when `--snapshot ` is given), detects git repos (root + 1st-level subdirs), collects host-side carry-over (`git stash create` + untracked `ls-files`), spins up the container, then seeds `/workspace` via either `seedWorkspace` (in-container `git worktree add` against the bind-mounted `.git/` + stash/untracked replay) or `seedWorkspaceFromDir` (tar-pipe from host workspace / APFS clone for the no-git case). Checkpoint restore skips both — the image already has `/workspace`. Mounts the `agentbox-claude-config` named volume at `/home/vscode/.claude` and rsyncs host's `~/.claude` into it (additive, host-authoritative). Bind-mounts each main repo's `.git/` at its identical absolute host path inside the container so worktree pointer files resolve symmetrically on both sides. 
`--with-env` (also on `agentbox claude`; config key `box.withEnv`) copies the host's `DEFAULT_ENV_PATTERNS` files (`.env*`, `.envrc`, `.dev.vars`, `secrets.toml`, `local.settings.json`, `appsettings.*.json`, `agentbox.yaml`) into `/workspace` after seeding — the host→box reverse of `agentbox download env` (gitignored files are otherwise excluded by the worktree carry-over's `git ls-files --others --exclude-standard`). One-shot at create time, lands in the container's writable layer (persists across stop/start), best-effort (warn-not-throw), recorded as `BoxRecord.withEnv` and surfaced in `agentbox status --inspect`. Implemented by `copyHostEnvFilesToBox` / `buildHostEnvFindArgs` in `packages/sandbox-docker/src/host-export.ts` (host `find . -print0 | tar` → `docker exec -i --user 1000:1000 tar -x`). - `carry:` in `agentbox.yaml` — declarative host→box file copy that bypasses `.gitignore`. Each entry maps a host path (`/abs`, `~/...`, or `./relative-to-project-root`) to an explicit in-box destination (`/abs` or `~/...` — `~/` expands to `/home/vscode`); accepts a `mode:` (octal), `user:` (uid), `exclude:` (tar globs / bare dir names), and `optional: true`. When copying a directory, heavy regenerable dirs (`.git`, `node_modules`, `bin`, `obj`, `packages`, `dist`, `.next`, `target` — `DEFAULT_CP_EXCLUDES` in `apps/cli/src/lib/dir-breakdown.ts`) are dropped by default and `exclude:` is additive. The resolver enforces no-`..`-traversal, denies `/proc|/sys|/dev|/etc/passwd|/etc/shadow`, caps per-entry size **after excludes** at `box.cpMaxBytes` (default 100 MiB — the same limit `agentbox cp` uses; carry callers pass the effective value into `resolveCarry`), and flags symlinks whose target leaves `$HOME` *and* the project root. 
On `agentbox create` / `claude` / `codex` / `opencode`, the host CLI prompts ONCE (`@clack/prompts.select` — `yes` / `skip just for this box` / `cancel create`) listing every src→dest with size + mode + symlink warnings, then threads the approved set into `provider.create` as `req.carry`. Auto-approve with `--carry-yes` (or `AGENTBOX_CARRY_YES=1` for CI); skip with `--carry skip` (or `AGENTBOX_CARRY=skip`). `agentbox fork` is the exception: it **sends** the carry: block by default (it forwards `--carry-yes`), because the host is trusted and the box is the untrusted side, so a host→box copy is safe — opt out with `agentbox fork --carry skip`. `-y` / `--yes` does NOT auto-approve carry — non-TTY use of `-y` with non-empty entries fails loud, asking for the explicit env var (auditable in CI). The `-i` (queued background) path runs the same gate on the host **at submit time** (`runQueuedCarryGate`), serializes the approved `ResolvedCarryEntry[]` onto the queue job (`QueueJobCreateOpts.carry`), and the host-side worker applies them at box-create time — so `--carry-yes` / `--carry skip` work identically for `-i`. Docker injects via `copyCarryPathsToBox` (`docker cp` for files, host-tar + `docker exec tar -x` for dirs); cloud (Hetzner + Daytona) injects via `uploadCarryPaths` (host-tar + `backend.uploadFile` + `backend.exec(tar -x)`), per-entry isolated. Files land owned by `vscode:vscode` (uid 1000) when under `/home/vscode`; an audit summary (`{count, entries: [{src, dest, bytes}]}`) is recorded on `BoxRecord.carry`. Use case: develop AgentBox itself inside an AgentBox — carry `~/.agentbox/secrets.env` + `~/.agentbox/claude-credentials.json` so the in-box `agentbox` CLI is fully authenticated. Schema: `packages/ctl/src/carry.ts`. Resolver / prompt / gate: `apps/cli/src/lib/carry-resolve.ts`, `apps/cli/src/carry-prompt.ts`, `apps/cli/src/lib/carry-gate.ts`. 
Copiers: `packages/sandbox-docker/src/host-export.ts:copyCarryPathsToBox`, `packages/sandbox-cloud/src/carry.ts:uploadCarryPaths`. A **file** carry entry may also set `replaceEnvs: true` (substitute `{{AGENTBOX_*}}` whitelist placeholders), `replace:` (inline `{from,to,regex?}` rules), and/or `rules:` (named refs into the top-level `replacements:` block) — the file is rendered host-side to a temp by `renderCarryEntries` (`@agentbox/sandbox-core/src/carry-render.ts`) before the copy (the host source is never modified; the box name is known by then). Named refs are expanded in `resolveCarry`; replace options are file-only (a dir entry errors). -- **Idempotent tasks + the replacement engine** — a task may declare `idempotent: true` (the supervisor skips it while a SHA-256 of the resolved command matches a marker at `/tasks/`, default `stateDir=/var/lib/agentbox` — box rootfs, captured by checkpoints, off `/workspace`) or `idempotent: { check: }` (run the probe first; exit 0 = skip, no marker — for state outside the checkpoint like a containerized DB). `run-task --force` bypasses both. Handled in `TaskRunner.launch` (`packages/ctl/src/supervisor.ts`). The shared, pure replacement engine lives in `@agentbox/core` (`replace.ts`: `applyReplacements` = `{{AGENTBOX_*}}` whitelist substitution + ordered rules; re-exported by `@agentbox/ctl` which adds the yaml/fs loaders — kept in core to avoid the `sandbox-core → ctl → relay → sandbox-core` build cycle). Surfaced three ways: the top-level `replacements:` block (named rule-sets, parsed in `config.ts`), `agentbox-ctl render [--out|--in-place] [--env] [--rules|--rule|--rule-regex]` (in-box declarative `sed`, `packages/ctl/src/commands/render.ts`), and the carry `replaceEnvs`/`replace`/`rules` above. 
`render` also expands `{{AGENTBOX_AUTO_SECRET}}` (fresh 32-byte base64url per render) / `{{AGENTBOX_AUTO_SECRET:}}` (generated once, persisted at `/secrets/`, reused) — `packages/ctl/src/secret.ts`, replacing `openssl rand` in env tasks. -- **Declarative docker `image:` services** — a service may set `image: postgres:17-alpine` (+ `ports`/`env`/`args`/`container_name`) instead of `command:`; `parseService` (`packages/ctl/src/config.ts`) synthesizes the `docker start`-or-`run` shell (the proven `examples/express-ready` / optima pattern), reused by name across restarts (env baked into `-e`, no auto-`rm`). `command`/`image` are mutually exclusive; the runner/`ready_when`/`restart`/`expose` machinery is unchanged. The shared writable-state-dir resolver (`packages/ctl/src/state-dir.ts`) backs both idempotent markers and persisted secrets. +- **`run_once` tasks + the replacement engine** — a task may declare `run_once: true` (the supervisor skips it while a SHA-256 of the resolved command matches a marker at `/tasks/`, default `stateDir=/var/lib/agentbox` — box rootfs, captured by checkpoints, off `/workspace`) or `run_once: { check: }` (run the probe first; exit 0 = skip, no marker — for state outside the checkpoint like a containerized DB). `run-task --force` bypasses both. Handled in `TaskRunner.launch` (`packages/ctl/src/supervisor.ts`). The shared, pure replacement engine lives in `@agentbox/core` (`replace.ts`: `applyReplacements` = `{{AGENTBOX_*}}` whitelist substitution + ordered rules; re-exported by `@agentbox/ctl` which adds the yaml/fs loaders — kept in core to avoid the `sandbox-core → ctl → relay → sandbox-core` build cycle). Surfaced three ways: the top-level `replacements:` block (named rule-sets, parsed in `config.ts`), `agentbox-ctl render [--out|--in-place] [--env] [--rules|--rule|--rule-regex]` (in-box declarative `sed`, `packages/ctl/src/commands/render.ts`), and the carry `replaceEnvs`/`replace`/`rules` above. 
`render` also expands `{{AGENTBOX_AUTO_SECRET}}` (fresh 32-byte base64url per render) / `{{AGENTBOX_AUTO_SECRET:}}` (generated once, persisted at `/secrets/`, reused) — `packages/ctl/src/secret.ts`, replacing `openssl rand` in env tasks. +- **Declarative docker `image:` services** — a service may set `image: postgres:17-alpine` (+ `ports`/`env`/`args`/`container_name`) instead of `command:`; `parseService` (`packages/ctl/src/config.ts`) synthesizes the `docker start`-or-`run` shell (the proven `examples/express-ready` / optima pattern), reused by name across restarts (env baked into `-e`, no auto-`rm`). `command`/`image` are mutually exclusive; the runner/`ready_when`/`restart`/`expose` machinery is unchanged. The shared writable-state-dir resolver (`packages/ctl/src/state-dir.ts`) backs both run_once markers and persisted secrets. - `agentbox claude [-- ...]` — does everything `create` does, then starts Claude Code in a detached tmux session inside the box and attaches the user's terminal to it. `Ctrl+a d` detaches; the claude process keeps running. Reattach with `agentbox claude attach `. Forwards `ANTHROPIC_API_KEY` / `CLAUDE_CODE_OAUTH_TOKEN` / `CLAUDE_EFFORT` / `ANTHROPIC_MODEL` from host env when set. `--isolate-claude-config` opts into a per-box `agentbox-claude-config-` volume. - `agentbox claude start [box] [-- ...]` — start a Claude session in an **existing** box (vs `agentbox claude` which creates one). Resolves `[box]` via the usual auto-pick / index / name / id-prefix chain. Auto-unpauses/starts the container if needed (mirrors `shell`/`code`). Re-syncs `~/.claude` into the box volume by default (skip with `--no-sync-config` for speed). Re-runs `rebuildPluginNativeDeps` (idempotent — gated by per-plugin marker). If a tmux session with the configured name already exists, just attaches; otherwise starts a fresh one. Post-`--` args are forwarded to claude only when starting a fresh session. 
- `agentbox codex [-- ...]` — the Codex parity of `agentbox claude`: does everything `create` does, then launches OpenAI Codex in a detachable tmux session (`codex` session name; `--session-name` / config `codex.sessionName` override). Forwards `OPENAI_API_KEY` from host env. `--isolate-codex-config` opts into a per-box `agentbox-codex-config-` volume. Subcommands mirror claude: `agentbox codex start [box] [-- ...]` (start a session in an existing box, auto-unpause/start, `--no-sync-config` to skip the `~/.codex` resync), `agentbox codex attach [box]` (attach/start without resyncing), `agentbox codex login [-- ]` (sign in via a throwaway container — defaults to `codex login --device-auth`, the headless device-code flow; pass `-- --api-key` for the API-key path). Skips the claude-only steps (setup wizard, plugin rebuild). `apps/cli/src/commands/codex.ts`. Codex is baked into the base image, but a box built from a **checkpoint captured before Codex support** (or an older base image) won't have the binary — `ensureCodexInstalled` (`codex.ts`) detects that and `npm install -g @openai/codex`s it into the box's writable layer at create/start time (mirrors `--with-playwright`; fast `command -v` no-op when codex is already present). diff --git a/docs/in-box-supervisor.md b/docs/in-box-supervisor.md index bb5f7c48..dcb7afa8 100644 --- a/docs/in-box-supervisor.md +++ b/docs/in-box-supervisor.md @@ -6,8 +6,8 @@ - `needs:` on any unit forms a DAG (cycles + unknown refs rejected at config load). Independent units launch in parallel. - `ready_when:` declares a readiness probe per service: `port` (TCP connect to `127.0.0.1:` by default), `log_match` (regex over stdout/stderr), or `http` (GET; expects 2xx by default). Probe lives in `packages/ctl/src/probe.ts`. `on_timeout: kill` (default) re-enters the restart policy; `on_timeout: mark_unhealthy` leaves the process running but flags the service — the escape hatch for legitimately slow cold starts. 
- `expose: { port: , as: 80 }` on a service marks it as **the** web service (at most one; `as` must be `80` — the only container port AgentBox reserves; `RESERVED_WEB_PORT` in `config.ts` / `WEB_CONTAINER_PORT` in `@agentbox/sandbox-docker`). The supervisor owns an in-process Node TCP forwarder (`WebProxy`, `packages/ctl/src/web-proxy.ts`) that binds container `:80` → `127.0.0.1:`, (re)pointed by `applyWebProxy()` on `init`/`reload` and torn down in `stopAll` — so the wizard writing `agentbox.yaml` post-create + `agentbox-ctl reload` activates it with no box restart. Binding `:80` as non-root `vscode` works because the image grants the node binary `cap_net_bind_service` (`setcap` in `Dockerfile.box`). The `expose` mapping rides in the status snapshot (`BoxStatusServiceEntry.expose`) so the host knows the web service even when `agentbox.yaml` lives only in the box. -- Wire ops: `status` returns `{ services, tasks }`; `task-status` returns task list; `wait-ready { timeoutMs?, units? }` blocks daemon-side until all autostart units reach their satisfying state, then resolves `{ ready: true }` or `{ ready: false, timedOut, failed }`; `run-task { name, force? }` resets a task back to pending so the scheduler reruns it (`force` also bypasses the `idempotent` skip). -- **`idempotent:` on a task** (handled in `TaskRunner.launch`, `supervisor.ts`) makes a re-run a no-op when already satisfied. `idempotent: true` → marker keyed by a SHA-256 of the resolved command (+cwd+env) at `/tasks/` (`stateDir` defaults to `DEFAULT_STATE_DIR = /var/lib/agentbox`, the box rootfs — captured by checkpoints, never under `/workspace`); editing the command invalidates it. `idempotent: { check: }` → run the probe first; exit 0 = skip, no marker written (right for state outside the checkpoint, e.g. a containerized DB). Marker writes happen in the child `exit` handler on code 0. +- Wire ops: `status` returns `{ services, tasks }`; `task-status` returns task list; `wait-ready { timeoutMs?, units? 
}` blocks daemon-side until all autostart units reach their satisfying state, then resolves `{ ready: true }` or `{ ready: false, timedOut, failed }`; `run-task { name, force? }` resets a task back to pending so the scheduler reruns it (`force` also bypasses the `run_once` skip). +- **`run_once:` on a task** (handled in `TaskRunner.launch`, `supervisor.ts`) makes a re-run a no-op when already satisfied. `run_once: true` → marker keyed by a SHA-256 of the resolved command (+cwd+env) at `<stateDir>/tasks/<task-name>` (`stateDir` defaults to `DEFAULT_STATE_DIR = /var/lib/agentbox`, the box rootfs — captured by checkpoints, never under `/workspace`); editing the command invalidates it. `run_once: { check: <command> }` → run the probe first; exit 0 = skip, no marker written (right for state outside the checkpoint, e.g. a containerized DB). Marker writes happen in the child `exit` handler on code 0. - **Replacement engine** (`@agentbox/core`'s `replace.ts`, re-exported by `@agentbox/ctl`'s `replace.ts` which adds the yaml/fs loaders — kept in core so the host carry path can share it without the `sandbox-core → ctl → relay → sandbox-core` cycle): `applyReplacements` does `{{AGENTBOX_*}}` whitelist substitution (`PLACEHOLDER_KEYS`) + ordered `{from,to,regex?}` rules. Surfaced three ways: the top-level `replacements:` block (named rule-sets, parsed in `config.ts`), `agentbox-ctl render` (in-box CLI, `commands/render.ts`), and carry `replaceEnvs`/`replace`/`rules` (host-side, file-only, rendered to a temp by `renderCarryEntries` in `@agentbox/sandbox-core` before the per-provider copy — wired in `sandbox-docker/create.ts` and `sandbox-cloud/cloud-provider.ts`). - **`{{AGENTBOX_AUTO_SECRET}}` render generator** (`commands/render.ts` → `secret.ts`, not the pure engine — needs crypto + fs): a render-time pass before `applyReplacements`. Unnamed → fresh `randomBytes(32).toString('base64url')` per occurrence; `:` → generated once and persisted at `/secrets/` (0600), reused across renders.
State dir resolved via the shared `resolveWritableStateDir` (`state-dir.ts`, extracted from the supervisor's marker-dir logic — try `/var/lib/agentbox`, fall back to `/state`). - **Declarative docker `image:` services** (`config.ts` `parseService` → `synthesizeImageCommand`): a service sets `image:` (+ `ports`/`env`/`args`/`container_name`) instead of `command:`; the parser synthesizes the start-or-run shell (`docker container inspect` → `docker start` + `logs -f`, else `docker run` with `-p`/`-e`/args), so the runner/DAG/`ready_when`/`restart` machinery is unchanged. Container reused by name across restarts (no auto-`rm`; `env` baked into `-e`, `spec.env` left unset). `command` and `image` are mutually exclusive (one required). diff --git a/packages/ctl/schema/agentbox.schema.json b/packages/ctl/schema/agentbox.schema.json index e1f8208c..84f096bb 100644 --- a/packages/ctl/schema/agentbox.schema.json +++ b/packages/ctl/schema/agentbox.schema.json @@ -251,7 +251,7 @@ "cwd": { "type": "string" }, "env": { "$ref": "#/$defs/env" }, "needs": { "$ref": "#/$defs/needs" }, - "idempotent": { + "run_once": { "description": "Skip the task when already satisfied. `true` stores a marker keyed by a hash of the command (re-runs when the command changes). `{ check: }` runs a probe first and skips when it exits 0 (right for state outside the checkpointed filesystem, e.g. 
a containerized DB).", "oneOf": [ { "type": "boolean" }, diff --git a/packages/ctl/src/commands/daemon.ts b/packages/ctl/src/commands/daemon.ts index cd57af9c..5eb106f2 100644 --- a/packages/ctl/src/commands/daemon.ts +++ b/packages/ctl/src/commands/daemon.ts @@ -48,7 +48,7 @@ export const daemonCommand = new Command('daemon') .option('--socket ', 'unix socket path', DEFAULT_SOCKET_PATH) .option('--config ', 'path to agentbox.yaml', DEFAULT_CONFIG_PATH) .option('--log-dir ', 'where per-service log files are written', DEFAULT_LOG_DIR) - .option('--state-dir ', 'where idempotent-task markers are written', DEFAULT_STATE_DIR) + .option('--state-dir ', 'where run_once task markers are written', DEFAULT_STATE_DIR) .option('--workspace ', 'cwd for service processes', '/workspace') .action(async (opts: DaemonOptions) => { const cfg = await loadConfig(opts.config); diff --git a/packages/ctl/src/config.ts b/packages/ctl/src/config.ts index 5975a3c7..925a3916 100644 --- a/packages/ctl/src/config.ts +++ b/packages/ctl/src/config.ts @@ -48,19 +48,19 @@ export interface ExposeSpec { } /** - * Declarative idempotence for a task. The supervisor re-runs every task from - * `pending` on each box start; `idempotent` lets it skip a task that has - * already succeeded. + * Declarative "run once" for a task. The supervisor re-runs every task from + * `pending` on each box start; `run_once` lets it skip a task that has already + * succeeded. * - * - `{ kind: 'marker' }` (from `idempotent: true`) — the supervisor stores a + * - `{ kind: 'marker' }` (from `run_once: true`) — the supervisor stores a * marker keyed by a hash of the resolved command; a warm boot skips while the * hash matches, and editing the command re-runs. - * - `{ kind: 'check', command }` (from `idempotent: { check: ... }`) — run the + * - `{ kind: 'check', command }` (from `run_once: { check: ... }`) — run the * probe before launching; exit 0 means already satisfied (skip). 
No marker: * the probe is the source of truth (right for data that lives outside the * checkpointed filesystem, e.g. a containerized DB). */ -export type TaskIdempotent = { kind: 'marker' } | { kind: 'check'; command: string }; +export type RunOnceSpec = { kind: 'marker' } | { kind: 'check'; command: string }; export interface TaskSpec { name: string; @@ -68,7 +68,7 @@ export interface TaskSpec { cwd?: string; env?: Record; needs: string[]; - idempotent?: TaskIdempotent; + runOnce?: RunOnceSpec; } export interface ServiceSpec { @@ -561,23 +561,23 @@ function parseService(name: string, raw: unknown): ServiceSpec { return { name, command, cwd, env, autostart, restart, backoff, needs, readyWhen, expose }; } -const TASK_KEYS = new Set(['command', 'cwd', 'env', 'needs', 'idempotent']); +const TASK_KEYS = new Set(['command', 'cwd', 'env', 'needs', 'run_once']); -function parseIdempotent(raw: unknown, where: string): TaskIdempotent | undefined { +function parseRunOnce(raw: unknown, where: string): RunOnceSpec | undefined { if (raw === undefined || raw === null || raw === false) return undefined; if (raw === true) return { kind: 'marker' }; if (isPlainObject(raw)) { const keys = Object.keys(raw); if (keys.length !== 1 || keys[0] !== 'check') { - throw new ConfigError(`${where}.idempotent object form must be exactly { check: }`); + throw new ConfigError(`${where}.run_once object form must be exactly { check: }`); } const check = raw.check; if (typeof check !== 'string' || check.trim().length === 0) { - throw new ConfigError(`${where}.idempotent.check must be a non-empty command string`); + throw new ConfigError(`${where}.run_once.check must be a non-empty command string`); } return { kind: 'check', command: check }; } - throw new ConfigError(`${where}.idempotent must be true or { check: }`); + throw new ConfigError(`${where}.run_once must be true or { check: }`); } function parseTask(name: string, raw: unknown): TaskSpec { @@ -590,9 +590,9 @@ function parseTask(name: 
string, raw: unknown): TaskSpec { const cwd = raw.cwd === undefined ? undefined : assertString(raw.cwd, `${where}.cwd`); const env = parseEnv(raw.env, where); const needs = parseNeeds(raw.needs, `${where}.needs`); - const idempotent = parseIdempotent(raw.idempotent, where); + const runOnce = parseRunOnce(raw.run_once, where); const spec: TaskSpec = { name, command, cwd, env, needs }; - if (idempotent !== undefined) spec.idempotent = idempotent; + if (runOnce !== undefined) spec.runOnce = runOnce; return spec; } diff --git a/packages/ctl/src/index.ts b/packages/ctl/src/index.ts index de157bc2..be42cb76 100644 --- a/packages/ctl/src/index.ts +++ b/packages/ctl/src/index.ts @@ -68,7 +68,7 @@ export { type RestartPolicy, type ServiceSpec, type TaskSpec, - type TaskIdempotent, + type RunOnceSpec, } from './config.js'; export { parseCarryRaw, diff --git a/packages/ctl/src/state-dir.ts b/packages/ctl/src/state-dir.ts index c474f9c0..fab7c5d5 100644 --- a/packages/ctl/src/state-dir.ts +++ b/packages/ctl/src/state-dir.ts @@ -3,7 +3,7 @@ import { join } from 'node:path'; import { DEFAULT_LOG_DIR, DEFAULT_STATE_DIR } from './types.js'; /** - * Resolve a writable base directory for supervisor/render state (idempotent-task + * Resolve a writable base directory for supervisor/render state (run_once task * markers, generated secrets). Prefer `want` (default /var/lib/agentbox — box * rootfs, checkpoint-captured, off /workspace), but the daemon runs as a * non-root user and that dir is root-owned on stock images, so fall back to diff --git a/packages/ctl/src/supervisor.ts b/packages/ctl/src/supervisor.ts index e400b753..3d2a2192 100644 --- a/packages/ctl/src/supervisor.ts +++ b/packages/ctl/src/supervisor.ts @@ -44,7 +44,7 @@ class Ring { export interface RunnerOptions { logDir: string; cwd: string; - /** Directory for idempotent-task completion markers. */ + /** Directory for run_once task completion markers. 
*/ stateDir: string; spawn?: typeof spawn; setTimer?: (fn: () => void, ms: number) => NodeJS.Timeout; @@ -472,7 +472,7 @@ export class TaskRunner extends EventEmitter implements Unit { return join(this.opts.stateDir, 'tasks', this.spec.name); } - /** Run the `idempotent.check` probe. Resolves true when it exits 0. */ + /** Run the `run_once.check` probe. Resolves true when it exits 0. */ private runCheck(command: string, cwd: string): Promise { return new Promise((resolve) => { let child: ChildProcess; @@ -485,7 +485,7 @@ export class TaskRunner extends EventEmitter implements Unit { } catch (err) { this.appendEvent( 'stderr', - `[ctl] idempotent check spawn failed: ${err instanceof Error ? err.message : String(err)}`, + `[ctl] run_once check spawn failed: ${err instanceof Error ? err.message : String(err)}`, ); resolve(false); return; @@ -503,11 +503,11 @@ export class TaskRunner extends EventEmitter implements Unit { } /** Returns a human reason if the task is already satisfied (skip), else null. */ - private async idempotentSkipReason(cwd: string): Promise { - const idem = this.spec.idempotent; - if (!idem) return null; - if (idem.kind === 'check') { - return (await this.runCheck(idem.command, cwd)) ? 'check passed' : null; + private async runOnceSkipReason(cwd: string): Promise { + const ro = this.spec.runOnce; + if (!ro) return null; + if (ro.kind === 'check') { + return (await this.runCheck(ro.command, cwd)) ? 'check passed' : null; } try { const have = (await readFile(this.markerPath(), 'utf8')).trim(); @@ -526,7 +526,7 @@ export class TaskRunner extends EventEmitter implements Unit { } catch (err) { this.appendEvent( 'stderr', - `[ctl] could not write idempotent marker: ${err instanceof Error ? err.message : String(err)}`, + `[ctl] could not write run_once marker: ${err instanceof Error ? 
err.message : String(err)}`, ); } } @@ -537,16 +537,16 @@ export class TaskRunner extends EventEmitter implements Unit { const force = this.forceNext; this.forceNext = false; - // Idempotency gate: skip the command entirely if already satisfied. `force` - // (run-task --force) bypasses it. Note: for non-idempotent tasks there is no + // run_once gate: skip the command entirely if already satisfied. `force` + // (run-task --force) bypasses it. Note: for tasks without run_once there is no // await here, so launch stays synchronous through to `setState('running')`. - if (spec.idempotent && !force) { + if (spec.runOnce && !force) { this.evaluating = true; try { - const reason = await this.idempotentSkipReason(cwd); + const reason = await this.runOnceSkipReason(cwd); if (reason) { this.ensureLogStream(); - this.appendEvent('stdout', `[ctl] idempotent: ${reason} — skip`); + this.appendEvent('stdout', `[ctl] run_once: ${reason} — skip`); this.startedAt = new Date(); this.finishedAt = new Date(); this.lastExitCode = 0; @@ -599,7 +599,7 @@ export class TaskRunner extends EventEmitter implements Unit { this.finishedAt = new Date(); this.child = null; this.appendEvent('stderr', `[ctl] exited code=${String(code)} signal=${signal ?? 'none'}`); - if (code === 0 && spec.idempotent?.kind === 'marker') { + if (code === 0 && spec.runOnce?.kind === 'marker') { void this.writeMarker(cwd); } this.setState(code === 0 ? 'done' : 'failed'); @@ -625,7 +625,7 @@ export class TaskRunner extends EventEmitter implements Unit { export interface SupervisorOptions { workspace: string; logDir: string; - /** Directory for idempotent-task markers (default {@link DEFAULT_STATE_DIR}). */ + /** Directory for run_once task markers (default {@link DEFAULT_STATE_DIR}). */ stateDir?: string; spawn?: typeof spawn; /** @@ -747,7 +747,7 @@ export class Supervisor extends EventEmitter { } /** - * Pick a writable directory for idempotent-task markers. 
Prefer the configured + * Pick a writable directory for run_once task markers. Prefer the configured * stateDir (default /var/lib/agentbox), but the daemon runs as a non-root user * and that path is root-owned on stock images, so fall back to a dir under * logDir — always daemon-writable, on the box rootfs (captured by checkpoints), @@ -758,7 +758,7 @@ export class Supervisor extends EventEmitter { this.opts.stateDir ?? DEFAULT_STATE_DIR, this.opts.logDir, 'tasks', - (m) => process.stderr.write(`[ctl] idempotent markers: ${m}\n`), + (m) => process.stderr.write(`[ctl] run_once markers: ${m}\n`), ); } @@ -1129,7 +1129,7 @@ function normalizeTask(t: TaskSpec): unknown { cwd: t.cwd ?? null, env: t.env ?? null, needs: [...t.needs].sort(), - idempotent: t.idempotent ?? null, + runOnce: t.runOnce ?? null, }; } diff --git a/packages/ctl/src/types.ts b/packages/ctl/src/types.ts index 7b82c933..89829cd3 100644 --- a/packages/ctl/src/types.ts +++ b/packages/ctl/src/types.ts @@ -307,7 +307,7 @@ export interface ClaudeSessionStatus { export const DEFAULT_SOCKET_PATH = '/run/agentbox/ctl.sock'; export const DEFAULT_CONFIG_PATH = '/workspace/agentbox.yaml'; export const DEFAULT_LOG_DIR = '/var/log/agentbox'; -// Where idempotent-task completion markers live. On the box rootfs (survives +// Where run_once task completion markers live. On the box rootfs (survives // pause/stop/start and is captured by `docker commit` checkpoints) but NOT under // /workspace, so markers never show up as untracked git changes. 
export const DEFAULT_STATE_DIR = '/var/lib/agentbox'; diff --git a/packages/ctl/test/schema-drift.test.ts b/packages/ctl/test/schema-drift.test.ts index 39d10d3a..cede0529 100644 --- a/packages/ctl/test/schema-drift.test.ts +++ b/packages/ctl/test/schema-drift.test.ts @@ -224,16 +224,16 @@ carry: `, }, { - name: 'task with idempotent: true', - yaml: `tasks:\n install:\n command: pnpm install\n idempotent: true\n`, + name: 'task with run_once: true', + yaml: `tasks:\n install:\n command: pnpm install\n run_once: true\n`, }, { - name: 'task with idempotent check', + name: 'task with run_once check', yaml: ` tasks: seed: command: pnpm db:seed - idempotent: + run_once: check: "psql -tAc 'select 1' | grep -q 1" `, }, @@ -633,12 +633,12 @@ services: yaml: `services:\n db:\n image: postgres:17-alpine\n container_name: "bad name"\n`, }, { - name: 'idempotent as a string', - yaml: `tasks:\n build:\n command: pnpm build\n idempotent: "yes"\n`, + name: 'run_once as a string', + yaml: `tasks:\n build:\n command: pnpm build\n run_once: "yes"\n`, }, { - name: 'idempotent object with unknown key', - yaml: `tasks:\n build:\n command: pnpm build\n idempotent:\n probe: foo\n`, + name: 'run_once object with unknown key', + yaml: `tasks:\n build:\n command: pnpm build\n run_once:\n probe: foo\n`, }, { name: 'replacements rule missing to', diff --git a/packages/ctl/test/supervisor-idempotent.test.ts b/packages/ctl/test/supervisor-run-once.test.ts similarity index 93% rename from packages/ctl/test/supervisor-idempotent.test.ts rename to packages/ctl/test/supervisor-run-once.test.ts index 184ab53c..a7151561 100644 --- a/packages/ctl/test/supervisor-idempotent.test.ts +++ b/packages/ctl/test/supervisor-run-once.test.ts @@ -29,7 +29,7 @@ function lineCount(path: string): number { return readFileSync(path, 'utf8').split('\n').filter((l) => l.length > 0).length; } -describe('idempotent tasks', () => { +describe('run_once tasks', () => { let dir: string; let stateDir: string; @@ -46,7 +46,7 
@@ describe('idempotent tasks', () => { it('marker form: skips on a warm boot, leaving the command unrun', async () => { const ran = join(dir, 'ran'); - const task = { name: 't', command: `: > '${ran}'`, needs: [], idempotent: { kind: 'marker' } as const }; + const task = { name: 't', command: `: > '${ran}'`, needs: [], runOnce: { kind: 'marker' } as const }; const sup1 = mk(); await sup1.init(taskCfg(task)); @@ -66,7 +66,7 @@ describe('idempotent tasks', () => { it('marker form: re-runs when the command changes', async () => { const sup1 = mk(); await sup1.init( - taskCfg({ name: 't', command: 'true', needs: [], idempotent: { kind: 'marker' } }), + taskCfg({ name: 't', command: 'true', needs: [], runOnce: { kind: 'marker' } }), ); await waitForTaskDone(sup1, 't'); await sup1.stopAll(); @@ -74,7 +74,7 @@ describe('idempotent tasks', () => { const ran2 = join(dir, 'ran2'); const sup2 = mk(); await sup2.init( - taskCfg({ name: 't', command: `: > '${ran2}'`, needs: [], idempotent: { kind: 'marker' } }), + taskCfg({ name: 't', command: `: > '${ran2}'`, needs: [], runOnce: { kind: 'marker' } }), ); await waitForTaskDone(sup2, 't'); expect(existsSync(ran2)).toBe(true); // changed command invalidated the marker @@ -88,7 +88,7 @@ describe('idempotent tasks', () => { name: 't', command: `echo x >> '${runs}'`, needs: [], - idempotent: { kind: 'check', command: `test -f '${satisfied}'` } as const, + runOnce: { kind: 'check', command: `test -f '${satisfied}'` } as const, }; // Probe fails (no satisfied file) → task runs. 
@@ -115,7 +115,7 @@ describe('idempotent tasks', () => { const blocker = join(dir, 'blocker'); await writeFile(blocker, ''); const badStateDir = join(blocker, 'agentbox'); - const task = { name: 't', command: `: > '${ran}'`, needs: [], idempotent: { kind: 'marker' } as const }; + const task = { name: 't', command: `: > '${ran}'`, needs: [], runOnce: { kind: 'marker' } as const }; const sup1 = new Supervisor({ workspace: dir, logDir: dir, stateDir: badStateDir }); await sup1.init(taskCfg(task)); @@ -137,7 +137,7 @@ describe('idempotent tasks', () => { name: 't', command: `echo x >> '${runs}'`, needs: [], - idempotent: { kind: 'marker' } as const, + runOnce: { kind: 'marker' } as const, }; const sup = mk(); await sup.init(taskCfg(task)); From 3e807b6e7db203b3ccac89689638db8522e02981 Mon Sep 17 00:00:00 2001 From: Marco D'Alia Date: Sun, 7 Jun 2026 15:48:33 +0100 Subject: [PATCH 11/12] fix(ctl): persist run_once marker before transitioning to done MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The marker write was fire-and-forget, so a task reached 'done' before its marker hit disk — a CI race (slower fs) and a latent durability gap (a crash in between would lose the marker and re-run next boot). Await the write, then setState('done'). --- packages/ctl/src/supervisor.ts | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/packages/ctl/src/supervisor.ts b/packages/ctl/src/supervisor.ts index 3d2a2192..a5cd7be3 100644 --- a/packages/ctl/src/supervisor.ts +++ b/packages/ctl/src/supervisor.ts @@ -600,9 +600,13 @@ export class TaskRunner extends EventEmitter implements Unit { this.child = null; this.appendEvent('stderr', `[ctl] exited code=${String(code)} signal=${signal ?? 
'none'}`); if (code === 0 && spec.runOnce?.kind === 'marker') { - void this.writeMarker(cwd); + // Persist the marker BEFORE transitioning to done, so the marker is + // durable once the task is observably complete (a crash in between would + // otherwise lose it and re-run next boot). + void this.writeMarker(cwd).finally(() => this.setState('done')); + } else { + this.setState(code === 0 ? 'done' : 'failed'); } - this.setState(code === 0 ? 'done' : 'failed'); }); child.on('error', (err) => { this.appendEvent('stderr', `[ctl] child error: ${err.message}`); From 4173323d6f565da58f1deda00d47d535e17665f6 Mon Sep 17 00:00:00 2001 From: Marco D'Alia Date: Sun, 7 Jun 2026 15:56:28 +0100 Subject: [PATCH 12/12] refactor(agentbox.yaml): nest image service config under image: Per review, group the container config under image: instead of flat sibling keys. image: is now either a bare ref string (image: redis:7) or a mapping { name, ports, env, args, container_name }. Container env moves to image.env (top-level env on an image service is rejected). Schema/tests/docs/skill + optima updated. 
--- apps/cli/share/agentbox-setup/SKILL.md | 15 +++-- apps/web/content/docs/agentbox-yaml.mdx | 24 +++---- docs/features.md | 2 +- docs/in-box-supervisor.md | 2 +- packages/ctl/schema/agentbox.schema.json | 48 ++++++------- packages/ctl/src/config.ts | 86 ++++++++++++++++-------- packages/ctl/test/config.test.ts | 43 +++++++----- packages/ctl/test/schema-drift.test.ts | 47 ++++++++----- 8 files changed, 165 insertions(+), 102 deletions(-) diff --git a/apps/cli/share/agentbox-setup/SKILL.md b/apps/cli/share/agentbox-setup/SKILL.md index 67a3ff82..43425186 100644 --- a/apps/cli/share/agentbox-setup/SKILL.md +++ b/apps/cli/share/agentbox-setup/SKILL.md @@ -59,13 +59,14 @@ from other in-box services at `127.0.0.1:`: ```yaml services: postgres: - image: postgres:17-alpine - ports: ["5432:5432"] - env: - POSTGRES_PASSWORD: postgres - POSTGRES_DB: app - args: "-c max_connections=200" # string or ["-c","max_connections=200"] - container_name: app_db # optional; default = service name + image: # bare string (image: postgres:17-alpine) or a mapping: + name: postgres:17-alpine + ports: ["5432:5432"] + env: + POSTGRES_PASSWORD: postgres + POSTGRES_DB: app + args: "-c max_connections=200" # string or ["-c","max_connections=200"] + container_name: app_db # optional; default = service name ready_when: { port: 5432 } restart: always ``` diff --git a/apps/web/content/docs/agentbox-yaml.mdx b/apps/web/content/docs/agentbox-yaml.mdx index cdb30df8..c4e5c633 100644 --- a/apps/web/content/docs/agentbox-yaml.mdx +++ b/apps/web/content/docs/agentbox-yaml.mdx @@ -49,7 +49,7 @@ A service is a long-running process. It needs **either** `command` **or** [`imag | `command` | (required, or `image`) | Shell string (run via `bash -c`) or an argv array. | | `image` | (required, or `command`) | Run a docker container instead — see [docker image services](#docker-image-services). | | `cwd` | `/workspace` | Working directory; relative paths resolve against `/workspace`. 
| -| `env` | — | Extra env vars; scalar values, coerced to strings. (Container `-e` env for an `image` service.) | +| `env` | — | Extra env vars for a `command` service (scalars, coerced to strings). For an `image` service, put container env under `image.env`. | | `autostart` | `true` | Start automatically when the daemon boots. | | `restart` | `on-failure` | Restart policy — see [needs and restart](#needs-and-restart). | | `backoff` | — | Exponential backoff between restarts. | @@ -57,7 +57,6 @@ A service is a long-running process. It needs **either** `command` **or** [`imag | `ready_when` | — | Readiness probe — see [ready_when](#ready_when). | | `expose` | — | Mark the one web service — see [expose](#expose). | | `ide` | — | Per-service VS Code hints (host-side only). | -| `ports`, `args`, `container_name` | — | `image`-service only — see [docker image services](#docker-image-services). | A service moves through `pending → waiting → starting → running → ready`, and can land in `unhealthy`, `crashed`, `backoff`, or `stopped`. Logs land at `/var/log/agentbox/.log` inside the box. @@ -96,25 +95,26 @@ Use the array form of `command` to avoid shell quoting; use the string form (`ba ### Docker image services -For a containerized dependency (a database, cache, …) set `image:` instead of `command:` and AgentBox generates the `docker start`-or-`run` shell for you — no hand-written `docker run … || docker start …` block. It runs in the box's own dockerd, so a published port like `5437:5432` is reachable from other in-box services at `127.0.0.1:5437`. +For a containerized dependency (a database, cache, …) set `image:` instead of `command:` and AgentBox generates the `docker start`-or-`run` shell for you — no hand-written `docker run … || docker start …` block. It runs in the box's own dockerd, so a published port like `5437:5432` is reachable from other in-box services at `127.0.0.1:5437`. 
`image:` is either a bare ref string (`image: redis:7`) or a mapping with the container config nested under it: ```yaml services: postgres: - image: postgres:17-alpine - ports: ["5437:5432"] # ":" (or "") - env: # the container's -e env - POSTGRES_USER: optima - POSTGRES_PASSWORD: changeme - POSTGRES_DB: optima - args: "-c max_connections=200" # string OR ["-c", "max_connections=200"]; shell-tokenized - container_name: optima_db # optional; default = service name + image: + name: postgres:17-alpine + ports: ["5437:5432"] # ":" (or "") + env: # the container's -e env + POSTGRES_USER: optima + POSTGRES_PASSWORD: changeme + POSTGRES_DB: optima + args: "-c max_connections=200" # string OR ["-c", "max_connections=200"]; shell-tokenized + container_name: optima_db # optional; default = service name ready_when: port: 5437 restart: always ``` -All the usual service fields (`ready_when`, `restart`, `backoff`, `needs`, `expose`, `autostart`) still apply. The container is **reused by name** across box stop/start (its data lives in the per-box docker volume, which a checkpoint does not capture — see the database note in [services and tasks](/docs/services-and-tasks)). +All the usual service fields (`ready_when`, `restart`, `backoff`, `needs`, `expose`, `autostart`) still apply at the service level — only the container config (`name`/`ports`/`env`/`args`/`container_name`) nests under `image:`. The container is **reused by name** across box stop/start (its data lives in the per-box docker volume, which a checkpoint does not capture — see the database note in [services and tasks](/docs/services-and-tasks)). A change to `image`/`ports`/`env` reuses the existing container as-is — AgentBox never auto-`docker rm`s it (that would wipe its data). To apply the change, `docker rm ` inside the box, then `agentbox-ctl reload`. 
diff --git a/docs/features.md b/docs/features.md index 034bf3ab..3845ee10 100644 --- a/docs/features.md +++ b/docs/features.md @@ -9,7 +9,7 @@ Full local-Docker lifecycle (plus parity-tested for cloud via `--provider dayton - `agentbox create` — builds the image on first run (or resolves a checkpoint image when `--snapshot ` is given), detects git repos (root + 1st-level subdirs), collects host-side carry-over (`git stash create` + untracked `ls-files`), spins up the container, then seeds `/workspace` via either `seedWorkspace` (in-container `git worktree add` against the bind-mounted `.git/` + stash/untracked replay) or `seedWorkspaceFromDir` (tar-pipe from host workspace / APFS clone for the no-git case). Checkpoint restore skips both — the image already has `/workspace`. Mounts the `agentbox-claude-config` named volume at `/home/vscode/.claude` and rsyncs host's `~/.claude` into it (additive, host-authoritative). Bind-mounts each main repo's `.git/` at its identical absolute host path inside the container so worktree pointer files resolve symmetrically on both sides. `--with-env` (also on `agentbox claude`; config key `box.withEnv`) copies the host's `DEFAULT_ENV_PATTERNS` files (`.env*`, `.envrc`, `.dev.vars`, `secrets.toml`, `local.settings.json`, `appsettings.*.json`, `agentbox.yaml`) into `/workspace` after seeding — the host→box reverse of `agentbox download env` (gitignored files are otherwise excluded by the worktree carry-over's `git ls-files --others --exclude-standard`). One-shot at create time, lands in the container's writable layer (persists across stop/start), best-effort (warn-not-throw), recorded as `BoxRecord.withEnv` and surfaced in `agentbox status --inspect`. Implemented by `copyHostEnvFilesToBox` / `buildHostEnvFindArgs` in `packages/sandbox-docker/src/host-export.ts` (host `find . -print0 | tar` → `docker exec -i --user 1000:1000 tar -x`). - `carry:` in `agentbox.yaml` — declarative host→box file copy that bypasses `.gitignore`. 
Each entry maps a host path (`/abs`, `~/...`, or `./relative-to-project-root`) to an explicit in-box destination (`/abs` or `~/...` — `~/` expands to `/home/vscode`); accepts a `mode:` (octal), `user:` (uid), `exclude:` (tar globs / bare dir names), and `optional: true`. When copying a directory, heavy regenerable dirs (`.git`, `node_modules`, `bin`, `obj`, `packages`, `dist`, `.next`, `target` — `DEFAULT_CP_EXCLUDES` in `apps/cli/src/lib/dir-breakdown.ts`) are dropped by default and `exclude:` is additive. The resolver enforces no-`..`-traversal, denies `/proc|/sys|/dev|/etc/passwd|/etc/shadow`, caps per-entry size **after excludes** at `box.cpMaxBytes` (default 100 MiB — the same limit `agentbox cp` uses; carry callers pass the effective value into `resolveCarry`), and flags symlinks whose target leaves `$HOME` *and* the project root. On `agentbox create` / `claude` / `codex` / `opencode`, the host CLI prompts ONCE (`@clack/prompts.select` — `yes` / `skip just for this box` / `cancel create`) listing every src→dest with size + mode + symlink warnings, then threads the approved set into `provider.create` as `req.carry`. Auto-approve with `--carry-yes` (or `AGENTBOX_CARRY_YES=1` for CI); skip with `--carry skip` (or `AGENTBOX_CARRY=skip`). `agentbox fork` is the exception: it **sends** the carry: block by default (it forwards `--carry-yes`), because the host is trusted and the box is the untrusted side, so a host→box copy is safe — opt out with `agentbox fork --carry skip`. `-y` / `--yes` does NOT auto-approve carry — non-TTY use of `-y` with non-empty entries fails loud, asking for the explicit env var (auditable in CI). The `-i` (queued background) path runs the same gate on the host **at submit time** (`runQueuedCarryGate`), serializes the approved `ResolvedCarryEntry[]` onto the queue job (`QueueJobCreateOpts.carry`), and the host-side worker applies them at box-create time — so `--carry-yes` / `--carry skip` work identically for `-i`. 
Docker injects via `copyCarryPathsToBox` (`docker cp` for files, host-tar + `docker exec tar -x` for dirs); cloud (Hetzner + Daytona) injects via `uploadCarryPaths` (host-tar + `backend.uploadFile` + `backend.exec(tar -x)`), per-entry isolated. Files land owned by `vscode:vscode` (uid 1000) when under `/home/vscode`; an audit summary (`{count, entries: [{src, dest, bytes}]}`) is recorded on `BoxRecord.carry`. Use case: develop AgentBox itself inside an AgentBox — carry `~/.agentbox/secrets.env` + `~/.agentbox/claude-credentials.json` so the in-box `agentbox` CLI is fully authenticated. Schema: `packages/ctl/src/carry.ts`. Resolver / prompt / gate: `apps/cli/src/lib/carry-resolve.ts`, `apps/cli/src/carry-prompt.ts`, `apps/cli/src/lib/carry-gate.ts`. Copiers: `packages/sandbox-docker/src/host-export.ts:copyCarryPathsToBox`, `packages/sandbox-cloud/src/carry.ts:uploadCarryPaths`. A **file** carry entry may also set `replaceEnvs: true` (substitute `{{AGENTBOX_*}}` whitelist placeholders), `replace:` (inline `{from,to,regex?}` rules), and/or `rules:` (named refs into the top-level `replacements:` block) — the file is rendered host-side to a temp by `renderCarryEntries` (`@agentbox/sandbox-core/src/carry-render.ts`) before the copy (the host source is never modified; the box name is known by then). Named refs are expanded in `resolveCarry`; replace options are file-only (a dir entry errors). - **`run_once` tasks + the replacement engine** — a task may declare `run_once: true` (the supervisor skips it while a SHA-256 of the resolved command matches a marker at `/tasks/`, default `stateDir=/var/lib/agentbox` — box rootfs, captured by checkpoints, off `/workspace`) or `run_once: { check: }` (run the probe first; exit 0 = skip, no marker — for state outside the checkpoint like a containerized DB). `run-task --force` bypasses both. Handled in `TaskRunner.launch` (`packages/ctl/src/supervisor.ts`). 
The shared, pure replacement engine lives in `@agentbox/core` (`replace.ts`: `applyReplacements` = `{{AGENTBOX_*}}` whitelist substitution + ordered rules; re-exported by `@agentbox/ctl` which adds the yaml/fs loaders — kept in core to avoid the `sandbox-core → ctl → relay → sandbox-core` build cycle). Surfaced three ways: the top-level `replacements:` block (named rule-sets, parsed in `config.ts`), `agentbox-ctl render [--out|--in-place] [--env] [--rules|--rule|--rule-regex]` (in-box declarative `sed`, `packages/ctl/src/commands/render.ts`), and the carry `replaceEnvs`/`replace`/`rules` above. `render` also expands `{{AGENTBOX_AUTO_SECRET}}` (fresh 32-byte base64url per render) / `{{AGENTBOX_AUTO_SECRET:}}` (generated once, persisted at `/secrets/`, reused) — `packages/ctl/src/secret.ts`, replacing `openssl rand` in env tasks. -- **Declarative docker `image:` services** — a service may set `image: postgres:17-alpine` (+ `ports`/`env`/`args`/`container_name`) instead of `command:`; `parseService` (`packages/ctl/src/config.ts`) synthesizes the `docker start`-or-`run` shell (the proven `examples/express-ready` / optima pattern), reused by name across restarts (env baked into `-e`, no auto-`rm`). `command`/`image` are mutually exclusive; the runner/`ready_when`/`restart`/`expose` machinery is unchanged. The shared writable-state-dir resolver (`packages/ctl/src/state-dir.ts`) backs both run_once markers and persisted secrets. +- **Declarative docker `image:` services** — a service may set `image:` (a bare ref string, or a mapping `{ name, ports, env, args, container_name }`) instead of `command:`; `parseService` (`packages/ctl/src/config.ts`) synthesizes the `docker start`-or-`run` shell (the proven `examples/express-ready` / optima pattern), reused by name across restarts (env baked into `-e`, no auto-`rm`). `command`/`image` are mutually exclusive; the runner/`ready_when`/`restart`/`expose` machinery is unchanged. 
The shared writable-state-dir resolver (`packages/ctl/src/state-dir.ts`) backs both run_once markers and persisted secrets. - `agentbox claude [-- ...]` — does everything `create` does, then starts Claude Code in a detached tmux session inside the box and attaches the user's terminal to it. `Ctrl+a d` detaches; the claude process keeps running. Reattach with `agentbox claude attach `. Forwards `ANTHROPIC_API_KEY` / `CLAUDE_CODE_OAUTH_TOKEN` / `CLAUDE_EFFORT` / `ANTHROPIC_MODEL` from host env when set. `--isolate-claude-config` opts into a per-box `agentbox-claude-config-` volume. - `agentbox claude start [box] [-- ...]` — start a Claude session in an **existing** box (vs `agentbox claude` which creates one). Resolves `[box]` via the usual auto-pick / index / name / id-prefix chain. Auto-unpauses/starts the container if needed (mirrors `shell`/`code`). Re-syncs `~/.claude` into the box volume by default (skip with `--no-sync-config` for speed). Re-runs `rebuildPluginNativeDeps` (idempotent — gated by per-plugin marker). If a tmux session with the configured name already exists, just attaches; otherwise starts a fresh one. Post-`--` args are forwarded to claude only when starting a fresh session. - `agentbox codex [-- ...]` — the Codex parity of `agentbox claude`: does everything `create` does, then launches OpenAI Codex in a detachable tmux session (`codex` session name; `--session-name` / config `codex.sessionName` override). Forwards `OPENAI_API_KEY` from host env. `--isolate-codex-config` opts into a per-box `agentbox-codex-config-` volume. Subcommands mirror claude: `agentbox codex start [box] [-- ...]` (start a session in an existing box, auto-unpause/start, `--no-sync-config` to skip the `~/.codex` resync), `agentbox codex attach [box]` (attach/start without resyncing), `agentbox codex login [-- ]` (sign in via a throwaway container — defaults to `codex login --device-auth`, the headless device-code flow; pass `-- --api-key` for the API-key path). 
Skips the claude-only steps (setup wizard, plugin rebuild). `apps/cli/src/commands/codex.ts`. Codex is baked into the base image, but a box built from a **checkpoint captured before Codex support** (or an older base image) won't have the binary — `ensureCodexInstalled` (`codex.ts`) detects that and `npm install -g @openai/codex`s it into the box's writable layer at create/start time (mirrors `--with-playwright`; fast `command -v` no-op when codex is already present). diff --git a/docs/in-box-supervisor.md b/docs/in-box-supervisor.md index dcb7afa8..24732ebb 100644 --- a/docs/in-box-supervisor.md +++ b/docs/in-box-supervisor.md @@ -10,7 +10,7 @@ - **`run_once:` on a task** (handled in `TaskRunner.launch`, `supervisor.ts`) makes a re-run a no-op when already satisfied. `run_once: true` → marker keyed by a SHA-256 of the resolved command (+cwd+env) at `/tasks/` (`stateDir` defaults to `DEFAULT_STATE_DIR = /var/lib/agentbox`, the box rootfs — captured by checkpoints, never under `/workspace`); editing the command invalidates it. `run_once: { check: }` → run the probe first; exit 0 = skip, no marker written (right for state outside the checkpoint, e.g. a containerized DB). Marker writes happen in the child `exit` handler on code 0. - **Replacement engine** (`@agentbox/core`'s `replace.ts`, re-exported by `@agentbox/ctl`'s `replace.ts` which adds the yaml/fs loaders — kept in core so the host carry path can share it without the `sandbox-core → ctl → relay → sandbox-core` cycle): `applyReplacements` does `{{AGENTBOX_*}}` whitelist substitution (`PLACEHOLDER_KEYS`) + ordered `{from,to,regex?}` rules. 
Surfaced three ways: the top-level `replacements:` block (named rule-sets, parsed in `config.ts`), `agentbox-ctl render` (in-box CLI, `commands/render.ts`), and carry `replaceEnvs`/`replace`/`rules` (host-side, file-only, rendered to a temp by `renderCarryEntries` in `@agentbox/sandbox-core` before the per-provider copy — wired in `sandbox-docker/create.ts` and `sandbox-cloud/cloud-provider.ts`). - **`{{AGENTBOX_AUTO_SECRET}}` render generator** (`commands/render.ts` → `secret.ts`, not the pure engine — needs crypto + fs): a render-time pass before `applyReplacements`. Unnamed → fresh `randomBytes(32).toString('base64url')` per occurrence; `:` → generated once and persisted at `/secrets/` (0600), reused across renders. State dir resolved via the shared `resolveWritableStateDir` (`state-dir.ts`, extracted from the supervisor's marker-dir logic — try `/var/lib/agentbox`, fall back to `/state`). -- **Declarative docker `image:` services** (`config.ts` `parseService` → `synthesizeImageCommand`): a service sets `image:` (+ `ports`/`env`/`args`/`container_name`) instead of `command:`; the parser synthesizes the start-or-run shell (`docker container inspect` → `docker start` + `logs -f`, else `docker run` with `-p`/`-e`/args), so the runner/DAG/`ready_when`/`restart` machinery is unchanged. Container reused by name across restarts (no auto-`rm`; `env` baked into `-e`, `spec.env` left unset). `command` and `image` are mutually exclusive (one required). +- **Declarative docker `image:` services** (`config.ts` `parseService` → `parseImage` → `synthesizeImageCommand`): a service sets `image:` instead of `command:` — either a bare ref string or a mapping `{ name, ports, env, args, container_name }` (container config nested under `image:`); the parser synthesizes the start-or-run shell (`docker container inspect` → `docker start` + `logs -f`, else `docker run` with `-p`/`-e`/args), so the runner/DAG/`ready_when`/`restart` machinery is unchanged. 
Container reused by name across restarts (no auto-`rm`; `env` baked into `-e`, `spec.env` left unset). `command` and `image` are mutually exclusive (one required). - Listens on `/run/agentbox/ctl.sock` (UNIX socket, newline-delimited JSON). Both the in-box `agentbox-ctl` client and host commands talk to the same socket — but the **host commands shell in via `docker exec`**, not the bind-mounted socket: Docker Desktop / OrbStack's VM boundary breaks `connect()` from the mac side, even though the file is visible. - Launched by `launchCtlDaemon()` in `sandbox-docker/src/ctl.ts` (best-effort; missing/empty `agentbox.yaml` is fine and doesn't fail `create`). Same call is repeated in `startBox()` because the daemon dies with the container. **Ordering invariant: the in-box `dockerd` is launched and awaited ready *before* the ctl daemon** — on create (`create.ts`), on docker restart (`startBox()`), and on cloud create/resume (`reEnsureCloudBox()` / create in `sandbox-cloud/src/cloud-provider.ts`). The supervisor starts services the moment it's up, so a `docker`-based service (`docker run`, `docker compose up`) would otherwise race a not-yet-ready `/var/run/docker.sock`. `launchDockerdDaemon` / `launchCloudDockerdDaemon` block until the socket is accept()-able (best-effort: a dockerd timeout still proceeds to launch the supervisor). Providers with no DinD (vercel, e2b) set `launchDockerd: false` and just launch the supervisor. - **In-box relay**: the daemon also binds an in-box endpoint on `127.0.0.1:8788` (`DEFAULT_BOX_RELAY_PORT`; override `AGENTBOX_BOX_RELAY_PORT`) so the in-box ctl client has a symmetric `AGENTBOX_RELAY_URL` across providers. For **cloud** boxes that endpoint is a full `mode: 'box'` relay the host's `CloudBoxPoller` long-polls; for **docker** boxes it's a thin reverse proxy (`packages/ctl/src/box-relay-forwarder.ts`) that whitelists `POST /rpc` + `POST /events` and forwards to `AGENTBOX_HOST_RELAY_URL` (default `http://host.docker.internal:8787`). 
Keeping :8787 unbound inside the box lets a nested `agentbox` run (developing agentbox-from-inside-agentbox) claim its own host relay there. See [`host-relay.md`](./host-relay.md). diff --git a/packages/ctl/schema/agentbox.schema.json b/packages/ctl/schema/agentbox.schema.json index 84f096bb..adb2a90d 100644 --- a/packages/ctl/schema/agentbox.schema.json +++ b/packages/ctl/schema/agentbox.schema.json @@ -166,35 +166,37 @@ "type": "object", "additionalProperties": false, "oneOf": [{ "required": ["command"] }, { "required": ["image"] }], - "dependentRequired": { - "ports": ["image"], - "args": ["image"], - "container_name": ["image"] - }, "properties": { "command": { "$ref": "#/$defs/command" }, "image": { - "type": "string", - "minLength": 1, - "description": "Run a docker container instead of a command. AgentBox synthesizes a start-or-run shell (the in-box container is reused by name across restarts). Mutually exclusive with command. Pair with ports/env/args/container_name." - }, - "ports": { - "type": "array", - "description": "Container port publishes for an image service: \":\" or \"\". Reachable from other in-box services at 127.0.0.1:.", - "items": { "type": "string", "pattern": "^[0-9]+(:[0-9]+)?$" } - }, - "args": { - "description": "Extra args appended after the image (image services only). A string or a list of strings; shell word-split.", + "description": "Run a docker container instead of a command. AgentBox synthesizes a start-or-run shell (the in-box container is reused by name across restarts). Mutually exclusive with command. Either a bare image ref string, or a mapping with ports/env/args/container_name.", "oneOf": [ - { "type": "string" }, - { "type": "array", "items": { "type": "string" } } + { "type": "string", "minLength": 1 }, + { + "type": "object", + "additionalProperties": false, + "required": ["name"], + "properties": { + "name": { "type": "string", "minLength": 1, "description": "Image ref, e.g. postgres:17-alpine." 
}, + "ports": { + "type": "array", + "description": "Port publishes: \":\" or \"\". Reachable from other in-box services at 127.0.0.1:.", + "items": { "type": "string", "pattern": "^[0-9]+(:[0-9]+)?$" } + }, + "env": { "$ref": "#/$defs/env" }, + "args": { + "description": "Extra args appended after the image. A string or a list of strings; shell word-split.", + "oneOf": [{ "type": "string" }, { "type": "array", "items": { "type": "string" } }] + }, + "container_name": { + "type": "string", + "pattern": "^[A-Za-z0-9][A-Za-z0-9_.-]*$", + "description": "Container name (default: the service name)." + } + } + } ] }, - "container_name": { - "type": "string", - "pattern": "^[A-Za-z0-9][A-Za-z0-9_.-]*$", - "description": "Container name for an image service (default: the service name)." - }, "cwd": { "type": "string", "description": "Working directory. Relative paths resolve against /workspace." diff --git a/packages/ctl/src/config.ts b/packages/ctl/src/config.ts index 925a3916..9b630a54 100644 --- a/packages/ctl/src/config.ts +++ b/packages/ctl/src/config.ts @@ -386,11 +386,11 @@ const SERVICE_KEYS = new Set([ 'expose', 'ide', 'image', - 'ports', - 'args', - 'container_name', ]); +// The container config nested under a service's `image:` (when it's a mapping). +const IMAGE_KEYS = new Set(['name', 'ports', 'env', 'args', 'container_name']); + // Minimal POSIX single-quote escaping for values baked into a generated // `bash -c` docker command. (sandbox-cloud has an equivalent quoteShellArg, but // ctl can't depend on it — wrong direction.) @@ -463,6 +463,48 @@ function synthesizeImageCommand(opts: { ].join('\n'); } +interface ParsedImage { + name: string; + ports?: string[]; + env?: Record; + args?: string; + containerName: string; +} + +// Parse a service's `image:` — either a bare ref string (shorthand) or a mapping +// `{ name, ports?, env?, args?, container_name? }`. `defaultName` (the service +// name) is the default container name. 
+function parseImage(raw: unknown, where: string, defaultName: string): ParsedImage { + if (typeof raw === 'string') { + const name = raw.trim(); + if (name.length === 0) throw new ConfigError(`${where}.image must not be empty`); + return { name, containerName: defaultName }; + } + if (!isPlainObject(raw)) { + throw new ConfigError(`${where}.image must be an image ref string or a mapping`); + } + rejectUnknownKeys(raw, IMAGE_KEYS, `${where}.image`); + const name = assertString(raw.name, `${where}.image.name`).trim(); + if (name.length === 0) throw new ConfigError(`${where}.image.name must not be empty`); + const ports = parsePorts(raw.ports, `${where}.image`); + const args = parseArgs(raw.args, `${where}.image`); + const env = parseEnv(raw.env, `${where}.image`); + const containerName = + raw.container_name === undefined + ? defaultName + : assertString(raw.container_name, `${where}.image.container_name`).trim(); + if (!/^[A-Za-z0-9][A-Za-z0-9_.-]*$/.test(containerName)) { + throw new ConfigError( + `${where}.image.container_name "${containerName}" is not a valid docker container name`, + ); + } + const out: ParsedImage = { name, containerName }; + if (ports !== undefined) out.ports = ports; + if (env !== undefined) out.env = env; + if (args !== undefined) out.args = args; + return out; +} + const EXPOSE_KEYS = new Set(['port', 'as']); function parseExpose(raw: unknown, where: string): ExposeSpec | undefined { @@ -517,21 +559,17 @@ function parseService(name: string, raw: unknown): ServiceSpec { const expose = parseExpose(raw.expose, where); if (hasImage) { - const image = assertString(raw.image, `${where}.image`).trim(); - if (image.length === 0) throw new ConfigError(`${where}.image must not be empty`); - const ports = parsePorts(raw.ports, where); - const args = parseArgs(raw.args, where); - const env = parseEnv(raw.env, where); // container -e env - const containerName = - raw.container_name === undefined - ? 
name - : assertString(raw.container_name, `${where}.container_name`).trim(); - if (!/^[A-Za-z0-9][A-Za-z0-9_.-]*$/.test(containerName)) { - throw new ConfigError( - `${where}.container_name "${containerName}" is not a valid docker container name`, - ); + if (raw.env !== undefined) { + throw new ConfigError(`${where}.env is not valid for an image service — use image.env`); } - const command = synthesizeImageCommand({ image, name: containerName, ports, env, args }); + const img = parseImage(raw.image, where, name); + const command = synthesizeImageCommand({ + image: img.name, + name: img.containerName, + ports: img.ports, + env: img.env, + args: img.args, + }); const spec: ServiceSpec = { name, command, @@ -542,20 +580,14 @@ function parseService(name: string, raw: unknown): ServiceSpec { needs, readyWhen, expose, - image, - containerName, + image: img.name, + containerName: img.containerName, }; - if (ports !== undefined) spec.ports = ports; - if (args !== undefined) spec.args = args; + if (img.ports !== undefined) spec.ports = img.ports; + if (img.args !== undefined) spec.args = img.args; return spec; } - // command service — the image-only keys are rejected. 
- for (const k of ['ports', 'args', 'container_name']) { - if (raw[k] !== undefined) { - throw new ConfigError(`${where}.${k} is only valid alongside image:`); - } - } const command = parseCommand(raw.command, where); const env = parseEnv(raw.env, where); return { name, command, cwd, env, autostart, restart, backoff, needs, readyWhen, expose }; diff --git a/packages/ctl/test/config.test.ts b/packages/ctl/test/config.test.ts index 5e3366bb..720b4f76 100644 --- a/packages/ctl/test/config.test.ts +++ b/packages/ctl/test/config.test.ts @@ -82,17 +82,18 @@ describe('image services', () => { return parseConfig(yaml).services[0]!; } - it('synthesizes a start-or-run command with ports/env/args', () => { + it('synthesizes a start-or-run command from a nested image (ports/env/args)', () => { const s = svc(` services: postgres: - image: postgres:17-alpine - ports: ["5437:5432"] - env: - POSTGRES_USER: optima - POSTGRES_PASSWORD: "with space" - args: "-c max_connections=200" - container_name: optima_db + image: + name: postgres:17-alpine + ports: ["5437:5432"] + env: + POSTGRES_USER: optima + POSTGRES_PASSWORD: "with space" + args: "-c max_connections=200" + container_name: optima_db `); expect(s.image).toBe('postgres:17-alpine'); expect(s.containerName).toBe('optima_db'); @@ -104,12 +105,18 @@ services: expect(cmd).toContain('-e POSTGRES_USER=optima'); expect(cmd).toContain("-e POSTGRES_PASSWORD='with space'"); // shell-quoted value expect(cmd).toContain('postgres:17-alpine -c max_connections=200'); - expect(s.env).toBeUndefined(); // baked into -e, not the process env + expect(s.env).toBeUndefined(); // container env is baked into -e, not the process env }); - it('defaults container name to the service name and joins args lists', () => { - const s = svc(`services:\n cache:\n image: redis:7\n args: ["--save", "60 1"]\n`); + it('accepts the image string shorthand, defaulting container name to the service name', () => { + const s = svc(`services:\n cache:\n image: redis:7\n`); 
+ expect(s.image).toBe('redis:7'); expect(s.containerName).toBe('cache'); + expect(s.command as string).toContain('docker run --name cache'); + }); + + it('joins args lists', () => { + const s = svc(`services:\n cache:\n image:\n name: redis:7\n args: ["--save", "60 1"]\n`); expect(s.command as string).toContain('redis:7 --save 60 1'); }); @@ -123,15 +130,21 @@ services: expect(() => svc(`services:\n db:\n restart: always\n`)).toThrow(/command or image/); }); - it('rejects ports without image', () => { - expect(() => svc(`services:\n web:\n command: x\n ports: ["3000:3000"]\n`)).toThrow( - /only valid alongside image/, + it('rejects top-level env on an image service', () => { + expect(() => svc(`services:\n db:\n image: postgres\n env:\n X: y\n`)).toThrow( + /use image\.env/, + ); + }); + + it('rejects an image mapping without name', () => { + expect(() => svc(`services:\n db:\n image:\n ports: ["5432:5432"]\n`)).toThrow( + ConfigError, ); }); it('rejects a bad container_name', () => { expect(() => - svc(`services:\n db:\n image: postgres\n container_name: "bad name"\n`), + svc(`services:\n db:\n image:\n name: postgres\n container_name: "bad name"\n`), ).toThrow(/not a valid docker container name/); }); }); diff --git a/packages/ctl/test/schema-drift.test.ts b/packages/ctl/test/schema-drift.test.ts index cede0529..ab8d0a46 100644 --- a/packages/ctl/test/schema-drift.test.ts +++ b/packages/ctl/test/schema-drift.test.ts @@ -238,29 +238,35 @@ tasks: `, }, { - name: 'docker image service (ports/env/args/container_name)', + name: 'docker image service (nested ports/env/args/container_name)', yaml: ` services: postgres: - image: postgres:17-alpine - ports: ["5437:5432"] - env: - POSTGRES_USER: optima - POSTGRES_PASSWORD: changeme - args: "-c max_connections=200" - container_name: optima_db + image: + name: postgres:17-alpine + ports: ["5437:5432"] + env: + POSTGRES_USER: optima + POSTGRES_PASSWORD: changeme + args: "-c max_connections=200" + container_name: optima_db 
ready_when: port: 5437 restart: always `, }, { - name: 'docker image service minimal + args list', + name: 'docker image service string shorthand', + yaml: `services:\n cache:\n image: redis:7\n`, + }, + { + name: 'docker image service nested + args list', yaml: ` services: cache: - image: redis:7 - args: ["--save", "60 1"] + image: + name: redis:7 + args: ["--save", "60 1"] `, }, { @@ -617,20 +623,29 @@ services: yaml: `services:\n db:\n ready_when:\n port: 5432\n`, }, { - name: 'ports without image', + name: 'top-level ports is an unknown key (now nested under image)', yaml: `services:\n web:\n command: pnpm dev\n ports: ["3000:3000"]\n`, }, { - name: 'container_name without image', - yaml: `services:\n web:\n command: pnpm dev\n container_name: web1\n`, + name: 'image as a mapping without name', + yaml: `services:\n db:\n image:\n ports: ["5432:5432"]\n`, + }, + { + name: 'image mapping with unknown key', + yaml: `services:\n db:\n image:\n name: postgres\n bogus: 1\n`, }, { name: 'image service with non-numeric port', - yaml: `services:\n db:\n image: postgres:17-alpine\n ports: ["abc"]\n`, + yaml: `services:\n db:\n image:\n name: postgres:17-alpine\n ports: ["abc"]\n`, }, { name: 'image service with invalid container_name', - yaml: `services:\n db:\n image: postgres:17-alpine\n container_name: "bad name"\n`, + yaml: `services:\n db:\n image:\n name: postgres:17-alpine\n container_name: "bad name"\n`, + }, + { + name: 'top-level env on an image service (validator-only)', + yaml: `services:\n db:\n image: postgres:17-alpine\n env:\n FOO: bar\n`, + runtimeOnly: true, }, { name: 'run_once as a string',