diff --git a/apps/cli/package.json b/apps/cli/package.json index 4d14a15..d35ea1b 100644 --- a/apps/cli/package.json +++ b/apps/cli/package.json @@ -70,6 +70,7 @@ "@agentbox/config": "workspace:*", "@agentbox/core": "workspace:*", "@agentbox/ctl": "workspace:*", + "@agentbox/integrations": "workspace:*", "@agentbox/relay": "workspace:*", "@agentbox/sandbox-cloud": "workspace:*", "@agentbox/sandbox-core": "workspace:*", diff --git a/apps/cli/src/commands/doctor.ts b/apps/cli/src/commands/doctor.ts index 6581889..24a80b9 100644 --- a/apps/cli/src/commands/doctor.ts +++ b/apps/cli/src/commands/doctor.ts @@ -11,6 +11,7 @@ import { Command } from 'commander'; import { formatDetailed, + integrationsChecks, runAllChecks, runProviderChecks, runSystemChecks, @@ -42,9 +43,20 @@ export const doctorCommand = new Command('doctor') ); process.exit(1); } + // Integrations are host-side (not provider-side), but a user running + // `doctor -p hetzner` still wants to know whether their Notion is + // installed/authed/enabled — otherwise the only way to see the + // integrations group is the unscoped doctor, which is a discoverability + // gap. Include it alongside system + the scoped provider. 
+ const [sys, prov, integrations] = await Promise.all([ + runSystemChecks(), + runProviderChecks(name as ProviderName), + integrationsChecks(), + ]); groups = [ - { title: 'system', results: await runSystemChecks() }, - await runProviderChecks(name as ProviderName), + { title: 'system', results: sys }, + prov, + { title: 'integrations', results: integrations }, ]; } else { groups = await runAllChecks(); diff --git a/apps/cli/src/lib/doctor-checks.ts b/apps/cli/src/lib/doctor-checks.ts index 1c3b19f..0f878b8 100644 --- a/apps/cli/src/lib/doctor-checks.ts +++ b/apps/cli/src/lib/doctor-checks.ts @@ -10,8 +10,16 @@ import { accessSync, constants as fsConstants, mkdirSync } from 'node:fs'; import { homedir } from 'node:os'; import { join } from 'node:path'; import { execa } from 'execa'; +import { loadEffectiveConfig } from '@agentbox/config'; +import { ALL_CONNECTORS, type IntegrationConnector } from '@agentbox/integrations'; -export type CheckStatus = 'ok' | 'warn' | 'fail'; +/** + * `info` is for rows that are intentionally inert (e.g. an integration the + * user hasn't enabled). It surfaces as a distinct glyph but rolls up like + * `ok` so it never pushes the overall doctor status to "warn" — disabling + * Notion is a setting, not a problem. + */ +export type CheckStatus = 'ok' | 'info' | 'warn' | 'fail'; export interface CheckResult { label: string; @@ -373,6 +381,134 @@ async function e2bChecks(): Promise { } } +/** + * Probe a binary, treating ENOENT (missing on PATH) as a distinct outcome + * from a non-zero exit. `execa({reject:false})` returns a result envelope + * even on spawn failure — `{ failed: true, code: 'ENOENT', exitCode: undefined }` + * — rather than throwing. We map that to `missing: true` so the integration + * check has a single, easy-to-read branch. Wrapped in try/catch in case a + * future execa release reverts to throwing on spawn errors. 
+ */ +async function probeIntegrationBin( + bin: string, + args: readonly string[], +): Promise<{ exitCode: number; stdout: string; stderr: string; missing: boolean }> { + try { + const r = await execa(bin, [...args], { reject: false }); + const code = (r as { code?: string }).code; + if (code === 'ENOENT') { + return { exitCode: 127, stdout: '', stderr: r.stderr ?? '', missing: true }; + } + return { + exitCode: r.exitCode ?? 1, + stdout: typeof r.stdout === 'string' ? r.stdout : '', + stderr: typeof r.stderr === 'string' ? r.stderr : '', + missing: false, + }; + } catch (err) { + const code = (err as NodeJS.ErrnoException).code; + return { + exitCode: code === 'ENOENT' ? 127 : 1, + stdout: '', + stderr: errSummary(err), + missing: code === 'ENOENT', + }; + } +} + +/** Shape `loadEffectiveConfig` returns; only the integrations slice matters here. */ +type IntegrationsConfigSlice = { + effective: { integrations?: Record }; +}; + +export type IntegrationsConfigLoader = (cwd: string) => Promise; + +/** + * Per-connector host-side detection: is each `integrations..enabled` + * flipped on, is the host CLI installed, and is the user logged in. Driven + * off `ALL_CONNECTORS` so Linear/Trello light up here automatically when + * they ship — no doctor change needed. + * + * `loader` is injectable for unit tests (mirrors `refuseIfIntegrationDisabled`'s + * approach). The default reads layered config from `cwd`, so toggling the + * flag via `agentbox config set` takes effect on the next doctor run with + * no caching. + * + * Auth env handling: we deliberately do NOT force `NOTION_KEYRING=0` on the + * host probe. The Notion connector forces it inside the box because the box + * has no keychain; on the host the user's authed state IS the keychain + * entry, and forcing the file-auth path would make a keychain-authed user + * read as "not logged in" against a non-existent `~/.config/notion/auth.json`. 
+ */ +export async function integrationsChecks( + loader: IntegrationsConfigLoader = loadEffectiveConfig, +): Promise { + let cfg: IntegrationsConfigSlice; + try { + cfg = await loader(process.cwd()); + } catch { + cfg = { effective: {} }; + } + // Parallel: each connector's two probes (version + auth) are independent + // across connectors. With Linear / Trello / ClickUp queued, the serial + // walk would scale linearly; Promise.all keeps doctor latency flat. + return Promise.all( + ALL_CONNECTORS.map((connector) => checkOneIntegration(connector, cfg.effective.integrations)), + ); +} + +async function checkOneIntegration( + connector: IntegrationConnector, + integrations: Record | undefined, +): Promise { + const svc = connector.service; + const enabled = integrations?.[svc]?.enabled === true; + if (!enabled) { + return { + label: svc, + status: 'info', + detail: 'disabled', + hint: `enable with \`agentbox config set --project integrations.${svc}.enabled true\``, + }; + } + + const version = await probeIntegrationBin(connector.hostBin, connector.detect.versionArgs); + if (version.missing || version.exitCode === 127) { + return { + label: svc, + status: 'warn', + detail: `${connector.hostBin} not installed`, + hint: + connector.detect.installHint ?? + `install the ${svc} CLI (\`${connector.hostBin}\`) on the host`, + }; + } + if (version.exitCode !== 0) { + const tail = firstLine((version.stderr || version.stdout).trim()); + return { + label: svc, + status: 'warn', + detail: `${connector.hostBin} ${connector.detect.versionArgs.join(' ')} failed${tail ? 
`: ${tail}` : ''}`, + }; + } + const versionLine = firstLine((version.stdout || version.stderr).trim()) || connector.hostBin; + + if (!connector.detect.authArgs || connector.detect.authArgs.length === 0) { + return { label: svc, status: 'ok', detail: versionLine }; + } + + const auth = await probeIntegrationBin(connector.hostBin, connector.detect.authArgs); + if (auth.exitCode !== 0) { + return { + label: svc, + status: 'warn', + detail: 'not logged in', + hint: connector.detect.loginHint ?? `run \`${connector.hostBin} login\``, + }; + } + return { label: svc, status: 'ok', detail: `${versionLine} · authed` }; +} + export async function runProviderChecks(name: ProviderName): Promise { let results: CheckResult[]; switch (name) { @@ -398,7 +534,8 @@ export async function runProviderChecks(name: ProviderName): Promise export async function runAllChecks(): Promise { const sys: CheckGroup = { title: 'system', results: await runSystemChecks() }; const providerGroups = await Promise.all(ALL_PROVIDERS.map((n) => runProviderChecks(n))); - return [sys, ...providerGroups]; + const integrations: CheckGroup = { title: 'integrations', results: await integrationsChecks() }; + return [sys, ...providerGroups, integrations]; } function worstInResults(results: CheckResult[]): CheckStatus { @@ -406,6 +543,8 @@ function worstInResults(results: CheckResult[]): CheckStatus { for (const r of results) { if (r.status === 'fail') return 'fail'; if (r.status === 'warn') worst = 'warn'; + // `info` rolls up like `ok` — intentionally inert rows shouldn't flip + // the overall doctor status. 
} return worst; } @@ -427,6 +566,14 @@ function summaryToken(group: CheckGroup): string { if (worst === 'warn') return 'system warn'; return 'system ok'; } + if (group.title === 'integrations') { + if (worst === 'fail') return 'integrations FAIL'; + if (worst === 'warn') return 'integrations check'; + // All rows ok or info (disabled) — render as "off" when every row is + // info, else "ready" when at least one is enabled and green. + const anyEnabled = group.results.some((r) => r.status === 'ok'); + return anyEnabled ? 'integrations ready' : 'integrations off'; + } if (worst === 'fail') return `${group.title} FAIL`; if (worst === 'warn') { // Distinguish "not configured" (warn on credentials) from other warns. @@ -441,13 +588,14 @@ function summaryToken(group: CheckGroup): string { const C_GREEN = '\x1b[32m'; const C_YELLOW = '\x1b[33m'; const C_RED = '\x1b[31m'; +const C_DIM = '\x1b[2m'; const C_RESET = '\x1b[0m'; const COLOR = !process.env.NO_COLOR; // install requires a TTY anyway; honor NO_COLOR for piped output function statusMarker(s: CheckStatus): string { - const glyph = s === 'ok' ? '✓' : s === 'warn' ? '⚠' : '✗'; + const glyph = s === 'ok' ? '✓' : s === 'info' ? '·' : s === 'warn' ? '⚠' : '✗'; if (!COLOR) return glyph; - const color = s === 'ok' ? C_GREEN : s === 'warn' ? C_YELLOW : C_RED; + const color = s === 'ok' ? C_GREEN : s === 'info' ? C_DIM : s === 'warn' ? C_YELLOW : C_RED; return `${color}${glyph}${C_RESET}`; } @@ -464,6 +612,7 @@ function pad(s: string, width: number): string { function statusBadge(s: CheckStatus): string { if (s === 'ok') return '[ ok ]'; + if (s === 'info') return '[info]'; if (s === 'warn') return '[warn]'; return '[FAIL]'; } diff --git a/apps/cli/test/doctor-integrations.test.ts b/apps/cli/test/doctor-integrations.test.ts new file mode 100644 index 0000000..3e7e1a7 --- /dev/null +++ b/apps/cli/test/doctor-integrations.test.ts @@ -0,0 +1,127 @@ +/** + * Unit tests for the `integrations:` group in `agentbox doctor`. 
+ * + * The real `ntn` lives only on the host (this box can't install it), so the + * test stages a tiny shell script named `ntn` on a private PATH and asserts + * the four meaningful transitions: disabled → info, enabled+missing → warn, + * enabled+present-but-unauthed → warn (with the login hint), enabled+ok → ok. + * + * Config is injected via the `IntegrationsConfigLoader` parameter rather than + * touched on disk — same pattern `refuseIfIntegrationDisabled` uses in the + * relay, so the test stays pure (no `~/.agentbox` touch). + */ + +import { chmod, mkdtemp, rm, writeFile } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import { + integrationsChecks, + type IntegrationsConfigLoader, +} from '../src/lib/doctor-checks.js'; + +const NTN_SCRIPT = `#!/usr/bin/env bash +case "$1" in + --version) + echo "ntn version 0.42.0" + exit 0 ;; + api) + if [ "$NTN_TEST_AUTH" = "ok" ]; then + echo '{"object":"user","id":"stub"}' + exit 0 + fi + echo "Error: not logged in. Run 'ntn login' to authenticate." 
>&2 + exit 1 ;; + *) + echo "stub: unknown subcommand $1" >&2 + exit 2 ;; +esac +`; + +const enabled: IntegrationsConfigLoader = () => + Promise.resolve({ effective: { integrations: { notion: { enabled: true } } } }); +const disabled: IntegrationsConfigLoader = () => Promise.resolve({ effective: {} }); + +describe('doctor — integrations group', () => { + let stubDir: string; + let originalPath: string | undefined; + let originalAuth: string | undefined; + + beforeEach(async () => { + stubDir = await mkdtemp(join(tmpdir(), 'agentbox-doctor-int-')); + originalPath = process.env.PATH; + originalAuth = process.env.NTN_TEST_AUTH; + }); + + afterEach(async () => { + if (originalPath === undefined) delete process.env.PATH; + else process.env.PATH = originalPath; + if (originalAuth === undefined) delete process.env.NTN_TEST_AUTH; + else process.env.NTN_TEST_AUTH = originalAuth; + await rm(stubDir, { recursive: true, force: true }); + }); + + async function stageStub(): Promise { + const ntn = join(stubDir, 'ntn'); + await writeFile(ntn, NTN_SCRIPT, 'utf8'); + await chmod(ntn, 0o755); + // Prepend the stub dir so our fake `ntn` wins over any real one, but + // keep the original PATH so the script's `#!/usr/bin/env bash` shebang + // can still resolve `bash` (env in /usr/bin uses the child's PATH). + process.env.PATH = `${stubDir}:${originalPath ?? ''}`; + } + + function emptyPath(): void { + // Only the empty stub dir — execa(`ntn`) gets ENOENT directly (no + // shebang interpretation needed for a missing binary). 
+ process.env.PATH = stubDir; + } + + it('renders info / "disabled" when the flag is off (default)', async () => { + emptyPath(); + const results = await integrationsChecks(disabled); + expect(results).toHaveLength(1); + const row = results[0]!; + expect(row.label).toBe('notion'); + expect(row.status).toBe('info'); + expect(row.detail).toBe('disabled'); + expect(row.hint).toContain('integrations.notion.enabled true'); + }); + + it('renders warn / "not installed" when enabled but ntn is missing', async () => { + emptyPath(); + const results = await integrationsChecks(enabled); + const row = results[0]!; + expect(row.status).toBe('warn'); + expect(row.detail).toMatch(/not installed/); + expect(row.hint).toMatch(/install ntn/); + }); + + it('renders warn / "not logged in" when ntn is present but unauthed', async () => { + await stageStub(); + delete process.env.NTN_TEST_AUTH; + const results = await integrationsChecks(enabled); + const row = results[0]!; + expect(row.status).toBe('warn'); + expect(row.detail).toBe('not logged in'); + expect(row.hint).toBe('ntn login'); + }); + + it('renders ok with the version line when ntn is present and authed', async () => { + await stageStub(); + process.env.NTN_TEST_AUTH = 'ok'; + const results = await integrationsChecks(enabled); + const row = results[0]!; + expect(row.status).toBe('ok'); + expect(row.detail).toContain('ntn version 0.42.0'); + expect(row.detail).toContain('authed'); + }); + + it('fails closed (no throw) when the config loader rejects', async () => { + emptyPath(); + const broken: IntegrationsConfigLoader = () => + Promise.reject(new Error('malformed yaml')); + const results = await integrationsChecks(broken); + expect(results[0]?.status).toBe('info'); + }); +}); diff --git a/apps/web/content/docs/cli.mdx b/apps/web/content/docs/cli.mdx index 07f4e5c..9152afc 100644 --- a/apps/web/content/docs/cli.mdx +++ b/apps/web/content/docs/cli.mdx @@ -259,7 +259,7 @@ agentbox prepare -p hetzner agentbox prepare -p docker 
--build ``` -`install` is the first-run setup wizard (system check, pick a provider, log in, prepare its base image, install the host skill). `install cmux` pins a live `agentbox list` panel (all your boxes) to the [cmux](https://cmux.com) sidebar dock — see [cmux integration](/docs/integrations-cmux#the-agentbox-dock-right-sidebar). `doctor` diagnoses system and provider readiness. `prepare` builds base images or snapshots — omit `--provider` for status only. +`install` is the first-run setup wizard (system check, pick a provider, log in, prepare its base image, install the host skill). `install cmux` pins a live `agentbox list` panel (all your boxes) to the [cmux](https://cmux.com) sidebar dock — see [cmux integration](/docs/integrations-cmux#the-agentbox-dock-right-sidebar). `doctor` diagnoses system and provider readiness — and reports each [service integration](/docs/integrations-notion) (host CLI installed? authed? enabled per project?). `prepare` builds base images or snapshots — omit `--provider` for status only. `agentbox config get --all` shows which layer each value comes from. See the full key reference in [Configuration](/docs/configuration). diff --git a/apps/web/content/docs/configuration.mdx b/apps/web/content/docs/configuration.mdx index 2f600ab..1820bd5 100644 --- a/apps/web/content/docs/configuration.mdx +++ b/apps/web/content/docs/configuration.mdx @@ -230,6 +230,20 @@ See [access your box](/docs/access-your-box). See [browser and screen](/docs/browser-and-screen). +## integrations + +Per-service toggles for relay-gated service integrations. Each integration is **disabled by default** — even when the host CLI is installed and authed, the box can't call out until you flip it on. The box never holds the service's token; reads pass through, writes prompt on the host. See [Notion](/docs/integrations-notion). 
+ +| Key | Type | Default | Meaning | +| --- | --- | --- | --- | +| `integrations.notion.enabled` | bool | `false` | proxy `ntn` calls from the box through the host relay; reads pass, writes prompt | + +```bash +agentbox config set --project integrations.notion.enabled true +``` + +`agentbox doctor` reports a row per integration in a dedicated `integrations:` group: disabled (default), `ntn not installed`, `not logged in`, or `authed` — with a one-line hint for each non-`ok` state. + ## queue & autopause `queue.*` schedules background `-i` jobs; `autopause.*` pauses idle boxes. diff --git a/apps/web/content/docs/integrations-notion.mdx b/apps/web/content/docs/integrations-notion.mdx new file mode 100644 index 0000000..8a85c37 --- /dev/null +++ b/apps/web/content/docs/integrations-notion.mdx @@ -0,0 +1,95 @@ +--- +title: Notion +description: Let your box read and write Notion pages through the host's authenticated ntn CLI — your token never enters the box +--- + +AgentBox can proxy Notion calls from inside a box to the host's authenticated `ntn` CLI. The box agent can search workspaces, read pages, and (with your approval for each write) create or update pages — without your Notion token ever entering the box. The same model as `agentbox-ctl git push` and `agentbox-ctl git pr create`. + + +The box runs a tiny `ntn` / `notion` shim. Calls go through `agentbox-ctl integration notion ` to the **host relay**, which runs the host's real `ntn` and ships the result back. Reads pass straight through. Writes raise a one-line confirm in your terminal first. + + +## Prerequisites + +The integration wraps Notion's official CLI ([`ntn`](https://developers.notion.com/reference/notion-cli) — currently in beta). 
Install it on the **host** (not in the box): + +```bash +# macOS / Linux — see the official docs for other install methods +brew install notion-cli +ntn login # opens the browser, stores auth in the system keychain +``` + +Then run `agentbox doctor`. Until you enable the integration, the row reads: + +```text +integrations: + [info] notion disabled (enable with `agentbox config set --project integrations.notion.enabled true`) +``` + +The integration is **off by default**, so even with `ntn` installed the box can't call it until you opt in. While the flag is off, doctor reports `info` / `disabled` without probing `ntn` at all, so this row says nothing about whether `ntn` is installed or authed; flip the flag and re-run doctor to check both. + +## Enable it for this project + +```bash +agentbox config set --project integrations.notion.enabled true +``` + +`--project` scopes it to the current project (config file under `~/.agentbox/projects/<project-id>/`). Drop `--project` for global. Run `agentbox doctor` again — the row should now read: + +```text +integrations: + [ ok ] notion ntn version X.Y.Z · authed +``` + +If you see `[warn] not logged in`, run `ntn login` on the host. If you see `[warn] ntn not installed`, the host install didn't put `ntn` on `PATH`. + +## What works inside the box + +The in-box shim exposes a strict allowlist. Anything outside the list is rejected with a clear message — start conservative, widen as flows surface. + +| In-box command | Class | What happens | +| --- | --- | --- | +| `ntn whoami` | read | Passes through; prints the authed host user. | +| `ntn api v1/<path>` | read | `GET`-only passthrough — `v1/search`, `v1/pages/<id>`, etc. Non-GET (`-X POST`, `-f`, `-F`, `--input`) is refused with exit 65. | +| `ntn pages create …` | write | **Prompts** the host for approval; on `y` the host runs `ntn pages create` and ships back the result. | +| `ntn pages update …` | write | **Prompts** the host for approval; covers props and archive. | + +`notion` is a symlink to `ntn` — either name works.
Reads are unprompted; every write raises a one-line confirm in your attached terminal (or in `agentbox agent approvals` for orchestrators driving boxes headlessly — see [Background & parallel](/docs/background-and-parallel)). + +```bash +# Inside the box — these all flow through the host relay: +ntn whoami +ntn api v1/search -f "query=design doc" # refused with exit 65: `-f` makes it a non-GET call +ntn api v1/pages/abc123 # GET passthrough +ntn pages create -p <parent-page-id> -t "Draft from the box" # prompts on the host +``` + + +Notion's `ntn` exposes no top-level `comment` subcommand, and posting via `ntn api v1/comments -X POST -f …` needs a structured JSON body that the GET-only `api` op refuses. Comment writing will land as a dedicated op with a payload translator — see [integrations follow-ups](https://github.com/madarco/agentbox/blob/main/docs/integrations.md#open-follow-ups). + + +## Security model + +| Concern | What AgentBox does | +| --- | --- | +| Where the Notion token lives | **Host only** — in the macOS keychain (or `ntn`'s configured file-auth on Linux). The box has no access to either. | +| What the box can do unprompted | **Reads only** (`whoami`, `ntn api` GETs). | +| What needs your approval | **Every write** (`page.create`, `page.update`). **Any non-GET `api` call** is never put up for approval — it is refused outright with exit 65. | +| Where the approval lives | The host relay raises a confirm prompt; you answer in the attached terminal (`y` / `n`) or via `agentbox agent approve <id>` from an orchestrator. | +| Inside the box, does the agent ever see the token? | **No.** `printenv \| grep -i notion` inside a box returns nothing; the only credential in the box is `AGENTBOX_RELAY_TOKEN`, which only authenticates to the box-local relay endpoint. | +| Auditability | Every approved write is logged as a relay event (visible via `/admin/events`, `agentbox agent`, the dashboard). | + +The integration is **off by default** for every new project. You flip it on per project once you've installed and authed `ntn` on the host. + + +The box is the untrusted side.
Tokens in the box would survive `agentbox download`, leak into commits if the agent mishandles them, and undermine the entire sandbox premise. Keeping the token on the host and putting the gate at the host boundary is the same model AgentBox already uses for `git push` and `gh pr create` — one model, audited in one place. + + +## Limitations and roadmap + +- **Notion only for now.** Linear, Trello, and ClickUp are on the integrations roadmap; their connectors will appear in `agentbox doctor` the same way once they ship. +- **GET-only `api` passthrough.** Non-GET HTTP methods (`POST`, `PATCH`, `DELETE`) on `ntn api` are refused; use the dedicated write ops instead. This guards against an agent slipping a write past the read classification. +- **Comment creation deferred.** Tracked as a follow-up. +- **Allowlist starts conservative.** As real agent flows surface needs, the op set will widen — file an issue with the failing call if something's missing. + +See also [CLI commands](/docs/cli) for `agentbox doctor`, [Configuration](/docs/configuration) for the `integrations.notion.enabled` flag, and [Background & parallel](/docs/background-and-parallel) for the host-action approval surface. diff --git a/apps/web/content/docs/meta.json b/apps/web/content/docs/meta.json index 0525fa2..186f517 100644 --- a/apps/web/content/docs/meta.json +++ b/apps/web/content/docs/meta.json @@ -20,6 +20,8 @@ "integrations-iterm2", "integrations-tmux", "integrations-cmux", + "---Services---", + "integrations-notion", "---Providers---", "local-docker", "hetzner", diff --git a/docs/features.md b/docs/features.md index 9f8f7a9..ae94e37 100644 --- a/docs/features.md +++ b/docs/features.md @@ -27,6 +27,7 @@ Full local-Docker lifecycle (plus parity-tested for cloud via `--provider dayton - `agentbox relay status|stop|start|restart` — manage the host relay process. 
`status` reads the pidfile + GETs `/healthz` and renders running / not-responding (zombie) / not-running; `--json` dumps the `RelayStatus` shape. `stop` / `start` / `restart` wrap `stopRelay()` / `ensureRelay()` (both idempotent — the same helpers `self-update` uses). Backed by `getRelayStatus()` in `packages/sandbox-docker/src/relay.ts` (re-exported from `@agentbox/sandbox-docker`); CLI in `apps/cli/src/commands/relay.ts`. - `agentbox prepare` — one-stop "set up the base image / show what's prepared" command. `agentbox prepare` (no args) prints a status table across all providers: docker's `agentbox/box:dev` image + the three shared docker volumes (`agentbox-claude-config`, `agentbox-codex-config`, `agentbox-opencode-config`), plus all daytona `agentbox*` snapshots (state / size / age / `(pinned in project)` marker) and `agentbox*` volumes — including the legacy per-agent ones that the daytona path no longer uses (visible reminder to clean them up via the Daytona dashboard). `agentbox prepare --provider docker` pre-builds the local Dockerfile.box image (idempotent). `agentbox prepare --provider daytona [--name X] [-y]` builds a layered `Image.fromDockerfile().addLocalFile().runCommands()` for the three host agent static tarballs and registers it as a named org-scoped snapshot via the documented `daytona.snapshot.create({ name, image })` API ([daytona.io/docs/en/snapshots](https://www.daytona.io/docs/en/snapshots/)), then pins `box.image: ` into the project config — subsequent `agentbox create --provider daytona` boots in seconds with the agent static config (plugins/skills/marketplaces/settings) already in place. Replaces the old `agentbox daytona publish-snapshot` (which used `_experimental_createSnapshot`, broken upstream). - `agentbox self-update` — self-updates the CLI then refreshes the local runtime. 
Detects how it was launched (`apps/cli/src/exec-method.ts`'s `detectExecutionMethod`): `npm` → `npm install -g @madarco/agentbox@latest`, `pnpm` → `pnpm add -g @madarco/agentbox@latest`, `npx`/`direct` (dev clone) → skip the package update with a note. Then best-effort `docker image rm -f agentbox/box:dev` (rebuilds lazily on next `create`/`claude` via `ensureImage()`) and reloads the relay via `stopRelay()`. The relay is only respawned in-process (`ensureRelay()`) when **no** self-update ran — after a real self-update this process is the stale build, so it just stops the relay and the next box command brings up the new one. `-y` skips the prompt, `--dry-run` previews, `--skip-self` does only the image+relay refresh. `stopRelay` lives in `packages/sandbox-docker/src/relay.ts` (reuses the existing pidfile helpers); `removeImage` in `docker.ts`. +- **Notion integration (relay-gated, host CLI)** — `agentbox-ctl integration notion ` and the in-box `ntn` / `notion` shims proxy a small allowlist of ops (`whoami`, GET-only `api` passthrough, `page.create`, `page.update`) through the host relay to the host's authenticated `ntn` CLI. Reads pass through; writes prompt the host for approval (same `askPrompt` gate as `git push` / `gh pr create`). Non-GET HTTP methods on `ntn api` are refused (`-X`, `--method`, `-f`, `-F`, `--input` all rejected with exit 65 by `refuseApiNonGet`). The box never holds a Notion token — `printenv | grep -i notion` inside a box returns nothing. Off by default — enable per project with `agentbox config set --project integrations.notion.enabled true` (typed config key `integrations.notion.enabled` in `packages/config/src/types.ts`); the relay re-reads the layered config on every call so a flag flip takes effect with no bounce, and a disabled integration is refused before any host process is touched. 
`agentbox doctor` reports each integration in a dedicated `integrations:` group — `info` for disabled, `warn` for not-installed / not-logged-in (with install/login hints from the connector descriptor), `ok` when authed. Connector descriptor lives in `packages/integrations/src/connectors/notion.ts`; the relay spine in `packages/relay/src/integrations.ts` (`parseIntegrationMethod`, `assertIntegrationReady`, `refuseIfIntegrationDisabled`, `runHostIntegration`) is dispatched identically by docker (`server.ts`) and cloud (`host-actions.ts`) per the "fix across all providers" rule. Adding a service (Linear / Trello / ClickUp) is one new descriptor file + a one-line registry add — no relay change. See [`integrations.md`](./integrations.md) (design) and [`notion_backlog.md`](./notion_backlog.md) (per-task status). - In-box `agentbox-ctl git pull|push [-- ]` (and any tool the agent runs that shells out via this command) — POSTs to the host relay's `/rpc`, which executes git on the host with the user's SSH agent + gitconfig. Commits made inside the box land in the host's main `.git/` immediately (the `.git/` is bind-mounted RW at its identical absolute path); `git push` is the only operation that needs host credentials, hence the RPC. - Browser support — Vercel's [`agent-browser`](https://github.com/vercel-labs/agent-browser) is baked into the box image (`npm install -g agent-browser`). The Chromium binary that drives it is *not* Chrome for Testing (no Linux ARM64 build, and Noble's `chromium-browser` apt package is a snap stub that doesn't run in containers) — it's Playwright's Chromium, which has working linux/arm64 + linux/amd64 builds. 
It is **not** baked: `ENV AGENT_BROWSER_EXECUTABLE_PATH=/usr/local/bin/chromium` points at the `chromium-resolver` script (`packages/sandbox-docker/scripts/chromium-resolver`, installed at `/usr/local/bin/chromium`), which on first launch reuses the newest installed Playwright Chromium and otherwise runs `playwright install chromium` — preferring the project's pinned Playwright (`/workspace/node_modules/.bin/playwright`, so the build matches the project's own tests and they share one binary), else the box's global `playwright` as a fallback downloader. This avoids baking a version-pinned Chromium that goes stale the instant a project pins a different Playwright (the old bug: a baked build masqueraded in `~/.cache/ms-playwright`, the project's `playwright install` fetched a different one, and agents waiting on the baked path hung). Chrome runtime libs (libnss3, libxkbcommon0, libcups2t64, etc. — Noble names with the `t64` suffix where applicable) are installed once at image build. Agents inside the box invoke `agent-browser` directly; sessions/auth/cookies persist under `~/.agent-browser/` in the container's writable layer, so they survive `pause/unpause` and `stop/start` and are wiped on `destroy`. The flag `--with-playwright` on both `agentbox create` and `agentbox claude` additionally runs `npm install -g @playwright/cli@latest` inside the container at create time (recorded as `BoxRecord.withPlaywright` and surfaced in `agentbox status --inspect`) — a separate package from the `playwright` runtime baked into the image. - Web service port — every box reserves container `:80` at create with an unconditional `docker run -p 127.0.0.1:0:80` (immutable after `docker run`, so it's reserved up front even though the `expose:`-flagged service is usually only known after the in-box wizard writes `agentbox.yaml`). 
The ephemeral host port is resolved via `docker port` and persisted to `BoxRecord.webHostPort` (re-resolved on every `startBox`, like `vncHostPort`, since Docker reallocates it). `getBoxEndpoints` emits a `kind: 'web'` endpoint whose URL is the published loopback port (`http://127.0.0.1:`) — **uniform across engines, not OrbStack-dependent**; it's the primary clickable link in `agentbox list`/`status`. Until a service declares `expose:` it renders as `web reserved (...)`. The in-box `:80 → expose.port` forward is the supervisor-owned `WebProxy` (see [`in-box-supervisor.md`](./in-box-supervisor.md)). Pre-feature boxes (no `BoxRecord.webContainerPort`) have no reservation and are skipped by `startBox` — recreate to enable. @@ -48,7 +49,7 @@ Full local-Docker lifecycle (plus parity-tested for cloud via `--provider dayton - Auto-pause-on-idle / auto-stop policy. - Auto-refresh of the merged host export (inotify-driven `agentbox open` keeps `~/.agentbox/boxes//workspace` in sync without manual refresh). Today refresh is on-demand only. - Exporting the container writable layer on destroy (`--export ` flag). The live merged export under `~/.agentbox/boxes//workspace` is wiped with the box (use `agentbox checkpoint create` first if you want to keep the state). -- Additional `/rpc` methods beyond `git.pull` / `git.push`. The dispatch is a single switch in `packages/relay/src/server.ts` — easy to extend (target ideas: `git.fetch`, `gh.*`, `npm.publish`, anything else that needs host creds). +- Additional `/rpc` methods beyond `git.pull` / `git.push` / `gh.pr.*` / `integration..`. The dispatch is a single switch in `packages/relay/src/server.ts` — easy to extend (target ideas: `git.fetch`, `npm.publish`, anything else that needs host creds). - A user-facing `agentbox events`/`agentbox notify` CLI on top of the relay's ring buffer. Today you can `agentbox-relay tail` (against the host process at 127.0.0.1:8787) or `tail -f ~/.agentbox/relay.log`. 
- Event-buffer persistence (events are lost on relay restart; the token registry is rehydrated from `state.json` on next `agentbox create`, but historical events aren't). - Remote providers (E2B / Modal / Daytona / Vercel Sandbox). diff --git a/docs/host-relay.md b/docs/host-relay.md index 54f28f4..a046a9c 100644 --- a/docs/host-relay.md +++ b/docs/host-relay.md @@ -15,6 +15,7 @@ - **Rehydration after restart**: every `createBox` reads `~/.agentbox/state.json` and re-pushes every known `(relayToken, gitWorktrees)` via `rehydrateRelayRegistry()`. Idempotent and cheap, so we do it unconditionally instead of trying to detect a restart. `startBox` also re-registers its own box. - The supervisor pushes outbound: `packages/ctl/src/relay-client.ts` is a fire-and-forget POST to `/events` (node:http, 2s timeout, silent failure). `onServiceState` / `onTaskState` in `supervisor.ts` forward terminal states (`ready` / `crashed` / `backoff` / `unhealthy` / `stopped` / `done` / `failed`). Disabled at construction when `AGENTBOX_RELAY_URL` / `AGENTBOX_RELAY_TOKEN` are unset — keeps existing tests and pre-relay boxes a no-op. - In-box CLI: `agentbox-ctl git pull|push [-- <git args>...]` (in `packages/ctl/src/commands/git.ts`) POSTs to `/rpc` with `{ method: 'git.pull'|'git.push', params: { path: <cwd>, args: [...] } }`, streams `stdout`/`stderr` back to the agent's terminal, and exits with the host's git exit code. This is what the agent invokes to ask the host to push the box's commits — no SSH keys leak into the box. +- **Service integrations via host CLIs**: `agentbox-ctl integration <service> <op> [-- args...]` (and the in-box `ntn` / `notion` shims) POST `{ method: 'integration.<service>.<op>', params: { path, args } }` for any connector registered in `@agentbox/integrations`. Currently shipped: `notion` (host bin `ntn`) with ops `whoami`, `api` (GET-only passthrough, refuses `-X`/`--method`/`-f`/`-F`/`--input`), `page.create` (gated), `page.update` (gated). 
The relay parses the method, looks up the connector + op, refuses with exit 65 if the per-project `integrations.<service>.enabled` flag is off, runs the op's `refuseCall` pre-flight, probes the host binary (`<hostBin> --version`, cached 60s), then either passes through (read) or gates the call via `askPrompt` (write) before shelling out to the host CLI via `runHostIntegration`. A connector-declared env namespace (`<SERVICE>_*` only) is merged onto the host spawn env — Notion forces `NOTION_KEYRING=0` so file-based auth works in nested boxes; a descriptor that tries to set anything outside its namespace yields exit 78 instead of silently rewriting `PATH`. Same `{exitCode, stdout, stderr}` envelope as `git.*` / `gh.pr.*`; wired into both `server.ts` (docker) and `host-actions.ts` (cloud — daytona/hetzner/vercel/e2b) per the "fix across all providers" rule. The reusable spine lives in `packages/relay/src/integrations.ts` (`parseIntegrationMethod`, `getConnector`, `assertIntegrationReady`, `refuseIntegrationCall`, `refuseIfIntegrationDisabled`, `runHostIntegration`); the in-box ctl surface is built from the same descriptors in `packages/ctl/src/commands/integration.ts`. Adding a service is one new descriptor file + a one-line registry add — no relay change. See [`integrations.md`](./integrations.md) for the design + the connector descriptor shape. - **PR ops via host `gh`**: `agentbox-ctl git pr <op> [args...]` POSTs `{ method: 'gh.pr.<op>', params: { path, args } }` for `op ∈ {create, view, list, comment, review, merge, checkout, close, reopen}`. The relay shells `gh pr <op> <args>` with `cwd = worktree.hostMainRepo` so gh infers the repo from the host repo's `git remote -v`. Read-only ops (`view`, `list`) bypass the prompt; everything else triggers an `askPrompt`. 
Extra guards: `merge` refuses the `AGENTBOX_PROMPT=off` auto-`y` unless `AGENTBOX_GH_FORCE=1`; `checkout` is disabled by default (opt-in via `AGENTBOX_GH_PR_CHECKOUT=allow`) and refused on a dirty host tree or a host HEAD that matches any registered box branch (would corrupt the bind-mounted box `.git/HEAD`). Cloud path mirrors the same matrix in `executeCloudAction → runGhPrRpc`, with the no-attached-wrapper behavior gated by `AGENTBOX_GH_NO_SUB` (`deny` default, `allow`, or `prompt`). Requires `gh` installed and `gh auth login` on the host; for HTTPS push/pull/fetch we additionally recommend `gh auth setup-git` so plain `git push` uses gh's OAuth token (handled invisibly by git's credential helpers — no relay change needed). - `agentbox-ctl open ` (`packages/ctl/src/commands/open.ts`) opens the URL in the **box's own Chromium** via `agent-browser open --headed` (visible in the VNC view / `agentbox screen`), then best-effort POSTs `{ method: 'browser.open', params: { url } }` to the relay. The relay records a `browser-open` event, answers immediately (never blocks the box), and raises a **non-blocking, auto-expiring** confirm prompt (`askPrompt(..., { ttlMs })`, ~25s) in the footer/dashboard — "open link on the host?" — and only `open`s it on the host on a `y`. URL scheme is validated http/https both in the ctl command and via `isOpenableUrl` in `server.ts`. The box image symlinks `/usr/local/bin/xdg-open` to the `agentbox-open` wrapper and sets `BROWSER=/usr/local/bin/agentbox-open`, so `xdg-open` and any `$BROWSER`-aware tool (Claude Code OAuth, `gh`, …) route here. The `Ctrl+a u` footer/dashboard leader action is unrelated — it opens the box's web *app* on the host (`agentbox url`). - **Host-action approvals (orchestrator path)**: the confirm prompts that gate `git.push` / `cp.*` / `gh.pr.*` writes / `checkpoint.create` / `browser.open` are raised by `askPrompt` and answered over `/admin/prompts/answer`. 
Because that endpoint is **loopback-only**, only a host process can answer — a box can't. A host-side **orchestrator** (e.g. a Claude driving boxes with `agentbox claude -i`) inspects and answers them deliberately via two CLI commands (`apps/cli/src/commands/agent.ts`): diff --git a/docs/integrations.md b/docs/integrations.md new file mode 100644 index 0000000..527b03e --- /dev/null +++ b/docs/integrations.md @@ -0,0 +1,141 @@ +# Integrations — relay-gated service connectors + +> Part of the AgentBox docs. Start at [CLAUDE.md](../CLAUDE.md). Planning context: [`integrations_backlog.md`](./integrations_backlog.md) (the four-service plan). Per-task tracker for Notion: [`notion_backlog.md`](./notion_backlog.md). The user-facing page is `apps/web/content/docs/integrations-notion.mdx` (published at https://agent-box.sh/docs/integrations-notion). + +This is the design / reference doc for the host-side integrations spine — the box-to-host bridge that lets an in-box agent read tickets/docs from Notion (and, in future, Linear / Trello / ClickUp) and make a small, prompted set of writes, without ever holding the service's credentials inside the box. The shape mirrors the existing `gh` and `git` relay flows exactly. + +## Why this exists + +The host owns the credentials. The box is the untrusted side. A box agent should be able to **read** tickets/docs freely (a search, a `GET`) and **write** with the user's per-call approval (a `page.create`, a `comment.add`), but **the token must never enter the box**. The model is the one we already proved with `gh`: + +- An in-box shim (`gh-shim`) intercepts a strict subcommand allowlist and forwards through `agentbox-ctl`. +- `agentbox-ctl` POSTs `/rpc` on the box-local relay (bearer-authed, see [`host-relay.md`](./host-relay.md)). +- The relay classifies the op as **read** or **write**. Reads pass; writes go through `askPrompt` (host approval), then shell out to the host's authenticated CLI. The token stays on the host. 
+ +Integrations generalize this for any host CLI: each service is one **connector descriptor** in `@agentbox/integrations`, and the relay's `integration.<service>.<op>` dispatcher walks the same path. + +## Where the gate lives + +The gate lives in the **relay**, not in the box. The in-box ctl is unprivileged; it sends an RPC and waits for a verdict. The relay (a host process) is the only thing that runs the host CLI, and it's the only thing that consults the per-project `integrations.<service>.enabled` flag, the op's read/write classification, the op's `refuseCall` pre-flight, and `askPrompt` for writes. One check covers every caller — the shim, the `notion`/`ntn` alias, a direct `agentbox-ctl integration` invocation, a future host-initiated one-time token. See "gate at the host boundary" in the user feedback notes. + +## The connector descriptor + +`packages/integrations/src/types.ts` defines two types: + +```ts +export interface IntegrationConnector { + service: IntegrationService; // 'notion' (more later) + hostBin: string; // 'ntn' + detect: { // T3 doctor probes + versionArgs: readonly string[]; + authArgs?: readonly string[]; + installHint?: string; // shown by `agentbox doctor` when missing + loginHint?: string; // shown when unauthed + }; + env?: Readonly<Record<string, string>>; // forced env vars; <SERVICE>_* only + ops: Readonly<Record<string, IntegrationOp>>; +} + +export interface IntegrationOp { + write: boolean; // false = read, true = gated write + buildArgv?: (args: readonly string[]) => string[]; // shape user argv → host CLI argv + refuseCall?: (args: readonly string[]) => IntegrationOpRefusal | null; +} +``` + +Pure data + small predicates. No I/O at import time, so unit tests stay pure. The descriptor file (`packages/integrations/src/connectors/notion.ts`) is the single source of truth for the box surface, the relay's allowlist, and (since T3) the doctor's install/login hint strings. + +A `registry.ts` exports `getConnector(service)` and `ALL_CONNECTORS`. Adding a service is a new descriptor file + a one-line registry add. 
No relay change, no ctl change. + +### env-var namespace guard + +`packages/relay/src/integrations.ts:mergeConnectorEnv` enforces that a descriptor can only set env vars in its own `<SERVICE>_*` namespace (e.g. Notion's connector can set `NOTION_KEYRING` but never `PATH` or `AGENTBOX_PROMPT`). A misconfigured descriptor returns a typed exit-78 envelope rather than silently disabling the relay's gate or rewriting `PATH`. + +### env: `NOTION_KEYRING=0` + +The Notion connector forces `NOTION_KEYRING=0` on the host spawn so `ntn` reads file-based auth (`~/.config/notion/auth.json`) instead of the macOS keychain. This is required when the integration is exercised **inside a box** (the box has no keychain — see "Carry-based file-auth" below). On the macOS host itself, setting it is harmless: the keychain path is only suppressed when the file-auth path is present, which it isn't on a fresh host. `agentbox doctor` deliberately does NOT set this env var (see "Doctor" below). + +## The relay dispatch flow + +`packages/relay/src/integrations.ts` is the spine. The dispatcher in `packages/relay/src/server.ts` (docker) and `packages/relay/src/host-actions.ts` (cloud) calls into it for any method starting with `integration.`. Per the "fix across all providers" rule, both paths share the exact same handler. + +For `integration.<service>.<op>`: + +1. **`parseIntegrationMethod`** splits on the first two dots; dotted ops (`page.create`) keep their dot. Unknown shape → exit 64. +2. **`getConnector(service)`** — unknown service → exit 64. +3. **op allowlist** — unknown op → exit 65, with the list of available ops. +4. **worktree resolve** — `params.path` → which registered worktree (cwd for the host CLI spawn). +5. **`refuseIntegrationCall(op, args)`** — runs the op's `refuseCall` pre-flight (e.g. `notion.api`'s GET-only check). Refused → exit 65, before any host process is spawned. +6. 
**`refuseIfIntegrationDisabled(service, cwd)`** — re-reads the layered config every call (so a flag flip takes effect without bouncing the relay; same approach as `loadAutopauseConfig`). Disabled → exit 65 with a config-hint. Runs **before** any host probe / prompt so a disabled integration is never user-visible as a permission prompt. +7. **`assertIntegrationReady(connector)`** — cached for 60s per `hostBin`. Probes `<hostBin> <versionArgs>` to make sure the binary exists. Missing → exit 127. Failed version → propagate exit. +8. **Write gating.** For `op.write === true`: + - If `params.hostInitiated` is set, validate it against `HostInitiatedTokens` (scope + params-hash bound). A present-but-invalid token is a hard reject (attack signal — exit 10). + - Otherwise (or for any unbound write) `askPrompt(...)` blocks until the host answers `y` / `n`. Denied → exit 10. + - Read ops skip both gates entirely. +9. **`runHostIntegration`** spawns the host binary in the worktree's `hostMainRepo`, with the connector's `env` merged on top of `process.env` (subject to the namespace guard). Returns the standard `{exitCode, stdout, stderr}` envelope. + +## Read vs write — the Notion op surface + +`packages/integrations/src/connectors/notion.ts` carries the current allowlist. Intentionally minimal — start conservative, widen as real agent flows surface needs. + +| Op | Class | Host argv | Notes | +| ------------- | ----- | ------------------------ | -------------------------------------------------------------------------------------- | +| `whoami` | read | `ntn whoami` | dedicated op so the agent doesn't need to widen the `api` allowlist. | +| `api` | read | `ntn api <args>` | `GET`-only; `refuseApiNonGet` rejects `-X`/`--method`/`-f`/`-F` (Go pflag-style). | +| `page.create` | write | `ntn pages create <args>` | gated by `askPrompt`. (User-facing shim form: `ntn pages create …`.) | +| `page.update` | write | `ntn pages update <args>` | gated; covers archive + props. (User-facing shim form: `ntn pages update …`.) 
| + +`comment.add` is intentionally absent — `ntn` exposes no top-level `comment` subcommand. The only path is `ntn api v1/comments -X POST -f …`, which the `api` op refuses (GET-only). Comment creation needs a Notion-API-aware payload assembler that maps CLI flags to the structured `POST /v1/comments` body; tracked as a follow-up in [`notion_backlog.md`](./notion_backlog.md). The in-box shim rejects `notion comment add …` with a clear "deferred" message. + +## The enable flag + +`integrations.notion.enabled` (typed config, default **false**) lives in `packages/config/src/types.ts` (`UserConfig`, `EffectiveConfig`, `BUILT_IN_DEFAULTS`, `KEY_REGISTRY`). The config parser/merger/writer were taught to walk three-level nested keys (`branch.subbranch.leaf`) for this, so the YAML reads naturally. Layered the usual way: CLI > workspace > project > global > built-in. + +Toggle per project: + +```bash +agentbox config set --project integrations.notion.enabled true +``` + +Default off so every box ships the shim but it's inert until the user opts in — no surprise box→host calls. + +## In-box surface + +`packages/sandbox-docker/scripts/ntn-shim` is the `gh-shim` pattern: strict subcommand allowlist (`whoami`, `api`, `page`, …) → `exec agentbox-ctl integration notion -- "$@"`. Anything off the allowlist is rejected with a clear message. The same shim is symlinked at `/usr/local/bin/notion` so the agent can type either name. + +Staging follows the canonical pattern (see the `feedback-canonical-dockerfile-box-location` memory): + +- Listed in `contextFiles` + `execBitFiles` in `apps/cli/scripts/stage-runtime.mjs`. +- COPY'd into `Dockerfile.box` next to the `gh-shim` / `git-shim` block. +- Mirrored into `packages/sandbox-hetzner/scripts/install-box.sh`, `packages/sandbox-vercel/scripts/provision.sh`, and `packages/sandbox-e2b/scripts/build-template.sh`, plus the matching `src/runtime-assets.ts` upload lists. Daytona stays shim-less. 
+ +## Doctor + +`agentbox doctor` reports each integration in a dedicated `integrations:` group, driven off `ALL_CONNECTORS` (no hardcoded `'notion'` in the doctor — Linear/Trello will light up automatically when they ship). Per connector: + +- **Disabled** (default, layered config) → `[info] notion disabled (enable with \`agentbox config set --project integrations.notion.enabled true\`)`. `info` is a new status that rolls up like `ok` so a disabled integration never pushes the overall doctor status to "warn". +- **Enabled + binary missing** → `[warn] notion ntn not installed (install ntn: https://developers.notion.com/reference/notion-cli)`. Hint string comes from `connector.detect.installHint`. +- **Enabled + binary present + unauthed** → `[warn] notion not logged in (ntn login)`. Hint from `connector.detect.loginHint`. +- **Enabled + binary present + authed** → `[ ok ] notion ntn version X.Y.Z · authed`. + +**The doctor host probe does NOT set `NOTION_KEYRING=0`.** On the host the user's authed state is exactly their keychain entry; forcing the file-auth path would make `ntn api v1/users/me` look for a non-existent `~/.config/notion/auth.json` and a keychain-authed user would falsely show as "not logged in". The connector's env override applies in-box (where the carried file IS the credential), and the doctor's host probe deliberately skips it. See the comment in `apps/cli/src/lib/doctor-checks.ts:integrationsChecks`. + +The live `ntn` host probe is the orchestrator's post-merge check — it can't be verified inside an AgentBox box because the real `ntn` isn't installed there. The unit test (`apps/cli/test/doctor-integrations.test.ts`) stubs a fake `ntn` on PATH so the four status transitions are exercised in CI. + +## Carry-based file-auth for nested boxes + +For T4 nested-box e2e (box → box, exercise the integration from inside a box), the host's `ntn` auth is carried into the box as a **file**. 
`agentbox.yaml`'s `carry:` block ships `~/.config/notion/auth.json` (or the equivalent path) into the box, and `NOTION_KEYRING=0` is forced by the connector when the relay shells out, so `ntn` reads the carried file directly. The token still lives only at the leaf hop (the innermost agent's relay invokes the innermost host's `ntn`, which has the file). There is no token in the agent's process env (`printenv | grep -i notion` shows nothing). + +Carry is host→box and one-prompt-approved (see [`features.md`](./features.md) → `carry:`). T4 wires the actual e2e verification. + +## Cross-provider parity + +`integration.<service>.<op>` is dispatched identically on docker and cloud because the wire shape is method-agnostic. The cloud path long-polls `/bridge/poll` and runs `executeCloudAction → runIntegrationRpc`, which reuses the exact same handler. The Hetzner / Vercel / E2B image flows all ship the `ntn` / `notion` shim; Daytona stays shim-less (see "In-box surface" above). No provider-specific code in the integrations spine. + +## Open follow-ups + +- **Linear / Trello / ClickUp** — see [`integrations_backlog.md`](./integrations_backlog.md). Each is a new descriptor + a small shim; no relay change. ClickUp will be the one custom REST connector (no good CLI on PyPI / npm). +- **`comment.add`** — deferred; needs a Notion-API-aware payload translator that maps CLI flags to the structured `POST /v1/comments` body. +- **Least-privilege tokens** — Notion capability toggles for the host token; Trello supports `scope=read` (when we add it); Linear personal keys inherit full user perms (OAuth-only for read-scope tokens). Document on each service's user-facing page. +- **Host-initiated tokens** — the relay already accepts `params.hostInitiated` and validates it against `HostInitiatedTokens` (scope + params-hash bound). 
The host-CLI mint path that issues those tokens isn't wired yet for integrations; once it is, a host-typed `agentbox-ctl integration notion page.create …` can skip the prompt by minting a token first (same shape as the existing `gh.pr.*` and `cp.*` host-initiated paths). +- **Nested-box e2e** — T4 in [`notion_backlog.md`](./notion_backlog.md). Verify the carry-based file-auth path against a real Notion workspace. diff --git a/docs/notion_backlog.md b/docs/notion_backlog.md index bedf03c..13aa942 100644 --- a/docs/notion_backlog.md +++ b/docs/notion_backlog.md @@ -92,12 +92,42 @@ Make a box agent able to type `notion …` or `ntn …`. flag args to the structured POST body). Added a `whoami` read op so `ntn whoami` doesn't have to widen the `api` allowlist. -### T3 — `agentbox doctor` detection + docs ⬜ not started -- `agentbox doctor`: report `ntn` presence + auth (`ntn whoami` / `ntn doctor`), - with a friendly install/login hint. -- Docs (same change, per repo rule): new `docs/integrations.md`; a `.mdx` page + - CLI reference under `apps/web/content/docs/`; note new RPC methods in - `docs/host-relay.md`; mention in `docs/features.md`. +### T3 — `agentbox doctor` detection + docs ✅ done +- `agentbox doctor` now reports each integration in a dedicated + `integrations:` group, driven off `ALL_CONNECTORS` (no hardcoded + `'notion'`) so Linear/Trello light up here automatically when they land. + Each row probes `<hostBin> <versionArgs>` (install check) and + `<hostBin> <authArgs>` (login check) and surfaces install/login hints + from new optional `IntegrationConnector.detect.installHint` / + `loginHint` fields (filled for the Notion connector). The doctor + deliberately does NOT force `NOTION_KEYRING=0` — on the host the + keychain entry IS the credential, and the file-auth env override would + make a keychain-authed user falsely show as "not logged in". A new + `info` `CheckStatus` rolls up like `ok` so a disabled-but-configured + integration never pushes the overall doctor status to "warn". 
Unit + test (`apps/cli/test/doctor-integrations.test.ts`) stubs a fake `ntn` + on PATH and asserts the four transitions: disabled / missing / + unauthed / authed. +- Docs: + - `docs/integrations.md` — new internal design/reference doc + (descriptor model, relay dispatch flow, the read/write Notion op + surface, the enable flag, doctor wiring, the carry-based file-auth + path for nested boxes, open follow-ups). + - `apps/web/content/docs/integrations-notion.mdx` — new user-facing + Fumadocs page (prerequisites, enabling, what works in the box, + security model). Wired into `meta.json` under a new `---Services---` + section. + - `apps/web/content/docs/configuration.mdx` — new `## integrations` + section documenting `integrations.notion.enabled`. + - `apps/web/content/docs/cli.mdx` — `agentbox doctor` sentence + updated to mention the new group. + - `docs/host-relay.md` — new RPC method-family bullet for + `integration.<service>.<op>` (parser, allowlist, enable gate, + `refuseCall`, readiness probe, host-initiated token short-circuit, + `askPrompt` for writes, the `<SERVICE>_*` env namespace guard). + - `docs/features.md` — Notion integration bullet; the "Additional + `/rpc` methods" line updated to list `gh.pr.*` / + `integration.<service>.<op>` as already in place. ### T4 — Nested-box e2e verification + carry + closeout ⬜ not started - Carry `ntn` file-auth into a box; from that box create a nested box; run a @@ -123,3 +153,17 @@ Make a box agent able to type `notion …` or `ntn …`. `comment.add`, added `whoami` read op). Comments deferred to a focused follow-up — they need a Notion-API-aware payload translator that maps CLI flags to the structured `POST /v1/comments` body. +- 2026-06-06: T3 shipped — `agentbox doctor` now reports the new + `integrations:` group (registry-driven), with `info` for disabled and + install/login hints sourced from the connector descriptor. + `IntegrationConnector.detect` gained optional `installHint` / + `loginHint` fields (filled for Notion: install URL + `ntn login`). 
+ Unit test stubs a fake `ntn` on PATH and verifies the four status + transitions. Doctor's host probe does NOT set `NOTION_KEYRING=0` (a + comment in the code records why). Public docs site + internal + reference doc landed in the same PR: new `docs/integrations.md`, new + `apps/web/content/docs/integrations-notion.mdx` (Services section in + `meta.json`), config-key + doctor sentence in the published + `configuration.mdx` / `cli.mdx`, new RPC method-family bullet in + `docs/host-relay.md`, Notion entry in `docs/features.md`. T4 (nested- + box e2e + carry-based file-auth verification) is the remaining task. diff --git a/packages/integrations/src/connectors/notion.ts b/packages/integrations/src/connectors/notion.ts index f2af34c..a718f48 100644 --- a/packages/integrations/src/connectors/notion.ts +++ b/packages/integrations/src/connectors/notion.ts @@ -29,6 +29,8 @@ export const notionConnector: IntegrationConnector = { detect: { versionArgs: ['--version'], authArgs: ['api', 'v1/users/me'], + installHint: 'install ntn: https://developers.notion.com/reference/notion-cli', + loginHint: 'ntn login', }, env: { NOTION_KEYRING: '0' }, ops: { diff --git a/packages/integrations/src/types.ts b/packages/integrations/src/types.ts index 8bdf7c3..4724711 100644 --- a/packages/integrations/src/types.ts +++ b/packages/integrations/src/types.ts @@ -49,11 +49,20 @@ export interface IntegrationConnector { /** Host binary the relay execs (resolved on PATH). */ hostBin: string; /** - * How `agentbox doctor` (T3) detects host presence + auth. T1 only - * reads `versionArgs` — for the relay's "binary present?" probe. - * `authArgs` is reserved for the doctor's auth check. + * How `agentbox doctor` detects host presence + auth. The relay's + * `assertIntegrationReady` probe only reads `versionArgs` ("binary + * present?"); `agentbox doctor` additionally runs `authArgs` ("logged + * in?") and surfaces `installHint` / `loginHint` to the user when those + * probes fail. 
Keeping the hint strings on the descriptor (not in the + * doctor) means each connector is self-describing — when Linear lands + * its own descriptor carries its own install URL with no doctor change. */ - detect: { versionArgs: readonly string[]; authArgs?: readonly string[] }; + detect: { + versionArgs: readonly string[]; + authArgs?: readonly string[]; + installHint?: string; + loginHint?: string; + }; /** * Extra env vars the relay forces when spawning the host CLI. For Notion * this is `NOTION_KEYRING=0` so `ntn` reads file-based auth on Linux diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index ae624cd..eb5c194 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -82,6 +82,9 @@ importers: '@agentbox/ctl': specifier: workspace:* version: link:../../packages/ctl + '@agentbox/integrations': + specifier: workspace:* + version: link:../../packages/integrations '@agentbox/relay': specifier: workspace:* version: link:../../packages/relay