diff --git a/agentbox.yaml b/agentbox.yaml index cf790ce0..489c9539 100644 --- a/agentbox.yaml +++ b/agentbox.yaml @@ -101,6 +101,42 @@ carry: dest: ~/.local/share/opencode/auth.json mode: 0o600 optional: true + # Notion CLI (`ntn`) file-based auth — INTERNAL AGENTBOX DEV ONLY, so a box + # that creates NESTED boxes can act as their relay host and shell out to `ntn`. + # Normal boxes never need this: they reach `ntn` through the host relay and + # carry no token. macOS `ntn` keeps its token in the keychain (uncarryable); + # the box is Linux, so for the nested path bootstrap the host once with + # `NOTION_KEYRING=0 ntn login` (writes ~/.config/notion/auth.json) and see + # docs/development.md for how the in-box `ntn` reads it (the connector no + # longer forces NOTION_KEYRING=0). All optional: a host without the file-auth + # still gets a working box (top-level boxes test through the host's own `ntn`). + - src: ~/.config/notion/auth.json + dest: ~/.config/notion/auth.json + mode: 0o600 + optional: true + - src: ~/.config/notion/config.json + dest: ~/.config/notion/config.json + mode: 0o600 + optional: true + - src: ~/.config/notion/workspaces.json + dest: ~/.config/notion/workspaces.json + mode: 0o600 + optional: true + # Linear CLI (`@schpet/linear-cli`, the `linear` binary) auth, so a box that + # creates NESTED boxes can act as their relay host and shell out to `linear` + # for the integration's e2e test. The CLI stores a plaintext API token at + # ~/.config/linear/credentials.toml by default (keyring migration is opt-in + # and not used here), so the file carries directly — no keyring env toggle + # needed (unlike `ntn`). All optional: a host without the file still gets a + # working box (top-level boxes test through the host's own authed `linear`). 
+ - src: ~/.config/linear/credentials.toml + dest: ~/.config/linear/credentials.toml + mode: 0o600 + optional: true + - src: ~/.config/linear/linear.toml + dest: ~/.config/linear/linear.toml + mode: 0o600 + optional: true # Per-provider base-snapshot pointers. With these, `agentbox prepare` # inside the box can skip-fast (existing snapshot detected) and just # exercise the post-prepare config write + migration — no bake needed. diff --git a/apps/cli/package.json b/apps/cli/package.json index 4d14a156..d35ea1bf 100644 --- a/apps/cli/package.json +++ b/apps/cli/package.json @@ -70,6 +70,7 @@ "@agentbox/config": "workspace:*", "@agentbox/core": "workspace:*", "@agentbox/ctl": "workspace:*", + "@agentbox/integrations": "workspace:*", "@agentbox/relay": "workspace:*", "@agentbox/sandbox-cloud": "workspace:*", "@agentbox/sandbox-core": "workspace:*", diff --git a/apps/cli/scripts/stage-runtime.mjs b/apps/cli/scripts/stage-runtime.mjs index 70e15a80..84800793 100644 --- a/apps/cli/scripts/stage-runtime.mjs +++ b/apps/cli/scripts/stage-runtime.mjs @@ -43,6 +43,8 @@ const execBitFiles = new Set([ 'packages/sandbox-docker/scripts/agentbox-open', 'packages/sandbox-docker/scripts/gh-shim', 'packages/sandbox-docker/scripts/git-shim', + 'packages/sandbox-docker/scripts/ntn-shim', + 'packages/sandbox-docker/scripts/linear-shim', 'packages/sandbox-docker/scripts/chromium-resolver', ]); const contextFiles = [ @@ -54,6 +56,8 @@ const contextFiles = [ 'packages/sandbox-docker/scripts/agentbox-open', 'packages/sandbox-docker/scripts/gh-shim', 'packages/sandbox-docker/scripts/git-shim', + 'packages/sandbox-docker/scripts/ntn-shim', + 'packages/sandbox-docker/scripts/linear-shim', 'packages/sandbox-docker/scripts/chromium-resolver', 'packages/sandbox-docker/scripts/custom-system-CLAUDE.md', 'packages/sandbox-docker/scripts/claude-managed-settings.json', @@ -98,6 +102,8 @@ const hetznerFiles = [ ['packages/sandbox-docker/scripts/agentbox-open', 'agentbox-open', true], 
['packages/sandbox-docker/scripts/gh-shim', 'gh-shim', true], ['packages/sandbox-docker/scripts/git-shim', 'git-shim', true], + ['packages/sandbox-docker/scripts/ntn-shim', 'ntn-shim', true], + ['packages/sandbox-docker/scripts/linear-shim', 'linear-shim', true], ['packages/sandbox-hetzner/scripts/custom-system-CLAUDE.md', 'custom-system-CLAUDE.md', false], ['packages/sandbox-docker/scripts/claude-managed-settings.json', 'claude-managed-settings.json', false], ['packages/sandbox-docker/scripts/agentbox-codex-hooks.json', 'agentbox-codex-hooks.json', false], @@ -134,6 +140,8 @@ const vercelFiles = [ ['packages/sandbox-docker/scripts/agentbox-open', 'agentbox-open', true], ['packages/sandbox-docker/scripts/gh-shim', 'gh-shim', true], ['packages/sandbox-docker/scripts/git-shim', 'git-shim', true], + ['packages/sandbox-docker/scripts/ntn-shim', 'ntn-shim', true], + ['packages/sandbox-docker/scripts/linear-shim', 'linear-shim', true], ['packages/sandbox-vercel/scripts/custom-system-CLAUDE.md', 'custom-system-CLAUDE.md', false], ['packages/sandbox-docker/scripts/claude-managed-settings.json', 'claude-managed-settings.json', false], ['packages/sandbox-docker/scripts/agentbox-codex-hooks.json', 'agentbox-codex-hooks.json', false], @@ -159,6 +167,8 @@ const e2bFiles = [ ['packages/sandbox-docker/scripts/agentbox-open', 'agentbox-open', true], ['packages/sandbox-docker/scripts/gh-shim', 'gh-shim', true], ['packages/sandbox-docker/scripts/git-shim', 'git-shim', true], + ['packages/sandbox-docker/scripts/ntn-shim', 'ntn-shim', true], + ['packages/sandbox-docker/scripts/linear-shim', 'linear-shim', true], ['packages/sandbox-e2b/scripts/custom-system-CLAUDE.md', 'custom-system-CLAUDE.md', false], ['packages/sandbox-docker/scripts/claude-managed-settings.json', 'claude-managed-settings.json', false], ['packages/sandbox-docker/scripts/agentbox-codex-hooks.json', 'agentbox-codex-hooks.json', false], diff --git a/apps/cli/src/commands/config.ts b/apps/cli/src/commands/config.ts 
/**
 * Resolve a dotted config key against a nested plain object.
 *
 * Every dot-segment is walked, not just the first one, so a three-level key
 * such as `integrations.notion.enabled` reaches the deepest leaf instead of
 * being split as `integrations` + `notion.enabled`.
 *
 * @param obj Root object to walk; `undefined` yields `undefined`.
 * @param key Dot-separated path, e.g. `integrations.notion.enabled`.
 * @returns The value at the path, or `undefined` when any segment is missing
 *          or an intermediate value is not an object.
 */
function walkKey(obj: Record<string, unknown> | undefined, key: string): unknown {
  let cur: unknown = obj;
  for (const seg of key.split('.')) {
    if (cur === undefined || cur === null || typeof cur !== 'object') return undefined;
    // Own properties only: a key like `constructor` or `x.toString` must read
    // as "not set", not leak a member inherited from Object.prototype.
    if (!Object.prototype.hasOwnProperty.call(cur, seg)) return undefined;
    cur = (cur as Record<string, unknown>)[seg];
  }
  return cur;
}

/** Look up `key` in the merged (effective) configuration. */
function leafValue(loaded: LoadedConfig, key: string): unknown {
  return walkKey(loaded.effective as unknown as Record<string, unknown>, key);
}

/** Look up `key` in a single layer's raw values; `undefined` when the layer is absent. */
function rawLeafFromValues(
  values: Record<string, unknown> | undefined,
  key: string,
): unknown {
  return walkKey(values, key);
}
/**
 * Run a host CLI once and report how it went, keeping "binary is not on PATH"
 * (ENOENT) separate from "binary ran and exited non-zero".
 *
 * The call passes `reject: false`, so a spawn failure is expected to come back
 * as a result envelope carrying `code: 'ENOENT'` rather than as a thrown
 * error. The `catch` arm handles the thrown form too, so either shape ends up
 * as the same `missing: true` / exit 127 answer.
 *
 * @param bin  Executable name to resolve on PATH.
 * @param args Arguments passed through verbatim (copied, never mutated).
 * @returns `exitCode` (127 when the binary is missing, 1 when none was
 *          reported), the captured `stdout`/`stderr` as strings, and `missing`
 *          set only for ENOENT.
 */
async function probeIntegrationBin(
  bin: string,
  args: readonly string[],
): Promise<{ exitCode: number; stdout: string; stderr: string; missing: boolean }> {
  // Both result arms go through the same normaliser so the declared `string`
  // fields hold even if a stream comes back as something other than a string.
  const asText = (value: unknown): string => (typeof value === 'string' ? value : '');
  try {
    const r = await execa(bin, [...args], { reject: false });
    const code = (r as { code?: string }).code;
    if (code === 'ENOENT') {
      return { exitCode: 127, stdout: '', stderr: asText(r.stderr), missing: true };
    }
    return {
      exitCode: r.exitCode ?? 1,
      stdout: asText(r.stdout),
      stderr: asText(r.stderr),
      missing: false,
    };
  } catch (err) {
    const code = (err as NodeJS.ErrnoException).code;
    return {
      exitCode: code === 'ENOENT' ? 127 : 1,
      stdout: '',
      stderr: errSummary(err),
      missing: code === 'ENOENT',
    };
  }
}

/**
 * Shape `loadEffectiveConfig` returns; only the integrations slice matters here.
 * NOTE(review): the per-service value type is reconstructed from the use site
 * (`integrations?.[svc]?.enabled`) — confirm against the config package.
 */
type IntegrationsConfigSlice = {
  effective: { integrations?: Record<string, { enabled?: boolean } | undefined> };
};

/** Loads layered config for a working directory; injectable so tests can stub it. */
export type IntegrationsConfigLoader = (cwd: string) => Promise<IntegrationsConfigSlice>;
/**
 * Per-connector host-side detection: is `integrations.<service>.enabled`
 * flipped on, is the host CLI installed, and is the user logged in. One row
 * is produced per entry in `ALL_CONNECTORS`.
 *
 * @param loader Config loader, injectable for unit tests. The default reads
 *               layered config for `process.cwd()` on every call.
 * @returns One `CheckResult` per connector, in `ALL_CONNECTORS` order.
 */
export async function integrationsChecks(
  loader: IntegrationsConfigLoader = loadEffectiveConfig,
): Promise<CheckResult[]> {
  let cfg: IntegrationsConfigSlice;
  try {
    cfg = await loader(process.cwd());
  } catch {
    // Deliberate fail-closed: an unreadable config means every integration is
    // reported as not enabled instead of doctor itself crashing.
    cfg = { effective: {} };
  }
  // Connectors are independent of each other, so probe them concurrently.
  return Promise.all(
    ALL_CONNECTORS.map((connector) => checkOneIntegration(connector, cfg.effective.integrations)),
  );
}

/**
 * Build the doctor row for a single connector.
 *
 * Order of checks: enabled flag → version probe (is the CLI installed and
 * runnable) → optional auth probe. A disabled integration returns immediately
 * and never spawns the CLI.
 *
 * @param connector    Connector describing the service name, host binary and probe args.
 * @param integrations The `integrations` slice of the effective config, if any.
 */
async function checkOneIntegration(
  connector: IntegrationConnector,
  integrations: Record<string, { enabled?: boolean } | undefined> | undefined,
): Promise<CheckResult> {
  const svc = connector.service;
  const enabled = integrations?.[svc]?.enabled === true;
  if (!enabled) {
    return {
      label: svc,
      status: 'info',
      detail: 'disabled',
      hint: `enable with \`agentbox config set --project integrations.${svc}.enabled true\``,
    };
  }

  const version = await probeIntegrationBin(connector.hostBin, connector.detect.versionArgs);
  if (version.missing || version.exitCode === 127) {
    return {
      label: svc,
      status: 'warn',
      detail: `${connector.hostBin} not installed`,
      hint:
        connector.detect.installHint ??
        `install the ${svc} CLI (\`${connector.hostBin}\`) on the host`,
    };
  }
  if (version.exitCode !== 0) {
    // Binary exists but its version command failed: surface the first line of
    // whatever it printed so the row is actionable.
    const tail = firstLine((version.stderr || version.stdout).trim());
    return {
      label: svc,
      status: 'warn',
      detail: `${connector.hostBin} ${connector.detect.versionArgs.join(' ')} failed${tail ? `: ${tail}` : ''}`,
    };
  }
  const versionLine = firstLine((version.stdout || version.stderr).trim()) || connector.hostBin;

  // Connectors without an auth probe are done once the version check passes.
  if (!connector.detect.authArgs || connector.detect.authArgs.length === 0) {
    return { label: svc, status: 'ok', detail: versionLine };
  }

  const auth = await probeIntegrationBin(connector.hostBin, connector.detect.authArgs);
  if (auth.exitCode !== 0) {
    return {
      label: svc,
      status: 'warn',
      detail: 'not logged in',
      hint: connector.detect.loginHint ?? `run \`${connector.hostBin} login\``,
    };
  }
  return { label: svc, status: 'ok', detail: `${versionLine} · authed` };
}

/**
 * Run every doctor group: system, one group per provider, then integrations.
 *
 * The three families are awaited together, matching the provider-scoped path
 * in the doctor command, so total latency is the slowest family rather than
 * the sum. Group order in the returned array is unchanged.
 */
export async function runAllChecks(): Promise<CheckGroup[]> {
  const [sysResults, providerGroups, integrationResults] = await Promise.all([
    runSystemChecks(),
    Promise.all(ALL_PROVIDERS.map((n) => runProviderChecks(n))),
    integrationsChecks(),
  ]);
  const sys: CheckGroup = { title: 'system', results: sysResults };
  const integrations: CheckGroup = { title: 'integrations', results: integrationResults };
  return [sys, ...providerGroups, integrations];
}
const C_GREEN = '\x1b[32m';
const C_YELLOW = '\x1b[33m';
const C_RED = '\x1b[31m';
const C_DIM = '\x1b[2m';
const C_RESET = '\x1b[0m';
const COLOR = !process.env.NO_COLOR; // install requires a TTY anyway; honor NO_COLOR for piped output

// One table per rendering, keyed by every CheckStatus. Adding a status without
// extending these is a compile error, instead of the new status silently
// falling through to the "fail" rendering.
const STATUS_GLYPH: Record<CheckStatus, string> = {
  ok: '✓',
  info: '·',
  warn: '⚠',
  fail: '✗',
};

const STATUS_COLOR: Record<CheckStatus, string> = {
  ok: C_GREEN,
  info: C_DIM,
  warn: C_YELLOW,
  fail: C_RED,
};

const STATUS_BADGE: Record<CheckStatus, string> = {
  ok: '[ ok ]',
  info: '[info]',
  warn: '[warn]',
  fail: '[FAIL]',
};

/**
 * Single-glyph marker for a status, wrapped in its ANSI colour unless colour
 * is disabled via `NO_COLOR`.
 */
function statusMarker(s: CheckStatus): string {
  const glyph = STATUS_GLYPH[s];
  if (!COLOR) return glyph;
  return `${STATUS_COLOR[s]}${glyph}${C_RESET}`;
}

/** Fixed-width (6 character) plain-text badge for a status; never coloured. */
function statusBadge(s: CheckStatus): string {
  return STATUS_BADGE[s];
}
/**
 * Run the `config` command in-process with `args` and capture everything it
 * writes to stdout/stderr. The writes are intercepted with spies (always
 * restored in `finally`), so nothing reaches the real terminal.
 */
async function runConfigGet(args: string[]): Promise<{ stdout: string; stderr: string }> {
  let stdout = '';
  let stderr = '';
  const stdoutSpy = vi.spyOn(process.stdout, 'write').mockImplementation((chunk: unknown) => {
    stdout += typeof chunk === 'string' ? chunk : String(chunk);
    return true;
  });
  const stderrSpy = vi.spyOn(process.stderr, 'write').mockImplementation((chunk: unknown) => {
    stderr += typeof chunk === 'string' ? chunk : String(chunk);
    return true;
  });
  try {
    // Dynamic import after vi.resetModules() so the `getCommand`'s
    // commander state is fresh per test.
    const { configCommand } = await import('../src/commands/config.js');
    await configCommand.parseAsync(['node', 'agentbox-config', ...args]);
  } finally {
    stdoutSpy.mockRestore();
    stderrSpy.mockRestore();
  }
  return { stdout, stderr };
}

describe('config get on a nested 3-level key', () => {
  it('returns the leaf value, not <unset>', async () => {
    await setConfigValue('project', 'integrations.notion.enabled', 'true', tmpCwd, {
      raw: true,
    });
    const { stdout } = await runConfigGet(['get', 'integrations.notion.enabled']);
    expect(stdout).toContain('integrations.notion.enabled = true');
    expect(stdout).toMatch(/from: project /);
    // An empty needle can never be absent (every string contains ''), so the
    // assertion must name the placeholder printed for a missing value.
    // NOTE(review): assumes that placeholder is `<unset>` — confirm against config.ts.
    expect(stdout).not.toContain('<unset>');
  });

  it('--json carries the value and source', async () => {
    await setConfigValue('project', 'integrations.notion.enabled', 'true', tmpCwd, {
      raw: true,
    });
    const { stdout } = await runConfigGet([
      'get',
      'integrations.notion.enabled',
      '--json',
    ]);
    const parsed = JSON.parse(stdout) as { key: string; value: unknown; source: string };
    expect(parsed.key).toBe('integrations.notion.enabled');
    expect(parsed.value).toBe(true);
    expect(parsed.source).toBe('project');
  });

  it('--all walks every layer (no silent <unset> for the project layer)', async () => {
    await setConfigValue('project', 'integrations.notion.enabled', 'true', tmpCwd, {
      raw: true,
    });
    const { stdout } = await runConfigGet([
      'get',
      'integrations.notion.enabled',
      '--all',
    ]);
    expect(stdout).toMatch(/effective: true /);
    expect(stdout).toMatch(/project:\s+true /);
    expect(stdout).toMatch(/default:\s+false/);
  });

  it('unset key falls back to the built-in default (false)', async () => {
    const { stdout } = await runConfigGet(['get', 'integrations.notion.enabled']);
    expect(stdout).toContain('integrations.notion.enabled = false');
    expect(stdout).toMatch(/from: built-in default/);
  });
});
// Stand-in for the real `ntn` binary: answers `--version`, and answers `api`
// successfully only when NTN_TEST_AUTH=ok is present in its environment.
const NTN_SCRIPT = `#!/usr/bin/env bash
case "$1" in
  --version)
    echo "ntn version 0.42.0"
    exit 0 ;;
  api)
    if [ "$NTN_TEST_AUTH" = "ok" ]; then
      echo '{"object":"user","id":"stub"}'
      exit 0
    fi
    echo "Error: not logged in. Run 'ntn login' to authenticate." >&2
    exit 1 ;;
  *)
    echo "stub: unknown subcommand $1" >&2
    exit 2 ;;
esac
`;

const enabled: IntegrationsConfigLoader = () =>
  Promise.resolve({ effective: { integrations: { notion: { enabled: true } } } });
const disabled: IntegrationsConfigLoader = () => Promise.resolve({ effective: {} });

type DoctorRows = Awaited<ReturnType<typeof integrationsChecks>>;

/** Find the row for `label`, failing with a readable message instead of a `!` assertion. */
function rowFor(rows: DoctorRows, label: string): DoctorRows[number] {
  const row = rows.find((r) => r.label === label);
  if (!row) throw new Error(`doctor returned no row for integration "${label}"`);
  return row;
}

describe('doctor — integrations group', () => {
  let stubDir: string;
  let originalPath: string | undefined;
  let originalAuth: string | undefined;

  beforeEach(async () => {
    stubDir = await mkdtemp(join(tmpdir(), 'agentbox-doctor-int-'));
    originalPath = process.env.PATH;
    originalAuth = process.env.NTN_TEST_AUTH;
  });

  afterEach(async () => {
    // Restore the environment exactly, including "was unset".
    if (originalPath === undefined) delete process.env.PATH;
    else process.env.PATH = originalPath;
    if (originalAuth === undefined) delete process.env.NTN_TEST_AUTH;
    else process.env.NTN_TEST_AUTH = originalAuth;
    await rm(stubDir, { recursive: true, force: true });
  });

  async function stageStub(): Promise<void> {
    const ntn = join(stubDir, 'ntn');
    await writeFile(ntn, NTN_SCRIPT, 'utf8');
    await chmod(ntn, 0o755);
    // Prepend the stub dir so our fake `ntn` wins over any real one, but
    // keep the original PATH so the script's `#!/usr/bin/env bash` shebang
    // can still resolve `bash` (env in /usr/bin uses the child's PATH).
    process.env.PATH = `${stubDir}:${originalPath ?? ''}`;
  }

  function emptyPath(): void {
    // Only the empty stub dir — execa(`ntn`) gets ENOENT directly (no
    // shebang interpretation needed for a missing binary).
    process.env.PATH = stubDir;
  }

  it('renders info / "disabled" when the flag is off (default)', async () => {
    emptyPath();
    const results = await integrationsChecks(disabled);
    // One row per registered connector (notion, linear, …). All should
    // surface as `info`/disabled when no flag has been flipped — disabling
    // an integration is a setting, not a problem.
    expect(results.length).toBeGreaterThanOrEqual(2);
    for (const row of results) {
      expect(row.status).toBe('info');
      expect(row.detail).toBe('disabled');
      expect(row.hint).toContain(`integrations.${row.label}.enabled true`);
    }
    const notion = results.find((r) => r.label === 'notion');
    expect(notion).toBeDefined();
    const linear = results.find((r) => r.label === 'linear');
    expect(linear).toBeDefined();
  });

  it('renders warn / "not installed" when enabled but ntn is missing', async () => {
    emptyPath();
    const results = await integrationsChecks(enabled);
    const row = rowFor(results, 'notion');
    expect(row.status).toBe('warn');
    expect(row.detail).toMatch(/not installed/);
    expect(row.hint).toMatch(/install ntn/);
  });

  it('renders warn / "not logged in" when ntn is present but unauthed', async () => {
    await stageStub();
    delete process.env.NTN_TEST_AUTH;
    const results = await integrationsChecks(enabled);
    const row = rowFor(results, 'notion');
    expect(row.status).toBe('warn');
    expect(row.detail).toBe('not logged in');
    expect(row.hint).toBe('ntn login');
  });

  it('renders ok with the version line when ntn is present and authed', async () => {
    await stageStub();
    process.env.NTN_TEST_AUTH = 'ok';
    const results = await integrationsChecks(enabled);
    const row = rowFor(results, 'notion');
    expect(row.status).toBe('ok');
    expect(row.detail).toContain('ntn version 0.42.0');
    expect(row.detail).toContain('authed');
  });

  it('fails closed (no throw) when the config loader rejects', async () => {
    emptyPath();
    const broken: IntegrationsConfigLoader = () =>
      Promise.reject(new Error('malformed yaml'));
    const results = await integrationsChecks(broken);
    // Every row falls back to disabled (info), regardless of which connectors
    // are registered — a broken config is treated as "not enabled".
    for (const row of results) expect(row.status).toBe('info');
  });
});
Each integration is **disabled by default** — even when the host CLI is installed and authed, the box can't call out until you flip it on. The box never holds the service's token; reads pass through, writes prompt on the host. See [Notion](/docs/integrations-notion) and [Linear](/docs/integrations-linear). + +| Key | Type | Default | Meaning | +| --- | --- | --- | --- | +| `integrations.notion.enabled` | bool | `false` | proxy `ntn` calls from the box through the host relay; reads pass, writes prompt | +| `integrations.linear.enabled` | bool | `false` | proxy `linear` calls (`@schpet/linear-cli`) from the box through the host relay; reads pass, writes prompt; `auth token` is hard-rejected | + +```bash +agentbox config set --project integrations.notion.enabled true +agentbox config set --project integrations.linear.enabled true +``` + +`agentbox doctor` reports a row per integration in a dedicated `integrations:` group: disabled (default), `ntn not installed`, `not logged in`, or `authed` — with a one-line hint for each non-`ok` state. + ## queue & autopause `queue.*` schedules background `-i` jobs; `autopause.*` pauses idle boxes. diff --git a/apps/web/content/docs/integrations-linear.mdx b/apps/web/content/docs/integrations-linear.mdx new file mode 100644 index 00000000..2ff81e4f --- /dev/null +++ b/apps/web/content/docs/integrations-linear.mdx @@ -0,0 +1,105 @@ +--- +title: Linear +description: Let your box read and write Linear issues through the host's authenticated linear CLI — your API token never enters the box +--- + +AgentBox can proxy Linear calls from inside a box to the host's authenticated `linear` CLI (`@schpet/linear-cli`). The box agent can list and view issues, run filtered queries, and (with your approval for each write) create or update issues and post comments — without your Linear API token ever entering the box. Same model as `agentbox-ctl git push`, `agentbox-ctl git pr create`, and the [Notion integration](/docs/integrations-notion). 
+ + +The box runs a tiny `linear` shim. Calls go through `agentbox-ctl integration linear <op>` to the **host relay**, which runs the host's real `linear` and ships the result back. Reads pass straight through. Writes raise a one-line confirm in your terminal first. + + +## Prerequisites + +The integration wraps [`@schpet/linear-cli`](https://github.com/schpet/linear-cli) (the `linear` binary, v2). Install it on the **host** (not in the box): + +```bash +npm install -g @schpet/linear-cli +linear auth login # opens the browser, stores auth in ~/.config/linear/credentials.toml +``` + +Then run `agentbox doctor`: + +```text +integrations: + [info] linear disabled (enable with `agentbox config set --project integrations.linear.enabled true`) +``` + +The integration is **off by default**, so even with `linear` installed the box can't call it until you opt in. While the flag is off, doctor only reports the row as `disabled` — it does not probe the host `linear` binary or its login state yet; flip the flag and re-run doctor to check both. + +## Enable it for this project + +```bash +agentbox config set --project integrations.linear.enabled true +``` + +`--project` scopes it to the current project (config file under `~/.agentbox/projects/<hash>/`). Drop `--project` for global. Run `agentbox doctor` again — the row should now read: + +```text +integrations: + [ ok ] linear linear/2.0.0 (…) · authed +``` + +If you see `[warn] not logged in`, run `linear auth login` on the host. If you see `[warn] linear not installed`, the host install didn't put `linear` on `PATH`. + +## What works inside the box + +The in-box shim exposes a strict allowlist. Anything outside the list — including `linear auth token`, `linear issue delete`, and any of `project` / `cycle` / `milestone` / `label` / `document` / `schema` — is rejected with a clear message. + +| In-box command | Class | What happens | +| --- | --- | --- | +| `linear whoami` (or `linear auth whoami`) | read | Passes through; prints the authed host user. 
| +| `linear issue list` | read | Lists issues for the authed user/team; pass through filters with `--`. | +| `linear issue mine` | read | v2-native "issues assigned to me" (replaces the older `issue list --me`). | +| `linear issue view ` | read | Detail view for one issue. | +| `linear issue query …` | read | Structured filter query. | +| `linear team list` | read | Lists teams. | +| `linear api ''` | read | GraphQL query passthrough — `mutation` / `subscription` are **refused** (exit 65). `--variable key=@` is also refused (exfiltration vector). | +| `linear issue create …` | write | **Prompts** the host for approval; on `y` the host runs `linear issue create` and ships back the result. | +| `linear issue update …` | write | **Prompts** the host for approval; covers status/title/etc. | +| `linear issue comment add …` | write | **Prompts** the host for approval; posts a comment. (Upstream uses `add`, not `create`.) | + +Reads are unprompted; every write raises a one-line confirm in your attached terminal (or in `agentbox agent approvals` for orchestrators driving boxes headlessly — see [Background & parallel](/docs/background-and-parallel)). + +```bash +# Inside the box — these all flow through the host relay: +linear whoami +linear issue mine +linear issue view ABC-42 +linear api '{ teams { id name } }' # GraphQL query, passes +linear api --paginate '{ teams { id } }' # pre-positional flags work +linear issue create --title "Draft from the box" # prompts on the host +linear issue comment add ABC-42 --body "from box" # prompts on the host (v2 uses `add`) +``` + + +`linear auth token` PRINTS the raw API token to stdout. The shim explicitly refuses it with `'auth token' leaks the raw API key — refused. Use 'linear whoami' for identity.`, and the connector exposes no op that maps to it. The whole point of the integration is to keep the token on the host — proxying `auth token` would defeat that. 
Same for `auth login` / `auth logout` / `auth migrate` / `auth default` (host owns auth state). + + +## Security model + +| Concern | What AgentBox does | +| --- | --- | +| Where the Linear token lives | **Host only** — in `~/.config/linear/credentials.toml`. The box has no access to it. (The carry block ships the file into nested-box relay hosts only — never to the agent's process env.) | +| What the box can do unprompted | **Reads only** (`whoami`, `issue list/mine/view/query`, `team list`, `api` GraphQL **queries**). | +| What needs your approval | **Every write** (`issue.create`, `issue.update`, `issue.comment`). Separately, **any `mutation` / `subscription` GraphQL operation** through `api` is never prompted for — it is refused outright with exit 65. | +| Where the approval lives | The host relay raises a confirm prompt; you answer in the attached terminal (`y` / `n`) or via `agentbox agent approve ` from an orchestrator. | +| Inside the box, does the agent ever see the token? | **No.** `printenv \| grep -i linear` inside a box returns nothing — only `AGENTBOX_RELAY_TOKEN`, which only authenticates to the box-local relay endpoint. | +| Destructive ops | `issue delete` / `team delete` / `team create` are **off the allowlist**. Start conservative; widen deliberately if a real flow needs them. | +| `auth token` | **Hard-rejected by the shim**, with no connector op exposing it. Three defenses (shim, connector allowlist, relay dispatch), all in series. | +| Auditability | Every approved write is logged as a relay event (visible via `/admin/events`, `agentbox agent`, the dashboard). | + +The integration is **off by default** for every new project. You flip it on per project once you've installed and authed `linear` on the host. + + +The box is the untrusted side. Tokens in the box would survive `agentbox download`, leak into commits if the agent mishandles them, and undermine the entire sandbox premise. 
Keeping the token on the host and putting the gate at the host boundary is the same model AgentBox uses for `git push`, `gh pr create`, and the [Notion integration](/docs/integrations-notion) — one model, audited in one place. + + +## Limitations and roadmap + +- **GraphQL query-only `api` passthrough.** `mutation` / `subscription` are refused; use the dedicated `issue.*` write ops instead. This guards against an agent slipping a write past the read classification. +- **No destructive ops.** `issue delete` / `team delete` / `team create` are off-list by default. +- **Allowlist starts conservative.** As real agent flows surface needs, the op set will widen — file an issue with the failing call if something's missing. +- **Trello / ClickUp** are still on the integrations roadmap; their connectors will appear in `agentbox doctor` the same way once they ship. + +See also [CLI commands](/docs/cli) for `agentbox doctor`, [Configuration](/docs/configuration) for the `integrations.linear.enabled` flag, and [Background & parallel](/docs/background-and-parallel) for the host-action approval surface. diff --git a/apps/web/content/docs/integrations-notion.mdx b/apps/web/content/docs/integrations-notion.mdx new file mode 100644 index 00000000..9b580e6c --- /dev/null +++ b/apps/web/content/docs/integrations-notion.mdx @@ -0,0 +1,97 @@ +--- +title: Notion +description: Let your box read and write Notion pages through the host's authenticated ntn CLI — your token never enters the box +--- + +AgentBox can proxy Notion calls from inside a box to the host's authenticated `ntn` CLI. The box agent can search workspaces, read pages, and (with your approval for each write) create or update pages — without your Notion token ever entering the box. The same model as `agentbox-ctl git push` and `agentbox-ctl git pr create`. + + +The box runs a tiny `ntn` / `notion` shim. 
Calls go through `agentbox-ctl integration notion ` to the **host relay**, which runs the host's real `ntn` and ships the result back. Reads pass straight through. Writes raise a one-line confirm in your terminal first. + + +## Prerequisites + +The integration wraps Notion's official CLI ([`ntn`](https://developers.notion.com/reference/notion-cli) — currently in beta). Install it on the **host** (not in the box): + +```bash +# macOS / Linux — see the official docs for other install methods +brew install notion-cli +ntn login # opens the browser, stores auth in the system keychain +``` + +Then verify with `agentbox doctor`: + +```text +integrations: + [info] notion disabled (enable with `agentbox config set --project integrations.notion.enabled true`) +``` + +The integration is **off by default**, so even with `ntn` installed the box can't call it until you opt in. Doctor's `info` line confirms `ntn` is detected; flip the flag to graduate it to a usable state. + +## Enable it for this project + +```bash +agentbox config set --project integrations.notion.enabled true +``` + +`--project` scopes it to the current project (config file under `~/.agentbox/projects//`). Drop `--project` for global. Run `agentbox doctor` again — the row should now read: + +```text +integrations: + [ ok ] notion ntn version X.Y.Z · authed +``` + +If you see `[warn] not logged in`, run `ntn login` on the host. If you see `[warn] ntn not installed`, the host install didn't put `ntn` on `PATH`. + +## What works inside the box + +The in-box shim exposes a strict allowlist. Anything outside the list is rejected with a clear message — start conservative, widen as flows surface. + +| In-box command | Class | What happens | +| --- | --- | --- | +| `ntn whoami` | read | Passes through; prints the authed host user. | +| `ntn api v1/` | read | `GET`-only passthrough (e.g. `v1/users/me`, `v1/pages/`, `v1/databases/`). 
Non-GET — `-X POST`, `-f`, `-F`, `--input` — is refused with exit 65, so Notion's `POST` endpoints (`v1/search`, `v1/databases//query`) are **not** reachable through this op. | +| `ntn pages create …` | write | **Prompts** the host for approval; on `y` the host runs `ntn pages create` and ships back the result. | +| `ntn pages update …` | write | **Prompts** the host for approval; covers props and archive. | + +`notion` is a symlink to `ntn` — either name works. Reads are unprompted; every write raises a one-line confirm in your attached terminal (or in `agentbox agent approvals` for orchestrators driving boxes headlessly — see [Background & parallel](/docs/background-and-parallel)). + +```bash +# Inside the box — these all flow through the host relay: +ntn whoami +ntn api v1/pages/abc123 # GET passthrough +ntn api v1/databases/def456 # GET passthrough +# Note: search is POST /v1/search (needs -f / a body), which the GET-only +# `api` op refuses — find pages by id, or search host-side. +ntn pages create -p -t "Draft from the box" # prompts on the host +``` + + +Notion's `ntn` exposes no top-level `comment` subcommand, and posting via `ntn api v1/comments -X POST -f …` needs a structured JSON body that the GET-only `api` op refuses. Comment writing will land as a dedicated op with a payload translator — see [integrations follow-ups](https://github.com/madarco/agentbox/blob/main/docs/integrations.md#open-follow-ups). + + +## Security model + +| Concern | What AgentBox does | +| --- | --- | +| Where the Notion token lives | **Host only** — in the macOS keychain (or `ntn`'s configured file-auth on Linux). The box has no access to either. | +| What the box can do unprompted | **Reads only** (`whoami`, `ntn api` GETs). | +| What needs your approval | **Every write** (`page.create`, `page.update`), and **any non-GET `api` call** is refused outright with exit 65. 
| +| Where the approval lives | The host relay raises a confirm prompt; you answer in the attached terminal (`y` / `n`) or via `agentbox agent approve ` from an orchestrator. | +| Inside the box, does the agent ever see the token? | **No.** `printenv \| grep -i notion` inside a box returns nothing — only `AGENTBOX_RELAY_TOKEN`, which only authenticates to the box-local relay endpoint. | +| Auditability | Every approved write is logged as a relay event (visible via `/admin/events`, `agentbox agent`, the dashboard). | + +The integration is **off by default** for every new project. You flip it on per project once you've installed and authed `ntn` on the host. + + +The box is the untrusted side. Tokens in the box would survive `agentbox download`, leak into commits if the agent mishandles them, and undermine the entire sandbox premise. Keeping the token on the host and putting the gate at the host boundary is the same model AgentBox already uses for `git push` and `gh pr create` — one model, audited in one place. + + +## Limitations and roadmap + +- **Trello / ClickUp are not supported yet.** [Linear](/docs/integrations-linear) ships alongside Notion; Trello and ClickUp are still on the integrations roadmap, and their connectors will appear in `agentbox doctor` the same way once they ship. +- **GET-only `api` passthrough.** Non-GET HTTP methods (`POST`, `PATCH`, `DELETE`) on `ntn api` are refused; use the dedicated write ops instead. This guards against an agent slipping a write past the read classification. +- **Comment creation deferred.** Tracked as a follow-up. +- **Allowlist starts conservative.** As real agent flows surface needs, the op set will widen — file an issue with the failing call if something's missing. + +See also [CLI commands](/docs/cli) for `agentbox doctor`, [Configuration](/docs/configuration) for the `integrations.notion.enabled` flag, and [Background & parallel](/docs/background-and-parallel) for the host-action approval surface. 
diff --git a/apps/web/content/docs/meta.json b/apps/web/content/docs/meta.json index 0525fa24..d73ee11b 100644 --- a/apps/web/content/docs/meta.json +++ b/apps/web/content/docs/meta.json @@ -20,6 +20,9 @@ "integrations-iterm2", "integrations-tmux", "integrations-cmux", + "---Services---", + "integrations-notion", + "integrations-linear", "---Providers---", "local-docker", "hetzner", diff --git a/docs/development.md b/docs/development.md index 8f910256..ed47279a 100644 --- a/docs/development.md +++ b/docs/development.md @@ -31,6 +31,28 @@ node apps/cli/dist/index.js destroy cc -y For the full lifecycle command list see [`docs/features.md`](./features.md). +### Notion integration — nested-box dev only + +Normal users never touch this. The `notion`/`ntn` integration works for any box +purely through the host relay (the box holds no token), and on macOS the host +just needs `ntn login` (keychain) — the connector forces **no** auth env, so the +relay, `agentbox doctor`, and the public docs all agree. + +The one exception is AgentBox **dogfooding its own integration** — exercising it +from a *nested* box (a box that runs its own relay for boxes it creates). A Linux +box has no keychain, so for the carried credential to be readable there you must +log the **host** `ntn` in with file-based auth instead of the keychain: + +```sh +NOTION_KEYRING=0 ntn login # writes ~/.config/notion/auth.json +``` + +The `carry:` block in `agentbox.yaml` then ships that file into the box. Because +the connector no longer forces `NOTION_KEYRING=0`, when you run `ntn` *inside* the +box you may also need to export `NOTION_KEYRING=0` there so it reads the carried +file rather than looking for a keychain. (This nested path is internal-dev-only +and not yet run end-to-end.) + ## Image: pull vs rebuild The box image is pinned to `agentbox/box:dev` and reused across creates. 
On diff --git a/docs/features.md b/docs/features.md index 9f8f7a9e..cc69e059 100644 --- a/docs/features.md +++ b/docs/features.md @@ -27,6 +27,8 @@ Full local-Docker lifecycle (plus parity-tested for cloud via `--provider dayton - `agentbox relay status|stop|start|restart` — manage the host relay process. `status` reads the pidfile + GETs `/healthz` and renders running / not-responding (zombie) / not-running; `--json` dumps the `RelayStatus` shape. `stop` / `start` / `restart` wrap `stopRelay()` / `ensureRelay()` (both idempotent — the same helpers `self-update` uses). Backed by `getRelayStatus()` in `packages/sandbox-docker/src/relay.ts` (re-exported from `@agentbox/sandbox-docker`); CLI in `apps/cli/src/commands/relay.ts`. - `agentbox prepare` — one-stop "set up the base image / show what's prepared" command. `agentbox prepare` (no args) prints a status table across all providers: docker's `agentbox/box:dev` image + the three shared docker volumes (`agentbox-claude-config`, `agentbox-codex-config`, `agentbox-opencode-config`), plus all daytona `agentbox*` snapshots (state / size / age / `(pinned in project)` marker) and `agentbox*` volumes — including the legacy per-agent ones that the daytona path no longer uses (visible reminder to clean them up via the Daytona dashboard). `agentbox prepare --provider docker` pre-builds the local Dockerfile.box image (idempotent). `agentbox prepare --provider daytona [--name X] [-y]` builds a layered `Image.fromDockerfile().addLocalFile().runCommands()` for the three host agent static tarballs and registers it as a named org-scoped snapshot via the documented `daytona.snapshot.create({ name, image })` API ([daytona.io/docs/en/snapshots](https://www.daytona.io/docs/en/snapshots/)), then pins `box.image: ` into the project config — subsequent `agentbox create --provider daytona` boots in seconds with the agent static config (plugins/skills/marketplaces/settings) already in place. 
Replaces the old `agentbox daytona publish-snapshot` (which used `_experimental_createSnapshot`, broken upstream). - `agentbox self-update` — self-updates the CLI then refreshes the local runtime. Detects how it was launched (`apps/cli/src/exec-method.ts`'s `detectExecutionMethod`): `npm` → `npm install -g @madarco/agentbox@latest`, `pnpm` → `pnpm add -g @madarco/agentbox@latest`, `npx`/`direct` (dev clone) → skip the package update with a note. Then best-effort `docker image rm -f agentbox/box:dev` (rebuilds lazily on next `create`/`claude` via `ensureImage()`) and reloads the relay via `stopRelay()`. The relay is only respawned in-process (`ensureRelay()`) when **no** self-update ran — after a real self-update this process is the stale build, so it just stops the relay and the next box command brings up the new one. `-y` skips the prompt, `--dry-run` previews, `--skip-self` does only the image+relay refresh. `stopRelay` lives in `packages/sandbox-docker/src/relay.ts` (reuses the existing pidfile helpers); `removeImage` in `docker.ts`. +- **Notion integration (relay-gated, host CLI)** — `agentbox-ctl integration notion ` and the in-box `ntn` / `notion` shims proxy a small allowlist of ops (`whoami`, GET-only `api` passthrough, `page.create`, `page.update`) through the host relay to the host's authenticated `ntn` CLI. Reads pass through; writes prompt the host for approval (same `askPrompt` gate as `git push` / `gh pr create`). Non-GET HTTP methods on `ntn api` are refused (`-X`, `--method`, `-f`, `-F`, `--input` all rejected with exit 65 by `refuseApiNonGet`). The box never holds a Notion token — `printenv | grep -i notion` inside a box returns nothing. 
Off by default — enable per project with `agentbox config set --project integrations.notion.enabled true` (typed config key `integrations.notion.enabled` in `packages/config/src/types.ts`); the relay re-reads the layered config on every call so a flag flip takes effect with no bounce, and a disabled integration is refused before any host process is touched. `agentbox doctor` reports each integration in a dedicated `integrations:` group — `info` for disabled, `warn` for not-installed / not-logged-in (with install/login hints from the connector descriptor), `ok` when authed. Connector descriptor lives in `packages/integrations/src/connectors/notion.ts`; the relay spine in `packages/relay/src/integrations.ts` (`parseIntegrationMethod`, `assertIntegrationReady`, `refuseIfIntegrationDisabled`, `runHostIntegration`) is dispatched identically by docker (`server.ts`) and cloud (`host-actions.ts`) per the "fix across all providers" rule. Adding a service (Linear / Trello / ClickUp) is one new descriptor file + a one-line registry add — no relay change. See [`integrations.md`](./integrations.md) (design) and [`notion_backlog.md`](./notion_backlog.md) (per-task status). +- **Linear integration (relay-gated, host CLI)** — `agentbox-ctl integration linear ` and the in-box `linear` shim proxy a strict allowlist of ops (`whoami`, `issue.list`/`issue.mine`/`issue.view`/`issue.query`, `team.list`, query-only `api` GraphQL passthrough, `issue.create`/`issue.update`/`issue.comment`) through the host relay to the host's authenticated `linear` CLI (`@schpet/linear-cli`). Same gate model as Notion: reads pass through, writes prompt. 
The `api` op's `refuseGraphqlNonQuery` consumes value-bearing flag values (`--variable`, `--variables-json`) so a benign JSON payload isn't misread as a positional, rejects any GraphQL `mutation` / `subscription` operation with exit 65 (the GraphQL analogue of `refuseApiNonGet`), AND refuses `--variable key=@` host-file loads (the `@` syntax would let the box exfiltrate host files via GraphQL variables). **`linear auth token`** (which would print the raw API token to stdout) and `auth login`/`logout`/`migrate`/`default` are hard-rejected by the shim and absent from the connector allowlist — three defenses in series. `issue delete` / `team delete` / `team create` are off-list (destructive). `issue.comment` maps to `linear issue comment add` — `@schpet/linear-cli` v2 uses `add`, not `create`. Connector descriptor at `packages/integrations/src/connectors/linear.ts`; shim at `packages/sandbox-docker/scripts/linear-shim`; typed flag `integrations.linear.enabled` (default false); doctor row is driven off `ALL_CONNECTORS` so the linear entry lights up automatically. See [`integrations.md`](./integrations.md) (design) and [`linear_backlog.md`](./linear_backlog.md) (per-task status). - In-box `agentbox-ctl git pull|push [-- ]` (and any tool the agent runs that shells out via this command) — POSTs to the host relay's `/rpc`, which executes git on the host with the user's SSH agent + gitconfig. Commits made inside the box land in the host's main `.git/` immediately (the `.git/` is bind-mounted RW at its identical absolute path); `git push` is the only operation that needs host credentials, hence the RPC. - Browser support — Vercel's [`agent-browser`](https://github.com/vercel-labs/agent-browser) is baked into the box image (`npm install -g agent-browser`). 
The Chromium binary that drives it is *not* Chrome for Testing (no Linux ARM64 build, and Noble's `chromium-browser` apt package is a snap stub that doesn't run in containers) — it's Playwright's Chromium, which has working linux/arm64 + linux/amd64 builds. It is **not** baked: `ENV AGENT_BROWSER_EXECUTABLE_PATH=/usr/local/bin/chromium` points at the `chromium-resolver` script (`packages/sandbox-docker/scripts/chromium-resolver`, installed at `/usr/local/bin/chromium`), which on first launch reuses the newest installed Playwright Chromium and otherwise runs `playwright install chromium` — preferring the project's pinned Playwright (`/workspace/node_modules/.bin/playwright`, so the build matches the project's own tests and they share one binary), else the box's global `playwright` as a fallback downloader. This avoids baking a version-pinned Chromium that goes stale the instant a project pins a different Playwright (the old bug: a baked build masqueraded in `~/.cache/ms-playwright`, the project's `playwright install` fetched a different one, and agents waiting on the baked path hung). Chrome runtime libs (libnss3, libxkbcommon0, libcups2t64, etc. — Noble names with the `t64` suffix where applicable) are installed once at image build. Agents inside the box invoke `agent-browser` directly; sessions/auth/cookies persist under `~/.agent-browser/` in the container's writable layer, so they survive `pause/unpause` and `stop/start` and are wiped on `destroy`. The flag `--with-playwright` on both `agentbox create` and `agentbox claude` additionally runs `npm install -g @playwright/cli@latest` inside the container at create time (recorded as `BoxRecord.withPlaywright` and surfaced in `agentbox status --inspect`) — a separate package from the `playwright` runtime baked into the image. 
- Web service port — every box reserves container `:80` at create with an unconditional `docker run -p 127.0.0.1:0:80` (immutable after `docker run`, so it's reserved up front even though the `expose:`-flagged service is usually only known after the in-box wizard writes `agentbox.yaml`). The ephemeral host port is resolved via `docker port` and persisted to `BoxRecord.webHostPort` (re-resolved on every `startBox`, like `vncHostPort`, since Docker reallocates it). `getBoxEndpoints` emits a `kind: 'web'` endpoint whose URL is the published loopback port (`http://127.0.0.1:`) — **uniform across engines, not OrbStack-dependent**; it's the primary clickable link in `agentbox list`/`status`. Until a service declares `expose:` it renders as `web reserved (...)`. The in-box `:80 → expose.port` forward is the supervisor-owned `WebProxy` (see [`in-box-supervisor.md`](./in-box-supervisor.md)). Pre-feature boxes (no `BoxRecord.webContainerPort`) have no reservation and are skipped by `startBox` — recreate to enable. @@ -48,7 +50,7 @@ Full local-Docker lifecycle (plus parity-tested for cloud via `--provider dayton - Auto-pause-on-idle / auto-stop policy. - Auto-refresh of the merged host export (inotify-driven `agentbox open` keeps `~/.agentbox/boxes//workspace` in sync without manual refresh). Today refresh is on-demand only. - Exporting the container writable layer on destroy (`--export ` flag). The live merged export under `~/.agentbox/boxes//workspace` is wiped with the box (use `agentbox checkpoint create` first if you want to keep the state). -- Additional `/rpc` methods beyond `git.pull` / `git.push`. The dispatch is a single switch in `packages/relay/src/server.ts` — easy to extend (target ideas: `git.fetch`, `gh.*`, `npm.publish`, anything else that needs host creds). +- Additional `/rpc` methods beyond `git.pull` / `git.push` / `gh.pr.*` / `integration..`. 
The dispatch is a single switch in `packages/relay/src/server.ts` — easy to extend (target ideas: `git.fetch`, `npm.publish`, anything else that needs host creds). - A user-facing `agentbox events`/`agentbox notify` CLI on top of the relay's ring buffer. Today you can `agentbox-relay tail` (against the host process at 127.0.0.1:8787) or `tail -f ~/.agentbox/relay.log`. - Event-buffer persistence (events are lost on relay restart; the token registry is rehydrated from `state.json` on next `agentbox create`, but historical events aren't). - Remote providers (E2B / Modal / Daytona / Vercel Sandbox). diff --git a/docs/host-relay.md b/docs/host-relay.md index 54f28f4c..96854e1f 100644 --- a/docs/host-relay.md +++ b/docs/host-relay.md @@ -15,6 +15,7 @@ - **Rehydration after restart**: every `createBox` reads `~/.agentbox/state.json` and re-pushes every known `(relayToken, gitWorktrees)` via `rehydrateRelayRegistry()`. Idempotent and cheap, so we do it unconditionally instead of trying to detect a restart. `startBox` also re-registers its own box. - The supervisor pushes outbound: `packages/ctl/src/relay-client.ts` is a fire-and-forget POST to `/events` (node:http, 2s timeout, silent failure). `onServiceState` / `onTaskState` in `supervisor.ts` forward terminal states (`ready` / `crashed` / `backoff` / `unhealthy` / `stopped` / `done` / `failed`). Disabled at construction when `AGENTBOX_RELAY_URL` / `AGENTBOX_RELAY_TOKEN` are unset — keeps existing tests and pre-relay boxes a no-op. - In-box CLI: `agentbox-ctl git pull|push [-- ...]` (in `packages/ctl/src/commands/git.ts`) POSTs to `/rpc` with `{ method: 'git.pull'|'git.push', params: { path: , args: [...] } }`, streams `stdout`/`stderr` back to the agent's terminal, and exits with the host's git exit code. This is what the agent invokes to ask the host to push the box's commits — no SSH keys leak into the box. 
+- **Service integrations via host CLIs**: `agentbox-ctl integration [-- args...]` (and the in-box `ntn` / `notion` / `linear` shims) POST `{ method: 'integration..', params: { path, args } }` for any connector registered in `@agentbox/integrations`. Currently shipped: `notion` (host bin `ntn`) with ops `whoami`, `api` (GET-only passthrough, refuses `-X`/`--method`/`-f`/`-F` / `--input`), `page.create` (gated), `page.update` (gated); and `linear` (host bin `linear`, `@schpet/linear-cli`) with ops `whoami` (`auth whoami`), `issue.list` / `issue.mine` / `issue.view` / `issue.query`, `team.list`, `api` (GraphQL **query-only** passthrough — `refuseGraphqlNonQuery` consumes `--variable` / `--variables-json` values, rejects `mutation` / `subscription`, and rejects `--variable key=@` host-file loads), `issue.create` / `issue.update` / `issue.comment` (gated; `issue.comment` maps to `linear issue comment add` — v2 uses `add`, not `create`). The linear shim hard-rejects `linear auth token` (would print the raw API key) and `auth login/logout/migrate/default`; destructive ops (`issue delete`, `team delete`, `team create`) are off the allowlist. The relay parses the method, looks up the connector + op, refuses with exit 65 if the per-project `integrations..enabled` flag is off, runs the op's `refuseCall` pre-flight, probes the host binary (` --version`, cached 60s), then either passes through (read) or gates the call via `askPrompt` (write) before shelling out to the host CLI via `runHostIntegration`. A connector may declare a `_*` env override merged onto the host spawn env (none do today — each host CLI uses its own default auth); a descriptor that tries to set anything outside its namespace yields exit 78 instead of silently rewriting `PATH`. Same `{exitCode, stdout, stderr}` envelope as `git.*` / `gh.pr.*`; wired into both `server.ts` (docker) and `host-actions.ts` (cloud — daytona/hetzner/vercel/e2b) per the "fix across all providers" rule. 
The reusable spine lives in `packages/relay/src/integrations.ts` (`parseIntegrationMethod`, `getConnector`, `assertIntegrationReady`, `refuseIntegrationCall`, `refuseIfIntegrationDisabled`, `runHostIntegration`); the in-box ctl surface is built from the same descriptors in `packages/ctl/src/commands/integration.ts`. Adding a service is one new descriptor file + a one-line registry add — no relay change. See [`integrations.md`](./integrations.md) for the design + the connector descriptor shape. - **PR ops via host `gh`**: `agentbox-ctl git pr [args...]` POSTs `{ method: 'gh.pr.', params: { path, args } }` for `op ∈ {create, view, list, comment, review, merge, checkout, close, reopen}`. The relay shells `gh pr ` with `cwd = worktree.hostMainRepo` so gh infers the repo from the host repo's `git remote -v`. Read-only ops (`view`, `list`) bypass the prompt; everything else triggers an `askPrompt`. Extra guards: `merge` refuses the `AGENTBOX_PROMPT=off` auto-`y` unless `AGENTBOX_GH_FORCE=1`; `checkout` is disabled by default (opt-in via `AGENTBOX_GH_PR_CHECKOUT=allow`) and refused on a dirty host tree or a host HEAD that matches any registered box branch (would corrupt the bind-mounted box `.git/HEAD`). Cloud path mirrors the same matrix in `executeCloudAction → runGhPrRpc`, with the no-attached-wrapper behavior gated by `AGENTBOX_GH_NO_SUB` (`deny` default, `allow`, or `prompt`). Requires `gh` installed and `gh auth login` on the host; for HTTPS push/pull/fetch we additionally recommend `gh auth setup-git` so plain `git push` uses gh's OAuth token (handled invisibly by git's credential helpers — no relay change needed). - `agentbox-ctl open ` (`packages/ctl/src/commands/open.ts`) opens the URL in the **box's own Chromium** via `agent-browser open --headed` (visible in the VNC view / `agentbox screen`), then best-effort POSTs `{ method: 'browser.open', params: { url } }` to the relay. 
The relay records a `browser-open` event, answers immediately (never blocks the box), and raises a **non-blocking, auto-expiring** confirm prompt (`askPrompt(..., { ttlMs })`, ~25s) in the footer/dashboard — "open link on the host?" — and only `open`s it on the host on a `y`. URL scheme is validated http/https both in the ctl command and via `isOpenableUrl` in `server.ts`. The box image symlinks `/usr/local/bin/xdg-open` to the `agentbox-open` wrapper and sets `BROWSER=/usr/local/bin/agentbox-open`, so `xdg-open` and any `$BROWSER`-aware tool (Claude Code OAuth, `gh`, …) route here. The `Ctrl+a u` footer/dashboard leader action is unrelated — it opens the box's web *app* on the host (`agentbox url`). - **Host-action approvals (orchestrator path)**: the confirm prompts that gate `git.push` / `cp.*` / `gh.pr.*` writes / `checkpoint.create` / `browser.open` are raised by `askPrompt` and answered over `/admin/prompts/answer`. Because that endpoint is **loopback-only**, only a host process can answer — a box can't. A host-side **orchestrator** (e.g. a Claude driving boxes with `agentbox claude -i`) inspects and answers them deliberately via two CLI commands (`apps/cli/src/commands/agent.ts`): diff --git a/docs/integrations.md b/docs/integrations.md new file mode 100644 index 00000000..5f5112c5 --- /dev/null +++ b/docs/integrations.md @@ -0,0 +1,262 @@ +# Integrations — relay-gated service connectors + +> Part of the AgentBox docs. Start at [CLAUDE.md](../CLAUDE.md). Planning context: [`integrations_backlog.md`](./integrations_backlog.md) (the four-service plan). Per-task tracker for Notion: [`notion_backlog.md`](./notion_backlog.md). The user-facing page is `apps/web/content/docs/integrations-notion.mdx` (published at https://agent-box.sh/docs/integrations-notion). 
+ +This is the design / reference doc for the host-side integrations spine — the box-to-host bridge that lets an in-box agent read tickets/docs from Notion and Linear (and, in future, Trello / ClickUp) and make a small, prompted set of writes, without ever holding the service's credentials inside the box. The shape mirrors the existing `gh` and `git` relay flows exactly. + +## Why this exists + +The host owns the credentials. The box is the untrusted side. A box agent should be able to **read** tickets/docs freely (a search, a `GET`) and **write** with the user's per-call approval (a `page.create`, a `comment.add`), but **the token must never enter the box**. The model is the one we already proved with `gh`: + +- An in-box shim (`gh-shim`) intercepts a strict subcommand allowlist and forwards through `agentbox-ctl`. +- `agentbox-ctl` POSTs `/rpc` on the box-local relay (bearer-authed, see [`host-relay.md`](./host-relay.md)). +- The relay classifies the op as **read** or **write**. Reads pass; writes go through `askPrompt` (host approval), then shell out to the host's authenticated CLI. The token stays on the host. + +Integrations generalize this for any host CLI: each service is one **connector descriptor** in `@agentbox/integrations`, and the relay's `integration.<service>.<op>` dispatcher walks the same path. + +## Where the gate lives + +The gate lives in the **relay**, not in the box. The in-box ctl is unprivileged; it sends an RPC and waits for a verdict. The relay (a host process) is the only thing that runs the host CLI, and it's the only thing that consults the per-project `integrations.<service>.enabled` flag, the op's read/write classification, the op's `refuseCall` pre-flight, and `askPrompt` for writes. One check covers every caller — the shim, the `notion`/`ntn` alias, a direct `agentbox-ctl integration` invocation, a future host-initiated one-time token. See "gate at the host boundary" in the user feedback notes. 
+ +## The connector descriptor + +`packages/integrations/src/types.ts` defines two types: + +```ts +export interface IntegrationConnector { + service: IntegrationService; // 'notion' (more later) + hostBin: string; // 'ntn' + detect: { // T3 doctor probes + versionArgs: readonly string[]; + authArgs?: readonly string[]; + installHint?: string; // shown by `agentbox doctor` when missing + loginHint?: string; // shown when unauthed + }; + env?: Readonly<Record<string, string>>; // forced env vars; <SERVICE>_* only + ops: Readonly<Record<string, IntegrationOp>>; +} + +export interface IntegrationOp { + write: boolean; // false = read, true = gated write + buildArgv?: (args: readonly string[]) => string[]; // shape user argv → host CLI argv + refuseCall?: (args: readonly string[]) => IntegrationOpRefusal | null; +} +``` + +Pure data + small predicates. No I/O at import time, so unit tests stay pure. The descriptor file (`packages/integrations/src/connectors/notion.ts`) is the single source of truth for the box surface, the relay's allowlist, and (since T3) the doctor's install/login hint strings. + +A `registry.ts` exports `getConnector(service)` and `ALL_CONNECTORS`. Adding a service is a new descriptor file + a one-line registry add. No relay change, no ctl change. + +### env-var namespace guard + +`packages/relay/src/integrations.ts:mergeConnectorEnv` enforces that a descriptor can only set env vars in its own `<SERVICE>_*` namespace (so a careless descriptor can never set `PATH` or `AGENTBOX_PROMPT`). A misconfigured descriptor returns a typed exit-78 envelope rather than silently disabling the relay's gate or rewriting `PATH`. **No connector currently declares an `env` override** — the relay runs each host CLI with its own default auth. (`ntn` reads the macOS keychain after `ntn login`; `linear` reads `~/.config/linear/credentials.toml`.) The field stays as an opt-in escape hatch for a future CLI that needs specific env to resolve its auth. + +## The relay dispatch flow + +`packages/relay/src/integrations.ts` is the spine. 
The dispatcher in `packages/relay/src/server.ts` (docker) and `packages/relay/src/host-actions.ts` (cloud) calls into it for any method starting with `integration.`. Per the "fix across all providers" rule, both paths share the exact same handler. + +For `integration.<service>.<op>`: + +1. **`parseIntegrationMethod`** splits on the first two dots; dotted ops (`page.create`) keep their dot. Unknown shape → exit 64. +2. **`getConnector(service)`** — unknown service → exit 64. +3. **op allowlist** — unknown op → exit 65, with the list of available ops. +4. **worktree resolve** — `params.path` → which registered worktree (cwd for the host CLI spawn). +5. **`refuseIntegrationCall(op, args)`** — runs the op's `refuseCall` pre-flight (e.g. `notion.api`'s GET-only check). Refused → exit 65, before any host process is spawned. +6. **`refuseIfIntegrationDisabled(service, cwd)`** — re-reads the layered config every call (so a flag flip takes effect without bouncing the relay; same approach as `loadAutopauseConfig`). Disabled → exit 65 with a config-hint. Runs **before** any host probe / prompt so a disabled integration is never user-visible as a permission prompt. +7. **`assertIntegrationReady(connector)`** — cached for 60s per `hostBin`. Probes `<hostBin> <versionArgs>` to make sure the binary exists. Missing → exit 127. Failed version → propagate exit. +8. **Write gating.** For `op.write === true`: + - If `params.hostInitiated` is set, validate it against `HostInitiatedTokens` (scope + params-hash bound). A present-but-invalid token is a hard reject (attack signal — exit 10). + - Otherwise (or for any unbound write) `askPrompt(...)` blocks until the host answers `y` / `n`. Denied → exit 10. + - Read ops skip both gates entirely. +9. **`runHostIntegration`** spawns the host binary in the worktree's `hostMainRepo`, with the connector's `env` merged on top of `process.env` (subject to the namespace guard). Returns the standard `{exitCode, stdout, stderr}` envelope. 
+ +## Read vs write — the Notion op surface + +`packages/integrations/src/connectors/notion.ts` carries the current allowlist. Intentionally minimal — start conservative, widen as real agent flows surface needs. + +| Op | Class | Host argv | Notes | +| ------------- | ----- | ------------------------ | -------------------------------------------------------------------------------------- | +| `whoami` | read | `ntn whoami` | dedicated op so the agent doesn't need to widen the `api` allowlist. | +| `api` | read | `ntn api ` | `GET`-only; `refuseApiNonGet` rejects `-X`/`--method`/`-f`/`-F` (Go pflag-style). | +| `page.create` | write | `ntn pages create ` | gated by `askPrompt`. (User-facing shim form: `ntn pages create …`.) | +| `page.update` | write | `ntn pages update ` | gated; covers archive + props. (User-facing shim form: `ntn pages update …`.) | + +`comment.add` is intentionally absent — `ntn` exposes no top-level `comment` subcommand. The only path is `ntn api v1/comments -X POST -f …`, which the `api` op refuses (GET-only). Comment creation needs a Notion-API-aware payload assembler that maps CLI flags to the structured `POST /v1/comments` body; tracked as a follow-up in [`notion_backlog.md`](./notion_backlog.md). The in-box shim rejects `notion comment add …` with a clear "deferred" message. + +## The enable flag + +`integrations.notion.enabled` (typed config, default **false**) lives in `packages/config/src/types.ts` (`UserConfig`, `EffectiveConfig`, `BUILT_IN_DEFAULTS`, `KEY_REGISTRY`). The config parser/merger/writer were taught to walk three-level nested keys (`branch.subbranch.leaf`) for this, so the YAML reads naturally. Layered the usual way: CLI > workspace > project > global > built-in. + +Toggle per project: + +```bash +agentbox config set --project integrations.notion.enabled true +``` + +Default off so every box ships the shim but it's inert until the user opts in — no surprise box→host calls. 
+ +## In-box surface + +`packages/sandbox-docker/scripts/ntn-shim` is the `gh-shim` pattern: strict subcommand allowlist (`whoami`, `api`, `page`, …) → `exec agentbox-ctl integration notion -- "$@"`. Anything off the allowlist is rejected with a clear message. The same shim is symlinked at `/usr/local/bin/notion` so the agent can type either name. + +Staging follows the canonical pattern (see the `feedback-canonical-dockerfile-box-location` memory): + +- Listed in `contextFiles` + `execBitFiles` in `apps/cli/scripts/stage-runtime.mjs`. +- COPY'd into `Dockerfile.box` next to the `gh-shim` / `git-shim` block. +- Mirrored into `packages/sandbox-hetzner/scripts/install-box.sh`, `packages/sandbox-vercel/scripts/provision.sh`, and `packages/sandbox-e2b/scripts/build-template.sh`, plus the matching `src/runtime-assets.ts` upload lists. Daytona stays shim-less. + +## Doctor + +`agentbox doctor` reports each integration in a dedicated `integrations:` group, driven off `ALL_CONNECTORS` (no hardcoded `'notion'` in the doctor — Linear/Trello will light up automatically when they ship). Per connector: + +- **Disabled** (default, layered config) → `[info] notion disabled (enable with \`agentbox config set --project integrations.notion.enabled true\`)`. `info` is a new status that rolls up like `ok` so a disabled integration never pushes the overall doctor status to "warn". +- **Enabled + binary missing** → `[warn] notion ntn not installed (install ntn: https://developers.notion.com/reference/notion-cli)`. Hint string comes from `connector.detect.installHint`. +- **Enabled + binary present + unauthed** → `[warn] notion not logged in (ntn login)`. Hint from `connector.detect.loginHint`. +- **Enabled + binary present + authed** → `[ ok ] notion ntn version X.Y.Z · authed`. + +The doctor auth probe runs each connector's CLI with **no forced env**, exactly as the relay does. 
So a host's real authed state — the macOS keychain after `ntn login` — is what's reported, and doctor can't show "authed" for an auth path the relay wouldn't actually use. See the comment in `apps/cli/src/lib/doctor-checks.ts:integrationsChecks`. + +The live `ntn` host probe is the orchestrator's post-merge check — it can't be verified inside an AgentBox box because the real `ntn` isn't installed there. The unit test (`apps/cli/test/doctor-integrations.test.ts`) stubs a fake `ntn` on PATH so the four status transitions are exercised in CI. + +## Carry-based file-auth for nested boxes + +For the nested-box dev path (box → box, exercise the integration from inside a box), the host's `ntn` auth is carried into the box as a **file**. `agentbox.yaml`'s `carry:` block ships `~/.config/notion/auth.json` into the box; the host must have been logged in file-mode (`NOTION_KEYRING=0 ntn login`) for that file to exist, and the in-box `ntn` may need `NOTION_KEYRING=0` exported to read it (the connector no longer forces the env — see [`docs/development.md`](./development.md)). This is **internal-dev only**; normal boxes carry no Notion credential and reach `ntn` purely through the host relay. Even on the nested path the token lives only at the leaf hop, never in the agent's process env (`printenv | grep -i notion` shows nothing). + +Carry is host→box and one-prompt-approved (see [`features.md`](./features.md) → `carry:`). T4 wires the actual e2e verification. + +## Verification / live e2e results + +T4 ran the integration against the live Notion API from inside a real box. +Captured evidence: + +- **`notion whoami` (read)** — round-trips through the in-box shim → host + relay → host `ntn` → Notion API; returns the host bot identity with no + approval prompt. +- **`notion api v1/users/me` (read)** — same path; returns the host bot's + JSON identity record. No prompt. 
+- **`notion api … -X POST` and `--method PATCH` (refused)** — the + connector's `refuseApiNonGet` correctly classifies these as writes and + blocks them before any host process is spawned: `notion api: only GET is + proxied (use page.create / page.update for writes); detected method + 'POST'`, exit 65. +- **No agent-side credential** — `printenv | grep -i notion` in the box + returns nothing in the agent's environment. The token lives only on the + host. The carried `~/.config/notion/auth.json` file is for nested-box + relay hosts and never reaches the agent's process env. +- **Connector argv bug (fixed in T4)** — a live `notion pages create` + through the host relay (rebuilt from T3 code) failed with `error: + unrecognized subcommand 'page'. tip: some similar subcommands exist: + 'update', 'pages'`. Real `ntn`'s surface is `api datasources files pages + login logout whoami workers`. The connector's `buildArgv` was building + singular `['page', 'create', …]`; T4 changed it to `['pages', 'create', + …]` and `['pages', 'update', …]`. Live write round-trip against the + fix lands after the host relay rebuilds with the merged T4 code. +- **`agentbox config get` nested-key bug (fixed in T4)** — `config get + integrations.notion.enabled` was returning `` even though + `config set` + `loadEffectiveConfig` worked correctly, because + `apps/cli/src/commands/config.ts` split keys on the FIRST dot only. T4 + replaced the helpers with a `walkKey` function that walks all segments + (mirrors `readLeaf` in `packages/config/src/load.ts`). New regression + test `apps/cli/test/config-get-nested.test.ts`. + +### Nested-box e2e — deferred, not blocking + +The nested-box scenario (a box-inside-a-box running a `notion` op through +this box's relay) was time-boxed in T4 and deferred. 
Architecturally, the +in-box `agentbox-ctl` daemon (port 8788) forwards `/rpc` to the HOST relay +(`host.docker.internal:8787`), not to a relay running in this box — so a +nested box's `notion pages create` would still terminate at the host +relay's spawn, not in this box's daemon. That means nested-box e2e +exercises the carry mechanics (already verified — `~/.config/notion/` +present in this box) more than the connector's spawn path. A future +follow-up that lifts the relay into the box's daemon would change this; +tracked under "Open follow-ups" below. + +## Cross-provider parity + +`integration.<service>.<op>` is dispatched identically on docker and cloud because the wire shape is method-agnostic. The cloud path long-polls `/bridge/poll`, runs `executeCloudAction → runIntegrationRpc`, which reuses the exact handler. The Hetzner / Daytona / Vercel / E2B image flows all ship the `ntn` / `notion` shim (see "In-box surface" above). No provider-specific code in the integrations spine. + +## Linear + +The Linear path of the integrations foundation, shipped under LT1 (descriptor-only — no relay/ctl core change, validating the abstraction the Notion work built). The connector descriptor lives in `packages/integrations/src/connectors/linear.ts`; in-box shim at `packages/sandbox-docker/scripts/linear-shim`. Backed by `@schpet/linear-cli` (the `linear` binary, v2). Tracker: [`linear_backlog.md`](./linear_backlog.md). + +### Op surface + +`packages/integrations/src/connectors/linear.ts` carries the current allowlist. Same starter-conservative shape as Notion's: reads pass through, writes go through `askPrompt`. + +| Op | Class | Host argv | Notes | +| -------------- | --------------- | ---------------------------------- | -------------------------------------------------------------------------------------- | +| `whoami` | read | `linear auth whoami` | identity only — **never** `linear auth token` (see below). 
| +| `issue.list` | read | `linear issue list` | | +| `issue.mine` | read | `linear issue mine` | v2-native "issues assigned to me" (the old `list --me` path was dropped upstream). | +| `issue.view` | read | `linear issue view` | | +| `issue.query` | read | `linear issue query` | structured filters. | +| `team.list` | read | `linear team list` | | +| `api` | read | `linear api <query>` | GraphQL query passthrough; `refuseGraphqlNonQuery` rejects `mutation` / `subscription` and any `--variable key=@<path>` host-file load. | +| `issue.create` | write (gated) | `linear issue create` | | +| `issue.update` | write (gated) | `linear issue update` | status/title/etc. | +| `issue.comment`| write (gated) | `linear issue comment add` | `@schpet/linear-cli` v2 uses `add` (not `create`). | + +### The auth-token exclusion (key security invariant) + +`linear auth token` PRINTS the raw API token to stdout. It is **never** on the allowlist: + +- The shim **hard-rejects** `linear auth token` with `'auth token' leaks the raw API key — refused. Use 'linear whoami' for identity.` (exit 2). Same hard-reject for `auth login` / `auth logout` / `auth migrate` / `auth default` — the host owns auth state. +- The connector exposes no op that maps to `auth token`. The only auth-family op is `whoami`, which maps to `linear auth whoami` (identity only). +- The relay's allowlist (the connector's `ops` map) denies any RPC whose op isn't on the list, so even if the shim were bypassed, the relay would refuse. + +Three defenses, all in series. A box agent can't reach `linear auth token` through any of them. + +`issue delete` / `team delete` / `team create` are similarly off-list (destructive; start conservative, widen deliberately). + +### The GraphQL mutation gate (`refuseGraphqlNonQuery`) + +Linear's `api` subcommand is a raw GraphQL endpoint — one POST that serves both queries (read) and mutations (write). The `api` op is a read passthrough, so it carries `refuseCall: refuseGraphqlNonQuery`. 
The predicate: + +- Walks argv, **consuming the value** after value-bearing flags (`--variable`, `--variables-json`) so their payload isn't misread as a positional GraphQL source. +- For every remaining positional, strips leading whitespace + `# …` line comments and refuses if the first keyword is `mutation` or `subscription` (exit 65, `linear api: only GraphQL queries are proxied …`). +- `query …` and the anonymous `{ … }` shorthand pass; empty/flag-only argv passes (the host CLI emits its own usage error). +- `--input` / `--input=…` is refused — stdin/file bodies can't traverse the relay anyway. +- **`--variable key=@<path>` is refused.** linear-cli's `@<path>` syntax reads from a host file and sends the contents as a GraphQL variable, which the box could echo back through the query response — an exfiltration channel. The guard rejects every split/glued/equals shape of the flag. + +The match is case-insensitive (defensive — GraphQL is case-sensitive in spec, but the cost of guarding is zero). The parser is not a GraphQL validator; it's a write-shape detector. Writes go through the dedicated gated `issue.*` ops, never `api`. + +### Enable flag + +`integrations.linear.enabled` (typed config, default **false**) lives next to the Notion flag in `packages/config/src/types.ts`. Same layering, same disabled-default rationale. + +```bash +agentbox config set --project integrations.linear.enabled true +``` + +### env / credentials + +Linear stores plaintext credentials at `~/.config/linear/credentials.toml` (keyring is opt-in, not used), and `linear` reads that file on every host by default — so the connector declares **no** `env` block (neither connector does; `mergeConnectorEnv` would only allow `LINEAR_*` keys anyway). The `agentbox.yaml` `carry:` block ships the file into nested boxes that run their own relay. + +### Verification / live e2e results + +LT2 ran the integration against the live `waldosai` workspace from inside a real AgentBox box. 
Captured evidence: + +- **`linear whoami` (read)** — round-trips through the in-box shim → host relay → host `linear` v2.0.0 → Linear API; returns `Workspace: waldosai … User: Marco D'Alia … Role: admin` with no approval prompt. +- **`linear issue mine/list --team WAL` and `linear team list` (reads)** — same path; exit 0, no prompt. `team list` returns `WAL Waldosai` (the team key used for the writes). +- **`linear api '{ viewer { id name email } }'` (read)** — the GraphQL passthrough returns the viewer JSON. `refuseGraphqlNonQuery` correctly classifies the `{ … }` shorthand as a query (`anonymous`) and lets it through. +- **`linear api 'mutation { … }'` (refused)** — exits 65 with `linear api: only GraphQL queries are proxied (use issue.create / issue.update / issue.comment for writes); detected operation 'mutation'`. Refused before any host process is spawned; reproduces through both the shim path and the direct `agentbox-ctl integration linear api` path (the gate lives in the connector, not the shim). +- **`linear auth token` (refused at the shim)** — exits 2 with `'auth token' leaks the raw API key — refused. Use 'linear whoami' for identity.`. The relay's op allowlist would also refuse it (no op maps to `auth token`); the shim is the first of three defenses. +- **Gated writes — three approve→succeed→ground-truth-read cycles.** + - `linear issue create --team WAL --title "agentbox LT2 e2e …" -d "…"` created **WAL-5** (URL `linear.app/waldosai/issue/WAL-5/…`). `linear issue view WAL-5` confirms title + description + Backlog state. + - `linear issue comment add WAL-5 -b "agentbox LT2 e2e comment via host relay (gated write)"` added the comment. `linear api '{ issue(id:"WAL-5") { … comments { nodes { body } } } }'` confirms the comment body matches verbatim. + - `linear issue update WAL-5 -s "Canceled"` moved the state. Post-update `linear issue view WAL-5` shows `**State:** Canceled`. 
+- **No agent-side credential** — `printenv | grep -E '^LINEAR'` inside the box returns nothing. The only token-shaped env var is `AGENTBOX_RELAY_TOKEN`. The carried `~/.config/linear/credentials.toml` sits on disk for the nested-box case (see below) but is not consumed during the primary e2e — the host's own `linear` reads its own `~/.config/linear/` host-side. +- **No source changes needed.** LT1's connector + shim + gate worked unchanged against the live host CLI — no T4-style argv drift this round. + +### Nested-box e2e — deferred, not blocking + +The nested-box scenario (a box-inside-a-box running a `linear` op through this box's relay) was time-boxed in LT2 and deferred for the same architectural reason as Notion. The in-box `agentbox-ctl` daemon forwards `/rpc` to the HOST relay (`host.docker.internal:8787`), not to a relay running in this box — so a nested box's `linear issue create` would still terminate at the host relay's spawn, not in this box's daemon. That means nested-box e2e exercises the carry mechanics (already verified — `~/.config/linear/credentials.toml` present in this box) more than the connector's spawn path. Installing the real `linear` in this box would also break the primary e2e by shadowing: npm's global prefix here is `/usr`, so `npm i -g @schpet/linear-cli` lands the real binary at `/usr/bin/linear`, but the shim at `/usr/local/bin/linear` precedes it on `$PATH` and keeps winning resolution — the in-box agent would still hit the shim, and the daemon would need a separately-shaped PATH (or an absolute `hostBin` path) to reach the real binary, which is out of scope here. A future follow-up that lifts the relay into the box's daemon would change this; tracked under "Open follow-ups" below alongside the Notion entry. + +## Open follow-ups + +- **Trello / ClickUp** — see [`integrations_backlog.md`](./integrations_backlog.md). Each is a new descriptor + a small shim; no relay change. 
ClickUp will be the one custom REST connector (no good CLI on PyPI / npm). +- **`comment.add`** — deferred; needs a Notion-API-aware payload translator that maps CLI flags to the structured `POST /v1/comments` body. +- **Least-privilege tokens** — Notion capability toggles for the host token; Trello supports `scope=read` (when we add it); Linear personal keys inherit full user perms (OAuth-only for read-scope tokens). Document on each service's user-facing page. +- **Host-initiated tokens** — the relay already accepts `params.hostInitiated` and validates it against `HostInitiatedTokens` (scope + params-hash bound). The host-CLI mint path that issues those tokens isn't wired yet for integrations; once it is, a host-typed `agentbox-ctl integration notion page.create …` can skip the prompt by minting a token first (same shape as the existing `gh.pr.*` and `cp.*` host-initiated paths). +- **Nested-box e2e** — deferred for both Notion (T4) and Linear (LT2) for the same architectural reason (in-box `agentbox-ctl` forwards `/rpc` to the original host relay, so a nested-box's `/rpc` terminates at the host's spawn regardless). Lifting the relay into the box's daemon would change this — tracked here, not blocking either path. The carry-based file-auth mechanics are already verified (the carried files land at the expected per-service paths). diff --git a/docs/integrations_backlog.md b/docs/integrations_backlog.md new file mode 100644 index 00000000..d19fe83d --- /dev/null +++ b/docs/integrations_backlog.md @@ -0,0 +1,337 @@ +# Plan: Ticketing & Knowledge integrations (Notion, Linear, Trello, ClickUp) + +## Context + +AgentBox boxes need to talk to ticketing/knowledge tools the same safe way they +already talk to GitHub: a box agent can **read** freely and perform a **limited, +prompted set of writes**, but **credentials never enter the box**. The host owns +the auth; the relay is the gate. 
+ +The driving use case: a host ("master") Claude session runs a goal/loop and calls +whatever CLI it likes directly (out of scope here — the host has creds). What we +build is the **box→host plumbing** so that work delegated into boxes can read +tickets/docs and make controlled writes through the relay, with a host approval +prompt on every write — exactly the `gh`/`git` model. + +This is the reference architecture already proven by `gh`: + +- In-box **shim** (`gh-shim`) intercepts a strict subcommand subset → `agentbox-ctl gh …` +- `agentbox-ctl` POSTs `/rpc` (bearer `AGENTBOX_RELAY_TOKEN`) → host relay +- Relay **classifies read vs write**, gates writes via `askPrompt` (loopback + `/admin/prompts/answer`), then shells out to the **host's authenticated CLI** + (`runHostGh`) and ships back `{exitCode, stdout, stderr}`. +- Reads skip the prompt; tokens stay on the host; host-initiated one-time tokens + let host-typed commands skip the prompt. + +### Decisions locked with the user + +| Decision | Choice | +|---|---| +| Execution model | **Host-side via relay** — tokens never in box (like `gh`) | +| CLI provisioning | **Host dependency** — user installs + auths the real CLI on the host; `agentbox doctor` detects it; relay shells out to it | +| In-box surface | **Per-service shims** on PATH: `notion`, `linear`, `trello` (later `clickup`) | +| Notion backend | Wrap the **official `ntn`** CLI (beta, first-party) | +| Linear backend | Wrap **`schpet/linear-cli`** (758★, TypeScript, `npm i -g @schpet/linear-cli`, has `--json`) | +| Trello backend | Wrap **`mheap/trello-cli`** (336★, TypeScript, `npm i -g trello-cli`; human-text output, no `--json`) | +| ClickUp | **Deferred** to a later session — weak ecosystem (best is 29★ Go); will be a **custom host-side REST connector** in the shared package, not a wrapped CLI | +| Sequencing | Shared foundation + Notion → Linear → Trello → (ClickUp later) | + +### CLI/MCP landscape (why this shape) + +- **Notion** — official CLI 
`ntn` (beta) + hosted MCP. Wrap `ntn`. +- **Linear** — NO official CLI; GraphQL API + hosted MCP. Wrap `schpet/linear-cli`. +- **Trello** — NO official CLI, NO MCP; REST key+token (best read-only-scope story). Wrap `mheap/trello-cli`. +- **ClickUp** — NO official CLI; thin community tools (≤29★, Go/Rust). Build a custom REST connector when we reach it. + +We chose CLI-wrapping over the hosted MCP servers deliberately: the relay model is +process-shell-out + read/write classification, which maps cleanly onto a CLI but +not onto an OAuth MCP stream. The hosted MCPs remain the host session's own +business (out of scope). + +--- + +## Architecture: a shared `integrations` abstraction + +The four services differ only in (a) the host CLI binary, (b) which subcommands +are read vs write, (c) argument quirks. Everything else — shim → ctl → relay → +prompt → host-exec → result — is identical to `gh`. So we factor the common +machinery into one place and make each service a small **connector descriptor**. + +### New package: `@agentbox/integrations` (`packages/integrations/`) + +The single source of truth for connector descriptors, shared by the relay (host +exec + gating), the ctl (in-box command surface), and the host CLI (`doctor`, +` status`). Pure data + small helpers, no docker/network at import time +(keeps unit tests pure, per repo conventions). + +```ts +// packages/integrations/src/types.ts +export interface IntegrationOp { + name: string; // e.g. 'issue.create' + write: boolean; // true => relay gates with askPrompt + /** Map ctl argv -> host CLI argv (verbatim passthrough by default). */ + buildArgv?(args: string[]): string[]; +} + +export interface IntegrationConnector { + service: 'notion' | 'linear' | 'trello' | 'clickup'; + /** Host binary the relay execs (resolved on PATH). */ + hostBin: string; // 'ntn' | 'linear' | 'trello' | (clickup: '' => REST) + /** Doctor: how to detect presence + auth on the host. 
*/ + detect: { versionArgs: string[]; authArgs?: string[] }; + ops: Record<string, IntegrationOp>; + /** Default: deny. Only listed ops are proxied at all (allowlist, like gh). */ +} +``` + +One descriptor file per service: `connectors/notion.ts`, `connectors/linear.ts`, +`connectors/trello.ts` (`connectors/clickup.ts` later). A `registry.ts` exports +`getConnector(service)` and `ALL_CONNECTORS`. + +This mirrors the existing `Provider` registry pattern (`packages/core/src/provider.ts`) +the codebase already uses for docker/daytona/hetzner/vercel/e2b. + +### Shared relay machinery (generalize `gh.ts`) + +`packages/relay/src/gh.ts` already isolates `runHostGh` + the read/write op sets + +the allowlist refusal. Generalize the reusable half into +`packages/relay/src/integrations.ts`: + +- `runHostIntegration(connector, op, args, cwd)` — the `runHostGh` analogue: + `spawn(connector.hostBin, op.buildArgv(args))`, capture stdout/stderr/exit. +- `assertIntegrationReady(connector)` — the `assertGhReady` analogue: returns a + clear exit-4-style error if the host binary is missing or logged out. +- A generic `/rpc` dispatch branch: method `integration.<service>.<op>` → + look up connector + op → if `op.write` and no valid host-initiated token → + `askPrompt(...)` (reuse `packages/relay/src/prompts.ts` verbatim) → on `y` + run `runHostIntegration`, else exit 10. Reads skip the prompt. + +This plugs into the existing dispatcher in +`packages/relay/src/server.ts` (the `POST /rpc` block, alongside the `git.*` / +`gh.*` branches) **and** the cloud path in +`packages/relay/src/host-actions.ts` (so daytona/hetzner/vercel/e2b get it for +free, per the project rule "fix across all providers"). The `HostActionQueue` / +`CloudBoxPoller` plumbing is method-agnostic — a new method prefix flows through +unchanged. 
+ +### Shared ctl machinery (generalize `commands/gh.ts`) + +`packages/ctl/src/commands/integration.ts` builds one commander `Command` per +connector from its descriptor, each subcommand calling +`postRpcAndExit('integration.<service>.<op>', params)` (reuse +`packages/ctl/src/relay-rpc.ts` unchanged). Registered in the ctl entrypoint next +to `ghCommand`. The box shim calls `agentbox-ctl <service> -- <args>`. + +### Shared shim machinery + +Each shim (`notion-shim`, `linear-shim`, `trello-shim`) is the `gh-shim` pattern: +strict subcommand/flag allowlist → `exec agentbox-ctl <service> -- "$@"`, +reject anything else with a clear message. Because the allowlist is per service, +the shims are thin and near-identical; keep them as separate small bash files +(matching `gh-shim`/`git-shim`) rather than over-abstracting bash. + +### Host CLI surface + +- `agentbox doctor` learns to report each integration: host binary present? + authed? (drives a friendly "install `ntn` and run `ntn login`" hint). Extend + the existing doctor command (Linux-aware doctor lives per + `docs/linux-host-backlog.md`). +- No `agentbox login` of our own — auth is the tool's own (`ntn login`, + `linear auth login`, `TRELLO_API_KEY`/`TRELLO_TOKEN` env). We only **detect**. + +### Config keys (typed, per `packages/config/src/types.ts`) + +Add an `integrations` block to `EffectiveConfig` with per-service enable flags, +following the precedence model (CLI > workspace > project > global > default): + +```yaml +integrations: + notion: { enabled: true } + linear: { enabled: true } + trello: { enabled: false } +``` + +Disabled → the shim isn't installed / `agentbox-ctl` refuses the method. Default +all **off** until the host tool is detected (avoid dead shims). Update +`BUILT_IN_DEFAULTS` and the config docs. 
+ +### Image / provisioning + +Shims are tiny bash files staged like `gh-shim`/`git-shim`: + +- Add each shim to `packages/sandbox-docker/scripts/` and to the **`contextFiles` + + `execBitFiles`** lists in `apps/cli/scripts/stage-runtime.mjs` (the canonical + Dockerfile is regenerated — editing `apps/cli/runtime/docker/Dockerfile.box` + directly is wiped; see memory note on canonical Dockerfile location). +- `Dockerfile.box` COPY block (near the existing `gh-shim`/`git-shim` COPY at + lines ~143-155) places them on PATH ahead of any real binary. +- Mirror into the hetzner install script + (`packages/sandbox-hetzner/scripts/install-box.sh`) and the vercel/e2b/daytona + runtime file lists in `stage-runtime.mjs`. +- **No host CLIs are bundled** — they are host dependencies (the user's laptop), + exactly like `gh`. The box only ever has shims. + +--- + +## Per-session breakdown + +Each row is its own session. Session 1 builds the shared spine + Notion as the +reference connector end-to-end; later sessions are mostly "add a descriptor + +shim + tests." + +### Session 1 — Shared foundation + Notion (`ntn`) +1. Create `packages/integrations/` (types, registry, `connectors/notion.ts`). + Notion ops: reads (`page.get`, `db.query`, `search`, `api GET …`); writes + (`page.create`, `page.update`/archive, `comment.add`) — gated. +2. `packages/relay/src/integrations.ts`: `runHostIntegration`, + `assertIntegrationReady`, generic dispatch helper. Wire the + `integration..` branch into `server.ts` **and** `host-actions.ts`. +3. `packages/ctl/src/commands/integration.ts` + register in ctl entrypoint. +4. `notion-shim` + stage-runtime wiring + Dockerfile COPY (+ hetzner/cloud mirrors). +5. `integrations` config block in `packages/config/src/types.ts` + defaults. +6. `agentbox doctor` integration section. +7. Tests + docs (see below). + +### Session 2 — Linear (`schpet/linear-cli`) +- `connectors/linear.ts`: hostBin `linear`. 
Reads (`issue list/mine/view/query`, + `team list`, `auth whoami` via the `whoami` op, query-only `api` GraphQL + passthrough); writes (`issue create`, `issue update`/status, `issue comment add` + — `@schpet/linear-cli` v2 uses `add`, not `create`) — gated. +- `linear-shim`; config flag; doctor entry. No relay/ctl core changes (descriptor + only) — this validates the abstraction. + +### Session 3 — Trello (`mheap/trello-cli`) +- `connectors/trello.ts`: hostBin `trello`. Reads (`board:get`, `list:get`, + `card:get`); writes (`card:create`, `card:move`, `comment`) — gated. +- Note: output is human-text, not `--json` — the shim/connector must not promise + JSON; agents parse text. Auth via `TRELLO_API_KEY`/`TRELLO_TOKEN` (host env), + and Trello supports a least-privilege `scope=read` token — document that. + +### Session 4 — ClickUp (custom REST connector) +- No good CLI → `connectors/clickup.ts` sets `hostBin: ''` and the relay path + uses a small host-side REST client (`fetch` against `api.clickup.com/api/v2`, + `pk_` token from host env/config) instead of `runHostIntegration`. This is the + one connector that exercises the "custom CLI/connector" branch the user + anticipated; keep it inside the same shared package so the box surface and + gating are identical to the others. + +--- + +## Critical files + +- **Reference to copy from**: `packages/relay/src/gh.ts`, + `packages/ctl/src/commands/gh.ts`, `packages/sandbox-docker/scripts/gh-shim`, + `packages/relay/src/prompts.ts`, `packages/relay/src/host-initiated.ts`. +- **New**: `packages/integrations/` (package), `packages/relay/src/integrations.ts`, + `packages/ctl/src/commands/integration.ts`, + `packages/sandbox-docker/scripts/{notion,linear,trello}-shim`. 
+- **Edit**: `packages/relay/src/server.ts` (`POST /rpc` dispatch), + `packages/relay/src/host-actions.ts` (cloud path), + `apps/cli/scripts/stage-runtime.mjs` (contextFiles/execBitFiles + cloud lists), + `packages/sandbox-docker/Dockerfile.box` (COPY shims), + `packages/sandbox-hetzner/scripts/install-box.sh` (mirror), + `packages/config/src/types.ts` (config block + defaults), + `apps/cli/src/commands/doctor.ts` (detection). +- **Docs (same change, per repo rule)**: a new `docs/integrations.md`; CLI + reference + a `.mdx` page under `apps/web/content/docs/`; mention in + `docs/host-relay.md` (new RPC methods) and `docs/features.md`. + +--- + +## Reused, not rebuilt + +- `askPrompt` + `PromptSubscribers` (`prompts.ts`) — the write gate, verbatim. +- `HostInitiatedTokens` (`host-initiated.ts`) — host-typed commands skip prompt. +- `postRpcAndExit` / `relay-rpc.ts` — box→relay transport, verbatim. +- `HostActionQueue` + `CloudBoxPoller` — cloud round-trip is method-agnostic. +- Provider-registry pattern (`core/src/provider.ts`) as the model for the + connector registry. +- `stage-runtime.mjs` staging + `gh-shim` COPY pattern for shim provisioning. + +--- + +## Verification (end-to-end, per session) + +Unit (pure, vitest): +- Connector registry: each op classified read/write; allowlist denies unknown ops. +- `refuse`-style guards: unknown subcommand/flag rejected with a clear message + (mirror the `gh.ts` refusal tests). +- Relay dispatch: write op with no host-initiated token → `askPrompt` called; + read op → not called; denied prompt → exit 10. + +Manual e2e (docker first, then one cloud provider — follow CLAUDE.md "Testing"): +1. Install + auth the host tool (`ntn login` / `linear auth login` / + `TRELLO_API_KEY`+`TRELLO_TOKEN`). `agentbox doctor` shows it green. +2. `node apps/cli/dist/index.js create -y -n smoke &`; `tail -f ~/.agentbox/logs/create.log`. +3. 
In the box (`agentbox shell smoke` or attach): run a **read** + (`linear issue list` / `notion search …`) → returns host data, **no prompt**. +4. Run a **write** (`linear issue create …`) → host approval prompt appears + (the wrapper's `/admin/prompts/stream`); approve → succeeds; deny → exit 10, + nothing created. **Verify ground truth** (the issue/card/page actually exists + or doesn't) rather than trusting exit codes — per the "verify ground truth" + project rule, and because Trello's wrapper output is human-text. +5. Confirm the box never holds a token: `printenv | grep -i token` inside the box + shows only `AGENTBOX_RELAY_TOKEN`, never a Notion/Linear/Trello credential. +6. Repeat steps 3-4 on one cloud provider (e.g. hetzner) to confirm the + `host-actions.ts` path works (the cloud poller drives the same gate). + +## Open follow-ups (note in `docs/integrations.md`, don't block) +- Least-privilege tokens: document Trello `scope=read` and Notion capability + toggles; Linear personal keys inherit full user perms (a read-only scope is available only via OAuth). +- Per-op write allowlist tuning once real agent flows exist (start conservative). +- ClickUp connector trust/maintenance caveats; revisit if a stronger CLI emerges. + +--- + +## Status + +- **Notion path: COMPLETE (T1–T4 done, 2026-06-06).** Shipped the shared + `@agentbox/integrations` foundation (descriptor, registry, + `runHostIntegration`, generic `integration.<service>.<op>` dispatch in both + `server.ts` and `host-actions.ts`); in-box `notion`/`ntn` shim across + docker/hetzner/vercel/e2b (daytona is shim-less by design); `integrations.notion.enabled` typed config flag with the + relay-side `refuseIfIntegrationDisabled` gate; `agentbox doctor` per- + connector reporting driven off `ALL_CONNECTORS`; public + internal docs. 
+ T4 closed the loop with a live read e2e (`whoami` + `api v1/users/me` + + `refuseApiNonGet` for non-GET) and fixed two bugs the e2e surfaced — + `agentbox config get` nested-key resolution (`apps/cli/src/commands/config.ts`) + and the `pages` vs `page` argv mismatch (`connectors/notion.ts`). See + [`notion_backlog.md`](./notion_backlog.md) for full T4 evidence. + **Deferred / follow-ups**: `comment.add` (needs a Notion-API payload + assembler for the structured `POST /v1/comments` body — `ntn` exposes no + `comment` subcommand to wrap), host-initiated tokens for integrations + (the relay accepts them but the host-CLI mint path isn't wired yet for + the `integration.*` family), nested-box e2e (architecturally the in-box + agent's relay calls terminate at the host relay either way, so this + exercises the carry block more than the spawn-side; tracked, not + blocking). +- **Linear path: COMPLETE (LT1–LT2 done, 2026-06-07).** Validated the + Notion-built abstraction: LT1 was descriptor-only (no relay/ctl core + change), and LT2 added zero source edits — the connector + shim + gate + worked unchanged against the live `waldosai` workspace. Shipped surface: + the `linear` connector with 9 ops (`whoami`, `issue.list/mine/view/query`, + `team.list`, `api` with `refuseGraphqlNonQuery` rejecting GraphQL + mutation/subscription + `--variable key=@` host-file loads, + `issue.create/update/comment` as gated writes); `linear-shim` at + `/usr/local/bin/linear` with hard-rejects for `auth token` (raw API + key leak), `auth login/logout/migrate/default`, `issue/team delete`, + `team create`; `integrations.linear.enabled` typed config flag (default + false); `agentbox doctor` row (auto from `ALL_CONNECTORS`); cross- + provider staging (docker COPY, hetzner install-box.sh, vercel + provision.sh, e2b build-template.sh; daytona shim-less by design). 
+ LT2 e2e captured live evidence: reads (`whoami`, `issue mine/list`, + `team list`, `api { viewer … }`) pass with no prompt; the GraphQL + mutation gate exits 65 with a clear refusal; the shim refuses `auth + token` with exit 2; **WAL-5** was created → commented → moved to + `Canceled` via three approve→succeed→ground-truth-read cycles; and + `printenv | grep -E '^LINEAR'` returns nothing inside the box. + **Deferred / follow-ups**: nested-box e2e (same architectural reason + as Notion — the in-box `agentbox-ctl` forwards `/rpc` to the original + host relay, not to a relay in this box; documented in + `docs/integrations.md`); host-initiated tokens for integrations + (same status as Notion). See [`linear_backlog.md`](./linear_backlog.md) + for full LT2 evidence. +- **Trello / ClickUp paths: NOT STARTED.** Each is a new descriptor + + small shim; no relay/ctl core changes (the abstraction was validated by + Notion and re-confirmed by Linear). ClickUp will be the one custom-REST + connector (no good CLI). diff --git a/docs/linear_backlog.md b/docs/linear_backlog.md new file mode 100644 index 00000000..88ebfb5a --- /dev/null +++ b/docs/linear_backlog.md @@ -0,0 +1,206 @@ +# Linear integration — backlog & status + +Linear is **Session 2** of the integrations plan (`docs/integrations_backlog.md`). +The shared `@agentbox/integrations` foundation already shipped with the Notion +path (T1–T4, PRs #73–#76). Linear is therefore **descriptor-driven**: one +connector file, an in-box shim, a config flag, a doctor entry, tests, docs — no +surgery to the relay/ctl core (this is exactly the case that validates the +abstraction the Notion work built). + +Backend: **`@schpet/linear-cli`** (the `linear` binary), the planned wrapper. +Installed + authed on the host against the **`waldosai`** workspace (admin) for +e2e. v2.0.0 surface (richer than the plan assumed): +`auth issue team project cycle milestone initiative label document api schema`. 
+ +## Security notes specific to Linear (drive the allowlist) + +- **`linear auth token` PRINTS the raw API token to stdout** — it must never be + in the shim allowlist or the connector ops, or a box could exfiltrate the + credential. Same for `auth login/logout/migrate/default`. The only auth op we + proxy is `auth whoami` (identity only). +- **`issue delete` / `team delete` / `team create` exist** and are destructive — + keep them OFF the allowlist (start conservative; widen only when a real agent + flow needs them, and then as *gated* writes). +- **`linear api` is a raw GraphQL endpoint** — a single POST that serves both + queries (read) and mutations (write). The `api` op is a read passthrough, so + it needs a `refuseCall` that rejects any GraphQL **mutation/subscription** + operation (the GraphQL analogue of Notion's `refuseApiNonGet`), so the "read" + classification isn't a hole. Writes must go through the dedicated gated ops. +- Credentials live plaintext at `~/.config/linear/credentials.toml` (keyring is + opt-in, not used) → carries cleanly into a box; **no keyring env toggle + needed**, and the connector declares no `env`. Carry entries added to + `agentbox.yaml`. + +## Proposed connector surface (the implementing box refines this) + +| op | read/write | host argv | notes | +|---|---|---|---| +| `whoami` | read | `auth whoami` | identity only — **never** `auth token` | +| `issue.list` | read | `issue list` | | +| `issue.mine` | read | `issue mine` | v2-native "issues assigned to me" | +| `issue.view` | read | `issue view` | | +| `issue.query` | read | `issue query` | structured filters | +| `team.list` | read | `team list` | | +| `api` | read | `api` | `refuseCall` rejects GraphQL mutation/subscription + `--variable key=@` | +| `issue.create` | write (gated) | `issue create` | | +| `issue.update` | write (gated) | `issue update` | status/title/etc. 
| +| `issue.comment` | write (gated) | `issue comment add` | `@schpet/linear-cli` v2 uses `add`, not `create` | + +## Tasks + +### LT1 — Connector + shim + config + doctor + unit tests + docs — **status: done (2026-06-06)** +- `packages/integrations/src/connectors/linear.ts` (+ register in `registry.ts`; + widen the `IntegrationService` union in `types.ts` to include `'linear'`). +- `refuseGraphqlNonQuery` (or similar) for the `api` op — refuse mutation/subscription. +- `packages/sandbox-docker/scripts/linear-shim` — strict allowlist mirroring + `ntn-shim`; rejects `auth token` and everything off-list. Installed as + `/usr/local/bin/linear`. Wire into `stage-runtime.mjs` (contextFiles + + execBitFiles + all provider lists), `Dockerfile.box` COPY, and the hetzner + `install-box.sh` mirror. +- `integrations.linear.enabled` typed config flag (default false) in + `packages/config/src/types.ts` (+ defaults + the CONFIG_KEYS metadata entry). +- `agentbox doctor` already iterates `ALL_CONNECTORS` — Linear should appear for + free once registered; verify and adjust if needed. +- Unit tests (pure): registry resolves linear; ops classified read/write; + `api` refuses a mutation but allows a query; shim allowlist rejects `auth token`. +- Docs: `docs/integrations.md`, the public `.mdx` page(s), `docs/host-relay.md` + (new methods), `docs/features.md`, CLI reference — same set the Notion path + touched. +- `pnpm typecheck && pnpm test && pnpm build` green → `/simplify` → `/review high` + → PR into `add-ticketing-integrations` → fix bugbot → merge. + +### LT2 — Live e2e against Waldosai + nested-box best-effort + closeout — **status: done (2026-06-07)** +- Orchestrator prep (host): rebuild + restart relay with LT1 merged; set + `integrations.linear.enabled=true` in host project config. 
+- Primary e2e from inside a box: `linear whoami` (read, no prompt) → `linear + issue list` (read) → `linear api '<query>'` (read) → `linear issue create …` + (write → host approval prompt → orchestrator approves → issue created) → + verify via read → `issue update` to mark/close the test issue → **no-token + assertion** (`printenv | grep -i linear` shows no Linear credential; the only token-shaped env var in the box is `AGENTBOX_RELAY_TOKEN`). + Verify ground truth, never trust exit codes. +- Best-effort nested-box e2e (time-boxed): install `linear` in the box, rely on + the carried `~/.config/linear/credentials.toml`, create a nested box, enable + the flag, run a read + gated write from the nested box (this box's relay gates + it). Document the limitation if too fragile. +- Fix any bug the e2e surfaces (keep tight). +- Close out: mark the **Linear** path done in `docs/integrations_backlog.md` + with evidence; update this file's status log. +- Green → `/simplify` → `/review high` → PR → fix bugbot → merge. + +## Status log + +- 2026-06-06: Backlog created. Host `linear` (@schpet/linear-cli@2.0.0) verified + authed against `waldosai` (admin, accounts@waldos.ai). Connector surface + scouted; security notes captured (auth-token leak, destructive deletes, + GraphQL mutation gate). Linear carry entries added to `agentbox.yaml`. +- 2026-06-06: **LT1 shipped.** Descriptor-only, no relay/ctl core changes. + - Connector at `packages/integrations/src/connectors/linear.ts` with ops + `whoami` (`auth whoami`), `issue.list`/`issue.mine`/`issue.view`/`issue.query`, + `team.list`, `api` (+ `refuseGraphqlNonQuery` GraphQL mutation/subscription + gate, value-consuming flag walker, `--variable key=@` host-file-load + refusal, Unicode-whitespace + BOM-prefix bypass guard), + `issue.create`/`issue.update`/`issue.comment` (gated writes; `issue.comment` + maps to `linear issue comment add` — `@schpet/linear-cli` v2 uses `add`, + not `create`). `IntegrationService` union widened to include `'linear'`. 
+ - Shim at `packages/sandbox-docker/scripts/linear-shim` (installed at + `/usr/local/bin/linear`, no symlink alias). Strict allowlist; hard- + rejects `auth token` (raw-API-key leak), `auth login/logout/migrate/ + default`, `issue/team delete`, `team create`. Staged across all five + providers (docker COPY, hetzner install-box.sh, vercel provision.sh, + e2b build-template.sh, daytona is shim-less by design) via + `stage-runtime.mjs` + each provider's `runtime-assets.ts`. + - Typed config flag `integrations.linear.enabled` (default `false`) added + to `UserConfig` / `EffectiveConfig` / `BUILT_IN_DEFAULTS` / + `KEY_REGISTRY` in `packages/config/src/types.ts`. + - Doctor: zero-line change — `ALL_CONNECTORS` drives `integrationsChecks`, + so the Linear row appears automatically with the right install/login + hints from the connector descriptor. + - Unit tests (pure, no docker/network): + - `packages/integrations/test/registry.test.ts` — registry resolves + `linear`, op classification, argv shapes, `refuseGraphqlNonQuery` + cases (mutation refused, query allowed, anonymous `{…}` allowed, + leading whitespace + `# comment` tolerated, `--input` refused, + case-insensitive keyword match). + - `packages/ctl/test/gh-and-shims.test.ts` — `linear-shim` allowlist + tests including the explicit `auth token` rejection and the + destructive-op refusals. + - `apps/cli/test/doctor-integrations.test.ts` — updated for + multi-connector iteration. + - `packages/relay/test/*` — updated the two existing tests that used + `linear` as the "unknown service" example (now `trello`). + - `pnpm typecheck && pnpm test && pnpm build && pnpm lint` all green. 
+ - Docs updated in the same change: `docs/integrations.md` (design + the + GraphQL gate + auth-token exclusion notes), new public page at + `apps/web/content/docs/integrations-linear.mdx` + meta.json entry, + `apps/web/content/docs/configuration.mdx` row, `cli.mdx` doctor + pointer, `docs/host-relay.md` bullet extension, `docs/features.md` + "what works today" bullet. Live e2e against the Waldosai workspace + is LT2 — deliberately not run in LT1. +- 2026-06-07: **LT2 shipped.** Live e2e against the `waldosai` workspace, + no code changes — the LT1 surface worked unchanged. Evidence captured + from inside an AgentBox box (in-box agent → host relay → host `linear` + v2.0.0 → Linear API): + - **Reads pass with no prompt.** `linear whoami` returns + `Workspace: waldosai … User: Marco D'Alia … Role: admin`. `linear + issue mine --team WAL --sort priority` and `linear issue list --team + WAL --sort priority` both exit 0 (empty result on `unstarted`, a + valid filtered read). `linear team list` returns `WAL Waldosai` + (UUID `09ca67e1-ccd7-499b-b2fa-63220d56ce08`). `linear api '{ viewer + { id name email } }'` returns `{"data":{"viewer":{"id":"85d5fa14-…", + "name":"Marco D'Alia","email":"accounts@waldos.ai"}}}` — the + `refuseGraphqlNonQuery` predicate correctly classifies the `{ … }` + shorthand as a query and passes it. + - **GraphQL mutation refused locally.** `linear api 'mutation { + issueDelete(id: "x") { success } }'` exits 65 with `linear api: only + GraphQL queries are proxied (use issue.create / issue.update / + issue.comment for writes); detected operation 'mutation'` — refused + before any host process is spawned (verified via both the shim path + and the direct `agentbox-ctl integration linear api` path; the gate + lives in the connector, not the shim). + - **`linear auth token` refused at the shim.** Exits 2 with + `'auth token' leaks the raw API key — refused. Use 'linear whoami' + for identity.`. 
The relay's op allowlist would also refuse it (no op + maps to `auth token`); the shim is the first of three defenses. + - **Gated writes work end-to-end.** `linear issue create --team WAL + --title "agentbox LT2 e2e 20260607T000618Z" -d "…"` round-tripped + through the relay's `askPrompt` → orchestrator approve → host + `linear` → Linear API; created **WAL-5** + (https://linear.app/waldosai/issue/WAL-5/agentbox-lt2-e2e-20260607t000618z). + Ground-truth read via `linear issue view WAL-5` confirms title + + description + Backlog state. `linear issue comment add WAL-5 -b + "agentbox LT2 e2e comment via host relay (gated write)"` added the + comment (URL with `#comment-3e8fe4e2` fragment). Ground-truth `linear + api '{ issue(id:"WAL-5") { … comments { nodes { body } } } }'` + confirms the comment body matches. `linear issue update WAL-5 -s + "Canceled"` moved the state; the post-update `linear issue view + WAL-5` shows `**State:** Canceled` and the comment thread. Three + gated writes, three approve→succeed→ground-truth-read cycles. + - **No-token assertion.** `printenv | grep -E '^LINEAR'` returns + nothing (`(no LINEAR_* keys present)`). The only token-shaped env + var in the box is `AGENTBOX_RELAY_TOKEN`. The carried + `~/.config/linear/credentials.toml` is on disk (it's for the + nested-box scenario where THIS box would host a nested-box's relay) + but no agent process reads it during the primary e2e — the host's + own `linear` does, host-side, via its own `~/.config/linear/`. + - **Nested-box e2e — deferred, same architectural reason as Notion.** + The in-box `agentbox-ctl` daemon forwards `/rpc` to the original + host relay (`host.docker.internal:8787`), not to a relay running in + this box. So a nested box's `linear issue create` would still + terminate at the **original** host's relay spawn, not in this box's + daemon — exercising the carry mechanics, not a different connector + spawn path. 
Also: installing the real `linear` in this box would not + help, because the shim would shadow it — `npm i -g @schpet/linear-cli` lands the binary at + `/usr/bin/linear` (npm prefix here is `/usr`), but the shim at + `/usr/local/bin/linear` precedes `/usr/bin` on `$PATH` and keeps + winning resolution, so the in-box agent would still hit the shim + and the daemon would need a separately-shaped PATH (or an absolute + `hostBin` path) to reach the real binary — out of scope here. Documented in `docs/integrations.md` under "Linear → + Nested-box e2e — deferred, not blocking" mirroring the Notion + sub-section. The carry block + `mergeConnectorEnv` namespace guard + are validated by the LT1 unit tests; a real nested-box round-trip + would require lifting the relay into the box's daemon (cross-cutting + follow-up tracked under both connectors' "Nested-box e2e" notes). + - No source changes needed — LT1's connector + shim + gate worked + as-shipped against the live host CLI. The pre-merge unit tests + matched live behaviour exactly (no Notion-T4-style `pages` vs `page` + drift). diff --git a/docs/notion_backlog.md b/docs/notion_backlog.md new file mode 100644 index 00000000..fed88bbf --- /dev/null +++ b/docs/notion_backlog.md @@ -0,0 +1,262 @@ +# Notion integration + shared foundation — backlog + +Live tracker for building the **Notion** integration and the **shared +`integrations` foundation** described in [`integrations_backlog.md`](./integrations_backlog.md). +Each task is one box → one PR into the `add-ticketing-integrations` feature +branch. Boxes work **sequentially**: each branches off the latest feature-branch +HEAD *after* the previous task merges (PRs stack cleanly, no conflicts). + +## Model recap (why this shape) + +Mirror the `gh` relay model exactly: in-box `notion` shim → `agentbox-ctl +integration notion <op>` → host relay → host's authenticated `ntn` CLI. Writes +gated by `askPrompt`; reads pass through; **the box never holds a Notion token**. 
The host relay runs the host's `ntn` (keychain auth on macOS — plain `ntn login`). +For the internal-dev nested-box path, `ntn` creds are carried into the box as +**file-based auth** (`NOTION_KEYRING=0 ntn login` → `~/.config/notion/auth.json`). +The connector does **not** force `NOTION_KEYRING=0` (removed — see the +2026-06-07 status-log entry); the in-box nested path sets it manually. See +[`docs/development.md`](./development.md). + +Reference implementations to copy: `packages/relay/src/gh.ts`, +`packages/ctl/src/commands/gh.ts`, `packages/sandbox-docker/scripts/gh-shim`, +`packages/relay/src/prompts.ts`, `packages/relay/src/host-initiated.ts`. + +## Per-box workflow (every task) + +1. **Plan first** — enter plan mode, produce a concrete plan, get it approved. +2. **Implement** on a branch off the current feature-branch HEAD. +3. **Verify** — `pnpm typecheck`, `pnpm test`, `pnpm build`, plus the task's + own verification (unit tests + a real `agentbox-ctl integration notion …` + round-trip where applicable). Verify ground truth, not exit codes. +4. **`/review high`** then **`/simplify`** — apply findings. +5. **File a PR** into `add-ticketing-integrations` (not `main`). +6. **Fix bugbot** comments on the PR until clean. +7. **Merge**, then the orchestrator moves to the next task in a fresh box. + +## Tasks + +### T1 — Shared foundation + Notion core plumbing ✅ done +The working vertical slice: `agentbox-ctl integration notion <op>` round-trips +through the relay to host `ntn`, with read/write classification + write gating. +- `packages/integrations/` package: `types.ts` (IntegrationOp, IntegrationConnector), + `registry.ts` (getConnector, ALL_CONNECTORS), `connectors/notion.ts`. + - Notion ops (start minimal, allowlist-only): **read** `api` (GET passthrough, + e.g. `ntn api v1/users/me`, `ntn api v1/pages/<id>` — POST endpoints like + `v1/search` are refused by the GET-only gate); **write** `page.create`, + `page.update` (archive/props), `comment.add` — all gated. 
+- `packages/relay/src/integrations.ts`: `runHostIntegration`, + `assertIntegrationReady`, generic `integration..` dispatch (reuse + `askPrompt` + `HostInitiatedTokens`). Connector forces `NOTION_KEYRING=0` env. +- Wire dispatch into **both** `packages/relay/src/server.ts` (`POST /rpc`) and + `packages/relay/src/host-actions.ts` (cloud path — "fix across all providers"). +- `packages/ctl/src/commands/integration.ts` (built from descriptors) + register + in the ctl entrypoint next to `ghCommand`. +- Unit tests: op read/write classification; allowlist denies unknown ops; + dispatch gates writes (askPrompt called) and not reads; denied → exit 10. + +### T2 — In-box `notion` shim + image provisioning + config flags ✅ done +Make a box agent able to type `notion …` or `ntn …`. +- `packages/sandbox-docker/scripts/ntn-shim` (gh-shim pattern: strict + subcommand allowlist → `agentbox-ctl integration notion -- "$@"`). + Installed on PATH as `/usr/local/bin/ntn`; `/usr/local/bin/notion` is a + symlink to it. Same shim for both invocations. +- Staged: `contextFiles` + `execBitFiles` in `apps/cli/scripts/stage-runtime.mjs` + plus the `hetznerFiles` / `vercelFiles` / `e2bFiles` lists; COPY'd in + `Dockerfile.box` next to the `gh-shim`/`git-shim` COPY; mirrored into + `packages/sandbox-hetzner/scripts/install-box.sh`, + `packages/sandbox-vercel/scripts/provision.sh`, and + `packages/sandbox-e2b/scripts/build-template.sh` (plus each provider's + `src/runtime-assets.ts` so the staged file gets uploaded). Daytona stays + shim-less (matches its T1 gh/git decision). +- Config: added `integrations.notion.enabled` (default **false**) to + `packages/config/src/types.ts` — `UserConfig`, `EffectiveConfig`, + `BUILT_IN_DEFAULTS`, and `KEY_REGISTRY`. Parser/merger/writer were taught + to walk 3-level nested keys (`branch.subbranch.leaf`) so the YAML stays + natural. Set with `agentbox config set --project integrations.notion.enabled true`. 
+- Gate placement: the **relay** (`refuseIfIntegrationDisabled` in + `packages/relay/src/integrations.ts`, wired into BOTH + `handleIntegrationRpc` in `server.ts` (docker) and `runIntegrationRpc` + in `host-actions.ts` (cloud — daytona/hetzner/vercel/e2b) per the + "fix across all providers" rule). One check covers every caller + (shim / `notion` alias / direct `agentbox-ctl integration` / future + host-initiated tokens) and re-reads the layered config per call so a + flag flip takes effect without bouncing the relay (same approach as + `loadAutopauseConfig`). Disabled → exit 65 with a `agentbox config set …` + hint; no host process is touched. +- Connector cleanup (minimal): the T1 `comment.add` op is **dropped**. + `ntn` exposes no top-level `comment` subcommand — the only host path + would be `ntn api v1/comments -X POST -f …`, which the T1 `api` op + refuses (GET-only). The op also had no callers (T1 just merged, no shim + yet), so a forward-only drop is cleaner than carrying dead surface + through. The shim refuses `notion comment add …` with a clear + "deferred from T2" message; comments are tracked as a focused + follow-up (will need a Notion-API-aware payload assembly that maps + flag args to the structured POST body). Added a `whoami` read op so + `ntn whoami` doesn't have to widen the `api` allowlist. + +### T3 — `agentbox doctor` detection + docs ✅ done +- `agentbox doctor` now reports each integration in a dedicated + `integrations:` group, driven off `ALL_CONNECTORS` (no hardcoded + `'notion'`) so Linear/Trello light up here automatically when they land. + Each row probes ` ` (install check) and + ` ` (login check) and surfaces install/login hints + from new optional `IntegrationConnector.detect.installHint` / + `loginHint` fields (filled for the Notion connector). 
The doctor + deliberately does NOT force `NOTION_KEYRING=0` — on the host the + keychain entry IS the credential, and the file-auth env override would + make a keychain-authed user falsely show as "not logged in". A new + `info` `CheckStatus` rolls up like `ok` so a disabled-but-configured + integration never pushes the overall doctor status to "warn". Unit + test (`apps/cli/test/doctor-integrations.test.ts`) stubs a fake `ntn` + on PATH and asserts the four transitions: disabled / missing / + unauthed / authed. +- Docs: + - `docs/integrations.md` — new internal design/reference doc + (descriptor model, relay dispatch flow, the read/write Notion op + surface, the enable flag, doctor wiring, the carry-based file-auth + path for nested boxes, open follow-ups). + - `apps/web/content/docs/integrations-notion.mdx` — new user-facing + Fumadocs page (prerequisites, enabling, what works in the box, + security model). Wired into `meta.json` under a new `---Services---` + section. + - `apps/web/content/docs/configuration.mdx` — new `## integrations` + section documenting `integrations.notion.enabled`. + - `apps/web/content/docs/cli.mdx` — `agentbox doctor` sentence + updated to mention the new group. + - `docs/host-relay.md` — new RPC method-family bullet for + `integration..` (parser, allowlist, enable gate, + `refuseCall`, readiness probe, host-initiated token short-circuit, + `askPrompt` for writes, the `_*` env namespace guard). + - `docs/features.md` — Notion integration bullet; the "Additional + `/rpc` methods" line updated to list `gh.pr.*` / + `integration..` already in place. + +### T4 — Live e2e verification + bug-fixes-from-e2e + closeout ✅ done +- Primary e2e (real box → host relay → live Notion API): `notion whoami` and + `notion api v1/users/me` return the host bot identity with no prompt; + `notion api v1/comments -X POST` and `notion api ... 
--method PATCH` are + refused with `notion api: only GET is proxied (use page.create / + page.update for writes); detected method 'POST'` (exit 65). `printenv | + grep -i notion` shows nothing in the box's process env — the agent never + holds the credential. (The carried `~/.config/notion/auth.json` is a + separate concern for nested-box use; the in-box AGENT itself sees no + token in env.) +- Two bugs surfaced and fixed in T4: + 1. **`agentbox config get` couldn't read nested 3-level keys.** The + helpers in `apps/cli/src/commands/config.ts` split on the FIRST dot + only, so `integrations.notion.enabled` resolved to + `effective.integrations["notion.enabled"]` (undefined / ``) + even when `config set` + `loadEffectiveConfig` worked correctly. + Fix: replace `leafValue`/`rawLeafFromValues` with a single `walkKey` + helper that splits on ALL dots (mirrors `readLeaf` in + `packages/config/src/load.ts`). New regression test + `apps/cli/test/config-get-nested.test.ts` covers the plain, `--json`, + `--all`, and unset/default cases; without the fix all four fail. + 2. **Connector `buildArgv` used singular `page` but real `ntn` is `pages` + (plural).** Live evidence: a `notion pages create` call through the + host relay hit approval → spawned `ntn page create` → failed with + `error: unrecognized subcommand 'page'. tip: some similar subcommands + exist: 'update', 'pages'` (exit 2). `ntn --help` confirms the surface + is `api datasources files pages login logout whoami workers`. Fix: + `connectors/notion.ts` now builds `['pages', 'create', …]` / + `['pages', 'update', …]`. Existing tests in + `packages/integrations/test/registry.test.ts` and + `packages/relay/test/integrations.test.ts` updated to assert the + correct argv. +- Live write round-trip (host relay → live `ntn pages create`) was not + re-run from this box, because the host relay was rebuilt with T3 code + (pre-fix `page` argv) before T4 started. 
Once T4 merges and the host + relay rebuilds with the new `pages` argv, the prompted write path will + work end-to-end. The fix is validated by (a) the live failure mode + matching the bug exactly, (b) the `pages create --help` host probe + showing the correct surface, (c) updated unit tests that pin the new + argv. +- Nested-box e2e (boxes-launch-boxes path): **deferred**. Docker-in-docker + is available but the base image isn't baked in this box, and the chain + for an in-box agent's `notion` write would still terminate at the same + HOST relay (not this box's daemon — the box daemon forwards to the host + relay at `host.docker.internal:8787`), so it wouldn't actually exercise + MY fixed code on the spawn side. The carry block in `agentbox.yaml` + already ships the file-auth into nested boxes (verified present at + `~/.config/notion/auth.json` in this box). Real nested-relay isolation + testing is tracked as a follow-up — see "Open follow-ups" in + [`integrations.md`](./integrations.md). +- `integrations_backlog.md` updated: Notion path marked complete through + T4. + +## Status log +- 2026-06-06: Backlog created; host-side carry for `ntn` file-auth added to + `agentbox.yaml`. Top-level box testing uses the host's keychain-authed `ntn`. +- 2026-06-06: T1 shipped — `@agentbox/integrations` package with Notion + descriptor, `packages/relay/src/integrations.ts` (host exec + readiness + probe), generic `integration..` dispatch wired into both + `server.ts` (docker) and `host-actions.ts` (cloud), and `agentbox-ctl + integration` command tree. PR pending. +- 2026-06-06: T2 shipped — `ntn-shim` + `notion` symlink on PATH across + docker/hetzner/vercel/e2b; `integrations.notion.enabled` (default false) + added to the typed config (with nested-key support in parser/merger/ + writer); host-side enable gate in `handleIntegrationRpc` returning exit + 65 with a config-hint when disabled; connector cleanup (dropped + `comment.add`, added `whoami` read op). 
Comments deferred to a focused + follow-up — they need a Notion-API-aware payload translator that maps + CLI flags to the structured `POST /v1/comments` body. +- 2026-06-06: T3 shipped — `agentbox doctor` now reports the new + `integrations:` group (registry-driven), with `info` for disabled and + install/login hints sourced from the connector descriptor. + `IntegrationConnector.detect` gained optional `installHint` / + `loginHint` fields (filled for Notion: install URL + `ntn login`). + Unit test stubs a fake `ntn` on PATH and verifies the four status + transitions. Doctor's host probe does NOT set `NOTION_KEYRING=0` (a + comment in the code records why). Public docs site + internal + reference doc landed in the same PR: new `docs/integrations.md`, new + `apps/web/content/docs/integrations-notion.mdx` (Services section in + `meta.json`), config-key + doctor sentence in the published + `configuration.mdx` / `cli.mdx`, new RPC method-family bullet in + `docs/host-relay.md`, Notion entry in `docs/features.md`. T4 (nested- + box e2e + carry-based file-auth verification) is the remaining task. +- 2026-06-06: T4 shipped — live e2e from inside a box against the real + Notion API. Reads pass through with no prompt (`notion whoami`, + `notion api v1/users/me`), `notion api` correctly refuses non-GET + methods (`-X POST` / `--method PATCH` → exit 65 with `refuseApiNonGet` + message), `printenv | grep -i notion` shows nothing in the agent's + env. 
Two bugs fixed: (1) `config get` couldn't read 3-level nested + keys because `apps/cli/src/commands/config.ts` split on the first dot + only — replaced with a `walkKey` helper that splits on all segments + (mirrors `readLeaf` in `packages/config/src/load.ts`); regression + test `apps/cli/test/config-get-nested.test.ts` added; (2) connector + built singular `['page', 'create', …]` argv but real `ntn` is `pages` + (plural) — confirmed live by the host relay's spawn failing with + `unrecognized subcommand 'page'`, fixed in `connectors/notion.ts`, + existing tests in `packages/integrations/test/registry.test.ts` and + `packages/relay/test/integrations.test.ts` updated. Live write + round-trip with the fix needs a host relay rebuild post-merge. + Nested-box e2e deferred — the box-daemon → host-relay chain means an + in-box agent's write still terminates at the host relay (not this + box's daemon), so it wouldn't exercise the spawn-side fix from a + nested box anyway; the carry block is verified present. +- 2026-06-06: Live-write loop closed (orchestrator, post-#76-merge). The host + relay was rebuilt + restarted with the `pages` argv fix, then a real write + was issued from inside a box: `notion pages create --parent page:<id> + --content '# agentbox write-verify '` → host approval gate fired → + approved → a real child page was created in "Marco D'alia's Space" + (ground-truth confirmed via `ntn api v1/pages/<id>` → `object: page`, + created by the integration bot), then archived (`in_trash:true`) to clean + up. Notes for users: the real `ntn pages create` flags are `--parent + page:<id>` + `--content <markdown>` (no `--title`), and the shim already + injects the `--` arg separator so callers must NOT add their own (a doubled + `--` makes `ntn` reject the flags). 
**Notion path verified DONE end-to-end: + reads pass through, writes are gated + create real pages on approval, the + box holds no token.** +- 2026-06-07: Removed the forced `env: { NOTION_KEYRING: '0' }` from the Notion + connector. It was only ever needed for the internal-dev nested-box path, but + `mergeConnectorEnv` applies a connector's `env` on the **host** relay spawn + too — which forced the host `ntn` into file-auth mode and disagreed with both + the docs (`ntn login` → keychain) and `agentbox doctor` (probes keychain). + Result: a keychain-authed user got a green doctor but a relay that couldn't + find the token. With the env gone the relay uses `ntn`'s default (keychain on + macOS) — relay, doctor, and docs now agree. The generic `env` field + + `mergeConnectorEnv` `_*` namespace guard stay (no connector uses + them now). The nested-dev `NOTION_KEYRING=0 ntn login` requirement moved to + [`docs/development.md`](./development.md). Earlier status-log/task lines that + say "the connector forces `NOTION_KEYRING=0`" are superseded by this entry. 
diff --git a/packages/config/src/load.ts b/packages/config/src/load.ts index 8824a374..f4843f97 100644 --- a/packages/config/src/load.ts +++ b/packages/config/src/load.ts @@ -175,9 +175,12 @@ function readLeaf( branch: string, leaf: string, ): unknown { - const b = (obj as Record)[branch]; - if (b === undefined || b === null || typeof b !== 'object') return undefined; - return (b as Record)[leaf]; + let cur: unknown = (obj as Record)[branch]; + for (const seg of leaf.split('.')) { + if (cur === undefined || cur === null || typeof cur !== 'object') return undefined; + cur = (cur as Record)[seg]; + } + return cur; } function writeLeaf( @@ -186,7 +189,20 @@ function writeLeaf( leaf: string, value: unknown, ): void { - const b = (obj as unknown as Record>)[branch]; - if (!b) return; // BUILT_IN_DEFAULTS guarantees the branch exists - b[leaf] = value; + let cur: Record | undefined = + (obj as unknown as Record>)[branch]; + if (!cur) return; // BUILT_IN_DEFAULTS guarantees the branch exists + const segs = leaf.split('.'); + for (let i = 0; i < segs.length - 1; i++) { + const seg = segs[i]!; + const next = cur[seg]; + if (next === undefined || next === null || typeof next !== 'object') { + // BUILT_IN_DEFAULTS guarantees nested sub-objects exist for every + // registered key path, so this is unreachable in practice; defaulting + // to a fresh sub-object keeps the function total. + cur[seg] = {}; + } + cur = cur[seg] as Record; + } + cur[segs[segs.length - 1]!] 
= value; } diff --git a/packages/config/src/parse.ts b/packages/config/src/parse.ts index 4ad15a3b..b56a1dc6 100644 --- a/packages/config/src/parse.ts +++ b/packages/config/src/parse.ts @@ -135,23 +135,7 @@ export function parseUserConfigObject(doc: unknown, where: string): Partial = {}; - for (const [leafName, leafRaw] of Object.entries(branchRaw)) { - const desc = branchSpec.leaves.get(leafName); - if (!desc) { - const renamedTo = RENAMED_KEYS.get(`${branchName}.${leafName}`); - if (renamedTo) { - throw new UserConfigError( - `${where}.${branchName}.${leafName} was renamed to ${renamedTo} — update your config`, - ); - } - throw new UserConfigError( - `${where}.${branchName}: unknown key "${leafName}" (known: ${[...branchSpec.leaves.keys()].join(', ')})`, - ); - } - if (leafRaw === undefined) continue; - branchOut[leafName] = coerceTypedValue(leafRaw, desc, `${where}.${desc.key}`); - } + const branchOut = parseBranchObject(branchSpec, branchName, branchRaw, '', where); if (Object.keys(branchOut).length > 0) { // We've validated that each branch matches one of UserConfig's known // sub-objects; the indexed write keeps the union type happy. @@ -161,6 +145,57 @@ export function parseUserConfigObject(doc: unknown, where: string): Partial, + qualifiedPrefix: string, + where: string, +): Record { + const out: Record = {}; + for (const [name, value] of Object.entries(raw)) { + if (value === undefined) continue; + const qualified = qualifiedPrefix ? `${qualifiedPrefix}.${name}` : name; + const desc = branchSpec.leaves.get(qualified); + if (desc) { + out[name] = coerceTypedValue(value, desc, `${where}.${desc.key}`); + continue; + } + // Not a leaf — descend if it's a mapping AND a deeper leaf is registered + // beneath this path. Otherwise the key is unknown / not in the registry. 
+ if (isPlainObject(value) && branchHasLeafBelow(branchSpec, qualified)) { + const sub = parseBranchObject(branchSpec, branchName, value, qualified, where); + if (Object.keys(sub).length > 0) out[name] = sub; + continue; + } + const renamedTo = RENAMED_KEYS.get(`${branchName}.${qualified}`); + if (renamedTo) { + throw new UserConfigError( + `${where}.${branchName}.${qualified} was renamed to ${renamedTo} — update your config`, + ); + } + throw new UserConfigError( + `${where}.${branchName}: unknown key "${qualified}" (known: ${[...branchSpec.leaves.keys()].join(', ')})`, + ); + } + return out; +} + +function branchHasLeafBelow(branchSpec: BranchSpec, prefix: string): boolean { + const needle = `${prefix}.`; + for (const leaf of branchSpec.leaves.keys()) { + if (leaf.startsWith(needle)) return true; + } + return false; +} + /** * Coerce a string (e.g. typed at the CLI by `agentbox config set`) into the * declared type for `key`. Booleans accept true/false/yes/no/1/0 (case diff --git a/packages/config/src/types.ts b/packages/config/src/types.ts index 8f51a790..b5659102 100644 --- a/packages/config/src/types.ts +++ b/packages/config/src/types.ts @@ -149,6 +149,14 @@ export interface UserConfig { pruneProjectConfigs?: boolean; pruneProjectConfigsEvery?: number; }; + integrations?: { + notion?: { + enabled?: boolean; + }; + linear?: { + enabled?: boolean; + }; + }; } /** @@ -265,6 +273,14 @@ export interface EffectiveConfig { pruneProjectConfigs: boolean; pruneProjectConfigsEvery: number; }; + integrations: { + notion: { + enabled: boolean; + }; + linear: { + enabled: boolean; + }; + }; } export type ConfigSource = 'cli' | 'workspace' | 'project' | 'global' | 'default'; @@ -402,6 +418,10 @@ export const BUILT_IN_DEFAULTS: EffectiveConfig = { pruneProjectConfigs: true, pruneProjectConfigsEvery: 50, }, + integrations: { + notion: { enabled: false }, + linear: { enabled: false }, + }, }; export type KeyType = 'bool' | 'string' | 'int' | 'enum'; @@ -851,6 +871,18 @@ export 
const KEY_REGISTRY: readonly KeyDescriptor[] = [ type: 'int', description: 'Run the orphan project-config sweep every N successful `agentbox create`.', }, + { + key: 'integrations.notion.enabled', + type: 'bool', + description: + 'Enable the in-box Notion integration shim (`ntn`/`notion` commands routed via the host relay). When false (default), the relay refuses dispatch with a clear "disabled" error and no host process is touched.', + }, + { + key: 'integrations.linear.enabled', + type: 'bool', + description: + 'Enable the in-box Linear integration shim (`linear` commands routed via the host relay; backed by `@schpet/linear-cli`). When false (default), the relay refuses dispatch with a clear "disabled" error and no host process is touched.', + }, ]; const REGISTRY_BY_KEY = new Map(KEY_REGISTRY.map((d) => [d.key, d])); diff --git a/packages/config/src/write.ts b/packages/config/src/write.ts index a340c4a1..25553c42 100644 --- a/packages/config/src/write.ts +++ b/packages/config/src/write.ts @@ -272,26 +272,36 @@ function stampSchema(doc: Partial): void { } function setLeaf(doc: Partial, key: string, value: unknown): void { - const idx = key.indexOf('.'); - const branch = key.slice(0, idx); - const leaf = key.slice(idx + 1); - const root = doc as unknown as Record>; - if (!root[branch] || typeof root[branch] !== 'object') { - root[branch] = {}; + const segs = key.split('.'); + let cur = doc as unknown as Record; + for (let i = 0; i < segs.length - 1; i++) { + const seg = segs[i]!; + const next = cur[seg]; + if (!next || typeof next !== 'object') { + cur[seg] = {}; + } + cur = cur[seg] as Record; } - root[branch][leaf] = value; + cur[segs[segs.length - 1]!] 
= value; } function unsetLeaf(doc: Partial, key: string): boolean { - const idx = key.indexOf('.'); - const branch = key.slice(0, idx); - const leaf = key.slice(idx + 1); - const root = doc as unknown as Record>; - const b = root[branch]; - if (!b || typeof b !== 'object' || !(leaf in b)) return false; - delete b[leaf]; - if (Object.keys(b).length === 0) { - delete root[branch]; + const segs = key.split('.'); + const path: Record[] = [doc as unknown as Record]; + for (let i = 0; i < segs.length - 1; i++) { + const seg = segs[i]!; + const next = path[path.length - 1]![seg]; + if (!next || typeof next !== 'object') return false; + path.push(next as Record); + } + const leafSeg = segs[segs.length - 1]!; + const leafContainer = path[path.length - 1]!; + if (!(leafSeg in leafContainer)) return false; + delete leafContainer[leafSeg]; + // Prune empty parent objects from leaf-most up so the YAML stays tidy. + for (let i = path.length - 1; i > 0; i--) { + if (Object.keys(path[i]!).length > 0) break; + delete path[i - 1]![segs[i - 1]!]; } return true; } diff --git a/packages/config/test/merge-precedence.test.ts b/packages/config/test/merge-precedence.test.ts index d34d6cea..3d84f658 100644 --- a/packages/config/test/merge-precedence.test.ts +++ b/packages/config/test/merge-precedence.test.ts @@ -92,4 +92,38 @@ describe('layered merge precedence', () => { expect(r.layers.workspace.values).toEqual({}); expect(r.effective.engine.kind).toBe('docker-desktop'); }); + + // Nested 3-level path (branch.subbranch.leaf) — the parser, merger, and + // writer all needed teaching to walk dotted leaves. Worth its own cascade + // test so a future refactor doesn't silently regress the integrations + // surface. 
+ it('integrations.notion.enabled defaults to false', async () => { + const r = await loadEffectiveConfig(tmpCwd); + expect(r.effective.integrations.notion.enabled).toBe(false); + expect(r.sources['integrations.notion.enabled']).toBe('default'); + }); + + it('integrations.notion.enabled cascades global → project → cli', async () => { + await writeYamlAt( + GLOBAL_CONFIG_FILE, + 'integrations:\n notion:\n enabled: true\n', + ); + const fromGlobal = await loadEffectiveConfig(tmpCwd); + expect(fromGlobal.effective.integrations.notion.enabled).toBe(true); + expect(fromGlobal.sources['integrations.notion.enabled']).toBe('global'); + + await writeYamlAt( + projectConfigFile(tmpCwd), + 'integrations:\n notion:\n enabled: false\n', + ); + const fromProject = await loadEffectiveConfig(tmpCwd); + expect(fromProject.effective.integrations.notion.enabled).toBe(false); + expect(fromProject.sources['integrations.notion.enabled']).toBe('project'); + + const fromCli = await loadEffectiveConfig(tmpCwd, { + cliOverrides: { integrations: { notion: { enabled: true } } }, + }); + expect(fromCli.effective.integrations.notion.enabled).toBe(true); + expect(fromCli.sources['integrations.notion.enabled']).toBe('cli'); + }); }); diff --git a/packages/config/test/set-unset-roundtrip.test.ts b/packages/config/test/set-unset-roundtrip.test.ts index 8692eb81..cd22242d 100644 --- a/packages/config/test/set-unset-roundtrip.test.ts +++ b/packages/config/test/set-unset-roundtrip.test.ts @@ -71,4 +71,27 @@ describe('set/unset roundtrip', () => { setConfigValue('global', 'code.timeoutMs', 'banana', tmpCwd, { raw: true }), ).rejects.toThrow(); }); + + it('roundtrips a 3-level dotted key (integrations.notion.enabled)', async () => { + await setConfigValue('project', 'integrations.notion.enabled', 'true', tmpCwd, { + raw: true, + }); + const yaml = parseYaml(await readFile(projectConfigFile(tmpCwd), 'utf8')) as Record< + string, + unknown + >; + expect(yaml['integrations']).toEqual({ notion: { enabled: 
true } }); + const loaded = await loadEffectiveConfig(tmpCwd); + expect(loaded.effective.integrations.notion.enabled).toBe(true); + expect(loaded.sources['integrations.notion.enabled']).toBe('project'); + + await unsetConfigValue('project', 'integrations.notion.enabled', tmpCwd); + const after = + (parseYaml(await readFile(projectConfigFile(tmpCwd), 'utf8')) as + | Record + | null) ?? {}; + // Both the deepest leaf AND the empty `notion` / `integrations` parents + // must be pruned so the YAML stays tidy. + expect(after).not.toHaveProperty('integrations'); + }); }); diff --git a/packages/ctl/package.json b/packages/ctl/package.json index 7ed8482e..40a13a5d 100644 --- a/packages/ctl/package.json +++ b/packages/ctl/package.json @@ -31,6 +31,7 @@ }, "dependencies": { "@agentbox/core": "workspace:*", + "@agentbox/integrations": "workspace:*", "@agentbox/relay": "workspace:*", "commander": "^12.1.0", "yaml": "^2.6.1" diff --git a/packages/ctl/src/bin.ts b/packages/ctl/src/bin.ts index 6da36853..eb24641e 100644 --- a/packages/ctl/src/bin.ts +++ b/packages/ctl/src/bin.ts @@ -9,6 +9,7 @@ import { downloadCommand } from './commands/download.js'; import { checkpointCommand } from './commands/checkpoint.js'; import { ghCommand } from './commands/gh.js'; import { gitCommand } from './commands/git.js'; +import { integrationCommand } from './commands/integration.js'; import { notifyCommand } from './commands/notify.js'; import { openCommand } from './commands/open.js'; import { statusCommand } from './commands/status.js'; @@ -46,6 +47,7 @@ program.addCommand(waitReadyCommand); program.addCommand(runTaskCommand); program.addCommand(gitCommand); program.addCommand(ghCommand); +program.addCommand(integrationCommand); program.addCommand(checkpointCommand); program.addCommand(cpCommand); program.addCommand(downloadCommand); diff --git a/packages/ctl/src/commands/integration.ts b/packages/ctl/src/commands/integration.ts new file mode 100644 index 00000000..7f487688 --- /dev/null +++ 
b/packages/ctl/src/commands/integration.ts @@ -0,0 +1,60 @@ +import { Command } from 'commander'; +import { ALL_CONNECTORS, type IntegrationConnector } from '@agentbox/integrations'; +import { postRpcAndExit } from '../relay-rpc.js'; + +interface IntegrationRpcParams { + path: string; + args?: string[]; +} + +/** + * In-box surface for the integrations foundation: one commander subtree + * per connector descriptor in `@agentbox/integrations`. Each op's action + * forwards verbatim argv to the relay (`integration.<service>.<op>`), + * where the host-side dispatcher classifies read/write and gates writes + * via askPrompt before shelling out to the connector's host CLI. + * + * Mirrors `commands/gh.ts` exactly — descriptor-driven so a new + * connector is one file in `@agentbox/integrations` and no surgery here. + */ +export const integrationCommand = new Command('integration').description( + 'Ticketing/knowledge CLIs routed through the host relay (host runs the real CLI with host creds; box never sees a token)', +); + +for (const connector of ALL_CONNECTORS) { + integrationCommand.addCommand(buildConnectorCommand(connector)); +} + +function buildConnectorCommand(connector: IntegrationConnector): Command { + const cmd = new Command(connector.service).description( + `${connector.service} CLI operations via the host \`${connector.hostBin}\` (requires \`${connector.hostBin}\` installed and authenticated on the host)`, + ); + for (const [opName, op] of Object.entries(connector.ops)) { + const description = op.write + ? 
`Run \`${connector.hostBin} ${opName}\` on the host (prompted; write op).` + : `Run \`${connector.hostBin} ${opName}\` on the host (read-only; no prompt).`; + const errorPrefix = `agentbox-ctl integration ${connector.service} ${opName}`; + const method = `integration.${connector.service}.${opName}`; + cmd.addCommand( + new Command(opName) + .description(description) + .option( + '--cwd ', + 'container path identifying which registered worktree to use (default: cwd)', + ) + .allowExcessArguments(true) + .allowUnknownOption(true) + .argument( + '[args...]', + `extra args forwarded to \`${connector.hostBin} ${opName}\` verbatim`, + ) + .action(async (args: string[], opts: { cwd?: string }) => { + const params: IntegrationRpcParams = { path: opts.cwd ?? process.cwd() }; + if (args.length > 0) params.args = args; + const code = await postRpcAndExit(method, params, { errorPrefix }); + process.exit(code); + }), + ); + } + return cmd; +} diff --git a/packages/ctl/test/gh-and-shims.test.ts b/packages/ctl/test/gh-and-shims.test.ts index a9fd0c80..4feaca4e 100644 --- a/packages/ctl/test/gh-and-shims.test.ts +++ b/packages/ctl/test/gh-and-shims.test.ts @@ -8,6 +8,8 @@ import { postRpc } from '../src/relay-rpc.js'; const REPO_ROOT = join(import.meta.dirname, '..', '..', '..'); const GH_SHIM = join(REPO_ROOT, 'packages/sandbox-docker/scripts/gh-shim'); const GIT_SHIM = join(REPO_ROOT, 'packages/sandbox-docker/scripts/git-shim'); +const NTN_SHIM = join(REPO_ROOT, 'packages/sandbox-docker/scripts/ntn-shim'); +const LINEAR_SHIM = join(REPO_ROOT, 'packages/sandbox-docker/scripts/linear-shim'); interface StubShellEnv { tmpDir: string; @@ -581,3 +583,420 @@ describe('git-shim arg whitelist + passthrough', () => { } }); }); + +describe('ntn-shim subcommand allowlist', () => { + it('whoami forwards to integration notion whoami', () => { + const env = makeStubShell(); + try { + const out = runShim(NTN_SHIM, ['whoami'], env); + expect(out.code).toBe(0); + 
expect(out.stdout).toContain('STUB: integration notion whoami --'); + } finally { + env.cleanup(); + } + }); + + it('api endpoint forwards to integration notion api', () => { + const env = makeStubShell(); + try { + const out = runShim(NTN_SHIM, ['api', 'v1/users/me'], env); + expect(out.code).toBe(0); + expect(out.stdout).toContain('STUB: integration notion api -- v1/users/me'); + } finally { + env.cleanup(); + } + }); + + it('api forwards write-shaped argv intact (relay enforces GET-only)', () => { + // The shim does NOT replicate refuseApiNonGet — that's the relay's job. + // It must hand through -X POST / -f field=value so the relay sees the + // real argv and can refuse, instead of the agent thinking the call + // succeeded silently. + const env = makeStubShell(); + try { + const out = runShim( + NTN_SHIM, + ['api', 'v1/pages', '-X', 'POST', '-f', 'title=hi'], + env, + ); + expect(out.code).toBe(0); + expect(out.stdout).toContain( + 'STUB: integration notion api -- v1/pages -X POST -f title=hi', + ); + } finally { + env.cleanup(); + } + }); + + it('api with no endpoint is rejected', () => { + const env = makeStubShell(); + try { + const out = runShim(NTN_SHIM, ['api'], env); + expect(out.code).toBe(2); + expect(out.stderr).toMatch(/'api' requires a positional /); + } finally { + env.cleanup(); + } + }); + + it('pages create forwards to integration notion page.create', () => { + const env = makeStubShell(); + try { + const out = runShim( + NTN_SHIM, + ['pages', 'create', '--parent', 'db_id', '--title', 'hi'], + env, + ); + expect(out.code).toBe(0); + expect(out.stdout).toContain( + 'STUB: integration notion page.create -- --parent db_id --title hi', + ); + } finally { + env.cleanup(); + } + }); + + it('pages update forwards to integration notion page.update', () => { + const env = makeStubShell(); + try { + const out = runShim(NTN_SHIM, ['pages', 'update', 'page_id', '--archive'], env); + expect(out.code).toBe(0); + expect(out.stdout).toContain( + 'STUB: 
integration notion page.update -- page_id --archive', + ); + } finally { + env.cleanup(); + } + }); + + it('pages list is rejected', () => { + const env = makeStubShell(); + try { + const out = runShim(NTN_SHIM, ['pages', 'list'], env); + expect(out.code).toBe(2); + expect(out.stderr).toMatch(/unsupported 'pages list'/); + } finally { + env.cleanup(); + } + }); + + it('pages with no subcommand is rejected', () => { + const env = makeStubShell(); + try { + const out = runShim(NTN_SHIM, ['pages'], env); + expect(out.code).toBe(2); + expect(out.stderr).toMatch(/missing subcommand for 'pages'/); + } finally { + env.cleanup(); + } + }); + + it('comment add is rejected with the deferred message', () => { + const env = makeStubShell(); + try { + const out = runShim(NTN_SHIM, ['comment', 'add', '--page', 'pid'], env); + expect(out.code).toBe(2); + expect(out.stderr).toMatch(/comment ops not supported yet/); + } finally { + env.cleanup(); + } + }); + + it.each([['login'], ['logout'], ['datasources'], ['workers'], ['files']])( + 'unsupported subcommand %s is rejected with the allowed list', + (sub) => { + const env = makeStubShell(); + try { + const out = runShim(NTN_SHIM, [sub], env); + expect(out.code).toBe(2); + expect(out.stderr).toMatch(/is not proxied/); + expect(out.stderr).toMatch( + /whoami, api , pages \{create,update\}/, + ); + } finally { + env.cleanup(); + } + }, + ); + + it('--version prints the shim version line', () => { + const env = makeStubShell(); + try { + const out = runShim(NTN_SHIM, ['--version'], env); + expect(out.code).toBe(0); + expect(out.stdout).toMatch(/^ntn version /); + expect(out.stdout).toContain('agentbox-shim'); + } finally { + env.cleanup(); + } + }); + + it('no args fails with the supported-subcommands hint', () => { + const env = makeStubShell(); + try { + const out = runShim(NTN_SHIM, [], env); + expect(out.code).toBe(2); + expect(out.stderr).toMatch(/no subcommand/); + } finally { + env.cleanup(); + } + }); +}); + 
+describe('linear-shim subcommand allowlist', () => { + it('whoami forwards to integration linear whoami', () => { + const env = makeStubShell(); + try { + const out = runShim(LINEAR_SHIM, ['whoami'], env); + expect(out.code).toBe(0); + expect(out.stdout).toContain('STUB: integration linear whoami --'); + } finally { + env.cleanup(); + } + }); + + it('auth whoami (formal form) forwards to integration linear whoami', () => { + const env = makeStubShell(); + try { + const out = runShim(LINEAR_SHIM, ['auth', 'whoami'], env); + expect(out.code).toBe(0); + expect(out.stdout).toContain('STUB: integration linear whoami --'); + } finally { + env.cleanup(); + } + }); + + it('auth token is hard-rejected with the leak warning (key security invariant)', () => { + // `linear auth token` PRINTS the raw API token to stdout. Proxying it + // through the shim would defeat the whole point: tokens must never enter + // the box. The shim's rejection is the first of three defenses (shim + + // connector allowlist + relay dispatch); this is the one that the agent + // hits first. 
+ const env = makeStubShell(); + try { + const out = runShim(LINEAR_SHIM, ['auth', 'token'], env); + expect(out.code).toBe(2); + expect(out.stderr).toMatch(/leaks the raw API key/); + expect(out.stdout).toBe(''); + } finally { + env.cleanup(); + } + }); + + it.each([['login'], ['logout'], ['migrate'], ['default']])( + 'auth %s is rejected (host owns auth state)', + (sub) => { + const env = makeStubShell(); + try { + const out = runShim(LINEAR_SHIM, ['auth', sub], env); + expect(out.code).toBe(2); + expect(out.stderr).toMatch(/is not proxied/); + expect(out.stdout).toBe(''); + } finally { + env.cleanup(); + } + }, + ); + + it('issue list / mine / view / query forward as reads', () => { + const env = makeStubShell(); + try { + expect(runShim(LINEAR_SHIM, ['issue', 'list', '--limit', '5'], env).stdout).toContain( + 'STUB: integration linear issue.list -- --limit 5', + ); + // `issue mine` is the v2-native "issues assigned to me" read — the + // older `list --me` was dropped upstream, so we route mine explicitly. 
+ expect(runShim(LINEAR_SHIM, ['issue', 'mine'], env).stdout).toContain( + 'STUB: integration linear issue.mine --', + ); + expect(runShim(LINEAR_SHIM, ['issue', 'view', 'ABC-1'], env).stdout).toContain( + 'STUB: integration linear issue.view -- ABC-1', + ); + expect(runShim(LINEAR_SHIM, ['issue', 'query', '--team', 'ABC'], env).stdout).toContain( + 'STUB: integration linear issue.query -- --team ABC', + ); + } finally { + env.cleanup(); + } + }); + + it('issue create / update forward as gated writes', () => { + const env = makeStubShell(); + try { + expect( + runShim(LINEAR_SHIM, ['issue', 'create', '--title', 'hi'], env).stdout, + ).toContain('STUB: integration linear issue.create -- --title hi'); + expect( + runShim(LINEAR_SHIM, ['issue', 'update', 'ABC-1', '--state', 'done'], env).stdout, + ).toContain('STUB: integration linear issue.update -- ABC-1 --state done'); + } finally { + env.cleanup(); + } + }); + + it('issue comment add forwards to integration linear issue.comment', () => { + // `@schpet/linear-cli` v2 uses `comment add`, NOT `comment create`. Both + // sides (shim subcommand match + connector buildArgv) say `add`; the + // dotted wire op stays `issue.comment` for stability. 
+ const env = makeStubShell(); + try { + const out = runShim( + LINEAR_SHIM, + ['issue', 'comment', 'add', 'ABC-1', '--body', 'hi'], + env, + ); + expect(out.code).toBe(0); + expect(out.stdout).toContain( + 'STUB: integration linear issue.comment -- ABC-1 --body hi', + ); + } finally { + env.cleanup(); + } + }); + + it('issue comment create is rejected (v2 uses `add`)', () => { + const env = makeStubShell(); + try { + const out = runShim(LINEAR_SHIM, ['issue', 'comment', 'create'], env); + expect(out.code).toBe(2); + expect(out.stderr).toMatch(/unsupported 'issue comment create'/); + } finally { + env.cleanup(); + } + }); + + it('issue comment with no subcommand is rejected', () => { + const env = makeStubShell(); + try { + const out = runShim(LINEAR_SHIM, ['issue', 'comment'], env); + expect(out.code).toBe(2); + expect(out.stderr).toMatch(/missing subcommand for 'issue comment'/); + } finally { + env.cleanup(); + } + }); + + it('issue delete is rejected (off-list, destructive)', () => { + const env = makeStubShell(); + try { + const out = runShim(LINEAR_SHIM, ['issue', 'delete', 'ABC-1'], env); + expect(out.code).toBe(2); + expect(out.stderr).toMatch(/'issue delete' is not proxied/); + } finally { + env.cleanup(); + } + }); + + it('team list forwards as a read', () => { + const env = makeStubShell(); + try { + const out = runShim(LINEAR_SHIM, ['team', 'list'], env); + expect(out.code).toBe(0); + expect(out.stdout).toContain('STUB: integration linear team.list --'); + } finally { + env.cleanup(); + } + }); + + it.each([['create'], ['delete']])( + 'team %s is rejected (off-list, destructive)', + (sub) => { + const env = makeStubShell(); + try { + const out = runShim(LINEAR_SHIM, ['team', sub, 'Foo'], env); + expect(out.code).toBe(2); + expect(out.stderr).toMatch(/is not proxied/); + } finally { + env.cleanup(); + } + }, + ); + + it('api forwards the positional query intact (relay enforces query-only)', () => { + // The shim does NOT replicate refuseGraphqlNonQuery — 
that's the relay's + // job. It must hand through whatever the agent typed so the relay sees + // the real query and can refuse, instead of the agent thinking the call + // succeeded silently. + const env = makeStubShell(); + try { + const out = runShim(LINEAR_SHIM, ['api', '{ teams { id } }'], env); + expect(out.code).toBe(0); + expect(out.stdout).toContain('STUB: integration linear api -- { teams { id } }'); + } finally { + env.cleanup(); + } + }); + + it('api accepts pre-positional flags (linear api --paginate "")', () => { + // `linear api` legitimately accepts --variable / --variables-json / + // --paginate / --silent BEFORE the positional query. The shim's + // "requires positional" check used to refuse any leading flag — fixed + // to just require at least one arg. + const env = makeStubShell(); + try { + const out = runShim(LINEAR_SHIM, ['api', '--paginate', '{ teams { id } }'], env); + expect(out.code).toBe(0); + expect(out.stdout).toContain( + 'STUB: integration linear api -- --paginate { teams { id } }', + ); + } finally { + env.cleanup(); + } + }); + + it('api with no args at all is rejected', () => { + const env = makeStubShell(); + try { + const out = runShim(LINEAR_SHIM, ['api'], env); + expect(out.code).toBe(2); + expect(out.stderr).toMatch(/'api' requires a positional /); + } finally { + env.cleanup(); + } + }); + + it.each([ + ['project'], + ['cycle'], + ['milestone'], + ['initiative'], + ['label'], + ['document'], + ['schema'], + ])('unsupported top-level subcommand %s is rejected with the allowed list', (sub) => { + const env = makeStubShell(); + try { + const out = runShim(LINEAR_SHIM, [sub], env); + expect(out.code).toBe(2); + expect(out.stderr).toMatch(/is not proxied/); + expect(out.stderr).toMatch( + /whoami, issue \{list,mine,view,query,create,update,comment add\}/, + ); + } finally { + env.cleanup(); + } + }); + + it('--version prints the shim version line', () => { + const env = makeStubShell(); + try { + const out = runShim(LINEAR_SHIM, 
['--version'], env); + expect(out.code).toBe(0); + expect(out.stdout).toMatch(/^linear version /); + expect(out.stdout).toContain('agentbox-shim'); + } finally { + env.cleanup(); + } + }); + + it('no args fails with the supported-subcommands hint', () => { + const env = makeStubShell(); + try { + const out = runShim(LINEAR_SHIM, [], env); + expect(out.code).toBe(2); + expect(out.stderr).toMatch(/no subcommand/); + } finally { + env.cleanup(); + } + }); +}); diff --git a/packages/integrations/package.json b/packages/integrations/package.json new file mode 100644 index 00000000..dfb40470 --- /dev/null +++ b/packages/integrations/package.json @@ -0,0 +1,33 @@ +{ + "name": "@agentbox/integrations", + "version": "0.0.0", + "private": true, + "description": "Connector descriptors (Notion, …) for AgentBox's host-side relay-gated integrations. Pure data + helpers; consumed by @agentbox/relay (host exec + write gating) and @agentbox/ctl (in-box command surface).", + "license": "MIT", + "type": "module", + "main": "./dist/index.js", + "types": "./dist/index.d.ts", + "exports": { + ".": { + "types": "./dist/index.d.ts", + "import": "./dist/index.js" + } + }, + "files": [ + "dist" + ], + "scripts": { + "build": "tsup", + "dev": "tsup --watch", + "lint": "eslint src test", + "test": "vitest run", + "typecheck": "tsc --noEmit", + "clean": "rm -rf dist .turbo" + }, + "devDependencies": { + "@types/node": "^22.10.1", + "tsup": "^8.3.5", + "typescript": "^5.7.2", + "vitest": "^2.1.8" + } +} diff --git a/packages/integrations/src/connectors/linear.ts b/packages/integrations/src/connectors/linear.ts new file mode 100644 index 00000000..8bd8e550 --- /dev/null +++ b/packages/integrations/src/connectors/linear.ts @@ -0,0 +1,267 @@ +import type { IntegrationConnector, IntegrationOpRefusal } from '../types.js'; + +/** + * Linear connector — wraps `@schpet/linear-cli` (the `linear` binary, v2). 
+ * + * The op allowlist is intentionally minimal (start conservative, widen as + * real agent flows surface needs). Reads cover identity/listing/lookup + * (`whoami`, `issue list/mine/view/query`, `team list`) plus a GraphQL + * passthrough (`api`), and writes are limited to issue create/update and a + * gated comment. The `api` passthrough is query-only — + * `refuseGraphqlNonQuery` rejects any operation whose first non-whitespace + * keyword is `mutation` or `subscription`, so the GraphQL endpoint can't + * be used to slip a write past the read classification (the GraphQL + * analogue of `notion.api`'s `refuseApiNonGet`). + * + * Three groups of subcommands are deliberately absent from the allowlist for + * security reasons: + * - `auth token` — PRINTS the raw API token to stdout; proxying it + * through the relay would expose the host credential to the box. + * The only `auth` op we expose is `auth whoami` (identity only), via + * the `whoami` op. + * - `auth login` / `auth logout` / `auth migrate` / `auth default` — + * the host owns auth; relaying these would mutate host state. + * - `issue delete` / `team delete` / `team create` — destructive and + * unnecessary for the documented agent flows. Add deliberately, as + * gated writes, only when a real flow needs them. + * + * No `env` override is needed (neither connector sets one). Linear stores + * plaintext credentials at `~/.config/linear/credentials.toml` and keychain + * mode is opt-in, not the default, so `linear` reads file-based auth on every + * host without any env shaping. The carry block in `agentbox.yaml` ships that + * file into nested boxes that run their own relay. 
/**
 * Curry a fixed host-CLI argv prefix into an op's `buildArgv`: the returned
 * shaper yields a fresh array holding the prefix followed by the forwarded
 * args.
 */
const linearArgv =
  (...prefix: string[]) =>
  (args: readonly string[]): string[] => [...prefix, ...args];

/**
 * Connector descriptor for Linear, run on the host as the `linear` binary.
 *
 * `detect` carries the version/auth probe argv plus the install and login
 * hints. `ops` is the complete allowlist: each entry is tagged read
 * (`write: false`) or write (`write: true`) and maps the dotted op name to
 * the host CLI's subcommand path. `api` additionally runs
 * `refuseGraphqlNonQuery` as its pre-flight so the read-tagged passthrough
 * cannot carry a write.
 */
export const linearConnector: IntegrationConnector = {
  service: 'linear',
  hostBin: 'linear',
  detect: {
    versionArgs: ['--version'],
    authArgs: ['auth', 'whoami'],
    installHint: 'install @schpet/linear-cli: npm i -g @schpet/linear-cli',
    loginHint: 'linear auth login',
  },
  ops: {
    // Identity only: `whoami` maps to `linear auth whoami`.
    whoami: { write: false, buildArgv: linearArgv('auth', 'whoami') },
    'issue.list': { write: false, buildArgv: linearArgv('issue', 'list') },
    // `issue mine` is exposed as its own op so the canonical "assigned to
    // me" form is accepted rather than rejected as unknown.
    'issue.mine': { write: false, buildArgv: linearArgv('issue', 'mine') },
    'issue.view': { write: false, buildArgv: linearArgv('issue', 'view') },
    'issue.query': { write: false, buildArgv: linearArgv('issue', 'query') },
    'team.list': { write: false, buildArgv: linearArgv('team', 'list') },
    api: {
      write: false,
      buildArgv: linearArgv('api'),
      refuseCall: refuseGraphqlNonQuery,
    },
    'issue.create': { write: true, buildArgv: linearArgv('issue', 'create') },
    'issue.update': { write: true, buildArgv: linearArgv('issue', 'update') },
    // The dotted op expands to the three-segment `issue comment add`.
    'issue.comment': { write: true, buildArgv: linearArgv('issue', 'comment', 'add') },
  },
};
/**
 * Reject any `linear api` call that could execute a GraphQL `mutation` or
 * `subscription`. The `api` op is a single POST that serves reads and writes
 * alike, so without this guard its "read" classification would be a hole.
 *
 * The `api` subcommand takes the GraphQL source as a positional argument and
 * accepts `--variable key=value` (repeatable), `--variables-json <json>`,
 * `--paginate` and `--silent`. The guard:
 *
 *  1. Refuses `--input` (spaced or `=`-joined).
 *  2. Refuses `--variable` values that use the `@<path>` host-file load
 *     syntax, in both the spaced and the `--variable=` glued form — the file
 *     contents would become GraphQL variables the box can read back.
 *  3. Consumes the value token of `--variable` / `--variables-json` so a
 *     payload that starts with `mutation` is not misread as the operation.
 *     A following token that itself starts with `--` is NOT consumed, so it
 *     still runs through its own checks (e.g. `--variable --input=/x`).
 *  4. Skips every other long flag (`--…`). Single-dash tokens are treated as
 *     positionals so they fail closed.
 *  5. For each positional: refuses when the first keyword is `mutation` or
 *     `subscription`, refuses shapes it cannot classify, and finally refuses
 *     when `mutation` / `subscription` appears anywhere at the top level of
 *     the document — a leading `fragment` or `query` definition must not be
 *     able to smuggle a write operation in behind it.
 *
 * Matching is case-insensitive and deliberately conservative: an operation
 * or fragment that is merely NAMED `mutation` is refused as well.
 *
 * @param args argv forwarded for the `api` op (everything after `api`).
 * @returns a refusal (exit code 65 plus a one-line stderr reason), or `null`
 *   when the call may proceed. Empty or flag-only argv returns `null`.
 */
function refuseGraphqlNonQuery(args: readonly string[]): IntegrationOpRefusal | null {
  const refuse = (reason: string): IntegrationOpRefusal => ({
    exitCode: 65,
    stderr: `linear api: ${reason}\n`,
  });
  for (let i = 0; i < args.length; i++) {
    const arg = args[i] ?? '';
    if (arg === '--input' || arg.startsWith('--input=')) {
      return refuse("'--input' (stdin/file body) isn't supported through the relay");
    }
    if (arg === '--variable') {
      const next = args[i + 1] ?? '';
      if (variableValueIsFileLoad(next)) {
        return refuse(
          "'--variable key=@<path>' (host-file load) isn't supported through the relay",
        );
      }
      // Never swallow a token that is itself a flag; it must be inspected.
      if (!next.startsWith('--')) i++;
      continue;
    }
    if (arg.startsWith('--variable=')) {
      if (variableValueIsFileLoad(arg.slice('--variable='.length))) {
        return refuse(
          "'--variable=key=@<path>' (host-file load) isn't supported through the relay",
        );
      }
      continue;
    }
    if (arg === '--variables-json') {
      const next = args[i + 1] ?? '';
      if (!next.startsWith('--')) i++;
      continue;
    }
    // Remaining long flags (including `--variables-json=…`) carry no
    // operation text. Only `--…` is skipped: `-mutation` stays a positional.
    if (arg.startsWith('--')) continue;

    const op = firstGraphqlOperationKeyword(arg);
    if (op === 'mutation' || op === 'subscription') {
      return refuse(
        `only GraphQL queries are proxied (use issue.create / issue.update / issue.comment for writes); detected operation '${op}'`,
      );
    }
    if (op === 'unparseable') {
      return refuse(
        `couldn't classify positional argv ${JSON.stringify(arg)} as a GraphQL operation (expected 'query', 'mutation', 'subscription', or '{')`,
      );
    }
    // The first definition is not a write, but a later one may be.
    const laterOp = topLevelWriteOperation(arg);
    if (laterOp !== null) {
      return refuse(
        `only GraphQL queries are proxied (use issue.create / issue.update / issue.comment for writes); detected operation '${laterOp}'`,
      );
    }
  }
  return null;
}

/**
 * True when a `--variable` value looks like a host-file load: it either
 * starts with `@` or contains `=@` anywhere. Matching the `=@` substring
 * (rather than parsing `key=value`) refuses the value whether the CLI splits
 * on the first or the last `=`.
 */
function variableValueIsFileLoad(value: string): boolean {
  return value.startsWith('@') || value.includes('=@');
}

/**
 * Return the index just past a `#` comment that starts at `start`. A GraphQL
 * comment ends at a line terminator, and a bare `\r` is one — stopping only
 * at `\n` would let `"# x\rmutation …"` hide the rest of the document.
 */
function endOfGraphqlComment(source: string, start: number): number {
  let i = start;
  while (i < source.length && source[i] !== '\n' && source[i] !== '\r') i++;
  return i;
}

/**
 * Classify the start of a GraphQL source string.
 *
 * Leading whitespace (the JS `\s` class, so NBSP / U+2028 / BOM are covered),
 * commas and `#` line comments are skipped first. Then:
 *  - `null` for a source with nothing left (empty or comment-only);
 *  - `'anonymous'` when the first significant character is `{`;
 *  - `'unparseable'` when it is neither `{` nor an ASCII letter;
 *  - otherwise the leading run of ASCII letters, lower-cased
 *    (`'query'`, `'mutation'`, `'subscription'`, `'fragment'`, …).
 *
 * Only the prefix is examined; the rest of the source is not validated.
 */
function firstGraphqlOperationKeyword(source: string): string | null {
  const n = source.length;
  let i = 0;
  while (i < n) {
    const c = source[i]!;
    if (/\s/.test(c) || c === ',' || c === '\uFEFF') {
      i++;
      continue;
    }
    if (c === '#') {
      i = endOfGraphqlComment(source, i);
      continue;
    }
    break;
  }
  if (i >= n) return null;
  if (source[i] === '{') return 'anonymous';
  let j = i;
  while (j < n) {
    const c = source[j]!;
    if ((c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) {
      j++;
    } else {
      break;
    }
  }
  if (j === i) return 'unparseable';
  return source.slice(i, j).toLowerCase();
}

/**
 * Return the index just past the string literal that opens at `start`
 * (`source[start]` is `"`). Handles `"""block strings"""` (only `\"""` is an
 * escape there) and ordinary strings (backslash escapes; they cannot span a
 * line, so an unterminated one stops at the line terminator).
 */
function endOfGraphqlString(source: string, start: number): number {
  const n = source.length;
  if (source.startsWith('"""', start)) {
    let i = start + 3;
    while (i < n) {
      if (source.startsWith('\\"""', i)) {
        i += 4;
        continue;
      }
      if (source.startsWith('"""', i)) return i + 3;
      i++;
    }
    return n;
  }
  let i = start + 1;
  while (i < n) {
    const c = source[i]!;
    if (c === '\\') {
      i += 2;
      continue;
    }
    if (c === '"') return i + 1;
    if (c === '\n' || c === '\r') return i;
    i++;
  }
  return n;
}

/**
 * Scan a whole GraphQL document for a `mutation` / `subscription` name token
 * at bracket depth zero, i.e. outside every `{…}`, `(…)` and `[…]`. Comments
 * and string literals are skipped so brackets or keywords inside them neither
 * disturb the depth count nor trigger a match. Field names, aliases and
 * arguments live at depth > 0 and are ignored.
 *
 * This is a write-shape detector, not a parser: any top-level name equal to
 * the keyword (case-insensitively) counts, which errs on the side of refusal.
 */
function topLevelWriteOperation(source: string): 'mutation' | 'subscription' | null {
  const n = source.length;
  const isNameStart = (c: string): boolean =>
    (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c === '_';
  const isNameChar = (c: string): boolean => isNameStart(c) || (c >= '0' && c <= '9');
  let depth = 0;
  let i = 0;
  while (i < n) {
    const c = source[i]!;
    if (c === '#') {
      i = endOfGraphqlComment(source, i);
      continue;
    }
    if (c === '"') {
      i = endOfGraphqlString(source, i);
      continue;
    }
    if (c === '{' || c === '(' || c === '[') {
      depth++;
      i++;
      continue;
    }
    if (c === '}' || c === ')' || c === ']') {
      if (depth > 0) depth--;
      i++;
      continue;
    }
    if (isNameStart(c)) {
      // Always consume the whole name so a match can't start mid-identifier.
      let j = i + 1;
      while (j < n && isNameChar(source[j]!)) j++;
      if (depth === 0) {
        const word = source.slice(i, j).toLowerCase();
        if (word === 'mutation' || word === 'subscription') return word;
      }
      i = j;
      continue;
    }
    i++;
  }
  return null;
}
/**
 * Curry a fixed host-CLI argv prefix into an op's `buildArgv`: the returned
 * shaper yields a fresh array holding the prefix followed by the forwarded
 * args.
 */
const notionArgv =
  (...prefix: string[]) =>
  (args: readonly string[]): string[] => [...prefix, ...args];

/**
 * Connector descriptor for Notion, run on the host as the `ntn` binary.
 *
 * `detect` carries the version/auth probe argv plus the install and login
 * hints. `ops` is the complete allowlist: two reads (`whoami`, `api`) and two
 * writes (`page.create`, `page.update`, which map to `ntn pages …`). `api`
 * runs `refuseApiNonGet` as its pre-flight so the read-tagged passthrough
 * stays GET-only. No `env` override is declared.
 */
export const notionConnector: IntegrationConnector = {
  service: 'notion',
  hostBin: 'ntn',
  detect: {
    versionArgs: ['--version'],
    authArgs: ['api', 'v1/users/me'],
    installHint: 'install ntn: https://developers.notion.com/reference/notion-cli',
    loginHint: 'ntn login',
  },
  ops: {
    whoami: { write: false, buildArgv: notionArgv('whoami') },
    api: {
      write: false,
      buildArgv: notionArgv('api'),
      refuseCall: refuseApiNonGet,
    },
    // Wire op is singular (`page.*`); the host subcommand is plural (`pages`).
    'page.create': { write: true, buildArgv: notionArgv('pages', 'create') },
    'page.update': { write: true, buildArgv: notionArgv('pages', 'update') },
  },
};
/**
 * Reject any `ntn api` call whose argv would issue a non-GET HTTP method.
 *
 * The flag surface is treated as pflag-style, as for `gh api`: an explicit
 * method via `-X` / `--method` (separate, glued, or `=`-joined value), or any
 * field flag (`-f` / `-F` / `--field` / `--raw-field`), which implicitly
 * switches the request to POST. `--input` is refused outright.
 *
 * Grouped shorthands are refused too. Assuming pflag-style shorthand
 * grouping (to be confirmed against `ntn`'s real parser), a token such as
 * `-iXDELETE` or `-if` would carry a method or field flag behind another
 * short flag, and this parser would otherwise ignore the token and classify
 * the call as GET. Which short flags take values is unknown here, so any
 * single-dash cluster containing `X`, `f` or `F` after its first flag is
 * rejected rather than interpreted — fail closed.
 *
 * @param args argv forwarded for the `api` op (everything after `api`).
 * @returns a refusal (exit code 65 plus a one-line stderr reason), or `null`
 *   when the effective method is GET.
 */
function refuseApiNonGet(args: readonly string[]): IntegrationOpRefusal | null {
  const refuse = (reason: string): IntegrationOpRefusal => ({
    exitCode: 65,
    stderr: `notion api: ${reason}\n`,
  });
  const hiddenWriteFlag = /[XfF]/;
  let explicitMethod: string | null = null;
  let hasFieldFlag = false;
  for (let i = 0; i < args.length; i++) {
    const arg = args[i] ?? '';
    if (arg === '-X' || arg === '--method') {
      // A missing value becomes '' which is not GET, so it is refused below.
      explicitMethod = args[i + 1] ?? '';
      i++;
      continue;
    }
    if (arg.startsWith('--method=')) {
      explicitMethod = arg.slice('--method='.length);
      continue;
    }
    if (arg.startsWith('-X') && arg.length > 2) {
      // Glued `-XPOST` and `-X=POST` forms.
      explicitMethod = arg.slice(2).replace(/^=/, '');
      continue;
    }
    if (arg === '--input' || arg.startsWith('--input=')) {
      return refuse("'--input' (stdin/file body) isn't supported through the relay");
    }
    // Consume the spaced field value so a method-looking token bound to the
    // field (e.g. `-f -X=GET`) can't downgrade the detected method.
    if (arg === '-f' || arg === '-F' || arg === '--field' || arg === '--raw-field') {
      hasFieldFlag = true;
      i++;
      continue;
    }
    if (
      arg.startsWith('-f') ||
      arg.startsWith('-F') ||
      arg.startsWith('--field=') ||
      arg.startsWith('--raw-field=')
    ) {
      hasFieldFlag = true;
      continue;
    }
    // Any other single-dash cluster: refuse if a method/field shorthand
    // could be hiding after its first flag character.
    if (
      arg.length > 2 &&
      arg.startsWith('-') &&
      !arg.startsWith('--') &&
      hiddenWriteFlag.test(arg.slice(2))
    ) {
      return refuse(
        `short-flag cluster '${arg}' may hide -X/-f/-F; pass those flags as separate arguments`,
      );
    }
  }
  const method = (explicitMethod ?? (hasFieldFlag ? 'POST' : 'GET')).toUpperCase();
  if (method === 'GET') return null;
  return refuse(
    `only GET is proxied (use page.create / page.update for writes); detected method '${method}'`,
  );
}
/**
 * Resolve a connector by its exact (case-sensitive) `service` name.
 *
 * @param service wire-level service name, e.g. `'notion'`.
 * @returns the first entry of `ALL_CONNECTORS` whose `service` matches, or
 *   `null` when none does.
 */
export function getConnector(service: string): IntegrationConnector | null {
  const match = ALL_CONNECTORS.find((connector) => connector.service === service);
  return match ?? null;
}
/** Ready-to-send refusal returned by `IntegrationOp.refuseCall`. */
export interface IntegrationOpRefusal {
  /** Conventional CLI exit code (65 = bad usage, etc.); surfaces to the agent. */
  exitCode: number;
  /** One-line `\n`-terminated reason; rendered to the agent's stderr. */
  stderr: string;
}

/** Descriptor for one service the host relay can proxy for an in-box agent. */
export interface IntegrationConnector {
  service: IntegrationService;
  /** Host binary the relay execs (resolved on PATH). */
  hostBin: string;
  /**
   * How host presence and auth are detected. The relay's
   * `assertIntegrationReady` probe only reads `versionArgs` ("binary
   * present?"); `agentbox doctor` additionally runs `authArgs` ("logged
   * in?") and surfaces `installHint` / `loginHint` to the user when those
   * probes fail. Keeping the hint strings on the descriptor (not in the
   * doctor) means each connector is self-describing — a new connector
   * carries its own install and login hints with no doctor change.
   */
  detect: {
    versionArgs: readonly string[];
    authArgs?: readonly string[];
    installHint?: string;
    loginHint?: string;
  };
  /**
   * Extra env vars the relay forces when spawning the host CLI.
   * `mergeConnectorEnv` restricts them to the connector's own variable
   * namespace (presumably a `<SERVICE>_*` prefix — confirm against
   * `mergeConnectorEnv`), so a descriptor can't rewrite `PATH` or disable
   * the prompt gate. No connector currently sets this — it's an opt-in
   * escape hatch for a CLI that needs a specific env to resolve its auth.
   */
  env?: Readonly<Record<string, string>>;
  /** Allowlist of proxied ops keyed by op name; anything not listed is denied at the relay. */
  ops: Readonly<Record<string, IntegrationOp>>;
}
+ expect(notionConnector.env).toBeUndefined(); + }); + + it('classifies whoami/api as read and the page ops as write', () => { + expect(notionConnector.ops.whoami?.write).toBe(false); + expect(notionConnector.ops.api?.write).toBe(false); + expect(notionConnector.ops['page.create']?.write).toBe(true); + expect(notionConnector.ops['page.update']?.write).toBe(true); + }); + + it('shapes argv so the connector — not the call site — owns the host CLI surface', () => { + expect(notionConnector.ops.whoami?.buildArgv?.([])).toEqual(['whoami']); + expect(notionConnector.ops.api?.buildArgv?.(['v1/users/me'])).toEqual([ + 'api', + 'v1/users/me', + ]); + expect(notionConnector.ops['page.create']?.buildArgv?.(['--parent', 'db_id'])).toEqual([ + 'pages', + 'create', + '--parent', + 'db_id', + ]); + expect(notionConnector.ops['page.update']?.buildArgv?.(['page_id', '--archive'])).toEqual([ + 'pages', + 'update', + 'page_id', + '--archive', + ]); + }); + + it('has no ops beyond the conservative starter allowlist', () => { + expect(Object.keys(notionConnector.ops).sort()).toEqual( + ['api', 'page.create', 'page.update', 'whoami'].sort(), + ); + }); +}); + +describe('notion api refuseCall — keeps write:false honest', () => { + const refuse = notionConnector.ops.api!.refuseCall!; + + it('allows plain GETs (default and explicit method)', () => { + expect(refuse(['v1/users/me'])).toBeNull(); + expect(refuse(['-X', 'GET', 'v1/users/me'])).toBeNull(); + expect(refuse(['--method=GET', 'v1/users/me'])).toBeNull(); + expect(refuse(['-XGET', 'v1/users/me'])).toBeNull(); + }); + + it('refuses any non-GET method (the write surface)', () => { + for (const argv of [ + ['-X', 'POST', 'v1/pages'], + ['-X', 'DELETE', 'v1/blocks/abc'], + ['-X', 'PATCH', 'v1/pages/abc'], + ['--method=PUT', 'v1/pages'], + ['-XDELETE', 'v1/blocks/abc'], + ]) { + const r = refuse(argv); + expect(r).not.toBeNull(); + expect(r!.exitCode).toBe(65); + expect(r!.stderr).toMatch(/notion api/); + } + }); + + it('refuses 
implicit POST via field flags (gh-pflag style)', () => { + // -f / -F / --field / --raw-field auto-switch to POST per gh's convention. + expect(refuse(['v1/pages', '-f', 'title=hi'])?.exitCode).toBe(65); + expect(refuse(['v1/pages', '-fbody=hi'])?.exitCode).toBe(65); + expect(refuse(['v1/pages', '--field=body=hi'])?.exitCode).toBe(65); + expect(refuse(['v1/pages', '-F', 'count=5'])?.exitCode).toBe(65); + }); + + it('refuses --input (stdin/file body cannot cross the relay)', () => { + expect(refuse(['--input', '-', 'v1/pages'])?.exitCode).toBe(65); + expect(refuse(['--input=/tmp/x', 'v1/pages'])?.exitCode).toBe(65); + expect(refuse(['--input=/tmp/x'])?.stderr).toMatch(/--input/); + }); + + it("doesn't downgrade a POST when a field's value looks like -X=GET", () => { + // pflag binds `-X=GET` as `-f`'s value (so the request still POSTs); + // refuse must consume the field value and not re-read the next token + // as an explicit method. + expect(refuse(['v1/pages', '-f', '-X=GET'])?.exitCode).toBe(65); + }); +}); + +describe('linear connector', () => { + it('targets the @schpet/linear-cli `linear` binary', () => { + expect(linearConnector.hostBin).toBe('linear'); + }); + + it("declares no env override (linear uses plaintext credentials.toml)", () => { + // `linear` reads ~/.config/linear/credentials.toml by default — no env + // shaping needed. (Neither connector forces an env; the `env` field stays + // an opt-in escape hatch guarded by mergeConnectorEnv's _* check.) 
+ expect(linearConnector.env).toBeUndefined(); + }); + + it('declares the doctor install/login hints so the doctor row is self-describing', () => { + expect(linearConnector.detect.versionArgs).toEqual(['--version']); + expect(linearConnector.detect.authArgs).toEqual(['auth', 'whoami']); + expect(linearConnector.detect.installHint).toMatch(/@schpet\/linear-cli/); + expect(linearConnector.detect.loginHint).toMatch(/linear auth login/); + }); + + it('classifies reads vs writes — auth-token-equivalent ops never reach the allowlist', () => { + const ops = linearConnector.ops; + expect(ops.whoami?.write).toBe(false); + expect(ops['issue.list']?.write).toBe(false); + expect(ops['issue.mine']?.write).toBe(false); + expect(ops['issue.view']?.write).toBe(false); + expect(ops['issue.query']?.write).toBe(false); + expect(ops['team.list']?.write).toBe(false); + expect(ops.api?.write).toBe(false); + expect(ops['issue.create']?.write).toBe(true); + expect(ops['issue.update']?.write).toBe(true); + expect(ops['issue.comment']?.write).toBe(true); + }); + + it('shapes argv so the connector — not the call site — owns the host CLI surface', () => { + const ops = linearConnector.ops; + expect(ops.whoami?.buildArgv?.([])).toEqual(['auth', 'whoami']); + expect(ops['issue.list']?.buildArgv?.(['--limit', '5'])).toEqual([ + 'issue', + 'list', + '--limit', + '5', + ]); + // `issue mine` is the v2-native "issues assigned to me" read; the older + // `issue list --me` path was dropped upstream. 
+ expect(ops['issue.mine']?.buildArgv?.([])).toEqual(['issue', 'mine']); + expect(ops['issue.view']?.buildArgv?.(['ABC-1'])).toEqual(['issue', 'view', 'ABC-1']); + expect(ops['issue.query']?.buildArgv?.(['--team', 'ABC'])).toEqual([ + 'issue', + 'query', + '--team', + 'ABC', + ]); + expect(ops['team.list']?.buildArgv?.([])).toEqual(['team', 'list']); + expect(ops.api?.buildArgv?.(['{ teams { id } }'])).toEqual(['api', '{ teams { id } }']); + expect(ops['issue.create']?.buildArgv?.(['--title', 'hi'])).toEqual([ + 'issue', + 'create', + '--title', + 'hi', + ]); + expect(ops['issue.update']?.buildArgv?.(['ABC-1', '--state', 'done'])).toEqual([ + 'issue', + 'update', + 'ABC-1', + '--state', + 'done', + ]); + // `issue.comment` maps to `linear issue comment add` — `@schpet/linear-cli` + // v2 uses `add`, not `create`. The connector expands the dotted op into + // the three-segment host argv exactly here. + expect(ops['issue.comment']?.buildArgv?.(['ABC-1', '--body', 'hi'])).toEqual([ + 'issue', + 'comment', + 'add', + 'ABC-1', + '--body', + 'hi', + ]); + }); + + it('has exactly the conservative starter ops — no destructive deletes, no auth token', () => { + expect(Object.keys(linearConnector.ops).sort()).toEqual( + [ + 'whoami', + 'issue.list', + 'issue.mine', + 'issue.view', + 'issue.query', + 'team.list', + 'api', + 'issue.create', + 'issue.update', + 'issue.comment', + ].sort(), + ); + // Defense in depth: even if a future contributor adds an op called + // 'auth.token' or 'token', it must never be classified as a read passthrough + // — there's no good reason to expose any token-printing op to the box. 
+ expect(linearConnector.ops['auth.token']).toBeUndefined(); + expect(linearConnector.ops['token']).toBeUndefined(); + expect(linearConnector.ops['issue.delete']).toBeUndefined(); + expect(linearConnector.ops['team.create']).toBeUndefined(); + expect(linearConnector.ops['team.delete']).toBeUndefined(); + }); +}); + +describe('linear api refuseCall — keeps write:false honest (GraphQL gate)', () => { + const refuse = linearConnector.ops.api!.refuseCall!; + + it('allows a named query', () => { + expect(refuse(['query Teams { teams { id } }'])).toBeNull(); + }); + + it('allows the anonymous { … } shorthand', () => { + expect(refuse(['{ teams { id } }'])).toBeNull(); + }); + + it('allows queries with leading whitespace and # line comments', () => { + expect(refuse([' \n# pick teams\nquery Teams { teams { id } }'])).toBeNull(); + expect(refuse(['# header comment\n{ teams { id } }'])).toBeNull(); + expect(refuse(['\t\n query Teams { teams { id } }'])).toBeNull(); + }); + + it('refuses a GraphQL mutation', () => { + const r = refuse(['mutation IssueCreate { issueCreate(input: {}) { issue { id } } }']); + expect(r).not.toBeNull(); + expect(r!.exitCode).toBe(65); + expect(r!.stderr).toMatch(/linear api/); + expect(r!.stderr).toMatch(/mutation/); + }); + + it('refuses a mutation hidden behind leading whitespace + comment', () => { + const r = refuse([ + ' # innocuous comment\n mutation IssueCreate { issueCreate(input: {}) { issue { id } } }', + ]); + expect(r).not.toBeNull(); + expect(r!.exitCode).toBe(65); + }); + + it('refuses a GraphQL subscription', () => { + const r = refuse(['subscription IssueUpdates { issueUpdates { id } }']); + expect(r).not.toBeNull(); + expect(r!.exitCode).toBe(65); + expect(r!.stderr).toMatch(/subscription/); + }); + + it('refuses --input (stdin/file body cannot cross the relay)', () => { + expect(refuse(['--input', '-'])?.exitCode).toBe(65); + expect(refuse(['--input=/tmp/x'])?.exitCode).toBe(65); + 
expect(refuse(['--input=/tmp/x'])?.stderr).toMatch(/--input/); + }); + + it('refuses --variable key=@ (host-file load is an exfiltration channel)', () => { + // `--variable key=@/host/path` reads the file and sends contents as a + // GraphQL variable — the box could echo the variable back through the + // query response, an exfiltration channel. + expect(refuse(['--variable', 'key=@/etc/passwd', '{ x }'])?.exitCode).toBe(65); + expect(refuse(['--variable=key=@/etc/passwd', '{ x }'])?.exitCode).toBe(65); + expect(refuse(['--variable', '@/etc/passwd', '{ x }'])?.exitCode).toBe(65); + expect(refuse(['--variable', 'key=@/etc/passwd'])?.stderr).toMatch(/host-file load/); + }); + + it('refuses --variable values with `=@` anywhere (last-vs-first-`=` split safety)', () => { + // Whether linear-cli splits the value on the FIRST `=` (giving + // `name=@/etc/passwd` as the value) or the LAST `=` (giving + // `@/etc/passwd` as the value), both interpretations point at a host + // file. The guard refuses on the `=@` substring directly so neither + // split orientation matters. + expect(refuse(['--variable', 'foo=name=@/etc/passwd', '{ x }'])?.exitCode).toBe(65); + expect(refuse(['--variable=foo=name=@/etc/passwd', '{ x }'])?.exitCode).toBe(65); + }); + + it('allows plain --variable key=value (non-@ values pass)', () => { + expect(refuse(['--variable', 'key=value', '{ x }'])).toBeNull(); + expect(refuse(['--variable=key=value', '{ x }'])).toBeNull(); + }); + + it('consumes --variable / --variables-json values so the JSON is not misread as a positional', () => { + // The JSON payload to --variables-json must NOT be classified as a + // positional GraphQL source — otherwise a perfectly benign query whose + // variables JSON starts with the literal "mutation" would be refused. 
+ expect(refuse(['--variables-json', '"mutation"', '{ teams { id } }'])).toBeNull(); + expect(refuse(['--variables-json=mutation literal', '{ teams { id } }'])).toBeNull(); + // The --variable VALUE comes as the next token — if we didn't consume + // it, a value of "mutation" would refuse. + expect(refuse(['--variable', 'foo=value-mutation', '{ teams { id } }'])).toBeNull(); + // Order doesn't matter: flag-first still picks up the positional after + // the consumed value. + expect(refuse(['--paginate', '--variables-json', '{}', '{ teams { id } }'])).toBeNull(); + }); + + it("doesn't let --variable swallow a following flag (so --input defense survives)", () => { + // Argv `['--variable', '--input', …]` — if --variable greedily consumed + // the next token regardless of shape, the --input refusal one + // iteration later would never fire. The guard skips the consume when + // the next token starts with `--`, so --input is still inspected and + // refused. + expect(refuse(['--variable', '--input', '/etc/passwd'])?.stderr).toMatch(/--input/); + expect(refuse(['--variable', '--input=/etc/passwd'])?.stderr).toMatch(/--input/); + }); + + it('refuses a mutation prefixed by Unicode whitespace / BOM (gate must not fall open)', () => { + // The pre-fix parser used an ASCII-only whitespace set, so a source + // with a leading BOM (U+FEFF), NBSP (U+00A0), or LSEP (U+2028) + // returned null from firstGraphqlOperationKeyword and silently + // passed. linear-cli's GraphQL parser strips BOM and executes the + // mutation. The widened whitespace check (\s + BOM) closes that. + const bom = ''; + const nbsp = ' '; + const lsep = '
'; + expect(refuse([`${bom}mutation IssueCreate { x }`])?.exitCode).toBe(65); + expect(refuse([`${nbsp}mutation IssueCreate { x }`])?.exitCode).toBe(65); + expect(refuse([`${lsep}mutation IssueCreate { x }`])?.exitCode).toBe(65); + // Same Unicode-whitespace prefixes on a legitimate query/anonymous + // shape still pass. + expect(refuse([`${bom}{ teams { id } }`])).toBeNull(); + expect(refuse([`${nbsp}query Teams { teams { id } }`])).toBeNull(); + }); + + it("treats a single-dash positional like '-mutation' as a positional (not a flag)", () => { + // Pre-fix the parser skipped any arg starting with `-`, so a positional + // `'-mutation { x }'` slipped past unclassified. Narrowing the skip to + // long flags (`--`) makes the gate inspect single-dash tokens and + // refuse them as 'unparseable' shapes (their first significant char + // isn't `{` or an ASCII letter). + const r = refuse(['-mutation { x }']); + expect(r).not.toBeNull(); + expect(r!.exitCode).toBe(65); + }); + + it("refuses 'unparseable' positionals (first significant char isn't `{` or an ASCII letter)", () => { + // Defense-in-depth: an argv positional that we can't classify as a + // known GraphQL shape is refused with a clear message. Real queries + // always start with `query`/`mutation`/`subscription`/`{` after + // whitespace + line comments are stripped. + expect(refuse([':invalid'])?.exitCode).toBe(65); + expect(refuse(['"hello"'])?.exitCode).toBe(65); + expect(refuse(['/* C-style */ { x }'])?.exitCode).toBe(65); + }); + + it('is case-insensitive on the operation keyword', () => { + // GraphQL is case-sensitive in spec but defensive matching is cheap. 
+ expect(refuse(['MUTATION IssueCreate { x }'])?.exitCode).toBe(65); + expect(refuse(['Subscription Foo { x }'])?.exitCode).toBe(65); + }); + + it('treats flag-only argv as a pass (no positional source to inspect)', () => { + // The relay still rejects missing-positional at the host CLI; the gate + // is only responsible for refusing operations it CAN see. Empty/flag- + // only argv → null (let the host CLI emit its own usage error). + expect(refuse([])).toBeNull(); + expect(refuse(['--help'])).toBeNull(); + }); +}); diff --git a/packages/integrations/tsconfig.json b/packages/integrations/tsconfig.json new file mode 100644 index 00000000..f24546c2 --- /dev/null +++ b/packages/integrations/tsconfig.json @@ -0,0 +1,7 @@ +{ + "extends": "../../tsconfig.base.json", + "compilerOptions": { + "outDir": "dist" + }, + "include": ["src/**/*", "test/**/*"] +} diff --git a/packages/integrations/tsup.config.ts b/packages/integrations/tsup.config.ts new file mode 100644 index 00000000..17d0a4d5 --- /dev/null +++ b/packages/integrations/tsup.config.ts @@ -0,0 +1,10 @@ +import { defineConfig } from 'tsup'; + +export default defineConfig({ + entry: ['src/index.ts'], + format: ['esm'], + target: 'node20', + clean: true, + dts: true, + sourcemap: true, +}); diff --git a/packages/relay/package.json b/packages/relay/package.json index 8d951c6a..bcb42270 100644 --- a/packages/relay/package.json +++ b/packages/relay/package.json @@ -31,6 +31,7 @@ "dependencies": { "@agentbox/config": "workspace:*", "@agentbox/core": "workspace:*", + "@agentbox/integrations": "workspace:*", "@agentbox/sandbox-core": "workspace:*", "commander": "^12.1.0", "execa": "^9.5.2" diff --git a/packages/relay/src/host-actions.ts b/packages/relay/src/host-actions.ts index 88ed1314..635f3e47 100644 --- a/packages/relay/src/host-actions.ts +++ b/packages/relay/src/host-actions.ts @@ -41,7 +41,17 @@ import { type GhRunRpcParams, } from './gh.js'; import { hashRpcParams, type HostInitiatedTokens } from 
/**
 * Cloud `integration.<service>.<op>` executor. Mirrors the docker handler
 * exactly — same descriptor lookup, same read/write gating, same host
 * binary invocation. Reuses the gh-pr `cloudWriteConfirm` helper because
 * the no-subscriber fallback (`AGENTBOX_GH_NO_SUB` env knob) covers every
 * gated host action by design.
 *
 * Check order (each step returns a ready-to-send envelope on failure):
 *   1. method shape            → exit 64
 *   2. known service           → exit 64
 *   3. op on the allowlist     → `makeIntegrationOpRefusal`
 *   4. op-level `refuseCall`   → whatever the descriptor returns
 *   5. integration enabled     → `refuseIfIntegrationDisabled`
 *   6. host binary usable      → `assertIntegrationReady`
 *   7. write gate (token or confirm prompt) → exit 10 on reject/deny
 *
 * NOTE(review): the return type's type argument was not visible in the
 * reviewed text; `GitRpcResult` is what every helper called here returns —
 * confirm it is imported in this module.
 */
async function runIntegrationRpc(
  action: HostAction,
  deps: CloudActionExecutorDeps,
): Promise<GitRpcResult> {
  const parsed = parseIntegrationMethod(action.method);
  if (!parsed) {
    return {
      exitCode: 64,
      stdout: '',
      stderr: `unknown integration method shape: ${action.method}\n`,
    };
  }
  const connector = getConnector(parsed.service);
  if (!connector) {
    return {
      exitCode: 64,
      stdout: '',
      stderr: `unknown integration service: ${parsed.service}\n`,
    };
  }
  // Own-property lookup only. `ops` is a plain object, so a bare
  // `connector.ops[parsed.op]` also resolves inherited members: the op name
  // `constructor` passes the method regex and yields `Object` — truthy, with
  // no `write`, `refuseCall` or `buildArgv` — which would forward the box's
  // argv to the host binary verbatim with no allowlist and no prompt.
  const opDesc = Object.prototype.hasOwnProperty.call(connector.ops, parsed.op)
    ? connector.ops[parsed.op]
    : undefined;
  if (!opDesc) {
    return makeIntegrationOpRefusal(
      parsed.service,
      parsed.op,
      connector.hostBin,
      Object.keys(connector.ops),
    );
  }
  const params = (action.params ?? {}) as IntegrationRpcParams;
  // Params come off the wire: keep only string argv entries.
  const args = Array.isArray(params.args)
    ? params.args.filter((a): a is string => typeof a === 'string')
    : [];

  const callRefusal = refuseIntegrationCall(opDesc, args);
  if (callRefusal) return callRefusal;

  // Cloud boxes don't register worktrees the same way docker boxes do; the
  // closest analogue is `lookupCloudBox`'s `workspacePath` (the host-side
  // path the cloud provider records as the box's project root). Use it to
  // read the layered config and fire the enablement gate — same envelope
  // shape the docker handler returns. Placed after `refuseIntegrationCall`
  // so the structural / op-level checks (which don't need the box record)
  // still short-circuit cleanly on a malformed registry; the gate runs
  // before `assertIntegrationReady`, the prompt, and the host spawn so a
  // disabled integration is never user-visible as a permission prompt.
  const lookup = await lookupCloudBox(deps.boxId);
  const enableRefusal = await refuseIfIntegrationDisabled(
    parsed.service,
    lookup.workspacePath,
  );
  if (enableRefusal) return enableRefusal;

  const ready = await assertIntegrationReady(connector);
  if (ready) return ready;

  if (opDesc.write) {
    // A claimed token must validate; a present-but-invalid token is a hard
    // reject and never falls through to the prompt.
    const tokenClaimed = typeof params.hostInitiated === 'string';
    const incomingHash = hashRpcParams(params);
    const tokenOk =
      tokenClaimed &&
      (deps.hostInitiatedTokens?.consume(
        params.hostInitiated,
        deps.boxId,
        action.method,
        incomingHash,
      ) ??
        false);
    if (tokenClaimed && !tokenOk) {
      return {
        exitCode: 10,
        stdout: '',
        stderr:
          'host-initiated token rejected: invalid, expired, or bound to different params\n',
      };
    }
    if (!tokenOk) {
      const denied = await cloudWriteConfirm(
        deps,
        `integration ${parsed.service} ${parsed.op}`,
        params.path,
        args,
      );
      if (denied) return denied;
    }
  }

  return runHostIntegration(connector, opDesc, args, lookup.workspacePath);
}
The action runs * detached from the box's `/rpc` (the in-box handler responded 200 long diff --git a/packages/relay/src/index.ts b/packages/relay/src/index.ts index 8121c2c7..48e3ce7b 100644 --- a/packages/relay/src/index.ts +++ b/packages/relay/src/index.ts @@ -47,6 +47,17 @@ export { } from './prompts.js'; export { BoxNotices } from './notices.js'; export { hashRpcParams, HostInitiatedTokens } from './host-initiated.js'; +export { + _resetIntegrationReadyCacheForTests, + assertIntegrationReady, + makeIntegrationOpRefusal, + parseIntegrationMethod, + refuseIfIntegrationDisabled, + refuseIntegrationCall, + runHostIntegration, + type IntegrationRpcParams, + type ParsedIntegrationMethod, +} from './integrations.js'; export { assertGhReady, checkoutGuards, diff --git a/packages/relay/src/integrations.ts b/packages/relay/src/integrations.ts new file mode 100644 index 00000000..0c34626b --- /dev/null +++ b/packages/relay/src/integrations.ts @@ -0,0 +1,315 @@ +/** + * Generic host-side machinery for the `integration..` RPCs — + * the relay-side spine that turns a descriptor in `@agentbox/integrations` + * into a host-CLI invocation with read/write classification and write + * gating. Companion to `gh.ts`: same spawn/probe/cache shape, but driven + * by a descriptor (so each service is one small file in + * `@agentbox/integrations/connectors/`, not a new pair of files here). + * + * Lives in its own file so both `server.ts` (docker `POST /rpc`) and + * `host-actions.ts` (cloud path) share the same helpers — same cycle- + * avoidance reasoning as `gh.ts`. + */ + +import { spawn } from 'node:child_process'; +import type { IntegrationConnector, IntegrationOp } from '@agentbox/integrations'; +import { loadEffectiveConfig } from '@agentbox/config'; +import type { GitRpcResult } from './types.js'; + +/** Wire params for every `integration..` method. Mirrors GhPrRpcParams. 
/** Wire params for every `integration.<service>.<op>` method. Mirrors GhPrRpcParams. */
export interface IntegrationRpcParams {
  /** Container path the ctl ran in; used to pick the registered worktree. */
  path?: string;
  /** Pass-through argv forwarded to the host CLI (after `op.buildArgv`). */
  args?: string[];
  /**
   * One-time token minted by the host CLI via `/admin/host-initiated/mint`
   * before invoking `agentbox-ctl integration <service> <op>`. Validated
   * against the relay's in-memory store, scoped to
   * `(boxId, method=integration.<service>.<op>)` and the params-hash;
   * consumed on match and the confirm prompt is skipped. Boxes cannot mint
   * tokens (admin endpoint is loopback-only). Reserved for T1's host-CLI
   * surface (T3+) — agent-initiated ctl calls never pass it; the
   * `askPrompt` gate applies.
   */
  hostInitiated?: string;
}

/** Default wall-clock budget for one host CLI invocation. */
const INTEGRATION_RPC_TIMEOUT_MS = 120_000;
/** How long a readiness probe result (success or failure) is reused. */
const INTEGRATION_READY_CACHE_TTL_MS = 60_000;

/**
 * `integration.<service>.<op>` wire shape:
 *   - service: lowercase ASCII letters/digits, first char a letter; matches
 *              IntegrationConnector.service.
 *   - op:      lowercase ASCII + digits + dots; first char a letter
 *              (excludes leading `.` shapes like `integration.notion..api`).
 *
 * Dots are allowed in the op portion so descriptor ops can use a
 * dotted-namespace form (e.g. `page.create`) without colliding with the
 * `integration.<service>.` delimiter — everything after the second dot is
 * the op, so `integration.notion.page.create` parses to
 * `{service:'notion', op:'page.create'}`.
 */
const INTEGRATION_METHOD_RE = /^integration\.([a-z][a-z0-9]*)\.([a-z][a-z0-9.]*)$/;

export interface ParsedIntegrationMethod {
  service: string;
  op: string;
}

/**
 * Parse `integration.<service>.<op>`.
 *
 * @param method - The raw RPC method name.
 * @returns The service/op pair, or `null` when the shape does not match.
 */
export function parseIntegrationMethod(method: string): ParsedIntegrationMethod | null {
  const match = INTEGRATION_METHOD_RE.exec(method);
  const service = match?.[1];
  const op = match?.[2];
  if (service === undefined || op === undefined) return null;
  // The regex's `[a-z0-9.]*` is permissive on purpose; the degenerate shapes
  // — a trailing dot (`integration.notion.api.`) or consecutive dots
  // (`integration.notion.page..create`) — are rejected here.
  if (op.endsWith('.') || op.includes('..')) return null;
  return { service, op };
}

interface IntegrationReadyCacheEntry {
  /** null on success; ready-to-send error envelope when the binary isn't usable. */
  result: GitRpcResult | null;
  /** Epoch milliseconds after which the entry must be re-probed. */
  expiresAt: number;
}

/** Readiness probe results, keyed by `connector.hostBin`. */
const integrationReadyCache = new Map<string, IntegrationReadyCacheEntry>();
/**
 * Returns `null` when the host has the connector's binary on PATH;
 * otherwise a ready-to-send `{ exitCode, stdout, stderr }` envelope
 * describing what's missing. Cached per `connector.hostBin` for ~60s so a
 * burst of integration ops doesn't reprobe on every call (same TTL as
 * `assertGhReady`). Failures are cached too, so a freshly installed binary
 * is picked up once the entry expires.
 *
 *   - binary missing → exit 127 (matches Bash's "command not found").
 *   - binary present but the version probe exits non-zero → propagate that exit.
 *
 * Auth-status is intentionally NOT probed here — `ntn` exits non-zero with
 * a clear "not logged in" message on every call when unauthed, which
 * surfaces directly through the relay's stdout/stderr passthrough. A
 * dedicated `auth` probe is the `agentbox doctor` flow (T3), not the
 * per-call hot path.
 */
export async function assertIntegrationReady(
  connector: IntegrationConnector,
): Promise<GitRpcResult | null> {
  const now = Date.now();
  const cached = integrationReadyCache.get(connector.hostBin);
  if (cached && cached.expiresAt > now) return cached.result;
  const result = await probeIntegration(connector);
  integrationReadyCache.set(connector.hostBin, {
    result,
    expiresAt: now + INTEGRATION_READY_CACHE_TTL_MS,
  });
  return result;
}

/** Test-only: clear the readiness cache between cases. */
export function _resetIntegrationReadyCacheForTests(): void {
  integrationReadyCache.clear();
}

/**
 * Run the connector's version probe (`detect.versionArgs`) in the relay's
 * working directory with a 10s budget and translate the outcome into the
 * readiness result: `null` when it exits 0, exit 127 when the binary is
 * missing, otherwise the probe's own exit code with its output quoted.
 */
async function probeIntegration(
  connector: IntegrationConnector,
): Promise<GitRpcResult | null> {
  const version = await runHostBinary(
    connector,
    [...connector.detect.versionArgs],
    process.cwd(),
    10_000,
  );
  if (version.exitCode === 127 || /ENOENT/.test(version.stderr)) {
    return {
      exitCode: 127,
      stdout: '',
      stderr: `${connector.hostBin} not installed on host (install the ${connector.service} CLI on the host)\n`,
    };
  }
  if (version.exitCode !== 0) {
    return {
      exitCode: version.exitCode,
      stdout: '',
      stderr:
        `${connector.hostBin} ${connector.detect.versionArgs.join(' ')} failed: ` +
        (version.stderr || version.stdout).trimEnd() +
        '\n',
    };
  }
  return null;
}
/**
 * Spawn the connector's host binary with the given op + user args inside
 * `cwd`. Returns the standard `{ exitCode, stdout, stderr }` envelope.
 * `op.buildArgv` (when supplied) shapes the host CLI's subcommand path;
 * absent, the user args are forwarded verbatim. A connector-declared
 * `<SERVICE>_*` env override is merged onto `process.env` via
 * `mergeConnectorEnv`; a descriptor that tries to set an env var outside
 * its `<SERVICE>_*` namespace yields a typed exit-78 envelope (sysexits
 * EX_CONFIG) rather than throwing, so the docker /rpc and cloud paths both
 * surface the misconfiguration as a normal envelope.
 *
 * Self-contained (no import dependency on the rest of the relay), same
 * cycle-avoidance reasoning as `runHostGh` in `gh.ts`.
 *
 * @param connector - Descriptor naming the host binary and optional env.
 * @param op - Op descriptor; only `buildArgv` is consulted here.
 * @param args - User argv from the RPC params.
 * @param cwd - Working directory for the spawned process.
 * @param timeoutMs - Wall-clock budget; on expiry the envelope has exit 124.
 */
export function runHostIntegration(
  connector: IntegrationConnector,
  op: IntegrationOp,
  args: readonly string[],
  cwd: string,
  timeoutMs: number = INTEGRATION_RPC_TIMEOUT_MS,
): Promise<GitRpcResult> {
  const argv = op.buildArgv ? op.buildArgv(args) : [...args];
  return runHostBinary(connector, argv, cwd, timeoutMs);
}

/**
 * Merge the relay's `process.env` with the connector's declared overrides.
 * A connector may only set env vars whose names start with its own
 * `<SERVICE>_` prefix (service name upper-cased) — never relay-controlled
 * names like `AGENTBOX_*`, `PATH`, `HOME`, etc. A careless future
 * descriptor cannot disable the relay's prompt gate or rewrite PATH by
 * setting `env: { AGENTBOX_PROMPT: 'off' }`.
 *
 * @throws Error when a declared key falls outside the allowed prefix.
 */
function mergeConnectorEnv(connector: IntegrationConnector): NodeJS.ProcessEnv {
  if (!connector.env) return process.env;
  const allowedPrefix = `${connector.service.toUpperCase()}_`;
  const env: NodeJS.ProcessEnv = { ...process.env };
  for (const [key, value] of Object.entries(connector.env)) {
    if (!key.startsWith(allowedPrefix)) {
      throw new Error(
        `integration ${connector.service}: env key '${key}' not in '${allowedPrefix}*' namespace; descriptor cannot set it`,
      );
    }
    env[key] = value;
  }
  return env;
}

/**
 * Spawn `connector.hostBin` with `argv` and collect its output.
 *
 * Always resolves (never rejects) with an envelope:
 *   - normal exit          → the process exit code (-1 if none was reported)
 *   - spawn error          → 127 for ENOENT, otherwise 1
 *   - timeout              → 124, after sending SIGTERM
 *   - bad descriptor env   → 78, without spawning
 */
function runHostBinary(
  connector: IntegrationConnector,
  argv: readonly string[],
  cwd: string,
  timeoutMs: number,
): Promise<GitRpcResult> {
  let env: NodeJS.ProcessEnv;
  try {
    env = mergeConnectorEnv(connector);
  } catch (err) {
    // Bad descriptor — return a typed envelope so the in-box ctl prints
    // the actual cause instead of an opaque relay "internal error" 500.
    return Promise.resolve({
      exitCode: 78,
      stdout: '',
      stderr: `${connector.hostBin}: ${err instanceof Error ? err.message : String(err)}\n`,
    });
  }
  return new Promise<GitRpcResult>((resolve) => {
    const child = spawn(connector.hostBin, [...argv], {
      cwd,
      env,
      stdio: ['ignore', 'pipe', 'pipe'],
    });
    // Decode at the stream level. Converting each Buffer chunk on its own
    // corrupts any multi-byte UTF-8 character that straddles a chunk
    // boundary (each half becomes U+FFFD); setEncoding keeps the partial
    // bytes until the rest of the character arrives.
    child.stdout?.setEncoding('utf8');
    child.stderr?.setEncoding('utf8');
    let stdout = '';
    let stderr = '';
    let settled = false;
    // First caller wins: timeout, 'error' and 'close' can all fire.
    const finish = (exitCode: number): void => {
      if (settled) return;
      settled = true;
      resolve({ exitCode, stdout, stderr });
    };
    const timer = setTimeout(() => {
      child.kill('SIGTERM');
      stderr += `\nrelay: ${connector.hostBin} command timed out after ${String(timeoutMs)}ms\n`;
      finish(124);
    }, timeoutMs);
    child.stdout?.on('data', (chunk: string) => {
      stdout += chunk;
    });
    child.stderr?.on('data', (chunk: string) => {
      stderr += chunk;
    });
    child.on('error', (err) => {
      clearTimeout(timer);
      // ENOENT (binary missing) lands here too; surface as exit 127.
      const code = (err as NodeJS.ErrnoException).code;
      stderr += String(err.message ?? err);
      finish(code === 'ENOENT' ? 127 : 1);
    });
    child.on('close', (code) => {
      clearTimeout(timer);
      finish(code ?? -1);
    });
  });
}

/** Ready-to-send refusal (exit 65) for an op not on the connector's allowlist. */
export function makeIntegrationOpRefusal(
  service: string,
  op: string,
  hostBin: string,
  knownOps: readonly string[],
): GitRpcResult {
  return {
    exitCode: 65,
    stdout: '',
    stderr:
      `integration ${service}: op '${op}' not on allowlist for ${hostBin}. ` +
      `Available: ${knownOps.join(', ')}\n`,
  };
}
/**
 * Run the op's `refuseCall` pre-flight (e.g. `notion.api`'s GET-only check)
 * and lift its `{exitCode, stderr}` shape into the relay's full
 * `GitRpcResult`. Returns null when the op has no hook or the hook allows
 * the call.
 */
export function refuseIntegrationCall(
  op: IntegrationOp,
  args: readonly string[],
): GitRpcResult | null {
  const refusal = op.refuseCall?.(args);
  if (!refusal) return null;
  return { exitCode: refusal.exitCode, stdout: '', stderr: refusal.stderr };
}

/**
 * Returns null when the integration is enabled for the box's project (so the
 * dispatch may proceed), else a ready-to-send refusal envelope (exit 65).
 * Re-reads the layered config fresh on every call so toggling
 * `integrations.<service>.enabled` takes effect without bouncing the relay —
 * same approach `loadAutopauseConfig` uses for the autopause loop.
 *
 * Layered (cli/workspace/project/global/default) so a single project can opt
 * in without globally enabling Notion. Defaults to disabled — every
 * integration ships in the image but is inert until flipped on. Only a
 * literal `enabled: true` counts as enabled.
 *
 * NOTE(review): the loader's type arguments were not visible in the reviewed
 * text; the shape below is the minimum this function reads — confirm it
 * matches `loadEffectiveConfig`'s real return type.
 *
 * @param service - Connector service name (the config key).
 * @param cwd - Host-side project directory the layered config is read from.
 * @param loader - Injectable config loader; keeps unit tests off-disk.
 */
export async function refuseIfIntegrationDisabled(
  service: string,
  cwd: string,
  loader: (cwd: string) => Promise<{
    effective: { integrations?: Record<string, { enabled?: boolean } | undefined> };
  }> = loadEffectiveConfig,
): Promise<GitRpcResult | null> {
  let enabled = false;
  try {
    const cfg = await loader(cwd);
    enabled = cfg.effective.integrations?.[service]?.enabled === true;
  } catch {
    // A malformed config file should fail closed — the box can't do anything
    // useful with a half-loaded config, and the agent gets a clear message
    // either way.
  }
  if (enabled) return null;
  return {
    exitCode: 65,
    stdout: '',
    stderr:
      `${service} integration is disabled — enable with ` +
      `\`agentbox config set --project integrations.${service}.enabled true\`\n`,
  };
}
/**
 * `integration.<service>.<op>`: generic dispatch for any connector
 * registered in `@agentbox/integrations`. Mirrors the `gh.pr.<op>` flow
 * (worktree resolve → `assertReady` → host-initiated token / askPrompt for
 * writes → shell out). Reads bypass the prompt; writes are always gated.
 * Op-level `refuseCall` (e.g. `notion.api`'s GET-only check) runs after
 * worktree resolve but before any host process is touched.
 *
 * All failures return the same `{exitCode, stdout, stderr}` envelope as
 * `handleGhPrRpc` — including unknown-method/service shapes (exit 64) — so
 * the cloud and docker paths emit identical wire shapes per the "fix across
 * all providers" rule.
 *
 * NOTE(review): the return type's type argument was not visible in the
 * reviewed text; `GitRpcResult` is what every helper called here returns —
 * confirm it is imported in this module.
 */
async function handleIntegrationRpc(
  method: string,
  reg: BoxRegistration,
  params: IntegrationRpcParams | undefined,
  prompts: PendingPrompts,
  subscribers: PromptSubscribers,
  hostInitiatedTokens: HostInitiatedTokens,
): Promise<GitRpcResult> {
  const parsed = parseIntegrationMethod(method);
  if (!parsed) {
    return {
      exitCode: 64,
      stdout: '',
      stderr: `unknown integration method shape: ${method}\n`,
    };
  }
  const connector = getConnector(parsed.service);
  if (!connector) {
    return {
      exitCode: 64,
      stdout: '',
      stderr: `unknown integration service: ${parsed.service}\n`,
    };
  }
  // Own-property lookup only. `ops` is a plain object, so a bare
  // `connector.ops[parsed.op]` also resolves inherited members: the op name
  // `constructor` passes the method regex and yields `Object` — truthy, with
  // no `write`, `refuseCall` or `buildArgv` — which would forward the box's
  // argv to the host binary verbatim with no allowlist and no prompt.
  const opDesc = Object.prototype.hasOwnProperty.call(connector.ops, parsed.op)
    ? connector.ops[parsed.op]
    : undefined;
  if (!opDesc) {
    return makeIntegrationOpRefusal(
      parsed.service,
      parsed.op,
      connector.hostBin,
      Object.keys(connector.ops),
    );
  }
  const containerPath = params?.path ?? '/workspace';
  const worktree = resolveWorktree(reg, containerPath);
  if (!worktree) {
    return {
      exitCode: 64,
      stdout: '',
      // Newline-terminated like every other envelope in this handler.
      stderr: `no worktree registered for box ${reg.boxId} matching ${containerPath}\n`,
    };
  }
  // Params come off the wire: keep only string argv entries.
  const args = Array.isArray(params?.args)
    ? params.args.filter((a): a is string => typeof a === 'string')
    : [];

  const callRefusal = refuseIntegrationCall(opDesc, args);
  if (callRefusal) return callRefusal;

  // Layered enablement gate — the in-box shim and ctl both transparently
  // forward to here, so this one check covers every caller. Reads the
  // worktree's project config so a single project can opt in without
  // flipping it globally. Placed after `refuseIntegrationCall` so the
  // ordering matches the cloud handler (`runIntegrationRpc` in
  // host-actions.ts) — keeps the wire envelope identical across providers
  // for the malformed-args-to-disabled-integration edge case. Runs before
  // `assertIntegrationReady`, the prompt, and the host spawn so a disabled
  // integration is never user-visible as a permission prompt.
  const enableRefusal = await refuseIfIntegrationDisabled(
    parsed.service,
    worktree.hostMainRepo,
  );
  if (enableRefusal) return enableRefusal;

  const ready = await assertIntegrationReady(connector);
  if (ready) return ready;

  // Host-initiated calls (from a host CLI mint) skip the prompt — but only
  // with a valid scope-matched, params-hash-bound one-time token. Hard
  // reject a *present-but-invalid* token (attack signal). Only fall through
  // to the prompt when no token was claimed. Reads never need a token.
  if (opDesc.write) {
    const tokenClaimed = typeof params?.hostInitiated === 'string';
    const incomingHash = hashRpcParams(params);
    const tokenOk =
      tokenClaimed &&
      hostInitiatedTokens.consume(params?.hostInitiated, reg.boxId, method, incomingHash);
    if (tokenClaimed && !tokenOk) {
      return {
        exitCode: 10,
        stdout: '',
        stderr:
          'host-initiated token rejected: invalid, expired, or bound to different params\n',
      };
    }
    if (!tokenOk) {
      // The prompt shows at most the first 200 chars of the joined argv.
      const detail = args.join(' ').slice(0, 200);
      const verdict = await askPrompt(prompts, subscribers, reg.boxId, {
        kind: 'confirm',
        message: `Allow ${parsed.service} ${parsed.op} from box ${reg.name}?`,
        detail,
        defaultAnswer: 'n',
        context: {
          command: `integration ${parsed.service} ${parsed.op}`,
          cwd: containerPath,
          argv: args,
        },
      });
      if (verdict.answer !== 'y') {
        return { exitCode: 10, stdout: '', stderr: 'denied by user\n' };
      }
    }
  }

  return runHostIntegration(connector, opDesc, args, worktree.hostMainRepo);
}
(malformed shape) returns exit 64', async () => { + const result = await executeCloudAction(action('integration.notion.'), makeDeps()); + expect(result.exitCode).toBe(64); + expect(result.stderr).toContain('unknown integration method shape'); + }); + + it('integration.trello.api (unknown service, allowlist-default) returns exit 64', async () => { + const result = await executeCloudAction( + action('integration.trello.api', { args: ['v1/issues'] }), + makeDeps(), + ); + expect(result.exitCode).toBe(64); + expect(result.stderr).toContain('unknown integration service'); + }); + + it('integration.notion.bogus (op not on allowlist) returns exit 65', async () => { + const result = await executeCloudAction( + action('integration.notion.bogus', { args: [] }), + makeDeps(), + ); + expect(result.exitCode).toBe(65); + expect(result.stderr).toContain('not on allowlist'); + }); + + it('integration.notion.api with -X DELETE refused (read classification stays honest)', async () => { + const result = await executeCloudAction( + action('integration.notion.api', { args: ['-X', 'DELETE', 'v1/blocks/abc'] }), + makeDeps(), + ); + expect(result.exitCode).toBe(65); + expect(result.stderr).toMatch(/notion api/); + }); + + // Mirrors the docker handler's disabled-gate test. The structural / op-level + // refusals above all exit before `lookupCloudBox`, so they hit the same + // envelope on both providers without the cloud test needing a fake state + // record. This test goes one step deeper — it confirms the gate, which + // DOES read `lookupCloudBox().workspacePath`, fires for a well-formed call. + it('integration.notion.api disabled by default surfaces exit 65 on cloud too', async () => { + // No state.json is set up; lookupCloudBox throws. Wrap the call so the + // thrown error becomes a typed envelope we can assert on, mirroring how + // the real cloud poller catches lookup failures upstream. 
+ const r = await executeCloudAction( + action('integration.notion.whoami', { args: [] }), + makeDeps(), + ).catch((err: unknown) => ({ + exitCode: -1, + stdout: '', + stderr: err instanceof Error ? err.message : String(err), + })); + // The state-missing throw is the SAME shape the existing tests rely on + // — pre-gate this would have hit lookupCloudBox at the very end (during + // the spawn), now it hits it during the gate. Either way the error + // mentions `state.json`, so existing observed-behavior parity holds. + expect(r.stderr).toContain('state.json'); + }); }); diff --git a/packages/relay/test/integrations.test.ts b/packages/relay/test/integrations.test.ts new file mode 100644 index 00000000..bcc5179a --- /dev/null +++ b/packages/relay/test/integrations.test.ts @@ -0,0 +1,453 @@ +import { afterEach, beforeEach, describe, expect, it } from 'vitest'; +import type { AddressInfo } from 'node:net'; +import { mkdtemp, readFile, rm, writeFile, chmod } from 'node:fs/promises'; +import { tmpdir } from 'node:os'; +import { join } from 'node:path'; +import { startRelayServer, type RelayServerHandle } from '../src/server.js'; +import { + parseIntegrationMethod, + refuseIfIntegrationDisabled, + refuseIntegrationCall, + runHostIntegration, +} from '../src/integrations.js'; +import type { IntegrationConnector } from '@agentbox/integrations'; + +interface FetchResult { + status: number; + body: unknown; + text: string; +} + +async function fetchJson( + handle: RelayServerHandle, + method: string, + path: string, + init: { token?: string; body?: unknown } = {}, +): Promise { + const port = (handle.server.address() as AddressInfo).port; + const headers: Record = { 'Content-Type': 'application/json' }; + if (init.token) headers.Authorization = `Bearer ${init.token}`; + const res = await fetch(`http://127.0.0.1:${String(port)}${path}`, { + method, + headers, + body: init.body !== undefined ? 
JSON.stringify(init.body) : undefined, + }); + const text = await res.text(); + let body: unknown = null; + if (text.length > 0) { + try { + body = JSON.parse(text); + } catch { + body = text; + } + } + return { status: res.status, body, text }; +} + +describe('refuseIntegrationCall', () => { + it('returns null when the op has no refuseCall hook', () => { + expect(refuseIntegrationCall({ write: true }, ['anything'])).toBeNull(); + }); + + it('lifts the descriptor refusal into a full GitRpcResult', () => { + const op = { + write: false, + refuseCall: () => ({ exitCode: 65, stderr: 'no\n' }), + }; + expect(refuseIntegrationCall(op, [])).toEqual({ + exitCode: 65, + stdout: '', + stderr: 'no\n', + }); + }); +}); + +describe('connector.env namespace guard', () => { + // A future descriptor that tries to shadow a relay-controlled env var + // (AGENTBOX_PROMPT, PATH, etc.) must be rejected so a careless contributor + // can't disable the prompt gate from a descriptor. The runtime path + // returns a typed exit-78 envelope (sysexits EX_CONFIG) instead of + // throwing, so the in-box ctl prints the actual cause rather than an + // opaque relay 'internal error' 500. + it('returns exit 78 when a descriptor sets an env key outside its SERVICE_ namespace', async () => { + const bogus: IntegrationConnector = { + service: 'notion', + hostBin: 'ntn', + detect: { versionArgs: ['--version'] }, + env: { AGENTBOX_PROMPT: 'off' }, + ops: { ping: { write: false, buildArgv: () => ['--version'] } }, + }; + const r = await runHostIntegration(bogus, bogus.ops.ping!, [], process.cwd(), 5_000); + expect(r.exitCode).toBe(78); + expect(r.stderr).toMatch(/not in 'NOTION_\*' namespace/); + }); + + it('accepts an env key in the SERVICE_ namespace', async () => { + // Synthetic stub — the real notionConnector declares no env. This only + // exercises the generic mergeConnectorEnv guard with a sample NOTION_* key. 
+ const ok: IntegrationConnector = { + service: 'notion', + // `/usr/bin/true` exists on both macOS and Linux; `/bin/true` is + // Linux-only (macOS has no /bin/true), which ENOENT'd to exit 127 here. + hostBin: '/usr/bin/true', + detect: { versionArgs: ['--version'] }, + env: { NOTION_KEYRING: '0' }, + ops: { ping: { write: false, buildArgv: () => [] } }, + }; + const r = await runHostIntegration(ok, ok.ops.ping!, [], process.cwd(), 5_000); + expect(r.exitCode).toBe(0); + }); +}); + +describe('parseIntegrationMethod', () => { + it('parses well-formed integration methods', () => { + expect(parseIntegrationMethod('integration.notion.api')).toEqual({ + service: 'notion', + op: 'api', + }); + // Dotted op names (page.create) split on the FIRST two dots and keep + // the rest as the op. + expect(parseIntegrationMethod('integration.notion.page.create')).toEqual({ + service: 'notion', + op: 'page.create', + }); + }); + + it('rejects degenerate shapes', () => { + expect(parseIntegrationMethod('integration.notion.')).toBeNull(); + expect(parseIntegrationMethod('integration..api')).toBeNull(); + expect(parseIntegrationMethod('integration.notion.page..create')).toBeNull(); + expect(parseIntegrationMethod('integration.notion.api.')).toBeNull(); + expect(parseIntegrationMethod('integration.NOTION.api')).toBeNull(); + expect(parseIntegrationMethod('gh.pr.create')).toBeNull(); + expect(parseIntegrationMethod('')).toBeNull(); + }); +}); + +/** + * End-to-end relay /rpc dispatch through `handleIntegrationRpc`. We stub + * `ntn` via a tempdir on PATH (same pattern as `relay /rpc gh.pr.* flow` + * in server.test.ts) so the tests are deterministic on machines without + * the real CLI. The stub records its argv into a side file so we can assert + * what was invoked. 
+ */ +describe('relay /rpc integration.* flow', () => { + let handle: RelayServerHandle; + let stubDir: string; + let stubLog: string; + let prevPath: string | undefined; + let prevPrompt: string | undefined; + + beforeEach(async () => { + stubDir = await mkdtemp(join(tmpdir(), 'ntn-stub-')); + stubLog = join(stubDir, 'invocations.log'); + // The stub records argv. `--version` satisfies the readiness probe; + // `api …` and `pages create …` etc. echo their argv and exit 0 so the + // relay's runHostIntegration produces a stable, asserted stdout. + const script = `#!/usr/bin/env bash +echo "$@" >> ${JSON.stringify(stubLog)} +case "$1" in + --version) echo "ntn stub 0.0.0"; exit 0 ;; + *) echo "stub: ntn $*"; exit 0 ;; +esac +`; + const stubPath = join(stubDir, 'ntn'); + await writeFile(stubPath, script, 'utf8'); + await chmod(stubPath, 0o755); + // Workspace-layer agentbox.yaml that enables the Notion integration — + // disabled by default, so without this every dispatch hits the relay's + // host-side gate and returns exit 65. Lives in `stubDir` because that's + // the `hostMainRepo` we register below; `loadEffectiveConfig` reads + // /agentbox.yaml's `defaults:` block as the workspace layer. + await writeFile( + join(stubDir, 'agentbox.yaml'), + 'defaults:\n integrations:\n notion:\n enabled: true\n', + 'utf8', + ); + prevPath = process.env.PATH; + process.env.PATH = `${stubDir}:${prevPath ?? 
''}`; + prevPrompt = process.env.AGENTBOX_PROMPT; + delete process.env.AGENTBOX_PROMPT; + const integ = await import('../src/integrations.js'); + integ._resetIntegrationReadyCacheForTests(); + handle = await startRelayServer({ port: 0, host: '127.0.0.1' }); + }); + + afterEach(async () => { + await handle.close(); + await rm(stubDir, { recursive: true, force: true }); + if (prevPath === undefined) delete process.env.PATH; + else process.env.PATH = prevPath; + if (prevPrompt === undefined) delete process.env.AGENTBOX_PROMPT; + else process.env.AGENTBOX_PROMPT = prevPrompt; + const integ = await import('../src/integrations.js'); + integ._resetIntegrationReadyCacheForTests(); + }); + + async function registerBox(): Promise<void> { + // hostMainRepo is stubDir: besides being the spawn cwd, it is where the + // workspace-layer agentbox.yaml (written in beforeEach) is looked up. + const r = await fetchJson(handle, 'POST', '/admin/register-box', { + body: { + boxId: 'b1', + token: 't1', + name: 'box-one', + worktrees: [ + { containerPath: '/workspace', hostMainRepo: stubDir, branch: 'agentbox/box-one' }, + ], + }, + }); + expect(r.status).toBe(204); + } + + it('reads (api) bypass the prompt and propagate stub stdout', async () => { + await registerBox(); + process.env.AGENTBOX_PROMPT = 'off'; + const r = await fetchJson(handle, 'POST', '/rpc', { + token: 't1', + body: { + method: 'integration.notion.api', + params: { path: '/workspace', args: ['v1/users/me'] }, + }, + }); + expect(r.status).toBe(200); + const body = r.body as { exitCode: number; stdout: string }; + expect(body.exitCode).toBe(0); + expect(body.stdout).toContain('stub: ntn api v1/users/me'); + expect(handle.prompts.size()).toBe(0); + }); + + it('write op enqueues an askPrompt; denial returns exit 10', async () => { + await registerBox(); + const rpcPromise = fetchJson(handle, 'POST', '/rpc', { + token: 't1', + body: { + method: 'integration.notion.page.create', + params: { path: '/workspace', args: ['--parent',
'db_id', '--title', 'T'] }, + }, + }); + let pendingId: string | null = null; + for (let i = 0; i < 50 && pendingId === null; i++) { + const list = handle.prompts.forBox('b1'); + if (list.length > 0) { + pendingId = list[0]!.id; + // The wire-format prompt context surfaces the method to the wrapper. + expect(list[0]!.context?.command).toBe('integration notion page.create'); + } else { + await new Promise((r) => setTimeout(r, 10)); + } + } + expect(pendingId).not.toBeNull(); + + const answer = await fetchJson(handle, 'POST', '/admin/prompts/answer', { + body: { id: pendingId, answer: 'n' }, + }); + expect(answer.status).toBe(204); + + const rpc = await rpcPromise; + expect(rpc.status).toBe(500); + const body = rpc.body as { exitCode: number; stderr: string }; + expect(body.exitCode).toBe(10); + expect(body.stderr).toMatch(/denied by user/); + }); + + it('write op approved runs the host CLI', async () => { + await registerBox(); + process.env.AGENTBOX_PROMPT = 'off'; + const r = await fetchJson(handle, 'POST', '/rpc', { + token: 't1', + body: { + method: 'integration.notion.page.create', + params: { path: '/workspace', args: ['--parent', 'db_id'] }, + }, + }); + expect(r.status).toBe(200); + const body = r.body as { exitCode: number; stdout: string }; + expect(body.exitCode).toBe(0); + expect(body.stdout).toContain('stub: ntn pages create --parent db_id'); + }); + + it('op not on allowlist refuses with exit 65', async () => { + await registerBox(); + process.env.AGENTBOX_PROMPT = 'off'; + const r = await fetchJson(handle, 'POST', '/rpc', { + token: 't1', + body: { + method: 'integration.notion.bogus', + params: { path: '/workspace', args: [] }, + }, + }); + expect(r.status).toBe(500); + const body = r.body as { exitCode: number; stderr: string }; + expect(body.exitCode).toBe(65); + expect(body.stderr).toMatch(/not on allowlist/); + // The stub must NOT have been invoked for a disallowed op. 
+ let invoked = false; + try { + const log = await readFile(stubLog, 'utf8'); + // Only `--version` from the readiness probe may appear. + invoked = log.split('\n').some((l) => l.trim().length > 0 && l.trim() !== '--version'); + } catch { + invoked = false; + } + expect(invoked).toBe(false); + }); + + it('unknown service surfaces exit 64 (allowlist-default; same envelope as cloud)', async () => { + await registerBox(); + const r = await fetchJson(handle, 'POST', '/rpc', { + token: 't1', + body: { + method: 'integration.trello.api', + params: { path: '/workspace', args: ['v1/issues'] }, + }, + }); + expect(r.status).toBe(500); + const body = r.body as { exitCode: number; stderr: string }; + expect(body.exitCode).toBe(64); + expect(body.stderr).toMatch(/unknown integration service/); + }); + + it('malformed method shape surfaces exit 64', async () => { + await registerBox(); + const r = await fetchJson(handle, 'POST', '/rpc', { + token: 't1', + body: { method: 'integration.notion.' }, + }); + expect(r.status).toBe(500); + const body = r.body as { exitCode: number; stderr: string }; + expect(body.exitCode).toBe(64); + expect(body.stderr).toMatch(/unknown integration method shape/); + }); + + it('refuseCall blocks an `api -X DELETE` before the host CLI is touched', async () => { + await registerBox(); + process.env.AGENTBOX_PROMPT = 'off'; + const r = await fetchJson(handle, 'POST', '/rpc', { + token: 't1', + body: { + method: 'integration.notion.api', + params: { path: '/workspace', args: ['-X', 'DELETE', 'v1/blocks/abc'] }, + }, + }); + expect(r.status).toBe(500); + const body = r.body as { exitCode: number; stderr: string }; + expect(body.exitCode).toBe(65); + expect(body.stderr).toMatch(/notion api/); + // The stub must NOT have been spawned for the rejected api call. 
+ const log = await readFile(stubLog, 'utf8').catch(() => ''); + expect(log.split('\n').some((l) => l.trim() === '-X DELETE v1/blocks/abc')).toBe(false); + }); + + it('no worktree registered surfaces exit 64', async () => { + // Register without worktrees so resolveWorktree returns null. + const r0 = await fetchJson(handle, 'POST', '/admin/register-box', { + body: { boxId: 'b1', token: 't1', name: 'box-one' }, + }); + expect(r0.status).toBe(204); + process.env.AGENTBOX_PROMPT = 'off'; + const r = await fetchJson(handle, 'POST', '/rpc', { + token: 't1', + body: { + method: 'integration.notion.api', + params: { path: '/workspace', args: ['v1/users/me'] }, + }, + }); + expect(r.status).toBe(500); + const body = r.body as { exitCode: number; stderr: string }; + expect(body.exitCode).toBe(64); + expect(body.stderr).toMatch(/no worktree registered/); + }); + + it('reports ntn-not-installed when the binary is missing from PATH', async () => { + await registerBox(); + process.env.PATH = '/nonexistent-bin-dir'; + const integ = await import('../src/integrations.js'); + integ._resetIntegrationReadyCacheForTests(); + process.env.AGENTBOX_PROMPT = 'off'; + const r = await fetchJson(handle, 'POST', '/rpc', { + token: 't1', + body: { + method: 'integration.notion.api', + params: { path: '/workspace', args: ['v1/users/me'] }, + }, + }); + expect(r.status).toBe(500); + const body = r.body as { exitCode: number; stderr: string }; + expect(body.exitCode).toBe(127); + expect(body.stderr).toMatch(/ntn not installed/); + }); + + it('disabled integration short-circuits with exit 65 and no host spawn', async () => { + await registerBox(); + // Replace the workspace agentbox.yaml from beforeEach with one that + // explicitly disables the integration. The relay re-reads the config + // per call, so this takes effect immediately. 
+ await writeFile( + join(stubDir, 'agentbox.yaml'), + 'defaults:\n integrations:\n notion:\n enabled: false\n', + 'utf8', + ); + process.env.AGENTBOX_PROMPT = 'off'; + const r = await fetchJson(handle, 'POST', '/rpc', { + token: 't1', + body: { + method: 'integration.notion.api', + params: { path: '/workspace', args: ['v1/users/me'] }, + }, + }); + expect(r.status).toBe(500); + const body = r.body as { exitCode: number; stderr: string }; + expect(body.exitCode).toBe(65); + expect(body.stderr).toMatch(/notion integration is disabled/); + expect(body.stderr).toMatch(/integrations\.notion\.enabled true/); + // The host stub was never spawned for the disabled call. (The earlier + // readiness probe DOES run via `assertIntegrationReady` once per + // cache window — but the gate fires before that for *this* call; + // verify by checking the api endpoint argv never lands in the log.) + const log = await readFile(stubLog, 'utf8').catch(() => ''); + expect(log.split('\n').some((l) => l.trim() === 'api v1/users/me')).toBe(false); + }); +}); + +describe('refuseIfIntegrationDisabled', () => { + // Pure unit test of the gate logic — uses the injected loader so it + // doesn't depend on the filesystem. The relay /rpc tests above cover + // the wiring; this one nails down the branches. 
+ const makeLoader = ( + integrations?: Record<string, { enabled: boolean }>, + ): (() => Promise<{ + effective: { integrations?: Record<string, { enabled: boolean }> }; + }>) => () => Promise.resolve({ effective: { integrations } }); + + it('returns null when the service is enabled', async () => { + const out = await refuseIfIntegrationDisabled( + 'notion', + '/tmp', + makeLoader({ notion: { enabled: true } }), + ); + expect(out).toBeNull(); + }); + + it('returns the disabled refusal when the service slot is missing', async () => { + const out = await refuseIfIntegrationDisabled('notion', '/tmp', makeLoader(undefined)); + expect(out?.exitCode).toBe(65); + expect(out?.stderr).toMatch(/notion integration is disabled/); + }); + + it('returns the disabled refusal when the flag is false', async () => { + const out = await refuseIfIntegrationDisabled( + 'notion', + '/tmp', + makeLoader({ notion: { enabled: false } }), + ); + expect(out?.exitCode).toBe(65); + }); + + it('fails closed when the loader throws (malformed config → disabled)', async () => { + const out = await refuseIfIntegrationDisabled('notion', '/tmp', () => { + throw new Error('yaml parse error'); + }); + expect(out?.exitCode).toBe(65); + }); +}); diff --git a/packages/sandbox-docker/Dockerfile.box b/packages/sandbox-docker/Dockerfile.box index 6331b52f..7fca16da 100644 --- a/packages/sandbox-docker/Dockerfile.box +++ b/packages/sandbox-docker/Dockerfile.box @@ -154,6 +154,25 @@ COPY packages/sandbox-docker/scripts/gh-shim /usr/local/bin/gh COPY packages/sandbox-docker/scripts/git-shim /usr/local/bin/git RUN chmod +x /usr/local/bin/gh /usr/local/bin/git +# `ntn` (Notion CLI) shim — same shape as gh-shim, routes a strict subset +# of `ntn` subcommands through the host relay (the host's `ntn` runs the +# call; the box never sees the Notion token). Symlinked as `notion` per +# docs/integrations_backlog.md's per-service surface naming. Disabled by +# default; flip `integrations.notion.enabled` to enable.
See +# packages/sandbox-docker/scripts/ntn-shim and docs/notion_backlog.md. +COPY packages/sandbox-docker/scripts/ntn-shim /usr/local/bin/ntn +RUN chmod +x /usr/local/bin/ntn && ln -s /usr/local/bin/ntn /usr/local/bin/notion + +# `linear` (Linear CLI — @schpet/linear-cli) shim — same shape as ntn-shim, +# routes a strict subset of `linear` subcommands through the host relay +# (the host's `linear` runs the call; the box never sees the Linear API +# token). `linear auth token` (which would print the raw token to stdout) +# is explicitly rejected by the shim. Disabled by default; flip +# `integrations.linear.enabled` to enable. See +# packages/sandbox-docker/scripts/linear-shim and docs/linear_backlog.md. +COPY packages/sandbox-docker/scripts/linear-shim /usr/local/bin/linear +RUN chmod +x /usr/local/bin/linear + # Setup guide for the first-run wizard. This baked copy is the single source # of the /agentbox-setup skill: seedSetupSkillIntoVolume() # (packages/sandbox-docker/src/claude.ts) copies it into the box's diff --git a/packages/sandbox-docker/scripts/linear-shim b/packages/sandbox-docker/scripts/linear-shim new file mode 100755 index 00000000..110f9987 --- /dev/null +++ b/packages/sandbox-docker/scripts/linear-shim @@ -0,0 +1,181 @@ +#!/usr/bin/env bash +# agentbox `linear` shim — translates a strict subset of `linear` +# (@schpet/linear-cli, v2) subcommands into `agentbox-ctl integration +# linear ` so the host's authenticated `linear` runs the operation and +# only the result crosses back into the box. The in-box agent never sees a +# Linear API token. +# +# Installed at /usr/local/bin/linear (real `linear` is not in the box). +# +# This shim ships only what documented agent flows need; anything outside +# the subset below is rejected with a clear error. Add ops deliberately — +# the relay is gated by `integrations.linear.enabled` and an explicit op +# allowlist in @agentbox/integrations. 
+# +# Three classes of upstream subcommand are EXPLICITLY rejected even though +# they exist on the host CLI, because proxying them would defeat the +# security model: +# - `auth token` PRINTS the raw API token to stdout — proxying it would +# hand the box the host's Linear credential. The only auth-family op +# we proxy is `auth whoami` (identity only), via `linear whoami`. +# - `auth login/logout/migrate/default` would mutate host auth state. +# - `issue delete` / `team delete` / `team create` are destructive and +# off-list (widen deliberately, as gated writes, only if needed). + +set -euo pipefail + +# Path is a constant in production; the env override exists purely to let +# unit tests substitute a stub `agentbox-ctl` on PATH without rewriting the +# shim. Mirrors gh-shim / git-shim / ntn-shim. +CTL="${AGENTBOX_CTL_PATH:-/usr/local/bin/agentbox-ctl}" + +die() { + printf 'agentbox linear shim: %s\n' "$*" >&2 + exit 2 +} + +handle_auth() { + local sub="${1-}"; shift || true + case "$sub" in + whoami) + exec "$CTL" integration linear whoami -- "$@" + ;; + token) + die "'auth token' leaks the raw API key — refused. Use 'linear whoami' for identity." + ;; + login|logout|migrate|default) + die "'auth $sub' is not proxied (the host owns auth; run it on the host)." + ;; + '') + die "missing subcommand for 'auth'. Supported: whoami" + ;; + *) + die "unsupported 'auth $sub' (allowed: whoami)" + ;; + esac +} + +handle_issue_comment() { + local sub="${1-}"; shift || true + case "$sub" in + add) + exec "$CTL" integration linear issue.comment -- "$@" + ;; + '') + die "missing subcommand for 'issue comment'. 
Supported: add" + ;; + *) + die "unsupported 'issue comment $sub' (allowed: add)" + ;; + esac +} + +handle_issue() { + local sub="${1-}"; shift || true + case "$sub" in + list) + exec "$CTL" integration linear issue.list -- "$@" + ;; + mine) + exec "$CTL" integration linear issue.mine -- "$@" + ;; + view) + exec "$CTL" integration linear issue.view -- "$@" + ;; + query) + exec "$CTL" integration linear issue.query -- "$@" + ;; + create) + exec "$CTL" integration linear issue.create -- "$@" + ;; + update) + exec "$CTL" integration linear issue.update -- "$@" + ;; + comment) + handle_issue_comment "$@" + ;; + delete) + die "'issue delete' is not proxied (destructive; off-list by default)." + ;; + '') + die "missing subcommand for 'issue'. Supported: list, mine, view, query, create, update, comment add" + ;; + *) + die "unsupported 'issue $sub' (allowed: list, mine, view, query, create, update, comment add)" + ;; + esac +} + +handle_team() { + local sub="${1-}"; shift || true + case "$sub" in + list) + exec "$CTL" integration linear team.list -- "$@" + ;; + create|delete) + die "'team $sub' is not proxied (destructive; off-list by default)." + ;; + '') + die "missing subcommand for 'team'. Supported: list" + ;; + *) + die "unsupported 'team $sub' (allowed: list)" + ;; + esac +} + +# Top-level dispatch. `linear`'s real subcommands are +# `auth issue team project cycle milestone initiative label document api schema`; +# we expose only the read-safe ones plus a few gated writes (no destructive +# ops, no auth token). +if [ $# -eq 0 ]; then + die "no subcommand. Supported: whoami, auth whoami, issue {list,mine,view,query,create,update,comment add}, team list, api <query>, --version" +fi + +case "$1" in + --version|-v) + # Tools that sniff "linear --version" succeed with our shim line. The + # real version lives host-side and is reported by the relay's + # readiness probe (`assertIntegrationReady`).
+ printf 'linear version 0.0.0 (agentbox-shim)\n' + ;; + --help|-h) + printf 'agentbox linear shim — strict subset.\n' >&2 + printf 'Supported: whoami, auth whoami, issue {list,mine,view,query,create,update,comment add}, team list, api <query>, --version\n' >&2 + printf 'Anything else is rejected. Run host `linear --help` for full upstream docs.\n' >&2 + ;; + whoami) + shift + exec "$CTL" integration linear whoami -- "$@" + ;; + auth) + shift + handle_auth "$@" + ;; + issue) + shift + handle_issue "$@" + ;; + team) + shift + handle_team "$@" + ;; + api) + shift + # `linear api` accepts pre-positional flags (`--variable`, + # `--variables-json`, `--paginate`, `--silent`) before the GraphQL + # query, so we don't require the FIRST arg to be a non-flag — only + # that some arg is present. The relay's refuseGraphqlNonQuery + # enforces query-only by rejecting any positional whose first + # keyword is `mutation`/`subscription` (and any `--variable + # key=@<file>` host-file load), so we don't duplicate that check + # here. Writes go through the dedicated issue.* ops. + if [ $# -eq 0 ]; then + die "'api' requires a positional (e.g. '{ teams { id } }')" + fi + exec "$CTL" integration linear api -- "$@" + ;; + *) + die "'$1' is not proxied (supported: whoami, auth whoami, issue {list,mine,view,query,create,update,comment add}, team list, api <query>, --version)" + ;; +esac diff --git a/packages/sandbox-docker/scripts/ntn-shim b/packages/sandbox-docker/scripts/ntn-shim new file mode 100755 index 00000000..97e26c24 --- /dev/null +++ b/packages/sandbox-docker/scripts/ntn-shim @@ -0,0 +1,93 @@ +#!/usr/bin/env bash +# agentbox `ntn` shim — translates a strict subset of `ntn` (the official +# Notion CLI) subcommands into `agentbox-ctl integration notion <op>` so the +# host's authenticated `ntn` runs the operation and only the result crosses +# back into the box. The in-box agent never sees a Notion token. +# +# Installed at /usr/local/bin/ntn (real `ntn` is not in the box).
The same +# shim is symlinked as /usr/local/bin/notion — the per-service surface name +# from docs/integrations_backlog.md — both invocations behave identically. +# +# This shim ships only what documented agent flows need; anything outside +# the subset below is rejected with a clear error. Add ops deliberately — +# the relay is gated by `integrations.notion.enabled` and an explicit op +# allowlist in @agentbox/integrations. + +set -euo pipefail + +# Path is a constant in production; the env override exists purely to let +# unit tests substitute a stub `agentbox-ctl` on PATH without rewriting the +# shim. Mirrors gh-shim / git-shim. +CTL="${AGENTBOX_CTL_PATH:-/usr/local/bin/agentbox-ctl}" + +die() { + printf 'agentbox notion shim: %s\n' "$*" >&2 + exit 2 +} + +handle_pages() { + local op="${1-}"; shift || true + case "$op" in + create) + exec "$CTL" integration notion page.create -- "$@" + ;; + update) + exec "$CTL" integration notion page.update -- "$@" + ;; + '') + die "missing subcommand for 'pages'. Supported: create, update" + ;; + *) + die "unsupported 'pages $op' (allowed: create, update)" + ;; + esac +} + +# Top-level dispatch. `ntn`'s real subcommands are +# `api datasources files pages login logout whoami workers`; we expose only +# the read-safe ones plus `pages {create,update}`. +if [ $# -eq 0 ]; then + die "no subcommand. Supported: whoami, api , pages {create,update}, --version" +fi + +case "$1" in + --version|-v) + # Tools that sniff "ntn version" succeed with our shim line. The real + # version lives host-side and is reported by the relay's readiness probe + # (`assertIntegrationReady`). + printf 'ntn version 0.0.0 (agentbox-shim)\n' + ;; + --help|-h) + printf 'agentbox notion shim — strict subset.\n' >&2 + printf 'Supported: whoami, api , pages {create, update}, --version\n' >&2 + printf 'Anything else is rejected. 
Run host `ntn --help` for full upstream docs.\n' >&2 + ;; + whoami) + shift + exec "$CTL" integration notion whoami -- "$@" + ;; + api) + shift + if [ $# -eq 0 ] || [ "${1:0:1}" = "-" ]; then + die "'api' requires a positional (e.g. v1/users/me)" + fi + # The relay's refuseApiNonGet enforces GET-only by parsing + # -X/--method/-f/-F, so we don't duplicate that check here. Writes go + # through the dedicated page.* ops. + exec "$CTL" integration notion api -- "$@" + ;; + pages) + shift + handle_pages "$@" + ;; + comment|comments) + # The T1 connector intentionally has no comment op — `ntn` exposes no + # top-level `comment` subcommand and Notion's REST POST /v1/comments + # takes a structured JSON body that doesn't trivially map from CLI + # flags. Tracked as a focused follow-up in docs/notion_backlog.md. + die "comment ops not supported yet (deferred from T2; see docs/notion_backlog.md)" + ;; + *) + die "'$1' is not proxied (supported: whoami, api <path>, pages {create,update}, --version)" + ;; +esac diff --git a/packages/sandbox-e2b/scripts/build-template.sh b/packages/sandbox-e2b/scripts/build-template.sh index 53399a8b..19a8afd9 100755 --- a/packages/sandbox-e2b/scripts/build-template.sh +++ b/packages/sandbox-e2b/scripts/build-template.sh @@ -22,6 +22,8 @@ # /tmp/agentbox-open -- in-box xdg-open shim # /tmp/agentbox-gh-shim -- in-box `gh` shim (routes to host gh) # /tmp/agentbox-git-shim -- in-box `git` shim (routes via relay) +# /tmp/agentbox-ntn-shim -- in-box `ntn`/`notion` shim (routes to host ntn) +# /tmp/agentbox-linear-shim -- in-box `linear` shim (routes to host linear; rejects `auth token`) # /tmp/agentbox-custom-CLAUDE.md -- /etc/claude-code/CLAUDE.md content # /tmp/agentbox-managed-settings.json -- /etc/claude-code/managed-settings.json # /tmp/agentbox-codex-hooks.json -- /usr/local/share/agentbox/codex-hooks.json @@ -278,15 +280,19 @@ done_ "apt cleanup" # login-shell shim above forces /usr/local/bin ahead of /usr/bin so these win.
# During the bake there is no relay, so they must not shadow the real binaries # until provisioning is done. Installed from /tmp just before the trim step. -step "relay shims (gh + git)" -install -m 0755 /tmp/agentbox-gh-shim /usr/local/bin/gh -install -m 0755 /tmp/agentbox-git-shim /usr/local/bin/git -done_ "relay shims (gh + git)" +step "relay shims (gh + git + ntn + linear)" +install -m 0755 /tmp/agentbox-gh-shim /usr/local/bin/gh +install -m 0755 /tmp/agentbox-git-shim /usr/local/bin/git +install -m 0755 /tmp/agentbox-ntn-shim /usr/local/bin/ntn +ln -sf /usr/local/bin/ntn /usr/local/bin/notion +install -m 0755 /tmp/agentbox-linear-shim /usr/local/bin/linear +done_ "relay shims (gh + git + ntn + linear)" step "trim /tmp/agentbox-*" rm -f /tmp/agentbox-ctl /tmp/agentbox-vnc-start \ /tmp/agentbox-checkpoint-cleanup /tmp/agentbox-open \ - /tmp/agentbox-gh-shim /tmp/agentbox-git-shim \ + /tmp/agentbox-gh-shim /tmp/agentbox-git-shim /tmp/agentbox-ntn-shim \ + /tmp/agentbox-linear-shim \ /tmp/agentbox-custom-CLAUDE.md /tmp/agentbox-managed-settings.json \ /tmp/agentbox-codex-hooks.json /tmp/agentbox-setup-skill.md mv /tmp/agentbox-build-template.sh /var/log/agentbox/build-template.sh 2>/dev/null || true diff --git a/packages/sandbox-e2b/src/runtime-assets.ts b/packages/sandbox-e2b/src/runtime-assets.ts index 6d3c4189..c7acb390 100644 --- a/packages/sandbox-e2b/src/runtime-assets.ts +++ b/packages/sandbox-e2b/src/runtime-assets.ts @@ -49,6 +49,8 @@ export const RUNTIME_ASSETS: readonly RuntimeAsset[] = [ { name: 'agentbox-open', remotePath: '/tmp/agentbox-open', remoteMode: 0o755 }, { name: 'gh-shim', remotePath: '/tmp/agentbox-gh-shim', remoteMode: 0o755 }, { name: 'git-shim', remotePath: '/tmp/agentbox-git-shim', remoteMode: 0o755 }, + { name: 'ntn-shim', remotePath: '/tmp/agentbox-ntn-shim', remoteMode: 0o755 }, + { name: 'linear-shim', remotePath: '/tmp/agentbox-linear-shim', remoteMode: 0o755 }, { name: 'custom-system-CLAUDE.md', remotePath: 
'/tmp/agentbox-custom-CLAUDE.md', remoteMode: 0o644 }, { name: 'claude-managed-settings.json', remotePath: '/tmp/agentbox-managed-settings.json', remoteMode: 0o644 }, { name: 'agentbox-codex-hooks.json', remotePath: '/tmp/agentbox-codex-hooks.json', remoteMode: 0o644 }, @@ -74,6 +76,8 @@ export function candidatesFor( 'agentbox-open': ['packages/sandbox-docker/scripts/agentbox-open'], 'gh-shim': ['packages/sandbox-docker/scripts/gh-shim'], 'git-shim': ['packages/sandbox-docker/scripts/git-shim'], + 'ntn-shim': ['packages/sandbox-docker/scripts/ntn-shim'], + 'linear-shim': ['packages/sandbox-docker/scripts/linear-shim'], 'custom-system-CLAUDE.md': ['packages/sandbox-e2b/scripts/custom-system-CLAUDE.md'], 'claude-managed-settings.json': ['packages/sandbox-docker/scripts/claude-managed-settings.json'], 'agentbox-codex-hooks.json': ['packages/sandbox-docker/scripts/agentbox-codex-hooks.json'], @@ -88,6 +92,8 @@ export function candidatesFor( 'agentbox-open': ['e2b/agentbox-open', 'docker/packages/sandbox-docker/scripts/agentbox-open'], 'gh-shim': ['e2b/gh-shim', 'docker/packages/sandbox-docker/scripts/gh-shim'], 'git-shim': ['e2b/git-shim', 'docker/packages/sandbox-docker/scripts/git-shim'], + 'ntn-shim': ['e2b/ntn-shim', 'docker/packages/sandbox-docker/scripts/ntn-shim'], + 'linear-shim': ['e2b/linear-shim', 'docker/packages/sandbox-docker/scripts/linear-shim'], 'custom-system-CLAUDE.md': ['e2b/custom-system-CLAUDE.md'], 'claude-managed-settings.json': ['e2b/claude-managed-settings.json', 'docker/packages/sandbox-docker/scripts/claude-managed-settings.json'], 'agentbox-codex-hooks.json': ['e2b/agentbox-codex-hooks.json', 'docker/packages/sandbox-docker/scripts/agentbox-codex-hooks.json'], diff --git a/packages/sandbox-hetzner/scripts/install-box.sh b/packages/sandbox-hetzner/scripts/install-box.sh index 1ea26b5b..e93e6b93 100644 --- a/packages/sandbox-hetzner/scripts/install-box.sh +++ b/packages/sandbox-hetzner/scripts/install-box.sh @@ -15,6 +15,8 @@ # 
/tmp/agentbox-open -- in-box xdg-open shim # /tmp/agentbox-gh-shim -- in-box `gh` shim (routes to host gh via relay) # /tmp/agentbox-git-shim -- in-box `git` shim (routes push/pull/fetch/clone via relay) +# /tmp/agentbox-ntn-shim -- in-box `ntn`/`notion` shim (routes Notion CLI to host ntn via relay) +# /tmp/agentbox-linear-shim -- in-box `linear` shim (routes @schpet/linear-cli to host linear via relay; rejects `auth token`) # /tmp/agentbox-custom-CLAUDE.md -- /etc/claude-code/CLAUDE.md content # /tmp/agentbox-managed-settings.json -- /etc/claude-code/managed-settings.json # /tmp/agentbox-codex-hooks.json -- /usr/local/share/agentbox/codex-hooks.json @@ -161,19 +163,26 @@ done_ "agentbox-ctl install" # *before* Chromium sidesteps the issue and keeps the snapshot complete. # Tracked as Phase-7 follow-up in docs/hertzner_backlog.md. -step "baked helper scripts (vnc / dockerd / cleanup / xdg-open / gh + git shims)" +step "baked helper scripts (vnc / dockerd / cleanup / xdg-open / gh + git + ntn + linear shims)" install -m 0755 /tmp/agentbox-vnc-start /usr/local/bin/agentbox-vnc-start install -m 0755 /tmp/agentbox-dockerd-start /usr/local/bin/agentbox-dockerd-start install -m 0755 /tmp/agentbox-checkpoint-cleanup /usr/local/bin/agentbox-checkpoint-cleanup install -m 0755 /tmp/agentbox-open /usr/local/bin/agentbox-open ln -sf /usr/local/bin/agentbox-open /usr/local/bin/xdg-open -# gh + git shims — same files baked by Dockerfile.box for the docker provider. -# The shim wins on PATH (default /usr/local/bin precedes /usr/bin) so any agent -# call to `gh ...` / `git push|pull|fetch|clone` routes through the relay; the -# git shim execs /usr/bin/git for everything else, no overhead. +# gh + git + ntn + linear shims — same files baked by Dockerfile.box for the +# docker provider. 
The shim wins on PATH (default /usr/local/bin precedes +# /usr/bin) so any agent call to `gh ...` / `git push|pull|fetch|clone` / +# `ntn ...` / `notion ...` / `linear ...` routes through the relay; the git +# shim execs /usr/bin/git for everything else, no overhead. `notion` is a +# symlink to `ntn` — same shim, per-service surface naming from +# docs/integrations_backlog.md. The linear shim explicitly rejects +# `linear auth token` (which would print the raw API key). install -m 0755 /tmp/agentbox-gh-shim /usr/local/bin/gh install -m 0755 /tmp/agentbox-git-shim /usr/local/bin/git -done_ "baked helper scripts (vnc / dockerd / cleanup / xdg-open / gh + git shims)" +install -m 0755 /tmp/agentbox-ntn-shim /usr/local/bin/ntn +ln -sf /usr/local/bin/ntn /usr/local/bin/notion +install -m 0755 /tmp/agentbox-linear-shim /usr/local/bin/linear +done_ "baked helper scripts (vnc / dockerd / cleanup / xdg-open / gh + git + ntn + linear shims)" step "baked config files (claude / codex / setup guide / tmux.conf)" install -m 0644 /tmp/agentbox-custom-CLAUDE.md /etc/claude-code/CLAUDE.md @@ -364,7 +373,8 @@ step "trim /tmp/agentbox-*" # re-read which lines actually executed against which source. rm -f /tmp/agentbox-ctl /tmp/agentbox-vnc-start /tmp/agentbox-dockerd-start \ /tmp/agentbox-checkpoint-cleanup /tmp/agentbox-open \ - /tmp/agentbox-gh-shim /tmp/agentbox-git-shim \ + /tmp/agentbox-gh-shim /tmp/agentbox-git-shim /tmp/agentbox-ntn-shim \ + /tmp/agentbox-linear-shim \ /tmp/agentbox-custom-CLAUDE.md /tmp/agentbox-managed-settings.json \ /tmp/agentbox-codex-hooks.json /tmp/agentbox-setup-skill.md # Move install-box.sh into the persistent location for diagnostics. 
diff --git a/packages/sandbox-hetzner/src/runtime-assets.ts b/packages/sandbox-hetzner/src/runtime-assets.ts index f514ae75..ff6266ab 100644 --- a/packages/sandbox-hetzner/src/runtime-assets.ts +++ b/packages/sandbox-hetzner/src/runtime-assets.ts @@ -68,6 +68,8 @@ export const RUNTIME_ASSETS: readonly RuntimeAsset[] = [ { name: 'agentbox-open', remoteBasename: 'agentbox-open', remoteMode: 0o755 }, { name: 'gh-shim', remoteBasename: 'agentbox-gh-shim', remoteMode: 0o755 }, { name: 'git-shim', remoteBasename: 'agentbox-git-shim', remoteMode: 0o755 }, + { name: 'ntn-shim', remoteBasename: 'agentbox-ntn-shim', remoteMode: 0o755 }, + { name: 'linear-shim', remoteBasename: 'agentbox-linear-shim', remoteMode: 0o755 }, { name: 'custom-system-CLAUDE.md', remoteBasename: 'agentbox-custom-CLAUDE.md', remoteMode: 0o644 }, { name: 'claude-managed-settings.json', remoteBasename: 'agentbox-managed-settings.json', remoteMode: 0o644 }, { name: 'agentbox-codex-hooks.json', remoteBasename: 'agentbox-codex-hooks.json', remoteMode: 0o644 }, @@ -105,6 +107,8 @@ export function candidatesFor( 'agentbox-open': ['packages/sandbox-docker/scripts/agentbox-open'], 'gh-shim': ['packages/sandbox-docker/scripts/gh-shim'], 'git-shim': ['packages/sandbox-docker/scripts/git-shim'], + 'ntn-shim': ['packages/sandbox-docker/scripts/ntn-shim'], + 'linear-shim': ['packages/sandbox-docker/scripts/linear-shim'], 'custom-system-CLAUDE.md': ['packages/sandbox-hetzner/scripts/custom-system-CLAUDE.md'], 'claude-managed-settings.json': ['packages/sandbox-docker/scripts/claude-managed-settings.json'], 'agentbox-codex-hooks.json': ['packages/sandbox-docker/scripts/agentbox-codex-hooks.json'], @@ -121,6 +125,8 @@ export function candidatesFor( 'agentbox-open': ['hetzner/agentbox-open', 'docker/packages/sandbox-docker/scripts/agentbox-open'], 'gh-shim': ['hetzner/gh-shim', 'docker/packages/sandbox-docker/scripts/gh-shim'], 'git-shim': ['hetzner/git-shim', 'docker/packages/sandbox-docker/scripts/git-shim'], + 
'ntn-shim': ['hetzner/ntn-shim', 'docker/packages/sandbox-docker/scripts/ntn-shim'], + 'linear-shim': ['hetzner/linear-shim', 'docker/packages/sandbox-docker/scripts/linear-shim'], 'custom-system-CLAUDE.md': ['hetzner/custom-system-CLAUDE.md'], 'claude-managed-settings.json': ['hetzner/claude-managed-settings.json', 'docker/packages/sandbox-docker/scripts/claude-managed-settings.json'], 'agentbox-codex-hooks.json': ['hetzner/agentbox-codex-hooks.json', 'docker/packages/sandbox-docker/scripts/agentbox-codex-hooks.json'], diff --git a/packages/sandbox-hetzner/test/runtime-assets.test.ts b/packages/sandbox-hetzner/test/runtime-assets.test.ts index 9691f17a..abe0e92f 100644 --- a/packages/sandbox-hetzner/test/runtime-assets.test.ts +++ b/packages/sandbox-hetzner/test/runtime-assets.test.ts @@ -21,6 +21,8 @@ function makeFakeRepo(): string { 'packages/sandbox-docker/scripts/agentbox-open', 'packages/sandbox-docker/scripts/gh-shim', 'packages/sandbox-docker/scripts/git-shim', + 'packages/sandbox-docker/scripts/ntn-shim', + 'packages/sandbox-docker/scripts/linear-shim', 'packages/sandbox-hetzner/scripts/custom-system-CLAUDE.md', 'packages/sandbox-docker/scripts/claude-managed-settings.json', 'packages/sandbox-docker/scripts/agentbox-codex-hooks.json', diff --git a/packages/sandbox-vercel/scripts/provision.sh b/packages/sandbox-vercel/scripts/provision.sh index edd5705e..3b65f088 100644 --- a/packages/sandbox-vercel/scripts/provision.sh +++ b/packages/sandbox-vercel/scripts/provision.sh @@ -22,6 +22,8 @@ # /tmp/agentbox-open -- in-box xdg-open shim # /tmp/agentbox-gh-shim -- in-box `gh` shim (routes to host gh) # /tmp/agentbox-git-shim -- in-box `git` shim (routes via relay) +# /tmp/agentbox-ntn-shim -- in-box `ntn`/`notion` shim (routes to host ntn) +# /tmp/agentbox-linear-shim -- in-box `linear` shim (routes to host linear; rejects `auth token`) # /tmp/agentbox-custom-CLAUDE.md -- /etc/claude-code/CLAUDE.md content # /tmp/agentbox-managed-settings.json -- 
/etc/claude-code/managed-settings.json # /tmp/agentbox-codex-hooks.json -- /usr/local/share/agentbox/codex-hooks.json @@ -317,15 +319,19 @@ done_ "dnf cleanup" # the bake there is no relay, so they must not shadow the real binaries until # provisioning is done. Installed from /tmp just before the trim step removes the # sources. -step "relay shims (gh + git)" -install -m 0755 /tmp/agentbox-gh-shim /usr/local/bin/gh -install -m 0755 /tmp/agentbox-git-shim /usr/local/bin/git -done_ "relay shims (gh + git)" +step "relay shims (gh + git + ntn + linear)" +install -m 0755 /tmp/agentbox-gh-shim /usr/local/bin/gh +install -m 0755 /tmp/agentbox-git-shim /usr/local/bin/git +install -m 0755 /tmp/agentbox-ntn-shim /usr/local/bin/ntn +ln -sf /usr/local/bin/ntn /usr/local/bin/notion +install -m 0755 /tmp/agentbox-linear-shim /usr/local/bin/linear +done_ "relay shims (gh + git + ntn + linear)" step "trim /tmp/agentbox-*" rm -f /tmp/agentbox-ctl /tmp/agentbox-vnc-start \ /tmp/agentbox-checkpoint-cleanup /tmp/agentbox-open \ - /tmp/agentbox-gh-shim /tmp/agentbox-git-shim \ + /tmp/agentbox-gh-shim /tmp/agentbox-git-shim /tmp/agentbox-ntn-shim \ + /tmp/agentbox-linear-shim \ /tmp/agentbox-custom-CLAUDE.md /tmp/agentbox-managed-settings.json \ /tmp/agentbox-codex-hooks.json /tmp/agentbox-setup-skill.md mv /tmp/agentbox-provision.sh /var/log/agentbox/provision.sh 2>/dev/null || true diff --git a/packages/sandbox-vercel/src/runtime-assets.ts b/packages/sandbox-vercel/src/runtime-assets.ts index 5aeccb87..033e889d 100644 --- a/packages/sandbox-vercel/src/runtime-assets.ts +++ b/packages/sandbox-vercel/src/runtime-assets.ts @@ -51,6 +51,8 @@ export const RUNTIME_ASSETS: readonly RuntimeAsset[] = [ { name: 'agentbox-open', remotePath: '/tmp/agentbox-open', remoteMode: 0o755 }, { name: 'gh-shim', remotePath: '/tmp/agentbox-gh-shim', remoteMode: 0o755 }, { name: 'git-shim', remotePath: '/tmp/agentbox-git-shim', remoteMode: 0o755 }, + { name: 'ntn-shim', remotePath: '/tmp/agentbox-ntn-shim', 
remoteMode: 0o755 }, + { name: 'linear-shim', remotePath: '/tmp/agentbox-linear-shim', remoteMode: 0o755 }, { name: 'custom-system-CLAUDE.md', remotePath: '/tmp/agentbox-custom-CLAUDE.md', remoteMode: 0o644 }, { name: 'claude-managed-settings.json', remotePath: '/tmp/agentbox-managed-settings.json', remoteMode: 0o644 }, { name: 'agentbox-codex-hooks.json', remotePath: '/tmp/agentbox-codex-hooks.json', remoteMode: 0o644 }, @@ -76,6 +78,8 @@ export function candidatesFor( 'agentbox-open': ['packages/sandbox-docker/scripts/agentbox-open'], 'gh-shim': ['packages/sandbox-docker/scripts/gh-shim'], 'git-shim': ['packages/sandbox-docker/scripts/git-shim'], + 'ntn-shim': ['packages/sandbox-docker/scripts/ntn-shim'], + 'linear-shim': ['packages/sandbox-docker/scripts/linear-shim'], 'custom-system-CLAUDE.md': ['packages/sandbox-vercel/scripts/custom-system-CLAUDE.md'], 'claude-managed-settings.json': ['packages/sandbox-docker/scripts/claude-managed-settings.json'], 'agentbox-codex-hooks.json': ['packages/sandbox-docker/scripts/agentbox-codex-hooks.json'], @@ -90,6 +94,8 @@ export function candidatesFor( 'agentbox-open': ['vercel/agentbox-open', 'docker/packages/sandbox-docker/scripts/agentbox-open'], 'gh-shim': ['vercel/gh-shim', 'docker/packages/sandbox-docker/scripts/gh-shim'], 'git-shim': ['vercel/git-shim', 'docker/packages/sandbox-docker/scripts/git-shim'], + 'ntn-shim': ['vercel/ntn-shim', 'docker/packages/sandbox-docker/scripts/ntn-shim'], + 'linear-shim': ['vercel/linear-shim', 'docker/packages/sandbox-docker/scripts/linear-shim'], 'custom-system-CLAUDE.md': ['vercel/custom-system-CLAUDE.md'], 'claude-managed-settings.json': ['vercel/claude-managed-settings.json', 'docker/packages/sandbox-docker/scripts/claude-managed-settings.json'], 'agentbox-codex-hooks.json': ['vercel/agentbox-codex-hooks.json', 'docker/packages/sandbox-docker/scripts/agentbox-codex-hooks.json'], diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index b5b1a365..eb5c1943 100644 --- a/pnpm-lock.yaml +++ 
b/pnpm-lock.yaml @@ -82,6 +82,9 @@ importers: '@agentbox/ctl': specifier: workspace:* version: link:../../packages/ctl + '@agentbox/integrations': + specifier: workspace:* + version: link:../../packages/integrations '@agentbox/relay': specifier: workspace:* version: link:../../packages/relay @@ -207,6 +210,9 @@ importers: '@agentbox/core': specifier: workspace:* version: link:../core + '@agentbox/integrations': + specifier: workspace:* + version: link:../integrations '@agentbox/relay': specifier: workspace:* version: link:../relay @@ -233,6 +239,21 @@ importers: specifier: ^2.1.8 version: 2.1.9(@types/node@22.19.19)(lightningcss@1.32.0) + packages/integrations: + devDependencies: + '@types/node': + specifier: ^22.10.1 + version: 22.19.19 + tsup: + specifier: ^8.3.5 + version: 8.5.1(jiti@2.7.0)(postcss@8.5.14)(tsx@4.22.3)(typescript@5.9.3)(yaml@2.9.0) + typescript: + specifier: ^5.7.2 + version: 5.9.3 + vitest: + specifier: ^2.1.8 + version: 2.1.9(@types/node@22.19.19)(lightningcss@1.32.0) + packages/relay: dependencies: '@agentbox/config': @@ -241,6 +262,9 @@ importers: '@agentbox/core': specifier: workspace:* version: link:../core + '@agentbox/integrations': + specifier: workspace:* + version: link:../integrations '@agentbox/sandbox-core': specifier: workspace:* version: link:../sandbox-core