Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 14 additions & 9 deletions apps/cli/src/commands/config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,22 +69,27 @@ function fail(message: string): never {
process.exit(1);
}

// Walk every dot-segment, not just the first one — `integrations.notion.enabled`
// must reach the deepest leaf, not be split as `integrations` + `notion.enabled`.
// Same shape as `readLeaf` in packages/config/src/load.ts.
function walkKey(obj: Record<string, unknown> | undefined, key: string): unknown {
let cur: unknown = obj;
for (const seg of key.split('.')) {
if (cur === undefined || cur === null || typeof cur !== 'object') return undefined;
cur = (cur as Record<string, unknown>)[seg];
}
return cur;
}

function leafValue(loaded: LoadedConfig, key: string): unknown {
const idx = key.indexOf('.');
const branch = key.slice(0, idx);
const leaf = key.slice(idx + 1);
return (loaded.effective as unknown as Record<string, Record<string, unknown>>)[branch]?.[leaf];
return walkKey(loaded.effective as unknown as Record<string, unknown>, key);
}

function rawLeafFromValues(
values: Record<string, unknown> | undefined,
key: string,
): unknown {
if (!values) return undefined;
const idx = key.indexOf('.');
const b = (values as Record<string, unknown>)[key.slice(0, idx)];
if (!b || typeof b !== 'object') return undefined;
return (b as Record<string, unknown>)[key.slice(idx + 1)];
return walkKey(values, key);
}

function describeSource(source: ConfigSource, loaded: LoadedConfig): string {
Expand Down
105 changes: 105 additions & 0 deletions apps/cli/test/config-get-nested.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import { mkdtemp, rm, realpath } from 'node:fs/promises';
import { homedir, tmpdir } from 'node:os';
import { join } from 'node:path';
import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
import { setConfigValue } from '@agentbox/config';

// Regression guard: `agentbox config get integrations.notion.enabled` must
// return the deeply-nested boolean, not `<unset>`. The first iteration of the
// `leafValue`/`rawLeafFromValues` helpers in config.ts split on the FIRST dot
// only, so the 3-level key `integrations.notion.enabled` resolved to
// `effective.integrations["notion.enabled"]` (undefined), even though
// `config set` and `loadEffectiveConfig` correctly walked the full path.

let tmpCwd: string;
let prevCwd: string;

beforeEach(async () => {
// realpath so the hash matches what setConfigValue → findProjectRoot computes.
tmpCwd = await realpath(await mkdtemp(join(tmpdir(), 'agentbox-cfg-get-')));
prevCwd = process.cwd();
process.chdir(tmpCwd);
// Commander caches parsed options on the singleton `configCommand`; reset
// module state so each parseAsync starts from a clean slate (otherwise
// `--json`/`--all` from a prior test leak into the next).
vi.resetModules();
});

afterEach(async () => {
process.chdir(prevCwd);
await rm(tmpCwd, { recursive: true, force: true });
// setConfigValue writes under ~/.agentbox/projects/<hash>/; clear it like
// set-unset-roundtrip.test.ts does (STATE_DIR is captured at module load
// from homedir(), so we can't redirect it per-test).
await rm(join(homedir(), '.agentbox'), { recursive: true, force: true });
});

async function runConfigGet(args: string[]): Promise<{ stdout: string; stderr: string }> {
let stdout = '';
let stderr = '';
const stdoutSpy = vi.spyOn(process.stdout, 'write').mockImplementation((chunk: unknown) => {
stdout += typeof chunk === 'string' ? chunk : String(chunk);
return true;
});
const stderrSpy = vi.spyOn(process.stderr, 'write').mockImplementation((chunk: unknown) => {
stderr += typeof chunk === 'string' ? chunk : String(chunk);
return true;
});
try {
// Dynamic import after vi.resetModules() so the `getCommand`'s
// commander state is fresh per test.
const { configCommand } = await import('../src/commands/config.js');
await configCommand.parseAsync(['node', 'agentbox-config', ...args]);
} finally {
stdoutSpy.mockRestore();
stderrSpy.mockRestore();
}
return { stdout, stderr };
}

describe('config get on a nested 3-level key', () => {
it('returns the leaf value, not <unset>', async () => {
await setConfigValue('project', 'integrations.notion.enabled', 'true', tmpCwd, {
raw: true,
});
const { stdout } = await runConfigGet(['get', 'integrations.notion.enabled']);
expect(stdout).toContain('integrations.notion.enabled = true');
expect(stdout).toMatch(/from: project /);
expect(stdout).not.toContain('<unset>');
});

it('--json carries the value and source', async () => {
await setConfigValue('project', 'integrations.notion.enabled', 'true', tmpCwd, {
raw: true,
});
const { stdout } = await runConfigGet([
'get',
'integrations.notion.enabled',
'--json',
]);
const parsed = JSON.parse(stdout) as { key: string; value: unknown; source: string };
expect(parsed.key).toBe('integrations.notion.enabled');
expect(parsed.value).toBe(true);
expect(parsed.source).toBe('project');
});

it('--all walks every layer (no silent <unset> for the project layer)', async () => {
await setConfigValue('project', 'integrations.notion.enabled', 'true', tmpCwd, {
raw: true,
});
const { stdout } = await runConfigGet([
'get',
'integrations.notion.enabled',
'--all',
]);
expect(stdout).toMatch(/effective: true /);
expect(stdout).toMatch(/project:\s+true /);
expect(stdout).toMatch(/default:\s+false/);
});

it('unset key falls back to the built-in default (false)', async () => {
const { stdout } = await runConfigGet(['get', 'integrations.notion.enabled']);
expect(stdout).toContain('integrations.notion.enabled = false');
expect(stdout).toMatch(/from: built-in default/);
});
});
48 changes: 48 additions & 0 deletions docs/integrations.md
Original file line number Diff line number Diff line change
Expand Up @@ -128,6 +128,54 @@ For T4 nested-box e2e (box → box, exercise the integration from inside a box),

Carry is host→box and one-prompt-approved (see [`features.md`](./features.md) → `carry:`). T4 wires the actual e2e verification.

## Verification / live e2e results

T4 ran the integration against the live Notion API from inside a real box.
Captured evidence:

- **`notion whoami` (read)** — round-trips through the in-box shim → host
relay → host `ntn` → Notion API; returns the host bot identity with no
approval prompt.
- **`notion api v1/users/me` (read)** — same path; returns the host bot's
JSON identity record. No prompt.
- **`notion api … -X POST` and `--method PATCH` (refused)** — the
connector's `refuseApiNonGet` correctly classifies these as writes and
blocks them before any host process is spawned: `notion api: only GET is
proxied (use page.create / page.update for writes); detected method
'POST'`, exit 65.
- **No agent-side credential** — `printenv | grep -i notion` in the box
returns nothing in the agent's environment. The token lives only on the
host. The carried `~/.config/notion/auth.json` file is for nested-box
relay hosts and never reaches the agent's process env.
- **Connector argv bug (fixed in T4)** — a live `notion pages create`
through the host relay (rebuilt from T3 code) failed with `error:
unrecognized subcommand 'page'. tip: some similar subcommands exist:
'update', 'pages'`. Real `ntn`'s surface is `api datasources files pages
login logout whoami workers`. The connector's `buildArgv` was building
singular `['page', 'create', …]`; T4 changed it to `['pages', 'create',
…]` and `['pages', 'update', …]`. Live write round-trip against the
fix lands after the host relay rebuilds with the merged T4 code.
- **`agentbox config get` nested-key bug (fixed in T4)** — `config get
integrations.notion.enabled` was returning `<unset>` even though
`config set` + `loadEffectiveConfig` worked correctly, because
`apps/cli/src/commands/config.ts` split keys on the FIRST dot only. T4
replaced the helpers with a `walkKey` function that walks all segments
(mirrors `readLeaf` in `packages/config/src/load.ts`). New regression
test `apps/cli/test/config-get-nested.test.ts`.

### Nested-box e2e — deferred, not blocking

The nested-box scenario (a box-inside-a-box running a `notion` op through
this box's relay) was time-boxed in T4 and deferred. Architecturally, the
in-box `agentbox-ctl` daemon (port 8788) forwards `/rpc` to the HOST relay
(`host.docker.internal:8787`), not to a relay running in this box — so a
nested box's `notion pages create` would still terminate at the host
relay's spawn, not in this box's daemon. That means nested-box e2e
exercises the carry mechanics (already verified — `~/.config/notion/`
present in this box) more than the connector's spawn path. A future
follow-up that lifts the relay into the box's daemon would change this;
tracked under "Open follow-ups" below.

## Cross-provider parity

`integration.<service>.<op>` is dispatched identically on docker and cloud because the wire shape is method-agnostic. The cloud path long-polls `/bridge/poll`, runs `executeCloudAction → runIntegrationRpc`, which reuses the exact handler. The Hetzner / Daytona / Vercel / E2B image flows all ship the `ntn` / `notion` shim (see "In-box surface" above). No provider-specific code in the integrations spine.
Expand Down
28 changes: 28 additions & 0 deletions docs/integrations_backlog.md
Original file line number Diff line number Diff line change
Expand Up @@ -278,3 +278,31 @@ Manual e2e (docker first, then one cloud provider — follow CLAUDE.md "Testing"
toggles; Linear personal keys inherit full user perms (OAuth-only read scope).
- Per-op write allowlist tuning once real agent flows exist (start conservative).
- ClickUp connector trust/maintenance caveats; revisit if a stronger CLI emerges.

---

## Status

- **Notion path: COMPLETE (T1–T4 done, 2026-06-06).** Shipped the shared
`@agentbox/integrations` foundation (descriptor, registry,
`runHostIntegration`, generic `integration.<svc>.<op>` dispatch in both
`server.ts` and `host-actions.ts`); in-box `notion`/`ntn` shim across all
five providers; `integrations.notion.enabled` typed config flag with the
relay-side `refuseIfIntegrationDisabled` gate; `agentbox doctor` per-
connector reporting driven off `ALL_CONNECTORS`; public + internal docs.
T4 closed the loop with a live read e2e (`whoami` + `api v1/users/me` +
`refuseApiNonGet` for non-GET) and fixed two bugs the e2e surfaced —
`agentbox config get` nested-key resolution (`apps/cli/src/commands/config.ts`)
and the `pages` vs `page` argv mismatch (`connectors/notion.ts`). See
[`notion_backlog.md`](./notion_backlog.md) for full T4 evidence.
**Deferred / follow-ups**: `comment.add` (needs a Notion-API payload
assembler for the structured `POST /v1/comments` body — `ntn` exposes no
`comment` subcommand to wrap), host-initiated tokens for integrations
(the relay accepts them but the host-CLI mint path isn't wired yet for
the `integration.*` family), nested-box e2e (architecturally the in-box
agent's relay calls terminate at the host relay either way, so this
exercises the carry block more than the spawn-side; tracked, not
blocking).
- **Linear / Trello / ClickUp paths: NOT STARTED.** Each is a new descriptor
+ small shim; no relay/ctl core changes (the abstraction was validated by
Notion). ClickUp will be the one custom-REST connector (no good CLI).
79 changes: 72 additions & 7 deletions docs/notion_backlog.md
Original file line number Diff line number Diff line change
Expand Up @@ -129,13 +129,58 @@ Make a box agent able to type `notion …` or `ntn …`.
`/rpc` methods" line updated to list `gh.pr.*` /
`integration.<svc>.<op>` already in place.

### T4 — Nested-box e2e verification + carry + closeout ⬜ not started
- Carry `ntn` file-auth into a box; from that box create a nested box; run a
`notion` read (no prompt) + a `notion` write (prompted, approve→succeeds,
deny→nothing created), verifying ground truth in the live Notion space.
- Confirm a box never holds a Notion token (`printenv | grep -i notion`).
- Fix anything the e2e surfaces; mark the Notion path done in
`integrations_backlog.md`.
### T4 — Live e2e verification + bug-fixes-from-e2e + closeout ✅ done
- Primary e2e (real box → host relay → live Notion API): `notion whoami` and
`notion api v1/users/me` return the host bot identity with no prompt;
`notion api v1/comments -X POST` and `notion api ... --method PATCH` are
refused with `notion api: only GET is proxied (use page.create /
page.update for writes); detected method 'POST'` (exit 65). `printenv |
grep -i notion` shows nothing in the box's process env — the agent never
holds the credential. (The carried `~/.config/notion/auth.json` is a
separate concern for nested-box use; the in-box AGENT itself sees no
token in env.)
- Two bugs surfaced and fixed in T4:
1. **`agentbox config get` couldn't read nested 3-level keys.** The
helpers in `apps/cli/src/commands/config.ts` split on the FIRST dot
only, so `integrations.notion.enabled` resolved to
`effective.integrations["notion.enabled"]` (undefined / `<unset>`)
even when `config set` + `loadEffectiveConfig` worked correctly.
Fix: replace `leafValue`/`rawLeafFromValues` with a single `walkKey`
helper that splits on ALL dots (mirrors `readLeaf` in
`packages/config/src/load.ts`). New regression test
`apps/cli/test/config-get-nested.test.ts` covers the plain, `--json`,
`--all`, and unset/default cases; without the fix all four fail.
2. **Connector `buildArgv` used singular `page` but real `ntn` is `pages`
(plural).** Live evidence: a `notion pages create` call through the
host relay hit approval → spawned `ntn page create` → failed with
`error: unrecognized subcommand 'page'. tip: some similar subcommands
exist: 'update', 'pages'` (exit 2). `ntn --help` confirms the surface
is `api datasources files pages login logout whoami workers`. Fix:
`connectors/notion.ts` now builds `['pages', 'create', …]` /
`['pages', 'update', …]`. Existing tests in
`packages/integrations/test/registry.test.ts` and
`packages/relay/test/integrations.test.ts` updated to assert the
correct argv.
- Live write round-trip (host relay → live `ntn pages create`) was not
re-run from this box, because the host relay was rebuilt with T3 code
(pre-fix `page` argv) before T4 started. Once T4 merges and the host
relay rebuilds with the new `pages` argv, the prompted write path will
work end-to-end. The fix is validated by (a) the live failure mode
matching the bug exactly, (b) the `pages create --help` host probe
showing the correct surface, (c) updated unit tests that pin the new
argv.
- Nested-box e2e (boxes-launch-boxes path): **deferred**. Docker-in-docker
is available but the base image isn't baked in this box, and the chain
for an in-box agent's `notion` write would still terminate at the same
HOST relay (not this box's daemon — the box daemon forwards to the host
relay at `host.docker.internal:8787`), so it wouldn't actually exercise
MY fixed code on the spawn side. The carry block in `agentbox.yaml`
already ships the file-auth into nested boxes (verified present at
`~/.config/notion/auth.json` in this box). Real nested-relay isolation
testing is tracked as a follow-up — see "Open follow-ups" in
[`integrations.md`](./integrations.md).
- `integrations_backlog.md` updated: Notion path marked complete through
T4.

## Status log
- 2026-06-06: Backlog created; host-side carry for `ntn` file-auth added to
Expand Down Expand Up @@ -167,3 +212,23 @@ Make a box agent able to type `notion …` or `ntn …`.
`configuration.mdx` / `cli.mdx`, new RPC method-family bullet in
`docs/host-relay.md`, Notion entry in `docs/features.md`. T4 (nested-
box e2e + carry-based file-auth verification) is the remaining task.
- 2026-06-06: T4 shipped — live e2e from inside a box against the real
Notion API. Reads pass through with no prompt (`notion whoami`,
`notion api v1/users/me`), `notion api` correctly refuses non-GET
methods (`-X POST` / `--method PATCH` → exit 65 with `refuseApiNonGet`
message), `printenv | grep -i notion` shows nothing in the agent's
env. Two bugs fixed: (1) `config get` couldn't read 3-level nested
keys because `apps/cli/src/commands/config.ts` split on the first dot
only — replaced with a `walkKey` helper that splits on all segments
(mirrors `readLeaf` in `packages/config/src/load.ts`); regression
test `apps/cli/test/config-get-nested.test.ts` added; (2) connector
built singular `['page', 'create', …]` argv but real `ntn` is `pages`
(plural) — confirmed live by the host relay's spawn failing with
`unrecognized subcommand 'page'`, fixed in `connectors/notion.ts`,
existing tests in `packages/integrations/test/registry.test.ts` and
`packages/relay/test/integrations.test.ts` updated. Live write
round-trip with the fix needs a host relay rebuild post-merge.
Nested-box e2e deferred — the box-daemon → host-relay chain means an
in-box agent's write still terminates at the host relay (not this
box's daemon), so it wouldn't exercise the spawn-side fix from a
nested box anyway; the carry block is verified present.
4 changes: 2 additions & 2 deletions packages/integrations/src/connectors/notion.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,11 @@ export const notionConnector: IntegrationConnector = {
},
'page.create': {
write: true,
buildArgv: (args) => ['page', 'create', ...args],
buildArgv: (args) => ['pages', 'create', ...args],
},
'page.update': {
write: true,
buildArgv: (args) => ['page', 'update', ...args],
buildArgv: (args) => ['pages', 'update', ...args],
},
},
};
Expand Down
4 changes: 2 additions & 2 deletions packages/integrations/test/registry.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,13 +42,13 @@ describe('notion connector', () => {
'v1/users/me',
]);
expect(notionConnector.ops['page.create']?.buildArgv?.(['--parent', 'db_id'])).toEqual([
'page',
'pages',
'create',
'--parent',
'db_id',
]);
expect(notionConnector.ops['page.update']?.buildArgv?.(['page_id', '--archive'])).toEqual([
'page',
'pages',
'update',
'page_id',
'--archive',
Expand Down
4 changes: 2 additions & 2 deletions packages/relay/test/integrations.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -140,7 +140,7 @@ describe('relay /rpc integration.* flow', () => {
stubLog = join(stubDir, 'invocations.log');
stubEnvLog = join(stubDir, 'env.log');
// The stub records argv + the NOTION_KEYRING env value. `--version`
// satisfies the readiness probe; `api …` and `page create …` etc.
// satisfies the readiness probe; `api …` and `pages create …` etc.
// echo their argv and exit 0 so the relay's runHostIntegration
// produces a stable, asserted stdout.
const script = `#!/usr/bin/env bash
Expand Down Expand Up @@ -269,7 +269,7 @@ esac
expect(r.status).toBe(200);
const body = r.body as { exitCode: number; stdout: string };
expect(body.exitCode).toBe(0);
expect(body.stdout).toContain('stub: ntn page create --parent db_id');
expect(body.stdout).toContain('stub: ntn pages create --parent db_id');
});

it('op not on allowlist refuses with exit 65', async () => {
Expand Down
Loading