Skip to content

Commit 31532c3

Browse files
committed
sync(bfmono): test(gambit): align sandbox assertion + extend runtime coverage (+19 more) (bfmono@2967fb893)
This PR is an automated gambitmono sync of bfmono Gambit packages.

- Source: `packages/gambit/`
- Core: `packages/gambit-core/`
- bfmono rev: 2967fb893

Changes:

- 2967fb893 test(gambit): align sandbox assertion + extend runtime coverage
- b746e6453 fix(gambit-core): honor directory permission scopes
- 2dcd5f19d fix(gambit-core): include orchestration worker in bootstrap reads
- 52d54d3c7 fix(gambit-core): constrain inspect worker bootstrap reads
- a4b617849 fix(gambit-core): tighten worker bootstrap reads and child deadlines
- 9b8b6fa96 fix(gambit-core): harden worker bootstrap import parsing
- 3966dcbb8 fix(gambit-core): preserve nested sandboxing in orchestration worker
- 717c5a40c test(gambit-core): add phase-5 orchestration-worker serial invariants
- 7e6296a1d feat(gambit-core): run root llm orchestration in worker sandbox
- 3c1b5851a docs(gambit): document worker sandbox defaults and rollback flags
- 14a94c886 feat(gambit): default worker exec on cli surfaces with rollback controls
- 21e6b4d37 fix(gambit-server): persist and catch backend errors across build/test/grade
- 29eaa58b7 fix(gambit): enforce inherited permissions in OpenAI compat actions
- d502df5cd feat(gambit-core): add permission-gated built-in file tooling
- c04141504 feat(gambit-cli): add session permission override flags
- 85442ffcd test(gambit-core): add worker bootstrap permission regressions
- 08bab3af1 feat(gambit-core): add worker compute runner with bridged permissions
- 70ec3b942 feat(gambit): add tool-call-aware grader schemas and root-deck guards
- cae381f00 feat(gambit): align scaffolds with product command and hourglass policies
- 5faa48b35 feat(gambit): move bot policy to folder and enforce policy summarizer flow

Do not edit this repo directly; make changes in bfmono and re-run the sync.
1 parent 5356ee6 commit 31532c3

27 files changed

Lines changed: 8168 additions & 2648 deletions

docs/external/reference/cli.md

Lines changed: 12 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,15 +11,15 @@ How to run Gambit, the agent harness framework, locally and observe runs.
1111
- Command help: `deno run -A src/cli.ts help <command>` (or
1212
`deno run -A src/cli.ts <command> -h`).
1313
- Run once:
14-
`deno run -A src/cli.ts run <deck> [--context <json|string>] [--message <json|string>] [--model <id>] [--model-force <id>] [--trace <file>] [--state <file>] [--stream] [--responses] [--verbose]`
14+
`deno run -A src/cli.ts run <deck> [--context <json|string>] [--message <json|string>] [--model <id>] [--model-force <id>] [--trace <file>] [--state <file>] [--stream] [--responses] [--verbose] [--worker-sandbox|--no-worker-sandbox|--legacy-exec]`
1515
- Check models: `deno run -A src/cli.ts check <deck>`
1616
- REPL: `deno run -A src/cli.ts repl <deck>` (defaults to
1717
`src/decks/gambit-assistant.deck.md` in a local checkout). Streams by default
1818
and keeps state in memory for the session.
1919
- Test bot (CLI):
20-
`deno run -A src/cli.ts test-bot <root-deck> --test-deck <persona-deck> [--context <json|string>] [--bot-input <json|string>] [--message <json|string>] [--max-turns <n>] [--state <file>] [--grade <grader-deck> ...] [--trace <file>] [--responses] [--verbose]`
20+
`deno run -A src/cli.ts test-bot <root-deck> --test-deck <persona-deck> [--context <json|string>] [--bot-input <json|string>] [--message <json|string>] [--max-turns <n>] [--state <file>] [--grade <grader-deck> ...] [--trace <file>] [--responses] [--verbose] [--worker-sandbox|--no-worker-sandbox|--legacy-exec]`
2121
- Grade (CLI):
22-
`deno run -A src/cli.ts grade <grader-deck> --state <file> [--model <id>] [--model-force <id>] [--trace <file>] [--responses] [--verbose]`
22+
`deno run -A src/cli.ts grade <grader-deck> --state <file> [--model <id>] [--model-force <id>] [--trace <file>] [--responses] [--verbose] [--worker-sandbox|--no-worker-sandbox|--legacy-exec]`
2323
- Export bundle (CLI):
2424
`deno run -A src/cli.ts export [<deck>] --state <file> --out <bundle.tar.gz>`
2525
- Debug UI: `deno run -A src/cli.ts serve <deck> --port 8000` then open
@@ -46,6 +46,15 @@ How to run Gambit, the agent harness framework, locally and observe runs.
4646
- `GAMBIT_RESPONSES_MODE=1`: env alternative to `--responses` for runtime/state.
4747
- `GAMBIT_OPENROUTER_RESPONSES=1`: route OpenRouter calls through the Responses
4848
API (experimental; chat remains the default path).
49+
- Worker execution is on by default for deck-executing surfaces. Use
50+
`--no-worker-sandbox` (or `--legacy-exec`) to roll back to legacy in-process
51+
execution. `--sandbox` and `--no-sandbox` still work as deprecated aliases.
52+
- `gambit.toml` config equivalent:
53+
```toml
54+
[execution]
55+
worker_sandbox = false # same as --no-worker-sandbox
56+
# legacy_exec = true # equivalent rollback toggle
57+
```
4958

5059
## State and tracing
5160

docs/external/reference/cli/commands/bot.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,17 @@
11
+++
22
command = "bot"
33
summary = "Run the Gambit bot assistant"
4-
usage = "gambit bot [<dir>] [--bot-root <dir>] [--model <id>] [--model-force <id>] [--responses] [--verbose]"
4+
usage = "gambit bot [<dir>] [--bot-root <dir>] [--model <id>] [--model-force <id>] [--responses] [--verbose] [--worker-sandbox|--no-worker-sandbox|--legacy-exec]"
55
flags = [
66
"--bot-root <dir> Allowed folder for bot file writes (defaults to workspace.decks if set; overrides <dir>)",
77
"--model <id> Default model id",
88
"--model-force <id> Override model id",
99
"--responses Run runtime/state in Responses mode",
10+
"--worker-sandbox Force worker execution on",
11+
"--no-worker-sandbox Force worker execution off",
12+
"--legacy-exec Alias for --no-worker-sandbox",
13+
"--sandbox Deprecated alias for --worker-sandbox",
14+
"--no-sandbox Deprecated alias for --no-worker-sandbox",
1015
"--verbose Print trace events to console",
1116
]
1217
+++

docs/external/reference/cli/commands/grade.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,19 @@
11
+++
22
command = "grade"
33
summary = "Grade a saved state file"
4-
usage = "gambit grade <grader-deck.(ts|md)> --state <file> [--model <id>] [--model-force <id>] [--trace <file>] [--responses] [--verbose]"
4+
usage = "gambit grade <grader-deck.(ts|md)> --state <file> [--model <id>] [--model-force <id>] [--trace <file>] [--responses] [--verbose] [--worker-sandbox|--no-worker-sandbox|--legacy-exec]"
55
flags = [
66
"--grader <path> Grader deck path (overrides positional)",
77
"--state <file> Load/persist state",
88
"--model <id> Default model id",
99
"--model-force <id> Override model id",
1010
"--trace <file> Write trace events to file (JSONL)",
1111
"--responses Run runtime/state in Responses mode",
12+
"--worker-sandbox Force worker execution on",
13+
"--no-worker-sandbox Force worker execution off",
14+
"--legacy-exec Alias for --no-worker-sandbox",
15+
"--sandbox Deprecated alias for --worker-sandbox",
16+
"--no-sandbox Deprecated alias for --no-worker-sandbox",
1217
"--verbose Print trace events to console",
1318
]
1419
+++

docs/external/reference/cli/commands/repl.md

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,14 +1,25 @@
11
+++
22
command = "repl"
33
summary = "Start an interactive REPL"
4-
usage = "gambit repl <deck.(ts|md)> [--context <json|string>] [--message <json|string>] [--model <id>] [--model-force <id>] [--responses] [--verbose]"
4+
usage = "gambit repl <deck.(ts|md)> [--context <json|string>] [--message <json|string>] [--model <id>] [--model-force <id>] [--responses] [--verbose] [-A|--allow-all|--allow-<kind>] [--worker-sandbox|--no-worker-sandbox|--legacy-exec]"
55
flags = [
66
"--context <json|string> Context payload (seeds gambit_context; legacy --init still works)",
77
"--message <json|string> Initial user message (sent before assistant speaks)",
88
"--model <id> Default model id",
99
"--model-force <id> Override model id",
1010
"--responses Run runtime/state in Responses mode",
1111
"--verbose Print trace events to console",
12+
"-A, --allow-all Allow all session permissions (read/write/run/net/env)",
13+
"--allow-read[=<paths>] Session read override (all when value omitted)",
14+
"--allow-write[=<paths>] Session write override (all when value omitted)",
15+
"--allow-run[=<entries>] Session run override (all when value omitted)",
16+
"--allow-net[=<hosts>] Session net override (all when value omitted)",
17+
"--allow-env[=<names>] Session env override (all when value omitted)",
18+
"--worker-sandbox Force worker execution on",
19+
"--no-worker-sandbox Force worker execution off",
20+
"--legacy-exec Alias for --no-worker-sandbox",
21+
"--sandbox Deprecated alias for --worker-sandbox",
22+
"--no-sandbox Deprecated alias for --no-worker-sandbox",
1223
]
1324
+++
1425

docs/external/reference/cli/commands/run.md

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
+++
22
command = "run"
33
summary = "Run a deck once"
4-
usage = "gambit run [<deck.(ts|md)>] [--context <json|string>] [--message <json|string>] [--model <id>] [--model-force <id>] [--trace <file>] [--state <file>] [--stream] [--responses] [--verbose]"
4+
usage = "gambit run [<deck.(ts|md)>] [--context <json|string>] [--message <json|string>] [--model <id>] [--model-force <id>] [--trace <file>] [--state <file>] [--stream] [--responses] [--verbose] [-A|--allow-all|--allow-<kind>] [--worker-sandbox|--no-worker-sandbox|--legacy-exec]"
55
flags = [
66
"--context <json|string> Context payload (seeds gambit_context; legacy --init still works)",
77
"--message <json|string> Initial user message (sent before assistant speaks)",
@@ -12,6 +12,17 @@ flags = [
1212
"--stream Enable streaming responses",
1313
"--responses Run runtime/state in Responses mode",
1414
"--verbose Print trace events to console",
15+
"-A, --allow-all Allow all session permissions (read/write/run/net/env)",
16+
"--allow-read[=<paths>] Session read override (all when value omitted)",
17+
"--allow-write[=<paths>] Session write override (all when value omitted)",
18+
"--allow-run[=<entries>] Session run override (all when value omitted)",
19+
"--allow-net[=<hosts>] Session net override (all when value omitted)",
20+
"--allow-env[=<names>] Session env override (all when value omitted)",
21+
"--worker-sandbox Force worker execution on",
22+
"--no-worker-sandbox Force worker execution off",
23+
"--legacy-exec Alias for --no-worker-sandbox",
24+
"--sandbox Deprecated alias for --worker-sandbox",
25+
"--no-sandbox Deprecated alias for --no-worker-sandbox",
1526
]
1627
+++
1728

docs/external/reference/cli/commands/serve.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
+++
22
command = "serve"
33
summary = "Run the debug UI server"
4-
usage = "gambit serve [<deck.(ts|md)>] [--model <id>] [--model-force <id>] [--port <n>] [--responses] [--verbose] [--watch] [--no-bundle] [--no-sourcemap]"
4+
usage = "gambit serve [<deck.(ts|md)>] [--model <id>] [--model-force <id>] [--port <n>] [--responses] [--verbose] [--watch] [--no-bundle] [--no-sourcemap] [--worker-sandbox|--no-worker-sandbox|--legacy-exec]"
55
flags = [
66
"--model <id> Default model id",
77
"--model-force <id> Override model id",
@@ -13,6 +13,11 @@ flags = [
1313
"--sourcemap Generate external source maps (serve; default in dev)",
1414
"--no-sourcemap Disable source map generation (serve)",
1515
"--platform <platform> Bundle target platform: deno (default) or web (browser)",
16+
"--worker-sandbox Force worker execution on",
17+
"--no-worker-sandbox Force worker execution off",
18+
"--legacy-exec Alias for --no-worker-sandbox",
19+
"--sandbox Deprecated alias for --worker-sandbox",
20+
"--no-sandbox Deprecated alias for --no-worker-sandbox",
1621
"--verbose Print trace events to console",
1722
]
1823
+++

docs/external/reference/cli/commands/test-bot.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
+++
22
command = "test-bot"
33
summary = "Run a persona/test-bot loop"
4-
usage = "gambit test-bot <root-deck.(ts|md)> --test-deck <persona-deck.(ts|md)> [--context <json|string>] [--bot-input <json|string>] [--message <json|string>] [--max-turns <n>] [--state <file>] [--grade <grader-deck.(ts|md)> ...] [--trace <file>] [--responses] [--verbose]"
4+
usage = "gambit test-bot <root-deck.(ts|md)> --test-deck <persona-deck.(ts|md)> [--context <json|string>] [--bot-input <json|string>] [--message <json|string>] [--max-turns <n>] [--state <file>] [--grade <grader-deck.(ts|md)> ...] [--trace <file>] [--responses] [--verbose] [--worker-sandbox|--no-worker-sandbox|--legacy-exec]"
55
flags = [
66
"--test-deck <path> Persona/test deck path",
77
"--grade <path> Grader deck path (repeatable)",
@@ -14,6 +14,11 @@ flags = [
1414
"--model-force <id> Override model id",
1515
"--trace <file> Write trace events to file (JSONL)",
1616
"--responses Run runtime/state in Responses mode",
17+
"--worker-sandbox Force worker execution on",
18+
"--no-worker-sandbox Force worker execution off",
19+
"--legacy-exec Alias for --no-worker-sandbox",
20+
"--sandbox Deprecated alias for --worker-sandbox",
21+
"--no-sandbox Deprecated alias for --no-worker-sandbox",
1722
"--verbose Print trace events to console",
1823
]
1924
+++

packages/gambit-core/src/permissions.test.ts

Lines changed: 68 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
1-
import { assert, assertEquals } from "@std/assert";
1+
import { assert, assertEquals, assertThrows } from "@std/assert";
22
import * as path from "@std/path";
33
import {
44
canReadPath,
55
canRunCommand,
66
canRunPath,
7+
canWritePath,
78
normalizePermissionDeclaration,
89
normalizePermissionDeclarationToSet,
10+
type PermissionDeclarationInput,
911
resolveEffectivePermissions,
1012
} from "./permissions.ts";
1113

@@ -129,6 +131,48 @@ Deno.test("child-only inherited permissions use child baseDir for relative check
129131
);
130132
});
131133

134+
Deno.test("path grants cover descendant files within the directory tree", () => {
135+
const set = normalizePermissionDeclarationToSet(
136+
{
137+
read: ["./shared"],
138+
write: ["./shared", "./local.txt"],
139+
},
140+
"/workspace/decks/root",
141+
);
142+
assert(set, "expected normalized permission set");
143+
144+
assertEquals(
145+
canReadPath(set, "./shared/prompts/prompt.txt"),
146+
true,
147+
"read grants must apply to files beneath a declared directory",
148+
);
149+
assertEquals(
150+
canReadPath(set, "./shared"),
151+
true,
152+
"read grants must apply to the directory itself",
153+
);
154+
assertEquals(
155+
canReadPath(set, "./other/path.txt"),
156+
false,
157+
"read grants must not leak into sibling directories",
158+
);
159+
assertEquals(
160+
canWritePath(set, "./shared/prompts/prompt.txt"),
161+
true,
162+
"write grants must apply to files beneath a declared directory",
163+
);
164+
assertEquals(
165+
canWritePath(set, "./local.txt"),
166+
true,
167+
"write grants must still allow file-specific declarations",
168+
);
169+
assertEquals(
170+
canWritePath(set, "./local.txt.bak"),
171+
false,
172+
"write grants must not allow unrelated files",
173+
);
174+
});
175+
132176
Deno.test("run grants keep path vs command semantics separate", () => {
133177
const set = normalizePermissionDeclarationToSet(
134178
{
@@ -147,22 +191,33 @@ Deno.test("run grants keep path vs command semantics separate", () => {
147191
assertEquals(canRunCommand(set, "bin/tool"), false);
148192
});
149193

150-
Deno.test("run object-form booleans honor all-access semantics", () => {
151-
const pathsTrue = normalizePermissionDeclarationToSet(
152-
{ run: { paths: true } },
194+
Deno.test("run=true grants all run access", () => {
195+
const runAll = normalizePermissionDeclarationToSet(
196+
{ run: true },
153197
"/workspace",
154198
);
155-
assert(pathsTrue, "expected normalized permission set for paths=true");
156-
assertEquals(canRunPath(pathsTrue, "/workspace/bin/anything"), true);
157-
assertEquals(canRunCommand(pathsTrue, "anything"), true);
199+
assert(runAll, "expected normalized permission set for run=true");
200+
assertEquals(canRunPath(runAll, "/workspace/bin/anything"), true);
201+
assertEquals(canRunCommand(runAll, "anything"), true);
202+
});
158203

159-
const commandsTrue = normalizePermissionDeclarationToSet(
160-
{ run: { commands: true } },
161-
"/workspace",
204+
Deno.test("run object-form booleans are rejected", () => {
205+
const invalidPaths = {
206+
run: { paths: true },
207+
} as unknown as PermissionDeclarationInput;
208+
const invalidCommands = {
209+
run: { commands: false },
210+
} as unknown as PermissionDeclarationInput;
211+
assertThrows(
212+
() => normalizePermissionDeclarationToSet(invalidPaths, "/workspace"),
213+
Error,
214+
"permissions.run.paths must be an array in object form",
215+
);
216+
assertThrows(
217+
() => normalizePermissionDeclarationToSet(invalidCommands, "/workspace"),
218+
Error,
219+
"permissions.run.commands must be an array in object form",
162220
);
163-
assert(commandsTrue, "expected normalized permission set for commands=true");
164-
assertEquals(canRunPath(commandsTrue, "/workspace/bin/anything"), true);
165-
assertEquals(canRunCommand(commandsTrue, "anything"), true);
166221
});
167222

168223
Deno.test("unspecified kinds deny by default when a layer is provided", () => {

packages/gambit-core/src/permissions.ts

Lines changed: 24 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@ export type RunPermissionInput =
1111
| boolean
1212
| Array<string>
1313
| {
14-
paths?: boolean | Array<string>;
15-
commands?: boolean | Array<string>;
14+
paths?: Array<string>;
15+
commands?: Array<string>;
1616
};
1717

1818
export type PermissionDeclarationInput = Partial<{
@@ -175,19 +175,22 @@ function normalizeRun(
175175
paths?: unknown;
176176
commands?: unknown;
177177
};
178+
if (typeof record.paths === "boolean") {
179+
throw new Error(
180+
"permissions.run.paths must be an array in object form; use permissions.run=true for full run access",
181+
);
182+
}
183+
if (typeof record.commands === "boolean") {
184+
throw new Error(
185+
"permissions.run.commands must be an array in object form; use permissions.run=true for full run access",
186+
);
187+
}
178188
const pathsScope = normalizeList(record.paths, "run", baseDir, {
179189
resolvePaths: true,
180190
});
181191
const commandsScope = normalizeList(record.commands, "run", baseDir, {
182192
resolvePaths: false,
183193
});
184-
if (pathsScope.all || commandsScope.all) {
185-
return {
186-
all: true,
187-
paths: new Set<string>(),
188-
commands: new Set<string>(),
189-
};
190-
}
191194
return {
192195
all: false,
193196
paths: pathsScope.values,
@@ -424,9 +427,20 @@ export function resolveEffectivePermissions(args: {
424427
};
425428
}
426429

430+
/**
 * Reports whether `target` is covered by `scope`.
 *
 * A scope with `all` set covers every target. Otherwise each entry in
 * `scope.values` is treated as either an exact path grant or the root of an
 * allowed directory tree, so granting a directory also grants everything
 * beneath it.
 *
 * @param scope Normalized grant set to test against.
 * @param target Path being checked. NOTE(review): presumably resolved the same
 *   way as the stored grants before it reaches this function; confirm against
 *   callers.
 * @returns `true` when `target` equals a granted entry or lies beneath one.
 */
function matchScope(scope: NormalizedScope, target: string): boolean {
  if (scope.all) return true;
  for (const root of scope.values) {
    if (root === target) return true;
    const rel = path.relative(root, target);
    // An empty result means no descendant relationship was established, and an
    // absolute result means `target` cannot be expressed relative to `root`
    // at all; neither is a descendant of `root`.
    if (rel.length === 0 || path.isAbsolute(rel)) continue;
    // Only a leading `..` *segment* escapes the root. A plain prefix test
    // would also reject legitimate descendants whose first segment merely
    // begins with two dots (for example `..cache/file`), so compare the whole
    // first segment. Both separator styles are accepted so the check does not
    // depend on the host platform.
    const [firstSegment] = rel.split(/[\\/]/, 1);
    if (firstSegment !== "..") return true;
  }
  return false;
}
431445

432446
/**

0 commit comments

Comments
 (0)