Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 73 additions & 0 deletions packages/gambit-core/src/runtime.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5646,6 +5646,79 @@ Deck.
assertEquals(seenParams?.verbosity, "high");
});

// Checks that a nested table under modelParams.additionalParams shows up on
// the params object handed to the model provider's chat() call.
Deno.test("modelParams.additionalParams passes through to provider params", async () => {
  const tempDir = await Deno.makeTempDir();
  // Deck frontmatter must start at column 0, so the template is left unindented.
  const deckSource = `
+++
modelParams = { model = "dummy-model", additionalParams = { codex = { project_doc_max_bytes = 0, profile = { name = "gambit" } } } }
+++

Deck.
`.trim();
  const deckPath = await writeTempDeck(tempDir, "root.deck.md", deckSource);

  // Stub provider that records the params it was called with.
  let capturedParams: Record<string, unknown> | undefined;
  const recordingProvider: ModelProvider = {
    chat: (input) => {
      capturedParams = input.params;
      return Promise.resolve({
        message: { role: "assistant", content: "ok" },
        finishReason: "stop",
      });
    },
  };

  await runDeck({
    path: deckPath,
    input: "hi",
    modelProvider: recordingProvider,
    isRoot: true,
  });

  const expectedCodex = {
    project_doc_max_bytes: 0,
    profile: { name: "gambit" },
  };
  assertEquals(capturedParams?.codex, expectedCodex);
});

// Checks that when `verbosity` is set both directly on modelParams and inside
// additionalParams, the provider sees the directly-set value.
Deno.test("modelParams supported fields override additionalParams duplicates", async () => {
  const tempDir = await Deno.makeTempDir();
  // Deck frontmatter must start at column 0, so the template is left unindented.
  const deckSource = `
+++
modelParams = { model = "dummy-model", verbosity = "high", additionalParams = { verbosity = "low" } }
+++

Deck.
`.trim();
  const deckPath = await writeTempDeck(tempDir, "root.deck.md", deckSource);

  // Stub provider that records the params it was called with.
  let capturedParams: Record<string, unknown> | undefined;
  const recordingProvider: ModelProvider = {
    chat: (input) => {
      capturedParams = input.params;
      return Promise.resolve({
        message: { role: "assistant", content: "ok" },
        finishReason: "stop",
      });
    },
  };

  await runDeck({
    path: deckPath,
    input: "hi",
    modelProvider: recordingProvider,
    isRoot: true,
  });

  const seenVerbosity = capturedParams?.verbosity;
  assertEquals(seenVerbosity, "high");
});

Deno.test("worker sandbox denies write when write permission is absent", async () => {
const dir = await Deno.makeTempDir();
const modHref = modImportPath();
Expand Down
6 changes: 5 additions & 1 deletion packages/gambit-core/src/runtime.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1017,8 +1017,12 @@ function toProviderParams(
max_tokens,
verbosity,
reasoning,
additionalParams,
} = params;
const out: Record<string, unknown> = {};
const out: Record<string, unknown> = additionalParams &&
typeof additionalParams === "object" && !Array.isArray(additionalParams)
? { ...additionalParams }
: {};
if (temperature !== undefined) out.temperature = temperature;
if (top_p !== undefined) out.top_p = top_p;
if (frequency_penalty !== undefined) {
Expand Down
1 change: 1 addition & 0 deletions packages/gambit-core/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ export type ModelParams = {
effort?: "none" | "low" | "medium" | "high" | "xhigh";
summary?: "concise" | "detailed" | "auto";
};
additionalParams?: Record<string, JSONValue>;
};

export type Guardrails = {
Expand Down
44 changes: 42 additions & 2 deletions src/cli.codex_smoke.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -67,19 +67,26 @@ async function writeDeck(
dir: string,
model: string,
verbosity?: "low" | "medium" | "high",
body = "Smoke deck.",
extraFrontmatter = "",
): Promise<string> {
const deckPath = path.join(dir, "root.deck.md");
const verbosityLine = verbosity ? `verbosity = "${verbosity}"\n` : "";
const frontmatterSuffix = extraFrontmatter ? `${extraFrontmatter}\n` : "";
const contents = `+++
label = "codex smoke"

[modelParams]
model = "${model}"
${verbosityLine}+++

Smoke deck.
${body}
`;
await Deno.writeTextFile(deckPath, contents);
const finalContents = contents.replace(
`${verbosityLine}+++\n\n`,
`${verbosityLine}${frontmatterSuffix}+++\n\n`,
);
await Deno.writeTextFile(deckPath, finalContents);
return deckPath;
}

Expand Down Expand Up @@ -324,6 +331,13 @@ Deno.test({
);
assertEquals(defaultRun.argsLog.includes("\n-m\n"), false);
assertEquals(defaultRun.argsLog.includes('model_verbosity="high"'), true);
assertEquals(defaultRun.argsLog.includes("project_doc_max_bytes="), false);
assertEquals(
defaultRun.argsLog.includes('instructions="Smoke deck."'),
true,
);
assertEquals(defaultRun.argsLog.includes("SYSTEM:\n"), false);
assertEquals(defaultRun.argsLog.endsWith("\nhi\n"), true);

const passthroughDeck = await writeDeck(
dir,
Expand All @@ -349,6 +363,32 @@ Deno.test({
passthroughRun.argsLog.includes('model_verbosity="high"'),
true,
);

const projectDocDeck = await writeDeck(
dir,
"codex-cli/default",
undefined,
"Smoke deck.",
"additionalParams = { codex = { project_doc_max_bytes = 0 } }",
);
const projectDocRun = await runDeck({
deckPath: projectDocDeck,
codexBinPath: mock.binPath,
argsLogPath: mock.argsLogPath,
cwd: dir,
});
assertEquals(
projectDocRun.code,
0,
formatCommandDiagnostics(
"run codex-cli/default project docs",
projectDocRun,
),
);
assertEquals(
projectDocRun.argsLog.includes("project_doc_max_bytes=0"),
true,
);
} finally {
await Deno.remove(dir, { recursive: true }).catch((err) => {
if (err instanceof Deno.errors.NotFound) return;
Expand Down
124 changes: 124 additions & 0 deletions src/providers/codex.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,39 @@ Deno.test("codex provider resume does not replay transcript when no new user mes
assertEquals(args[args.length - 1], "");
});

// Checks that, for a fresh conversation, the system message is emitted as an
// `instructions="…"` config entry and the final argument is only the user text.
Deno.test("codex provider uses codex instructions config for fresh system prompts", () => {
  const args = parseCodexArgsForTest({
    model: "codex-cli/default",
    messages: [
      { role: "system", content: "deck system prompt" },
      { role: "user", content: "hello" },
    ],
  });
  const joined = args.join(" ");
  assertEquals(joined.includes('instructions="deck system prompt"'), true);
  // Use a real newline after the role header: transcript headers asserted
  // elsewhere in this file look like "USER:\nhello", so matching an escaped
  // backslash-n would leave this negative check effectively vacuous.
  assertEquals(joined.includes("SYSTEM:\n"), false);
  assertEquals(args[args.length - 1], "hello");
});

// Checks that a multi-turn fresh prompt renders only the user/assistant turns
// as the final argument, while the system message goes to `instructions="…"`.
Deno.test("codex provider fresh prompt keeps non-system continuation payloads only", () => {
  const args = parseCodexArgsForTest({
    model: "codex-cli/default",
    messages: [
      { role: "system", content: "deck system prompt" },
      { role: "user", content: "hello" },
      { role: "assistant", content: "hi there" },
      { role: "user", content: "follow up" },
    ],
  });
  const joined = args.join(" ");
  assertEquals(joined.includes('instructions="deck system prompt"'), true);
  // Role headers in the transcript are followed by a real newline (see the
  // expected string below), so the negative check must use "\n" rather than an
  // escaped backslash-n, which would make it effectively vacuous.
  assertEquals(joined.includes("SYSTEM:\n"), false);
  assertEquals(
    args[args.length - 1],
    "USER:\nhello\n\nASSISTANT:\nhi there\n\nUSER:\nfollow up",
  );
});

Deno.test("codex provider responses returns updatedState with thread metadata", async () => {
const provider = createCodexProvider({
runCommand: () =>
Expand Down Expand Up @@ -837,13 +870,32 @@ Deno.test("codex provider configures workspace-write sandbox automatically", ()
});
const joined = args.join(" ");
assertEquals(joined.includes('approval_policy="never"'), true);
assertEquals(joined.includes("project_doc_max_bytes="), false);
assertEquals(joined.includes('sandbox_mode="workspace-write"'), true);
assertEquals(
joined.includes('sandbox_workspace_write.writable_roots=["/tmp/test-cwd"]'),
true,
);
});

// Checks that scalar, nested-table and array values under `params.codex`
// each appear as `key=value` text in the generated codex arguments.
Deno.test("codex provider forwards additionalParams.codex config entries", () => {
  const cliArgs = parseCodexArgsForTest({
    model: "codex-cli/default",
    messages: [{ role: "user", content: "hi" }],
    params: {
      codex: {
        project_doc_max_bytes: 0,
        profile: { name: "gambit" },
        project_root_markers: [".git", ".hg"],
      },
    },
  });
  const flattened = cliArgs.join(" ");

  const hasScalar = flattened.includes("project_doc_max_bytes=0");
  assertEquals(hasScalar, true);

  const hasNested = flattened.includes('profile.name="gambit"');
  assertEquals(hasNested, true);

  const hasArray = flattened.includes('project_root_markers=[".git", ".hg"]');
  assertEquals(hasArray, true);
});

Deno.test("codex provider skips sandbox config when yolo env is enabled", () => {
const previous = Deno.env.get("GAMBIT_CODEX_SKIP_SANDBOX_CONFIG");
Deno.env.set("GAMBIT_CODEX_SKIP_SANDBOX_CONFIG", "1");
Expand Down Expand Up @@ -1012,6 +1064,78 @@ Deno.test("codex provider forwards codex-cli/<model> through -m", () => {
assertEquals(args[modelArgIndex + 1], "gpt-5.2-codex");
});

// Starts two chats at once, each carrying its own saved-state thread id, and
// checks that neither invocation's thread id, instructions or prompt shows up
// in the other's command arguments.
Deno.test("codex provider keeps saved-state threads isolated across runs", async () => {
  const recordedArgs: Array<Array<string>> = [];
  const provider = createCodexProvider({
    runCommand: ({ args }) => {
      recordedArgs.push(args);
      // Echo back whichever known thread id is present in the arguments.
      const matchedThread = ["thread-a", "thread-b"].find((id) =>
        args.includes(id)
      );
      const threadId = matchedThread ?? "thread-new";
      const completedEvent = JSON.stringify({
        type: "item.completed",
        item: {
          id: `msg-${threadId}`,
          type: "agent_message",
          text: `reply-${threadId}`,
        },
      });
      return Promise.resolve({
        success: true,
        code: 0,
        stdout: enc.encode([completedEvent].join("\n")),
        stderr: new Uint8Array(),
      });
    },
  });

  const [resultA, resultB] = await Promise.all([
    provider.chat({
      model: "codex-cli/default",
      messages: [
        { role: "system", content: "system-a" },
        { role: "user", content: "follow up a" },
      ],
      state: {
        runId: "run-a",
        messages: [],
        meta: { "codex.threadId": "thread-a" },
      } as SavedState,
    }),
    provider.chat({
      model: "codex-cli/default",
      messages: [
        { role: "system", content: "system-b" },
        { role: "user", content: "follow up b" },
      ],
      state: {
        runId: "run-b",
        messages: [],
        meta: { "codex.threadId": "thread-b" },
      } as SavedState,
    }),
  ]);

  // Each result must report the thread id it was started with.
  assertEquals(resultA.updatedState?.meta?.["codex.threadId"], "thread-a");
  assertEquals(resultB.updatedState?.meta?.["codex.threadId"], "thread-b");

  assertEquals(recordedArgs.length, 2);
  const [firstCall, secondCall] = recordedArgs;

  // Thread ids stay with their own invocation.
  assertEquals(firstCall.includes("thread-a"), true);
  assertEquals(firstCall.includes("thread-b"), false);
  assertEquals(secondCall.includes("thread-b"), true);
  assertEquals(secondCall.includes("thread-a"), false);

  // System prompts stay with their own invocation.
  const firstJoined = firstCall.join(" ");
  const secondJoined = secondCall.join(" ");
  assertEquals(firstJoined.includes('instructions="system-a"'), true);
  assertEquals(firstJoined.includes('instructions="system-b"'), false);
  assertEquals(secondJoined.includes('instructions="system-b"'), true);
  assertEquals(secondJoined.includes('instructions="system-a"'), false);

  // The trailing argument is each run's own user message.
  assertEquals(firstCall[firstCall.length - 1], "follow up a");
  assertEquals(secondCall[secondCall.length - 1], "follow up b");
});

Deno.test("codex provider rejects legacy codex prefix", () => {
const error = assertThrows(() =>
parseCodexArgsForTest({
Expand Down
Loading
Loading