diff --git a/packages/gambit-core/src/runtime.test.ts b/packages/gambit-core/src/runtime.test.ts index e3eae388..fc7b422b 100644 --- a/packages/gambit-core/src/runtime.test.ts +++ b/packages/gambit-core/src/runtime.test.ts @@ -5646,6 +5646,79 @@ Deck. assertEquals(seenParams?.verbosity, "high"); }); +Deno.test("modelParams.additionalParams passes through to provider params", async () => { + const dir = await Deno.makeTempDir(); + const deckPath = await writeTempDeck( + dir, + "root.deck.md", + ` ++++ +modelParams = { model = "dummy-model", additionalParams = { codex = { project_doc_max_bytes = 0, profile = { name = "gambit" } } } } ++++ + +Deck. +`.trim(), + ); + + let seenParams: Record | undefined; + const provider: ModelProvider = { + chat: (input) => { + seenParams = input.params; + return Promise.resolve({ + message: { role: "assistant", content: "ok" }, + finishReason: "stop", + }); + }, + }; + + await runDeck({ + path: deckPath, + input: "hi", + modelProvider: provider, + isRoot: true, + }); + + assertEquals(seenParams?.codex, { + project_doc_max_bytes: 0, + profile: { name: "gambit" }, + }); +}); + +Deno.test("modelParams supported fields override additionalParams duplicates", async () => { + const dir = await Deno.makeTempDir(); + const deckPath = await writeTempDeck( + dir, + "root.deck.md", + ` ++++ +modelParams = { model = "dummy-model", verbosity = "high", additionalParams = { verbosity = "low" } } ++++ + +Deck. +`.trim(), + ); + + let seenParams: Record | undefined; + const provider: ModelProvider = { + chat: (input) => { + seenParams = input.params; + return Promise.resolve({ + message: { role: "assistant", content: "ok" }, + finishReason: "stop", + }); + }, + }; + + await runDeck({ + path: deckPath, + input: "hi", + modelProvider: provider, + isRoot: true, + }); + + assertEquals(seenParams?.verbosity, "high"); +}); + Deno.test("worker sandbox denies write when write permission is absent", async () => { const dir = await Deno.makeTempDir(); const modHref = modImportPath(); diff --git a/packages/gambit-core/src/runtime.ts b/packages/gambit-core/src/runtime.ts index c319585a..2bb92d70 100644 --- a/packages/gambit-core/src/runtime.ts +++ b/packages/gambit-core/src/runtime.ts @@ -1017,8 +1017,12 @@ function toProviderParams( max_tokens, verbosity, reasoning, + additionalParams, } = params; - const out: Record = {}; + const out: Record = additionalParams && + typeof additionalParams === "object" && !Array.isArray(additionalParams) + ? { ...additionalParams } + : {}; if (temperature !== undefined) out.temperature = temperature; if (top_p !== undefined) out.top_p = top_p; if (frequency_penalty !== undefined) { diff --git a/packages/gambit-core/src/types.ts b/packages/gambit-core/src/types.ts index f34928a7..84214d08 100644 --- a/packages/gambit-core/src/types.ts +++ b/packages/gambit-core/src/types.ts @@ -36,6 +36,7 @@ export type ModelParams = { effort?: "none" | "low" | "medium" | "high" | "xhigh"; summary?: "concise" | "detailed" | "auto"; }; + additionalParams?: Record; }; export type Guardrails = { diff --git a/src/cli.codex_smoke.test.ts b/src/cli.codex_smoke.test.ts index f3dd32b5..fc389b55 100644 --- a/src/cli.codex_smoke.test.ts +++ b/src/cli.codex_smoke.test.ts @@ -67,9 +67,12 @@ async function writeDeck( dir: string, model: string, verbosity?: "low" | "medium" | "high", + body = "Smoke deck.", + extraFrontmatter = "", ): Promise { const deckPath = path.join(dir, "root.deck.md"); const verbosityLine = verbosity ? `verbosity = "${verbosity}"\n` : ""; + const frontmatterSuffix = extraFrontmatter ? `${extraFrontmatter}\n` : ""; const contents = `+++ label = "codex smoke" @@ -77,9 +80,13 @@ label = "codex smoke" model = "${model}" ${verbosityLine}+++ -Smoke deck. +${body} `; - await Deno.writeTextFile(deckPath, contents); + const finalContents = contents.replace( + `${verbosityLine}+++\n\n`, + `${verbosityLine}${frontmatterSuffix}+++\n\n`, + ); + await Deno.writeTextFile(deckPath, finalContents); return deckPath; } @@ -324,6 +331,13 @@ Deno.test({ ); assertEquals(defaultRun.argsLog.includes("\n-m\n"), false); assertEquals(defaultRun.argsLog.includes('model_verbosity="high"'), true); + assertEquals(defaultRun.argsLog.includes("project_doc_max_bytes="), false); + assertEquals( + defaultRun.argsLog.includes('instructions="Smoke deck."'), + true, + ); + assertEquals(defaultRun.argsLog.includes("SYSTEM:\n"), false); + assertEquals(defaultRun.argsLog.endsWith("\nhi\n"), true); const passthroughDeck = await writeDeck( dir, @@ -349,6 +363,32 @@ Deno.test({ passthroughRun.argsLog.includes('model_verbosity="high"'), true, ); + + const projectDocDeck = await writeDeck( + dir, + "codex-cli/default", + undefined, + "Smoke deck.", + "additionalParams = { codex = { project_doc_max_bytes = 0 } }", + ); + const projectDocRun = await runDeck({ + deckPath: projectDocDeck, + codexBinPath: mock.binPath, + argsLogPath: mock.argsLogPath, + cwd: dir, + }); + assertEquals( + projectDocRun.code, + 0, + formatCommandDiagnostics( + "run codex-cli/default project docs", + projectDocRun, + ), + ); + assertEquals( + projectDocRun.argsLog.includes("project_doc_max_bytes=0"), + true, + ); } finally { await Deno.remove(dir, { recursive: true }).catch((err) => { if (err instanceof Deno.errors.NotFound) return; diff --git a/src/providers/codex.test.ts b/src/providers/codex.test.ts index b35698a0..a5acf5bd 100644 --- a/src/providers/codex.test.ts +++ b/src/providers/codex.test.ts @@ -90,6 +90,39 @@ Deno.test("codex provider resume does not replay transcript when no new user mes assertEquals(args[args.length - 1], ""); }); +Deno.test("codex provider uses codex instructions config for fresh system prompts", () => { + const args = parseCodexArgsForTest({ + model: "codex-cli/default", + messages: [ + { role: "system", content: "deck system prompt" }, + { role: "user", content: "hello" }, + ], + }); + const joined = args.join(" "); + assertEquals(joined.includes('instructions="deck system prompt"'), true); + assertEquals(joined.includes("SYSTEM:\\n"), false); + assertEquals(args[args.length - 1], "hello"); +}); + +Deno.test("codex provider fresh prompt keeps non-system continuation payloads only", () => { + const args = parseCodexArgsForTest({ + model: "codex-cli/default", + messages: [ + { role: "system", content: "deck system prompt" }, + { role: "user", content: "hello" }, + { role: "assistant", content: "hi there" }, + { role: "user", content: "follow up" }, + ], + }); + const joined = args.join(" "); + assertEquals(joined.includes('instructions="deck system prompt"'), true); + assertEquals(joined.includes("SYSTEM:\\n"), false); + assertEquals( + args[args.length - 1], + "USER:\nhello\n\nASSISTANT:\nhi there\n\nUSER:\nfollow up", + ); +}); + Deno.test("codex provider responses returns updatedState with thread metadata", async () => { const provider = createCodexProvider({ runCommand: () => @@ -837,6 +870,7 @@ Deno.test("codex provider configures workspace-write sandbox automatically", () }); const joined = args.join(" "); assertEquals(joined.includes('approval_policy="never"'), true); + assertEquals(joined.includes("project_doc_max_bytes="), false); assertEquals(joined.includes('sandbox_mode="workspace-write"'), true); assertEquals( joined.includes('sandbox_workspace_write.writable_roots=["/tmp/test-cwd"]'), @@ -844,6 +878,24 @@ Deno.test("codex provider configures workspace-write sandbox automatically", () ); }); +Deno.test("codex provider forwards additionalParams.codex config entries", () => { + const args = parseCodexArgsForTest({ + model: "codex-cli/default", + messages: [{ role: "user", content: "hi" }], + params: { + codex: { + project_doc_max_bytes: 0, + profile: { name: "gambit" }, + project_root_markers: [".git", ".hg"], + }, + }, + }); + const joined = args.join(" "); + assertEquals(joined.includes("project_doc_max_bytes=0"), true); + assertEquals(joined.includes('profile.name="gambit"'), true); + assertEquals(joined.includes('project_root_markers=[".git", ".hg"]'), true); +}); + Deno.test("codex provider skips sandbox config when yolo env is enabled", () => { const previous = Deno.env.get("GAMBIT_CODEX_SKIP_SANDBOX_CONFIG"); Deno.env.set("GAMBIT_CODEX_SKIP_SANDBOX_CONFIG", "1"); @@ -1012,6 +1064,78 @@ Deno.test("codex provider forwards codex-cli/ through -m", () => { assertEquals(args[modelArgIndex + 1], "gpt-5.2-codex"); }); +Deno.test("codex provider keeps saved-state threads isolated across runs", async () => { + const calls: Array> = []; + const provider = createCodexProvider({ + runCommand: ({ args }) => { + calls.push(args); + const threadId = args.includes("thread-a") + ? "thread-a" + : args.includes("thread-b") + ? "thread-b" + : "thread-new"; + return Promise.resolve({ + success: true, + code: 0, + stdout: enc.encode( + [ + JSON.stringify({ + type: "item.completed", + item: { + id: `msg-${threadId}`, + type: "agent_message", + text: `reply-${threadId}`, + }, + }), + ].join("\n"), + ), + stderr: new Uint8Array(), + }); + }, + }); + + const [a, b] = await Promise.all([ + provider.chat({ + model: "codex-cli/default", + messages: [ + { role: "system", content: "system-a" }, + { role: "user", content: "follow up a" }, + ], + state: { + runId: "run-a", + messages: [], + meta: { "codex.threadId": "thread-a" }, + } as SavedState, + }), + provider.chat({ + model: "codex-cli/default", + messages: [ + { role: "system", content: "system-b" }, + { role: "user", content: "follow up b" }, + ], + state: { + runId: "run-b", + messages: [], + meta: { "codex.threadId": "thread-b" }, + } as SavedState, + }), + ]); + + assertEquals(a.updatedState?.meta?.["codex.threadId"], "thread-a"); + assertEquals(b.updatedState?.meta?.["codex.threadId"], "thread-b"); + assertEquals(calls.length, 2); + assertEquals(calls[0].includes("thread-a"), true); + assertEquals(calls[0].includes("thread-b"), false); + assertEquals(calls[1].includes("thread-b"), true); + assertEquals(calls[1].includes("thread-a"), false); + assertEquals(calls[0].join(" ").includes('instructions="system-a"'), true); + assertEquals(calls[0].join(" ").includes('instructions="system-b"'), false); + assertEquals(calls[1].join(" ").includes('instructions="system-b"'), true); + assertEquals(calls[1].join(" ").includes('instructions="system-a"'), false); + assertEquals(calls[0][calls[0].length - 1], "follow up a"); + assertEquals(calls[1][calls[1].length - 1], "follow up b"); +}); + Deno.test("codex provider rejects legacy codex prefix", () => { const error = assertThrows(() => parseCodexArgsForTest({ diff --git a/src/providers/codex.ts b/src/providers/codex.ts index 06ebc27d..42be1821 100644 --- a/src/providers/codex.ts +++ b/src/providers/codex.ts @@ -132,12 +132,78 @@ function tomlStringArray(values: Array): string { return `[${values.map(tomlString).join(",")}]`; } +function tomlKeySegment(value: string): string { + return /^[A-Za-z0-9_-]+$/.test(value) ? value : tomlString(value); +} + +function tomlValue(value: unknown): string { + if (typeof value === "string") return tomlString(value); + if (typeof value === "number") { + if (!Number.isFinite(value)) { + throw new Error(`Invalid Codex config number: ${value}`); + } + return String(value); + } + if (typeof value === "boolean") return value ? "true" : "false"; + if (value === null) return "null"; + if (Array.isArray(value)) { + return `[${value.map((entry) => tomlValue(entry)).join(", ")}]`; + } + if (value && typeof value === "object") { + const entries = Object.entries(value) + .filter(([, entry]) => entry !== undefined) + .sort(([a], [b]) => a.localeCompare(b)); + return `{ ${ + entries + .map(([key, entry]) => `${tomlKeySegment(key)} = ${tomlValue(entry)}`) + .join(", ") + } }`; + } + throw new Error( + `Unsupported Codex config value type: ${typeof value}.`, + ); +} + +function codexAdditionalConfigArgs( + params?: Record, +): Array { + const codex = params?.codex; + if (!codex || typeof codex !== "object" || Array.isArray(codex)) return []; + const args: Array = []; + const visit = (prefix: Array, value: unknown) => { + if (value === undefined) return; + if ( + value && typeof value === "object" && !Array.isArray(value) + ) { + const entries = Object.entries(value).sort(([a], [b]) => + a.localeCompare(b) + ); + for (const [key, entry] of entries) { + visit([...prefix, key], entry); + } + return; + } + const dottedKey = prefix.map(tomlKeySegment).join("."); + args.push("-c", `${dottedKey}=${tomlValue(value)}`); + }; + for ( + const [key, value] of Object.entries(codex).sort(([a], [b]) => + a.localeCompare(b) + ) + ) { + visit([key], value); + } + return args; +} + function codexConfigArgs(input: { cwd: string; deckPath?: string; params?: Record; + instructions?: string; }): Array { const args: Array = []; + args.push(...codexAdditionalConfigArgs(input.params)); args.push("-c", `approval_policy=${tomlString("never")}`); if (!shouldSkipCodexSandboxConfig(input.params)) { args.push("-c", `sandbox_mode=${tomlString("workspace-write")}`); @@ -178,6 +244,9 @@ function codexConfigArgs(input: { if (typeof verbosity === "string" && verbosity.trim()) { args.push("-c", `model_verbosity=${tomlString(verbosity.trim())}`); } + if (typeof input.instructions === "string" && input.instructions.trim()) { + args.push("-c", `instructions=${tomlString(input.instructions.trim())}`); + } if (shouldEnableMcpBridge() && MCP_SERVER_PATH) { args.push("-c", `mcp_servers.gambit.command=${tomlString("deno")}`); @@ -735,8 +804,19 @@ function stringContent(content: ModelMessage["content"]): string { return ""; } -function renderMessagesForPrompt(messages: Array): string { +function codexInstructionsForMessages(messages: Array): string { return messages + .filter((message) => message.role === "system") + .map((message) => stringContent(message.content)) + .filter(Boolean) + .join("\n\n"); +} + +function renderNonSystemMessagesForPrompt( + messages: Array, +): string { + return messages + .filter((message) => message.role !== "system") .map((message) => { const content = stringContent(message.content); if (!content) return ""; @@ -764,7 +844,17 @@ function promptForCodexTurn(input: { // Thread resume should be incremental: only send the newest user turn. return latestUserPrompt(input.messages); } - return renderMessagesForPrompt(input.messages); + const nonSystemMessages = input.messages.filter((message) => + message.role !== "system" + ); + const latestUser = latestUserPrompt(nonSystemMessages); + if ( + nonSystemMessages.length <= 1 && + nonSystemMessages.every((message) => message.role === "user") + ) { + return latestUser; + } + return renderNonSystemMessagesForPrompt(nonSystemMessages); } function parseNumber(input: unknown): number { @@ -1058,6 +1148,7 @@ export function createCodexProvider(opts?: { ? priorThreadIdRaw.trim() : undefined; const model = normalizeCodexModel(input.model); + const instructions = codexInstructionsForMessages(input.messages); const prompt = promptForCodexTurn({ messages: input.messages, priorThreadId, @@ -1076,6 +1167,7 @@ export function createCodexProvider(opts?: { cwd, deckPath: input.deckPath, params: input.params, + instructions, }), ); if (model && model !== "default") { @@ -1313,6 +1405,7 @@ export function parseCodexArgsForTest(input: { ? priorThreadIdRaw.trim() : undefined; const model = normalizeCodexModel(input.model); + const instructions = codexInstructionsForMessages(input.messages); const prompt = promptForCodexTurn({ messages: input.messages, priorThreadId, @@ -1325,6 +1418,7 @@ export function parseCodexArgsForTest(input: { cwd: input.cwd ?? runCwd(), deckPath: input.deckPath, params: input.params, + instructions, }), ); if (model && model !== "default") {