From 74a45bc73da9fefca849eceffcd6df331334c077 Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Mon, 4 May 2026 15:29:09 -0700 Subject: [PATCH 1/5] feat: Add `@openai/codex-sdk` instrumentation --- e2e/config/pr-comment-scenarios.json | 6 + ...nai-codex-v0128-auto-hook.span-events.json | 204 +++++ ...penai-codex-v0128-wrapped.span-events.json | 204 +++++ .../assertions.ts | 213 ++++++ .../mock-codex-cli.mjs | 101 +++ .../openai-codex-instrumentation/package.json | 14 + .../pnpm-lock.yaml | 93 +++ .../scenario.impl.mjs | 83 ++ .../openai-codex-instrumentation/scenario.mjs | 5 + .../scenario.openai-codex-v0128.mjs | 5 + .../scenario.openai-codex-v0128.ts | 5 + .../scenario.test.ts | 59 ++ .../openai-codex-instrumentation/scenario.ts | 5 + .../auto-instrumentations/bundler/plugin.ts | 2 + .../configs/openai-codex.ts | 33 + js/src/auto-instrumentations/hook.mts | 10 + js/src/auto-instrumentations/index.ts | 1 + js/src/exports.ts | 1 + .../instrumentation/braintrust-plugin.test.ts | 53 ++ js/src/instrumentation/braintrust-plugin.ts | 14 + .../plugins/openai-codex-channels.ts | 29 + .../plugins/openai-codex-plugin.ts | 707 ++++++++++++++++++ js/src/instrumentation/registry.test.ts | 1 + js/src/instrumentation/registry.ts | 12 + js/src/vendor-sdk-types/openai-codex.ts | 215 ++++++ js/src/wrappers/openai-codex.ts | 186 +++++ 26 files changed, 2261 insertions(+) create mode 100644 e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-auto-hook.span-events.json create mode 100644 e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-wrapped.span-events.json create mode 100644 e2e/scenarios/openai-codex-instrumentation/assertions.ts create mode 100755 e2e/scenarios/openai-codex-instrumentation/mock-codex-cli.mjs create mode 100644 e2e/scenarios/openai-codex-instrumentation/package.json create mode 100644 e2e/scenarios/openai-codex-instrumentation/pnpm-lock.yaml create mode 100644 
e2e/scenarios/openai-codex-instrumentation/scenario.impl.mjs create mode 100644 e2e/scenarios/openai-codex-instrumentation/scenario.mjs create mode 100644 e2e/scenarios/openai-codex-instrumentation/scenario.openai-codex-v0128.mjs create mode 100644 e2e/scenarios/openai-codex-instrumentation/scenario.openai-codex-v0128.ts create mode 100644 e2e/scenarios/openai-codex-instrumentation/scenario.test.ts create mode 100644 e2e/scenarios/openai-codex-instrumentation/scenario.ts create mode 100644 js/src/auto-instrumentations/configs/openai-codex.ts create mode 100644 js/src/instrumentation/plugins/openai-codex-channels.ts create mode 100644 js/src/instrumentation/plugins/openai-codex-plugin.ts create mode 100644 js/src/vendor-sdk-types/openai-codex.ts create mode 100644 js/src/wrappers/openai-codex.ts diff --git a/e2e/config/pr-comment-scenarios.json b/e2e/config/pr-comment-scenarios.json index 0108b9e78..bfaf885e0 100644 --- a/e2e/config/pr-comment-scenarios.json +++ b/e2e/config/pr-comment-scenarios.json @@ -9,6 +9,12 @@ { "variantKey": "openai-v6", "label": "v6" } ] }, + { + "scenarioDirName": "openai-codex-instrumentation", + "label": "OpenAI Codex Instrumentation", + "metadataScenario": "openai-codex-instrumentation", + "variants": [{ "variantKey": "openai-codex-v0128", "label": "v0.128" }] + }, { "scenarioDirName": "anthropic-instrumentation", "label": "Anthropic Instrumentation", diff --git a/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-auto-hook.span-events.json b/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-auto-hook.span-events.json new file mode 100644 index 000000000..7a5fd7ed0 --- /dev/null +++ b/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-auto-hook.span-events.json @@ -0,0 +1,204 @@ +{ + "root": { + "has_input": false, + "has_output": false, + "metadata": { + "scenario": "openai-codex-instrumentation" + }, + "metric_keys": [], + "name": 
"openai-codex-instrumentation-root", + "root_span_id": "", + "span_id": "", + "span_parents": [], + "type": "task" + }, + "run": { + "operation": { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "run" + }, + "metric_keys": [], + "name": "openai-codex-run-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + "task": { + "has_input": true, + "has_output": true, + "metadata": { + "model": "gpt-5-codex", + "openai_codex.model": "gpt-5-codex", + "openai_codex.operation": "Thread.run", + "provider": "openai" + }, + "metric_keys": [ + "completion_reasoning_tokens", + "completion_tokens", + "duration", + "prompt_cached_tokens", + "prompt_tokens", + "tokens" + ], + "name": "OpenAI Codex", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + } + }, + "streamed": { + "operation": { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "runStreamed" + }, + "metric_keys": [], + "name": "openai-codex-run-streamed-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + "task": { + "has_input": true, + "has_output": true, + "metadata": { + "model": "gpt-5-codex", + "openai_codex.model": "gpt-5-codex", + "openai_codex.operation": "Thread.runStreamed", + "openai_codex.thread_id": "", + "provider": "openai" + }, + "metric_keys": [ + "completion_reasoning_tokens", + "completion_tokens", + "duration", + "prompt_cached_tokens", + "prompt_tokens", + "tokens" + ], + "name": "OpenAI Codex", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + } + }, + "tools": [ + { + "has_input": true, + "has_output": true, + "metadata": { + "gen_ai.tool.name": "command_execution", + "openai_codex.command.status": "completed", + "openai_codex.item_type": "command_execution" + }, + "metric_keys": [], + "name": "tool: command_execution", + "root_span_id": "", + "span_id": "", + "span_parents": 
[ + "" + ], + "type": "tool" + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "gen_ai.tool.name": "read_file", + "openai_codex.item_type": "mcp_tool_call", + "openai_codex.mcp.server": "filesystem", + "openai_codex.mcp.status": "completed" + }, + "metric_keys": [], + "name": "tool: read_file", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "tool" + }, + { + "has_input": true, + "has_output": false, + "metadata": { + "gen_ai.tool.name": "web_search", + "openai_codex.item_type": "web_search" + }, + "metric_keys": [], + "name": "tool: web_search", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "tool" + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "gen_ai.tool.name": "command_execution", + "openai_codex.command.status": "completed", + "openai_codex.item_type": "command_execution" + }, + "metric_keys": [], + "name": "tool: command_execution", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "tool" + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "gen_ai.tool.name": "read_file", + "openai_codex.item_type": "mcp_tool_call", + "openai_codex.mcp.server": "filesystem", + "openai_codex.mcp.status": "completed" + }, + "metric_keys": [], + "name": "tool: read_file", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "tool" + }, + { + "has_input": true, + "has_output": false, + "metadata": { + "gen_ai.tool.name": "web_search", + "openai_codex.item_type": "web_search" + }, + "metric_keys": [], + "name": "tool: web_search", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "tool" + } + ] +} diff --git a/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-wrapped.span-events.json b/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-wrapped.span-events.json new file mode 100644 index 000000000..7a5fd7ed0 --- /dev/null +++ 
b/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-wrapped.span-events.json @@ -0,0 +1,204 @@ +{ + "root": { + "has_input": false, + "has_output": false, + "metadata": { + "scenario": "openai-codex-instrumentation" + }, + "metric_keys": [], + "name": "openai-codex-instrumentation-root", + "root_span_id": "", + "span_id": "", + "span_parents": [], + "type": "task" + }, + "run": { + "operation": { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "run" + }, + "metric_keys": [], + "name": "openai-codex-run-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + "task": { + "has_input": true, + "has_output": true, + "metadata": { + "model": "gpt-5-codex", + "openai_codex.model": "gpt-5-codex", + "openai_codex.operation": "Thread.run", + "provider": "openai" + }, + "metric_keys": [ + "completion_reasoning_tokens", + "completion_tokens", + "duration", + "prompt_cached_tokens", + "prompt_tokens", + "tokens" + ], + "name": "OpenAI Codex", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + } + }, + "streamed": { + "operation": { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "runStreamed" + }, + "metric_keys": [], + "name": "openai-codex-run-streamed-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + "task": { + "has_input": true, + "has_output": true, + "metadata": { + "model": "gpt-5-codex", + "openai_codex.model": "gpt-5-codex", + "openai_codex.operation": "Thread.runStreamed", + "openai_codex.thread_id": "", + "provider": "openai" + }, + "metric_keys": [ + "completion_reasoning_tokens", + "completion_tokens", + "duration", + "prompt_cached_tokens", + "prompt_tokens", + "tokens" + ], + "name": "OpenAI Codex", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + } + }, + "tools": [ + { + "has_input": true, + "has_output": 
true, + "metadata": { + "gen_ai.tool.name": "command_execution", + "openai_codex.command.status": "completed", + "openai_codex.item_type": "command_execution" + }, + "metric_keys": [], + "name": "tool: command_execution", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "tool" + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "gen_ai.tool.name": "read_file", + "openai_codex.item_type": "mcp_tool_call", + "openai_codex.mcp.server": "filesystem", + "openai_codex.mcp.status": "completed" + }, + "metric_keys": [], + "name": "tool: read_file", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "tool" + }, + { + "has_input": true, + "has_output": false, + "metadata": { + "gen_ai.tool.name": "web_search", + "openai_codex.item_type": "web_search" + }, + "metric_keys": [], + "name": "tool: web_search", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "tool" + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "gen_ai.tool.name": "command_execution", + "openai_codex.command.status": "completed", + "openai_codex.item_type": "command_execution" + }, + "metric_keys": [], + "name": "tool: command_execution", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "tool" + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "gen_ai.tool.name": "read_file", + "openai_codex.item_type": "mcp_tool_call", + "openai_codex.mcp.server": "filesystem", + "openai_codex.mcp.status": "completed" + }, + "metric_keys": [], + "name": "tool: read_file", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "tool" + }, + { + "has_input": true, + "has_output": false, + "metadata": { + "gen_ai.tool.name": "web_search", + "openai_codex.item_type": "web_search" + }, + "metric_keys": [], + "name": "tool: web_search", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "tool" + } + ] +} diff 
--git a/e2e/scenarios/openai-codex-instrumentation/assertions.ts b/e2e/scenarios/openai-codex-instrumentation/assertions.ts new file mode 100644 index 000000000..4d54f6f21 --- /dev/null +++ b/e2e/scenarios/openai-codex-instrumentation/assertions.ts @@ -0,0 +1,213 @@ +import { beforeAll, describe, expect, test } from "vitest"; +import { E2E_TAGS } from "../../helpers/tags"; +import { normalizeForSnapshot, type Json } from "../../helpers/normalize"; +import type { CapturedLogEvent } from "../../helpers/mock-braintrust-server"; +import { + formatJsonFileSnapshot, + resolveFileSnapshotPath, +} from "../../helpers/file-snapshot"; +import { withScenarioHarness } from "../../helpers/scenario-harness"; +import { findLatestSpan } from "../../helpers/trace-selectors"; +import { summarizeWrapperContract } from "../../helpers/wrapper-contract"; +import { ROOT_NAME, SCENARIO_NAME } from "./scenario.impl.mjs"; + +type RunOpenAICodexScenario = (harness: { + runNodeScenarioDir: (options: { + entry: string; + nodeArgs: string[]; + runContext?: { variantKey: string }; + scenarioDir: string; + timeoutMs: number; + }) => Promise; + runScenarioDir: (options: { + entry: string; + runContext?: { variantKey: string }; + scenarioDir: string; + timeoutMs: number; + }) => Promise; +}) => Promise; + +const METADATA_KEYS = [ + "provider", + "model", + "operation", + "scenario", + "gen_ai.tool.name", + "openai_codex.operation", + "openai_codex.model", + "openai_codex.thread_id", + "openai_codex.item_type", + "openai_codex.command.status", + "openai_codex.mcp.server", + "openai_codex.mcp.status", +] as const; + +function summarizeSpan(event: CapturedLogEvent | undefined): Json { + if (!event) { + return null; + } + const summary = summarizeWrapperContract(event, [...METADATA_KEYS]) as Record< + string, + Json + >; + if (summary.metadata && typeof summary.metadata === "object") { + const metadata = summary.metadata as Record; + if (typeof metadata["openai_codex.thread_id"] === "string") { + 
metadata["openai_codex.thread_id"] = ""; + } + } + return summary; +} + +function findCodexTask(events: CapturedLogEvent[], operationName: string) { + const operation = findLatestSpan(events, operationName); + return [...events] + .reverse() + .find( + (event) => + event.span.name === "OpenAI Codex" && + event.span.parentIds.includes(operation?.span.id ?? ""), + ); +} + +function latestSpansByType( + events: CapturedLogEvent[], + type: string, +): CapturedLogEvent[] { + const order: string[] = []; + const latest = new Map(); + + for (const event of events) { + if (event.span.type !== type || !event.span.id) { + continue; + } + if (!latest.has(event.span.id)) { + order.push(event.span.id); + } + latest.set(event.span.id, event); + } + + return order.flatMap((spanId) => { + const event = latest.get(spanId); + return event ? [event] : []; + }); +} + +function summarize(events: CapturedLogEvent[]): Json { + const runTask = findCodexTask(events, "openai-codex-run-operation"); + const streamedTask = findCodexTask( + events, + "openai-codex-run-streamed-operation", + ); + const toolSpans = latestSpansByType(events, "tool"); + + return normalizeForSnapshot({ + root: summarizeSpan(findLatestSpan(events, ROOT_NAME)), + run: { + operation: summarizeSpan( + findLatestSpan(events, "openai-codex-run-operation"), + ), + task: summarizeSpan(runTask), + }, + streamed: { + operation: summarizeSpan( + findLatestSpan(events, "openai-codex-run-streamed-operation"), + ), + task: summarizeSpan(streamedTask), + }, + tools: toolSpans.map(summarizeSpan), + } as Json); +} + +export function defineOpenAICodexInstrumentationAssertions(options: { + name: string; + runScenario: RunOpenAICodexScenario; + snapshotName: string; + testFileUrl: string; + timeoutMs: number; +}): void { + const snapshotPath = resolveFileSnapshotPath( + options.testFileUrl, + `${options.snapshotName}.span-events.json`, + ); + const testConfig = { + tags: [E2E_TAGS.hermetic], + timeout: options.timeoutMs, + }; + + 
describe(options.name, () => { + let events: CapturedLogEvent[] = []; + + beforeAll(async () => { + await withScenarioHarness(async (harness) => { + await options.runScenario(harness); + events = harness.events(); + }); + }, options.timeoutMs); + + test("captures the root trace", testConfig, () => { + const root = findLatestSpan(events, ROOT_NAME); + + expect(root).toBeDefined(); + expect(root?.row.metadata).toMatchObject({ scenario: SCENARIO_NAME }); + }); + + test("captures Codex task spans", testConfig, () => { + for (const operationName of [ + "openai-codex-run-operation", + "openai-codex-run-streamed-operation", + ]) { + const operation = findLatestSpan(events, operationName); + const task = findCodexTask(events, operationName); + + expect(operation).toBeDefined(); + expect(task).toBeDefined(); + expect(task?.span.parentIds).toEqual([operation?.span.id ?? ""]); + expect(task?.row.metadata).toMatchObject({ + provider: "openai", + }); + } + }); + + test("captures command and MCP tool spans", testConfig, () => { + const toolSpans = latestSpansByType(events, "tool"); + + expect( + toolSpans.some( + (event) => + event.span.name === "tool: command_execution" && + event.output === "codex_tool_ok", + ), + ).toBe(true); + expect( + toolSpans.some( + (event) => + event.span.name === "tool: read_file" && + event.metadata?.["openai_codex.mcp.server"] === "filesystem", + ), + ).toBe(true); + }); + + test("captures final responses and usage metrics", testConfig, () => { + const runTask = findCodexTask(events, "openai-codex-run-operation"); + const streamedTask = findCodexTask( + events, + "openai-codex-run-streamed-operation", + ); + + expect(runTask?.output).toContain("RUN_OK"); + expect(streamedTask?.output).toContain("STREAM_OK"); + expect(runTask?.metrics).toMatchObject({ + completion_tokens: 7, + prompt_cached_tokens: 3, + prompt_tokens: 11, + }); + }); + + test("matches the shared span snapshot", testConfig, async () => { + await expect( + 
formatJsonFileSnapshot(summarize(events)), + ).toMatchFileSnapshot(snapshotPath); + }); + }); +} diff --git a/e2e/scenarios/openai-codex-instrumentation/mock-codex-cli.mjs b/e2e/scenarios/openai-codex-instrumentation/mock-codex-cli.mjs new file mode 100755 index 000000000..510abdf04 --- /dev/null +++ b/e2e/scenarios/openai-codex-instrumentation/mock-codex-cli.mjs @@ -0,0 +1,101 @@ +#!/usr/bin/env node + +let input = ""; + +process.stdin.setEncoding("utf8"); +process.stdin.on("data", (chunk) => { + input += chunk; +}); +process.stdin.on("end", () => { + const isStream = input.includes("stream"); + const suffix = isStream ? "STREAM_OK" : "RUN_OK"; + const threadId = isStream ? "thread_stream" : "thread_run"; + const events = [ + { type: "thread.started", thread_id: threadId }, + { type: "turn.started" }, + { + type: "item.started", + item: { + id: `${threadId}_command`, + type: "command_execution", + command: "printf codex_tool_ok", + aggregated_output: "", + status: "in_progress", + }, + }, + { + type: "item.completed", + item: { + id: `${threadId}_command`, + type: "command_execution", + command: "printf codex_tool_ok", + aggregated_output: "codex_tool_ok", + exit_code: 0, + status: "completed", + }, + }, + { + type: "item.started", + item: { + id: `${threadId}_mcp`, + type: "mcp_tool_call", + server: "filesystem", + tool: "read_file", + arguments: { path: "README.md" }, + status: "in_progress", + }, + }, + { + type: "item.completed", + item: { + id: `${threadId}_mcp`, + type: "mcp_tool_call", + server: "filesystem", + tool: "read_file", + arguments: { path: "README.md" }, + result: { + content: [{ type: "text", text: "mock file" }], + structured_content: { ok: true }, + }, + status: "completed", + }, + }, + { + type: "item.completed", + item: { + id: `${threadId}_web`, + type: "web_search", + query: "braintrust codex instrumentation", + }, + }, + { + type: "item.completed", + item: { + id: `${threadId}_reasoning`, + type: "reasoning", + text: `reasoning 
${suffix}`, + }, + }, + { + type: "item.completed", + item: { + id: `${threadId}_message`, + type: "agent_message", + text: `Codex ${suffix}`, + }, + }, + { + type: "turn.completed", + usage: { + input_tokens: 11, + cached_input_tokens: 3, + output_tokens: 7, + reasoning_output_tokens: 5, + }, + }, + ]; + + for (const event of events) { + process.stdout.write(`${JSON.stringify(event)}\n`); + } +}); diff --git a/e2e/scenarios/openai-codex-instrumentation/package.json b/e2e/scenarios/openai-codex-instrumentation/package.json new file mode 100644 index 000000000..babaee877 --- /dev/null +++ b/e2e/scenarios/openai-codex-instrumentation/package.json @@ -0,0 +1,14 @@ +{ + "name": "@braintrust/e2e-openai-codex-instrumentation", + "private": true, + "braintrustScenario": { + "canary": { + "dependencies": { + "openai-codex-sdk-v0128": "@openai/codex-sdk@latest" + } + } + }, + "dependencies": { + "openai-codex-sdk-v0128": "npm:@openai/codex-sdk@0.128.0" + } +} diff --git a/e2e/scenarios/openai-codex-instrumentation/pnpm-lock.yaml b/e2e/scenarios/openai-codex-instrumentation/pnpm-lock.yaml new file mode 100644 index 000000000..015850049 --- /dev/null +++ b/e2e/scenarios/openai-codex-instrumentation/pnpm-lock.yaml @@ -0,0 +1,93 @@ +lockfileVersion: '9.0' + +settings: + autoInstallPeers: true + excludeLinksFromLockfile: false + +importers: + + .: + dependencies: + openai-codex-sdk-v0128: + specifier: npm:@openai/codex-sdk@0.128.0 + version: '@openai/codex-sdk@0.128.0' + +packages: + + '@openai/codex-sdk@0.128.0': + resolution: {integrity: sha512-Eao0LLA5x90qwU6SXYd21h4KxdCef1WpCvHFgKdbqzWMJ79lUvguGDGvx1RheP+zTdKGxJfJ6dulI5wSXoUBhQ==} + engines: {node: '>=18'} + + '@openai/codex@0.128.0': + resolution: {integrity: sha512-+xp6ODmFfBNnexIWRHApEaPXot2j6gyM8A5we/5IS/uY4eYHj4arETct4hQ5M4eO+MK7JY3ZU4xhuobhlysr0A==} + engines: {node: '>=16'} + hasBin: true + + '@openai/codex@0.128.0-darwin-arm64': + resolution: {integrity: 
sha512-w+6zohfHx/kHBdles/CyFKaY57u9I3nK8QI9+NrdwMliKA0b7xn13yblRNkMpe09j6vL1oAWoxYsMOQ/vjBGug==} + engines: {node: '>=16'} + cpu: [arm64] + os: [darwin] + + '@openai/codex@0.128.0-darwin-x64': + resolution: {integrity: sha512-SDbn6fO22Puy8xmMIbZi4f2znMrUEPwABApke4mo+4ihaauwuVjeqzXvW5SPJz5ty/bG11/mSupQgReT7T8BBw==} + engines: {node: '>=16'} + cpu: [x64] + os: [darwin] + + '@openai/codex@0.128.0-linux-arm64': + resolution: {integrity: sha512-+SvH73H60qvCXFuQGP/EsmR//s1hHMBR22PvJkXvM/hdnTIGucx+JqRUjAWdmmQ1IU6j3kgwVvdLW/6ICB+M6w==} + engines: {node: '>=16'} + cpu: [arm64] + os: [linux] + + '@openai/codex@0.128.0-linux-x64': + resolution: {integrity: sha512-2lnSPA05CRRuKAzFW8BCmmNCSieDcToLwfC2ALLbBYilGLgzhRibjlDglK9F1BkEzfohSSWJu4PBbRu/aG60lQ==} + engines: {node: '>=16'} + cpu: [x64] + os: [linux] + + '@openai/codex@0.128.0-win32-arm64': + resolution: {integrity: sha512-ECJvsqmYFdA9pn42xxK3Odp/G16AjmBW0BglX8L0PwPjqbstbmlew9bfHf7xvL+SNfNl4NmyotW0+RNo1phgaA==} + engines: {node: '>=16'} + cpu: [arm64] + os: [win32] + + '@openai/codex@0.128.0-win32-x64': + resolution: {integrity: sha512-k3jmUAFrzkUtvjGTXvSKjQqJLLlzjxp/VoHJDYedgmXUn6j70HxK38IwapzmnYfiBiTuzETvGwjXHzZgzKjhoQ==} + engines: {node: '>=16'} + cpu: [x64] + os: [win32] + +snapshots: + + '@openai/codex-sdk@0.128.0': + dependencies: + '@openai/codex': 0.128.0 + + '@openai/codex@0.128.0': + optionalDependencies: + '@openai/codex-darwin-arm64': '@openai/codex@0.128.0-darwin-arm64' + '@openai/codex-darwin-x64': '@openai/codex@0.128.0-darwin-x64' + '@openai/codex-linux-arm64': '@openai/codex@0.128.0-linux-arm64' + '@openai/codex-linux-x64': '@openai/codex@0.128.0-linux-x64' + '@openai/codex-win32-arm64': '@openai/codex@0.128.0-win32-arm64' + '@openai/codex-win32-x64': '@openai/codex@0.128.0-win32-x64' + + '@openai/codex@0.128.0-darwin-arm64': + optional: true + + '@openai/codex@0.128.0-darwin-x64': + optional: true + + '@openai/codex@0.128.0-linux-arm64': + optional: true + + '@openai/codex@0.128.0-linux-x64': + optional: 
true + + '@openai/codex@0.128.0-win32-arm64': + optional: true + + '@openai/codex@0.128.0-win32-x64': + optional: true diff --git a/e2e/scenarios/openai-codex-instrumentation/scenario.impl.mjs b/e2e/scenarios/openai-codex-instrumentation/scenario.impl.mjs new file mode 100644 index 000000000..c63ae20e6 --- /dev/null +++ b/e2e/scenarios/openai-codex-instrumentation/scenario.impl.mjs @@ -0,0 +1,83 @@ +import { wrapOpenAICodexSDK } from "braintrust"; +import { + collectAsync, + runOperation, + runTracedScenario, +} from "../../helpers/provider-runtime.mjs"; +import path from "node:path"; +import { fileURLToPath } from "node:url"; + +export const ROOT_NAME = "openai-codex-instrumentation-root"; +export const SCENARIO_NAME = "openai-codex-instrumentation"; + +const SCENARIO_DIR = path.dirname(fileURLToPath(import.meta.url)); +const MOCK_CODEX_PATH = path.join(SCENARIO_DIR, "mock-codex-cli.mjs"); + +function createClient(SDK) { + const { Codex } = SDK; + return new Codex({ + apiKey: "test-key", + codexPathOverride: MOCK_CODEX_PATH, + env: { + PATH: process.env.PATH ?? "", + }, + }); +} + +function startThread(client) { + return client.startThread({ + approvalPolicy: "never", + model: "gpt-5-codex", + modelReasoningEffort: "low", + networkAccessEnabled: false, + sandboxMode: "danger-full-access", + webSearchMode: "disabled", + workingDirectory: process.cwd(), + }); +} + +async function runOpenAICodexScenario({ decorateSDK, sdk }) { + const instrumentedSDK = decorateSDK ? 
decorateSDK(sdk) : sdk; + const client = createClient(instrumentedSDK); + + await runTracedScenario({ + callback: async () => { + await runOperation("openai-codex-run-operation", "run", async () => { + const thread = startThread(client); + await thread.run("Return Codex RUN_OK after using a command."); + }); + + await runOperation( + "openai-codex-run-streamed-operation", + "runStreamed", + async () => { + const thread = startThread(client); + const streamedTurn = await thread.runStreamed( + "Return Codex STREAM_OK after using a command in stream mode.", + ); + await collectAsync(streamedTurn.events); + }, + ); + }, + flushCount: 2, + flushDelayMs: 100, + metadata: { + scenario: SCENARIO_NAME, + }, + projectNameBase: "e2e-openai-codex-instrumentation", + rootName: ROOT_NAME, + }); +} + +export async function runWrappedOpenAICodexInstrumentation(sdk) { + await runOpenAICodexScenario({ + decorateSDK: wrapOpenAICodexSDK, + sdk, + }); +} + +export async function runAutoOpenAICodexInstrumentation(sdk) { + await runOpenAICodexScenario({ + sdk, + }); +} diff --git a/e2e/scenarios/openai-codex-instrumentation/scenario.mjs b/e2e/scenarios/openai-codex-instrumentation/scenario.mjs new file mode 100644 index 000000000..d7cbe608d --- /dev/null +++ b/e2e/scenarios/openai-codex-instrumentation/scenario.mjs @@ -0,0 +1,5 @@ +import * as OpenAICodexSDK from "./node_modules/openai-codex-sdk-v0128/dist/index.js"; +import { runMain } from "../../helpers/provider-runtime.mjs"; +import { runAutoOpenAICodexInstrumentation } from "./scenario.impl.mjs"; + +runMain(() => runAutoOpenAICodexInstrumentation(OpenAICodexSDK)); diff --git a/e2e/scenarios/openai-codex-instrumentation/scenario.openai-codex-v0128.mjs b/e2e/scenarios/openai-codex-instrumentation/scenario.openai-codex-v0128.mjs new file mode 100644 index 000000000..d7cbe608d --- /dev/null +++ b/e2e/scenarios/openai-codex-instrumentation/scenario.openai-codex-v0128.mjs @@ -0,0 +1,5 @@ +import * as OpenAICodexSDK from 
"./node_modules/openai-codex-sdk-v0128/dist/index.js"; +import { runMain } from "../../helpers/provider-runtime.mjs"; +import { runAutoOpenAICodexInstrumentation } from "./scenario.impl.mjs"; + +runMain(() => runAutoOpenAICodexInstrumentation(OpenAICodexSDK)); diff --git a/e2e/scenarios/openai-codex-instrumentation/scenario.openai-codex-v0128.ts b/e2e/scenarios/openai-codex-instrumentation/scenario.openai-codex-v0128.ts new file mode 100644 index 000000000..c502c8b44 --- /dev/null +++ b/e2e/scenarios/openai-codex-instrumentation/scenario.openai-codex-v0128.ts @@ -0,0 +1,5 @@ +import * as OpenAICodexSDK from "./node_modules/openai-codex-sdk-v0128/dist/index.js"; +import { runMain } from "../../helpers/provider-runtime.mjs"; +import { runWrappedOpenAICodexInstrumentation } from "./scenario.impl.mjs"; + +runMain(() => runWrappedOpenAICodexInstrumentation(OpenAICodexSDK)); diff --git a/e2e/scenarios/openai-codex-instrumentation/scenario.test.ts b/e2e/scenarios/openai-codex-instrumentation/scenario.test.ts new file mode 100644 index 000000000..d23b8df49 --- /dev/null +++ b/e2e/scenarios/openai-codex-instrumentation/scenario.test.ts @@ -0,0 +1,59 @@ +import { describe } from "vitest"; +import { + prepareScenarioDir, + readInstalledPackageVersion, + resolveScenarioDir, +} from "../../helpers/scenario-harness"; +import { defineOpenAICodexInstrumentationAssertions } from "./assertions"; + +const scenarioDir = await prepareScenarioDir({ + scenarioDir: resolveScenarioDir(import.meta.url), +}); +const TIMEOUT_MS = 120_000; +const openAICodexScenario = { + autoEntry: "scenario.openai-codex-v0128.mjs", + autoSnapshotName: "openai-codex-v0128-auto-hook", + dependencyName: "openai-codex-sdk-v0128", + version: await readInstalledPackageVersion( + scenarioDir, + "openai-codex-sdk-v0128", + ), + wrapperEntry: "scenario.openai-codex-v0128.ts", + wrapperSnapshotName: "openai-codex-v0128-wrapped", + variantKey: "openai-codex-v0128", +}; + +describe("wrapped instrumentation", () => { + 
defineOpenAICodexInstrumentationAssertions({ + name: `openai codex sdk ${openAICodexScenario.version}`, + runScenario: async ({ runScenarioDir }) => { + await runScenarioDir({ + entry: openAICodexScenario.wrapperEntry, + runContext: { variantKey: openAICodexScenario.variantKey }, + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); + }, + snapshotName: openAICodexScenario.wrapperSnapshotName, + testFileUrl: import.meta.url, + timeoutMs: TIMEOUT_MS, + }); +}); + +describe("auto-hook instrumentation", () => { + defineOpenAICodexInstrumentationAssertions({ + name: `openai codex sdk ${openAICodexScenario.version}`, + runScenario: async ({ runNodeScenarioDir }) => { + await runNodeScenarioDir({ + entry: openAICodexScenario.autoEntry, + nodeArgs: ["--import", "braintrust/hook.mjs"], + runContext: { variantKey: openAICodexScenario.variantKey }, + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); + }, + snapshotName: openAICodexScenario.autoSnapshotName, + testFileUrl: import.meta.url, + timeoutMs: TIMEOUT_MS, + }); +}); diff --git a/e2e/scenarios/openai-codex-instrumentation/scenario.ts b/e2e/scenarios/openai-codex-instrumentation/scenario.ts new file mode 100644 index 000000000..c502c8b44 --- /dev/null +++ b/e2e/scenarios/openai-codex-instrumentation/scenario.ts @@ -0,0 +1,5 @@ +import * as OpenAICodexSDK from "./node_modules/openai-codex-sdk-v0128/dist/index.js"; +import { runMain } from "../../helpers/provider-runtime.mjs"; +import { runWrappedOpenAICodexInstrumentation } from "./scenario.impl.mjs"; + +runMain(() => runWrappedOpenAICodexInstrumentation(OpenAICodexSDK)); diff --git a/js/src/auto-instrumentations/bundler/plugin.ts b/js/src/auto-instrumentations/bundler/plugin.ts index 2462c5faf..df4ec1052 100644 --- a/js/src/auto-instrumentations/bundler/plugin.ts +++ b/js/src/auto-instrumentations/bundler/plugin.ts @@ -21,6 +21,7 @@ import { readFileSync } from "fs"; import { fileURLToPath } from "url"; import moduleDetailsFromPath from "module-details-from-path"; import { 
openaiConfigs } from "../configs/openai"; +import { openAICodexConfigs } from "../configs/openai-codex"; import { anthropicConfigs } from "../configs/anthropic"; import { aiSDKConfigs } from "../configs/ai-sdk"; import { claudeAgentSDKConfigs } from "../configs/claude-agent-sdk"; @@ -74,6 +75,7 @@ function getModuleVersion(basedir: string): string | undefined { export const unplugin = createUnplugin((options = {}) => { const allInstrumentations = [ ...openaiConfigs, + ...openAICodexConfigs, ...anthropicConfigs, ...aiSDKConfigs, ...claudeAgentSDKConfigs, diff --git a/js/src/auto-instrumentations/configs/openai-codex.ts b/js/src/auto-instrumentations/configs/openai-codex.ts new file mode 100644 index 000000000..adf9046a3 --- /dev/null +++ b/js/src/auto-instrumentations/configs/openai-codex.ts @@ -0,0 +1,33 @@ +import type { InstrumentationConfig } from "@apm-js-collab/code-transformer"; +import { openAICodexChannels } from "../../instrumentation/plugins/openai-codex-channels"; + +const openAICodexVersionRange = ">=0.128.0 <1.0.0"; + +export const openAICodexConfigs: InstrumentationConfig[] = [ + { + channelName: openAICodexChannels.run.channelName, + module: { + name: "@openai/codex-sdk", + versionRange: openAICodexVersionRange, + filePath: "dist/index.js", + }, + functionQuery: { + className: "Thread", + methodName: "run", + kind: "Async", + }, + }, + { + channelName: openAICodexChannels.runStreamed.channelName, + module: { + name: "@openai/codex-sdk", + versionRange: openAICodexVersionRange, + filePath: "dist/index.js", + }, + functionQuery: { + className: "Thread", + methodName: "runStreamed", + kind: "Async", + }, + }, +]; diff --git a/js/src/auto-instrumentations/hook.mts b/js/src/auto-instrumentations/hook.mts index 46fe0f0d2..f292b684f 100644 --- a/js/src/auto-instrumentations/hook.mts +++ b/js/src/auto-instrumentations/hook.mts @@ -15,6 +15,7 @@ import { register } from "node:module"; import { openaiConfigs } from "./configs/openai.js"; +import { 
openAICodexConfigs } from "./configs/openai-codex.js"; import { anthropicConfigs } from "./configs/anthropic.js"; import { aiSDKConfigs } from "./configs/ai-sdk.js"; import { claudeAgentSDKConfigs } from "./configs/claude-agent-sdk.js"; @@ -62,6 +63,15 @@ const disabledIntegrations = readDisabledIntegrations(); // transformation and runtime plugins stay aligned. const allConfigs = [ ...(isDisabled(disabledIntegrations, "openai") ? [] : openaiConfigs), + ...(isDisabled( + disabledIntegrations, + "openai-codex", + "openai-codex-sdk", + "codex", + "codex-sdk", + ) + ? [] + : openAICodexConfigs), ...(isDisabled(disabledIntegrations, "anthropic") ? [] : anthropicConfigs), ...(isDisabled(disabledIntegrations, "aisdk", "ai-sdk", "vercel-ai") ? [] diff --git a/js/src/auto-instrumentations/index.ts b/js/src/auto-instrumentations/index.ts index bdac954ab..6a5eb850f 100644 --- a/js/src/auto-instrumentations/index.ts +++ b/js/src/auto-instrumentations/index.ts @@ -29,6 +29,7 @@ */ export { openaiConfigs } from "./configs/openai"; +export { openAICodexConfigs } from "./configs/openai-codex"; export { anthropicConfigs } from "./configs/anthropic"; export { aiSDKConfigs } from "./configs/ai-sdk"; export { claudeAgentSDKConfigs } from "./configs/claude-agent-sdk"; diff --git a/js/src/exports.ts b/js/src/exports.ts index 02dadbf66..21e057ff1 100644 --- a/js/src/exports.ts +++ b/js/src/exports.ts @@ -177,6 +177,7 @@ export { export { wrapAnthropic } from "./wrappers/anthropic"; export { wrapMastraAgent } from "./wrappers/mastra"; export { wrapClaudeAgentSDK } from "./wrappers/claude-agent-sdk/claude-agent-sdk"; +export { wrapOpenAICodexSDK } from "./wrappers/openai-codex"; export { wrapCursorSDK } from "./wrappers/cursor-sdk"; export { wrapGoogleGenAI } from "./wrappers/google-genai"; export { wrapGoogleADK } from "./wrappers/google-adk"; diff --git a/js/src/instrumentation/braintrust-plugin.test.ts b/js/src/instrumentation/braintrust-plugin.test.ts index aec836b66..c537cb601 100644 
--- a/js/src/instrumentation/braintrust-plugin.test.ts +++ b/js/src/instrumentation/braintrust-plugin.test.ts @@ -1,6 +1,7 @@ import { describe, it, expect, vi, beforeEach } from "vitest"; import { BraintrustPlugin } from "./braintrust-plugin"; import { OpenAIPlugin } from "./plugins/openai-plugin"; +import { OpenAICodexPlugin } from "./plugins/openai-codex-plugin"; import { AnthropicPlugin } from "./plugins/anthropic-plugin"; import { AISDKPlugin } from "./plugins/ai-sdk-plugin"; import { ClaudeAgentSDKPlugin } from "./plugins/claude-agent-sdk-plugin"; @@ -37,6 +38,10 @@ vi.mock("./plugins/anthropic-plugin", () => ({ AnthropicPlugin: createPluginClassMock(), })); +vi.mock("./plugins/openai-codex-plugin", () => ({ + OpenAICodexPlugin: createPluginClassMock(), +})); + vi.mock("./plugins/ai-sdk-plugin", () => ({ AISDKPlugin: createPluginClassMock(), })); @@ -97,6 +102,15 @@ describe("BraintrustPlugin", () => { expect(mockInstance.enable).toHaveBeenCalledTimes(1); }); + it("should create and enable OpenAI Codex plugin by default", () => { + const plugin = new BraintrustPlugin(); + plugin.enable(); + + expect(OpenAICodexPlugin).toHaveBeenCalledTimes(1); + const mockInstance = vi.mocked(OpenAICodexPlugin).mock.results[0].value; + expect(mockInstance.enable).toHaveBeenCalledTimes(1); + }); + it("should create and enable AI SDK plugin by default", () => { const plugin = new BraintrustPlugin(); plugin.enable(); @@ -186,6 +200,7 @@ describe("BraintrustPlugin", () => { plugin.enable(); expect(OpenAIPlugin).toHaveBeenCalledTimes(1); + expect(OpenAICodexPlugin).toHaveBeenCalledTimes(1); expect(AnthropicPlugin).toHaveBeenCalledTimes(1); expect(AISDKPlugin).toHaveBeenCalledTimes(1); expect(ClaudeAgentSDKPlugin).toHaveBeenCalledTimes(1); @@ -203,6 +218,7 @@ describe("BraintrustPlugin", () => { plugin.enable(); expect(OpenAIPlugin).toHaveBeenCalledTimes(1); + expect(OpenAICodexPlugin).toHaveBeenCalledTimes(1); expect(AnthropicPlugin).toHaveBeenCalledTimes(1); 
expect(AISDKPlugin).toHaveBeenCalledTimes(1); expect(ClaudeAgentSDKPlugin).toHaveBeenCalledTimes(1); @@ -220,6 +236,7 @@ describe("BraintrustPlugin", () => { plugin.enable(); expect(OpenAIPlugin).toHaveBeenCalledTimes(1); + expect(OpenAICodexPlugin).toHaveBeenCalledTimes(1); expect(AnthropicPlugin).toHaveBeenCalledTimes(1); expect(AISDKPlugin).toHaveBeenCalledTimes(1); expect(ClaudeAgentSDKPlugin).toHaveBeenCalledTimes(1); @@ -260,6 +277,7 @@ describe("BraintrustPlugin", () => { expect(AnthropicPlugin).not.toHaveBeenCalled(); // Other plugins should still be created expect(OpenAIPlugin).toHaveBeenCalledTimes(1); + expect(OpenAICodexPlugin).toHaveBeenCalledTimes(1); expect(AISDKPlugin).toHaveBeenCalledTimes(1); expect(ClaudeAgentSDKPlugin).toHaveBeenCalledTimes(1); expect(GoogleGenAIPlugin).toHaveBeenCalledTimes(1); @@ -268,6 +286,30 @@ describe("BraintrustPlugin", () => { expect(OpenRouterAgentPlugin).toHaveBeenCalledTimes(1); }); + it("should not create OpenAI Codex plugin when openaiCodex: false", () => { + const plugin = new BraintrustPlugin({ + integrations: { openaiCodex: false }, + }); + plugin.enable(); + + expect(OpenAICodexPlugin).not.toHaveBeenCalled(); + expect(OpenAIPlugin).toHaveBeenCalledTimes(1); + expect(AnthropicPlugin).toHaveBeenCalledTimes(1); + expect(AISDKPlugin).toHaveBeenCalledTimes(1); + }); + + it("should not create OpenAI Codex plugin when codex: false", () => { + const plugin = new BraintrustPlugin({ + integrations: { codex: false }, + }); + plugin.enable(); + + expect(OpenAICodexPlugin).not.toHaveBeenCalled(); + expect(OpenAIPlugin).toHaveBeenCalledTimes(1); + expect(AnthropicPlugin).toHaveBeenCalledTimes(1); + expect(AISDKPlugin).toHaveBeenCalledTimes(1); + }); + it("should not create AI SDK plugin when aisdk: false", () => { const plugin = new BraintrustPlugin({ integrations: { aisdk: false }, @@ -417,6 +459,8 @@ describe("BraintrustPlugin", () => { const plugin = new BraintrustPlugin({ integrations: { openai: false, + openaiCodex: 
false, + codex: false, anthropic: false, aisdk: false, claudeAgentSDK: false, @@ -432,6 +476,7 @@ describe("BraintrustPlugin", () => { plugin.enable(); expect(OpenAIPlugin).not.toHaveBeenCalled(); + expect(OpenAICodexPlugin).not.toHaveBeenCalled(); expect(AnthropicPlugin).not.toHaveBeenCalled(); expect(AISDKPlugin).not.toHaveBeenCalled(); expect(ClaudeAgentSDKPlugin).not.toHaveBeenCalled(); @@ -567,6 +612,8 @@ describe("BraintrustPlugin", () => { plugin.enable(); const openaiMock = vi.mocked(OpenAIPlugin).mock.results[0].value; + const openAICodexMock = + vi.mocked(OpenAICodexPlugin).mock.results[0].value; const anthropicMock = vi.mocked(AnthropicPlugin).mock.results[0].value; const aiSDKMock = vi.mocked(AISDKPlugin).mock.results[0].value; const claudeAgentSDKMock = @@ -583,6 +630,7 @@ describe("BraintrustPlugin", () => { const groqMock = vi.mocked(GroqPlugin).mock.results[0].value; expect(openaiMock.enable).toHaveBeenCalledTimes(1); + expect(openAICodexMock.enable).toHaveBeenCalledTimes(1); expect(anthropicMock.enable).toHaveBeenCalledTimes(1); expect(aiSDKMock.enable).toHaveBeenCalledTimes(1); expect(claudeAgentSDKMock.enable).toHaveBeenCalledTimes(1); @@ -600,6 +648,8 @@ describe("BraintrustPlugin", () => { plugin.enable(); const openaiMock = vi.mocked(OpenAIPlugin).mock.results[0].value; + const openAICodexMock = + vi.mocked(OpenAICodexPlugin).mock.results[0].value; const anthropicMock = vi.mocked(AnthropicPlugin).mock.results[0].value; const aiSDKMock = vi.mocked(AISDKPlugin).mock.results[0].value; const claudeAgentSDKMock = @@ -618,6 +668,7 @@ describe("BraintrustPlugin", () => { plugin.disable(); expect(openaiMock.disable).toHaveBeenCalledTimes(1); + expect(openAICodexMock.disable).toHaveBeenCalledTimes(1); expect(anthropicMock.disable).toHaveBeenCalledTimes(1); expect(aiSDKMock.disable).toHaveBeenCalledTimes(1); expect(claudeAgentSDKMock.disable).toHaveBeenCalledTimes(1); @@ -662,6 +713,7 @@ describe("BraintrustPlugin", () => { // Should not create any 
plugins expect(OpenAIPlugin).not.toHaveBeenCalled(); + expect(OpenAICodexPlugin).not.toHaveBeenCalled(); expect(AnthropicPlugin).not.toHaveBeenCalled(); expect(AISDKPlugin).not.toHaveBeenCalled(); expect(ClaudeAgentSDKPlugin).not.toHaveBeenCalled(); @@ -684,6 +736,7 @@ describe("BraintrustPlugin", () => { plugin.enable(); expect(OpenAIPlugin).toHaveBeenCalledTimes(1); + expect(OpenAICodexPlugin).toHaveBeenCalledTimes(1); expect(AnthropicPlugin).toHaveBeenCalledTimes(1); expect(AISDKPlugin).toHaveBeenCalledTimes(1); expect(ClaudeAgentSDKPlugin).toHaveBeenCalledTimes(1); diff --git a/js/src/instrumentation/braintrust-plugin.ts b/js/src/instrumentation/braintrust-plugin.ts index 5db01b441..d03932203 100644 --- a/js/src/instrumentation/braintrust-plugin.ts +++ b/js/src/instrumentation/braintrust-plugin.ts @@ -1,5 +1,6 @@ import { BasePlugin } from "./core"; import { OpenAIPlugin } from "./plugins/openai-plugin"; +import { OpenAICodexPlugin } from "./plugins/openai-codex-plugin"; import { AnthropicPlugin } from "./plugins/anthropic-plugin"; import { AISDKPlugin } from "./plugins/ai-sdk-plugin"; import { ClaudeAgentSDKPlugin } from "./plugins/claude-agent-sdk-plugin"; @@ -16,6 +17,8 @@ import { GroqPlugin } from "./plugins/groq-plugin"; export interface BraintrustPluginConfig { integrations?: { openai?: boolean; + openaiCodex?: boolean; + codex?: boolean; anthropic?: boolean; vercel?: boolean; aisdk?: boolean; @@ -53,6 +56,7 @@ export interface BraintrustPluginConfig { export class BraintrustPlugin extends BasePlugin { private config: BraintrustPluginConfig; private openaiPlugin: OpenAIPlugin | null = null; + private openAICodexPlugin: OpenAICodexPlugin | null = null; private anthropicPlugin: AnthropicPlugin | null = null; private aiSDKPlugin: AISDKPlugin | null = null; private claudeAgentSDKPlugin: ClaudeAgentSDKPlugin | null = null; @@ -80,6 +84,11 @@ export class BraintrustPlugin extends BasePlugin { this.openaiPlugin.enable(); } + if (integrations.openaiCodex !== 
false && integrations.codex !== false) { + this.openAICodexPlugin = new OpenAICodexPlugin(); + this.openAICodexPlugin.enable(); + } + // Enable Anthropic integration (default: true) if (integrations.anthropic !== false) { this.anthropicPlugin = new AnthropicPlugin(); @@ -154,6 +163,11 @@ export class BraintrustPlugin extends BasePlugin { this.openaiPlugin = null; } + if (this.openAICodexPlugin) { + this.openAICodexPlugin.disable(); + this.openAICodexPlugin = null; + } + if (this.anthropicPlugin) { this.anthropicPlugin.disable(); this.anthropicPlugin = null; diff --git a/js/src/instrumentation/plugins/openai-codex-channels.ts b/js/src/instrumentation/plugins/openai-codex-channels.ts new file mode 100644 index 000000000..8e31d1bbe --- /dev/null +++ b/js/src/instrumentation/plugins/openai-codex-channels.ts @@ -0,0 +1,29 @@ +import { channel, defineChannels } from "../core/channel-definitions"; +import type { + OpenAICodexInput, + OpenAICodexStreamedTurn, + OpenAICodexThread, + OpenAICodexThreadEvent, + OpenAICodexTurn, + OpenAICodexTurnOptions, +} from "../../vendor-sdk-types/openai-codex"; + +export const openAICodexChannels = defineChannels("@openai/codex-sdk", { + run: channel< + [OpenAICodexInput, OpenAICodexTurnOptions | undefined], + OpenAICodexTurn, + { operation?: "run"; thread?: OpenAICodexThread } + >({ + channelName: "Thread.run", + kind: "async", + }), + runStreamed: channel< + [OpenAICodexInput, OpenAICodexTurnOptions | undefined], + OpenAICodexStreamedTurn, + { operation?: "runStreamed"; thread?: OpenAICodexThread }, + OpenAICodexThreadEvent + >({ + channelName: "Thread.runStreamed", + kind: "async", + }), +}); diff --git a/js/src/instrumentation/plugins/openai-codex-plugin.ts b/js/src/instrumentation/plugins/openai-codex-plugin.ts new file mode 100644 index 000000000..deffe8187 --- /dev/null +++ b/js/src/instrumentation/plugins/openai-codex-plugin.ts @@ -0,0 +1,707 @@ +import { BasePlugin } from "../core"; +import type { ChannelMessage } from 
"../core/channel-definitions"; +import type { IsoChannelHandlers } from "../../isomorph"; +import { debugLogger } from "../../debug-logger"; +import { startSpan } from "../../logger"; +import type { Span } from "../../logger"; +import { getCurrentUnixTimestamp } from "../../util"; +import { SpanTypeAttribute } from "../../../util/index"; +import { openAICodexChannels } from "./openai-codex-channels"; +import type { + OpenAICodexCommandExecutionItem, + OpenAICodexFileChangeItem, + OpenAICodexInput, + OpenAICodexMcpToolCallItem, + OpenAICodexStreamedTurn, + OpenAICodexThread, + OpenAICodexThreadEvent, + OpenAICodexThreadItem, + OpenAICodexThreadOptions, + OpenAICodexTurn, + OpenAICodexTurnOptions, + OpenAICodexUsage, + OpenAICodexWebSearchItem, +} from "../../vendor-sdk-types/openai-codex"; + +type CodexRunState = { + activeItemSpans: Map; + completedItems: OpenAICodexThreadItem[]; + finalResponse?: string; + finalized: boolean; + metadata: Record; + metrics: Record; + outputText: string[]; + span: Span; + startTime: number; +}; + +const PATCHED_STREAMED_TURN = Symbol.for( + "braintrust.openai-codex.patched-streamed-turn", +); + +export class OpenAICodexPlugin extends BasePlugin { + protected onEnable(): void { + this.subscribeToRun(); + this.subscribeToRunStreamed(); + } + + protected onDisable(): void { + for (const unsubscribe of this.unsubscribers) { + unsubscribe(); + } + this.unsubscribers = []; + } + + private subscribeToRun(): void { + const channel = openAICodexChannels.run.tracingChannel(); + const states = new WeakMap(); + + const handlers: IsoChannelHandlers< + ChannelMessage + > = { + start: (event) => { + states.set(event, startCodexRun(event, "Thread.run")); + }, + asyncEnd: (event) => { + const state = states.get(event); + if (!state) { + return; + } + states.delete(event); + void finalizeCompletedRun(state, event.result); + }, + error: (event) => { + const state = states.get(event); + if (!state) { + return; + } + states.delete(event); + void 
finalizeCodexRun(state, { error: event.error }); + }, + }; + + channel.subscribe(handlers); + this.unsubscribers.push(() => { + channel.unsubscribe(handlers); + }); + } + + private subscribeToRunStreamed(): void { + const channel = openAICodexChannels.runStreamed.tracingChannel(); + const states = new WeakMap(); + + const handlers: IsoChannelHandlers< + ChannelMessage + > = { + start: (event) => { + states.set(event, startCodexRun(event, "Thread.runStreamed")); + }, + asyncEnd: (event) => { + const state = states.get(event); + if (!state) { + return; + } + states.delete(event); + patchStreamedTurn(event.result, state); + }, + error: (event) => { + const state = states.get(event); + if (!state) { + return; + } + states.delete(event); + void finalizeCodexRun(state, { error: event.error }); + }, + }; + + channel.subscribe(handlers); + this.unsubscribers.push(() => { + channel.unsubscribe(handlers); + }); + } +} + +function startCodexRun( + event: ChannelMessage< + typeof openAICodexChannels.run | typeof openAICodexChannels.runStreamed + >, + operation: "Thread.run" | "Thread.runStreamed", +): CodexRunState { + const input = event.arguments[0]; + const turnOptions = event.arguments[1]; + const thread = event.thread ?? extractThreadFromEvent(event); + const metadata = { + ...extractThreadMetadata(thread), + ...extractTurnOptionsMetadata(turnOptions), + "openai_codex.operation": operation, + provider: "openai", + ...(event.moduleVersion + ? 
{ "openai_codex.version": event.moduleVersion } + : {}), + }; + const span = startSpan({ + name: "OpenAI Codex", + spanAttributes: { type: SpanTypeAttribute.TASK }, + }); + const startTime = getCurrentUnixTimestamp(); + safeLog(span, { + input: sanitizeInput(input), + metadata, + }); + + return { + activeItemSpans: new Map(), + completedItems: [], + finalized: false, + metadata, + metrics: {}, + outputText: [], + span, + startTime, + }; +} + +function patchStreamedTurn( + streamedTurn: OpenAICodexStreamedTurn | undefined, + state: CodexRunState, +): void { + if (!streamedTurn || typeof streamedTurn !== "object") { + void finalizeCodexRun(state, { output: streamedTurn }); + return; + } + + const turnRecord = streamedTurn as OpenAICodexStreamedTurn & + Record; + const alreadyPatched = Boolean(turnRecord[PATCHED_STREAMED_TURN]); + if (alreadyPatched || !isAsyncIterable(turnRecord.events)) { + void finalizeCodexRun(state); // no stream will end this span later + return; + } + + try { + Object.defineProperty(turnRecord, PATCHED_STREAMED_TURN, { + configurable: false, + enumerable: false, + value: true, + }); + turnRecord.events = patchCodexEventStream(turnRecord.events, state); + } catch { + void finalizeCodexRun(state, { output: streamedTurn }); + } +} + +async function* patchCodexEventStream( + events: AsyncGenerator, + state: CodexRunState, +): AsyncGenerator { + try { + for await (const event of events) { + try { + await handleCodexEvent(state, event); + } catch (error) { + logInstrumentationError("OpenAI Codex stream event", error); + } + yield event; + } + } catch (error) { + await finalizeCodexRun(state, { error }); + throw error; + } finally { + await finalizeCodexRun(state); // also runs if the consumer stops early + } +} + +async function handleCodexEvent( + state: CodexRunState, + event: OpenAICodexThreadEvent, +): Promise { + switch (event.type) { + case "thread.started": + state.metadata["openai_codex.thread_id"] = event.thread_id; + return; + case "turn.completed": + Object.assign(state.metrics, extractUsageMetrics(event.usage)); + return; + case "turn.failed": + await finalizeCodexRun(state, { + error: 
event.error?.message ?? "Codex turn failed", + }); + return; + case "item.started": + await startCodexItemSpan(state, event.item); + return; + case "item.updated": + updateCodexItem(state, event.item); + return; + case "item.completed": + state.completedItems.push(event.item); + collectOutputText(state, event.item); + await finishCodexItemSpan(state, event.item); + return; + case "error": + await finalizeCodexRun(state, { error: event.message }); + return; + default: + return; + } +} + +async function finalizeCompletedRun( + state: CodexRunState, + turn: OpenAICodexTurn | undefined, +): Promise { + if (!turn) { + await finalizeCodexRun(state, { output: turn }); + return; + } + + Object.assign(state.metrics, extractUsageMetrics(turn.usage)); + state.finalResponse = turn.finalResponse; + + for (const item of turn.items ?? []) { + state.completedItems.push(item); + collectOutputText(state, item); + await createCompletedItemSpan(state, item); + } + + await finalizeCodexRun(state, { output: turn.finalResponse }); +} + +async function finalizeCodexRun( + state: CodexRunState, + params: { + error?: unknown; + output?: unknown; + } = {}, +): Promise { + if (state.finalized) { + return; + } + state.finalized = true; + + const output = + params.output ?? + state.finalResponse ?? + (state.outputText.length > 0 ? state.outputText.join("\n") : undefined); + const metrics = { + ...cleanMetrics(state.metrics), + ...buildDurationMetrics(state.startTime), + }; + + try { + const error = params.error; + safeLog(state.span, { + ...(error + ? { error: error instanceof Error ? 
error.message : String(error) } + : {}), + metadata: state.metadata, + metrics, + output, + }); + } finally { + endOpenItemSpans(state); + state.span.end(); + } +} + +async function createCompletedItemSpan( + state: CodexRunState, + item: OpenAICodexThreadItem, +): Promise { + const spanArgs = await itemSpanArgs(state, item); + if (!spanArgs) { + return; + } + + const span = startSpan(spanArgs.start); + safeLog(span, spanArgs.end); + span.end(); +} + +async function startCodexItemSpan( + state: CodexRunState, + item: OpenAICodexThreadItem, +): Promise { + const itemId = item.id; + if (!itemId || state.activeItemSpans.has(itemId)) { + return; + } + const spanArgs = await itemSpanArgs(state, item); + if (!spanArgs) { + return; + } + state.activeItemSpans.set(itemId, startSpan(spanArgs.start)); +} + +function updateCodexItem( + state: CodexRunState, + item: OpenAICodexThreadItem, +): void { + if (item.type === "agent_message" && typeof item.text === "string") { + state.finalResponse = item.text; + } +} + +async function finishCodexItemSpan( + state: CodexRunState, + item: OpenAICodexThreadItem, +): Promise { + const itemId = item.id; + if (!itemId) { + await createCompletedItemSpan(state, item); + return; + } + + const span = state.activeItemSpans.get(itemId); + if (!span) { + await createCompletedItemSpan(state, item); + return; + } + + state.activeItemSpans.delete(itemId); + const spanArgs = await itemSpanArgs(state, item); + if (spanArgs) { + safeLog(span, spanArgs.end); + } + span.end(); +} + +async function itemSpanArgs( + state: CodexRunState, + item: OpenAICodexThreadItem, +): Promise< + | { + start: Parameters[0]; + end: Parameters[0]; + } + | undefined +> { + const parent = await state.span.export(); + const baseMetadata = { + "openai_codex.item_id": item.id, + "openai_codex.item_type": item.type, + }; + + switch (item.type) { + case "command_execution": + return commandSpanArgs(parent, baseMetadata, item); + case "mcp_tool_call": + return 
mcpToolSpanArgs(parent, baseMetadata, item); + case "web_search": + return webSearchSpanArgs(parent, baseMetadata, item); + case "file_change": + return fileChangeSpanArgs(parent, baseMetadata, item); + default: + return undefined; + } +} + +function commandSpanArgs( + parent: string, + baseMetadata: Record, + item: OpenAICodexCommandExecutionItem, +) { + const metadata = { + ...baseMetadata, + "gen_ai.tool.name": "command_execution", + "openai_codex.command.exit_code": item.exit_code, + "openai_codex.command.status": item.status, + }; + return { + start: { + event: { input: item.command, metadata }, + name: "tool: command_execution", + parent, + spanAttributes: { type: SpanTypeAttribute.TOOL }, + }, + end: { + ...(item.status === "failed" + ? { error: item.aggregated_output || "Command execution failed" } + : {}), + metadata, + output: item.aggregated_output, + }, + }; +} + +function mcpToolSpanArgs( + parent: string, + baseMetadata: Record, + item: OpenAICodexMcpToolCallItem, +) { + const toolName = item.tool || "mcp_tool_call"; + const metadata = { + ...baseMetadata, + "gen_ai.tool.name": toolName, + "openai_codex.mcp.server": item.server, + "openai_codex.mcp.status": item.status, + }; + return { + start: { + event: { + input: { + arguments: item.arguments, + server: item.server, + tool: item.tool, + }, + metadata, + }, + name: `tool: ${toolName}`, + parent, + spanAttributes: { type: SpanTypeAttribute.TOOL }, + }, + end: { + ...(item.error?.message ? 
{ error: item.error.message } : {}), + metadata, + output: item.result, + }, + }; +} + +function webSearchSpanArgs( + parent: string, + baseMetadata: Record, + item: OpenAICodexWebSearchItem, +) { + const metadata = { + ...baseMetadata, + "gen_ai.tool.name": "web_search", + }; + return { + start: { + event: { input: item.query, metadata }, + name: "tool: web_search", + parent, + spanAttributes: { type: SpanTypeAttribute.TOOL }, + }, + end: { metadata }, + }; +} + +function fileChangeSpanArgs( + parent: string, + baseMetadata: Record, + item: OpenAICodexFileChangeItem, +) { + const metadata = { + ...baseMetadata, + "gen_ai.tool.name": "file_change", + "openai_codex.file_change.status": item.status, + }; + return { + start: { + event: { input: item.changes, metadata }, + name: "tool: file_change", + parent, + spanAttributes: { type: SpanTypeAttribute.TOOL }, + }, + end: { + ...(item.status === "failed" ? { error: "File change failed" } : {}), + metadata, + output: item.changes, + }, + }; +} + +function endOpenItemSpans(state: CodexRunState): void { + for (const [, span] of state.activeItemSpans) { + safeLog(span, { error: "Codex item did not complete" }); + span.end(); + } + state.activeItemSpans.clear(); +} + +function collectOutputText( + state: CodexRunState, + item: OpenAICodexThreadItem, +): void { + if (item.type === "agent_message" && typeof item.text === "string") { + state.finalResponse = item.text; + state.outputText.push(item.text); + } else if ( + item.type === "reasoning" && + typeof item.text === "string" && + !state.finalResponse + ) { + state.outputText.push(item.text); + } +} + +function extractThreadFromEvent( + event: ChannelMessage< + typeof openAICodexChannels.run | typeof openAICodexChannels.runStreamed + >, +): OpenAICodexThread | undefined { + return event.self && typeof event.self === "object" + ? 
(event.self as OpenAICodexThread) + : undefined; +} + +function extractThreadMetadata( + thread: OpenAICodexThread | undefined, +): Record { + const threadOptions = extractThreadOptions(thread); + return { + ...(thread?.id ? { "openai_codex.thread_id": thread.id } : {}), + ...extractThreadOptionsMetadata(threadOptions), + }; +} + +function extractThreadOptions( + thread: OpenAICodexThread | undefined, +): OpenAICodexThreadOptions | undefined { + if (!thread || typeof thread !== "object") { + return undefined; + } + const value = Reflect.get(thread, "_threadOptions"); + return value && typeof value === "object" + ? (value as OpenAICodexThreadOptions) + : undefined; +} + +function extractThreadOptionsMetadata( + options: OpenAICodexThreadOptions | undefined, +): Record { + if (!options) { + return {}; + } + + return { + ...(options.model ? { model: options.model } : {}), + ...(options.model ? { "openai_codex.model": options.model } : {}), + ...(options.sandboxMode + ? { "openai_codex.sandbox_mode": options.sandboxMode } + : {}), + ...(options.workingDirectory + ? { "openai_codex.working_directory": options.workingDirectory } + : {}), + ...(options.skipGitRepoCheck !== undefined + ? { "openai_codex.skip_git_repo_check": options.skipGitRepoCheck } + : {}), + ...(options.modelReasoningEffort + ? { + "openai_codex.model_reasoning_effort": options.modelReasoningEffort, + } + : {}), + ...(options.networkAccessEnabled !== undefined + ? { + "openai_codex.network_access_enabled": options.networkAccessEnabled, + } + : {}), + ...(options.webSearchMode + ? { "openai_codex.web_search_mode": options.webSearchMode } + : {}), + ...(options.webSearchEnabled !== undefined + ? { "openai_codex.web_search_enabled": options.webSearchEnabled } + : {}), + ...(options.approvalPolicy + ? { "openai_codex.approval_policy": options.approvalPolicy } + : {}), + ...(options.additionalDirectories + ? 
{ + "openai_codex.additional_directories": options.additionalDirectories, + } + : {}), + }; +} + +function extractTurnOptionsMetadata( + options: OpenAICodexTurnOptions | undefined, +): Record { + if (!options) { + return {}; + } + + return { + ...(options.outputSchema !== undefined + ? { "openai_codex.output_schema": true } + : {}), + }; +} + +function sanitizeInput(input: OpenAICodexInput): unknown { + if (typeof input === "string") { + return input; + } + + return input.map((item) => { + if (item.type === "local_image") { + return { + path: item.path, + type: "local_image", + }; + } + return item; + }); +} + +function extractUsageMetrics( + usage: OpenAICodexUsage | null | undefined, +): Record { + if (!usage) { + return {}; + } + + const metrics: Record = {}; + if (usage.input_tokens !== undefined) { + metrics.prompt_tokens = usage.input_tokens; + } + if (usage.cached_input_tokens !== undefined) { + metrics.prompt_cached_tokens = usage.cached_input_tokens; + } + if (usage.output_tokens !== undefined) { + metrics.completion_tokens = usage.output_tokens; + } + if (usage.reasoning_output_tokens !== undefined) { + metrics.completion_reasoning_tokens = usage.reasoning_output_tokens; + } + + // cached_input_tokens and reasoning_output_tokens are already part of + // input_tokens and output_tokens, so adding them again would double-count. + // The total is therefore prompt + completion only. + metrics.tokens = + (metrics.prompt_tokens ?? 0) + (metrics.completion_tokens ?? 
0); + return metrics; +} + +function buildDurationMetrics(startTime: number): Record { + const end = getCurrentUnixTimestamp(); + return { + duration: end - startTime, + end, + start: startTime, + }; +} + +function cleanMetrics(metrics: Record): Record { + const cleaned: Record = {}; + for (const [key, value] of Object.entries(metrics)) { + if (value !== undefined && Number.isFinite(value)) { + cleaned[key] = value; + } + } + return cleaned; +} + +function isAsyncIterable(value: unknown): value is AsyncGenerator { + return ( + !!value && + typeof value === "object" && + Symbol.asyncIterator in value && + typeof (value as { [Symbol.asyncIterator]?: unknown })[ + Symbol.asyncIterator + ] === "function" + ); +} + +function safeLog(span: Span, event: Parameters[0]): void { + try { + span.log(event); + } catch (error) { + logInstrumentationError("OpenAI Codex span log", error); + } +} + +function logInstrumentationError(context: string, error: unknown): void { + debugLogger.error(`Error processing ${context}:`, error); +} diff --git a/js/src/instrumentation/registry.test.ts b/js/src/instrumentation/registry.test.ts index 9e8c6a889..3875b6ad0 100644 --- a/js/src/instrumentation/registry.test.ts +++ b/js/src/instrumentation/registry.test.ts @@ -117,6 +117,7 @@ describe("configureInstrumentation API", () => { configureInstrumentation({ integrations: { openai: false, + openaiCodex: false, anthropic: true, huggingface: true, openrouter: false, diff --git a/js/src/instrumentation/registry.ts b/js/src/instrumentation/registry.ts index 8e09f214a..41991a384 100644 --- a/js/src/instrumentation/registry.ts +++ b/js/src/instrumentation/registry.ts @@ -15,6 +15,8 @@ export interface InstrumentationConfig { */ integrations?: { openai?: boolean; + openaiCodex?: boolean; + codex?: boolean; anthropic?: boolean; vercel?: boolean; aisdk?: boolean; @@ -107,6 +109,8 @@ class PluginRegistry { private getDefaultConfig(): Record { return { openai: true, + openaiCodex: true, + codex: true, 
anthropic: true, vercel: true, aisdk: true, @@ -139,6 +143,14 @@ class PluginRegistry { for (const sdk of disabled) { if (sdk === "cursor-sdk") { integrations.cursorSDK = false; + } else if ( + sdk === "openai-codex" || + sdk === "openai-codex-sdk" || + sdk === "codex-sdk" + ) { + integrations.openaiCodex = false; + } else if (sdk === "codex") { + integrations.codex = false; } else { integrations[sdk] = false; } diff --git a/js/src/vendor-sdk-types/openai-codex.ts b/js/src/vendor-sdk-types/openai-codex.ts new file mode 100644 index 000000000..7d09a71a0 --- /dev/null +++ b/js/src/vendor-sdk-types/openai-codex.ts @@ -0,0 +1,215 @@ +/** + * Vendored types for @openai/codex-sdk used by Braintrust instrumentation. + * + * Keep this surface intentionally narrow. These types are not exported to SDK + * users and should only cover fields we read, wrap, or log. + */ + +export interface OpenAICodexSDKModule { + Codex: OpenAICodexClass; + Thread?: OpenAICodexThreadClass; + [key: string]: unknown; +} + +export interface OpenAICodexClass { + new (options?: OpenAICodexOptions): OpenAICodexClient; + [key: string]: unknown; +} + +export interface OpenAICodexClient { + startThread(options?: OpenAICodexThreadOptions): OpenAICodexThread; + resumeThread( + id: string, + options?: OpenAICodexThreadOptions, + ): OpenAICodexThread; + [key: string]: unknown; +} + +export interface OpenAICodexThreadClass { + new (...args: unknown[]): OpenAICodexThread; + [key: string]: unknown; +} + +export interface OpenAICodexThread { + readonly id?: string | null; + run( + input: OpenAICodexInput, + turnOptions?: OpenAICodexTurnOptions, + ): Promise; + runStreamed( + input: OpenAICodexInput, + turnOptions?: OpenAICodexTurnOptions, + ): Promise; + [key: string]: unknown; +} + +export interface OpenAICodexOptions { + codexPathOverride?: string; + baseUrl?: string; + apiKey?: string; + config?: OpenAICodexConfigObject; + env?: Record; +} + +export type OpenAICodexConfigValue = + | string + | number + | 
boolean + | OpenAICodexConfigValue[] + | OpenAICodexConfigObject; + +export interface OpenAICodexConfigObject { + [key: string]: OpenAICodexConfigValue; +} + +export type OpenAICodexApprovalMode = + | "never" + | "on-request" + | "on-failure" + | "untrusted"; + +export type OpenAICodexSandboxMode = + | "read-only" + | "workspace-write" + | "danger-full-access"; + +export type OpenAICodexModelReasoningEffort = + | "minimal" + | "low" + | "medium" + | "high" + | "xhigh"; + +export type OpenAICodexWebSearchMode = "disabled" | "cached" | "live"; + +export interface OpenAICodexThreadOptions { + model?: string; + sandboxMode?: OpenAICodexSandboxMode; + workingDirectory?: string; + skipGitRepoCheck?: boolean; + modelReasoningEffort?: OpenAICodexModelReasoningEffort; + networkAccessEnabled?: boolean; + webSearchMode?: OpenAICodexWebSearchMode; + webSearchEnabled?: boolean; + approvalPolicy?: OpenAICodexApprovalMode; + additionalDirectories?: string[]; +} + +export interface OpenAICodexTurnOptions { + outputSchema?: unknown; + signal?: AbortSignal; +} + +export type OpenAICodexInput = + | string + | Array< + | { type: "text"; text: string } + | { type: "local_image"; path: string } + | { type?: string; [key: string]: unknown } + >; + +export interface OpenAICodexUsage { + input_tokens?: number; + cached_input_tokens?: number; + output_tokens?: number; + reasoning_output_tokens?: number; +} + +export interface OpenAICodexTurn { + items: OpenAICodexThreadItem[]; + finalResponse: string; + usage: OpenAICodexUsage | null; +} + +export interface OpenAICodexStreamedTurn { + events: AsyncGenerator; +} + +export type OpenAICodexThreadEvent = + | { type: "thread.started"; thread_id: string } + | { type: "turn.started" } + | { type: "turn.completed"; usage: OpenAICodexUsage } + | { type: "turn.failed"; error: OpenAICodexThreadError } + | { type: "item.started"; item: OpenAICodexThreadItem } + | { type: "item.updated"; item: OpenAICodexThreadItem } + | { type: "item.completed"; item: 
OpenAICodexThreadItem } + | { type: "error"; message: string } + | { type?: string; [key: string]: unknown }; + +export interface OpenAICodexThreadError { + message?: string; + [key: string]: unknown; +} + +export type OpenAICodexThreadItem = + | OpenAICodexAgentMessageItem + | OpenAICodexReasoningItem + | OpenAICodexCommandExecutionItem + | OpenAICodexFileChangeItem + | OpenAICodexMcpToolCallItem + | OpenAICodexWebSearchItem + | OpenAICodexTodoListItem + | OpenAICodexErrorItem + | { id?: string; type?: string; [key: string]: unknown }; + +export interface OpenAICodexAgentMessageItem { + id?: string; + type: "agent_message"; + text?: string; +} + +export interface OpenAICodexReasoningItem { + id?: string; + type: "reasoning"; + text?: string; +} + +export interface OpenAICodexCommandExecutionItem { + id?: string; + type: "command_execution"; + command?: string; + aggregated_output?: string; + exit_code?: number; + status?: "in_progress" | "completed" | "failed"; +} + +export interface OpenAICodexFileChangeItem { + id?: string; + type: "file_change"; + changes?: Array<{ path?: string; kind?: "add" | "delete" | "update" }>; + status?: "completed" | "failed"; +} + +export interface OpenAICodexMcpToolCallItem { + id?: string; + type: "mcp_tool_call"; + server?: string; + tool?: string; + arguments?: unknown; + result?: { + content?: unknown; + structured_content?: unknown; + }; + error?: { + message?: string; + }; + status?: "in_progress" | "completed" | "failed"; +} + +export interface OpenAICodexWebSearchItem { + id?: string; + type: "web_search"; + query?: string; +} + +export interface OpenAICodexTodoListItem { + id?: string; + type: "todo_list"; + items?: Array<{ text?: string; completed?: boolean }>; +} + +export interface OpenAICodexErrorItem { + id?: string; + type: "error"; + message?: string; +} diff --git a/js/src/wrappers/openai-codex.ts b/js/src/wrappers/openai-codex.ts new file mode 100644 index 000000000..2031fcd1a --- /dev/null +++ 
b/js/src/wrappers/openai-codex.ts @@ -0,0 +1,186 @@ +import { openAICodexChannels } from "../instrumentation/plugins/openai-codex-channels"; +import type { + OpenAICodexClass, + OpenAICodexClient, + OpenAICodexInput, + OpenAICodexSDKModule, + OpenAICodexStreamedTurn, + OpenAICodexThread, + OpenAICodexThreadOptions, + OpenAICodexTurn, + OpenAICodexTurnOptions, +} from "../vendor-sdk-types/openai-codex"; + +const WRAPPED_CLIENT = Symbol.for("braintrust.openai-codex.wrapped-client"); +const WRAPPED_THREAD = Symbol.for("braintrust.openai-codex.wrapped-thread"); + +/** + * Wraps the OpenAI Codex TypeScript SDK with Braintrust tracing. The wrapper + * emits diagnostics-channel events; the OpenAI Codex plugin owns span lifecycle. + */ +export function wrapOpenAICodexSDK(sdk: T): T { + if (!sdk || typeof sdk !== "object") { + return sdk; + } + + const maybeSDK = sdk as Record; + if (hasCodexClientShape(maybeSDK)) { + return wrapCodexClient(maybeSDK as unknown as OpenAICodexClient) as T; + } + + if (!maybeSDK.Codex || typeof maybeSDK.Codex !== "function") { + // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage. + console.warn("Unsupported OpenAI Codex SDK. Not wrapping."); + return sdk; + } + + const target = isModuleNamespace(sdk) + ? 
Object.setPrototypeOf({}, sdk) + : (sdk as Record); + + return new Proxy(target, { + get(target, prop, receiver) { + const value = Reflect.get(target, prop, receiver); + if (prop === "Codex" && typeof value === "function") { + return wrapCodexClass(value as unknown as OpenAICodexClass); + } + if (typeof value === "function") { + return value.bind(target); + } + return value; + }, + }) as T; +} + +function hasCodexClientShape(value: Record): boolean { + return ( + typeof value.startThread === "function" && + typeof value.resumeThread === "function" + ); +} + +function isModuleNamespace(obj: unknown): boolean { + if (!obj || typeof obj !== "object") { + return false; + } + if (obj.constructor?.name === "Module") { + return true; + } + const keys = Object.keys(obj); + if (keys.length === 0) { + return false; + } + const descriptor = Object.getOwnPropertyDescriptor(obj, keys[0]); + return descriptor ? !descriptor.configurable && !descriptor.writable : false; +} + +function wrapCodexClass(Codex: OpenAICodexClass): OpenAICodexClass { + return new Proxy(Codex, { + construct(target, args, newTarget) { + return wrapCodexClient(Reflect.construct(target, args, newTarget)); + }, + get(target, prop, receiver) { + const value = Reflect.get(target, prop, receiver); + if (typeof value === "function") { + return value.bind(target); + } + return value; + }, + }) as OpenAICodexClass; +} + +function wrapCodexClient(client: OpenAICodexClient): OpenAICodexClient { + if (!client || typeof client !== "object") { + return client; + } + if ((client as unknown as Record)[WRAPPED_CLIENT]) { + return client; + } + + return new Proxy(client, { + get(target, prop, receiver) { + if (prop === WRAPPED_CLIENT) { + return true; + } + + const value = Reflect.get(target, prop, receiver); + if (prop === "startThread" && typeof value === "function") { + return function (options?: OpenAICodexThreadOptions) { + return wrapCodexThread(Reflect.apply(value, target, [options])); + }; + } + if (prop === 
"resumeThread" && typeof value === "function") { + return function (id: string, options?: OpenAICodexThreadOptions) { + return wrapCodexThread(Reflect.apply(value, target, [id, options])); + }; + } + if (typeof value === "function") { + return value.bind(target); + } + return value; + }, + }); +} + +function wrapCodexThread(thread: OpenAICodexThread): OpenAICodexThread { + if (!thread || typeof thread !== "object") { + return thread; + } + if ((thread as unknown as Record)[WRAPPED_THREAD]) { + return thread; + } + + return new Proxy(thread, { + get(target, prop, receiver) { + if (prop === WRAPPED_THREAD) { + return true; + } + + const value = Reflect.get(target, prop, receiver); + if (prop === "run" && typeof value === "function") { + return function ( + input: OpenAICodexInput, + turnOptions?: OpenAICodexTurnOptions, + ): Promise { + const args = [input, turnOptions] as [ + OpenAICodexInput, + OpenAICodexTurnOptions | undefined, + ]; + return openAICodexChannels.run.tracePromise( + () => Reflect.apply(value, target, args), + { + arguments: args, + operation: "run", + thread: target, + } as never, + ); + }; + } + if (prop === "runStreamed" && typeof value === "function") { + return function ( + input: OpenAICodexInput, + turnOptions?: OpenAICodexTurnOptions, + ): Promise { + const args = [input, turnOptions] as [ + OpenAICodexInput, + OpenAICodexTurnOptions | undefined, + ]; + return openAICodexChannels.runStreamed.tracePromise( + () => Reflect.apply(value, target, args), + { + arguments: args, + operation: "runStreamed", + thread: target, + } as never, + ); + }; + } + if (typeof value === "function") { + return value.bind(target); + } + return value; + }, + }); +} + +export type { OpenAICodexSDKModule }; From 1ef443ac20e19a4ecaf14b2e4612902a3c77bd0a Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Mon, 4 May 2026 15:39:19 -0700 Subject: [PATCH 2/5] fix --- js/src/vendor-sdk-types/openai-codex.ts | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff 
--git a/js/src/vendor-sdk-types/openai-codex.ts b/js/src/vendor-sdk-types/openai-codex.ts index 7d09a71a0..5d0f2b35c 100644 --- a/js/src/vendor-sdk-types/openai-codex.ts +++ b/js/src/vendor-sdk-types/openai-codex.ts @@ -133,8 +133,7 @@ export type OpenAICodexThreadEvent = | { type: "item.started"; item: OpenAICodexThreadItem } | { type: "item.updated"; item: OpenAICodexThreadItem } | { type: "item.completed"; item: OpenAICodexThreadItem } - | { type: "error"; message: string } - | { type?: string; [key: string]: unknown }; + | { type: "error"; message: string }; export interface OpenAICodexThreadError { message?: string; @@ -149,8 +148,7 @@ export type OpenAICodexThreadItem = | OpenAICodexMcpToolCallItem | OpenAICodexWebSearchItem | OpenAICodexTodoListItem - | OpenAICodexErrorItem - | { id?: string; type?: string; [key: string]: unknown }; + | OpenAICodexErrorItem; export interface OpenAICodexAgentMessageItem { id?: string; From aba7db7bcb22464312f89be745144e05b41fe87d Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Mon, 4 May 2026 15:39:50 -0700 Subject: [PATCH 3/5] cs --- .changeset/twenty-ideas-doubt.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/twenty-ideas-doubt.md diff --git a/.changeset/twenty-ideas-doubt.md b/.changeset/twenty-ideas-doubt.md new file mode 100644 index 000000000..55d61a938 --- /dev/null +++ b/.changeset/twenty-ideas-doubt.md @@ -0,0 +1,5 @@ +--- +"braintrust": minor +--- + +feat: Add @openai/codex-sdk instrumentation From bcc034bd8b69c96660df3ee6f323f106ac1c4b5f Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Tue, 5 May 2026 10:02:47 -0700 Subject: [PATCH 4/5] capture llm calls --- ...nai-codex-v0128-auto-hook.span-events.json | 196 ++++++++++++++++- ...penai-codex-v0128-wrapped.span-events.json | 196 ++++++++++++++++- .../assertions.ts | 103 +++++++++ .../mock-codex-cli.mjs | 26 ++- .../plugins/openai-codex-plugin.ts | 197 +++++++++++++++++- 5 files changed, 696 insertions(+), 22 deletions(-) diff --git 
a/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-auto-hook.span-events.json b/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-auto-hook.span-events.json index 7a5fd7ed0..36e8471bd 100644 --- a/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-auto-hook.span-events.json +++ b/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-auto-hook.span-events.json @@ -1,4 +1,188 @@ { + "llms": [ + { + "has_input": true, + "has_output": true, + "metadata": { + "model": "gpt-5-codex", + "openai_codex.llm_sequence": 1, + "openai_codex.model": "gpt-5-codex", + "openai_codex.operation": "Thread.run", + "provider": "openai" + }, + "metric_keys": [], + "name": "OpenAI Codex LLM", + "output": { + "reasoning": "reasoning before command RUN_OK" + }, + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + { + "has_input": false, + "has_output": true, + "metadata": { + "model": "gpt-5-codex", + "openai_codex.llm_sequence": 2, + "openai_codex.model": "gpt-5-codex", + "openai_codex.operation": "Thread.run", + "provider": "openai" + }, + "metric_keys": [], + "name": "OpenAI Codex LLM", + "output": { + "reasoning": "reasoning after command RUN_OK" + }, + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + { + "has_input": false, + "has_output": true, + "metadata": { + "model": "gpt-5-codex", + "openai_codex.llm_sequence": 3, + "openai_codex.model": "gpt-5-codex", + "openai_codex.operation": "Thread.run", + "provider": "openai" + }, + "metric_keys": [], + "name": "OpenAI Codex LLM", + "output": { + "reasoning": "reasoning after mcp RUN_OK" + }, + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + { + "has_input": false, + "has_output": true, + "metadata": { + "model": "gpt-5-codex", + "openai_codex.llm_sequence": 4, + "openai_codex.model": "gpt-5-codex", + 
"openai_codex.operation": "Thread.run", + "provider": "openai" + }, + "metric_keys": [], + "name": "OpenAI Codex LLM", + "output": { + "message": "Codex RUN_OK", + "reasoning": "final reasoning RUN_OK" + }, + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "model": "gpt-5-codex", + "openai_codex.llm_sequence": 1, + "openai_codex.model": "gpt-5-codex", + "openai_codex.operation": "Thread.runStreamed", + "openai_codex.thread_id": "", + "provider": "openai" + }, + "metric_keys": [], + "name": "OpenAI Codex LLM", + "output": { + "reasoning": "reasoning before command STREAM_OK" + }, + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + { + "has_input": false, + "has_output": true, + "metadata": { + "model": "gpt-5-codex", + "openai_codex.llm_sequence": 2, + "openai_codex.model": "gpt-5-codex", + "openai_codex.operation": "Thread.runStreamed", + "openai_codex.thread_id": "", + "provider": "openai" + }, + "metric_keys": [], + "name": "OpenAI Codex LLM", + "output": { + "reasoning": "reasoning after command STREAM_OK" + }, + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + { + "has_input": false, + "has_output": true, + "metadata": { + "model": "gpt-5-codex", + "openai_codex.llm_sequence": 3, + "openai_codex.model": "gpt-5-codex", + "openai_codex.operation": "Thread.runStreamed", + "openai_codex.thread_id": "", + "provider": "openai" + }, + "metric_keys": [], + "name": "OpenAI Codex LLM", + "output": { + "reasoning": "reasoning after mcp STREAM_OK" + }, + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + { + "has_input": false, + "has_output": true, + "metadata": { + "model": "gpt-5-codex", + "openai_codex.llm_sequence": 4, + "openai_codex.model": "gpt-5-codex", + "openai_codex.operation": "Thread.runStreamed", + "openai_codex.thread_id": "", + 
"provider": "openai" + }, + "metric_keys": [], + "name": "OpenAI Codex LLM", + "output": { + "message": "Codex STREAM_OK", + "reasoning": "final reasoning STREAM_OK" + }, + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + } + ], "root": { "has_input": false, "has_output": false, @@ -109,7 +293,7 @@ "metric_keys": [], "name": "tool: command_execution", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -127,7 +311,7 @@ "metric_keys": [], "name": "tool: read_file", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -143,7 +327,7 @@ "metric_keys": [], "name": "tool: web_search", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -160,7 +344,7 @@ "metric_keys": [], "name": "tool: command_execution", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -178,7 +362,7 @@ "metric_keys": [], "name": "tool: read_file", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -194,7 +378,7 @@ "metric_keys": [], "name": "tool: web_search", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], diff --git a/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-wrapped.span-events.json b/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-wrapped.span-events.json index 7a5fd7ed0..36e8471bd 100644 --- a/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-wrapped.span-events.json +++ b/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-wrapped.span-events.json @@ -1,4 +1,188 @@ { + "llms": [ + { + "has_input": true, + "has_output": true, + "metadata": { + "model": "gpt-5-codex", + "openai_codex.llm_sequence": 1, + "openai_codex.model": "gpt-5-codex", + "openai_codex.operation": "Thread.run", + "provider": "openai" + }, + "metric_keys": [], + "name": "OpenAI Codex LLM", + "output": { + 
"reasoning": "reasoning before command RUN_OK" + }, + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + { + "has_input": false, + "has_output": true, + "metadata": { + "model": "gpt-5-codex", + "openai_codex.llm_sequence": 2, + "openai_codex.model": "gpt-5-codex", + "openai_codex.operation": "Thread.run", + "provider": "openai" + }, + "metric_keys": [], + "name": "OpenAI Codex LLM", + "output": { + "reasoning": "reasoning after command RUN_OK" + }, + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + { + "has_input": false, + "has_output": true, + "metadata": { + "model": "gpt-5-codex", + "openai_codex.llm_sequence": 3, + "openai_codex.model": "gpt-5-codex", + "openai_codex.operation": "Thread.run", + "provider": "openai" + }, + "metric_keys": [], + "name": "OpenAI Codex LLM", + "output": { + "reasoning": "reasoning after mcp RUN_OK" + }, + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + { + "has_input": false, + "has_output": true, + "metadata": { + "model": "gpt-5-codex", + "openai_codex.llm_sequence": 4, + "openai_codex.model": "gpt-5-codex", + "openai_codex.operation": "Thread.run", + "provider": "openai" + }, + "metric_keys": [], + "name": "OpenAI Codex LLM", + "output": { + "message": "Codex RUN_OK", + "reasoning": "final reasoning RUN_OK" + }, + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "model": "gpt-5-codex", + "openai_codex.llm_sequence": 1, + "openai_codex.model": "gpt-5-codex", + "openai_codex.operation": "Thread.runStreamed", + "openai_codex.thread_id": "", + "provider": "openai" + }, + "metric_keys": [], + "name": "OpenAI Codex LLM", + "output": { + "reasoning": "reasoning before command STREAM_OK" + }, + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + { + "has_input": false, + 
"has_output": true, + "metadata": { + "model": "gpt-5-codex", + "openai_codex.llm_sequence": 2, + "openai_codex.model": "gpt-5-codex", + "openai_codex.operation": "Thread.runStreamed", + "openai_codex.thread_id": "", + "provider": "openai" + }, + "metric_keys": [], + "name": "OpenAI Codex LLM", + "output": { + "reasoning": "reasoning after command STREAM_OK" + }, + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + { + "has_input": false, + "has_output": true, + "metadata": { + "model": "gpt-5-codex", + "openai_codex.llm_sequence": 3, + "openai_codex.model": "gpt-5-codex", + "openai_codex.operation": "Thread.runStreamed", + "openai_codex.thread_id": "", + "provider": "openai" + }, + "metric_keys": [], + "name": "OpenAI Codex LLM", + "output": { + "reasoning": "reasoning after mcp STREAM_OK" + }, + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + { + "has_input": false, + "has_output": true, + "metadata": { + "model": "gpt-5-codex", + "openai_codex.llm_sequence": 4, + "openai_codex.model": "gpt-5-codex", + "openai_codex.operation": "Thread.runStreamed", + "openai_codex.thread_id": "", + "provider": "openai" + }, + "metric_keys": [], + "name": "OpenAI Codex LLM", + "output": { + "message": "Codex STREAM_OK", + "reasoning": "final reasoning STREAM_OK" + }, + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + } + ], "root": { "has_input": false, "has_output": false, @@ -109,7 +293,7 @@ "metric_keys": [], "name": "tool: command_execution", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -127,7 +311,7 @@ "metric_keys": [], "name": "tool: read_file", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -143,7 +327,7 @@ "metric_keys": [], "name": "tool: web_search", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -160,7 +344,7 @@ "metric_keys": [], "name": "tool: 
command_execution", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -178,7 +362,7 @@ "metric_keys": [], "name": "tool: read_file", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -194,7 +378,7 @@ "metric_keys": [], "name": "tool: web_search", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], diff --git a/e2e/scenarios/openai-codex-instrumentation/assertions.ts b/e2e/scenarios/openai-codex-instrumentation/assertions.ts index 4d54f6f21..2fca6b76d 100644 --- a/e2e/scenarios/openai-codex-instrumentation/assertions.ts +++ b/e2e/scenarios/openai-codex-instrumentation/assertions.ts @@ -33,6 +33,7 @@ const METADATA_KEYS = [ "operation", "scenario", "gen_ai.tool.name", + "openai_codex.llm_sequence", "openai_codex.operation", "openai_codex.model", "openai_codex.thread_id", @@ -59,6 +60,29 @@ function summarizeSpan(event: CapturedLogEvent | undefined): Json { return summary; } +function isRecord(value: unknown): value is Record { + return typeof value === "object" && value !== null && !Array.isArray(value); +} + +function summarizeLlmOutput(output: unknown): Json { + if (!isRecord(output)) { + return null; + } + + return { + ...(typeof output.reasoning === "string" + ? { reasoning: output.reasoning } + : {}), + ...(typeof output.message === "string" ? 
{ message: output.message } : {}), + } as Json; +} + +function summarizeLlmSpan(event: CapturedLogEvent | undefined): Json { + const summary = summarizeSpan(event) as Record; + summary.output = summarizeLlmOutput(event?.output); + return summary as Json; +} + function findCodexTask(events: CapturedLogEvent[], operationName: string) { const operation = findLatestSpan(events, operationName); return [...events] @@ -93,12 +117,44 @@ function latestSpansByType( }); } +function latestSpansForParent( + events: CapturedLogEvent[], + parentSpanId: string | undefined, +): CapturedLogEvent[] { + if (!parentSpanId) { + return []; + } + + const order: string[] = []; + const latest = new Map(); + + for (const event of events) { + if (!event.span.id || !event.span.parentIds.includes(parentSpanId)) { + continue; + } + if (!latest.has(event.span.id)) { + order.push(event.span.id); + } + latest.set(event.span.id, event); + } + + return order.flatMap((spanId) => { + const event = latest.get(spanId); + return event ? [event] : []; + }); +} + +function childSpanLabel(event: CapturedLogEvent): string { + return event.span.type === "llm" ? "llm" : (event.span.name ?? 
""); +} + function summarize(events: CapturedLogEvent[]): Json { const runTask = findCodexTask(events, "openai-codex-run-operation"); const streamedTask = findCodexTask( events, "openai-codex-run-streamed-operation", ); + const llmSpans = latestSpansByType(events, "llm"); const toolSpans = latestSpansByType(events, "tool"); return normalizeForSnapshot({ @@ -115,6 +171,7 @@ function summarize(events: CapturedLogEvent[]): Json { ), task: summarizeSpan(streamedTask), }, + llms: llmSpans.map(summarizeLlmSpan), tools: toolSpans.map(summarizeSpan), } as Json); } @@ -169,6 +226,52 @@ export function defineOpenAICodexInstrumentationAssertions(options: { } }); + test("captures LLM spans around tool calls", testConfig, () => { + const llmSpans = latestSpansByType(events, "llm"); + + expect(llmSpans).toHaveLength(8); + expect( + llmSpans.every((event) => event.span.name === "OpenAI Codex LLM"), + ).toBe(true); + expect( + llmSpans.some((event) => { + const output = event.output as + | { message?: string; reasoning?: string } + | undefined; + return ( + output?.reasoning === "final reasoning RUN_OK" && + output.message === "Codex RUN_OK" + ); + }), + ).toBe(true); + expect( + llmSpans.some((event) => { + const output = event.output as + | { message?: string; reasoning?: string } + | undefined; + return output?.reasoning === "reasoning after command STREAM_OK"; + }), + ).toBe(true); + + for (const operationName of [ + "openai-codex-run-operation", + "openai-codex-run-streamed-operation", + ]) { + const task = findCodexTask(events, operationName); + expect( + latestSpansForParent(events, task?.span.id).map(childSpanLabel), + ).toEqual([ + "llm", + "tool: command_execution", + "llm", + "tool: read_file", + "llm", + "tool: web_search", + "llm", + ]); + } + }); + test("captures command and MCP tool spans", testConfig, () => { const toolSpans = latestSpansByType(events, "tool"); diff --git a/e2e/scenarios/openai-codex-instrumentation/mock-codex-cli.mjs 
b/e2e/scenarios/openai-codex-instrumentation/mock-codex-cli.mjs index 510abdf04..e7243154d 100755 --- a/e2e/scenarios/openai-codex-instrumentation/mock-codex-cli.mjs +++ b/e2e/scenarios/openai-codex-instrumentation/mock-codex-cli.mjs @@ -13,6 +13,14 @@ process.stdin.on("end", () => { const events = [ { type: "thread.started", thread_id: threadId }, { type: "turn.started" }, + { + type: "item.completed", + item: { + id: `${threadId}_reasoning_before_command`, + type: "reasoning", + text: `reasoning before command ${suffix}`, + }, + }, { type: "item.started", item: { @@ -34,6 +42,14 @@ process.stdin.on("end", () => { status: "completed", }, }, + { + type: "item.completed", + item: { + id: `${threadId}_reasoning_after_command`, + type: "reasoning", + text: `reasoning after command ${suffix}`, + }, + }, { type: "item.started", item: { @@ -60,6 +76,14 @@ process.stdin.on("end", () => { status: "completed", }, }, + { + type: "item.completed", + item: { + id: `${threadId}_reasoning_after_mcp`, + type: "reasoning", + text: `reasoning after mcp ${suffix}`, + }, + }, { type: "item.completed", item: { @@ -73,7 +97,7 @@ process.stdin.on("end", () => { item: { id: `${threadId}_reasoning`, type: "reasoning", - text: `reasoning ${suffix}`, + text: `final reasoning ${suffix}`, }, }, { diff --git a/js/src/instrumentation/plugins/openai-codex-plugin.ts b/js/src/instrumentation/plugins/openai-codex-plugin.ts index deffe8187..587bf901a 100644 --- a/js/src/instrumentation/plugins/openai-codex-plugin.ts +++ b/js/src/instrumentation/plugins/openai-codex-plugin.ts @@ -24,10 +24,13 @@ import type { } from "../../vendor-sdk-types/openai-codex"; type CodexRunState = { + activeLlmSpan?: CodexLlmSpanState; activeItemSpans: Map; completedItems: OpenAICodexThreadItem[]; finalResponse?: string; finalized: boolean; + input: unknown; + llmSequence: number; metadata: Record; metrics: Record; outputText: string[]; @@ -35,6 +38,15 @@ type CodexRunState = { startTime: number; }; +type CodexLlmSpanState 
= { + anonymousMessages: string[]; + anonymousReasoning: string[]; + messagesById: Map; + metadata: Record; + reasoningById: Map; + span: Span; +}; + const PATCHED_STREAMED_TURN = Symbol.for( "braintrust.openai-codex.patched-streamed-turn", ); @@ -130,6 +142,7 @@ function startCodexRun( const input = event.arguments[0]; const turnOptions = event.arguments[1]; const thread = event.thread ?? extractThreadFromEvent(event); + const sanitizedInput = sanitizeInput(input); const metadata = { ...extractThreadMetadata(thread), ...extractTurnOptionsMetadata(turnOptions), @@ -145,7 +158,7 @@ function startCodexRun( }); const startTime = getCurrentUnixTimestamp(); safeLog(span, { - input: sanitizeInput(input), + input: sanitizedInput, metadata, }); @@ -153,6 +166,8 @@ function startCodexRun( activeItemSpans: new Map(), completedItems: [], finalized: false, + input: sanitizedInput, + llmSequence: 0, metadata, metrics: {}, outputText: [], @@ -228,15 +243,13 @@ async function handleCodexEvent( }); return; case "item.started": - await startCodexItemSpan(state, event.item); + await handleCodexItemStarted(state, event.item); return; case "item.updated": - updateCodexItem(state, event.item); + await handleCodexItemUpdated(state, event.item); return; case "item.completed": - state.completedItems.push(event.item); - collectOutputText(state, event.item); - await finishCodexItemSpan(state, event.item); + await handleCodexItemCompleted(state, event.item); return; case "error": await finalizeCodexRun(state, { error: event.message }); @@ -259,9 +272,7 @@ async function finalizeCompletedRun( state.finalResponse = turn.finalResponse; for (const item of turn.items ?? 
[]) { - state.completedItems.push(item); - collectOutputText(state, item); - await createCompletedItemSpan(state, item); + await handleCodexItemCompleted(state, item); } await finalizeCodexRun(state, { output: turn.finalResponse }); @@ -288,6 +299,8 @@ async function finalizeCodexRun( ...buildDurationMetrics(state.startTime), }; + await finishActiveLlmSpan(state, params.error); + try { const error = params.error; safeLog(state.span, { @@ -304,6 +317,43 @@ async function finalizeCodexRun( } } +async function handleCodexItemStarted( + state: CodexRunState, + item: OpenAICodexThreadItem, +): Promise { + if (isCodexToolItem(item)) { + await finishActiveLlmSpan(state); + await startCodexItemSpan(state, item); + return; + } + + await recordCodexLlmItem(state, item, { allowAnonymousText: false }); +} + +async function handleCodexItemUpdated( + state: CodexRunState, + item: OpenAICodexThreadItem, +): Promise { + updateCodexItem(state, item); + await recordCodexLlmItem(state, item, { allowAnonymousText: false }); +} + +async function handleCodexItemCompleted( + state: CodexRunState, + item: OpenAICodexThreadItem, +): Promise { + state.completedItems.push(item); + collectOutputText(state, item); + + if (isCodexToolItem(item)) { + await finishActiveLlmSpan(state); + await finishCodexItemSpan(state, item); + return; + } + + await recordCodexLlmItem(state, item, { allowAnonymousText: true }); +} + async function createCompletedItemSpan( state: CodexRunState, item: OpenAICodexThreadItem, @@ -318,6 +368,126 @@ async function createCompletedItemSpan( span.end(); } +async function recordCodexLlmItem( + state: CodexRunState, + item: OpenAICodexThreadItem, + options: { allowAnonymousText: boolean }, +): Promise { + if (item.type !== "agent_message" && item.type !== "reasoning") { + return; + } + + const text = typeof item.text === "string" ? 
item.text : undefined; + const active = await ensureActiveLlmSpan(state); + if (!text) { + return; + } + + if (item.type === "agent_message") { + if (item.id) { + active.messagesById.set(item.id, text); + } else if (options.allowAnonymousText) { + active.anonymousMessages.push(text); + } + } else if (item.id) { + active.reasoningById.set(item.id, text); + } else if (options.allowAnonymousText) { + active.anonymousReasoning.push(text); + } +} + +async function ensureActiveLlmSpan( + state: CodexRunState, +): Promise { + if (state.activeLlmSpan) { + return state.activeLlmSpan; + } + + const sequence = state.llmSequence + 1; + state.llmSequence = sequence; + const metadata = { + ...(state.metadata.provider ? { provider: state.metadata.provider } : {}), + ...(state.metadata.model ? { model: state.metadata.model } : {}), + ...(state.metadata["openai_codex.model"] + ? { "openai_codex.model": state.metadata["openai_codex.model"] } + : {}), + ...(state.metadata["openai_codex.model_reasoning_effort"] + ? { + "openai_codex.model_reasoning_effort": + state.metadata["openai_codex.model_reasoning_effort"], + } + : {}), + ...(state.metadata["openai_codex.operation"] + ? { "openai_codex.operation": state.metadata["openai_codex.operation"] } + : {}), + ...(state.metadata["openai_codex.thread_id"] + ? { "openai_codex.thread_id": state.metadata["openai_codex.thread_id"] } + : {}), + "openai_codex.llm_sequence": sequence, + }; + + const span = startSpan({ + event: { + ...(sequence === 1 ? 
{ input: state.input } : {}), + metadata, + }, + name: "OpenAI Codex LLM", + parent: await state.span.export(), + spanAttributes: { type: SpanTypeAttribute.LLM }, + }); + + state.activeLlmSpan = { + anonymousMessages: [], + anonymousReasoning: [], + messagesById: new Map(), + metadata, + reasoningById: new Map(), + span, + }; + return state.activeLlmSpan; +} + +async function finishActiveLlmSpan( + state: CodexRunState, + error?: unknown, +): Promise { + const active = state.activeLlmSpan; + if (!active) { + return; + } + + state.activeLlmSpan = undefined; + const output = buildLlmOutput(active); + safeLog(active.span, { + ...(error + ? { error: error instanceof Error ? error.message : String(error) } + : {}), + metadata: active.metadata, + ...(output ? { output } : {}), + }); + active.span.end(); +} + +function buildLlmOutput( + active: CodexLlmSpanState, +): Record | undefined { + const reasoning = [ + ...active.reasoningById.values(), + ...active.anonymousReasoning, + ] + .filter((text) => text.length > 0) + .join("\n"); + const message = [...active.messagesById.values(), ...active.anonymousMessages] + .filter((text) => text.length > 0) + .join("\n"); + const output = { + ...(reasoning ? { reasoning } : {}), + ...(message ? { message } : {}), + }; + + return Object.keys(output).length > 0 ? 
output : undefined; +} + async function startCodexItemSpan( state: CodexRunState, item: OpenAICodexThreadItem, @@ -366,6 +536,15 @@ async function finishCodexItemSpan( span.end(); } +function isCodexToolItem(item: OpenAICodexThreadItem): boolean { + return ( + item.type === "command_execution" || + item.type === "file_change" || + item.type === "mcp_tool_call" || + item.type === "web_search" + ); +} + async function itemSpanArgs( state: CodexRunState, item: OpenAICodexThreadItem, From 0a78b8c04c0aef209b094e9e17c734ecbaf4a9bd Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Tue, 5 May 2026 15:25:59 -0700 Subject: [PATCH 5/5] actually run codex --- ...nai-codex-v0128-auto-hook.span-events.json | 20 +- ...penai-codex-v0128-wrapped.span-events.json | 20 +- .../assertions.ts | 357 ++++++++++++------ .../mock-codex-cli.mjs | 15 +- .../scenario.impl.mjs | 206 ++++++++-- .../scenario.test.ts | 69 ++-- 6 files changed, 473 insertions(+), 214 deletions(-) diff --git a/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-auto-hook.span-events.json b/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-auto-hook.span-events.json index 36e8471bd..df218b703 100644 --- a/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-auto-hook.span-events.json +++ b/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-auto-hook.span-events.json @@ -13,7 +13,7 @@ "metric_keys": [], "name": "OpenAI Codex LLM", "output": { - "reasoning": "reasoning before command RUN_OK" + "reasoning": "reasoning before command OPENAI_CODEX_RUN_OK" }, "root_span_id": "", "span_id": "", @@ -35,7 +35,7 @@ "metric_keys": [], "name": "OpenAI Codex LLM", "output": { - "reasoning": "reasoning after command RUN_OK" + "reasoning": "reasoning after command OPENAI_CODEX_RUN_OK" }, "root_span_id": "", "span_id": "", @@ -57,7 +57,7 @@ "metric_keys": [], "name": "OpenAI Codex LLM", "output": { - "reasoning": "reasoning after mcp 
RUN_OK" + "reasoning": "reasoning after mcp OPENAI_CODEX_RUN_OK" }, "root_span_id": "", "span_id": "", @@ -79,8 +79,8 @@ "metric_keys": [], "name": "OpenAI Codex LLM", "output": { - "message": "Codex RUN_OK", - "reasoning": "final reasoning RUN_OK" + "message": "Codex OPENAI_CODEX_RUN_OK", + "reasoning": "final reasoning OPENAI_CODEX_RUN_OK" }, "root_span_id": "", "span_id": "", @@ -103,7 +103,7 @@ "metric_keys": [], "name": "OpenAI Codex LLM", "output": { - "reasoning": "reasoning before command STREAM_OK" + "reasoning": "reasoning before command OPENAI_CODEX_STREAM_OK" }, "root_span_id": "", "span_id": "", @@ -126,7 +126,7 @@ "metric_keys": [], "name": "OpenAI Codex LLM", "output": { - "reasoning": "reasoning after command STREAM_OK" + "reasoning": "reasoning after command OPENAI_CODEX_STREAM_OK" }, "root_span_id": "", "span_id": "", @@ -149,7 +149,7 @@ "metric_keys": [], "name": "OpenAI Codex LLM", "output": { - "reasoning": "reasoning after mcp STREAM_OK" + "reasoning": "reasoning after mcp OPENAI_CODEX_STREAM_OK" }, "root_span_id": "", "span_id": "", @@ -172,8 +172,8 @@ "metric_keys": [], "name": "OpenAI Codex LLM", "output": { - "message": "Codex STREAM_OK", - "reasoning": "final reasoning STREAM_OK" + "message": "Codex OPENAI_CODEX_STREAM_OK", + "reasoning": "final reasoning OPENAI_CODEX_STREAM_OK" }, "root_span_id": "", "span_id": "", diff --git a/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-wrapped.span-events.json b/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-wrapped.span-events.json index 36e8471bd..df218b703 100644 --- a/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-wrapped.span-events.json +++ b/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-wrapped.span-events.json @@ -13,7 +13,7 @@ "metric_keys": [], "name": "OpenAI Codex LLM", "output": { - "reasoning": "reasoning before command RUN_OK" + "reasoning": "reasoning before command 
OPENAI_CODEX_RUN_OK" }, "root_span_id": "", "span_id": "", @@ -35,7 +35,7 @@ "metric_keys": [], "name": "OpenAI Codex LLM", "output": { - "reasoning": "reasoning after command RUN_OK" + "reasoning": "reasoning after command OPENAI_CODEX_RUN_OK" }, "root_span_id": "", "span_id": "", @@ -57,7 +57,7 @@ "metric_keys": [], "name": "OpenAI Codex LLM", "output": { - "reasoning": "reasoning after mcp RUN_OK" + "reasoning": "reasoning after mcp OPENAI_CODEX_RUN_OK" }, "root_span_id": "", "span_id": "", @@ -79,8 +79,8 @@ "metric_keys": [], "name": "OpenAI Codex LLM", "output": { - "message": "Codex RUN_OK", - "reasoning": "final reasoning RUN_OK" + "message": "Codex OPENAI_CODEX_RUN_OK", + "reasoning": "final reasoning OPENAI_CODEX_RUN_OK" }, "root_span_id": "", "span_id": "", @@ -103,7 +103,7 @@ "metric_keys": [], "name": "OpenAI Codex LLM", "output": { - "reasoning": "reasoning before command STREAM_OK" + "reasoning": "reasoning before command OPENAI_CODEX_STREAM_OK" }, "root_span_id": "", "span_id": "", @@ -126,7 +126,7 @@ "metric_keys": [], "name": "OpenAI Codex LLM", "output": { - "reasoning": "reasoning after command STREAM_OK" + "reasoning": "reasoning after command OPENAI_CODEX_STREAM_OK" }, "root_span_id": "", "span_id": "", @@ -149,7 +149,7 @@ "metric_keys": [], "name": "OpenAI Codex LLM", "output": { - "reasoning": "reasoning after mcp STREAM_OK" + "reasoning": "reasoning after mcp OPENAI_CODEX_STREAM_OK" }, "root_span_id": "", "span_id": "", @@ -172,8 +172,8 @@ "metric_keys": [], "name": "OpenAI Codex LLM", "output": { - "message": "Codex STREAM_OK", - "reasoning": "final reasoning STREAM_OK" + "message": "Codex OPENAI_CODEX_STREAM_OK", + "reasoning": "final reasoning OPENAI_CODEX_STREAM_OK" }, "root_span_id": "", "span_id": "", diff --git a/e2e/scenarios/openai-codex-instrumentation/assertions.ts b/e2e/scenarios/openai-codex-instrumentation/assertions.ts index 2fca6b76d..cb859d997 100644 --- a/e2e/scenarios/openai-codex-instrumentation/assertions.ts +++ 
b/e2e/scenarios/openai-codex-instrumentation/assertions.ts @@ -1,12 +1,12 @@ import { beforeAll, describe, expect, test } from "vitest"; -import { E2E_TAGS } from "../../helpers/tags"; -import { normalizeForSnapshot, type Json } from "../../helpers/normalize"; -import type { CapturedLogEvent } from "../../helpers/mock-braintrust-server"; import { formatJsonFileSnapshot, resolveFileSnapshotPath, } from "../../helpers/file-snapshot"; +import type { CapturedLogEvent } from "../../helpers/mock-braintrust-server"; +import { normalizeForSnapshot, type Json } from "../../helpers/normalize"; import { withScenarioHarness } from "../../helpers/scenario-harness"; +import { E2E_TAGS } from "../../helpers/tags"; import { findLatestSpan } from "../../helpers/trace-selectors"; import { summarizeWrapperContract } from "../../helpers/wrapper-contract"; import { ROOT_NAME, SCENARIO_NAME } from "./scenario.impl.mjs"; @@ -14,6 +14,7 @@ import { ROOT_NAME, SCENARIO_NAME } from "./scenario.impl.mjs"; type RunOpenAICodexScenario = (harness: { runNodeScenarioDir: (options: { entry: string; + env?: Record; nodeArgs: string[]; runContext?: { variantKey: string }; scenarioDir: string; @@ -21,12 +22,25 @@ type RunOpenAICodexScenario = (harness: { }) => Promise; runScenarioDir: (options: { entry: string; + env?: Record; runContext?: { variantKey: string }; scenarioDir: string; timeoutMs: number; }) => Promise; }) => Promise; +type CodexScenarioMode = "mock" | "real"; + +const OPERATION_NAMES = [ + "openai-codex-run-operation", + "openai-codex-run-streamed-operation", +] as const; + +const EXPECTED_MARKERS = { + "openai-codex-run-operation": "OPENAI_CODEX_RUN_OK", + "openai-codex-run-streamed-operation": "OPENAI_CODEX_STREAM_OK", +} as const; + const METADATA_KEYS = [ "provider", "model", @@ -43,46 +57,6 @@ const METADATA_KEYS = [ "openai_codex.mcp.status", ] as const; -function summarizeSpan(event: CapturedLogEvent | undefined): Json { - if (!event) { - return null; - } - const summary = 
summarizeWrapperContract(event, [...METADATA_KEYS]) as Record< - string, - Json - >; - if (summary.metadata && typeof summary.metadata === "object") { - const metadata = summary.metadata as Record; - if (typeof metadata["openai_codex.thread_id"] === "string") { - metadata["openai_codex.thread_id"] = ""; - } - } - return summary; -} - -function isRecord(value: unknown): value is Record { - return typeof value === "object" && value !== null && !Array.isArray(value); -} - -function summarizeLlmOutput(output: unknown): Json { - if (!isRecord(output)) { - return null; - } - - return { - ...(typeof output.reasoning === "string" - ? { reasoning: output.reasoning } - : {}), - ...(typeof output.message === "string" ? { message: output.message } : {}), - } as Json; -} - -function summarizeLlmSpan(event: CapturedLogEvent | undefined): Json { - const summary = summarizeSpan(event) as Record; - summary.output = summarizeLlmOutput(event?.output); - return summary as Json; -} - function findCodexTask(events: CapturedLogEvent[], operationName: string) { const operation = findLatestSpan(events, operationName); return [...events] @@ -144,10 +118,83 @@ function latestSpansForParent( }); } +function expectPositiveMetric( + event: CapturedLogEvent | undefined, + keys: string[], +): void { + const hasPositiveMetric = keys.some((key) => { + const value = event?.metrics?.[key]; + return typeof value === "number" && value > 0; + }); + + expect(hasPositiveMetric).toBe(true); +} + +function outputText(event: CapturedLogEvent | undefined): string { + return typeof event?.output === "string" + ? event.output + : JSON.stringify(event?.output ?? ""); +} + +function sequenceNumber(event: CapturedLogEvent): number | undefined { + const value = event.metadata?.["openai_codex.llm_sequence"]; + return typeof value === "number" ? value : undefined; +} + function childSpanLabel(event: CapturedLogEvent): string { return event.span.type === "llm" ? "llm" : (event.span.name ?? 
""); } +function llmOutput(event: CapturedLogEvent): { + message?: string; + reasoning?: string; +} { + return event.output && + typeof event.output === "object" && + !Array.isArray(event.output) + ? (event.output as { message?: string; reasoning?: string }) + : {}; +} + +function summarizeSpan(event: CapturedLogEvent | undefined): Json { + if (!event) { + return null; + } + const summary = summarizeWrapperContract(event, [...METADATA_KEYS]) as Record< + string, + Json + >; + if (summary.metadata && typeof summary.metadata === "object") { + const metadata = summary.metadata as Record; + if (typeof metadata["openai_codex.thread_id"] === "string") { + metadata["openai_codex.thread_id"] = ""; + } + } + return summary; +} + +function summarizeLlmOutput(output: unknown): Json { + if (typeof output !== "object" || output === null || Array.isArray(output)) { + return null; + } + const outputRecord = output as { message?: unknown; reasoning?: unknown }; + + return { + ...(typeof outputRecord.reasoning === "string" + ? { reasoning: outputRecord.reasoning } + : {}), + ...(typeof outputRecord.message === "string" + ? 
{ message: outputRecord.message } + : {}), + } as Json; +} + +function summarizeLlmSpan(event: CapturedLogEvent | undefined): Json { + const summary = summarizeSpan(event) as Record; + summary.output = summarizeLlmOutput(event?.output); + return summary as Json; +} + function summarize(events: CapturedLogEvent[]): Json { const runTask = findCodexTask(events, "openai-codex-run-operation"); const streamedTask = findCodexTask( @@ -176,19 +223,31 @@ function summarize(events: CapturedLogEvent[]): Json { } as Json); } +function mockSnapshotPath(options: { + snapshotName?: string; + testFileUrl?: string; +}): string { + if (!options.snapshotName || !options.testFileUrl) { + throw new Error( + "Mock OpenAI Codex instrumentation assertions require snapshotName and testFileUrl", + ); + } + return resolveFileSnapshotPath( + options.testFileUrl, + `${options.snapshotName}.span-events.json`, + ); +} + export function defineOpenAICodexInstrumentationAssertions(options: { + mode: CodexScenarioMode; name: string; runScenario: RunOpenAICodexScenario; - snapshotName: string; - testFileUrl: string; + snapshotName?: string; + testFileUrl?: string; timeoutMs: number; }): void { - const snapshotPath = resolveFileSnapshotPath( - options.testFileUrl, - `${options.snapshotName}.span-events.json`, - ); const testConfig = { - tags: [E2E_TAGS.hermetic], + ...(options.mode === "mock" ? 
{ tags: [E2E_TAGS.hermetic] } : {}), timeout: options.timeoutMs, }; @@ -210,10 +269,7 @@ export function defineOpenAICodexInstrumentationAssertions(options: { }); test("captures Codex task spans", testConfig, () => { - for (const operationName of [ - "openai-codex-run-operation", - "openai-codex-run-streamed-operation", - ]) { + for (const operationName of OPERATION_NAMES) { const operation = findLatestSpan(events, operationName); const task = findCodexTask(events, operationName); @@ -223,94 +279,149 @@ export function defineOpenAICodexInstrumentationAssertions(options: { expect(task?.row.metadata).toMatchObject({ provider: "openai", }); + expect(task?.row.metadata?.["openai_codex.model"]).toEqual( + expect.any(String), + ); } }); - test("captures LLM spans around tool calls", testConfig, () => { + test("captures dynamic LLM spans for each Codex turn", testConfig, () => { const llmSpans = latestSpansByType(events, "llm"); - expect(llmSpans).toHaveLength(8); + expect(llmSpans.length).toBeGreaterThanOrEqual(OPERATION_NAMES.length); expect( llmSpans.every((event) => event.span.name === "OpenAI Codex LLM"), ).toBe(true); - expect( - llmSpans.some((event) => { - const output = event.output as - | { message?: string; reasoning?: string } - | undefined; - return ( - output?.reasoning === "final reasoning RUN_OK" && - output.message === "Codex RUN_OK" - ); - }), - ).toBe(true); - expect( - llmSpans.some((event) => { - const output = event.output as - | { message?: string; reasoning?: string } - | undefined; - return output?.reasoning === "reasoning after command STREAM_OK"; - }), - ).toBe(true); - for (const operationName of [ - "openai-codex-run-operation", - "openai-codex-run-streamed-operation", - ]) { + for (const operationName of OPERATION_NAMES) { const task = findCodexTask(events, operationName); - expect( - latestSpansForParent(events, task?.span.id).map(childSpanLabel), - ).toEqual([ - "llm", - "tool: command_execution", - "llm", - "tool: read_file", - "llm", - 
"tool: web_search", - "llm", - ]); + const childSpans = latestSpansForParent(events, task?.span.id); + const taskLlmSpans = childSpans.filter( + (event) => event.span.type === "llm", + ); + const sequences = taskLlmSpans + .map(sequenceNumber) + .filter((value): value is number => value !== undefined); + + expect(taskLlmSpans.length).toBeGreaterThanOrEqual(1); + expect(sequences[0]).toBe(1); + expect(sequences).toEqual([...sequences].sort((a, b) => a - b)); + expect(taskLlmSpans.some((event) => outputText(event).length > 2)).toBe( + true, + ); } }); - test("captures command and MCP tool spans", testConfig, () => { - const toolSpans = latestSpansByType(events, "tool"); + test( + "captures Codex tool spans when the agent uses tools", + testConfig, + () => { + const toolSpans = latestSpansByType(events, "tool"); - expect( - toolSpans.some( - (event) => - event.span.name === "tool: command_execution" && - event.output === "codex_tool_ok", - ), - ).toBe(true); - expect( - toolSpans.some( - (event) => - event.span.name === "tool: read_file" && - event.metadata?.["openai_codex.mcp.server"] === "filesystem", - ), - ).toBe(true); - }); + expect(toolSpans.length).toBeGreaterThanOrEqual(OPERATION_NAMES.length); + expect( + toolSpans.some( + (event) => event.span.name === "tool: command_execution", + ), + ).toBe(true); + + for (const operationName of OPERATION_NAMES) { + const task = findCodexTask(events, operationName); + const childSpans = latestSpansForParent(events, task?.span.id); + const childTypes = childSpans.map((event) => event.span.type); + + expect(childTypes).toContain("llm"); + expect(childTypes).toContain("tool"); + } + }, + ); test("captures final responses and usage metrics", testConfig, () => { - const runTask = findCodexTask(events, "openai-codex-run-operation"); - const streamedTask = findCodexTask( - events, - "openai-codex-run-streamed-operation", + for (const operationName of OPERATION_NAMES) { + const task = findCodexTask(events, operationName); + + 
expect(outputText(task)).toContain(EXPECTED_MARKERS[operationName]); + expectPositiveMetric(task, [ + "tokens", + "prompt_tokens", + "completion_tokens", + ]); + } + }); + + if (options.mode === "mock") { + test( + "captures deterministic mock LLM and tool details", + testConfig, + () => { + const llmSpans = latestSpansByType(events, "llm"); + const toolSpans = latestSpansByType(events, "tool"); + + expect(llmSpans).toHaveLength(8); + expect( + llmSpans.some((event) => { + const output = llmOutput(event); + return ( + output.reasoning === "final reasoning OPENAI_CODEX_RUN_OK" && + output.message === "Codex OPENAI_CODEX_RUN_OK" + ); + }), + ).toBe(true); + expect( + llmSpans.some( + (event) => + llmOutput(event).reasoning === + "reasoning after command OPENAI_CODEX_STREAM_OK", + ), + ).toBe(true); + + for (const operationName of OPERATION_NAMES) { + const task = findCodexTask(events, operationName); + expect( + latestSpansForParent(events, task?.span.id).map(childSpanLabel), + ).toEqual([ + "llm", + "tool: command_execution", + "llm", + "tool: read_file", + "llm", + "tool: web_search", + "llm", + ]); + } + + expect( + toolSpans.some( + (event) => + event.span.name === "tool: command_execution" && + event.output === "codex_tool_ok", + ), + ).toBe(true); + expect( + toolSpans.some( + (event) => + event.span.name === "tool: read_file" && + event.metadata?.["openai_codex.mcp.server"] === "filesystem", + ), + ).toBe(true); + }, ); - expect(runTask?.output).toContain("RUN_OK"); - expect(streamedTask?.output).toContain("STREAM_OK"); - expect(runTask?.metrics).toMatchObject({ - completion_tokens: 7, - prompt_cached_tokens: 3, - prompt_tokens: 11, + test("captures deterministic mock usage metrics", testConfig, () => { + const runTask = findCodexTask(events, "openai-codex-run-operation"); + + expect(runTask?.metrics).toMatchObject({ + completion_tokens: 7, + prompt_cached_tokens: 3, + prompt_tokens: 11, + }); }); - }); - test("matches the shared span snapshot", testConfig, 
async () => { - await expect( - formatJsonFileSnapshot(summarize(events)), - ).toMatchFileSnapshot(snapshotPath); - }); + test("matches the mock span snapshot", testConfig, async () => { + await expect( + formatJsonFileSnapshot(summarize(events)), + ).toMatchFileSnapshot(mockSnapshotPath(options)); + }); + } }); } diff --git a/e2e/scenarios/openai-codex-instrumentation/mock-codex-cli.mjs b/e2e/scenarios/openai-codex-instrumentation/mock-codex-cli.mjs index e7243154d..c0506f887 100755 --- a/e2e/scenarios/openai-codex-instrumentation/mock-codex-cli.mjs +++ b/e2e/scenarios/openai-codex-instrumentation/mock-codex-cli.mjs @@ -7,8 +7,9 @@ process.stdin.on("data", (chunk) => { input += chunk; }); process.stdin.on("end", () => { - const isStream = input.includes("stream"); - const suffix = isStream ? "STREAM_OK" : "RUN_OK"; + const isStream = + input.includes("OPENAI_CODEX_STREAM_OK") || input.includes("stream"); + const marker = isStream ? "OPENAI_CODEX_STREAM_OK" : "OPENAI_CODEX_RUN_OK"; const threadId = isStream ? 
"thread_stream" : "thread_run"; const events = [ { type: "thread.started", thread_id: threadId }, @@ -18,7 +19,7 @@ process.stdin.on("end", () => { item: { id: `${threadId}_reasoning_before_command`, type: "reasoning", - text: `reasoning before command ${suffix}`, + text: `reasoning before command ${marker}`, }, }, { @@ -47,7 +48,7 @@ process.stdin.on("end", () => { item: { id: `${threadId}_reasoning_after_command`, type: "reasoning", - text: `reasoning after command ${suffix}`, + text: `reasoning after command ${marker}`, }, }, { @@ -81,7 +82,7 @@ process.stdin.on("end", () => { item: { id: `${threadId}_reasoning_after_mcp`, type: "reasoning", - text: `reasoning after mcp ${suffix}`, + text: `reasoning after mcp ${marker}`, }, }, { @@ -97,7 +98,7 @@ process.stdin.on("end", () => { item: { id: `${threadId}_reasoning`, type: "reasoning", - text: `final reasoning ${suffix}`, + text: `final reasoning ${marker}`, }, }, { @@ -105,7 +106,7 @@ process.stdin.on("end", () => { item: { id: `${threadId}_message`, type: "agent_message", - text: `Codex ${suffix}`, + text: `Codex ${marker}`, }, }, { diff --git a/e2e/scenarios/openai-codex-instrumentation/scenario.impl.mjs b/e2e/scenarios/openai-codex-instrumentation/scenario.impl.mjs index c63ae20e6..2444db4a4 100644 --- a/e2e/scenarios/openai-codex-instrumentation/scenario.impl.mjs +++ b/e2e/scenarios/openai-codex-instrumentation/scenario.impl.mjs @@ -4,6 +4,8 @@ import { runOperation, runTracedScenario, } from "../../helpers/provider-runtime.mjs"; +import { mkdtemp, readFile, rm, writeFile } from "node:fs/promises"; +import os from "node:os"; import path from "node:path"; import { fileURLToPath } from "node:url"; @@ -12,8 +14,82 @@ export const SCENARIO_NAME = "openai-codex-instrumentation"; const SCENARIO_DIR = path.dirname(fileURLToPath(import.meta.url)); const MOCK_CODEX_PATH = path.join(SCENARIO_DIR, "mock-codex-cli.mjs"); +const RUN_MARKER = "OPENAI_CODEX_RUN_OK"; +const STREAM_MARKER = "OPENAI_CODEX_STREAM_OK"; -function 
createClient(SDK) { +function parseEnvLine(line) { + const trimmed = line.trim(); + if (!trimmed || trimmed.startsWith("#")) { + return; + } + + const withoutExport = trimmed.startsWith("export ") + ? trimmed.slice("export ".length).trim() + : trimmed; + const separator = withoutExport.indexOf("="); + if (separator <= 0) { + return; + } + + const key = withoutExport.slice(0, separator).trim(); + let value = withoutExport.slice(separator + 1).trim(); + if ( + (value.startsWith('"') && value.endsWith('"')) || + (value.startsWith("'") && value.endsWith("'")) + ) { + value = value.slice(1, -1); + } + return { key, value }; +} + +async function loadRootEnv() { + const repoRoot = process.env.BRAINTRUST_E2E_REPO_ROOT; + if (!repoRoot) { + return; + } + + let contents; + try { + contents = await readFile(path.join(repoRoot, ".env"), "utf8"); + } catch { + return; + } + + for (const line of contents.split(/\r?\n/)) { + const parsed = parseEnvLine(line); + if (parsed && process.env[parsed.key] === undefined) { + process.env[parsed.key] = parsed.value; + } + } +} + +function stringEnv() { + return Object.fromEntries( + Object.entries(process.env).filter((entry) => entry[1] !== undefined), + ); +} + +function requireOpenAIKey() { + const apiKey = process.env.OPENAI_API_KEY; + if (!apiKey) { + throw new Error( + "OPENAI_API_KEY is required to run openai-codex-instrumentation against the real Codex SDK", + ); + } + return apiKey; +} + +function scenarioMode() { + const mode = process.env.OPENAI_CODEX_E2E_MODE ?? 
"mock"; + if (mode !== "mock" && mode !== "real") { + throw new Error( + `OPENAI_CODEX_E2E_MODE must be "mock" or "real", received ${JSON.stringify(mode)}`, + ); + } + return mode; +} + +function createMockClient(SDK) { const { Codex } = SDK; return new Codex({ apiKey: "test-key", @@ -24,49 +100,111 @@ function createClient(SDK) { }); } -function startThread(client) { +function createRealClient(SDK) { + const { Codex } = SDK; + return new Codex({ + apiKey: requireOpenAIKey(), + env: stringEnv(), + }); +} + +function createClient(SDK, mode) { + return mode === "real" ? createRealClient(SDK) : createMockClient(SDK); +} + +function startThread(client, mode, workingDirectory) { return client.startThread({ approvalPolicy: "never", - model: "gpt-5-codex", + model: process.env.OPENAI_CODEX_E2E_MODEL ?? "gpt-5-codex", modelReasoningEffort: "low", networkAccessEnabled: false, - sandboxMode: "danger-full-access", + sandboxMode: mode === "real" ? "workspace-write" : "danger-full-access", + ...(mode === "real" ? 
{ skipGitRepoCheck: true } : {}), webSearchMode: "disabled", - workingDirectory: process.cwd(), + workingDirectory, }); } +async function createWorkspace(marker) { + const workingDirectory = await mkdtemp( + path.join(os.tmpdir(), "braintrust-codex-e2e-"), + ); + await writeFile( + path.join(workingDirectory, "codex-input.txt"), + `The final answer marker is ${marker}.\n`, + "utf8", + ); + return workingDirectory; +} + +function realPrompt(marker) { + return [ + "You are running inside an SDK instrumentation test.", + "Before answering, use the shell to run `cat codex-input.txt`.", + "Then answer in one short sentence.", + `The final response must include the exact marker ${marker}.`, + ].join(" "); +} + +function mockPrompt(marker, operation) { + return `Return Codex ${marker} after using a command in ${operation} mode.`; +} + async function runOpenAICodexScenario({ decorateSDK, sdk }) { + const mode = scenarioMode(); + if (mode === "real") { + await loadRootEnv(); + } const instrumentedSDK = decorateSDK ? decorateSDK(sdk) : sdk; - const client = createClient(instrumentedSDK); - - await runTracedScenario({ - callback: async () => { - await runOperation("openai-codex-run-operation", "run", async () => { - const thread = startThread(client); - await thread.run("Return Codex RUN_OK after using a command."); - }); - - await runOperation( - "openai-codex-run-streamed-operation", - "runStreamed", - async () => { - const thread = startThread(client); - const streamedTurn = await thread.runStreamed( - "Return Codex STREAM_OK after using a command in stream mode.", - ); - await collectAsync(streamedTurn.events); - }, - ); - }, - flushCount: 2, - flushDelayMs: 100, - metadata: { - scenario: SCENARIO_NAME, - }, - projectNameBase: "e2e-openai-codex-instrumentation", - rootName: ROOT_NAME, - }); + const client = createClient(instrumentedSDK, mode); + let runWorkingDirectory = process.cwd(); + let streamedWorkingDirectory = process.cwd(); + const runPrompt = + mode === "real" ? 
realPrompt(RUN_MARKER) : mockPrompt(RUN_MARKER, "run"); + const streamedPrompt = + mode === "real" + ? realPrompt(STREAM_MARKER) + : mockPrompt(STREAM_MARKER, "stream"); + + try { + if (mode === "real") { + runWorkingDirectory = await createWorkspace(RUN_MARKER); + streamedWorkingDirectory = await createWorkspace(STREAM_MARKER); + } + + await runTracedScenario({ + callback: async () => { + await runOperation("openai-codex-run-operation", "run", async () => { + const thread = startThread(client, mode, runWorkingDirectory); + await thread.run(runPrompt); + }); + + await runOperation( + "openai-codex-run-streamed-operation", + "runStreamed", + async () => { + const thread = startThread(client, mode, streamedWorkingDirectory); + const streamedTurn = await thread.runStreamed(streamedPrompt); + await collectAsync(streamedTurn.events); + }, + ); + }, + flushCount: 2, + flushDelayMs: 100, + metadata: { + scenario: SCENARIO_NAME, + }, + projectNameBase: "e2e-openai-codex-instrumentation", + rootName: ROOT_NAME, + }); + } finally { + if (mode === "real") { + await Promise.allSettled([ + rm(runWorkingDirectory, { force: true, recursive: true }), + rm(streamedWorkingDirectory, { force: true, recursive: true }), + ]); + } + } } export async function runWrappedOpenAICodexInstrumentation(sdk) { diff --git a/e2e/scenarios/openai-codex-instrumentation/scenario.test.ts b/e2e/scenarios/openai-codex-instrumentation/scenario.test.ts index d23b8df49..1f2aeb80e 100644 --- a/e2e/scenarios/openai-codex-instrumentation/scenario.test.ts +++ b/e2e/scenarios/openai-codex-instrumentation/scenario.test.ts @@ -9,7 +9,8 @@ import { defineOpenAICodexInstrumentationAssertions } from "./assertions"; const scenarioDir = await prepareScenarioDir({ scenarioDir: resolveScenarioDir(import.meta.url), }); -const TIMEOUT_MS = 120_000; +const TIMEOUT_MS = 240_000; +const CODEX_SCENARIO_MODES = ["mock", "real"] as const; const openAICodexScenario = { autoEntry: "scenario.openai-codex-v0128.mjs", 
autoSnapshotName: "openai-codex-v0128-auto-hook", @@ -24,36 +25,44 @@ const openAICodexScenario = { }; describe("wrapped instrumentation", () => { - defineOpenAICodexInstrumentationAssertions({ - name: `openai codex sdk ${openAICodexScenario.version}`, - runScenario: async ({ runScenarioDir }) => { - await runScenarioDir({ - entry: openAICodexScenario.wrapperEntry, - runContext: { variantKey: openAICodexScenario.variantKey }, - scenarioDir, - timeoutMs: TIMEOUT_MS, - }); - }, - snapshotName: openAICodexScenario.wrapperSnapshotName, - testFileUrl: import.meta.url, - timeoutMs: TIMEOUT_MS, - }); + for (const mode of CODEX_SCENARIO_MODES) { + defineOpenAICodexInstrumentationAssertions({ + mode, + name: `openai codex sdk ${openAICodexScenario.version} (${mode})`, + runScenario: async ({ runScenarioDir }) => { + await runScenarioDir({ + entry: openAICodexScenario.wrapperEntry, + env: { OPENAI_CODEX_E2E_MODE: mode }, + runContext: { variantKey: openAICodexScenario.variantKey }, + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); + }, + snapshotName: openAICodexScenario.wrapperSnapshotName, + testFileUrl: import.meta.url, + timeoutMs: TIMEOUT_MS, + }); + } }); describe("auto-hook instrumentation", () => { - defineOpenAICodexInstrumentationAssertions({ - name: `openai codex sdk ${openAICodexScenario.version}`, - runScenario: async ({ runNodeScenarioDir }) => { - await runNodeScenarioDir({ - entry: openAICodexScenario.autoEntry, - nodeArgs: ["--import", "braintrust/hook.mjs"], - runContext: { variantKey: openAICodexScenario.variantKey }, - scenarioDir, - timeoutMs: TIMEOUT_MS, - }); - }, - snapshotName: openAICodexScenario.autoSnapshotName, - testFileUrl: import.meta.url, - timeoutMs: TIMEOUT_MS, - }); + for (const mode of CODEX_SCENARIO_MODES) { + defineOpenAICodexInstrumentationAssertions({ + mode, + name: `openai codex sdk ${openAICodexScenario.version} (${mode})`, + runScenario: async ({ runNodeScenarioDir }) => { + await runNodeScenarioDir({ + entry: 
openAICodexScenario.autoEntry, + env: { OPENAI_CODEX_E2E_MODE: mode }, + nodeArgs: ["--import", "braintrust/hook.mjs"], + runContext: { variantKey: openAICodexScenario.variantKey }, + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); + }, + snapshotName: openAICodexScenario.autoSnapshotName, + testFileUrl: import.meta.url, + timeoutMs: TIMEOUT_MS, + }); + } });