From 74a45bc73da9fefca849eceffcd6df331334c077 Mon Sep 17 00:00:00 2001
From: Luca Forstner <luca.forstner@gmail.com>
Date: Mon, 4 May 2026 15:29:09 -0700
Subject: [PATCH 1/5] feat: Add `@openai/codex-sdk` instrumentation

---
 e2e/config/pr-comment-scenarios.json          |   6 +
 ...nai-codex-v0128-auto-hook.span-events.json | 204 +++++
 ...penai-codex-v0128-wrapped.span-events.json | 204 +++++
 .../assertions.ts                             | 213 ++++++
 .../mock-codex-cli.mjs                        | 101 +++
 .../openai-codex-instrumentation/package.json |  14 +
 .../pnpm-lock.yaml                            |  93 +++
 .../scenario.impl.mjs                         |  83 ++
 .../openai-codex-instrumentation/scenario.mjs |   5 +
 .../scenario.openai-codex-v0128.mjs           |   5 +
 .../scenario.openai-codex-v0128.ts            |   5 +
 .../scenario.test.ts                          |  59 ++
 .../openai-codex-instrumentation/scenario.ts  |   5 +
 .../auto-instrumentations/bundler/plugin.ts   |   2 +
 .../configs/openai-codex.ts                   |  33 +
 js/src/auto-instrumentations/hook.mts         |  10 +
 js/src/auto-instrumentations/index.ts         |   1 +
 js/src/exports.ts                             |   1 +
 .../instrumentation/braintrust-plugin.test.ts |  53 ++
 js/src/instrumentation/braintrust-plugin.ts   |  14 +
 .../plugins/openai-codex-channels.ts          |  29 +
 .../plugins/openai-codex-plugin.ts            | 707 ++++++++++++++++++
 js/src/instrumentation/registry.test.ts       |   1 +
 js/src/instrumentation/registry.ts            |  12 +
 js/src/vendor-sdk-types/openai-codex.ts       | 215 ++++++
 js/src/wrappers/openai-codex.ts               | 186 +++++
 26 files changed, 2261 insertions(+)
 create mode 100644 e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-auto-hook.span-events.json
 create mode 100644 e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-wrapped.span-events.json
 create mode 100644 e2e/scenarios/openai-codex-instrumentation/assertions.ts
 create mode 100755 e2e/scenarios/openai-codex-instrumentation/mock-codex-cli.mjs
 create mode 100644 e2e/scenarios/openai-codex-instrumentation/package.json
 create mode 100644 e2e/scenarios/openai-codex-instrumentation/pnpm-lock.yaml
 create mode 100644 e2e/scenarios/openai-codex-instrumentation/scenario.impl.mjs
 create mode 100644 e2e/scenarios/openai-codex-instrumentation/scenario.mjs
 create mode 100644 e2e/scenarios/openai-codex-instrumentation/scenario.openai-codex-v0128.mjs
 create mode 100644 e2e/scenarios/openai-codex-instrumentation/scenario.openai-codex-v0128.ts
 create mode 100644 e2e/scenarios/openai-codex-instrumentation/scenario.test.ts
 create mode 100644 e2e/scenarios/openai-codex-instrumentation/scenario.ts
 create mode 100644 js/src/auto-instrumentations/configs/openai-codex.ts
 create mode 100644 js/src/instrumentation/plugins/openai-codex-channels.ts
 create mode 100644 js/src/instrumentation/plugins/openai-codex-plugin.ts
 create mode 100644 js/src/vendor-sdk-types/openai-codex.ts
 create mode 100644 js/src/wrappers/openai-codex.ts

diff --git a/e2e/config/pr-comment-scenarios.json b/e2e/config/pr-comment-scenarios.json
index 0108b9e78..bfaf885e0 100644
--- a/e2e/config/pr-comment-scenarios.json
+++ b/e2e/config/pr-comment-scenarios.json
@@ -9,6 +9,12 @@
       { "variantKey": "openai-v6", "label": "v6" }
     ]
   },
+  {
+    "scenarioDirName": "openai-codex-instrumentation",
+    "label": "OpenAI Codex Instrumentation",
+    "metadataScenario": "openai-codex-instrumentation",
+    "variants": [{ "variantKey": "openai-codex-v0128", "label": "v0.128" }]
+  },
   {
     "scenarioDirName": "anthropic-instrumentation",
     "label": "Anthropic Instrumentation",
diff --git a/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-auto-hook.span-events.json b/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-auto-hook.span-events.json
new file mode 100644
index 000000000..7a5fd7ed0
--- /dev/null
+++ b/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-auto-hook.span-events.json
@@ -0,0 +1,204 @@
+{
+  "root": {
+    "has_input": false,
+    "has_output": false,
+    "metadata": {
+      "scenario": "openai-codex-instrumentation"
+    },
+    "metric_keys": [],
+    "name": "openai-codex-instrumentation-root",
+    "root_span_id": "<span:1>",
+    "span_id": "<span:1>",
+    "span_parents": [],
+    "type": "task"
+  },
+  "run": {
+    "operation": {
+      "has_input": false,
+      "has_output": false,
+      "metadata": {
+        "operation": "run"
+      },
+      "metric_keys": [],
+      "name": "openai-codex-run-operation",
+      "root_span_id": "<span:1>",
+      "span_id": "<span:2>",
+      "span_parents": [
+        "<span:1>"
+      ],
+      "type": null
+    },
+    "task": {
+      "has_input": true,
+      "has_output": true,
+      "metadata": {
+        "model": "gpt-5-codex",
+        "openai_codex.model": "gpt-5-codex",
+        "openai_codex.operation": "Thread.run",
+        "provider": "openai"
+      },
+      "metric_keys": [
+        "completion_reasoning_tokens",
+        "completion_tokens",
+        "duration",
+        "prompt_cached_tokens",
+        "prompt_tokens",
+        "tokens"
+      ],
+      "name": "OpenAI Codex",
+      "root_span_id": "<span:1>",
+      "span_id": "<span:3>",
+      "span_parents": [
+        "<span:2>"
+      ],
+      "type": "task"
+    }
+  },
+  "streamed": {
+    "operation": {
+      "has_input": false,
+      "has_output": false,
+      "metadata": {
+        "operation": "runStreamed"
+      },
+      "metric_keys": [],
+      "name": "openai-codex-run-streamed-operation",
+      "root_span_id": "<span:1>",
+      "span_id": "<span:4>",
+      "span_parents": [
+        "<span:1>"
+      ],
+      "type": null
+    },
+    "task": {
+      "has_input": true,
+      "has_output": true,
+      "metadata": {
+        "model": "gpt-5-codex",
+        "openai_codex.model": "gpt-5-codex",
+        "openai_codex.operation": "Thread.runStreamed",
+        "openai_codex.thread_id": "<thread-id>",
+        "provider": "openai"
+      },
+      "metric_keys": [
+        "completion_reasoning_tokens",
+        "completion_tokens",
+        "duration",
+        "prompt_cached_tokens",
+        "prompt_tokens",
+        "tokens"
+      ],
+      "name": "OpenAI Codex",
+      "root_span_id": "<span:1>",
+      "span_id": "<span:5>",
+      "span_parents": [
+        "<span:4>"
+      ],
+      "type": "task"
+    }
+  },
+  "tools": [
+    {
+      "has_input": true,
+      "has_output": true,
+      "metadata": {
+        "gen_ai.tool.name": "command_execution",
+        "openai_codex.command.status": "completed",
+        "openai_codex.item_type": "command_execution"
+      },
+      "metric_keys": [],
+      "name": "tool: command_execution",
+      "root_span_id": "<span:1>",
+      "span_id": "<span:6>",
+      "span_parents": [
+        "<span:3>"
+      ],
+      "type": "tool"
+    },
+    {
+      "has_input": true,
+      "has_output": true,
+      "metadata": {
+        "gen_ai.tool.name": "read_file",
+        "openai_codex.item_type": "mcp_tool_call",
+        "openai_codex.mcp.server": "filesystem",
+        "openai_codex.mcp.status": "completed"
+      },
+      "metric_keys": [],
+      "name": "tool: read_file",
+      "root_span_id": "<span:1>",
+      "span_id": "<span:7>",
+      "span_parents": [
+        "<span:3>"
+      ],
+      "type": "tool"
+    },
+    {
+      "has_input": true,
+      "has_output": false,
+      "metadata": {
+        "gen_ai.tool.name": "web_search",
+        "openai_codex.item_type": "web_search"
+      },
+      "metric_keys": [],
+      "name": "tool: web_search",
+      "root_span_id": "<span:1>",
+      "span_id": "<span:8>",
+      "span_parents": [
+        "<span:3>"
+      ],
+      "type": "tool"
+    },
+    {
+      "has_input": true,
+      "has_output": true,
+      "metadata": {
+        "gen_ai.tool.name": "command_execution",
+        "openai_codex.command.status": "completed",
+        "openai_codex.item_type": "command_execution"
+      },
+      "metric_keys": [],
+      "name": "tool: command_execution",
+      "root_span_id": "<span:1>",
+      "span_id": "<span:9>",
+      "span_parents": [
+        "<span:5>"
+      ],
+      "type": "tool"
+    },
+    {
+      "has_input": true,
+      "has_output": true,
+      "metadata": {
+        "gen_ai.tool.name": "read_file",
+        "openai_codex.item_type": "mcp_tool_call",
+        "openai_codex.mcp.server": "filesystem",
+        "openai_codex.mcp.status": "completed"
+      },
+      "metric_keys": [],
+      "name": "tool: read_file",
+      "root_span_id": "<span:1>",
+      "span_id": "<span:10>",
+      "span_parents": [
+        "<span:5>"
+      ],
+      "type": "tool"
+    },
+    {
+      "has_input": true,
+      "has_output": false,
+      "metadata": {
+        "gen_ai.tool.name": "web_search",
+        "openai_codex.item_type": "web_search"
+      },
+      "metric_keys": [],
+      "name": "tool: web_search",
+      "root_span_id": "<span:1>",
+      "span_id": "<span:11>",
+      "span_parents": [
+        "<span:5>"
+      ],
+      "type": "tool"
+    }
+  ]
+}
diff --git a/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-wrapped.span-events.json b/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-wrapped.span-events.json
new file mode 100644
index 000000000..7a5fd7ed0
--- /dev/null
+++ b/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-wrapped.span-events.json
@@ -0,0 +1,204 @@
+{
+  "root": {
+    "has_input": false,
+    "has_output": false,
+    "metadata": {
+      "scenario": "openai-codex-instrumentation"
+    },
+    "metric_keys": [],
+    "name": "openai-codex-instrumentation-root",
+    "root_span_id": "<span:1>",
+    "span_id": "<span:1>",
+    "span_parents": [],
+    "type": "task"
+  },
+  "run": {
+    "operation": {
+      "has_input": false,
+      "has_output": false,
+      "metadata": {
+        "operation": "run"
+      },
+      "metric_keys": [],
+      "name": "openai-codex-run-operation",
+      "root_span_id": "<span:1>",
+      "span_id": "<span:2>",
+      "span_parents": [
+        "<span:1>"
+      ],
+      "type": null
+    },
+    "task": {
+      "has_input": true,
+      "has_output": true,
+      "metadata": {
+        "model": "gpt-5-codex",
+        "openai_codex.model": "gpt-5-codex",
+        "openai_codex.operation": "Thread.run",
+        "provider": "openai"
+      },
+      "metric_keys": [
+        "completion_reasoning_tokens",
+        "completion_tokens",
+        "duration",
+        "prompt_cached_tokens",
+        "prompt_tokens",
+        "tokens"
+      ],
+      "name": "OpenAI Codex",
+      "root_span_id": "<span:1>",
+      "span_id": "<span:3>",
+      "span_parents": [
+        "<span:2>"
+      ],
+      "type": "task"
+    }
+  },
+  "streamed": {
+    "operation": {
+      "has_input": false,
+      "has_output": false,
+      "metadata": {
+        "operation": "runStreamed"
+      },
+      "metric_keys": [],
+      "name": "openai-codex-run-streamed-operation",
+      "root_span_id": "<span:1>",
+      "span_id": "<span:4>",
+      "span_parents": [
+        "<span:1>"
+      ],
+      "type": null
+    },
+    "task": {
+      "has_input": true,
+      "has_output": true,
+      "metadata": {
+        "model": "gpt-5-codex",
+        "openai_codex.model": "gpt-5-codex",
+        "openai_codex.operation": "Thread.runStreamed",
+        "openai_codex.thread_id": "<thread-id>",
+        "provider": "openai"
+      },
+      "metric_keys": [
+        "completion_reasoning_tokens",
+        "completion_tokens",
+        "duration",
+        "prompt_cached_tokens",
+        "prompt_tokens",
+        "tokens"
+      ],
+      "name": "OpenAI Codex",
+      "root_span_id": "<span:1>",
+      "span_id": "<span:5>",
+      "span_parents": [
+        "<span:4>"
+      ],
+      "type": "task"
+    }
+  },
+  "tools": [
+    {
+      "has_input": true,
+      "has_output": true,
+      "metadata": {
+        "gen_ai.tool.name": "command_execution",
+        "openai_codex.command.status": "completed",
+        "openai_codex.item_type": "command_execution"
+      },
+      "metric_keys": [],
+      "name": "tool: command_execution",
+      "root_span_id": "<span:1>",
+      "span_id": "<span:6>",
+      "span_parents": [
+        "<span:3>"
+      ],
+      "type": "tool"
+    },
+    {
+      "has_input": true,
+      "has_output": true,
+      "metadata": {
+        "gen_ai.tool.name": "read_file",
+        "openai_codex.item_type": "mcp_tool_call",
+        "openai_codex.mcp.server": "filesystem",
+        "openai_codex.mcp.status": "completed"
+      },
+      "metric_keys": [],
+      "name": "tool: read_file",
+      "root_span_id": "<span:1>",
+      "span_id": "<span:7>",
+      "span_parents": [
+        "<span:3>"
+      ],
+      "type": "tool"
+    },
+    {
+      "has_input": true,
+      "has_output": false,
+      "metadata": {
+        "gen_ai.tool.name": "web_search",
+        "openai_codex.item_type": "web_search"
+      },
+      "metric_keys": [],
+      "name": "tool: web_search",
+      "root_span_id": "<span:1>",
+      "span_id": "<span:8>",
+      "span_parents": [
+        "<span:3>"
+      ],
+      "type": "tool"
+    },
+    {
+      "has_input": true,
+      "has_output": true,
+      "metadata": {
+        "gen_ai.tool.name": "command_execution",
+        "openai_codex.command.status": "completed",
+        "openai_codex.item_type": "command_execution"
+      },
+      "metric_keys": [],
+      "name": "tool: command_execution",
+      "root_span_id": "<span:1>",
+      "span_id": "<span:9>",
+      "span_parents": [
+        "<span:5>"
+      ],
+      "type": "tool"
+    },
+    {
+      "has_input": true,
+      "has_output": true,
+      "metadata": {
+        "gen_ai.tool.name": "read_file",
+        "openai_codex.item_type": "mcp_tool_call",
+        "openai_codex.mcp.server": "filesystem",
+        "openai_codex.mcp.status": "completed"
+      },
+      "metric_keys": [],
+      "name": "tool: read_file",
+      "root_span_id": "<span:1>",
+      "span_id": "<span:10>",
+      "span_parents": [
+        "<span:5>"
+      ],
+      "type": "tool"
+    },
+    {
+      "has_input": true,
+      "has_output": false,
+      "metadata": {
+        "gen_ai.tool.name": "web_search",
+        "openai_codex.item_type": "web_search"
+      },
+      "metric_keys": [],
+      "name": "tool: web_search",
+      "root_span_id": "<span:1>",
+      "span_id": "<span:11>",
+      "span_parents": [
+        "<span:5>"
+      ],
+      "type": "tool"
+    }
+  ]
+}
diff --git a/e2e/scenarios/openai-codex-instrumentation/assertions.ts b/e2e/scenarios/openai-codex-instrumentation/assertions.ts
new file mode 100644
index 000000000..4d54f6f21
--- /dev/null
+++ b/e2e/scenarios/openai-codex-instrumentation/assertions.ts
@@ -0,0 +1,213 @@
+import { beforeAll, describe, expect, test } from "vitest";
+import { E2E_TAGS } from "../../helpers/tags";
+import { normalizeForSnapshot, type Json } from "../../helpers/normalize";
+import type { CapturedLogEvent } from "../../helpers/mock-braintrust-server";
+import {
+  formatJsonFileSnapshot,
+  resolveFileSnapshotPath,
+} from "../../helpers/file-snapshot";
+import { withScenarioHarness } from "../../helpers/scenario-harness";
+import { findLatestSpan } from "../../helpers/trace-selectors";
+import { summarizeWrapperContract } from "../../helpers/wrapper-contract";
+import { ROOT_NAME, SCENARIO_NAME } from "./scenario.impl.mjs";
+
+type RunOpenAICodexScenario = (harness: {
+  runNodeScenarioDir: (options: {
+    entry: string;
+    nodeArgs: string[]; // only this runner takes `nodeArgs`
+    runContext?: { variantKey: string };
+    scenarioDir: string;
+    timeoutMs: number;
+  }) => Promise<unknown>;
+  runScenarioDir: (options: {
+    entry: string;
+    runContext?: { variantKey: string };
+    scenarioDir: string;
+    timeoutMs: number;
+  }) => Promise<unknown>;
+}) => Promise<void>; // callback that runs one scenario variant through either harness runner
+
+const METADATA_KEYS = [
+  "provider",
+  "model",
+  "operation",
+  "scenario",
+  "gen_ai.tool.name",
+  "openai_codex.operation",
+  "openai_codex.model",
+  "openai_codex.thread_id",
+  "openai_codex.item_type",
+  "openai_codex.command.status",
+  "openai_codex.mcp.server",
+  "openai_codex.mcp.status",
+] as const; // copied into the key list that summarizeSpan hands to summarizeWrapperContract
+
+// Summarizes a span for the snapshot (null when the span is missing) and masks
+// a string `openai_codex.thread_id` with the "<thread-id>" placeholder.
+function summarizeSpan(event: CapturedLogEvent | undefined): Json {
+  if (!event) {
+    return null;
+  }
+  const contract = summarizeWrapperContract(event, [...METADATA_KEYS]);
+  const summary = contract as Record<string, Json>;
+  const { metadata } = summary;
+  const hasFields = Boolean(metadata) && typeof metadata === "object";
+  const fields = (hasFields ? metadata : {}) as Record<string, Json>;
+  if (typeof fields["openai_codex.thread_id"] === "string") {
+    fields["openai_codex.thread_id"] = "<thread-id>";
+  }
+  return summary;
+}
+
+/**
+ * Returns the last event in `events` whose span is named "OpenAI Codex" and
+ * lists the span found by `findLatestSpan(events, operationName)` as a parent.
+ */
+function findCodexTask(events: CapturedLogEvent[], operationName: string) {
+  const parentId = findLatestSpan(events, operationName)?.span.id ?? "";
+  const isCodexTask = ({ span }: CapturedLogEvent) =>
+    span.name === "OpenAI Codex" && span.parentIds.includes(parentId);
+  return [...events].reverse().find(isCodexTask);
+}
+
+/**
+ * Collapses the event log to one entry per span of the given type.
+ * A span may be logged several times; only its last event is kept, in order
+ * of each span's first appearance. Events without a span id are skipped.
+ *
+ * @param events - Captured log events to scan.
+ * @param type - Span type to keep, compared against `event.span.type`.
+ * @returns The last event of every matching span.
+ */
+function latestSpansByType(
+  events: CapturedLogEvent[],
+  type: string,
+): CapturedLogEvent[] {
+  // Re-setting an existing Map key keeps its original insertion position.
+  const latestById = new Map<string, CapturedLogEvent>();
+  for (const event of events) {
+    if (event.span.type === type && event.span.id) {
+      latestById.set(event.span.id, event);
+    }
+  }
+  return Array.from(latestById.values());
+}
+
+/**
+ * Builds the normalized snapshot payload from the captured events: the root
+ * span, the operation/task span pair for the `run` and `runStreamed`
+ * operations, and the latest event of every span of type "tool".
+ */
+function summarize(events: CapturedLogEvent[]): Json {
+  const runName = "openai-codex-run-operation";
+  const streamedName = "openai-codex-run-streamed-operation";
+  const runTask = findCodexTask(events, runName);
+  const streamedTask = findCodexTask(events, streamedName);
+  const toolSpans = latestSpansByType(events, "tool");
+
+  return normalizeForSnapshot({
+    root: summarizeSpan(findLatestSpan(events, ROOT_NAME)),
+    run: {
+      operation: summarizeSpan(findLatestSpan(events, runName)),
+      task: summarizeSpan(runTask),
+    },
+    streamed: {
+      operation: summarizeSpan(findLatestSpan(events, streamedName)),
+      task: summarizeSpan(streamedTask),
+    },
+    tools: toolSpans.map(summarizeSpan),
+  } as Json);
+}
+
+export function defineOpenAICodexInstrumentationAssertions(options: {
+  name: string;
+  runScenario: RunOpenAICodexScenario;
+  snapshotName: string;
+  testFileUrl: string;
+  timeoutMs: number;
+}): void { // registers a vitest suite: runs the scenario once in beforeAll, then asserts on the captured events
+  const snapshotPath = resolveFileSnapshotPath(
+    options.testFileUrl,
+    `${options.snapshotName}.span-events.json`,
+  );
+  const testConfig = {
+    tags: [E2E_TAGS.hermetic],
+    timeout: options.timeoutMs,
+  };
+
+  describe(options.name, () => {
+    let events: CapturedLogEvent[] = []; // filled by beforeAll, read by every test below
+
+    beforeAll(async () => {
+      await withScenarioHarness(async (harness) => {
+        await options.runScenario(harness);
+        events = harness.events(); // read the events inside the harness callback, after the scenario has run
+      });
+    }, options.timeoutMs);
+
+    test("captures the root trace", testConfig, () => {
+      const root = findLatestSpan(events, ROOT_NAME);
+
+      expect(root).toBeDefined();
+      expect(root?.row.metadata).toMatchObject({ scenario: SCENARIO_NAME });
+    });
+
+    test("captures Codex task spans", testConfig, () => {
+      for (const operationName of [
+        "openai-codex-run-operation",
+        "openai-codex-run-streamed-operation",
+      ]) {
+        const operation = findLatestSpan(events, operationName);
+        const task = findCodexTask(events, operationName);
+
+        expect(operation).toBeDefined();
+        expect(task).toBeDefined();
+        expect(task?.span.parentIds).toEqual([operation?.span.id ?? ""]); // the operation span must be the task's only parent
+        expect(task?.row.metadata).toMatchObject({
+          provider: "openai",
+        });
+      }
+    });
+
+    test("captures command and MCP tool spans", testConfig, () => {
+      const toolSpans = latestSpansByType(events, "tool");
+
+      expect(
+        toolSpans.some(
+          (event) =>
+            event.span.name === "tool: command_execution" &&
+            event.output === "codex_tool_ok",
+        ),
+      ).toBe(true);
+      expect(
+        toolSpans.some(
+          (event) =>
+            event.span.name === "tool: read_file" &&
+            event.metadata?.["openai_codex.mcp.server"] === "filesystem",
+        ),
+      ).toBe(true);
+    });
+
+    test("captures final responses and usage metrics", testConfig, () => {
+      const runTask = findCodexTask(events, "openai-codex-run-operation");
+      const streamedTask = findCodexTask(
+        events,
+        "openai-codex-run-streamed-operation",
+      );
+
+      expect(runTask?.output).toContain("RUN_OK");
+      expect(streamedTask?.output).toContain("STREAM_OK");
+      expect(runTask?.metrics).toMatchObject({ // NOTE(review): metrics are asserted for the run task only, not the streamed task
+        completion_tokens: 7,
+        prompt_cached_tokens: 3,
+        prompt_tokens: 11,
+      });
+    });
+
+    test("matches the shared span snapshot", testConfig, async () => {
+      await expect(
+        formatJsonFileSnapshot(summarize(events)),
+      ).toMatchFileSnapshot(snapshotPath);
+    });
+  });
+}
diff --git a/e2e/scenarios/openai-codex-instrumentation/mock-codex-cli.mjs b/e2e/scenarios/openai-codex-instrumentation/mock-codex-cli.mjs
new file mode 100755
index 000000000..510abdf04
--- /dev/null
+++ b/e2e/scenarios/openai-codex-instrumentation/mock-codex-cli.mjs
@@ -0,0 +1,101 @@
+#!/usr/bin/env node
+
+let input = ""; // accumulates everything received on stdin
+
+process.stdin.setEncoding("utf8");
+process.stdin.on("data", (chunk) => {
+  input += chunk;
+});
+process.stdin.on("end", () => {
+  const isStream = input.includes("stream"); // mode is chosen by whether the stdin text contains "stream"
+  const suffix = isStream ? "STREAM_OK" : "RUN_OK";
+  const threadId = isStream ? "thread_stream" : "thread_run";
+  const events = [ // fixed event sequence: thread/turn start, three tool items, reasoning, final message, usage
+    { type: "thread.started", thread_id: threadId },
+    { type: "turn.started" },
+    {
+      type: "item.started",
+      item: {
+        id: `${threadId}_command`,
+        type: "command_execution",
+        command: "printf codex_tool_ok",
+        aggregated_output: "",
+        status: "in_progress",
+      },
+    },
+    {
+      type: "item.completed",
+      item: {
+        id: `${threadId}_command`,
+        type: "command_execution",
+        command: "printf codex_tool_ok",
+        aggregated_output: "codex_tool_ok",
+        exit_code: 0,
+        status: "completed",
+      },
+    },
+    {
+      type: "item.started",
+      item: {
+        id: `${threadId}_mcp`,
+        type: "mcp_tool_call",
+        server: "filesystem",
+        tool: "read_file",
+        arguments: { path: "README.md" },
+        status: "in_progress",
+      },
+    },
+    {
+      type: "item.completed",
+      item: {
+        id: `${threadId}_mcp`,
+        type: "mcp_tool_call",
+        server: "filesystem",
+        tool: "read_file",
+        arguments: { path: "README.md" },
+        result: {
+          content: [{ type: "text", text: "mock file" }],
+          structured_content: { ok: true },
+        },
+        status: "completed",
+      },
+    },
+    {
+      type: "item.completed",
+      item: {
+        id: `${threadId}_web`,
+        type: "web_search",
+        query: "braintrust codex instrumentation",
+      },
+    },
+    {
+      type: "item.completed",
+      item: {
+        id: `${threadId}_reasoning`,
+        type: "reasoning",
+        text: `reasoning ${suffix}`,
+      },
+    },
+    {
+      type: "item.completed",
+      item: {
+        id: `${threadId}_message`,
+        type: "agent_message",
+        text: `Codex ${suffix}`,
+      },
+    },
+    {
+      type: "turn.completed",
+      usage: {
+        input_tokens: 11,
+        cached_input_tokens: 3,
+        output_tokens: 7,
+        reasoning_output_tokens: 5,
+      },
+    },
+  ];
+
+  for (const event of events) { // emit one JSON object per line on stdout
+    process.stdout.write(`${JSON.stringify(event)}\n`);
+  }
+});
diff --git a/e2e/scenarios/openai-codex-instrumentation/package.json b/e2e/scenarios/openai-codex-instrumentation/package.json
new file mode 100644
index 000000000..babaee877
--- /dev/null
+++ b/e2e/scenarios/openai-codex-instrumentation/package.json
@@ -0,0 +1,14 @@
+{
+  "name": "@braintrust/e2e-openai-codex-instrumentation",
+  "private": true,
+  "braintrustScenario": {
+    "canary": {
+      "dependencies": {
+        "openai-codex-sdk-v0128": "@openai/codex-sdk@latest"
+      }
+    }
+  },
+  "dependencies": {
+    "openai-codex-sdk-v0128": "npm:@openai/codex-sdk@0.128.0"
+  }
+}
diff --git a/e2e/scenarios/openai-codex-instrumentation/pnpm-lock.yaml b/e2e/scenarios/openai-codex-instrumentation/pnpm-lock.yaml
new file mode 100644
index 000000000..015850049
--- /dev/null
+++ b/e2e/scenarios/openai-codex-instrumentation/pnpm-lock.yaml
@@ -0,0 +1,93 @@
+lockfileVersion: '9.0'
+
+settings:
+  autoInstallPeers: true
+  excludeLinksFromLockfile: false
+
+importers:
+
+  .:
+    dependencies:
+      openai-codex-sdk-v0128:
+        specifier: npm:@openai/codex-sdk@0.128.0
+        version: '@openai/codex-sdk@0.128.0'
+
+packages:
+
+  '@openai/codex-sdk@0.128.0':
+    resolution: {integrity: sha512-Eao0LLA5x90qwU6SXYd21h4KxdCef1WpCvHFgKdbqzWMJ79lUvguGDGvx1RheP+zTdKGxJfJ6dulI5wSXoUBhQ==}
+    engines: {node: '>=18'}
+
+  '@openai/codex@0.128.0':
+    resolution: {integrity: sha512-+xp6ODmFfBNnexIWRHApEaPXot2j6gyM8A5we/5IS/uY4eYHj4arETct4hQ5M4eO+MK7JY3ZU4xhuobhlysr0A==}
+    engines: {node: '>=16'}
+    hasBin: true
+
+  '@openai/codex@0.128.0-darwin-arm64':
+    resolution: {integrity: sha512-w+6zohfHx/kHBdles/CyFKaY57u9I3nK8QI9+NrdwMliKA0b7xn13yblRNkMpe09j6vL1oAWoxYsMOQ/vjBGug==}
+    engines: {node: '>=16'}
+    cpu: [arm64]
+    os: [darwin]
+
+  '@openai/codex@0.128.0-darwin-x64':
+    resolution: {integrity: sha512-SDbn6fO22Puy8xmMIbZi4f2znMrUEPwABApke4mo+4ihaauwuVjeqzXvW5SPJz5ty/bG11/mSupQgReT7T8BBw==}
+    engines: {node: '>=16'}
+    cpu: [x64]
+    os: [darwin]
+
+  '@openai/codex@0.128.0-linux-arm64':
+    resolution: {integrity: sha512-+SvH73H60qvCXFuQGP/EsmR//s1hHMBR22PvJkXvM/hdnTIGucx+JqRUjAWdmmQ1IU6j3kgwVvdLW/6ICB+M6w==}
+    engines: {node: '>=16'}
+    cpu: [arm64]
+    os: [linux]
+
+  '@openai/codex@0.128.0-linux-x64':
+    resolution: {integrity: sha512-2lnSPA05CRRuKAzFW8BCmmNCSieDcToLwfC2ALLbBYilGLgzhRibjlDglK9F1BkEzfohSSWJu4PBbRu/aG60lQ==}
+    engines: {node: '>=16'}
+    cpu: [x64]
+    os: [linux]
+
+  '@openai/codex@0.128.0-win32-arm64':
+    resolution: {integrity: sha512-ECJvsqmYFdA9pn42xxK3Odp/G16AjmBW0BglX8L0PwPjqbstbmlew9bfHf7xvL+SNfNl4NmyotW0+RNo1phgaA==}
+    engines: {node: '>=16'}
+    cpu: [arm64]
+    os: [win32]
+
+  '@openai/codex@0.128.0-win32-x64':
+    resolution: {integrity: sha512-k3jmUAFrzkUtvjGTXvSKjQqJLLlzjxp/VoHJDYedgmXUn6j70HxK38IwapzmnYfiBiTuzETvGwjXHzZgzKjhoQ==}
+    engines: {node: '>=16'}
+    cpu: [x64]
+    os: [win32]
+
+snapshots:
+
+  '@openai/codex-sdk@0.128.0':
+    dependencies:
+      '@openai/codex': 0.128.0
+
+  '@openai/codex@0.128.0':
+    optionalDependencies:
+      '@openai/codex-darwin-arm64': '@openai/codex@0.128.0-darwin-arm64'
+      '@openai/codex-darwin-x64': '@openai/codex@0.128.0-darwin-x64'
+      '@openai/codex-linux-arm64': '@openai/codex@0.128.0-linux-arm64'
+      '@openai/codex-linux-x64': '@openai/codex@0.128.0-linux-x64'
+      '@openai/codex-win32-arm64': '@openai/codex@0.128.0-win32-arm64'
+      '@openai/codex-win32-x64': '@openai/codex@0.128.0-win32-x64'
+
+  '@openai/codex@0.128.0-darwin-arm64':
+    optional: true
+
+  '@openai/codex@0.128.0-darwin-x64':
+    optional: true
+
+  '@openai/codex@0.128.0-linux-arm64':
+    optional: true
+
+  '@openai/codex@0.128.0-linux-x64':
+    optional: true
+
+  '@openai/codex@0.128.0-win32-arm64':
+    optional: true
+
+  '@openai/codex@0.128.0-win32-x64':
+    optional: true
diff --git a/e2e/scenarios/openai-codex-instrumentation/scenario.impl.mjs b/e2e/scenarios/openai-codex-instrumentation/scenario.impl.mjs
new file mode 100644
index 000000000..c63ae20e6
--- /dev/null
+++ b/e2e/scenarios/openai-codex-instrumentation/scenario.impl.mjs
@@ -0,0 +1,83 @@
+import { wrapOpenAICodexSDK } from "braintrust";
+import {
+  collectAsync,
+  runOperation,
+  runTracedScenario,
+} from "../../helpers/provider-runtime.mjs";
+import path from "node:path";
+import { fileURLToPath } from "node:url";
+
+export const ROOT_NAME = "openai-codex-instrumentation-root";
+export const SCENARIO_NAME = "openai-codex-instrumentation";
+
+const SCENARIO_DIR = path.dirname(fileURLToPath(import.meta.url));
+const MOCK_CODEX_PATH = path.join(SCENARIO_DIR, "mock-codex-cli.mjs");
+
+// Creates a Codex client whose `codexPathOverride` is the mock CLI script.
+function createClient(SDK) {
+  const env = { PATH: process.env.PATH ?? "" };
+  const options = {
+    apiKey: "test-key",
+    codexPathOverride: MOCK_CODEX_PATH,
+    env,
+  };
+  return new SDK.Codex(options);
+}
+
+function startThread(client) { // starts a thread with the fixed options used by both scenario operations
+  return client.startThread({
+    approvalPolicy: "never",
+    model: "gpt-5-codex",
+    modelReasoningEffort: "low",
+    networkAccessEnabled: false,
+    sandboxMode: "danger-full-access",
+    webSearchMode: "disabled",
+    workingDirectory: process.cwd(),
+  });
+}
+
+// Runs one `thread.run` turn and one `thread.runStreamed` turn inside a single
+// `runTracedScenario` call; `decorateSDK`, when given, is applied to `sdk` first.
+async function runOpenAICodexScenario({ decorateSDK, sdk }) {
+  const client = createClient(decorateSDK ? decorateSDK(sdk) : sdk);
+  const runTurn = async () => {
+    const thread = startThread(client);
+    await thread.run("Return Codex RUN_OK after using a command.");
+  };
+  const runStreamedTurn = async () => {
+    const thread = startThread(client);
+    const { events } = await thread.runStreamed(
+      "Return Codex STREAM_OK after using a command in stream mode.",
+    );
+    await collectAsync(events);
+  };
+  const callback = async () => {
+    await runOperation("openai-codex-run-operation", "run", runTurn);
+    await runOperation(
+      "openai-codex-run-streamed-operation",
+      "runStreamed",
+      runStreamedTurn,
+    );
+  };
+  await runTracedScenario({
+    callback,
+    flushCount: 2,
+    flushDelayMs: 100,
+    metadata: { scenario: SCENARIO_NAME },
+    projectNameBase: "e2e-openai-codex-instrumentation",
+    rootName: ROOT_NAME,
+  });
+}
+
+// Scenario entry point for the wrapper variant: the SDK module is passed
+// through `wrapOpenAICodexSDK` before the client is created.
+export async function runWrappedOpenAICodexInstrumentation(sdk) {
+  const decorateSDK = wrapOpenAICodexSDK;
+  await runOpenAICodexScenario({ decorateSDK, sdk });
+}
+
+// Scenario entry point that passes `sdk` through unwrapped (no `decorateSDK`);
+// presumably tracing then comes from the auto-instrumentation hook.
+export async function runAutoOpenAICodexInstrumentation(sdk) {
+  await runOpenAICodexScenario({ sdk });
+}
diff --git a/e2e/scenarios/openai-codex-instrumentation/scenario.mjs b/e2e/scenarios/openai-codex-instrumentation/scenario.mjs
new file mode 100644
index 000000000..d7cbe608d
--- /dev/null
+++ b/e2e/scenarios/openai-codex-instrumentation/scenario.mjs
@@ -0,0 +1,5 @@
+import * as OpenAICodexSDK from "./node_modules/openai-codex-sdk-v0128/dist/index.js";
+import { runMain } from "../../helpers/provider-runtime.mjs";
+import { runAutoOpenAICodexInstrumentation } from "./scenario.impl.mjs";
+
+runMain(() => runAutoOpenAICodexInstrumentation(OpenAICodexSDK)); // auto variant: the SDK namespace is passed unwrapped
diff --git a/e2e/scenarios/openai-codex-instrumentation/scenario.openai-codex-v0128.mjs b/e2e/scenarios/openai-codex-instrumentation/scenario.openai-codex-v0128.mjs
new file mode 100644
index 000000000..d7cbe608d
--- /dev/null
+++ b/e2e/scenarios/openai-codex-instrumentation/scenario.openai-codex-v0128.mjs
@@ -0,0 +1,5 @@
+import * as OpenAICodexSDK from "./node_modules/openai-codex-sdk-v0128/dist/index.js";
+import { runMain } from "../../helpers/provider-runtime.mjs";
+import { runAutoOpenAICodexInstrumentation } from "./scenario.impl.mjs";
+
+runMain(() => runAutoOpenAICodexInstrumentation(OpenAICodexSDK)); // auto variant: SDK namespace passed through unwrapped
diff --git a/e2e/scenarios/openai-codex-instrumentation/scenario.openai-codex-v0128.ts b/e2e/scenarios/openai-codex-instrumentation/scenario.openai-codex-v0128.ts
new file mode 100644
index 000000000..c502c8b44
--- /dev/null
+++ b/e2e/scenarios/openai-codex-instrumentation/scenario.openai-codex-v0128.ts
@@ -0,0 +1,5 @@
+import * as OpenAICodexSDK from "./node_modules/openai-codex-sdk-v0128/dist/index.js";
+import { runMain } from "../../helpers/provider-runtime.mjs";
+import { runWrappedOpenAICodexInstrumentation } from "./scenario.impl.mjs";
+
+runMain(() => runWrappedOpenAICodexInstrumentation(OpenAICodexSDK)); // wrapped variant: the scenario applies wrapOpenAICodexSDK
diff --git a/e2e/scenarios/openai-codex-instrumentation/scenario.test.ts b/e2e/scenarios/openai-codex-instrumentation/scenario.test.ts
new file mode 100644
index 000000000..d23b8df49
--- /dev/null
+++ b/e2e/scenarios/openai-codex-instrumentation/scenario.test.ts
@@ -0,0 +1,59 @@
+import { describe } from "vitest";
+import {
+  prepareScenarioDir,
+  readInstalledPackageVersion,
+  resolveScenarioDir,
+} from "../../helpers/scenario-harness";
+import { defineOpenAICodexInstrumentationAssertions } from "./assertions";
+
+const scenarioDir = await prepareScenarioDir({
+  scenarioDir: resolveScenarioDir(import.meta.url),
+}); // top-level await: prepared once at module load and shared by both suites
+const TIMEOUT_MS = 120_000; // used both as the scenario run timeout and the assertion timeout
+const openAICodexScenario = {
+  autoEntry: "scenario.openai-codex-v0128.mjs", // entry run under node with the hook loader (second suite)
+  autoSnapshotName: "openai-codex-v0128-auto-hook",
+  dependencyName: "openai-codex-sdk-v0128", // same package alias the version is read from below
+  version: await readInstalledPackageVersion(
+    scenarioDir,
+    "openai-codex-sdk-v0128",
+  ),
+  wrapperEntry: "scenario.openai-codex-v0128.ts", // entry that calls the wrapped variant (first suite)
+  wrapperSnapshotName: "openai-codex-v0128-wrapped",
+  variantKey: "openai-codex-v0128", // passed as runContext.variantKey in both suites
+};
+
+describe("wrapped instrumentation", () => {
+  defineOpenAICodexInstrumentationAssertions({
+    name: `openai codex sdk ${openAICodexScenario.version}`,
+    runScenario: async ({ runScenarioDir }) => {
+      await runScenarioDir({
+        entry: openAICodexScenario.wrapperEntry,
+        runContext: { variantKey: openAICodexScenario.variantKey },
+        scenarioDir,
+        timeoutMs: TIMEOUT_MS,
+      });
+    },
+    snapshotName: openAICodexScenario.wrapperSnapshotName, // each suite has its own span-event snapshot
+    testFileUrl: import.meta.url,
+    timeoutMs: TIMEOUT_MS,
+  });
+});
+
+describe("auto-hook instrumentation", () => {
+  defineOpenAICodexInstrumentationAssertions({
+    name: `openai codex sdk ${openAICodexScenario.version}`,
+    runScenario: async ({ runNodeScenarioDir }) => {
+      await runNodeScenarioDir({
+        entry: openAICodexScenario.autoEntry,
+        nodeArgs: ["--import", "braintrust/hook.mjs"], // preload the auto-instrumentation hook instead of wrapping
+        runContext: { variantKey: openAICodexScenario.variantKey },
+        scenarioDir,
+        timeoutMs: TIMEOUT_MS,
+      });
+    },
+    snapshotName: openAICodexScenario.autoSnapshotName,
+    testFileUrl: import.meta.url,
+    timeoutMs: TIMEOUT_MS,
+  });
+});
diff --git a/e2e/scenarios/openai-codex-instrumentation/scenario.ts b/e2e/scenarios/openai-codex-instrumentation/scenario.ts
new file mode 100644
index 000000000..c502c8b44
--- /dev/null
+++ b/e2e/scenarios/openai-codex-instrumentation/scenario.ts
@@ -0,0 +1,5 @@
+import * as OpenAICodexSDK from "./node_modules/openai-codex-sdk-v0128/dist/index.js";
+import { runMain } from "../../helpers/provider-runtime.mjs";
+import { runWrappedOpenAICodexInstrumentation } from "./scenario.impl.mjs";
+
+runMain(() => runWrappedOpenAICodexInstrumentation(OpenAICodexSDK)); // wrapped variant: the scenario applies wrapOpenAICodexSDK
diff --git a/js/src/auto-instrumentations/bundler/plugin.ts b/js/src/auto-instrumentations/bundler/plugin.ts
index 2462c5faf..df4ec1052 100644
--- a/js/src/auto-instrumentations/bundler/plugin.ts
+++ b/js/src/auto-instrumentations/bundler/plugin.ts
@@ -21,6 +21,7 @@ import { readFileSync } from "fs";
 import { fileURLToPath } from "url";
 import moduleDetailsFromPath from "module-details-from-path";
 import { openaiConfigs } from "../configs/openai";
+import { openAICodexConfigs } from "../configs/openai-codex";
 import { anthropicConfigs } from "../configs/anthropic";
 import { aiSDKConfigs } from "../configs/ai-sdk";
 import { claudeAgentSDKConfigs } from "../configs/claude-agent-sdk";
@@ -74,6 +75,7 @@ function getModuleVersion(basedir: string): string | undefined {
 export const unplugin = createUnplugin<BundlerPluginOptions>((options = {}) => {
   const allInstrumentations = [
     ...openaiConfigs,
+    ...openAICodexConfigs,
     ...anthropicConfigs,
     ...aiSDKConfigs,
     ...claudeAgentSDKConfigs,
diff --git a/js/src/auto-instrumentations/configs/openai-codex.ts b/js/src/auto-instrumentations/configs/openai-codex.ts
new file mode 100644
index 000000000..adf9046a3
--- /dev/null
+++ b/js/src/auto-instrumentations/configs/openai-codex.ts
@@ -0,0 +1,33 @@
+import type { InstrumentationConfig } from "@apm-js-collab/code-transformer";
+import { openAICodexChannels } from "../../instrumentation/plugins/openai-codex-channels";
+
+const openAICodexVersionRange = ">=0.128.0 <1.0.0"; // shared by both entries below
+
+export const openAICodexConfigs: InstrumentationConfig[] = [
+  {
+    channelName: openAICodexChannels.run.channelName, // taken from the channel definition so config and plugin cannot drift
+    module: {
+      name: "@openai/codex-sdk",
+      versionRange: openAICodexVersionRange,
+      filePath: "dist/index.js",
+    },
+    functionQuery: {
+      className: "Thread",
+      methodName: "run",
+      kind: "Async", // Thread.run is matched as an async method
+    },
+  },
+  {
+    channelName: openAICodexChannels.runStreamed.channelName, // second entry: same module, streamed variant
+    module: {
+      name: "@openai/codex-sdk",
+      versionRange: openAICodexVersionRange,
+      filePath: "dist/index.js",
+    },
+    functionQuery: {
+      className: "Thread",
+      methodName: "runStreamed",
+      kind: "Async", // runStreamed is also matched as async; its result carries the event stream
+    },
+  },
+];
diff --git a/js/src/auto-instrumentations/hook.mts b/js/src/auto-instrumentations/hook.mts
index 46fe0f0d2..f292b684f 100644
--- a/js/src/auto-instrumentations/hook.mts
+++ b/js/src/auto-instrumentations/hook.mts
@@ -15,6 +15,7 @@
 
 import { register } from "node:module";
 import { openaiConfigs } from "./configs/openai.js";
+import { openAICodexConfigs } from "./configs/openai-codex.js";
 import { anthropicConfigs } from "./configs/anthropic.js";
 import { aiSDKConfigs } from "./configs/ai-sdk.js";
 import { claudeAgentSDKConfigs } from "./configs/claude-agent-sdk.js";
@@ -62,6 +63,15 @@ const disabledIntegrations = readDisabledIntegrations();
 // transformation and runtime plugins stay aligned.
 const allConfigs = [
   ...(isDisabled(disabledIntegrations, "openai") ? [] : openaiConfigs),
+  ...(isDisabled(
+    disabledIntegrations,
+    "openai-codex",
+    "openai-codex-sdk",
+    "codex",
+    "codex-sdk",
+  )
+    ? []
+    : openAICodexConfigs),
   ...(isDisabled(disabledIntegrations, "anthropic") ? [] : anthropicConfigs),
   ...(isDisabled(disabledIntegrations, "aisdk", "ai-sdk", "vercel-ai")
     ? []
diff --git a/js/src/auto-instrumentations/index.ts b/js/src/auto-instrumentations/index.ts
index bdac954ab..6a5eb850f 100644
--- a/js/src/auto-instrumentations/index.ts
+++ b/js/src/auto-instrumentations/index.ts
@@ -29,6 +29,7 @@
  */
 
 export { openaiConfigs } from "./configs/openai";
+export { openAICodexConfigs } from "./configs/openai-codex";
 export { anthropicConfigs } from "./configs/anthropic";
 export { aiSDKConfigs } from "./configs/ai-sdk";
 export { claudeAgentSDKConfigs } from "./configs/claude-agent-sdk";
diff --git a/js/src/exports.ts b/js/src/exports.ts
index 02dadbf66..21e057ff1 100644
--- a/js/src/exports.ts
+++ b/js/src/exports.ts
@@ -177,6 +177,7 @@ export {
 export { wrapAnthropic } from "./wrappers/anthropic";
 export { wrapMastraAgent } from "./wrappers/mastra";
 export { wrapClaudeAgentSDK } from "./wrappers/claude-agent-sdk/claude-agent-sdk";
+export { wrapOpenAICodexSDK } from "./wrappers/openai-codex";
 export { wrapCursorSDK } from "./wrappers/cursor-sdk";
 export { wrapGoogleGenAI } from "./wrappers/google-genai";
 export { wrapGoogleADK } from "./wrappers/google-adk";
diff --git a/js/src/instrumentation/braintrust-plugin.test.ts b/js/src/instrumentation/braintrust-plugin.test.ts
index aec836b66..c537cb601 100644
--- a/js/src/instrumentation/braintrust-plugin.test.ts
+++ b/js/src/instrumentation/braintrust-plugin.test.ts
@@ -1,6 +1,7 @@
 import { describe, it, expect, vi, beforeEach } from "vitest";
 import { BraintrustPlugin } from "./braintrust-plugin";
 import { OpenAIPlugin } from "./plugins/openai-plugin";
+import { OpenAICodexPlugin } from "./plugins/openai-codex-plugin";
 import { AnthropicPlugin } from "./plugins/anthropic-plugin";
 import { AISDKPlugin } from "./plugins/ai-sdk-plugin";
 import { ClaudeAgentSDKPlugin } from "./plugins/claude-agent-sdk-plugin";
@@ -37,6 +38,10 @@ vi.mock("./plugins/anthropic-plugin", () => ({
   AnthropicPlugin: createPluginClassMock(),
 }));
 
+vi.mock("./plugins/openai-codex-plugin", () => ({
+  OpenAICodexPlugin: createPluginClassMock(),
+}));
+
 vi.mock("./plugins/ai-sdk-plugin", () => ({
   AISDKPlugin: createPluginClassMock(),
 }));
@@ -97,6 +102,15 @@ describe("BraintrustPlugin", () => {
       expect(mockInstance.enable).toHaveBeenCalledTimes(1);
     });
 
+    it("should create and enable OpenAI Codex plugin by default", () => {
+      const plugin = new BraintrustPlugin();
+      plugin.enable();
+
+      expect(OpenAICodexPlugin).toHaveBeenCalledTimes(1);
+      const mockInstance = vi.mocked(OpenAICodexPlugin).mock.results[0].value;
+      expect(mockInstance.enable).toHaveBeenCalledTimes(1);
+    });
+
     it("should create and enable AI SDK plugin by default", () => {
       const plugin = new BraintrustPlugin();
       plugin.enable();
@@ -186,6 +200,7 @@ describe("BraintrustPlugin", () => {
       plugin.enable();
 
       expect(OpenAIPlugin).toHaveBeenCalledTimes(1);
+      expect(OpenAICodexPlugin).toHaveBeenCalledTimes(1);
       expect(AnthropicPlugin).toHaveBeenCalledTimes(1);
       expect(AISDKPlugin).toHaveBeenCalledTimes(1);
       expect(ClaudeAgentSDKPlugin).toHaveBeenCalledTimes(1);
@@ -203,6 +218,7 @@ describe("BraintrustPlugin", () => {
       plugin.enable();
 
       expect(OpenAIPlugin).toHaveBeenCalledTimes(1);
+      expect(OpenAICodexPlugin).toHaveBeenCalledTimes(1);
       expect(AnthropicPlugin).toHaveBeenCalledTimes(1);
       expect(AISDKPlugin).toHaveBeenCalledTimes(1);
       expect(ClaudeAgentSDKPlugin).toHaveBeenCalledTimes(1);
@@ -220,6 +236,7 @@ describe("BraintrustPlugin", () => {
       plugin.enable();
 
       expect(OpenAIPlugin).toHaveBeenCalledTimes(1);
+      expect(OpenAICodexPlugin).toHaveBeenCalledTimes(1);
       expect(AnthropicPlugin).toHaveBeenCalledTimes(1);
       expect(AISDKPlugin).toHaveBeenCalledTimes(1);
       expect(ClaudeAgentSDKPlugin).toHaveBeenCalledTimes(1);
@@ -260,6 +277,7 @@ describe("BraintrustPlugin", () => {
       expect(AnthropicPlugin).not.toHaveBeenCalled();
       // Other plugins should still be created
       expect(OpenAIPlugin).toHaveBeenCalledTimes(1);
+      expect(OpenAICodexPlugin).toHaveBeenCalledTimes(1);
       expect(AISDKPlugin).toHaveBeenCalledTimes(1);
       expect(ClaudeAgentSDKPlugin).toHaveBeenCalledTimes(1);
       expect(GoogleGenAIPlugin).toHaveBeenCalledTimes(1);
@@ -268,6 +286,30 @@ describe("BraintrustPlugin", () => {
       expect(OpenRouterAgentPlugin).toHaveBeenCalledTimes(1);
     });
 
+    it("should not create OpenAI Codex plugin when openaiCodex: false", () => {
+      const plugin = new BraintrustPlugin({
+        integrations: { openaiCodex: false },
+      });
+      plugin.enable();
+
+      expect(OpenAICodexPlugin).not.toHaveBeenCalled();
+      expect(OpenAIPlugin).toHaveBeenCalledTimes(1);
+      expect(AnthropicPlugin).toHaveBeenCalledTimes(1);
+      expect(AISDKPlugin).toHaveBeenCalledTimes(1);
+    });
+
+    it("should not create OpenAI Codex plugin when codex: false", () => {
+      const plugin = new BraintrustPlugin({
+        integrations: { codex: false },
+      });
+      plugin.enable();
+
+      expect(OpenAICodexPlugin).not.toHaveBeenCalled();
+      expect(OpenAIPlugin).toHaveBeenCalledTimes(1);
+      expect(AnthropicPlugin).toHaveBeenCalledTimes(1);
+      expect(AISDKPlugin).toHaveBeenCalledTimes(1);
+    });
+
     it("should not create AI SDK plugin when aisdk: false", () => {
       const plugin = new BraintrustPlugin({
         integrations: { aisdk: false },
@@ -417,6 +459,8 @@ describe("BraintrustPlugin", () => {
       const plugin = new BraintrustPlugin({
         integrations: {
           openai: false,
+          openaiCodex: false,
+          codex: false,
           anthropic: false,
           aisdk: false,
           claudeAgentSDK: false,
@@ -432,6 +476,7 @@ describe("BraintrustPlugin", () => {
       plugin.enable();
 
       expect(OpenAIPlugin).not.toHaveBeenCalled();
+      expect(OpenAICodexPlugin).not.toHaveBeenCalled();
       expect(AnthropicPlugin).not.toHaveBeenCalled();
       expect(AISDKPlugin).not.toHaveBeenCalled();
       expect(ClaudeAgentSDKPlugin).not.toHaveBeenCalled();
@@ -567,6 +612,8 @@ describe("BraintrustPlugin", () => {
       plugin.enable();
 
       const openaiMock = vi.mocked(OpenAIPlugin).mock.results[0].value;
+      const openAICodexMock =
+        vi.mocked(OpenAICodexPlugin).mock.results[0].value;
       const anthropicMock = vi.mocked(AnthropicPlugin).mock.results[0].value;
       const aiSDKMock = vi.mocked(AISDKPlugin).mock.results[0].value;
       const claudeAgentSDKMock =
@@ -583,6 +630,7 @@ describe("BraintrustPlugin", () => {
       const groqMock = vi.mocked(GroqPlugin).mock.results[0].value;
 
       expect(openaiMock.enable).toHaveBeenCalledTimes(1);
+      expect(openAICodexMock.enable).toHaveBeenCalledTimes(1);
       expect(anthropicMock.enable).toHaveBeenCalledTimes(1);
       expect(aiSDKMock.enable).toHaveBeenCalledTimes(1);
       expect(claudeAgentSDKMock.enable).toHaveBeenCalledTimes(1);
@@ -600,6 +648,8 @@ describe("BraintrustPlugin", () => {
       plugin.enable();
 
       const openaiMock = vi.mocked(OpenAIPlugin).mock.results[0].value;
+      const openAICodexMock =
+        vi.mocked(OpenAICodexPlugin).mock.results[0].value;
       const anthropicMock = vi.mocked(AnthropicPlugin).mock.results[0].value;
       const aiSDKMock = vi.mocked(AISDKPlugin).mock.results[0].value;
       const claudeAgentSDKMock =
@@ -618,6 +668,7 @@ describe("BraintrustPlugin", () => {
       plugin.disable();
 
       expect(openaiMock.disable).toHaveBeenCalledTimes(1);
+      expect(openAICodexMock.disable).toHaveBeenCalledTimes(1);
       expect(anthropicMock.disable).toHaveBeenCalledTimes(1);
       expect(aiSDKMock.disable).toHaveBeenCalledTimes(1);
       expect(claudeAgentSDKMock.disable).toHaveBeenCalledTimes(1);
@@ -662,6 +713,7 @@ describe("BraintrustPlugin", () => {
 
       // Should not create any plugins
       expect(OpenAIPlugin).not.toHaveBeenCalled();
+      expect(OpenAICodexPlugin).not.toHaveBeenCalled();
       expect(AnthropicPlugin).not.toHaveBeenCalled();
       expect(AISDKPlugin).not.toHaveBeenCalled();
       expect(ClaudeAgentSDKPlugin).not.toHaveBeenCalled();
@@ -684,6 +736,7 @@ describe("BraintrustPlugin", () => {
       plugin.enable();
 
       expect(OpenAIPlugin).toHaveBeenCalledTimes(1);
+      expect(OpenAICodexPlugin).toHaveBeenCalledTimes(1);
       expect(AnthropicPlugin).toHaveBeenCalledTimes(1);
       expect(AISDKPlugin).toHaveBeenCalledTimes(1);
       expect(ClaudeAgentSDKPlugin).toHaveBeenCalledTimes(1);
diff --git a/js/src/instrumentation/braintrust-plugin.ts b/js/src/instrumentation/braintrust-plugin.ts
index 5db01b441..d03932203 100644
--- a/js/src/instrumentation/braintrust-plugin.ts
+++ b/js/src/instrumentation/braintrust-plugin.ts
@@ -1,5 +1,6 @@
 import { BasePlugin } from "./core";
 import { OpenAIPlugin } from "./plugins/openai-plugin";
+import { OpenAICodexPlugin } from "./plugins/openai-codex-plugin";
 import { AnthropicPlugin } from "./plugins/anthropic-plugin";
 import { AISDKPlugin } from "./plugins/ai-sdk-plugin";
 import { ClaudeAgentSDKPlugin } from "./plugins/claude-agent-sdk-plugin";
@@ -16,6 +17,8 @@ import { GroqPlugin } from "./plugins/groq-plugin";
 export interface BraintrustPluginConfig {
   integrations?: {
     openai?: boolean;
+    openaiCodex?: boolean;
+    codex?: boolean;
     anthropic?: boolean;
     vercel?: boolean;
     aisdk?: boolean;
@@ -53,6 +56,7 @@ export interface BraintrustPluginConfig {
 export class BraintrustPlugin extends BasePlugin {
   private config: BraintrustPluginConfig;
   private openaiPlugin: OpenAIPlugin | null = null;
+  private openAICodexPlugin: OpenAICodexPlugin | null = null;
   private anthropicPlugin: AnthropicPlugin | null = null;
   private aiSDKPlugin: AISDKPlugin | null = null;
   private claudeAgentSDKPlugin: ClaudeAgentSDKPlugin | null = null;
@@ -80,6 +84,11 @@ export class BraintrustPlugin extends BasePlugin {
       this.openaiPlugin.enable();
     }
 
+    if (integrations.openaiCodex !== false && integrations.codex !== false) {
+      this.openAICodexPlugin = new OpenAICodexPlugin();
+      this.openAICodexPlugin.enable();
+    }
+
     // Enable Anthropic integration (default: true)
     if (integrations.anthropic !== false) {
       this.anthropicPlugin = new AnthropicPlugin();
@@ -154,6 +163,11 @@ export class BraintrustPlugin extends BasePlugin {
       this.openaiPlugin = null;
     }
 
+    if (this.openAICodexPlugin) {
+      this.openAICodexPlugin.disable();
+      this.openAICodexPlugin = null;
+    }
+
     if (this.anthropicPlugin) {
       this.anthropicPlugin.disable();
       this.anthropicPlugin = null;
diff --git a/js/src/instrumentation/plugins/openai-codex-channels.ts b/js/src/instrumentation/plugins/openai-codex-channels.ts
new file mode 100644
index 000000000..8e31d1bbe
--- /dev/null
+++ b/js/src/instrumentation/plugins/openai-codex-channels.ts
@@ -0,0 +1,29 @@
+import { channel, defineChannels } from "../core/channel-definitions";
+import type {
+  OpenAICodexInput,
+  OpenAICodexStreamedTurn,
+  OpenAICodexThread,
+  OpenAICodexThreadEvent,
+  OpenAICodexTurn,
+  OpenAICodexTurnOptions,
+} from "../../vendor-sdk-types/openai-codex";
+
+export const openAICodexChannels = defineChannels("@openai/codex-sdk", {
+  run: channel<
+    [OpenAICodexInput, OpenAICodexTurnOptions | undefined],
+    OpenAICodexTurn,
+    { operation?: "run"; thread?: OpenAICodexThread }
+  >({
+    channelName: "Thread.run", // read back via `.channelName` by the auto-instrumentation config
+    kind: "async",
+  }),
+  runStreamed: channel<
+    [OpenAICodexInput, OpenAICodexTurnOptions | undefined],
+    OpenAICodexStreamedTurn,
+    { operation?: "runStreamed"; thread?: OpenAICodexThread },
+    OpenAICodexThreadEvent
+  >({
+    channelName: "Thread.runStreamed", // read back via `.channelName` by the auto-instrumentation config
+    kind: "async", // the call itself is async; events arrive later on the returned turn
+  }),
+});
diff --git a/js/src/instrumentation/plugins/openai-codex-plugin.ts b/js/src/instrumentation/plugins/openai-codex-plugin.ts
new file mode 100644
index 000000000..deffe8187
--- /dev/null
+++ b/js/src/instrumentation/plugins/openai-codex-plugin.ts
@@ -0,0 +1,707 @@
+import { BasePlugin } from "../core";
+import type { ChannelMessage } from "../core/channel-definitions";
+import type { IsoChannelHandlers } from "../../isomorph";
+import { debugLogger } from "../../debug-logger";
+import { startSpan } from "../../logger";
+import type { Span } from "../../logger";
+import { getCurrentUnixTimestamp } from "../../util";
+import { SpanTypeAttribute } from "../../../util/index";
+import { openAICodexChannels } from "./openai-codex-channels";
+import type {
+  OpenAICodexCommandExecutionItem,
+  OpenAICodexFileChangeItem,
+  OpenAICodexInput,
+  OpenAICodexMcpToolCallItem,
+  OpenAICodexStreamedTurn,
+  OpenAICodexThread,
+  OpenAICodexThreadEvent,
+  OpenAICodexThreadItem,
+  OpenAICodexThreadOptions,
+  OpenAICodexTurn,
+  OpenAICodexTurnOptions,
+  OpenAICodexUsage,
+  OpenAICodexWebSearchItem,
+} from "../../vendor-sdk-types/openai-codex";
+
+type CodexRunState = {
+  activeItemSpans: Map<string, Span>; // item spans opened but not yet ended, keyed by item id
+  completedItems: OpenAICodexThreadItem[]; // every completed item, in the order it was seen
+  finalResponse?: string; // latest agent_message text, or the turn's finalResponse
+  finalized: boolean; // set by finalizeCodexRun; later finalize calls are no-ops
+  metadata: Record<string, unknown>; // logged at start and again at finalization
+  metrics: Record<string, number>; // usage metrics merged from the turn or from stream events
+  outputText: string[]; // collected item text, used as output when nothing better exists
+  span: Span; // the run's task span; item spans are parented to its export
+  startTime: number; // getCurrentUnixTimestamp() taken right after the span is started
+};
+
+const PATCHED_STREAMED_TURN = Symbol.for(
+  "braintrust.openai-codex.patched-streamed-turn",
+); // Symbol.for => global registry, so the marker is shared across copies of this module
+
+export class OpenAICodexPlugin extends BasePlugin {
+  protected onEnable(): void {
+    this.subscribeToRun();
+    this.subscribeToRunStreamed();
+  }
+
+  protected onDisable(): void {
+    for (const unsubscribe of this.unsubscribers) {
+      unsubscribe();
+    }
+    this.unsubscribers = []; // drop the callbacks so they cannot be invoked twice
+  }
+
+  private subscribeToRun(): void {
+    const channel = openAICodexChannels.run.tracingChannel();
+    const states = new WeakMap<object, CodexRunState>(); // run state keyed by the channel event object
+
+    const handlers: IsoChannelHandlers<
+      ChannelMessage<typeof openAICodexChannels.run>
+    > = {
+      start: (event) => {
+        states.set(event, startCodexRun(event, "Thread.run"));
+      },
+      asyncEnd: (event) => {
+        const state = states.get(event);
+        if (!state) {
+          return;
+        }
+        states.delete(event); // whichever of asyncEnd/error runs first owns the state
+        void finalizeCompletedRun(state, event.result); // fire-and-forget: the handler does not await it
+      },
+      error: (event) => {
+        const state = states.get(event);
+        if (!state) {
+          return;
+        }
+        states.delete(event); // whichever of asyncEnd/error runs first owns the state
+        void finalizeCodexRun(state, { error: event.error });
+      },
+    };
+
+    channel.subscribe(handlers);
+    this.unsubscribers.push(() => {
+      channel.unsubscribe(handlers);
+    });
+  }
+
+  private subscribeToRunStreamed(): void {
+    const channel = openAICodexChannels.runStreamed.tracingChannel();
+    const states = new WeakMap<object, CodexRunState>(); // run state keyed by the channel event object
+
+    const handlers: IsoChannelHandlers<
+      ChannelMessage<typeof openAICodexChannels.runStreamed>
+    > = {
+      start: (event) => {
+        states.set(event, startCodexRun(event, "Thread.runStreamed"));
+      },
+      asyncEnd: (event) => {
+        const state = states.get(event);
+        if (!state) {
+          return;
+        }
+        states.delete(event); // whichever of asyncEnd/error runs first owns the state
+        patchStreamedTurn(event.result, state); // unlike run(): the span is ended by the wrapped event stream, not here
+      },
+      error: (event) => {
+        const state = states.get(event);
+        if (!state) {
+          return;
+        }
+        states.delete(event); // whichever of asyncEnd/error runs first owns the state
+        void finalizeCodexRun(state, { error: event.error });
+      },
+    };
+
+    channel.subscribe(handlers);
+    this.unsubscribers.push(() => {
+      channel.unsubscribe(handlers);
+    });
+  }
+}
+
+function startCodexRun(
+  event: ChannelMessage<
+    typeof openAICodexChannels.run | typeof openAICodexChannels.runStreamed
+  >,
+  operation: "Thread.run" | "Thread.runStreamed",
+): CodexRunState {
+  const input = event.arguments[0];
+  const turnOptions = event.arguments[1];
+  const thread = event.thread ?? extractThreadFromEvent(event); // explicit thread wins; otherwise fall back to event.self
+  const metadata = {
+    ...extractThreadMetadata(thread),
+    ...extractTurnOptionsMetadata(turnOptions), // spread after thread metadata, so turn-level keys win on conflict
+    "openai_codex.operation": operation,
+    provider: "openai",
+    ...(event.moduleVersion
+      ? { "openai_codex.version": event.moduleVersion }
+      : {}),
+  };
+  const span = startSpan({
+    name: "OpenAI Codex",
+    spanAttributes: { type: SpanTypeAttribute.TASK },
+  });
+  const startTime = getCurrentUnixTimestamp(); // basis for the duration metrics computed at finalization
+  safeLog(span, {
+    input: sanitizeInput(input),
+    metadata,
+  });
+
+  return {
+    activeItemSpans: new Map(),
+    completedItems: [],
+    finalized: false,
+    metadata, // same object that was just logged; stream events may add keys before it is logged again
+    metrics: {},
+    outputText: [],
+    span,
+    startTime,
+  };
+}
+
+/** Routes a streamed turn's events through span tracking, or ends the span. */
+function patchStreamedTurn(
+  streamedTurn: OpenAICodexStreamedTurn | undefined,
+  state: CodexRunState,
+): void {
+  if (!streamedTurn || typeof streamedTurn !== "object") {
+    void finalizeCodexRun(state, { output: streamedTurn });
+    return;
+  }
+  const turnRecord = streamedTurn as OpenAICodexStreamedTurn &
+    Record<PropertyKey, unknown>;
+  const events = turnRecord.events;
+  if (turnRecord[PATCHED_STREAMED_TURN] || !isAsyncIterable(events)) {
+    // Nothing else will finalize this state; end its span rather than leak it.
+    void finalizeCodexRun(state);
+    return;
+  }
+
+  try {
+    Object.defineProperty(turnRecord, PATCHED_STREAMED_TURN, {
+      configurable: false,
+      enumerable: false,
+      value: true,
+    });
+    turnRecord.events = patchCodexEventStream(events, state);
+  } catch {
+    void finalizeCodexRun(state, { output: streamedTurn });
+  }
+}
+
+async function* patchCodexEventStream(
+  events: AsyncGenerator<OpenAICodexThreadEvent>,
+  state: CodexRunState,
+): AsyncGenerator<OpenAICodexThreadEvent> {
+  try {
+    for await (const event of events) {
+      await handleCodexEvent(state, event).catch((error) =>
+        logInstrumentationError("OpenAI Codex stream event", error),
+      );
+      yield event;
+    }
+  } catch (error) {
+    await finalizeCodexRun(state, { error });
+    throw error;
+  } finally {
+    // Also reached when the consumer stops iterating early; finalize is a no-op if already done.
+    await finalizeCodexRun(state);
+  }
+}
+
+async function handleCodexEvent(
+  state: CodexRunState,
+  event: OpenAICodexThreadEvent,
+): Promise<void> {
+  switch (event.type) {
+    case "thread.started":
+      state.metadata["openai_codex.thread_id"] = event.thread_id; // reaches the span when metadata is logged again at finalization
+      return;
+    case "turn.completed":
+      Object.assign(state.metrics, extractUsageMetrics(event.usage));
+      return;
+    case "turn.failed":
+      await finalizeCodexRun(state, {
+        error: event.error?.message ?? "Codex turn failed",
+      }); // ends the run span immediately; later finalize calls become no-ops
+      return;
+    case "item.started":
+      await startCodexItemSpan(state, event.item);
+      return;
+    case "item.updated":
+      updateCodexItem(state, event.item); // only tracks the latest agent message text; no span is touched
+      return;
+    case "item.completed":
+      state.completedItems.push(event.item);
+      collectOutputText(state, event.item);
+      await finishCodexItemSpan(state, event.item);
+      return;
+    case "error":
+      await finalizeCodexRun(state, { error: event.message }); // stream-level error: same immediate finalization as turn.failed
+      return;
+    default:
+      return;
+  }
+}
+
+async function finalizeCompletedRun(
+  state: CodexRunState,
+  turn: OpenAICodexTurn | undefined,
+): Promise<void> {
+  if (!turn) {
+    await finalizeCodexRun(state, { output: turn });
+    return;
+  }
+  Object.assign(state.metrics, extractUsageMetrics(turn.usage));
+  state.finalResponse = turn.finalResponse;
+  for (const item of turn.items ?? []) {
+    state.completedItems.push(item);
+    collectOutputText(state, item);
+    // The channel handler never awaits this promise: log item failures, keep going, still end the run.
+    await createCompletedItemSpan(state, item).catch((error) =>
+      logInstrumentationError("OpenAI Codex completed item", error),
+    );
+  }
+  await finalizeCodexRun(state, { output: turn.finalResponse });
+}
+
+async function finalizeCodexRun(
+  state: CodexRunState,
+  params: {
+    error?: unknown;
+    output?: unknown;
+  } = {},
+): Promise<void> {
+  if (state.finalized) {
+    return;
+  }
+  state.finalized = true; // idempotence guard: only the first call logs and ends the span
+  // Output precedence: explicit output, then the final response, then collected item text.
+  const output =
+    params.output ??
+    state.finalResponse ??
+    (state.outputText.length > 0 ? state.outputText.join("\n") : undefined);
+  const metrics = {
+    ...cleanMetrics(state.metrics),
+    ...buildDurationMetrics(state.startTime),
+  };
+  // The finally block closes item spans and the run span even if logging throws.
+  try {
+    const error = params.error;
+    safeLog(state.span, {
+      ...(error
+        ? { error: error instanceof Error ? error.message : String(error) }
+        : {}),
+      metadata: state.metadata,
+      metrics,
+      output,
+    });
+  } finally {
+    endOpenItemSpans(state);
+    state.span.end();
+  }
+}
+
+async function createCompletedItemSpan(
+  state: CodexRunState,
+  item: OpenAICodexThreadItem,
+): Promise<void> {
+  const spanArgs = await itemSpanArgs(state, item);
+  if (!spanArgs) {
+    return;
+  }
+  // The item is already complete: open its span, log the result, and end it in one go.
+  const span = startSpan(spanArgs.start);
+  safeLog(span, spanArgs.end);
+  span.end();
+}
+
+async function startCodexItemSpan(
+  state: CodexRunState,
+  item: OpenAICodexThreadItem,
+): Promise<void> {
+  const itemId = item.id;
+  if (!itemId || state.activeItemSpans.has(itemId)) {
+    return; // nothing to key the span by, or a span for this item is already open
+  }
+  const spanArgs = await itemSpanArgs(state, item);
+  if (!spanArgs) {
+    return; // item type that gets no span
+  }
+  state.activeItemSpans.set(itemId, startSpan(spanArgs.start)); // ended later by finishCodexItemSpan or endOpenItemSpans
+}
+
+function updateCodexItem(
+  state: CodexRunState,
+  item: OpenAICodexThreadItem,
+): void {
+  if (item.type === "agent_message" && typeof item.text === "string") {
+    state.finalResponse = item.text; // provisional: overwritten by later updates and by the completed item
+  }
+}
+
+async function finishCodexItemSpan(
+  state: CodexRunState,
+  item: OpenAICodexThreadItem,
+): Promise<void> {
+  const itemId = item.id;
+  if (!itemId) {
+    await createCompletedItemSpan(state, item);
+    return;
+  }
+
+  const span = state.activeItemSpans.get(itemId);
+  if (!span) {
+    await createCompletedItemSpan(state, item); // no span was opened for this id: log the item in one shot
+    return;
+  }
+  // Close the span that was opened for this item, attaching the final result if one is produced.
+  state.activeItemSpans.delete(itemId);
+  const spanArgs = await itemSpanArgs(state, item);
+  if (spanArgs) {
+    safeLog(span, spanArgs.end);
+  }
+  span.end();
+}
+
+async function itemSpanArgs(
+  state: CodexRunState,
+  item: OpenAICodexThreadItem,
+): Promise<
+  | {
+      start: Parameters<typeof startSpan>[0];
+      end: Parameters<Span["log"]>[0];
+    }
+  | undefined
+> {
+  // Export the parent lazily, per case, so item types without a span skip it.
+  const baseMetadata = {
+    "openai_codex.item_id": item.id,
+    "openai_codex.item_type": item.type,
+  };
+
+  switch (item.type) {
+    case "command_execution":
+      return commandSpanArgs(await state.span.export(), baseMetadata, item);
+    case "mcp_tool_call":
+      return mcpToolSpanArgs(await state.span.export(), baseMetadata, item);
+    case "web_search":
+      return webSearchSpanArgs(await state.span.export(), baseMetadata, item);
+    case "file_change":
+      return fileChangeSpanArgs(await state.span.export(), baseMetadata, item);
+    default:
+      return undefined;
+  }
+}
+
+function commandSpanArgs(
+  parent: string,
+  baseMetadata: Record<string, unknown>,
+  item: OpenAICodexCommandExecutionItem,
+) {
+  const metadata = {
+    ...baseMetadata,
+    "gen_ai.tool.name": "command_execution",
+    "openai_codex.command.exit_code": item.exit_code,
+    "openai_codex.command.status": item.status,
+  };
+  return {
+    start: {
+      event: { input: item.command, metadata },
+      name: "tool: command_execution",
+      parent,
+      spanAttributes: { type: SpanTypeAttribute.TOOL },
+    },
+    end: {
+      ...(item.status === "failed"
+        ? { error: item.aggregated_output || "Command execution failed" }
+        : {}), // failed commands become span errors; an empty output falls back to the fixed message
+      metadata, // logged again at the end so the final status and exit code are recorded
+      output: item.aggregated_output,
+    },
+  };
+}
+
+function mcpToolSpanArgs(
+  parent: string,
+  baseMetadata: Record<string, unknown>,
+  item: OpenAICodexMcpToolCallItem,
+) {
+  const toolName = item.tool || "mcp_tool_call"; // generic name when the item carries no tool name
+  const metadata = {
+    ...baseMetadata,
+    "gen_ai.tool.name": toolName,
+    "openai_codex.mcp.server": item.server,
+    "openai_codex.mcp.status": item.status,
+  };
+  return {
+    start: {
+      event: {
+        input: {
+          arguments: item.arguments,
+          server: item.server,
+          tool: item.tool, // raw value, without the fallback applied to the span name
+        },
+        metadata,
+      },
+      name: `tool: ${toolName}`,
+      parent,
+      spanAttributes: { type: SpanTypeAttribute.TOOL },
+    },
+    end: {
+      ...(item.error?.message ? { error: item.error.message } : {}), // an error is logged only when a non-empty message exists
+      metadata,
+      output: item.result,
+    },
+  };
+}
+
+function webSearchSpanArgs(
+  parent: string,
+  baseMetadata: Record<string, unknown>,
+  item: OpenAICodexWebSearchItem,
+) {
+  const metadata = {
+    ...baseMetadata,
+    "gen_ai.tool.name": "web_search",
+  };
+  return {
+    start: {
+      event: { input: item.query, metadata },
+      name: "tool: web_search",
+      parent,
+      spanAttributes: { type: SpanTypeAttribute.TOOL },
+    },
+    end: { metadata }, // no output or error is recorded for web searches
+  };
+}
+
+function fileChangeSpanArgs(
+  parent: string,
+  baseMetadata: Record<string, unknown>,
+  item: OpenAICodexFileChangeItem,
+) {
+  const metadata = {
+    ...baseMetadata,
+    "gen_ai.tool.name": "file_change",
+    "openai_codex.file_change.status": item.status,
+  };
+  return {
+    start: {
+      event: { input: item.changes, metadata },
+      name: "tool: file_change",
+      parent,
+      spanAttributes: { type: SpanTypeAttribute.TOOL },
+    },
+    end: {
+      ...(item.status === "failed" ? { error: "File change failed" } : {}), // fixed message: no failure detail is read from the item
+      metadata,
+      output: item.changes, // the change list is logged as both input and output
+    },
+  };
+}
+
+function endOpenItemSpans(state: CodexRunState): void {
+  for (const [, span] of state.activeItemSpans) {
+    safeLog(span, { error: "Codex item did not complete" });
+    span.end();
+  }
+  state.activeItemSpans.clear();
+}
+
+function collectOutputText(
+  state: CodexRunState,
+  item: OpenAICodexThreadItem,
+): void {
+  if (item.type === "agent_message" && typeof item.text === "string") {
+    state.finalResponse = item.text;
+    state.outputText.push(item.text);
+  } else if (
+    item.type === "reasoning" &&
+    typeof item.text === "string" &&
+    !state.finalResponse
+  ) {
+    state.outputText.push(item.text);
+  }
+}
+
+function extractThreadFromEvent(
+  event: ChannelMessage<
+    typeof openAICodexChannels.run | typeof openAICodexChannels.runStreamed
+  >,
+): OpenAICodexThread | undefined {
+  return event.self && typeof event.self === "object"
+    ? (event.self as OpenAICodexThread)
+    : undefined;
+}
+
+function extractThreadMetadata(
+  thread: OpenAICodexThread | undefined,
+): Record<string, unknown> {
+  const threadOptions = extractThreadOptions(thread);
+  return {
+    ...(thread?.id ? { "openai_codex.thread_id": thread.id } : {}),
+    ...extractThreadOptionsMetadata(threadOptions),
+  };
+}
+
+function extractThreadOptions(
+  thread: OpenAICodexThread | undefined,
+): OpenAICodexThreadOptions | undefined {
+  if (!thread || typeof thread !== "object") {
+    return undefined;
+  }
+  const value = Reflect.get(thread, "_threadOptions");
+  return value && typeof value === "object"
+    ? (value as OpenAICodexThreadOptions)
+    : undefined;
+}
+
+function extractThreadOptionsMetadata(
+  options: OpenAICodexThreadOptions | undefined,
+): Record<string, unknown> {
+  if (!options) {
+    return {};
+  }
+
+  return {
+    ...(options.model ? { model: options.model } : {}),
+    ...(options.model ? { "openai_codex.model": options.model } : {}),
+    ...(options.sandboxMode
+      ? { "openai_codex.sandbox_mode": options.sandboxMode }
+      : {}),
+    ...(options.workingDirectory
+      ? { "openai_codex.working_directory": options.workingDirectory }
+      : {}),
+    ...(options.skipGitRepoCheck !== undefined
+      ? { "openai_codex.skip_git_repo_check": options.skipGitRepoCheck }
+      : {}),
+    ...(options.modelReasoningEffort
+      ? {
+          "openai_codex.model_reasoning_effort": options.modelReasoningEffort,
+        }
+      : {}),
+    ...(options.networkAccessEnabled !== undefined
+      ? {
+          "openai_codex.network_access_enabled": options.networkAccessEnabled,
+        }
+      : {}),
+    ...(options.webSearchMode
+      ? { "openai_codex.web_search_mode": options.webSearchMode }
+      : {}),
+    ...(options.webSearchEnabled !== undefined
+      ? { "openai_codex.web_search_enabled": options.webSearchEnabled }
+      : {}),
+    ...(options.approvalPolicy
+      ? { "openai_codex.approval_policy": options.approvalPolicy }
+      : {}),
+    ...(options.additionalDirectories
+      ? {
+          "openai_codex.additional_directories": options.additionalDirectories,
+        }
+      : {}),
+  };
+}
+
+function extractTurnOptionsMetadata(
+  options: OpenAICodexTurnOptions | undefined,
+): Record<string, unknown> {
+  if (!options) {
+    return {};
+  }
+
+  return {
+    ...(options.outputSchema !== undefined
+      ? { "openai_codex.output_schema": true }
+      : {}),
+  };
+}
+
+function sanitizeInput(input: OpenAICodexInput): unknown {
+  if (typeof input === "string") {
+    return input;
+  }
+
+  return input.map((item) => {
+    if (item.type === "local_image") {
+      return {
+        path: item.path,
+        type: "local_image",
+      };
+    }
+    return item;
+  });
+}
+
+function extractUsageMetrics(
+  usage: OpenAICodexUsage | null | undefined,
+): Record<string, number> {
+  if (!usage) {
+    return {};
+  }
+
+  const metrics: Record<string, number> = {};
+  if (usage.input_tokens !== undefined) {
+    metrics.prompt_tokens = usage.input_tokens;
+  }
+  if (usage.cached_input_tokens !== undefined) {
+    metrics.prompt_cached_tokens = usage.cached_input_tokens;
+  }
+  if (usage.output_tokens !== undefined) {
+    metrics.completion_tokens = usage.output_tokens;
+  }
+  if (usage.reasoning_output_tokens !== undefined) {
+    metrics.completion_reasoning_tokens = usage.reasoning_output_tokens;
+  }
+
+  // Total is input + output only: OpenAI usage reports cached input as part
+  // of `input_tokens` and reasoning output as part of `output_tokens`, so
+  // adding those two breakdown fields again would double count them.
+  metrics.tokens =
+    (metrics.prompt_tokens ?? 0) + (metrics.completion_tokens ?? 0);
+  return metrics;
+}
+
+function buildDurationMetrics(startTime: number): Record<string, number> {
+  const end = getCurrentUnixTimestamp();
+  return {
+    duration: end - startTime,
+    end,
+    start: startTime,
+  };
+}
+
+function cleanMetrics(metrics: Record<string, number>): Record<string, number> {
+  // Returns a copy of `metrics` containing only finite numeric entries.
+  // Number.isFinite is false for undefined, NaN and ±Infinity, so a single
+  // predicate rejects every non-finite value. The input object itself is
+  // left untouched.
+  return Object.fromEntries(
+    Object.entries(metrics).filter(([, amount]) => Number.isFinite(amount)),
+  );
+}
+
+function isAsyncIterable(value: unknown): value is AsyncGenerator<unknown> {
+  return (
+    !!value &&
+    typeof value === "object" &&
+    Symbol.asyncIterator in value &&
+    typeof (value as { [Symbol.asyncIterator]?: unknown })[
+      Symbol.asyncIterator
+    ] === "function"
+  );
+}
+
+function safeLog(span: Span, event: Parameters<Span["log"]>[0]): void {
+  try {
+    span.log(event);
+  } catch (error) {
+    logInstrumentationError("OpenAI Codex span log", error);
+  }
+}
+
+function logInstrumentationError(context: string, error: unknown): void {
+  debugLogger.error(`Error processing ${context}:`, error);
+}
diff --git a/js/src/instrumentation/registry.test.ts b/js/src/instrumentation/registry.test.ts
index 9e8c6a889..3875b6ad0 100644
--- a/js/src/instrumentation/registry.test.ts
+++ b/js/src/instrumentation/registry.test.ts
@@ -117,6 +117,7 @@ describe("configureInstrumentation API", () => {
     configureInstrumentation({
       integrations: {
         openai: false,
+        openaiCodex: false,
         anthropic: true,
         huggingface: true,
         openrouter: false,
diff --git a/js/src/instrumentation/registry.ts b/js/src/instrumentation/registry.ts
index 8e09f214a..41991a384 100644
--- a/js/src/instrumentation/registry.ts
+++ b/js/src/instrumentation/registry.ts
@@ -15,6 +15,8 @@ export interface InstrumentationConfig {
    */
   integrations?: {
     openai?: boolean;
+    openaiCodex?: boolean;
+    codex?: boolean;
     anthropic?: boolean;
     vercel?: boolean;
     aisdk?: boolean;
@@ -107,6 +109,8 @@ class PluginRegistry {
   private getDefaultConfig(): Record<string, boolean> {
     return {
       openai: true,
+      openaiCodex: true,
+      codex: true,
       anthropic: true,
       vercel: true,
       aisdk: true,
@@ -139,6 +143,14 @@ class PluginRegistry {
       for (const sdk of disabled) {
         if (sdk === "cursor-sdk") {
           integrations.cursorSDK = false;
+        } else if (
+          sdk === "openai-codex" ||
+          sdk === "openai-codex-sdk" ||
+          sdk === "codex-sdk"
+        ) {
+          integrations.openaiCodex = false;
+        } else if (sdk === "codex") {
+          integrations.codex = false;
         } else {
           integrations[sdk] = false;
         }
diff --git a/js/src/vendor-sdk-types/openai-codex.ts b/js/src/vendor-sdk-types/openai-codex.ts
new file mode 100644
index 000000000..7d09a71a0
--- /dev/null
+++ b/js/src/vendor-sdk-types/openai-codex.ts
@@ -0,0 +1,215 @@
+/**
+ * Vendored types for @openai/codex-sdk used by Braintrust instrumentation.
+ *
+ * Keep this surface intentionally narrow. These types are not exported to SDK
+ * users and should only cover fields we read, wrap, or log.
+ */
+
+export interface OpenAICodexSDKModule {
+  Codex: OpenAICodexClass;
+  Thread?: OpenAICodexThreadClass;
+  [key: string]: unknown;
+}
+
+export interface OpenAICodexClass {
+  new (options?: OpenAICodexOptions): OpenAICodexClient;
+  [key: string]: unknown;
+}
+
+export interface OpenAICodexClient {
+  startThread(options?: OpenAICodexThreadOptions): OpenAICodexThread;
+  resumeThread(
+    id: string,
+    options?: OpenAICodexThreadOptions,
+  ): OpenAICodexThread;
+  [key: string]: unknown;
+}
+
+export interface OpenAICodexThreadClass {
+  new (...args: unknown[]): OpenAICodexThread;
+  [key: string]: unknown;
+}
+
+export interface OpenAICodexThread {
+  readonly id?: string | null;
+  run(
+    input: OpenAICodexInput,
+    turnOptions?: OpenAICodexTurnOptions,
+  ): Promise<OpenAICodexTurn>;
+  runStreamed(
+    input: OpenAICodexInput,
+    turnOptions?: OpenAICodexTurnOptions,
+  ): Promise<OpenAICodexStreamedTurn>;
+  [key: string]: unknown;
+}
+
+export interface OpenAICodexOptions {
+  codexPathOverride?: string;
+  baseUrl?: string;
+  apiKey?: string;
+  config?: OpenAICodexConfigObject;
+  env?: Record<string, string>;
+}
+
+export type OpenAICodexConfigValue =
+  | string
+  | number
+  | boolean
+  | OpenAICodexConfigValue[]
+  | OpenAICodexConfigObject;
+
+export interface OpenAICodexConfigObject {
+  [key: string]: OpenAICodexConfigValue;
+}
+
+export type OpenAICodexApprovalMode =
+  | "never"
+  | "on-request"
+  | "on-failure"
+  | "untrusted";
+
+export type OpenAICodexSandboxMode =
+  | "read-only"
+  | "workspace-write"
+  | "danger-full-access";
+
+export type OpenAICodexModelReasoningEffort =
+  | "minimal"
+  | "low"
+  | "medium"
+  | "high"
+  | "xhigh";
+
+export type OpenAICodexWebSearchMode = "disabled" | "cached" | "live";
+
+export interface OpenAICodexThreadOptions {
+  model?: string;
+  sandboxMode?: OpenAICodexSandboxMode;
+  workingDirectory?: string;
+  skipGitRepoCheck?: boolean;
+  modelReasoningEffort?: OpenAICodexModelReasoningEffort;
+  networkAccessEnabled?: boolean;
+  webSearchMode?: OpenAICodexWebSearchMode;
+  webSearchEnabled?: boolean;
+  approvalPolicy?: OpenAICodexApprovalMode;
+  additionalDirectories?: string[];
+}
+
+export interface OpenAICodexTurnOptions {
+  outputSchema?: unknown;
+  signal?: AbortSignal;
+}
+
+export type OpenAICodexInput =
+  | string
+  | Array<
+      | { type: "text"; text: string }
+      | { type: "local_image"; path: string }
+      | { type?: string; [key: string]: unknown }
+    >;
+
+export interface OpenAICodexUsage {
+  input_tokens?: number;
+  cached_input_tokens?: number;
+  output_tokens?: number;
+  reasoning_output_tokens?: number;
+}
+
+export interface OpenAICodexTurn {
+  items: OpenAICodexThreadItem[];
+  finalResponse: string;
+  usage: OpenAICodexUsage | null;
+}
+
+export interface OpenAICodexStreamedTurn {
+  events: AsyncGenerator<OpenAICodexThreadEvent>;
+}
+
+export type OpenAICodexThreadEvent =
+  | { type: "thread.started"; thread_id: string }
+  | { type: "turn.started" }
+  | { type: "turn.completed"; usage: OpenAICodexUsage }
+  | { type: "turn.failed"; error: OpenAICodexThreadError }
+  | { type: "item.started"; item: OpenAICodexThreadItem }
+  | { type: "item.updated"; item: OpenAICodexThreadItem }
+  | { type: "item.completed"; item: OpenAICodexThreadItem }
+  | { type: "error"; message: string }
+  | { type?: string; [key: string]: unknown };
+
+export interface OpenAICodexThreadError {
+  message?: string;
+  [key: string]: unknown;
+}
+
+export type OpenAICodexThreadItem =
+  | OpenAICodexAgentMessageItem
+  | OpenAICodexReasoningItem
+  | OpenAICodexCommandExecutionItem
+  | OpenAICodexFileChangeItem
+  | OpenAICodexMcpToolCallItem
+  | OpenAICodexWebSearchItem
+  | OpenAICodexTodoListItem
+  | OpenAICodexErrorItem
+  | { id?: string; type?: string; [key: string]: unknown };
+
+export interface OpenAICodexAgentMessageItem {
+  id?: string;
+  type: "agent_message";
+  text?: string;
+}
+
+export interface OpenAICodexReasoningItem {
+  id?: string;
+  type: "reasoning";
+  text?: string;
+}
+
+export interface OpenAICodexCommandExecutionItem {
+  id?: string;
+  type: "command_execution";
+  command?: string;
+  aggregated_output?: string;
+  exit_code?: number;
+  status?: "in_progress" | "completed" | "failed";
+}
+
+export interface OpenAICodexFileChangeItem {
+  id?: string;
+  type: "file_change";
+  changes?: Array<{ path?: string; kind?: "add" | "delete" | "update" }>;
+  status?: "completed" | "failed";
+}
+
+export interface OpenAICodexMcpToolCallItem {
+  id?: string;
+  type: "mcp_tool_call";
+  server?: string;
+  tool?: string;
+  arguments?: unknown;
+  result?: {
+    content?: unknown;
+    structured_content?: unknown;
+  };
+  error?: {
+    message?: string;
+  };
+  status?: "in_progress" | "completed" | "failed";
+}
+
+export interface OpenAICodexWebSearchItem {
+  id?: string;
+  type: "web_search";
+  query?: string;
+}
+
+export interface OpenAICodexTodoListItem {
+  id?: string;
+  type: "todo_list";
+  items?: Array<{ text?: string; completed?: boolean }>;
+}
+
+export interface OpenAICodexErrorItem {
+  id?: string;
+  type: "error";
+  message?: string;
+}
diff --git a/js/src/wrappers/openai-codex.ts b/js/src/wrappers/openai-codex.ts
new file mode 100644
index 000000000..2031fcd1a
--- /dev/null
+++ b/js/src/wrappers/openai-codex.ts
@@ -0,0 +1,186 @@
+import { openAICodexChannels } from "../instrumentation/plugins/openai-codex-channels";
+import type {
+  OpenAICodexClass,
+  OpenAICodexClient,
+  OpenAICodexInput,
+  OpenAICodexSDKModule,
+  OpenAICodexStreamedTurn,
+  OpenAICodexThread,
+  OpenAICodexThreadOptions,
+  OpenAICodexTurn,
+  OpenAICodexTurnOptions,
+} from "../vendor-sdk-types/openai-codex";
+
+const WRAPPED_CLIENT = Symbol.for("braintrust.openai-codex.wrapped-client");
+const WRAPPED_THREAD = Symbol.for("braintrust.openai-codex.wrapped-thread");
+
+/**
+ * Wraps the OpenAI Codex TypeScript SDK with Braintrust tracing. The wrapper
+ * emits diagnostics-channel events; the OpenAI Codex plugin owns span lifecycle.
+ */
+export function wrapOpenAICodexSDK<T>(sdk: T): T {
+  if (!sdk || typeof sdk !== "object") {
+    return sdk;
+  }
+
+  const maybeSDK = sdk as Record<PropertyKey, unknown>;
+  if (hasCodexClientShape(maybeSDK)) {
+    return wrapCodexClient(maybeSDK as unknown as OpenAICodexClient) as T;
+  }
+
+  if (!maybeSDK.Codex || typeof maybeSDK.Codex !== "function") {
+    // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage.
+    console.warn("Unsupported OpenAI Codex SDK. Not wrapping.");
+    return sdk;
+  }
+
+  const target = isModuleNamespace(sdk)
+    ? Object.setPrototypeOf({}, sdk)
+    : (sdk as Record<PropertyKey, unknown>);
+
+  return new Proxy(target, {
+    get(target, prop, receiver) {
+      const value = Reflect.get(target, prop, receiver);
+      if (prop === "Codex" && typeof value === "function") {
+        return wrapCodexClass(value as unknown as OpenAICodexClass);
+      }
+      if (typeof value === "function") {
+        return value.bind(target);
+      }
+      return value;
+    },
+  }) as T;
+}
+
+function hasCodexClientShape(value: Record<PropertyKey, unknown>): boolean {
+  // Duck-typed check: a Codex client exposes both thread factory methods.
+  return ["startThread", "resumeThread"].every(
+    (methodName) => typeof value[methodName] === "function",
+  );
+}
+
+function isModuleNamespace(obj: unknown): boolean {
+  if (!obj || typeof obj !== "object") {
+    return false;
+  }
+  if (obj.constructor?.name === "Module") {
+    return true;
+  }
+  const keys = Object.keys(obj);
+  if (keys.length === 0) {
+    return false;
+  }
+  const descriptor = Object.getOwnPropertyDescriptor(obj, keys[0]);
+  return descriptor ? !descriptor.configurable && !descriptor.writable : false;
+}
+
+function wrapCodexClass(Codex: OpenAICodexClass): OpenAICodexClass {
+  return new Proxy(Codex, {
+    construct(target, args, newTarget) {
+      return wrapCodexClient(Reflect.construct(target, args, newTarget));
+    },
+    get(target, prop, receiver) {
+      const value = Reflect.get(target, prop, receiver);
+      if (typeof value === "function") {
+        return value.bind(target);
+      }
+      return value;
+    },
+  }) as OpenAICodexClass;
+}
+
+function wrapCodexClient(client: OpenAICodexClient): OpenAICodexClient {
+  if (!client || typeof client !== "object") {
+    return client;
+  }
+  if ((client as unknown as Record<PropertyKey, unknown>)[WRAPPED_CLIENT]) {
+    return client;
+  }
+
+  return new Proxy(client, {
+    get(target, prop, receiver) {
+      if (prop === WRAPPED_CLIENT) {
+        return true;
+      }
+
+      const value = Reflect.get(target, prop, receiver);
+      if (prop === "startThread" && typeof value === "function") {
+        return function (options?: OpenAICodexThreadOptions) {
+          return wrapCodexThread(Reflect.apply(value, target, [options]));
+        };
+      }
+      if (prop === "resumeThread" && typeof value === "function") {
+        return function (id: string, options?: OpenAICodexThreadOptions) {
+          return wrapCodexThread(Reflect.apply(value, target, [id, options]));
+        };
+      }
+      if (typeof value === "function") {
+        return value.bind(target);
+      }
+      return value;
+    },
+  });
+}
+
+function wrapCodexThread(thread: OpenAICodexThread): OpenAICodexThread {
+  if (!thread || typeof thread !== "object") {
+    return thread;
+  }
+  if ((thread as unknown as Record<PropertyKey, unknown>)[WRAPPED_THREAD]) {
+    return thread;
+  }
+
+  return new Proxy(thread, {
+    get(target, prop, receiver) {
+      if (prop === WRAPPED_THREAD) {
+        return true;
+      }
+
+      const value = Reflect.get(target, prop, receiver);
+      if (prop === "run" && typeof value === "function") {
+        return function (
+          input: OpenAICodexInput,
+          turnOptions?: OpenAICodexTurnOptions,
+        ): Promise<OpenAICodexTurn> {
+          const args = [input, turnOptions] as [
+            OpenAICodexInput,
+            OpenAICodexTurnOptions | undefined,
+          ];
+          return openAICodexChannels.run.tracePromise(
+            () => Reflect.apply(value, target, args),
+            {
+              arguments: args,
+              operation: "run",
+              thread: target,
+            } as never,
+          );
+        };
+      }
+      if (prop === "runStreamed" && typeof value === "function") {
+        return function (
+          input: OpenAICodexInput,
+          turnOptions?: OpenAICodexTurnOptions,
+        ): Promise<OpenAICodexStreamedTurn> {
+          const args = [input, turnOptions] as [
+            OpenAICodexInput,
+            OpenAICodexTurnOptions | undefined,
+          ];
+          return openAICodexChannels.runStreamed.tracePromise(
+            () => Reflect.apply(value, target, args),
+            {
+              arguments: args,
+              operation: "runStreamed",
+              thread: target,
+            } as never,
+          );
+        };
+      }
+      if (typeof value === "function") {
+        return value.bind(target);
+      }
+      return value;
+    },
+  });
+}
+
+export type { OpenAICodexSDKModule };

From 1ef443ac20e19a4ecaf14b2e4612902a3c77bd0a Mon Sep 17 00:00:00 2001
From: Luca Forstner <luca.forstner@gmail.com>
Date: Mon, 4 May 2026 15:39:19 -0700
Subject: [PATCH 2/5] fix

---
 js/src/vendor-sdk-types/openai-codex.ts | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/js/src/vendor-sdk-types/openai-codex.ts b/js/src/vendor-sdk-types/openai-codex.ts
index 7d09a71a0..5d0f2b35c 100644
--- a/js/src/vendor-sdk-types/openai-codex.ts
+++ b/js/src/vendor-sdk-types/openai-codex.ts
@@ -133,8 +133,7 @@ export type OpenAICodexThreadEvent =
   | { type: "item.started"; item: OpenAICodexThreadItem }
   | { type: "item.updated"; item: OpenAICodexThreadItem }
   | { type: "item.completed"; item: OpenAICodexThreadItem }
-  | { type: "error"; message: string }
-  | { type?: string; [key: string]: unknown };
+  | { type: "error"; message: string };
 
 export interface OpenAICodexThreadError {
   message?: string;
@@ -149,8 +148,7 @@ export type OpenAICodexThreadItem =
   | OpenAICodexMcpToolCallItem
   | OpenAICodexWebSearchItem
   | OpenAICodexTodoListItem
-  | OpenAICodexErrorItem
-  | { id?: string; type?: string; [key: string]: unknown };
+  | OpenAICodexErrorItem;
 
 export interface OpenAICodexAgentMessageItem {
   id?: string;

From aba7db7bcb22464312f89be745144e05b41fe87d Mon Sep 17 00:00:00 2001
From: Luca Forstner <luca.forstner@gmail.com>
Date: Mon, 4 May 2026 15:39:50 -0700
Subject: [PATCH 3/5] cs

---
 .changeset/twenty-ideas-doubt.md | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 .changeset/twenty-ideas-doubt.md

diff --git a/.changeset/twenty-ideas-doubt.md b/.changeset/twenty-ideas-doubt.md
new file mode 100644
index 000000000..55d61a938
--- /dev/null
+++ b/.changeset/twenty-ideas-doubt.md
@@ -0,0 +1,5 @@
+---
+"braintrust": minor
+---
+
+feat: Add @openai/codex-sdk instrumentation

From bcc034bd8b69c96660df3ee6f323f106ac1c4b5f Mon Sep 17 00:00:00 2001
From: Luca Forstner <luca.forstner@gmail.com>
Date: Tue, 5 May 2026 10:02:47 -0700
Subject: [PATCH 4/5] capture llm calls

---
 ...nai-codex-v0128-auto-hook.span-events.json | 196 ++++++++++++++++-
 ...penai-codex-v0128-wrapped.span-events.json | 196 ++++++++++++++++-
 .../assertions.ts                             | 103 +++++++++
 .../mock-codex-cli.mjs                        |  26 ++-
 .../plugins/openai-codex-plugin.ts            | 197 +++++++++++++++++-
 5 files changed, 696 insertions(+), 22 deletions(-)

diff --git a/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-auto-hook.span-events.json b/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-auto-hook.span-events.json
index 7a5fd7ed0..36e8471bd 100644
--- a/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-auto-hook.span-events.json
+++ b/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-auto-hook.span-events.json
@@ -1,4 +1,188 @@
 {
+  "llms": [
+    {
+      "has_input": true,
+      "has_output": true,
+      "metadata": {
+        "model": "gpt-5-codex",
+        "openai_codex.llm_sequence": 1,
+        "openai_codex.model": "gpt-5-codex",
+        "openai_codex.operation": "Thread.run",
+        "provider": "openai"
+      },
+      "metric_keys": [],
+      "name": "OpenAI Codex LLM",
+      "output": {
+        "reasoning": "reasoning before command RUN_OK"
+      },
+      "root_span_id": "<span:1>",
+      "span_id": "<span:6>",
+      "span_parents": [
+        "<span:3>"
+      ],
+      "type": "llm"
+    },
+    {
+      "has_input": false,
+      "has_output": true,
+      "metadata": {
+        "model": "gpt-5-codex",
+        "openai_codex.llm_sequence": 2,
+        "openai_codex.model": "gpt-5-codex",
+        "openai_codex.operation": "Thread.run",
+        "provider": "openai"
+      },
+      "metric_keys": [],
+      "name": "OpenAI Codex LLM",
+      "output": {
+        "reasoning": "reasoning after command RUN_OK"
+      },
+      "root_span_id": "<span:1>",
+      "span_id": "<span:7>",
+      "span_parents": [
+        "<span:3>"
+      ],
+      "type": "llm"
+    },
+    {
+      "has_input": false,
+      "has_output": true,
+      "metadata": {
+        "model": "gpt-5-codex",
+        "openai_codex.llm_sequence": 3,
+        "openai_codex.model": "gpt-5-codex",
+        "openai_codex.operation": "Thread.run",
+        "provider": "openai"
+      },
+      "metric_keys": [],
+      "name": "OpenAI Codex LLM",
+      "output": {
+        "reasoning": "reasoning after mcp RUN_OK"
+      },
+      "root_span_id": "<span:1>",
+      "span_id": "<span:8>",
+      "span_parents": [
+        "<span:3>"
+      ],
+      "type": "llm"
+    },
+    {
+      "has_input": false,
+      "has_output": true,
+      "metadata": {
+        "model": "gpt-5-codex",
+        "openai_codex.llm_sequence": 4,
+        "openai_codex.model": "gpt-5-codex",
+        "openai_codex.operation": "Thread.run",
+        "provider": "openai"
+      },
+      "metric_keys": [],
+      "name": "OpenAI Codex LLM",
+      "output": {
+        "message": "Codex RUN_OK",
+        "reasoning": "final reasoning RUN_OK"
+      },
+      "root_span_id": "<span:1>",
+      "span_id": "<span:9>",
+      "span_parents": [
+        "<span:3>"
+      ],
+      "type": "llm"
+    },
+    {
+      "has_input": true,
+      "has_output": true,
+      "metadata": {
+        "model": "gpt-5-codex",
+        "openai_codex.llm_sequence": 1,
+        "openai_codex.model": "gpt-5-codex",
+        "openai_codex.operation": "Thread.runStreamed",
+        "openai_codex.thread_id": "<thread-id>",
+        "provider": "openai"
+      },
+      "metric_keys": [],
+      "name": "OpenAI Codex LLM",
+      "output": {
+        "reasoning": "reasoning before command STREAM_OK"
+      },
+      "root_span_id": "<span:1>",
+      "span_id": "<span:10>",
+      "span_parents": [
+        "<span:5>"
+      ],
+      "type": "llm"
+    },
+    {
+      "has_input": false,
+      "has_output": true,
+      "metadata": {
+        "model": "gpt-5-codex",
+        "openai_codex.llm_sequence": 2,
+        "openai_codex.model": "gpt-5-codex",
+        "openai_codex.operation": "Thread.runStreamed",
+        "openai_codex.thread_id": "<thread-id>",
+        "provider": "openai"
+      },
+      "metric_keys": [],
+      "name": "OpenAI Codex LLM",
+      "output": {
+        "reasoning": "reasoning after command STREAM_OK"
+      },
+      "root_span_id": "<span:1>",
+      "span_id": "<span:11>",
+      "span_parents": [
+        "<span:5>"
+      ],
+      "type": "llm"
+    },
+    {
+      "has_input": false,
+      "has_output": true,
+      "metadata": {
+        "model": "gpt-5-codex",
+        "openai_codex.llm_sequence": 3,
+        "openai_codex.model": "gpt-5-codex",
+        "openai_codex.operation": "Thread.runStreamed",
+        "openai_codex.thread_id": "<thread-id>",
+        "provider": "openai"
+      },
+      "metric_keys": [],
+      "name": "OpenAI Codex LLM",
+      "output": {
+        "reasoning": "reasoning after mcp STREAM_OK"
+      },
+      "root_span_id": "<span:1>",
+      "span_id": "<span:12>",
+      "span_parents": [
+        "<span:5>"
+      ],
+      "type": "llm"
+    },
+    {
+      "has_input": false,
+      "has_output": true,
+      "metadata": {
+        "model": "gpt-5-codex",
+        "openai_codex.llm_sequence": 4,
+        "openai_codex.model": "gpt-5-codex",
+        "openai_codex.operation": "Thread.runStreamed",
+        "openai_codex.thread_id": "<thread-id>",
+        "provider": "openai"
+      },
+      "metric_keys": [],
+      "name": "OpenAI Codex LLM",
+      "output": {
+        "message": "Codex STREAM_OK",
+        "reasoning": "final reasoning STREAM_OK"
+      },
+      "root_span_id": "<span:1>",
+      "span_id": "<span:13>",
+      "span_parents": [
+        "<span:5>"
+      ],
+      "type": "llm"
+    }
+  ],
   "root": {
     "has_input": false,
     "has_output": false,
@@ -109,7 +293,7 @@
       "metric_keys": [],
       "name": "tool: command_execution",
       "root_span_id": "<span:1>",
-      "span_id": "<span:6>",
+      "span_id": "<span:14>",
       "span_parents": [
         "<span:3>"
       ],
@@ -127,7 +311,7 @@
       "metric_keys": [],
       "name": "tool: read_file",
       "root_span_id": "<span:1>",
-      "span_id": "<span:7>",
+      "span_id": "<span:15>",
       "span_parents": [
         "<span:3>"
       ],
@@ -143,7 +327,7 @@
       "metric_keys": [],
       "name": "tool: web_search",
       "root_span_id": "<span:1>",
-      "span_id": "<span:8>",
+      "span_id": "<span:16>",
       "span_parents": [
         "<span:3>"
       ],
@@ -160,7 +344,7 @@
       "metric_keys": [],
       "name": "tool: command_execution",
       "root_span_id": "<span:1>",
-      "span_id": "<span:9>",
+      "span_id": "<span:17>",
       "span_parents": [
         "<span:5>"
       ],
@@ -178,7 +362,7 @@
       "metric_keys": [],
       "name": "tool: read_file",
       "root_span_id": "<span:1>",
-      "span_id": "<span:10>",
+      "span_id": "<span:18>",
       "span_parents": [
         "<span:5>"
       ],
@@ -194,7 +378,7 @@
       "metric_keys": [],
       "name": "tool: web_search",
       "root_span_id": "<span:1>",
-      "span_id": "<span:11>",
+      "span_id": "<span:19>",
       "span_parents": [
         "<span:5>"
       ],
diff --git a/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-wrapped.span-events.json b/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-wrapped.span-events.json
index 7a5fd7ed0..36e8471bd 100644
--- a/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-wrapped.span-events.json
+++ b/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-wrapped.span-events.json
@@ -1,4 +1,188 @@
 {
+  "llms": [
+    {
+      "has_input": true,
+      "has_output": true,
+      "metadata": {
+        "model": "gpt-5-codex",
+        "openai_codex.llm_sequence": 1,
+        "openai_codex.model": "gpt-5-codex",
+        "openai_codex.operation": "Thread.run",
+        "provider": "openai"
+      },
+      "metric_keys": [],
+      "name": "OpenAI Codex LLM",
+      "output": {
+        "reasoning": "reasoning before command RUN_OK"
+      },
+      "root_span_id": "<span:1>",
+      "span_id": "<span:6>",
+      "span_parents": [
+        "<span:3>"
+      ],
+      "type": "llm"
+    },
+    {
+      "has_input": false,
+      "has_output": true,
+      "metadata": {
+        "model": "gpt-5-codex",
+        "openai_codex.llm_sequence": 2,
+        "openai_codex.model": "gpt-5-codex",
+        "openai_codex.operation": "Thread.run",
+        "provider": "openai"
+      },
+      "metric_keys": [],
+      "name": "OpenAI Codex LLM",
+      "output": {
+        "reasoning": "reasoning after command RUN_OK"
+      },
+      "root_span_id": "<span:1>",
+      "span_id": "<span:7>",
+      "span_parents": [
+        "<span:3>"
+      ],
+      "type": "llm"
+    },
+    {
+      "has_input": false,
+      "has_output": true,
+      "metadata": {
+        "model": "gpt-5-codex",
+        "openai_codex.llm_sequence": 3,
+        "openai_codex.model": "gpt-5-codex",
+        "openai_codex.operation": "Thread.run",
+        "provider": "openai"
+      },
+      "metric_keys": [],
+      "name": "OpenAI Codex LLM",
+      "output": {
+        "reasoning": "reasoning after mcp RUN_OK"
+      },
+      "root_span_id": "<span:1>",
+      "span_id": "<span:8>",
+      "span_parents": [
+        "<span:3>"
+      ],
+      "type": "llm"
+    },
+    {
+      "has_input": false,
+      "has_output": true,
+      "metadata": {
+        "model": "gpt-5-codex",
+        "openai_codex.llm_sequence": 4,
+        "openai_codex.model": "gpt-5-codex",
+        "openai_codex.operation": "Thread.run",
+        "provider": "openai"
+      },
+      "metric_keys": [],
+      "name": "OpenAI Codex LLM",
+      "output": {
+        "message": "Codex RUN_OK",
+        "reasoning": "final reasoning RUN_OK"
+      },
+      "root_span_id": "<span:1>",
+      "span_id": "<span:9>",
+      "span_parents": [
+        "<span:3>"
+      ],
+      "type": "llm"
+    },
+    {
+      "has_input": true,
+      "has_output": true,
+      "metadata": {
+        "model": "gpt-5-codex",
+        "openai_codex.llm_sequence": 1,
+        "openai_codex.model": "gpt-5-codex",
+        "openai_codex.operation": "Thread.runStreamed",
+        "openai_codex.thread_id": "<thread-id>",
+        "provider": "openai"
+      },
+      "metric_keys": [],
+      "name": "OpenAI Codex LLM",
+      "output": {
+        "reasoning": "reasoning before command STREAM_OK"
+      },
+      "root_span_id": "<span:1>",
+      "span_id": "<span:10>",
+      "span_parents": [
+        "<span:5>"
+      ],
+      "type": "llm"
+    },
+    {
+      "has_input": false,
+      "has_output": true,
+      "metadata": {
+        "model": "gpt-5-codex",
+        "openai_codex.llm_sequence": 2,
+        "openai_codex.model": "gpt-5-codex",
+        "openai_codex.operation": "Thread.runStreamed",
+        "openai_codex.thread_id": "<thread-id>",
+        "provider": "openai"
+      },
+      "metric_keys": [],
+      "name": "OpenAI Codex LLM",
+      "output": {
+        "reasoning": "reasoning after command STREAM_OK"
+      },
+      "root_span_id": "<span:1>",
+      "span_id": "<span:11>",
+      "span_parents": [
+        "<span:5>"
+      ],
+      "type": "llm"
+    },
+    {
+      "has_input": false,
+      "has_output": true,
+      "metadata": {
+        "model": "gpt-5-codex",
+        "openai_codex.llm_sequence": 3,
+        "openai_codex.model": "gpt-5-codex",
+        "openai_codex.operation": "Thread.runStreamed",
+        "openai_codex.thread_id": "<thread-id>",
+        "provider": "openai"
+      },
+      "metric_keys": [],
+      "name": "OpenAI Codex LLM",
+      "output": {
+        "reasoning": "reasoning after mcp STREAM_OK"
+      },
+      "root_span_id": "<span:1>",
+      "span_id": "<span:12>",
+      "span_parents": [
+        "<span:5>"
+      ],
+      "type": "llm"
+    },
+    {
+      "has_input": false,
+      "has_output": true,
+      "metadata": {
+        "model": "gpt-5-codex",
+        "openai_codex.llm_sequence": 4,
+        "openai_codex.model": "gpt-5-codex",
+        "openai_codex.operation": "Thread.runStreamed",
+        "openai_codex.thread_id": "<thread-id>",
+        "provider": "openai"
+      },
+      "metric_keys": [],
+      "name": "OpenAI Codex LLM",
+      "output": {
+        "message": "Codex STREAM_OK",
+        "reasoning": "final reasoning STREAM_OK"
+      },
+      "root_span_id": "<span:1>",
+      "span_id": "<span:13>",
+      "span_parents": [
+        "<span:5>"
+      ],
+      "type": "llm"
+    }
+  ],
   "root": {
     "has_input": false,
     "has_output": false,
@@ -109,7 +293,7 @@
       "metric_keys": [],
       "name": "tool: command_execution",
       "root_span_id": "<span:1>",
-      "span_id": "<span:6>",
+      "span_id": "<span:14>",
       "span_parents": [
         "<span:3>"
       ],
@@ -127,7 +311,7 @@
       "metric_keys": [],
       "name": "tool: read_file",
       "root_span_id": "<span:1>",
-      "span_id": "<span:7>",
+      "span_id": "<span:15>",
       "span_parents": [
         "<span:3>"
       ],
@@ -143,7 +327,7 @@
       "metric_keys": [],
       "name": "tool: web_search",
       "root_span_id": "<span:1>",
-      "span_id": "<span:8>",
+      "span_id": "<span:16>",
       "span_parents": [
         "<span:3>"
       ],
@@ -160,7 +344,7 @@
       "metric_keys": [],
       "name": "tool: command_execution",
       "root_span_id": "<span:1>",
-      "span_id": "<span:9>",
+      "span_id": "<span:17>",
       "span_parents": [
         "<span:5>"
       ],
@@ -178,7 +362,7 @@
       "metric_keys": [],
       "name": "tool: read_file",
       "root_span_id": "<span:1>",
-      "span_id": "<span:10>",
+      "span_id": "<span:18>",
       "span_parents": [
         "<span:5>"
       ],
@@ -194,7 +378,7 @@
       "metric_keys": [],
       "name": "tool: web_search",
       "root_span_id": "<span:1>",
-      "span_id": "<span:11>",
+      "span_id": "<span:19>",
       "span_parents": [
         "<span:5>"
       ],
diff --git a/e2e/scenarios/openai-codex-instrumentation/assertions.ts b/e2e/scenarios/openai-codex-instrumentation/assertions.ts
index 4d54f6f21..2fca6b76d 100644
--- a/e2e/scenarios/openai-codex-instrumentation/assertions.ts
+++ b/e2e/scenarios/openai-codex-instrumentation/assertions.ts
@@ -33,6 +33,7 @@ const METADATA_KEYS = [
   "operation",
   "scenario",
   "gen_ai.tool.name",
+  "openai_codex.llm_sequence",
   "openai_codex.operation",
   "openai_codex.model",
   "openai_codex.thread_id",
@@ -59,6 +60,29 @@ function summarizeSpan(event: CapturedLogEvent | undefined): Json {
   return summary;
 }
 
+function isRecord(value: unknown): value is Record<string, unknown> {
+  return typeof value === "object" && value !== null && !Array.isArray(value);
+}
+
+function summarizeLlmOutput(output: unknown): Json {
+  if (!isRecord(output)) {
+    return null;
+  }
+
+  return {
+    ...(typeof output.reasoning === "string"
+      ? { reasoning: output.reasoning }
+      : {}),
+    ...(typeof output.message === "string" ? { message: output.message } : {}),
+  } as Json;
+}
+
+function summarizeLlmSpan(event: CapturedLogEvent | undefined): Json {
+  const summary = summarizeSpan(event) as Record<string, Json>;
+  summary.output = summarizeLlmOutput(event?.output);
+  return summary as Json;
+}
+
 function findCodexTask(events: CapturedLogEvent[], operationName: string) {
   const operation = findLatestSpan(events, operationName);
   return [...events]
@@ -93,12 +117,44 @@ function latestSpansByType(
   });
 }
 
+function latestSpansForParent(
+  events: CapturedLogEvent[],
+  parentSpanId: string | undefined,
+): CapturedLogEvent[] {
+  if (!parentSpanId) {
+    return [];
+  }
+
+  const order: string[] = [];
+  const latest = new Map<string, CapturedLogEvent>();
+
+  for (const event of events) {
+    if (!event.span.id || !event.span.parentIds.includes(parentSpanId)) {
+      continue;
+    }
+    if (!latest.has(event.span.id)) {
+      order.push(event.span.id);
+    }
+    latest.set(event.span.id, event);
+  }
+
+  return order.flatMap((spanId) => {
+    const event = latest.get(spanId);
+    return event ? [event] : [];
+  });
+}
+
+function childSpanLabel(event: CapturedLogEvent): string {
+  return event.span.type === "llm" ? "llm" : (event.span.name ?? "");
+}
+
 function summarize(events: CapturedLogEvent[]): Json {
   const runTask = findCodexTask(events, "openai-codex-run-operation");
   const streamedTask = findCodexTask(
     events,
     "openai-codex-run-streamed-operation",
   );
+  const llmSpans = latestSpansByType(events, "llm");
   const toolSpans = latestSpansByType(events, "tool");
 
   return normalizeForSnapshot({
@@ -115,6 +171,7 @@ function summarize(events: CapturedLogEvent[]): Json {
       ),
       task: summarizeSpan(streamedTask),
     },
+    llms: llmSpans.map(summarizeLlmSpan),
     tools: toolSpans.map(summarizeSpan),
   } as Json);
 }
@@ -169,6 +226,52 @@ export function defineOpenAICodexInstrumentationAssertions(options: {
       }
     });
 
+    test("captures LLM spans around tool calls", testConfig, () => {
+      const llmSpans = latestSpansByType(events, "llm");
+
+      expect(llmSpans).toHaveLength(8);
+      expect(
+        llmSpans.every((event) => event.span.name === "OpenAI Codex LLM"),
+      ).toBe(true);
+      expect(
+        llmSpans.some((event) => {
+          const output = event.output as
+            | { message?: string; reasoning?: string }
+            | undefined;
+          return (
+            output?.reasoning === "final reasoning RUN_OK" &&
+            output.message === "Codex RUN_OK"
+          );
+        }),
+      ).toBe(true);
+      expect(
+        llmSpans.some((event) => {
+          const output = event.output as
+            | { message?: string; reasoning?: string }
+            | undefined;
+          return output?.reasoning === "reasoning after command STREAM_OK";
+        }),
+      ).toBe(true);
+
+      for (const operationName of [
+        "openai-codex-run-operation",
+        "openai-codex-run-streamed-operation",
+      ]) {
+        const task = findCodexTask(events, operationName);
+        expect(
+          latestSpansForParent(events, task?.span.id).map(childSpanLabel),
+        ).toEqual([
+          "llm",
+          "tool: command_execution",
+          "llm",
+          "tool: read_file",
+          "llm",
+          "tool: web_search",
+          "llm",
+        ]);
+      }
+    });
+
     test("captures command and MCP tool spans", testConfig, () => {
       const toolSpans = latestSpansByType(events, "tool");
 
diff --git a/e2e/scenarios/openai-codex-instrumentation/mock-codex-cli.mjs b/e2e/scenarios/openai-codex-instrumentation/mock-codex-cli.mjs
index 510abdf04..e7243154d 100755
--- a/e2e/scenarios/openai-codex-instrumentation/mock-codex-cli.mjs
+++ b/e2e/scenarios/openai-codex-instrumentation/mock-codex-cli.mjs
@@ -13,6 +13,14 @@ process.stdin.on("end", () => {
   const events = [
     { type: "thread.started", thread_id: threadId },
     { type: "turn.started" },
+    {
+      type: "item.completed",
+      item: {
+        id: `${threadId}_reasoning_before_command`,
+        type: "reasoning",
+        text: `reasoning before command ${suffix}`,
+      },
+    },
     {
       type: "item.started",
       item: {
@@ -34,6 +42,14 @@ process.stdin.on("end", () => {
         status: "completed",
       },
     },
+    {
+      type: "item.completed",
+      item: {
+        id: `${threadId}_reasoning_after_command`,
+        type: "reasoning",
+        text: `reasoning after command ${suffix}`,
+      },
+    },
     {
       type: "item.started",
       item: {
@@ -60,6 +76,14 @@ process.stdin.on("end", () => {
         status: "completed",
       },
     },
+    {
+      type: "item.completed",
+      item: {
+        id: `${threadId}_reasoning_after_mcp`,
+        type: "reasoning",
+        text: `reasoning after mcp ${suffix}`,
+      },
+    },
     {
       type: "item.completed",
       item: {
@@ -73,7 +97,7 @@ process.stdin.on("end", () => {
       item: {
         id: `${threadId}_reasoning`,
         type: "reasoning",
-        text: `reasoning ${suffix}`,
+        text: `final reasoning ${suffix}`,
       },
     },
     {
diff --git a/js/src/instrumentation/plugins/openai-codex-plugin.ts b/js/src/instrumentation/plugins/openai-codex-plugin.ts
index deffe8187..587bf901a 100644
--- a/js/src/instrumentation/plugins/openai-codex-plugin.ts
+++ b/js/src/instrumentation/plugins/openai-codex-plugin.ts
@@ -24,10 +24,13 @@ import type {
 } from "../../vendor-sdk-types/openai-codex";
 
 type CodexRunState = {
+  activeLlmSpan?: CodexLlmSpanState;
   activeItemSpans: Map<string, Span>;
   completedItems: OpenAICodexThreadItem[];
   finalResponse?: string;
   finalized: boolean;
+  input: unknown;
+  llmSequence: number;
   metadata: Record<string, unknown>;
   metrics: Record<string, number>;
   outputText: string[];
@@ -35,6 +38,15 @@ type CodexRunState = {
   startTime: number;
 };
 
+type CodexLlmSpanState = {
+  anonymousMessages: string[]; // agent_message texts from items that have no id
+  anonymousReasoning: string[]; // reasoning texts from items that have no id
+  messagesById: Map<string, string>; // agent_message text keyed by item id (last write wins)
+  metadata: Record<string, unknown>; // logged when the span starts and again when it finishes
+  reasoningById: Map<string, string>; // reasoning text keyed by item id (last write wins)
+  span: Span; // the open "OpenAI Codex LLM" span this state accumulates output for
+};
+
 const PATCHED_STREAMED_TURN = Symbol.for(
   "braintrust.openai-codex.patched-streamed-turn",
 );
@@ -130,6 +142,7 @@ function startCodexRun(
   const input = event.arguments[0];
   const turnOptions = event.arguments[1];
   const thread = event.thread ?? extractThreadFromEvent(event);
+  const sanitizedInput = sanitizeInput(input);
   const metadata = {
     ...extractThreadMetadata(thread),
     ...extractTurnOptionsMetadata(turnOptions),
@@ -145,7 +158,7 @@ function startCodexRun(
   });
   const startTime = getCurrentUnixTimestamp();
   safeLog(span, {
-    input: sanitizeInput(input),
+    input: sanitizedInput,
     metadata,
   });
 
@@ -153,6 +166,8 @@ function startCodexRun(
     activeItemSpans: new Map(),
     completedItems: [],
     finalized: false,
+    input: sanitizedInput,
+    llmSequence: 0,
     metadata,
     metrics: {},
     outputText: [],
@@ -228,15 +243,13 @@ async function handleCodexEvent(
       });
       return;
     case "item.started":
-      await startCodexItemSpan(state, event.item);
+      await handleCodexItemStarted(state, event.item);
       return;
     case "item.updated":
-      updateCodexItem(state, event.item);
+      await handleCodexItemUpdated(state, event.item);
       return;
     case "item.completed":
-      state.completedItems.push(event.item);
-      collectOutputText(state, event.item);
-      await finishCodexItemSpan(state, event.item);
+      await handleCodexItemCompleted(state, event.item);
       return;
     case "error":
       await finalizeCodexRun(state, { error: event.message });
@@ -259,9 +272,7 @@ async function finalizeCompletedRun(
   state.finalResponse = turn.finalResponse;
 
   for (const item of turn.items ?? []) {
-    state.completedItems.push(item);
-    collectOutputText(state, item);
-    await createCompletedItemSpan(state, item);
+    await handleCodexItemCompleted(state, item);
   }
 
   await finalizeCodexRun(state, { output: turn.finalResponse });
@@ -288,6 +299,8 @@ async function finalizeCodexRun(
     ...buildDurationMetrics(state.startTime),
   };
 
+  await finishActiveLlmSpan(state, params.error);
+
   try {
     const error = params.error;
     safeLog(state.span, {
@@ -304,6 +317,43 @@ async function finalizeCodexRun(
   }
 }
 
+async function handleCodexItemStarted(
+  state: CodexRunState,
+  item: OpenAICodexThreadItem,
+): Promise<void> {
+  if (isCodexToolItem(item)) {
+    await finishActiveLlmSpan(state); // a tool item starting ends the current LLM span, if one is open
+    await startCodexItemSpan(state, item);
+    return;
+  }
+
+  await recordCodexLlmItem(state, item, { allowAnonymousText: false }); // text from id-less items is not stored at start time
+}
+
+async function handleCodexItemUpdated(
+  state: CodexRunState,
+  item: OpenAICodexThreadItem,
+): Promise<void> {
+  updateCodexItem(state, item);
+  await recordCodexLlmItem(state, item, { allowAnonymousText: false }); // returns immediately unless item is agent_message/reasoning
+}
+
+async function handleCodexItemCompleted(
+  state: CodexRunState,
+  item: OpenAICodexThreadItem,
+): Promise<void> {
+  state.completedItems.push(item); // recorded for every completed item, tool or not
+  collectOutputText(state, item);
+
+  if (isCodexToolItem(item)) {
+    await finishActiveLlmSpan(state); // close any open LLM span before finishing the tool item's span
+    await finishCodexItemSpan(state, item);
+    return;
+  }
+
+  await recordCodexLlmItem(state, item, { allowAnonymousText: true }); // completed text is stored even when the item has no id
+}
+
 async function createCompletedItemSpan(
   state: CodexRunState,
   item: OpenAICodexThreadItem,
@@ -318,6 +368,126 @@ async function createCompletedItemSpan(
   span.end();
 }
 
+async function recordCodexLlmItem(
+  state: CodexRunState,
+  item: OpenAICodexThreadItem,
+  options: { allowAnonymousText: boolean },
+): Promise<void> {
+  if (item.type !== "agent_message" && item.type !== "reasoning") {
+    return; // only these two item types contribute to LLM span output
+  }
+
+  const text = typeof item.text === "string" ? item.text : undefined;
+  const active = await ensureActiveLlmSpan(state); // opens an LLM span even when the item carries no text yet
+  if (!text) {
+    return;
+  }
+
+  if (item.type === "agent_message") {
+    if (item.id) {
+      active.messagesById.set(item.id, text); // a later event for the same id replaces the earlier text
+    } else if (options.allowAnonymousText) {
+      active.anonymousMessages.push(text);
+    }
+  } else if (item.id) {
+    active.reasoningById.set(item.id, text); // same last-write-wins rule for reasoning items
+  } else if (options.allowAnonymousText) {
+    active.anonymousReasoning.push(text);
+  }
+}
+
+async function ensureActiveLlmSpan(
+  state: CodexRunState,
+): Promise<CodexLlmSpanState> {
+  if (state.activeLlmSpan) {
+    return state.activeLlmSpan; // reuse the span that is already open
+  }
+
+  const sequence = state.llmSequence + 1; // 1-based index of this LLM span within the run
+  state.llmSequence = sequence;
+  const metadata = {
+    ...(state.metadata.provider ? { provider: state.metadata.provider } : {}),
+    ...(state.metadata.model ? { model: state.metadata.model } : {}),
+    ...(state.metadata["openai_codex.model"]
+      ? { "openai_codex.model": state.metadata["openai_codex.model"] }
+      : {}),
+    ...(state.metadata["openai_codex.model_reasoning_effort"]
+      ? {
+          "openai_codex.model_reasoning_effort":
+            state.metadata["openai_codex.model_reasoning_effort"],
+        }
+      : {}),
+    ...(state.metadata["openai_codex.operation"]
+      ? { "openai_codex.operation": state.metadata["openai_codex.operation"] }
+      : {}),
+    ...(state.metadata["openai_codex.thread_id"]
+      ? { "openai_codex.thread_id": state.metadata["openai_codex.thread_id"] }
+      : {}),
+    "openai_codex.llm_sequence": sequence, // the run-metadata keys above are copied only when truthy; this one is always set
+  };
+
+  const span = startSpan({
+    event: {
+      ...(sequence === 1 ? { input: state.input } : {}), // only the first LLM span of a run carries the run input
+      metadata,
+    },
+    name: "OpenAI Codex LLM",
+    parent: await state.span.export(), // parented to the run's own span
+    spanAttributes: { type: SpanTypeAttribute.LLM },
+  });
+
+  state.activeLlmSpan = {
+    anonymousMessages: [],
+    anonymousReasoning: [],
+    messagesById: new Map(),
+    metadata,
+    reasoningById: new Map(),
+    span,
+  };
+  return state.activeLlmSpan;
+}
+
+async function finishActiveLlmSpan(
+  state: CodexRunState,
+  error?: unknown,
+): Promise<void> {
+  const active = state.activeLlmSpan;
+  if (!active) {
+    return; // nothing open, nothing to finish
+  }
+
+  state.activeLlmSpan = undefined; // cleared before logging so the next LLM item opens a fresh span
+  const output = buildLlmOutput(active);
+  safeLog(active.span, {
+    ...(error
+      ? { error: error instanceof Error ? error.message : String(error) }
+      : {}),
+    metadata: active.metadata,
+    ...(output ? { output } : {}), // omitted when no reasoning or message text was collected
+  });
+  active.span.end();
+}
+
+function buildLlmOutput(
+  active: CodexLlmSpanState,
+): Record<string, string> | undefined {
+  const reasoning = [
+    ...active.reasoningById.values(),
+    ...active.anonymousReasoning,
+  ]
+    .filter((text) => text.length > 0)
+    .join("\n"); // id-keyed texts first, then id-less ones, one per line
+  const message = [...active.messagesById.values(), ...active.anonymousMessages]
+    .filter((text) => text.length > 0)
+    .join("\n"); // same ordering and separator as reasoning
+  const output = {
+    ...(reasoning ? { reasoning } : {}),
+    ...(message ? { message } : {}),
+  };
+
+  return Object.keys(output).length > 0 ? output : undefined; // undefined when neither field has text
+}
+
 async function startCodexItemSpan(
   state: CodexRunState,
   item: OpenAICodexThreadItem,
@@ -366,6 +536,15 @@ async function finishCodexItemSpan(
   span.end();
 }
 
+function isCodexToolItem(item: OpenAICodexThreadItem): boolean {
+  return (
+    item.type === "command_execution" ||
+    item.type === "file_change" ||
+    item.type === "mcp_tool_call" ||
+    item.type === "web_search"
+  ); // the item handlers route these types to startCodexItemSpan/finishCodexItemSpan
+}
+
 async function itemSpanArgs(
   state: CodexRunState,
   item: OpenAICodexThreadItem,

From 0a78b8c04c0aef209b094e9e17c734ecbaf4a9bd Mon Sep 17 00:00:00 2001
From: Luca Forstner <luca.forstner@gmail.com>
Date: Tue, 5 May 2026 15:25:59 -0700
Subject: [PATCH 5/5] actually run codex

---
 ...nai-codex-v0128-auto-hook.span-events.json |  20 +-
 ...penai-codex-v0128-wrapped.span-events.json |  20 +-
 .../assertions.ts                             | 357 ++++++++++++------
 .../mock-codex-cli.mjs                        |  15 +-
 .../scenario.impl.mjs                         | 206 ++++++++--
 .../scenario.test.ts                          |  69 ++--
 6 files changed, 473 insertions(+), 214 deletions(-)

diff --git a/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-auto-hook.span-events.json b/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-auto-hook.span-events.json
index 36e8471bd..df218b703 100644
--- a/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-auto-hook.span-events.json
+++ b/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-auto-hook.span-events.json
@@ -13,7 +13,7 @@
       "metric_keys": [],
       "name": "OpenAI Codex LLM",
       "output": {
-        "reasoning": "reasoning before command RUN_OK"
+        "reasoning": "reasoning before command OPENAI_CODEX_RUN_OK"
       },
       "root_span_id": "<span:1>",
       "span_id": "<span:6>",
@@ -35,7 +35,7 @@
       "metric_keys": [],
       "name": "OpenAI Codex LLM",
       "output": {
-        "reasoning": "reasoning after command RUN_OK"
+        "reasoning": "reasoning after command OPENAI_CODEX_RUN_OK"
       },
       "root_span_id": "<span:1>",
       "span_id": "<span:7>",
@@ -57,7 +57,7 @@
       "metric_keys": [],
       "name": "OpenAI Codex LLM",
       "output": {
-        "reasoning": "reasoning after mcp RUN_OK"
+        "reasoning": "reasoning after mcp OPENAI_CODEX_RUN_OK"
       },
       "root_span_id": "<span:1>",
       "span_id": "<span:8>",
@@ -79,8 +79,8 @@
       "metric_keys": [],
       "name": "OpenAI Codex LLM",
       "output": {
-        "message": "Codex RUN_OK",
-        "reasoning": "final reasoning RUN_OK"
+        "message": "Codex OPENAI_CODEX_RUN_OK",
+        "reasoning": "final reasoning OPENAI_CODEX_RUN_OK"
       },
       "root_span_id": "<span:1>",
       "span_id": "<span:9>",
@@ -103,7 +103,7 @@
       "metric_keys": [],
       "name": "OpenAI Codex LLM",
       "output": {
-        "reasoning": "reasoning before command STREAM_OK"
+        "reasoning": "reasoning before command OPENAI_CODEX_STREAM_OK"
       },
       "root_span_id": "<span:1>",
       "span_id": "<span:10>",
@@ -126,7 +126,7 @@
       "metric_keys": [],
       "name": "OpenAI Codex LLM",
       "output": {
-        "reasoning": "reasoning after command STREAM_OK"
+        "reasoning": "reasoning after command OPENAI_CODEX_STREAM_OK"
       },
       "root_span_id": "<span:1>",
       "span_id": "<span:11>",
@@ -149,7 +149,7 @@
       "metric_keys": [],
       "name": "OpenAI Codex LLM",
       "output": {
-        "reasoning": "reasoning after mcp STREAM_OK"
+        "reasoning": "reasoning after mcp OPENAI_CODEX_STREAM_OK"
       },
       "root_span_id": "<span:1>",
       "span_id": "<span:12>",
@@ -172,8 +172,8 @@
       "metric_keys": [],
       "name": "OpenAI Codex LLM",
       "output": {
-        "message": "Codex STREAM_OK",
-        "reasoning": "final reasoning STREAM_OK"
+        "message": "Codex OPENAI_CODEX_STREAM_OK",
+        "reasoning": "final reasoning OPENAI_CODEX_STREAM_OK"
       },
       "root_span_id": "<span:1>",
       "span_id": "<span:13>",
diff --git a/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-wrapped.span-events.json b/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-wrapped.span-events.json
index 36e8471bd..df218b703 100644
--- a/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-wrapped.span-events.json
+++ b/e2e/scenarios/openai-codex-instrumentation/__snapshots__/openai-codex-v0128-wrapped.span-events.json
@@ -13,7 +13,7 @@
       "metric_keys": [],
       "name": "OpenAI Codex LLM",
       "output": {
-        "reasoning": "reasoning before command RUN_OK"
+        "reasoning": "reasoning before command OPENAI_CODEX_RUN_OK"
       },
       "root_span_id": "<span:1>",
       "span_id": "<span:6>",
@@ -35,7 +35,7 @@
       "metric_keys": [],
       "name": "OpenAI Codex LLM",
       "output": {
-        "reasoning": "reasoning after command RUN_OK"
+        "reasoning": "reasoning after command OPENAI_CODEX_RUN_OK"
       },
       "root_span_id": "<span:1>",
       "span_id": "<span:7>",
@@ -57,7 +57,7 @@
       "metric_keys": [],
       "name": "OpenAI Codex LLM",
       "output": {
-        "reasoning": "reasoning after mcp RUN_OK"
+        "reasoning": "reasoning after mcp OPENAI_CODEX_RUN_OK"
       },
       "root_span_id": "<span:1>",
       "span_id": "<span:8>",
@@ -79,8 +79,8 @@
       "metric_keys": [],
       "name": "OpenAI Codex LLM",
       "output": {
-        "message": "Codex RUN_OK",
-        "reasoning": "final reasoning RUN_OK"
+        "message": "Codex OPENAI_CODEX_RUN_OK",
+        "reasoning": "final reasoning OPENAI_CODEX_RUN_OK"
       },
       "root_span_id": "<span:1>",
       "span_id": "<span:9>",
@@ -103,7 +103,7 @@
       "metric_keys": [],
       "name": "OpenAI Codex LLM",
       "output": {
-        "reasoning": "reasoning before command STREAM_OK"
+        "reasoning": "reasoning before command OPENAI_CODEX_STREAM_OK"
       },
       "root_span_id": "<span:1>",
       "span_id": "<span:10>",
@@ -126,7 +126,7 @@
       "metric_keys": [],
       "name": "OpenAI Codex LLM",
       "output": {
-        "reasoning": "reasoning after command STREAM_OK"
+        "reasoning": "reasoning after command OPENAI_CODEX_STREAM_OK"
       },
       "root_span_id": "<span:1>",
       "span_id": "<span:11>",
@@ -149,7 +149,7 @@
       "metric_keys": [],
       "name": "OpenAI Codex LLM",
       "output": {
-        "reasoning": "reasoning after mcp STREAM_OK"
+        "reasoning": "reasoning after mcp OPENAI_CODEX_STREAM_OK"
       },
       "root_span_id": "<span:1>",
       "span_id": "<span:12>",
@@ -172,8 +172,8 @@
       "metric_keys": [],
       "name": "OpenAI Codex LLM",
       "output": {
-        "message": "Codex STREAM_OK",
-        "reasoning": "final reasoning STREAM_OK"
+        "message": "Codex OPENAI_CODEX_STREAM_OK",
+        "reasoning": "final reasoning OPENAI_CODEX_STREAM_OK"
       },
       "root_span_id": "<span:1>",
       "span_id": "<span:13>",
diff --git a/e2e/scenarios/openai-codex-instrumentation/assertions.ts b/e2e/scenarios/openai-codex-instrumentation/assertions.ts
index 2fca6b76d..cb859d997 100644
--- a/e2e/scenarios/openai-codex-instrumentation/assertions.ts
+++ b/e2e/scenarios/openai-codex-instrumentation/assertions.ts
@@ -1,12 +1,12 @@
 import { beforeAll, describe, expect, test } from "vitest";
-import { E2E_TAGS } from "../../helpers/tags";
-import { normalizeForSnapshot, type Json } from "../../helpers/normalize";
-import type { CapturedLogEvent } from "../../helpers/mock-braintrust-server";
 import {
   formatJsonFileSnapshot,
   resolveFileSnapshotPath,
 } from "../../helpers/file-snapshot";
+import type { CapturedLogEvent } from "../../helpers/mock-braintrust-server";
+import { normalizeForSnapshot, type Json } from "../../helpers/normalize";
 import { withScenarioHarness } from "../../helpers/scenario-harness";
+import { E2E_TAGS } from "../../helpers/tags";
 import { findLatestSpan } from "../../helpers/trace-selectors";
 import { summarizeWrapperContract } from "../../helpers/wrapper-contract";
 import { ROOT_NAME, SCENARIO_NAME } from "./scenario.impl.mjs";
@@ -14,6 +14,7 @@ import { ROOT_NAME, SCENARIO_NAME } from "./scenario.impl.mjs";
 type RunOpenAICodexScenario = (harness: {
   runNodeScenarioDir: (options: {
     entry: string;
+    env?: Record<string, string>;
     nodeArgs: string[];
     runContext?: { variantKey: string };
     scenarioDir: string;
@@ -21,12 +22,25 @@ type RunOpenAICodexScenario = (harness: {
   }) => Promise<unknown>;
   runScenarioDir: (options: {
     entry: string;
+    env?: Record<string, string>;
     runContext?: { variantKey: string };
     scenarioDir: string;
     timeoutMs: number;
   }) => Promise<unknown>;
 }) => Promise<void>;
 
+type CodexScenarioMode = "mock" | "real";
+
+const OPERATION_NAMES = [
+  "openai-codex-run-operation",
+  "openai-codex-run-streamed-operation",
+] as const;
+
+const EXPECTED_MARKERS = {
+  "openai-codex-run-operation": "OPENAI_CODEX_RUN_OK",
+  "openai-codex-run-streamed-operation": "OPENAI_CODEX_STREAM_OK",
+} as const;
+
 const METADATA_KEYS = [
   "provider",
   "model",
@@ -43,46 +57,6 @@ const METADATA_KEYS = [
   "openai_codex.mcp.status",
 ] as const;
 
-function summarizeSpan(event: CapturedLogEvent | undefined): Json {
-  if (!event) {
-    return null;
-  }
-  const summary = summarizeWrapperContract(event, [...METADATA_KEYS]) as Record<
-    string,
-    Json
-  >;
-  if (summary.metadata && typeof summary.metadata === "object") {
-    const metadata = summary.metadata as Record<string, Json>;
-    if (typeof metadata["openai_codex.thread_id"] === "string") {
-      metadata["openai_codex.thread_id"] = "<thread-id>";
-    }
-  }
-  return summary;
-}
-
-function isRecord(value: unknown): value is Record<string, unknown> {
-  return typeof value === "object" && value !== null && !Array.isArray(value);
-}
-
-function summarizeLlmOutput(output: unknown): Json {
-  if (!isRecord(output)) {
-    return null;
-  }
-
-  return {
-    ...(typeof output.reasoning === "string"
-      ? { reasoning: output.reasoning }
-      : {}),
-    ...(typeof output.message === "string" ? { message: output.message } : {}),
-  } as Json;
-}
-
-function summarizeLlmSpan(event: CapturedLogEvent | undefined): Json {
-  const summary = summarizeSpan(event) as Record<string, Json>;
-  summary.output = summarizeLlmOutput(event?.output);
-  return summary as Json;
-}
-
 function findCodexTask(events: CapturedLogEvent[], operationName: string) {
   const operation = findLatestSpan(events, operationName);
   return [...events]
@@ -144,10 +118,83 @@ function latestSpansForParent(
   });
 }
 
+function expectPositiveMetric(
+  event: CapturedLogEvent | undefined,
+  keys: string[],
+): void {
+  const hasPositiveMetric = keys.some((key) => {
+    const value = event?.metrics?.[key];
+    return typeof value === "number" && value > 0; // zero, negative and non-numeric values do not count
+  });
+
+  expect(hasPositiveMetric).toBe(true); // one positive metric among keys is enough; an undefined event fails
+}
+
+function outputText(event: CapturedLogEvent | undefined): string {
+  return typeof event?.output === "string"
+    ? event.output
+    : JSON.stringify(event?.output ?? ""); // non-string output is serialized; a null/undefined output yields '""'
+}
+
+function sequenceNumber(event: CapturedLogEvent): number | undefined {
+  const value = event.metadata?.["openai_codex.llm_sequence"];
+  return typeof value === "number" ? value : undefined; // undefined when the key is absent or not a number
+}
+
 function childSpanLabel(event: CapturedLogEvent): string {
   return event.span.type === "llm" ? "llm" : (event.span.name ?? "");
 }
 
+function llmOutput(event: CapturedLogEvent): {
+  message?: string;
+  reasoning?: string;
+} {
+  return event.output &&
+    typeof event.output === "object" &&
+    !Array.isArray(event.output)
+    ? (event.output as { message?: string; reasoning?: string })
+    : {}; // falsy, primitive and array outputs yield {}; the cast above does not validate field types
+}
+
+function summarizeSpan(event: CapturedLogEvent | undefined): Json {
+  if (!event) {
+    return null; // a missing span is summarized as null
+  }
+  const summary = summarizeWrapperContract(event, [...METADATA_KEYS]) as Record<
+    string,
+    Json
+  >;
+  if (summary.metadata && typeof summary.metadata === "object") {
+    const metadata = summary.metadata as Record<string, Json>;
+    if (typeof metadata["openai_codex.thread_id"] === "string") {
+      metadata["openai_codex.thread_id"] = "<thread-id>"; // replaced in place with a fixed placeholder — presumably to keep snapshots stable
+    }
+  }
+  return summary;
+}
+
+function summarizeLlmOutput(output: unknown): Json {
+  if (typeof output !== "object" || output === null || Array.isArray(output)) {
+    return null; // only non-null, non-array objects are summarized
+  }
+  const outputRecord = output as { message?: unknown; reasoning?: unknown };
+
+  return {
+    ...(typeof outputRecord.reasoning === "string"
+      ? { reasoning: outputRecord.reasoning }
+      : {}),
+    ...(typeof outputRecord.message === "string"
+      ? { message: outputRecord.message }
+      : {}),
+  } as Json; // keeps only string-valued reasoning/message; every other field is dropped
+}
+
+function summarizeLlmSpan(event: CapturedLogEvent | undefined): Json {
+  const summary = summarizeSpan(event) as Record<string, Json>; // NOTE(review): summarizeSpan returns null for an undefined event, so the next line would throw — confirm callers always pass an event
+  summary.output = summarizeLlmOutput(event?.output); // adds the reduced reasoning/message view as the summary's output
+  return summary as Json;
+}
+
 function summarize(events: CapturedLogEvent[]): Json {
   const runTask = findCodexTask(events, "openai-codex-run-operation");
   const streamedTask = findCodexTask(
@@ -176,19 +223,31 @@ function summarize(events: CapturedLogEvent[]): Json {
   } as Json);
 }
 
+function mockSnapshotPath(options: {
+  snapshotName?: string;
+  testFileUrl?: string;
+}): string {
+  if (!options.snapshotName || !options.testFileUrl) {
+    throw new Error(
+      "Mock OpenAI Codex instrumentation assertions require snapshotName and testFileUrl",
+    ); // the check is falsy-based, so empty strings are rejected as well as undefined
+  }
+  return resolveFileSnapshotPath(
+    options.testFileUrl,
+    `${options.snapshotName}.span-events.json`, // snapshot file name derived from snapshotName
+  );
+}
+
 export function defineOpenAICodexInstrumentationAssertions(options: {
+  mode: CodexScenarioMode;
   name: string;
   runScenario: RunOpenAICodexScenario;
-  snapshotName: string;
-  testFileUrl: string;
+  snapshotName?: string;
+  testFileUrl?: string;
   timeoutMs: number;
 }): void {
-  const snapshotPath = resolveFileSnapshotPath(
-    options.testFileUrl,
-    `${options.snapshotName}.span-events.json`,
-  );
   const testConfig = {
-    tags: [E2E_TAGS.hermetic],
+    ...(options.mode === "mock" ? { tags: [E2E_TAGS.hermetic] } : {}),
     timeout: options.timeoutMs,
   };
 
@@ -210,10 +269,7 @@ export function defineOpenAICodexInstrumentationAssertions(options: {
     });
 
     test("captures Codex task spans", testConfig, () => {
-      for (const operationName of [
-        "openai-codex-run-operation",
-        "openai-codex-run-streamed-operation",
-      ]) {
+      for (const operationName of OPERATION_NAMES) {
         const operation = findLatestSpan(events, operationName);
         const task = findCodexTask(events, operationName);
 
@@ -223,94 +279,149 @@ export function defineOpenAICodexInstrumentationAssertions(options: {
         expect(task?.row.metadata).toMatchObject({
           provider: "openai",
         });
+        expect(task?.row.metadata?.["openai_codex.model"]).toEqual(
+          expect.any(String),
+        );
       }
     });
 
-    test("captures LLM spans around tool calls", testConfig, () => {
+    test("captures dynamic LLM spans for each Codex turn", testConfig, () => {
       const llmSpans = latestSpansByType(events, "llm");
 
-      expect(llmSpans).toHaveLength(8);
+      expect(llmSpans.length).toBeGreaterThanOrEqual(OPERATION_NAMES.length);
       expect(
         llmSpans.every((event) => event.span.name === "OpenAI Codex LLM"),
       ).toBe(true);
-      expect(
-        llmSpans.some((event) => {
-          const output = event.output as
-            | { message?: string; reasoning?: string }
-            | undefined;
-          return (
-            output?.reasoning === "final reasoning RUN_OK" &&
-            output.message === "Codex RUN_OK"
-          );
-        }),
-      ).toBe(true);
-      expect(
-        llmSpans.some((event) => {
-          const output = event.output as
-            | { message?: string; reasoning?: string }
-            | undefined;
-          return output?.reasoning === "reasoning after command STREAM_OK";
-        }),
-      ).toBe(true);
 
-      for (const operationName of [
-        "openai-codex-run-operation",
-        "openai-codex-run-streamed-operation",
-      ]) {
+      for (const operationName of OPERATION_NAMES) {
         const task = findCodexTask(events, operationName);
-        expect(
-          latestSpansForParent(events, task?.span.id).map(childSpanLabel),
-        ).toEqual([
-          "llm",
-          "tool: command_execution",
-          "llm",
-          "tool: read_file",
-          "llm",
-          "tool: web_search",
-          "llm",
-        ]);
+        const childSpans = latestSpansForParent(events, task?.span.id);
+        const taskLlmSpans = childSpans.filter(
+          (event) => event.span.type === "llm",
+        );
+        const sequences = taskLlmSpans
+          .map(sequenceNumber)
+          .filter((value): value is number => value !== undefined);
+
+        expect(taskLlmSpans.length).toBeGreaterThanOrEqual(1);
+        expect(sequences[0]).toBe(1);
+        expect(sequences).toEqual([...sequences].sort((a, b) => a - b));
+        expect(taskLlmSpans.some((event) => outputText(event).length > 2)).toBe(
+          true,
+        );
       }
     });
 
-    test("captures command and MCP tool spans", testConfig, () => {
-      const toolSpans = latestSpansByType(events, "tool");
+    test(
+      "captures Codex tool spans when the agent uses tools",
+      testConfig,
+      () => {
+        const toolSpans = latestSpansByType(events, "tool");
 
-      expect(
-        toolSpans.some(
-          (event) =>
-            event.span.name === "tool: command_execution" &&
-            event.output === "codex_tool_ok",
-        ),
-      ).toBe(true);
-      expect(
-        toolSpans.some(
-          (event) =>
-            event.span.name === "tool: read_file" &&
-            event.metadata?.["openai_codex.mcp.server"] === "filesystem",
-        ),
-      ).toBe(true);
-    });
+        expect(toolSpans.length).toBeGreaterThanOrEqual(OPERATION_NAMES.length);
+        expect(
+          toolSpans.some(
+            (event) => event.span.name === "tool: command_execution",
+          ),
+        ).toBe(true);
+
+        for (const operationName of OPERATION_NAMES) {
+          const task = findCodexTask(events, operationName);
+          const childSpans = latestSpansForParent(events, task?.span.id);
+          const childTypes = childSpans.map((event) => event.span.type);
+
+          expect(childTypes).toContain("llm");
+          expect(childTypes).toContain("tool");
+        }
+      },
+    );
 
     test("captures final responses and usage metrics", testConfig, () => {
-      const runTask = findCodexTask(events, "openai-codex-run-operation");
-      const streamedTask = findCodexTask(
-        events,
-        "openai-codex-run-streamed-operation",
+      for (const operationName of OPERATION_NAMES) {
+        const task = findCodexTask(events, operationName);
+
+        expect(outputText(task)).toContain(EXPECTED_MARKERS[operationName]);
+        expectPositiveMetric(task, [
+          "tokens",
+          "prompt_tokens",
+          "completion_tokens",
+        ]);
+      }
+    });
+
+    if (options.mode === "mock") {
+      test(
+        "captures deterministic mock LLM and tool details",
+        testConfig,
+        () => {
+          const llmSpans = latestSpansByType(events, "llm");
+          const toolSpans = latestSpansByType(events, "tool");
+
+          expect(llmSpans).toHaveLength(8);
+          expect(
+            llmSpans.some((event) => {
+              const output = llmOutput(event);
+              return (
+                output.reasoning === "final reasoning OPENAI_CODEX_RUN_OK" &&
+                output.message === "Codex OPENAI_CODEX_RUN_OK"
+              );
+            }),
+          ).toBe(true);
+          expect(
+            llmSpans.some(
+              (event) =>
+                llmOutput(event).reasoning ===
+                "reasoning after command OPENAI_CODEX_STREAM_OK",
+            ),
+          ).toBe(true);
+
+          for (const operationName of OPERATION_NAMES) {
+            const task = findCodexTask(events, operationName);
+            expect(
+              latestSpansForParent(events, task?.span.id).map(childSpanLabel),
+            ).toEqual([
+              "llm",
+              "tool: command_execution",
+              "llm",
+              "tool: read_file",
+              "llm",
+              "tool: web_search",
+              "llm",
+            ]);
+          }
+
+          expect(
+            toolSpans.some(
+              (event) =>
+                event.span.name === "tool: command_execution" &&
+                event.output === "codex_tool_ok",
+            ),
+          ).toBe(true);
+          expect(
+            toolSpans.some(
+              (event) =>
+                event.span.name === "tool: read_file" &&
+                event.metadata?.["openai_codex.mcp.server"] === "filesystem",
+            ),
+          ).toBe(true);
+        },
       );
 
-      expect(runTask?.output).toContain("RUN_OK");
-      expect(streamedTask?.output).toContain("STREAM_OK");
-      expect(runTask?.metrics).toMatchObject({
-        completion_tokens: 7,
-        prompt_cached_tokens: 3,
-        prompt_tokens: 11,
+      test("captures deterministic mock usage metrics", testConfig, () => {
+        const runTask = findCodexTask(events, "openai-codex-run-operation");
+
+        expect(runTask?.metrics).toMatchObject({
+          completion_tokens: 7,
+          prompt_cached_tokens: 3,
+          prompt_tokens: 11,
+        });
       });
-    });
 
-    test("matches the shared span snapshot", testConfig, async () => {
-      await expect(
-        formatJsonFileSnapshot(summarize(events)),
-      ).toMatchFileSnapshot(snapshotPath);
-    });
+      test("matches the mock span snapshot", testConfig, async () => {
+        await expect(
+          formatJsonFileSnapshot(summarize(events)),
+        ).toMatchFileSnapshot(mockSnapshotPath(options));
+      });
+    }
   });
 }
diff --git a/e2e/scenarios/openai-codex-instrumentation/mock-codex-cli.mjs b/e2e/scenarios/openai-codex-instrumentation/mock-codex-cli.mjs
index e7243154d..c0506f887 100755
--- a/e2e/scenarios/openai-codex-instrumentation/mock-codex-cli.mjs
+++ b/e2e/scenarios/openai-codex-instrumentation/mock-codex-cli.mjs
@@ -7,8 +7,9 @@ process.stdin.on("data", (chunk) => {
   input += chunk;
 });
 process.stdin.on("end", () => {
-  const isStream = input.includes("stream");
-  const suffix = isStream ? "STREAM_OK" : "RUN_OK";
+  const isStream =
+    input.includes("OPENAI_CODEX_STREAM_OK") || input.includes("stream");
+  const marker = isStream ? "OPENAI_CODEX_STREAM_OK" : "OPENAI_CODEX_RUN_OK";
   const threadId = isStream ? "thread_stream" : "thread_run";
   const events = [
     { type: "thread.started", thread_id: threadId },
@@ -18,7 +19,7 @@ process.stdin.on("end", () => {
       item: {
         id: `${threadId}_reasoning_before_command`,
         type: "reasoning",
-        text: `reasoning before command ${suffix}`,
+        text: `reasoning before command ${marker}`,
       },
     },
     {
@@ -47,7 +48,7 @@ process.stdin.on("end", () => {
       item: {
         id: `${threadId}_reasoning_after_command`,
         type: "reasoning",
-        text: `reasoning after command ${suffix}`,
+        text: `reasoning after command ${marker}`,
       },
     },
     {
@@ -81,7 +82,7 @@ process.stdin.on("end", () => {
       item: {
         id: `${threadId}_reasoning_after_mcp`,
         type: "reasoning",
-        text: `reasoning after mcp ${suffix}`,
+        text: `reasoning after mcp ${marker}`,
       },
     },
     {
@@ -97,7 +98,7 @@ process.stdin.on("end", () => {
       item: {
         id: `${threadId}_reasoning`,
         type: "reasoning",
-        text: `final reasoning ${suffix}`,
+        text: `final reasoning ${marker}`,
       },
     },
     {
@@ -105,7 +106,7 @@ process.stdin.on("end", () => {
       item: {
         id: `${threadId}_message`,
         type: "agent_message",
-        text: `Codex ${suffix}`,
+        text: `Codex ${marker}`,
       },
     },
     {
diff --git a/e2e/scenarios/openai-codex-instrumentation/scenario.impl.mjs b/e2e/scenarios/openai-codex-instrumentation/scenario.impl.mjs
index c63ae20e6..2444db4a4 100644
--- a/e2e/scenarios/openai-codex-instrumentation/scenario.impl.mjs
+++ b/e2e/scenarios/openai-codex-instrumentation/scenario.impl.mjs
@@ -4,6 +4,8 @@ import {
   runOperation,
   runTracedScenario,
 } from "../../helpers/provider-runtime.mjs";
+import { mkdtemp, readFile, rm, writeFile } from "node:fs/promises";
+import os from "node:os";
 import path from "node:path";
 import { fileURLToPath } from "node:url";
 
@@ -12,8 +14,82 @@ export const SCENARIO_NAME = "openai-codex-instrumentation";
 
 const SCENARIO_DIR = path.dirname(fileURLToPath(import.meta.url));
 const MOCK_CODEX_PATH = path.join(SCENARIO_DIR, "mock-codex-cli.mjs");
+const RUN_MARKER = "OPENAI_CODEX_RUN_OK";
+const STREAM_MARKER = "OPENAI_CODEX_STREAM_OK";
 
-function createClient(SDK) {
+// Parses one `.env` line into { key, value }. An `export ` prefix and one
+// pair of matching surrounding quotes are stripped. Returns undefined for
+// blank lines, `#` comments, and lines with no `=` or an empty key.
+function parseEnvLine(line) {
+  const trimmed = line.trim();
+  if (!trimmed || trimmed.startsWith("#")) {
+    return;
+  }
+
+  const withoutExport = trimmed.startsWith("export ")
+    ? trimmed.slice("export ".length).trim()
+    : trimmed;
+  const separator = withoutExport.indexOf("=");
+  if (separator <= 0) {
+    return;
+  }
+
+  const key = withoutExport.slice(0, separator).trim();
+  const raw = withoutExport.slice(separator + 1).trim();
+  // A lone quote character is a literal value, not an empty quoted string.
+  const isQuoted =
+    raw.length >= 2 && ['"', "'"].includes(raw[0]) && raw.endsWith(raw[0]);
+  return { key, value: isQuoted ? raw.slice(1, -1) : raw };
+}
+
+// Best-effort: copies entries from <BRAINTRUST_E2E_REPO_ROOT>/.env into
+// process.env without overriding variables that are already set. Does
+// nothing when the root is unset or the file cannot be read.
+async function loadRootEnv() {
+  const repoRoot = process.env.BRAINTRUST_E2E_REPO_ROOT;
+  if (!repoRoot) {
+    return;
+  }
+  const envPath = path.join(repoRoot, ".env");
+  const contents = await readFile(envPath, "utf8").catch(() => undefined);
+  if (contents === undefined) {
+    return;
+  }
+  for (const line of contents.split(/\r?\n/)) {
+    const entry = parseEnvLine(line);
+    if (entry !== undefined && process.env[entry.key] === undefined) {
+      process.env[entry.key] = entry.value;
+    }
+  }
+}
+
+// Returns a copy of process.env that omits entries whose value is undefined.
+function stringEnv() {
+  const hasValue = ([, value]) => value !== undefined;
+  return Object.fromEntries(Object.entries(process.env).filter(hasValue));
+}
+
+// Returns OPENAI_API_KEY from the environment; throws when unset or empty.
+function requireOpenAIKey() {
+  if (!process.env.OPENAI_API_KEY) {
+    throw new Error(
+      "OPENAI_API_KEY is required to run openai-codex-instrumentation against the real Codex SDK",
+    );
+  }
+  return process.env.OPENAI_API_KEY;
+}
+
+function scenarioMode() {
+  const mode = process.env.OPENAI_CODEX_E2E_MODE ?? "mock"; // unset means mock
+  if (mode === "mock" || mode === "real") {
+    return mode;
+  }
+  throw new Error(
+    `OPENAI_CODEX_E2E_MODE must be "mock" or "real", received ${JSON.stringify(mode)}`,
+  );
+}
+
+function createMockClient(SDK) {
   const { Codex } = SDK;
   return new Codex({
     apiKey: "test-key",
@@ -24,49 +100,111 @@ function createClient(SDK) {
   });
 }
 
-function startThread(client) {
+// Creates a Codex client for real runs, passing the required API key and a
+// copy of the current environment without undefined entries.
+function createRealClient(SDK) {
+  const { Codex } = SDK;
+  const clientOptions = { apiKey: requireOpenAIKey(), env: stringEnv() };
+  return new Codex(clientOptions);
+}
+
+function createClient(SDK, mode) {
+  return (mode === "real" ? createRealClient : createMockClient)(SDK); // mock unless mode is "real"
+}
+
+function startThread(client, mode, workingDirectory) {
   return client.startThread({
     approvalPolicy: "never",
-    model: "gpt-5-codex",
+    model: process.env.OPENAI_CODEX_E2E_MODEL ?? "gpt-5-codex", // env override, else the default model
     modelReasoningEffort: "low",
     networkAccessEnabled: false,
-    sandboxMode: "danger-full-access",
+    sandboxMode: mode === "real" ? "workspace-write" : "danger-full-access",
+    ...(mode === "real" ? { skipGitRepoCheck: true } : {}), // option is only sent in real mode
     webSearchMode: "disabled",
-    workingDirectory: process.cwd(),
+    workingDirectory, // directory chosen by the caller for this thread
   });
 }
 
+// Creates a fresh temp directory containing codex-input.txt, whose text
+// states the given marker, and returns the directory path. Removal is left
+// to the caller.
+async function createWorkspace(marker) {
+  const prefix = path.join(os.tmpdir(), "braintrust-codex-e2e-");
+  const workingDirectory = await mkdtemp(prefix);
+  const inputPath = path.join(workingDirectory, "codex-input.txt");
+  const inputText = `The final answer marker is ${marker}.\n`;
+  await writeFile(inputPath, inputText, "utf8");
+  return workingDirectory;
+}
+
+function realPrompt(marker) {
+  return [
+    "You are running inside an SDK instrumentation test.",
+    "Before answering, use the shell to run `cat codex-input.txt`.", // file written by createWorkspace()
+    "Then answer in one short sentence.",
+    `The final response must include the exact marker ${marker}.`,
+  ].join(" "); // sentences are joined with single spaces into one prompt
+}
+
+function mockPrompt(marker, operation) {
+  return `Return Codex ${marker} after using a command in ${operation} mode.`; // prompt text used for mock-mode runs
+}
+
 async function runOpenAICodexScenario({ decorateSDK, sdk }) {
+  const mode = scenarioMode();
+  if (mode === "real") {
+    await loadRootEnv();
+  }
   const instrumentedSDK = decorateSDK ? decorateSDK(sdk) : sdk;
-  const client = createClient(instrumentedSDK);
-
-  await runTracedScenario({
-    callback: async () => {
-      await runOperation("openai-codex-run-operation", "run", async () => {
-        const thread = startThread(client);
-        await thread.run("Return Codex RUN_OK after using a command.");
-      });
-
-      await runOperation(
-        "openai-codex-run-streamed-operation",
-        "runStreamed",
-        async () => {
-          const thread = startThread(client);
-          const streamedTurn = await thread.runStreamed(
-            "Return Codex STREAM_OK after using a command in stream mode.",
-          );
-          await collectAsync(streamedTurn.events);
-        },
-      );
-    },
-    flushCount: 2,
-    flushDelayMs: 100,
-    metadata: {
-      scenario: SCENARIO_NAME,
-    },
-    projectNameBase: "e2e-openai-codex-instrumentation",
-    rootName: ROOT_NAME,
-  });
+  const client = createClient(instrumentedSDK, mode);
+  let runWorkingDirectory = process.cwd();
+  let streamedWorkingDirectory = process.cwd();
+  const runPrompt =
+    mode === "real" ? realPrompt(RUN_MARKER) : mockPrompt(RUN_MARKER, "run");
+  const streamedPrompt =
+    mode === "real"
+      ? realPrompt(STREAM_MARKER)
+      : mockPrompt(STREAM_MARKER, "stream");
+
+  try {
+    if (mode === "real") {
+      runWorkingDirectory = await createWorkspace(RUN_MARKER);
+      streamedWorkingDirectory = await createWorkspace(STREAM_MARKER);
+    }
+
+    await runTracedScenario({
+      callback: async () => {
+        await runOperation("openai-codex-run-operation", "run", async () => {
+          const thread = startThread(client, mode, runWorkingDirectory);
+          await thread.run(runPrompt);
+        });
+
+        await runOperation(
+          "openai-codex-run-streamed-operation",
+          "runStreamed",
+          async () => {
+            const thread = startThread(client, mode, streamedWorkingDirectory);
+            const streamedTurn = await thread.runStreamed(streamedPrompt);
+            await collectAsync(streamedTurn.events);
+          },
+        );
+      },
+      flushCount: 2,
+      flushDelayMs: 100,
+      metadata: {
+        scenario: SCENARIO_NAME,
+      },
+      projectNameBase: "e2e-openai-codex-instrumentation",
+      rootName: ROOT_NAME,
+    });
+  } finally {
+    if (mode === "real") {
+      await Promise.allSettled([
+        rm(runWorkingDirectory, { force: true, recursive: true }),
+        rm(streamedWorkingDirectory, { force: true, recursive: true }),
+      ]);
+    }
+  }
 }
 
 export async function runWrappedOpenAICodexInstrumentation(sdk) {
diff --git a/e2e/scenarios/openai-codex-instrumentation/scenario.test.ts b/e2e/scenarios/openai-codex-instrumentation/scenario.test.ts
index d23b8df49..1f2aeb80e 100644
--- a/e2e/scenarios/openai-codex-instrumentation/scenario.test.ts
+++ b/e2e/scenarios/openai-codex-instrumentation/scenario.test.ts
@@ -9,7 +9,8 @@ import { defineOpenAICodexInstrumentationAssertions } from "./assertions";
 const scenarioDir = await prepareScenarioDir({
   scenarioDir: resolveScenarioDir(import.meta.url),
 });
-const TIMEOUT_MS = 120_000;
+const TIMEOUT_MS = 240_000;
+const CODEX_SCENARIO_MODES = ["mock", "real"] as const;
 const openAICodexScenario = {
   autoEntry: "scenario.openai-codex-v0128.mjs",
   autoSnapshotName: "openai-codex-v0128-auto-hook",
@@ -24,36 +25,44 @@ const openAICodexScenario = {
 };
 
 describe("wrapped instrumentation", () => {
-  defineOpenAICodexInstrumentationAssertions({
-    name: `openai codex sdk ${openAICodexScenario.version}`,
-    runScenario: async ({ runScenarioDir }) => {
-      await runScenarioDir({
-        entry: openAICodexScenario.wrapperEntry,
-        runContext: { variantKey: openAICodexScenario.variantKey },
-        scenarioDir,
-        timeoutMs: TIMEOUT_MS,
-      });
-    },
-    snapshotName: openAICodexScenario.wrapperSnapshotName,
-    testFileUrl: import.meta.url,
-    timeoutMs: TIMEOUT_MS,
-  });
+  for (const mode of CODEX_SCENARIO_MODES) {
+    defineOpenAICodexInstrumentationAssertions({
+      mode,
+      name: `openai codex sdk ${openAICodexScenario.version} (${mode})`,
+      runScenario: async ({ runScenarioDir }) => {
+        await runScenarioDir({
+          entry: openAICodexScenario.wrapperEntry,
+          env: { OPENAI_CODEX_E2E_MODE: mode }, // scenario.impl.mjs reads this variable to pick mock or real
+          runContext: { variantKey: openAICodexScenario.variantKey },
+          scenarioDir,
+          timeoutMs: TIMEOUT_MS,
+        });
+      },
+      snapshotName: openAICodexScenario.wrapperSnapshotName, // consumed by the mock-mode snapshot test
+      testFileUrl: import.meta.url,
+      timeoutMs: TIMEOUT_MS,
+    });
+  }
 });
 
 describe("auto-hook instrumentation", () => {
-  defineOpenAICodexInstrumentationAssertions({
-    name: `openai codex sdk ${openAICodexScenario.version}`,
-    runScenario: async ({ runNodeScenarioDir }) => {
-      await runNodeScenarioDir({
-        entry: openAICodexScenario.autoEntry,
-        nodeArgs: ["--import", "braintrust/hook.mjs"],
-        runContext: { variantKey: openAICodexScenario.variantKey },
-        scenarioDir,
-        timeoutMs: TIMEOUT_MS,
-      });
-    },
-    snapshotName: openAICodexScenario.autoSnapshotName,
-    testFileUrl: import.meta.url,
-    timeoutMs: TIMEOUT_MS,
-  });
+  for (const mode of CODEX_SCENARIO_MODES) {
+    defineOpenAICodexInstrumentationAssertions({
+      mode,
+      name: `openai codex sdk ${openAICodexScenario.version} (${mode})`,
+      runScenario: async ({ runNodeScenarioDir }) => {
+        await runNodeScenarioDir({
+          entry: openAICodexScenario.autoEntry,
+          env: { OPENAI_CODEX_E2E_MODE: mode }, // scenario.impl.mjs reads this variable to pick mock or real
+          nodeArgs: ["--import", "braintrust/hook.mjs"],
+          runContext: { variantKey: openAICodexScenario.variantKey },
+          scenarioDir,
+          timeoutMs: TIMEOUT_MS,
+        });
+      },
+      snapshotName: openAICodexScenario.autoSnapshotName, // consumed by the mock-mode snapshot test
+      testFileUrl: import.meta.url,
+      timeoutMs: TIMEOUT_MS,
+    });
+  }
 });