Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions .changeset/dirty-hotels-show.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
---
"braintrust": patch
---

OpenAI's `chat.completions.parse()` calls `create()._thenUnwrap(...)`, which produces two `APIPromise` instances sharing the same responsePromise.

Our SDK called `.then()` on each of them, each call triggering a `parseResponse`, which calls `response.json()` on the HTTP response body. However, a `Response` body can only be read once.

Might fix a customer issue https://braintrustdata.slack.com/archives/C0B27UX9UDR/p1778223683547239
87 changes: 87 additions & 0 deletions e2e/scenarios/openai-instrumentation/scenario.real-http.mjs
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/**
* Reproduction scenario for "Body is unusable: Body has already been read".
*
* Uses a real Node.js HTTP server so OpenAI SDK receives genuine undici
* Response objects (not in-process mocks), matching the client's environment.
*
* Root cause: chat.completions.parse calls create()._thenUnwrap(...), producing
* two APIPromise instances (P1 from create, P2 from _thenUnwrap) that share the
* same responsePromise. Both create and parse are instrumented by hook.mjs, so
* tracePromise calls void P1.then(...) AND void P2.then(...). Each eventually
* calls response.json() on the same Response object. Mock/cassette responses
* allow multiple reads (in-memory buffer); real undici responses do not.
*
* Run with:
* node --import braintrust/hook.mjs scenario.real-http.mjs
*/
import * as http from "node:http";
import OpenAI from "openai";
import { runMain } from "../../helpers/provider-runtime.mjs";

// Canned non-streaming chat.completion payload returned by the fixture server.
// Serialized once at module load so every request sends an identical body.
const chatCompletionFixture = {
  id: "chatcmpl-fixture",
  object: "chat.completion",
  created: 1740000000,
  model: "gpt-4o-mini-2024-07-18",
  choices: [
    {
      index: 0,
      message: { role: "assistant", content: '{"answer":4}' },
      finish_reason: "stop",
    },
  ],
  usage: { prompt_tokens: 10, completion_tokens: 2, total_tokens: 12 },
};

const CHAT_RESPONSE = JSON.stringify(chatCompletionFixture);

/**
 * Boot a throwaway HTTP server on an ephemeral 127.0.0.1 port that answers
 * every request with the canned chat-completion payload.
 *
 * @returns {Promise<{server: import("node:http").Server, port: number}>}
 *   Resolves once the server is listening; rejects on a server error.
 */
function startServer() {
  return new Promise((resolve, reject) => {
    const server = http.createServer((req, res) => {
      // Drain the incoming request body, then reply with the fixture JSON.
      let payload = "";
      req.on("data", (chunk) => {
        payload += chunk;
      });
      req.on("end", () => {
        res.writeHead(200, { "content-type": "application/json" });
        res.end(CHAT_RESPONSE);
      });
    });
    server.on("error", reject);
    // Port 0 lets the OS pick a free port; read it back once listening.
    server.listen(0, "127.0.0.1", () => {
      resolve({ server, port: server.address().port });
    });
  });
}

runMain(async () => {
  const { server, port } = await startServer();
  try {
    const client = new OpenAI({
      apiKey: "test-key",
      baseURL: `http://127.0.0.1:${port}/v1`,
    });

    // .parse() internally calls create()._thenUnwrap(...) — this is what triggers
    // the double body read when both create and parse are instrumented.
    const result = await client.chat.completions.parse({
      model: "gpt-4o-mini-2024-07-18",
      messages: [{ role: "user", content: "What is 2+2?" }],
      response_format: {
        type: "json_schema",
        json_schema: {
          name: "math_response",
          schema: {
            type: "object",
            properties: { answer: { type: "number" } },
            required: ["answer"],
          },
        },
      },
      max_tokens: 12,
    });

    if (!result.choices[0].message.parsed) {
      throw new Error(`Unexpected response: ${JSON.stringify(result)}`);
    }
  } finally {
    // server.close() only stops new connections; the OpenAI client's keep-alive
    // socket would otherwise keep the event loop alive until undici's idle
    // timeout. closeAllConnections() (Node >= 18.2) tears it down immediately;
    // optional call keeps older Node versions working as before.
    server.close();
    server.closeAllConnections?.();
  }
});
20 changes: 19 additions & 1 deletion e2e/scenarios/openai-instrumentation/scenario.test.ts
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import { describe } from "vitest";
import { describe, it } from "vitest";
import {
prepareScenarioDir,
readInstalledPackageVersion,
resolveScenarioDir,
runNodeScenarioDir,
} from "../../helpers/scenario-harness";
import { cassetteTagsFor } from "../../helpers/tags";
import { defineOpenAIInstrumentationAssertions } from "./assertions";
Expand Down Expand Up @@ -43,6 +44,23 @@ const openaiScenarios = await Promise.all(
})),
);

// Regression test: verify hook.mjs doesn't cause "Body already read" with real undici responses.
// The cassette layer returns in-process Response mocks that mask this bug; this test bypasses it.
describe("real HTTP server (undici responses)", () => {
  const title =
    "hook.mjs does not cause 'Body already read' on non-streaming create()";

  it(
    title,
    async () => {
      // Run the real-HTTP scenario under the loader hook; a double body read
      // makes the child process exit non-zero and fail this test.
      await runNodeScenarioDir({
        entry: "scenario.real-http.mjs",
        nodeArgs: ["--import", "braintrust/hook.mjs"],
        scenarioDir,
        timeoutMs: TIMEOUT_MS,
      });
    },
    TIMEOUT_MS,
  );
});

for (const scenario of openaiScenarios) {
const assertPrivateFieldMethodsOperation =
!scenario.disablePrivateFieldMethodsAssertion;
Expand Down
14 changes: 11 additions & 3 deletions js/src/auto-instrumentations/loader/cjs-patch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import * as NodeModule from "node:module";
import { sep } from "node:path";
import moduleDetailsFromPath from "module-details-from-path";
import { getPackageName, getPackageVersion } from "./get-package-version.js";
import { OPENAI_API_PROMISE_PATCH } from "./openai-api-promise-patch.js";

export class ModulePatch {
private packages: Set<string>;
Expand Down Expand Up @@ -52,10 +53,17 @@ export class ModulePatch {
return self.originalCompile.apply(this, args);
}

const version = getPackageVersion(resolvedModule.basedir);

// Normalize module path for WASM transformer (expects forward slashes)
// Patch OpenAI's APIPromise to prevent double-read of HTTP response bodies.
const normalizedModulePath = resolvedModule.path.replace(/\\/g, "/");
if (
packageName === "openai" &&
normalizedModulePath.includes("api-promise")
) {
args[0] = content + OPENAI_API_PROMISE_PATCH;
return self.originalCompile.apply(this, args);
}

const version = getPackageVersion(resolvedModule.basedir);

const transformer = self.instrumentator.getTransformer(
packageName,
Expand Down
18 changes: 18 additions & 0 deletions js/src/auto-instrumentations/loader/esm-hook.mts
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import {
} from "@apm-js-collab/code-transformer";
import moduleDetailsFromPath from "module-details-from-path";
import { getPackageName, getPackageVersion } from "./get-package-version.js";
import { OPENAI_API_PROMISE_PATCH } from "./openai-api-promise-patch.js";

let instrumentator: any;
let packages: Set<string>;
Expand Down Expand Up @@ -69,9 +70,26 @@ export async function resolve(
return url;
}

// Loose substring match on purpose: the openai package path differs across
// package managers (e.g. pnpm's ".pnpm/openai@x.y.z" layout), and the
// api-promise module name covers both the .mjs and .js builds.
function isOpenAIApiPromise(url: string): boolean {
  const markers = ["/openai", "api-promise"];
  return markers.every((marker) => url.includes(marker));
}

export async function load(url: string, context: any, nextLoad: Function) {
const result = await nextLoad(url, context);

// Patch OpenAI's APIPromise to prevent double-read of HTTP response bodies.
if (isOpenAIApiPromise(url)) {
if (result.format === "commonjs") {
const parsedUrl = new URL(result.responseURL ?? url);
result.source ??= await readFile(parsedUrl);
}
if (result.source) {
result.source = result.source.toString("utf8") + OPENAI_API_PROMISE_PATCH;
result.shortCircuit = true;
}
return result;
}

if (!transformers.has(url)) {
// No transformation needed for this module
return result;
Expand Down
28 changes: 28 additions & 0 deletions js/src/auto-instrumentations/loader/openai-api-promise-patch.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
/**
 * Appended to OpenAI's api-promise module to make APIPromise.prototype.parseResponse
 * idempotent per-instance. Without this, chat.completions.parse() triggers two
 * instrumented .then() calls (one on the create() APIPromise, one on the
 * _thenUnwrap() APIPromise) that share the same responsePromise, causing both to
 * call defaultParseResponse → response.json() on the same undici Response body.
 * Real HTTP responses can only be read once, so the second read throws
 * "Body is unusable: Body has already been read".
 */
export const OPENAI_API_PROMISE_PATCH = `
;(function __btPatchAPIPromise() {
  if (typeof APIPromise === "undefined" || APIPromise.prototype.__btParsePatched) return;
  APIPromise.prototype.__btParsePatched = true;
  var _origThen = APIPromise.prototype.then;
  APIPromise.prototype.then = function __btThen(onfulfilled, onrejected) {
    // Wrap lazily on first .then(): parseResponse is an instance field, so it
    // only exists after the constructor has run. NOTE(review): if the installed
    // OpenAI build ever declares parseResponse as a native #private field, this
    // hasOwnProperty check will not see it — confirm against supported versions.
    if (!this.__btParseWrapped && Object.prototype.hasOwnProperty.call(this, "parseResponse")) {
      this.__btParseWrapped = true;
      var _origParse = this.parseResponse;
      var _invoked = false;
      var _cached;
      this.parseResponse = function () {
        // Run the real parser exactly once per instance; later calls (e.g. the
        // _thenUnwrap() promise delegating back to the create() promise) reuse
        // the first result. Guard with an explicit flag rather than a falsy
        // check on the cache, so a falsy/undefined result is never recomputed.
        if (!_invoked) {
          _invoked = true;
          _cached = _origParse.apply(this, arguments);
        }
        return _cached;
      };
    }
    return _origThen.call(this, onfulfilled, onrejected);
  };
})();
`;
Loading