From ef6b9e94130bf0227dccb98aed7a5f38904e6124 Mon Sep 17 00:00:00 2001
From: Stephen Belanger <stephenbelanger@s-belanger.localdomain>
Date: Wed, 6 May 2026 14:13:20 -0700
Subject: [PATCH 1/5] perf(e2e): parallelize scenario variants within test
 files

Each multi-variant e2e scenario (e.g. anthropic: 6 SDK versions, ai-sdk:
4 versions) previously ran all describe blocks sequentially inside one
Vitest file. Since each withScenarioHarness call starts an isolated mock
server on its own ephemeral port with a unique testRunId, the tests are
already concurrency-safe.

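Wrap the outer for-loops in describe.concurrent so all SDK versions run
their beforeAll subprocess hooks in parallel (bounded by maxConcurrency:
5). Inner describe.sequential blocks keep "wrapped" and "auto-hook"
variants for the same version sequential, avoiding snapshot write races
on --update runs. The claude-agent-sdk scenario is the exception: its
versions were already grouped under top-level "wrapped instrumentation"
and "auto-hook instrumentation" describe blocks, so those two blocks are
switched to describe.concurrent directly and their per-version describe
blocks are left unchanged.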

Also parallelize test:external in js/: the isolated ai-sdk-v5, ai-sdk-v6,
and claude-agent-sdk suites (each with its own node_modules) now run
concurrently via a new cross-platform run-parallel.mjs script, while
openai/anthropic/google-genai (shared node_modules + pnpm prune) remain
sequential.

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../ai-sdk-instrumentation/scenario.test.ts   | 129 +++++++++---------
 .../scenario.test.ts                          |  96 ++++++-------
 .../scenario.test.ts                          |   4 +-
 .../cohere-instrumentation/scenario.test.ts   | 114 ++++++++--------
 .../scenario.test.ts                          |  84 ++++++------
 .../scenario.test.ts                          |  72 +++++-----
 .../scenario.test.ts                          |  72 +++++-----
 .../mistral-instrumentation/scenario.test.ts  | 116 ++++++++--------
 .../openai-instrumentation/scenario.test.ts   |  90 ++++++------
 .../scenario.test.ts                          |  84 ++++++------
 e2e/vitest.config.mts                         |   4 +
 js/package.json                               |   3 +-
 js/scripts/run-parallel.mjs                   |  46 +++++++
 13 files changed, 496 insertions(+), 418 deletions(-)
 create mode 100644 js/scripts/run-parallel.mjs

diff --git a/e2e/scenarios/ai-sdk-instrumentation/scenario.test.ts b/e2e/scenarios/ai-sdk-instrumentation/scenario.test.ts
index daa037e9f..9efe4decd 100644
--- a/e2e/scenarios/ai-sdk-instrumentation/scenario.test.ts
+++ b/e2e/scenarios/ai-sdk-instrumentation/scenario.test.ts
@@ -30,69 +30,72 @@ function parseMajorVersion(version: string): number {
   return Number.isFinite(major) ? major : 0;
 }
 
-for (const scenario of aiSDKScenarios) {
-  const sdkMajorVersion = parseMajorVersion(scenario.version);
-  const supportsRichInputScenarios = sdkMajorVersion >= 5;
-  const supportsOutputObjectScenario = supportsRichInputScenarios;
-  const supportsAttachmentScenario = supportsRichInputScenarios;
+describe.concurrent("variants", () => {
+  for (const scenario of aiSDKScenarios) {
+    const sdkMajorVersion = parseMajorVersion(scenario.version);
+    const supportsRichInputScenarios = sdkMajorVersion >= 5;
+    const supportsOutputObjectScenario = supportsRichInputScenarios;
+    const supportsAttachmentScenario = supportsRichInputScenarios;
+    const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName);
 
-  const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName);
-  describe(`ai sdk ${scenario.version}`, { tags }, () => {
-    defineAISDKInstrumentationAssertions({
-      agentSpanName: scenario.agentSpanName,
-      name: "wrapped instrumentation",
-      runScenario: async ({ runScenarioDir }) => {
-        await runScenarioDir({
-          entry: scenario.wrapperEntry,
-          runContext: {
-            variantKey: scenario.snapshotName,
-            originalScenarioDir,
-          },
-          scenarioDir,
-          timeoutMs: AI_SDK_SCENARIO_TIMEOUT_MS,
-        });
-      },
-      snapshotName: scenario.snapshotName,
-      supportsAttachmentScenario,
-      supportsProviderCacheAssertions: scenario.supportsProviderCacheAssertions,
-      supportsDenyOutputOverrideScenario: supportsRichInputScenarios,
-      supportsGenerateObject: scenario.supportsGenerateObject,
-      supportsOutputObjectScenario,
-      supportsRerank: scenario.supportsRerank !== false,
-      supportsStreamObject: scenario.supportsStreamObject,
-      supportsToolExecution: scenario.supportsToolExecution,
-      sdkMajorVersion,
-      testFileUrl: import.meta.url,
-      timeoutMs: AI_SDK_SCENARIO_TIMEOUT_MS,
-    });
+    describe.sequential(`ai sdk ${scenario.version}`, { tags }, () => {
+      defineAISDKInstrumentationAssertions({
+        agentSpanName: scenario.agentSpanName,
+        name: "wrapped instrumentation",
+        runScenario: async ({ runScenarioDir }) => {
+          await runScenarioDir({
+            entry: scenario.wrapperEntry,
+            runContext: {
+              variantKey: scenario.snapshotName,
+              originalScenarioDir,
+            },
+            scenarioDir,
+            timeoutMs: AI_SDK_SCENARIO_TIMEOUT_MS,
+          });
+        },
+        snapshotName: scenario.snapshotName,
+        supportsAttachmentScenario,
+        supportsProviderCacheAssertions: scenario.supportsProviderCacheAssertions,
+        supportsDenyOutputOverrideScenario: supportsRichInputScenarios,
+        supportsGenerateObject: scenario.supportsGenerateObject,
+        supportsOutputObjectScenario,
+        supportsRerank: scenario.supportsRerank !== false,
+        supportsStreamObject: scenario.supportsStreamObject,
+        supportsToolExecution: scenario.supportsToolExecution,
+        sdkMajorVersion,
+        testFileUrl: import.meta.url,
+        timeoutMs: AI_SDK_SCENARIO_TIMEOUT_MS,
+      });
 
-    defineAISDKInstrumentationAssertions({
-      agentSpanName: scenario.agentSpanName,
-      name: "auto-hook instrumentation",
-      runScenario: async ({ runNodeScenarioDir }) => {
-        await runNodeScenarioDir({
-          entry: scenario.autoEntry,
-          nodeArgs: ["--import", "braintrust/hook.mjs"],
-          runContext: {
-            variantKey: scenario.snapshotName,
-            originalScenarioDir,
-          },
-          scenarioDir,
-          timeoutMs: AI_SDK_SCENARIO_TIMEOUT_MS,
-        });
-      },
-      snapshotName: scenario.snapshotName,
-      supportsAttachmentScenario,
-      supportsProviderCacheAssertions: scenario.supportsProviderCacheAssertions,
-      supportsDenyOutputOverrideScenario: supportsRichInputScenarios,
-      supportsGenerateObject: scenario.supportsGenerateObject,
-      supportsOutputObjectScenario,
-      supportsRerank: scenario.supportsRerank !== false,
-      supportsStreamObject: scenario.supportsStreamObject,
-      supportsToolExecution: scenario.supportsToolExecution,
-      sdkMajorVersion,
-      testFileUrl: import.meta.url,
-      timeoutMs: AI_SDK_SCENARIO_TIMEOUT_MS,
+      defineAISDKInstrumentationAssertions({
+        agentSpanName: scenario.agentSpanName,
+        name: "auto-hook instrumentation",
+        runScenario: async ({ runNodeScenarioDir }) => {
+          await runNodeScenarioDir({
+            entry: scenario.autoEntry,
+            nodeArgs: ["--import", "braintrust/hook.mjs"],
+            runContext: {
+              variantKey: scenario.snapshotName,
+              originalScenarioDir,
+            },
+            scenarioDir,
+            timeoutMs: AI_SDK_SCENARIO_TIMEOUT_MS,
+          });
+        },
+        snapshotName: scenario.snapshotName,
+        supportsAttachmentScenario,
+        supportsProviderCacheAssertions:
+          scenario.supportsProviderCacheAssertions,
+        supportsDenyOutputOverrideScenario: supportsRichInputScenarios,
+        supportsGenerateObject: scenario.supportsGenerateObject,
+        supportsOutputObjectScenario,
+        supportsRerank: scenario.supportsRerank !== false,
+        supportsStreamObject: scenario.supportsStreamObject,
+        supportsToolExecution: scenario.supportsToolExecution,
+        sdkMajorVersion,
+        testFileUrl: import.meta.url,
+        timeoutMs: AI_SDK_SCENARIO_TIMEOUT_MS,
+      });
     });
-  });
-}
+  }
+});
diff --git a/e2e/scenarios/anthropic-instrumentation/scenario.test.ts b/e2e/scenarios/anthropic-instrumentation/scenario.test.ts
index 007dced98..709344571 100644
--- a/e2e/scenarios/anthropic-instrumentation/scenario.test.ts
+++ b/e2e/scenarios/anthropic-instrumentation/scenario.test.ts
@@ -74,52 +74,54 @@ const anthropicScenarios = await Promise.all(
   })),
 );
 
-for (const scenario of anthropicScenarios) {
-  const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName);
-  describe(`anthropic sdk ${scenario.version}`, { tags }, () => {
-    defineAnthropicInstrumentationAssertions({
-      name: "wrapped instrumentation",
-      runScenario: async ({ runScenarioDir }) => {
-        await runScenarioDir({
-          entry: scenario.wrapperEntry,
-          runContext: {
-            variantKey: scenario.snapshotName,
-            originalScenarioDir,
-          },
-          scenarioDir,
-          timeoutMs: TIMEOUT_MS,
-        });
-      },
-      snapshotName: scenario.snapshotName,
-      supportsBetaMessages: scenario.supportsBetaMessages,
-      supportsBetaToolRunner: scenario.supportsBetaToolRunner ?? true,
-      supportsServerToolUse: scenario.supportsServerToolUse ?? true,
-      supportsThinking: scenario.supportsThinking,
-      testFileUrl: import.meta.url,
-      timeoutMs: TIMEOUT_MS,
-    });
+describe.concurrent("variants", () => {
+  for (const scenario of anthropicScenarios) {
+    const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName);
+    describe.sequential(`anthropic sdk ${scenario.version}`, { tags }, () => {
+      defineAnthropicInstrumentationAssertions({
+        name: "wrapped instrumentation",
+        runScenario: async ({ runScenarioDir }) => {
+          await runScenarioDir({
+            entry: scenario.wrapperEntry,
+            runContext: {
+              variantKey: scenario.snapshotName,
+              originalScenarioDir,
+            },
+            scenarioDir,
+            timeoutMs: TIMEOUT_MS,
+          });
+        },
+        snapshotName: scenario.snapshotName,
+        supportsBetaMessages: scenario.supportsBetaMessages,
+        supportsBetaToolRunner: scenario.supportsBetaToolRunner ?? true,
+        supportsServerToolUse: scenario.supportsServerToolUse ?? true,
+        supportsThinking: scenario.supportsThinking,
+        testFileUrl: import.meta.url,
+        timeoutMs: TIMEOUT_MS,
+      });
 
-    defineAnthropicInstrumentationAssertions({
-      name: "auto-hook instrumentation",
-      runScenario: async ({ runNodeScenarioDir }) => {
-        await runNodeScenarioDir({
-          entry: scenario.autoEntry,
-          nodeArgs: ["--import", "braintrust/hook.mjs"],
-          runContext: {
-            variantKey: scenario.snapshotName,
-            originalScenarioDir,
-          },
-          scenarioDir,
-          timeoutMs: TIMEOUT_MS,
-        });
-      },
-      snapshotName: scenario.snapshotName,
-      supportsBetaMessages: scenario.supportsBetaMessages,
-      supportsBetaToolRunner: scenario.supportsBetaToolRunner ?? true,
-      supportsServerToolUse: scenario.supportsServerToolUse ?? true,
-      supportsThinking: scenario.supportsThinking,
-      testFileUrl: import.meta.url,
-      timeoutMs: TIMEOUT_MS,
+      defineAnthropicInstrumentationAssertions({
+        name: "auto-hook instrumentation",
+        runScenario: async ({ runNodeScenarioDir }) => {
+          await runNodeScenarioDir({
+            entry: scenario.autoEntry,
+            nodeArgs: ["--import", "braintrust/hook.mjs"],
+            runContext: {
+              variantKey: scenario.snapshotName,
+              originalScenarioDir,
+            },
+            scenarioDir,
+            timeoutMs: TIMEOUT_MS,
+          });
+        },
+        snapshotName: scenario.snapshotName,
+        supportsBetaMessages: scenario.supportsBetaMessages,
+        supportsBetaToolRunner: scenario.supportsBetaToolRunner ?? true,
+        supportsServerToolUse: scenario.supportsServerToolUse ?? true,
+        supportsThinking: scenario.supportsThinking,
+        testFileUrl: import.meta.url,
+        timeoutMs: TIMEOUT_MS,
+      });
     });
-  });
-}
+  }
+});
diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts
index 9670e4b5b..ed1088cc3 100644
--- a/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts
+++ b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts
@@ -53,7 +53,7 @@ const claudeAgentSDKScenarios = await Promise.all(
   }),
 );
 
-describe("wrapped instrumentation", () => {
+describe.concurrent("wrapped instrumentation", () => {
   for (const scenario of claudeAgentSDKScenarios) {
     const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName);
     describe(`claude agent sdk ${scenario.version}`, { tags }, () => {
@@ -80,7 +80,7 @@ describe("wrapped instrumentation", () => {
   }
 });
 
-describe("auto-hook instrumentation", () => {
+describe.concurrent("auto-hook instrumentation", () => {
   for (const scenario of claudeAgentSDKScenarios) {
     const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName);
     describe(`claude agent sdk ${scenario.version}`, { tags }, () => {
diff --git a/e2e/scenarios/cohere-instrumentation/scenario.test.ts b/e2e/scenarios/cohere-instrumentation/scenario.test.ts
index 0dd756a62..95e446e8a 100644
--- a/e2e/scenarios/cohere-instrumentation/scenario.test.ts
+++ b/e2e/scenarios/cohere-instrumentation/scenario.test.ts
@@ -26,62 +26,64 @@ const cohereScenarios = await Promise.all(
   })),
 );
 
-for (const scenario of cohereScenarios) {
-  const supportsThinking = scenario.supportsThinking ?? true;
-  const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName);
+describe.concurrent("variants", () => {
+  for (const scenario of cohereScenarios) {
+    const supportsThinking = scenario.supportsThinking ?? true;
+    const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName);
 
-  describe(`cohere sdk ${scenario.version}`, { tags }, () => {
-    defineCohereInstrumentationAssertions({
-      name: "wrapped instrumentation",
-      runScenario: async ({ runScenarioDir }) => {
-        await runScenarioDir({
-          entry: scenario.wrapperEntry,
-          env: {
-            COHERE_PACKAGE_NAME: scenario.dependencyName,
-            COHERE_SUPPORTS_THINKING: supportsThinking ? "1" : "0",
-          },
-          runContext: {
-            variantKey: scenario.snapshotName,
-            originalScenarioDir,
-          },
-          scenarioDir,
-          timeoutMs: COHERE_SCENARIO_TIMEOUT_MS,
-        });
-      },
-      requireChatStreamOutput: scenario.snapshotName !== "cohere-v7-14-0",
-      snapshotName:
-        scenario.snapshotName === "cohere-v7-14-0"
-          ? "cohere-v7-14-0-wrapped"
-          : scenario.snapshotName,
-      supportsThinking,
-      testFileUrl: import.meta.url,
-      timeoutMs: COHERE_SCENARIO_TIMEOUT_MS,
-      useV2Namespace: scenario.useV2Namespace ?? false,
-    });
+    describe.sequential(`cohere sdk ${scenario.version}`, { tags }, () => {
+      defineCohereInstrumentationAssertions({
+        name: "wrapped instrumentation",
+        runScenario: async ({ runScenarioDir }) => {
+          await runScenarioDir({
+            entry: scenario.wrapperEntry,
+            env: {
+              COHERE_PACKAGE_NAME: scenario.dependencyName,
+              COHERE_SUPPORTS_THINKING: supportsThinking ? "1" : "0",
+            },
+            runContext: {
+              variantKey: scenario.snapshotName,
+              originalScenarioDir,
+            },
+            scenarioDir,
+            timeoutMs: COHERE_SCENARIO_TIMEOUT_MS,
+          });
+        },
+        requireChatStreamOutput: scenario.snapshotName !== "cohere-v7-14-0",
+        snapshotName:
+          scenario.snapshotName === "cohere-v7-14-0"
+            ? "cohere-v7-14-0-wrapped"
+            : scenario.snapshotName,
+        supportsThinking,
+        testFileUrl: import.meta.url,
+        timeoutMs: COHERE_SCENARIO_TIMEOUT_MS,
+        useV2Namespace: scenario.useV2Namespace ?? false,
+      });
 
-    defineCohereInstrumentationAssertions({
-      name: "auto-hook instrumentation",
-      runScenario: async ({ runNodeScenarioDir }) => {
-        await runNodeScenarioDir({
-          entry: scenario.autoEntry,
-          env: {
-            COHERE_PACKAGE_NAME: scenario.dependencyName,
-            COHERE_SUPPORTS_THINKING: supportsThinking ? "1" : "0",
-          },
-          nodeArgs: ["--import", "braintrust/hook.mjs"],
-          runContext: {
-            variantKey: scenario.snapshotName,
-            originalScenarioDir,
-          },
-          scenarioDir,
-          timeoutMs: COHERE_SCENARIO_TIMEOUT_MS,
-        });
-      },
-      snapshotName: scenario.snapshotName,
-      supportsThinking,
-      testFileUrl: import.meta.url,
-      timeoutMs: COHERE_SCENARIO_TIMEOUT_MS,
-      useV2Namespace: scenario.useV2Namespace ?? false,
+      defineCohereInstrumentationAssertions({
+        name: "auto-hook instrumentation",
+        runScenario: async ({ runNodeScenarioDir }) => {
+          await runNodeScenarioDir({
+            entry: scenario.autoEntry,
+            env: {
+              COHERE_PACKAGE_NAME: scenario.dependencyName,
+              COHERE_SUPPORTS_THINKING: supportsThinking ? "1" : "0",
+            },
+            nodeArgs: ["--import", "braintrust/hook.mjs"],
+            runContext: {
+              variantKey: scenario.snapshotName,
+              originalScenarioDir,
+            },
+            scenarioDir,
+            timeoutMs: COHERE_SCENARIO_TIMEOUT_MS,
+          });
+        },
+        snapshotName: scenario.snapshotName,
+        supportsThinking,
+        testFileUrl: import.meta.url,
+        timeoutMs: COHERE_SCENARIO_TIMEOUT_MS,
+        useV2Namespace: scenario.useV2Namespace ?? false,
+      });
     });
-  });
-}
+  }
+});
diff --git a/e2e/scenarios/google-adk-instrumentation/scenario.test.ts b/e2e/scenarios/google-adk-instrumentation/scenario.test.ts
index db86866b1..87cfbd5be 100644
--- a/e2e/scenarios/google-adk-instrumentation/scenario.test.ts
+++ b/e2e/scenarios/google-adk-instrumentation/scenario.test.ts
@@ -35,47 +35,49 @@ const googleADKScenarios = await Promise.all(
   })),
 );
 
-for (const scenario of googleADKScenarios) {
-  const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName);
+describe.concurrent("variants", () => {
+  for (const scenario of googleADKScenarios) {
+    const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName);
 
-  describe(`google adk sdk ${scenario.version}`, { tags }, () => {
-    defineGoogleADKInstrumentationAssertions({
-      name: "wrapped instrumentation",
-      runScenario: async ({ runScenarioDir }) => {
-        await runScenarioDir({
-          entry: scenario.wrapperEntry,
-          runContext: {
-            variantKey: scenario.snapshotName,
-            originalScenarioDir,
-          },
-          scenarioDir,
-          timeoutMs: TIMEOUT_MS,
-        });
-      },
-      expectLLMSpan: false,
-      snapshotName: scenario.snapshotName,
-      testFileUrl: import.meta.url,
-      timeoutMs: TIMEOUT_MS,
-    });
+    describe.sequential(`google adk sdk ${scenario.version}`, { tags }, () => {
+      defineGoogleADKInstrumentationAssertions({
+        name: "wrapped instrumentation",
+        runScenario: async ({ runScenarioDir }) => {
+          await runScenarioDir({
+            entry: scenario.wrapperEntry,
+            runContext: {
+              variantKey: scenario.snapshotName,
+              originalScenarioDir,
+            },
+            scenarioDir,
+            timeoutMs: TIMEOUT_MS,
+          });
+        },
+        expectLLMSpan: false,
+        snapshotName: scenario.snapshotName,
+        testFileUrl: import.meta.url,
+        timeoutMs: TIMEOUT_MS,
+      });
 
-    defineGoogleADKInstrumentationAssertions({
-      name: "auto-hook instrumentation",
-      runScenario: async ({ runNodeScenarioDir }) => {
-        await runNodeScenarioDir({
-          entry: scenario.autoEntry,
-          nodeArgs: ["--import", "braintrust/hook.mjs"],
-          runContext: {
-            variantKey: scenario.snapshotName,
-            originalScenarioDir,
-          },
-          scenarioDir,
-          timeoutMs: TIMEOUT_MS,
-        });
-      },
-      expectLLMSpan: true,
-      snapshotName: scenario.snapshotName,
-      testFileUrl: import.meta.url,
-      timeoutMs: TIMEOUT_MS,
+      defineGoogleADKInstrumentationAssertions({
+        name: "auto-hook instrumentation",
+        runScenario: async ({ runNodeScenarioDir }) => {
+          await runNodeScenarioDir({
+            entry: scenario.autoEntry,
+            nodeArgs: ["--import", "braintrust/hook.mjs"],
+            runContext: {
+              variantKey: scenario.snapshotName,
+              originalScenarioDir,
+            },
+            scenarioDir,
+            timeoutMs: TIMEOUT_MS,
+          });
+        },
+        expectLLMSpan: true,
+        snapshotName: scenario.snapshotName,
+        testFileUrl: import.meta.url,
+        timeoutMs: TIMEOUT_MS,
+      });
     });
-  });
-}
+  }
+});
diff --git a/e2e/scenarios/google-genai-instrumentation/scenario.test.ts b/e2e/scenarios/google-genai-instrumentation/scenario.test.ts
index 1cdf32bd7..418cd969c 100644
--- a/e2e/scenarios/google-genai-instrumentation/scenario.test.ts
+++ b/e2e/scenarios/google-genai-instrumentation/scenario.test.ts
@@ -47,45 +47,51 @@ const googleGenAIScenarios = await Promise.all(
   })),
 );
 
-for (const scenario of googleGenAIScenarios) {
-  const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName);
+describe.concurrent("variants", () => {
+  for (const scenario of googleGenAIScenarios) {
+    const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName);
 
-  describe(`google genai sdk ${scenario.version}`, { tags }, () => {
-    defineGoogleGenAIInstrumentationAssertions({
-      name: "wrapped instrumentation",
-      runScenario: async ({ runScenarioDir }) => {
-        await runScenarioDir({
-          entry: scenario.wrapperEntry,
-          runContext: {
-            variantKey: scenario.snapshotName,
-            originalScenarioDir,
+    describe.sequential(
+      `google genai sdk ${scenario.version}`,
+      { tags },
+      () => {
+        defineGoogleGenAIInstrumentationAssertions({
+          name: "wrapped instrumentation",
+          runScenario: async ({ runScenarioDir }) => {
+            await runScenarioDir({
+              entry: scenario.wrapperEntry,
+              runContext: {
+                variantKey: scenario.snapshotName,
+                originalScenarioDir,
+              },
+              scenarioDir,
+              timeoutMs: TIMEOUT_MS,
+            });
           },
-          scenarioDir,
+          snapshotName: scenario.snapshotName,
+          testFileUrl: import.meta.url,
           timeoutMs: TIMEOUT_MS,
         });
-      },
-      snapshotName: scenario.snapshotName,
-      testFileUrl: import.meta.url,
-      timeoutMs: TIMEOUT_MS,
-    });
 
-    defineGoogleGenAIInstrumentationAssertions({
-      name: "auto-hook instrumentation",
-      runScenario: async ({ runNodeScenarioDir }) => {
-        await runNodeScenarioDir({
-          entry: scenario.autoEntry,
-          nodeArgs: ["--import", "braintrust/hook.mjs"],
-          runContext: {
-            variantKey: scenario.snapshotName,
-            originalScenarioDir,
+        defineGoogleGenAIInstrumentationAssertions({
+          name: "auto-hook instrumentation",
+          runScenario: async ({ runNodeScenarioDir }) => {
+            await runNodeScenarioDir({
+              entry: scenario.autoEntry,
+              nodeArgs: ["--import", "braintrust/hook.mjs"],
+              runContext: {
+                variantKey: scenario.snapshotName,
+                originalScenarioDir,
+              },
+              scenarioDir,
+              timeoutMs: TIMEOUT_MS,
+            });
           },
-          scenarioDir,
+          snapshotName: scenario.snapshotName,
+          testFileUrl: import.meta.url,
           timeoutMs: TIMEOUT_MS,
         });
       },
-      snapshotName: scenario.snapshotName,
-      testFileUrl: import.meta.url,
-      timeoutMs: TIMEOUT_MS,
-    });
-  });
-}
+    );
+  }
+});
diff --git a/e2e/scenarios/huggingface-instrumentation/scenario.test.ts b/e2e/scenarios/huggingface-instrumentation/scenario.test.ts
index 594594dd7..72ac69256 100644
--- a/e2e/scenarios/huggingface-instrumentation/scenario.test.ts
+++ b/e2e/scenarios/huggingface-instrumentation/scenario.test.ts
@@ -26,45 +26,51 @@ const huggingFaceScenarios = await Promise.all(
   })),
 );
 
-for (const scenario of huggingFaceScenarios) {
-  const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName);
+describe.concurrent("variants", () => {
+  for (const scenario of huggingFaceScenarios) {
+    const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName);
 
-  describe(`huggingface inference sdk ${scenario.version}`, { tags }, () => {
-    defineHuggingFaceInstrumentationAssertions({
-      name: "wrapped instrumentation",
-      runScenario: async ({ runScenarioDir }) => {
-        await runScenarioDir({
-          entry: scenario.wrapperEntry,
-          runContext: {
-            variantKey: scenario.snapshotName,
-            originalScenarioDir,
+    describe.sequential(
+      `huggingface inference sdk ${scenario.version}`,
+      { tags },
+      () => {
+        defineHuggingFaceInstrumentationAssertions({
+          name: "wrapped instrumentation",
+          runScenario: async ({ runScenarioDir }) => {
+            await runScenarioDir({
+              entry: scenario.wrapperEntry,
+              runContext: {
+                variantKey: scenario.snapshotName,
+                originalScenarioDir,
+              },
+              scenarioDir,
+              timeoutMs: HUGGINGFACE_SCENARIO_TIMEOUT_MS,
+            });
           },
-          scenarioDir,
+          snapshotName: scenario.snapshotName,
+          testFileUrl: import.meta.url,
           timeoutMs: HUGGINGFACE_SCENARIO_TIMEOUT_MS,
         });
-      },
-      snapshotName: scenario.snapshotName,
-      testFileUrl: import.meta.url,
-      timeoutMs: HUGGINGFACE_SCENARIO_TIMEOUT_MS,
-    });
 
-    defineHuggingFaceInstrumentationAssertions({
-      name: "auto-hook instrumentation",
-      runScenario: async ({ runNodeScenarioDir }) => {
-        await runNodeScenarioDir({
-          entry: scenario.autoEntry,
-          nodeArgs: ["--import", "braintrust/hook.mjs"],
-          runContext: {
-            variantKey: scenario.snapshotName,
-            originalScenarioDir,
+        defineHuggingFaceInstrumentationAssertions({
+          name: "auto-hook instrumentation",
+          runScenario: async ({ runNodeScenarioDir }) => {
+            await runNodeScenarioDir({
+              entry: scenario.autoEntry,
+              nodeArgs: ["--import", "braintrust/hook.mjs"],
+              runContext: {
+                variantKey: scenario.snapshotName,
+                originalScenarioDir,
+              },
+              scenarioDir,
+              timeoutMs: HUGGINGFACE_SCENARIO_TIMEOUT_MS,
+            });
           },
-          scenarioDir,
+          snapshotName: scenario.snapshotName,
+          testFileUrl: import.meta.url,
           timeoutMs: HUGGINGFACE_SCENARIO_TIMEOUT_MS,
         });
       },
-      snapshotName: scenario.snapshotName,
-      testFileUrl: import.meta.url,
-      timeoutMs: HUGGINGFACE_SCENARIO_TIMEOUT_MS,
-    });
-  });
-}
+    );
+  }
+});
diff --git a/e2e/scenarios/mistral-instrumentation/scenario.test.ts b/e2e/scenarios/mistral-instrumentation/scenario.test.ts
index d7b6cf68f..e6ddddb0f 100644
--- a/e2e/scenarios/mistral-instrumentation/scenario.test.ts
+++ b/e2e/scenarios/mistral-instrumentation/scenario.test.ts
@@ -26,63 +26,65 @@ const mistralScenarios = await Promise.all(
   })),
 );
 
-for (const scenario of mistralScenarios) {
-  const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName);
+describe.concurrent("variants", () => {
+  for (const scenario of mistralScenarios) {
+    const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName);
 
-  describe(`mistral sdk ${scenario.version}`, { tags }, () => {
-    defineMistralInstrumentationAssertions({
-      name: "wrapped instrumentation",
-      runScenario: async ({ runScenarioDir }) => {
-        await runScenarioDir({
-          entry: scenario.wrapperEntry,
-          runContext: {
-            variantKey: scenario.snapshotName,
-            originalScenarioDir,
-          },
-          scenarioDir,
-          timeoutMs: MISTRAL_SCENARIO_TIMEOUT_MS,
-        });
-      },
-      snapshotName: scenario.snapshotName,
-      ...(scenario.supportsThinkingStream === false
-        ? { supportsThinkingStream: false }
-        : {}),
-      ...(scenario.supportsClassifiers === false
-        ? { supportsClassifiers: false }
-        : {}),
-      ...(scenario.supportsClassify === false
-        ? { supportsClassify: false }
-        : {}),
-      testFileUrl: import.meta.url,
-      timeoutMs: MISTRAL_SCENARIO_TIMEOUT_MS,
-    });
+    describe.sequential(`mistral sdk ${scenario.version}`, { tags }, () => {
+      defineMistralInstrumentationAssertions({
+        name: "wrapped instrumentation",
+        runScenario: async ({ runScenarioDir }) => {
+          await runScenarioDir({
+            entry: scenario.wrapperEntry,
+            runContext: {
+              variantKey: scenario.snapshotName,
+              originalScenarioDir,
+            },
+            scenarioDir,
+            timeoutMs: MISTRAL_SCENARIO_TIMEOUT_MS,
+          });
+        },
+        snapshotName: scenario.snapshotName,
+        ...(scenario.supportsThinkingStream === false
+          ? { supportsThinkingStream: false }
+          : {}),
+        ...(scenario.supportsClassifiers === false
+          ? { supportsClassifiers: false }
+          : {}),
+        ...(scenario.supportsClassify === false
+          ? { supportsClassify: false }
+          : {}),
+        testFileUrl: import.meta.url,
+        timeoutMs: MISTRAL_SCENARIO_TIMEOUT_MS,
+      });
 
-    defineMistralInstrumentationAssertions({
-      name: "auto-hook instrumentation",
-      runScenario: async ({ runNodeScenarioDir }) => {
-        await runNodeScenarioDir({
-          entry: scenario.autoEntry,
-          nodeArgs: ["--import", "braintrust/hook.mjs"],
-          runContext: {
-            variantKey: scenario.snapshotName,
-            originalScenarioDir,
-          },
-          scenarioDir,
-          timeoutMs: MISTRAL_SCENARIO_TIMEOUT_MS,
-        });
-      },
-      snapshotName: scenario.snapshotName,
-      ...(scenario.supportsThinkingStream === false
-        ? { supportsThinkingStream: false }
-        : {}),
-      ...(scenario.supportsClassifiers === false
-        ? { supportsClassifiers: false }
-        : {}),
-      ...(scenario.supportsClassify === false
-        ? { supportsClassify: false }
-        : {}),
-      testFileUrl: import.meta.url,
-      timeoutMs: MISTRAL_SCENARIO_TIMEOUT_MS,
+      defineMistralInstrumentationAssertions({
+        name: "auto-hook instrumentation",
+        runScenario: async ({ runNodeScenarioDir }) => {
+          await runNodeScenarioDir({
+            entry: scenario.autoEntry,
+            nodeArgs: ["--import", "braintrust/hook.mjs"],
+            runContext: {
+              variantKey: scenario.snapshotName,
+              originalScenarioDir,
+            },
+            scenarioDir,
+            timeoutMs: MISTRAL_SCENARIO_TIMEOUT_MS,
+          });
+        },
+        snapshotName: scenario.snapshotName,
+        ...(scenario.supportsThinkingStream === false
+          ? { supportsThinkingStream: false }
+          : {}),
+        ...(scenario.supportsClassifiers === false
+          ? { supportsClassifiers: false }
+          : {}),
+        ...(scenario.supportsClassify === false
+          ? { supportsClassify: false }
+          : {}),
+        testFileUrl: import.meta.url,
+        timeoutMs: MISTRAL_SCENARIO_TIMEOUT_MS,
+      });
     });
-  });
-}
+  }
+});
diff --git a/e2e/scenarios/openai-instrumentation/scenario.test.ts b/e2e/scenarios/openai-instrumentation/scenario.test.ts
index 6438545bc..a2cba02d6 100644
--- a/e2e/scenarios/openai-instrumentation/scenario.test.ts
+++ b/e2e/scenarios/openai-instrumentation/scenario.test.ts
@@ -43,50 +43,52 @@ const openaiScenarios = await Promise.all(
   })),
 );
 
-for (const scenario of openaiScenarios) {
-  const assertPrivateFieldMethodsOperation =
-    !scenario.disablePrivateFieldMethodsAssertion;
-  const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName);
+describe.concurrent("variants", () => {
+  for (const scenario of openaiScenarios) {
+    const assertPrivateFieldMethodsOperation =
+      !scenario.disablePrivateFieldMethodsAssertion;
+    const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName);
 
-  describe(`openai sdk ${scenario.version}`, { tags }, () => {
-    defineOpenAIInstrumentationAssertions({
-      assertPrivateFieldMethodsOperation,
-      name: "wrapped instrumentation",
-      runScenario: async ({ runScenarioDir }) => {
-        await runScenarioDir({
-          entry: scenario.wrapperEntry,
-          runContext: {
-            variantKey: scenario.snapshotName,
-            originalScenarioDir,
-          },
-          scenarioDir,
-          timeoutMs: TIMEOUT_MS,
-        });
-      },
-      snapshotName: scenario.snapshotName,
-      testFileUrl: import.meta.url,
-      timeoutMs: TIMEOUT_MS,
-      version: scenario.version,
-    });
+    describe.sequential(`openai sdk ${scenario.version}`, { tags }, () => {
+      defineOpenAIInstrumentationAssertions({
+        assertPrivateFieldMethodsOperation,
+        name: "wrapped instrumentation",
+        runScenario: async ({ runScenarioDir }) => {
+          await runScenarioDir({
+            entry: scenario.wrapperEntry,
+            runContext: {
+              variantKey: scenario.snapshotName,
+              originalScenarioDir,
+            },
+            scenarioDir,
+            timeoutMs: TIMEOUT_MS,
+          });
+        },
+        snapshotName: scenario.snapshotName,
+        testFileUrl: import.meta.url,
+        timeoutMs: TIMEOUT_MS,
+        version: scenario.version,
+      });
 
-    defineOpenAIInstrumentationAssertions({
-      name: "auto-hook instrumentation",
-      runScenario: async ({ runNodeScenarioDir }) => {
-        await runNodeScenarioDir({
-          entry: scenario.autoEntry,
-          nodeArgs: ["--import", "braintrust/hook.mjs"],
-          runContext: {
-            variantKey: scenario.snapshotName,
-            originalScenarioDir,
-          },
-          scenarioDir,
-          timeoutMs: TIMEOUT_MS,
-        });
-      },
-      snapshotName: scenario.snapshotName,
-      testFileUrl: import.meta.url,
-      timeoutMs: TIMEOUT_MS,
-      version: scenario.version,
+      defineOpenAIInstrumentationAssertions({
+        name: "auto-hook instrumentation",
+        runScenario: async ({ runNodeScenarioDir }) => {
+          await runNodeScenarioDir({
+            entry: scenario.autoEntry,
+            nodeArgs: ["--import", "braintrust/hook.mjs"],
+            runContext: {
+              variantKey: scenario.snapshotName,
+              originalScenarioDir,
+            },
+            scenarioDir,
+            timeoutMs: TIMEOUT_MS,
+          });
+        },
+        snapshotName: scenario.snapshotName,
+        testFileUrl: import.meta.url,
+        timeoutMs: TIMEOUT_MS,
+        version: scenario.version,
+      });
     });
-  });
-}
+  }
+});
diff --git a/e2e/scenarios/openrouter-instrumentation/scenario.test.ts b/e2e/scenarios/openrouter-instrumentation/scenario.test.ts
index 091c00c7a..6ff1a662d 100644
--- a/e2e/scenarios/openrouter-instrumentation/scenario.test.ts
+++ b/e2e/scenarios/openrouter-instrumentation/scenario.test.ts
@@ -37,47 +37,49 @@ const openRouterScenarios = await Promise.all(
   })),
 );
 
-for (const scenario of openRouterScenarios) {
-  const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName);
+describe.concurrent("variants", () => {
+  for (const scenario of openRouterScenarios) {
+    const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName);
 
-  describe(`openrouter sdk ${scenario.version}`, { tags }, () => {
-    defineOpenRouterTraceAssertions({
-      name: "wrapped instrumentation",
-      runScenario: async ({ runScenarioDir }) => {
-        await runScenarioDir({
-          entry: scenario.wrapperEntry,
-          runContext: {
-            variantKey: scenario.snapshotName,
-            originalScenarioDir,
-          },
-          scenarioDir,
-          timeoutMs: TIMEOUT_MS,
-        });
-      },
-      snapshotName: scenario.snapshotName,
-      supportsRerank: scenario.supportsRerank,
-      testFileUrl: import.meta.url,
-      timeoutMs: TIMEOUT_MS,
-    });
+    describe.sequential(`openrouter sdk ${scenario.version}`, { tags }, () => {
+      defineOpenRouterTraceAssertions({
+        name: "wrapped instrumentation",
+        runScenario: async ({ runScenarioDir }) => {
+          await runScenarioDir({
+            entry: scenario.wrapperEntry,
+            runContext: {
+              variantKey: scenario.snapshotName,
+              originalScenarioDir,
+            },
+            scenarioDir,
+            timeoutMs: TIMEOUT_MS,
+          });
+        },
+        snapshotName: scenario.snapshotName,
+        supportsRerank: scenario.supportsRerank,
+        testFileUrl: import.meta.url,
+        timeoutMs: TIMEOUT_MS,
+      });
 
-    defineOpenRouterTraceAssertions({
-      name: "auto-hook instrumentation",
-      runScenario: async ({ runNodeScenarioDir }) => {
-        await runNodeScenarioDir({
-          entry: scenario.autoEntry,
-          nodeArgs: ["--import", "braintrust/hook.mjs"],
-          runContext: {
-            variantKey: scenario.snapshotName,
-            originalScenarioDir,
-          },
-          scenarioDir,
-          timeoutMs: TIMEOUT_MS,
-        });
-      },
-      snapshotName: scenario.snapshotName,
-      supportsRerank: scenario.supportsRerank,
-      testFileUrl: import.meta.url,
-      timeoutMs: TIMEOUT_MS,
+      defineOpenRouterTraceAssertions({
+        name: "auto-hook instrumentation",
+        runScenario: async ({ runNodeScenarioDir }) => {
+          await runNodeScenarioDir({
+            entry: scenario.autoEntry,
+            nodeArgs: ["--import", "braintrust/hook.mjs"],
+            runContext: {
+              variantKey: scenario.snapshotName,
+              originalScenarioDir,
+            },
+            scenarioDir,
+            timeoutMs: TIMEOUT_MS,
+          });
+        },
+        snapshotName: scenario.snapshotName,
+        supportsRerank: scenario.supportsRerank,
+        testFileUrl: import.meta.url,
+        timeoutMs: TIMEOUT_MS,
+      });
     });
-  });
-}
+  }
+});
diff --git a/e2e/vitest.config.mts b/e2e/vitest.config.mts
index 9f4e1b515..d6ee62af4 100644
--- a/e2e/vitest.config.mts
+++ b/e2e/vitest.config.mts
@@ -14,6 +14,10 @@ export default defineConfig({
     slowTestThreshold: 120_000,
     // Default to one retry for provider/network flake in non-hermetic scenarios.
     retry: 1,
+    // Allow up to 5 describe blocks to run their beforeAll hooks concurrently
+    // within a file. Bounded to avoid overwhelming CI with too many subprocesses
+    // at once. Tune down if CI shows memory pressure or flaky timeouts.
+    maxConcurrency: 5,
     setupFiles: ["./vitest.setup.ts"],
     tags: [
       {
diff --git a/js/package.json b/js/package.json
index 75c3ec39e..754be23d1 100644
--- a/js/package.json
+++ b/js/package.json
@@ -118,7 +118,8 @@
     "test": "vitest run --exclude \"src/wrappers/**/*.test.ts\" --exclude \"src/otel/**/*.test.ts\" --exclude \"smoke/**/*.test.ts\" --exclude \"src/zod/**/*.test.ts\" --exclude \"tests/api-compatibility/**\"",
     "test:core": "pnpm prune && pnpm test",
     "test:checks": "pnpm run test:core && pnpm run test:vitest",
-    "test:external": "pnpm run test:external:openai && pnpm run test:external:anthropic && pnpm run test:external:google-genai && pnpm run test:external:ai-sdk && pnpm run test:external:claude-agent-sdk",
+    "test:external": "pnpm run test:external:sequential && node scripts/run-parallel.mjs test:external:ai-sdk-v5 test:external:ai-sdk-v6 test:external:claude-agent-sdk",
+    "test:external:sequential": "pnpm run test:external:openai && pnpm run test:external:anthropic && pnpm run test:external:google-genai",
     "test:external:openai": "bash scripts/test-provider.sh test:openai openai",
     "test:external:anthropic": "bash scripts/test-provider.sh test:anthropic @anthropic-ai/sdk",
     "test:external:google-genai": "bash scripts/test-provider.sh test:google-genai @google/genai",
diff --git a/js/scripts/run-parallel.mjs b/js/scripts/run-parallel.mjs
new file mode 100644
index 000000000..b9650460e
--- /dev/null
+++ b/js/scripts/run-parallel.mjs
@@ -0,0 +1,46 @@
+#!/usr/bin/env node
+// Run multiple pnpm scripts concurrently and exit non-zero if any fail.
+// Cross-platform (works on Windows, macOS, Linux).
+//
+// Usage: node scripts/run-parallel.mjs <script1> [script2 ...]
+import { spawn } from "node:child_process";
+import { dirname, join } from "node:path";
+import { fileURLToPath } from "node:url";
+
+const scripts = process.argv.slice(2);
+if (!scripts.length) {
+  console.error("Usage: run-parallel.mjs <script1> [script2 ...]");
+  process.exit(1);
+}
+
+const pnpm = process.platform === "win32" ? "pnpm.cmd" : "pnpm";
+const pkgDir = join(dirname(fileURLToPath(import.meta.url)), "..");
+
+const results = await Promise.allSettled(
+  scripts.map(
+    (script) =>
+      new Promise((resolve, reject) => {
+        const child = spawn(pnpm, ["run", script], {
+          cwd: pkgDir,
+          stdio: "inherit",
+          shell: false,
+        });
+        child.on("error", reject);
+        child.on("close", (code) => {
+          if (code === 0) {
+            resolve(script);
+          } else {
+            reject(new Error(`${script} exited with code ${code}`));
+          }
+        });
+      }),
+  ),
+);
+
+const failures = results.filter((r) => r.status === "rejected");
+for (const f of failures) {
+  console.error(f.reason.message);
+}
+if (failures.length > 0) {
+  process.exit(1);
+}

From b476f80782a61d64f070e2eba096aec055a3c1bc Mon Sep 17 00:00:00 2001
From: Stephen Belanger <stephenbelanger@s-belanger.localdomain>
Date: Wed, 6 May 2026 14:25:10 -0700
Subject: [PATCH 2/5] chore: add changeset for e2e parallelization

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .changeset/parallelize-e2e-scenarios.md | 5 +++++
 1 file changed, 5 insertions(+)
 create mode 100644 .changeset/parallelize-e2e-scenarios.md

diff --git a/.changeset/parallelize-e2e-scenarios.md b/.changeset/parallelize-e2e-scenarios.md
new file mode 100644
index 000000000..cf9520f66
--- /dev/null
+++ b/.changeset/parallelize-e2e-scenarios.md
@@ -0,0 +1,5 @@
+---
+"braintrust": patch
+---
+
+perf(e2e): parallelize scenario variants within test files

From f94b1a7559ffbfce49f57c743196c263b180cf6b Mon Sep 17 00:00:00 2001
From: Stephen Belanger <stephenbelanger@s-belanger.localdomain>
Date: Wed, 6 May 2026 14:55:28 -0700
Subject: [PATCH 3/5] fix(e2e): use describe.sequential wrappers in
 claude-agent-sdk test and shell:true in run-parallel

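- Switch the per-version describe block that contains each
  defineClaudeAgentSDKInstrumentationAssertions call from describe to
  describe.sequential so toMatchFileSnapshot has a test context when
  describe.concurrent propagates concurrency into the test bodies
- Spawn plain "pnpm" with shell: true in run-parallel.mjs so the shell resolves
  pnpm.cmd on Windows (replaces the explicit win32 "pnpm.cmd" selection)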

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../claude-agent-sdk-instrumentation/scenario.test.ts        | 4 ++--
 js/scripts/run-parallel.mjs                                  | 5 ++---
 2 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts
index ed1088cc3..39761ac8d 100644
--- a/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts
+++ b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts
@@ -56,7 +56,7 @@ const claudeAgentSDKScenarios = await Promise.all(
 describe.concurrent("wrapped instrumentation", () => {
   for (const scenario of claudeAgentSDKScenarios) {
     const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName);
-    describe(`claude agent sdk ${scenario.version}`, { tags }, () => {
+    describe.sequential(`claude agent sdk ${scenario.version}`, { tags }, () => {
       defineClaudeAgentSDKInstrumentationAssertions({
         assertLocalToolHandlerParenting: true,
         expectTaskLifecycleDetails: scenario.expectTaskLifecycleDetails,
@@ -83,7 +83,7 @@ describe.concurrent("wrapped instrumentation", () => {
 describe.concurrent("auto-hook instrumentation", () => {
   for (const scenario of claudeAgentSDKScenarios) {
     const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName);
-    describe(`claude agent sdk ${scenario.version}`, { tags }, () => {
+    describe.sequential(`claude agent sdk ${scenario.version}`, { tags }, () => {
       defineClaudeAgentSDKInstrumentationAssertions({
         assertLocalToolHandlerParenting: true,
         expectTaskLifecycleDetails: scenario.expectTaskLifecycleDetails,
diff --git a/js/scripts/run-parallel.mjs b/js/scripts/run-parallel.mjs
index b9650460e..49b23cb15 100644
--- a/js/scripts/run-parallel.mjs
+++ b/js/scripts/run-parallel.mjs
@@ -13,17 +13,16 @@ if (!scripts.length) {
   process.exit(1);
 }
 
-const pnpm = process.platform === "win32" ? "pnpm.cmd" : "pnpm";
 const pkgDir = join(dirname(fileURLToPath(import.meta.url)), "..");
 
 const results = await Promise.allSettled(
   scripts.map(
     (script) =>
       new Promise((resolve, reject) => {
-        const child = spawn(pnpm, ["run", script], {
+        const child = spawn("pnpm", ["run", script], {
           cwd: pkgDir,
           stdio: "inherit",
-          shell: false,
+          shell: true,
         });
         child.on("error", reject);
         child.on("close", (code) => {

From 27b75d0090ddc2bf707569d98835b25b3510d884 Mon Sep 17 00:00:00 2001
From: Stephen Belanger <stephenbelanger@s-belanger.localdomain>
Date: Wed, 6 May 2026 17:36:37 -0700
Subject: [PATCH 4/5] fix(e2e): update google-adk snapshots for e.span.ended
 filter and sortBySpanDepth

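Adds a sortBySpanDepth helper, which orders events by their depth in the span
tree (following each span's first parent), and switches both snapshot tests to
keep only events with e.span.ended set, replacing the previous tool-specific
output check. This way wrapped mode (single flush) and auto-hook mode (early
flushes from LLM spans) produce the same end-phase event set. Rewrites the four
google-adk snapshot files to reflect 5 end-phase events with correct runner
token metrics.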

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../google-adk-v061.log-payloads.json         | 72 +++----------------
 .../google-adk-v061.span-events.json          | 52 +++-----------
 .../google-adk-v1000.log-payloads.json        | 72 +++----------------
 .../google-adk-v1000.span-events.json         | 52 +++-----------
 .../google-adk-instrumentation/assertions.ts  | 47 ++++++++----
 5 files changed, 75 insertions(+), 220 deletions(-)

diff --git a/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061.log-payloads.json b/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061.log-payloads.json
index 406961eca..7d8b910a3 100644
--- a/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061.log-payloads.json
+++ b/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061.log-payloads.json
@@ -4,6 +4,7 @@
       "scenario": "google-adk-instrumentation"
     },
     "metrics": {
+      "end": 0,
       "start": 0
     },
     "name": "google-adk-instrumentation-root",
@@ -14,6 +15,7 @@
       "operation": "simple-run"
     },
     "metrics": {
+      "end": 0,
       "start": 0
     },
     "name": "adk-simple-run-operation",
@@ -34,7 +36,12 @@
       "provider": "google-adk"
     },
     "metrics": {
-      "start": 0
+      "completion_tokens": "<number>",
+      "duration": 0,
+      "end": 0,
+      "prompt_tokens": "<number>",
+      "start": 0,
+      "tokens": "<number>"
     },
     "name": "Google ADK Runner",
     "type": "task"
@@ -46,6 +53,8 @@
       "provider": "google-adk"
     },
     "metrics": {
+      "duration": 0,
+      "end": 0,
       "start": 0
     },
     "name": "Agent: weather_agent",
@@ -72,66 +81,5 @@
       "temperature": 72
     },
     "type": "tool"
-  },
-  {
-    "metadata": {
-      "google_adk.agent_name": "weather_agent",
-      "model": "gemini-2.5-flash-lite",
-      "provider": "google-adk"
-    },
-    "metrics": {
-      "duration": 0,
-      "end": 0,
-      "start": 0
-    },
-    "name": "Agent: weather_agent",
-    "type": "task"
-  },
-  {
-    "input": {
-      "messages": [
-        {
-          "content": "What is the weather in Paris, France?",
-          "role": "user"
-        }
-      ]
-    },
-    "metadata": {
-      "google_adk.session_id": "test-session-1",
-      "google_adk.user_id": "test-user",
-      "provider": "google-adk"
-    },
-    "metrics": {
-      "completion_tokens": "<number>",
-      "duration": 0,
-      "end": 0,
-      "prompt_tokens": "<number>",
-      "start": 0,
-      "tokens": "<number>"
-    },
-    "name": "Google ADK Runner",
-    "type": "task"
-  },
-  {
-    "metadata": {
-      "operation": "simple-run"
-    },
-    "metrics": {
-      "end": 0,
-      "start": 0
-    },
-    "name": "adk-simple-run-operation",
-    "type": null
-  },
-  {
-    "metadata": {
-      "scenario": "google-adk-instrumentation"
-    },
-    "metrics": {
-      "end": 0,
-      "start": 0
-    },
-    "name": "google-adk-instrumentation-root",
-    "type": "task"
   }
 ]
diff --git a/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061.span-events.json b/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061.span-events.json
index 2bf665c39..2f9f5d9e3 100644
--- a/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061.span-events.json
+++ b/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061.span-events.json
@@ -34,7 +34,12 @@
       "google_adk.user_id": "test-user",
       "provider": "google-adk"
     },
-    "metric_keys": [],
+    "metric_keys": [
+      "completion_tokens",
+      "duration",
+      "prompt_tokens",
+      "tokens"
+    ],
     "name": "Google ADK Runner",
     "root_span_id": "<span:1>",
     "span_id": "<span:3>",
@@ -50,7 +55,9 @@
       "model": "gemini-2.5-flash-lite",
       "provider": "google-adk"
     },
-    "metric_keys": [],
+    "metric_keys": [
+      "duration"
+    ],
     "name": "Agent: weather_agent",
     "root_span_id": "<span:1>",
     "span_id": "<span:4>",
@@ -72,49 +79,10 @@
     ],
     "name": "tool: get_weather",
     "root_span_id": "<span:1>",
-    "span_id": "<span:5>",
+    "span_id": "<span:6>",
     "span_parents": [
       "<span:4>"
     ],
     "type": "tool"
-  },
-  {
-    "has_input": false,
-    "metadata": {
-      "google_adk.agent_name": "weather_agent",
-      "model": "gemini-2.5-flash-lite",
-      "provider": "google-adk"
-    },
-    "metric_keys": [
-      "duration"
-    ],
-    "name": "Agent: weather_agent",
-    "root_span_id": "<span:1>",
-    "span_id": "<span:4>",
-    "span_parents": [
-      "<span:3>"
-    ],
-    "type": "task"
-  },
-  {
-    "has_input": true,
-    "metadata": {
-      "google_adk.session_id": "test-session-1",
-      "google_adk.user_id": "test-user",
-      "provider": "google-adk"
-    },
-    "metric_keys": [
-      "completion_tokens",
-      "duration",
-      "prompt_tokens",
-      "tokens"
-    ],
-    "name": "Google ADK Runner",
-    "root_span_id": "<span:1>",
-    "span_id": "<span:3>",
-    "span_parents": [
-      "<span:2>"
-    ],
-    "type": "task"
   }
 ]
diff --git a/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v1000.log-payloads.json b/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v1000.log-payloads.json
index 406961eca..7d8b910a3 100644
--- a/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v1000.log-payloads.json
+++ b/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v1000.log-payloads.json
@@ -4,6 +4,7 @@
       "scenario": "google-adk-instrumentation"
     },
     "metrics": {
+      "end": 0,
       "start": 0
     },
     "name": "google-adk-instrumentation-root",
@@ -14,6 +15,7 @@
       "operation": "simple-run"
     },
     "metrics": {
+      "end": 0,
       "start": 0
     },
     "name": "adk-simple-run-operation",
@@ -34,7 +36,12 @@
       "provider": "google-adk"
     },
     "metrics": {
-      "start": 0
+      "completion_tokens": "<number>",
+      "duration": 0,
+      "end": 0,
+      "prompt_tokens": "<number>",
+      "start": 0,
+      "tokens": "<number>"
     },
     "name": "Google ADK Runner",
     "type": "task"
@@ -46,6 +53,8 @@
       "provider": "google-adk"
     },
     "metrics": {
+      "duration": 0,
+      "end": 0,
       "start": 0
     },
     "name": "Agent: weather_agent",
@@ -72,66 +81,5 @@
       "temperature": 72
     },
     "type": "tool"
-  },
-  {
-    "metadata": {
-      "google_adk.agent_name": "weather_agent",
-      "model": "gemini-2.5-flash-lite",
-      "provider": "google-adk"
-    },
-    "metrics": {
-      "duration": 0,
-      "end": 0,
-      "start": 0
-    },
-    "name": "Agent: weather_agent",
-    "type": "task"
-  },
-  {
-    "input": {
-      "messages": [
-        {
-          "content": "What is the weather in Paris, France?",
-          "role": "user"
-        }
-      ]
-    },
-    "metadata": {
-      "google_adk.session_id": "test-session-1",
-      "google_adk.user_id": "test-user",
-      "provider": "google-adk"
-    },
-    "metrics": {
-      "completion_tokens": "<number>",
-      "duration": 0,
-      "end": 0,
-      "prompt_tokens": "<number>",
-      "start": 0,
-      "tokens": "<number>"
-    },
-    "name": "Google ADK Runner",
-    "type": "task"
-  },
-  {
-    "metadata": {
-      "operation": "simple-run"
-    },
-    "metrics": {
-      "end": 0,
-      "start": 0
-    },
-    "name": "adk-simple-run-operation",
-    "type": null
-  },
-  {
-    "metadata": {
-      "scenario": "google-adk-instrumentation"
-    },
-    "metrics": {
-      "end": 0,
-      "start": 0
-    },
-    "name": "google-adk-instrumentation-root",
-    "type": "task"
   }
 ]
diff --git a/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v1000.span-events.json b/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v1000.span-events.json
index 2bf665c39..2f9f5d9e3 100644
--- a/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v1000.span-events.json
+++ b/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v1000.span-events.json
@@ -34,7 +34,12 @@
       "google_adk.user_id": "test-user",
       "provider": "google-adk"
     },
-    "metric_keys": [],
+    "metric_keys": [
+      "completion_tokens",
+      "duration",
+      "prompt_tokens",
+      "tokens"
+    ],
     "name": "Google ADK Runner",
     "root_span_id": "<span:1>",
     "span_id": "<span:3>",
@@ -50,7 +55,9 @@
       "model": "gemini-2.5-flash-lite",
       "provider": "google-adk"
     },
-    "metric_keys": [],
+    "metric_keys": [
+      "duration"
+    ],
     "name": "Agent: weather_agent",
     "root_span_id": "<span:1>",
     "span_id": "<span:4>",
@@ -72,49 +79,10 @@
     ],
     "name": "tool: get_weather",
     "root_span_id": "<span:1>",
-    "span_id": "<span:5>",
+    "span_id": "<span:6>",
     "span_parents": [
       "<span:4>"
     ],
     "type": "tool"
-  },
-  {
-    "has_input": false,
-    "metadata": {
-      "google_adk.agent_name": "weather_agent",
-      "model": "gemini-2.5-flash-lite",
-      "provider": "google-adk"
-    },
-    "metric_keys": [
-      "duration"
-    ],
-    "name": "Agent: weather_agent",
-    "root_span_id": "<span:1>",
-    "span_id": "<span:4>",
-    "span_parents": [
-      "<span:3>"
-    ],
-    "type": "task"
-  },
-  {
-    "has_input": true,
-    "metadata": {
-      "google_adk.session_id": "test-session-1",
-      "google_adk.user_id": "test-user",
-      "provider": "google-adk"
-    },
-    "metric_keys": [
-      "completion_tokens",
-      "duration",
-      "prompt_tokens",
-      "tokens"
-    ],
-    "name": "Google ADK Runner",
-    "root_span_id": "<span:1>",
-    "span_id": "<span:3>",
-    "span_parents": [
-      "<span:2>"
-    ],
-    "type": "task"
   }
 ]
diff --git a/e2e/scenarios/google-adk-instrumentation/assertions.ts b/e2e/scenarios/google-adk-instrumentation/assertions.ts
index 1e6d4c2b5..85d632335 100644
--- a/e2e/scenarios/google-adk-instrumentation/assertions.ts
+++ b/e2e/scenarios/google-adk-instrumentation/assertions.ts
@@ -125,6 +125,31 @@ function dedupeSnapshotItems(items: Json[]): Json[] {
   return deduped;
 }
 
+function sortBySpanDepth(events: CapturedLogEvent[]): CapturedLogEvent[] {
+  const lastById = new Map<string, CapturedLogEvent>();
+  for (const event of events) {
+    if (event.span.id) {
+      lastById.set(event.span.id, event);
+    }
+  }
+
+  const depthCache = new Map<string, number>();
+  function getDepth(spanId: string | undefined): number {
+    if (!spanId) return 0;
+    if (depthCache.has(spanId)) return depthCache.get(spanId)!;
+    const event = lastById.get(spanId);
+    if (!event || event.span.parentIds.length === 0) {
+      depthCache.set(spanId, 0);
+      return 0;
+    }
+    const depth = 1 + getDepth(event.span.parentIds[0]);
+    depthCache.set(spanId, depth);
+    return depth;
+  }
+
+  return [...events].sort((a, b) => getDepth(a.span.id) - getDepth(b.span.id));
+}
+
 function hasOptionalADKTaskOutput(event: CapturedLogEvent): boolean {
   return (
     event.span.type === "task" &&
@@ -291,13 +316,11 @@ export function defineGoogleADKInstrumentationAssertions(options: {
     });
 
     test("matches the shared span snapshot", testConfig, async () => {
-      const relevantEvents = events.filter(
-        (e) =>
-          e.span.name !== undefined &&
-          e.span.type !== "llm" &&
-          // Wrapped mode logs an extra start-only tool row. Normalize to the
-          // terminal tool record so wrapped and auto-hook snapshots stay aligned.
-          (e.span.type !== "tool" || e.output !== undefined),
+      const relevantEvents = sortBySpanDepth(
+        events.filter(
+          (e) =>
+            e.span.name !== undefined && e.span.type !== "llm" && e.span.ended,
+        ),
       );
       const spanSummary = normalizeForSnapshot(
         dedupeSnapshotItems(
@@ -311,11 +334,11 @@ export function defineGoogleADKInstrumentationAssertions(options: {
     });
 
     test("matches the shared payload snapshot", testConfig, async () => {
-      const relevantEvents = events.filter(
-        (e) =>
-          e.span.name !== undefined &&
-          e.span.type !== "llm" &&
-          (e.span.type !== "tool" || e.output !== undefined),
+      const relevantEvents = sortBySpanDepth(
+        events.filter(
+          (e) =>
+            e.span.name !== undefined && e.span.type !== "llm" && e.span.ended,
+        ),
       );
       const payloadSummary = normalizeForSnapshot(
         dedupeSnapshotItems(

From fe2a4c83e265d6fb1fac75a8e57d5eb47e11fe2e Mon Sep 17 00:00:00 2001
From: Stephen Belanger <stephenbelanger@s-belanger.localdomain>
Date: Wed, 6 May 2026 17:42:22 -0700
Subject: [PATCH 5/5] chore(e2e): apply prettier formatting to scenario test
 files

Co-Authored-By: Claude Sonnet 4.6 <noreply@anthropic.com>
---
 .../ai-sdk-instrumentation/scenario.test.ts   |  3 +-
 .../scenario.test.ts                          | 94 ++++++++++---------
 2 files changed, 53 insertions(+), 44 deletions(-)

diff --git a/e2e/scenarios/ai-sdk-instrumentation/scenario.test.ts b/e2e/scenarios/ai-sdk-instrumentation/scenario.test.ts
index 9efe4decd..6980f5c81 100644
--- a/e2e/scenarios/ai-sdk-instrumentation/scenario.test.ts
+++ b/e2e/scenarios/ai-sdk-instrumentation/scenario.test.ts
@@ -55,7 +55,8 @@ describe.concurrent("variants", () => {
         },
         snapshotName: scenario.snapshotName,
         supportsAttachmentScenario,
-        supportsProviderCacheAssertions: scenario.supportsProviderCacheAssertions,
+        supportsProviderCacheAssertions:
+          scenario.supportsProviderCacheAssertions,
         supportsDenyOutputOverrideScenario: supportsRichInputScenarios,
         supportsGenerateObject: scenario.supportsGenerateObject,
         supportsOutputObjectScenario,
diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts
index 39761ac8d..d9a6402cc 100644
--- a/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts
+++ b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts
@@ -56,54 +56,62 @@ const claudeAgentSDKScenarios = await Promise.all(
 describe.concurrent("wrapped instrumentation", () => {
   for (const scenario of claudeAgentSDKScenarios) {
     const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName);
-    describe.sequential(`claude agent sdk ${scenario.version}`, { tags }, () => {
-      defineClaudeAgentSDKInstrumentationAssertions({
-        assertLocalToolHandlerParenting: true,
-        expectTaskLifecycleDetails: scenario.expectTaskLifecycleDetails,
-        name: "scenario",
-        runScenario: async ({ runScenarioDir }) => {
-          await runScenarioDir({
-            entry: scenario.wrapperEntry,
-            runContext: {
-              variantKey: scenario.snapshotName,
-              originalScenarioDir,
-            },
-            scenarioDir,
-            timeoutMs: TIMEOUT_MS,
-          });
-        },
-        snapshotName: scenario.snapshotName,
-        testFileUrl: import.meta.url,
-        timeoutMs: TIMEOUT_MS,
-      });
-    });
+    describe.sequential(
+      `claude agent sdk ${scenario.version}`,
+      { tags },
+      () => {
+        defineClaudeAgentSDKInstrumentationAssertions({
+          assertLocalToolHandlerParenting: true,
+          expectTaskLifecycleDetails: scenario.expectTaskLifecycleDetails,
+          name: "scenario",
+          runScenario: async ({ runScenarioDir }) => {
+            await runScenarioDir({
+              entry: scenario.wrapperEntry,
+              runContext: {
+                variantKey: scenario.snapshotName,
+                originalScenarioDir,
+              },
+              scenarioDir,
+              timeoutMs: TIMEOUT_MS,
+            });
+          },
+          snapshotName: scenario.snapshotName,
+          testFileUrl: import.meta.url,
+          timeoutMs: TIMEOUT_MS,
+        });
+      },
+    );
   }
 });
 
 describe.concurrent("auto-hook instrumentation", () => {
   for (const scenario of claudeAgentSDKScenarios) {
     const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName);
-    describe.sequential(`claude agent sdk ${scenario.version}`, { tags }, () => {
-      defineClaudeAgentSDKInstrumentationAssertions({
-        assertLocalToolHandlerParenting: true,
-        expectTaskLifecycleDetails: scenario.expectTaskLifecycleDetails,
-        name: "scenario",
-        runScenario: async ({ runNodeScenarioDir }) => {
-          await runNodeScenarioDir({
-            entry: scenario.autoEntry,
-            nodeArgs: ["--import", "braintrust/hook.mjs"],
-            runContext: {
-              variantKey: scenario.snapshotName,
-              originalScenarioDir,
-            },
-            scenarioDir,
-            timeoutMs: TIMEOUT_MS,
-          });
-        },
-        snapshotName: scenario.snapshotName,
-        testFileUrl: import.meta.url,
-        timeoutMs: TIMEOUT_MS,
-      });
-    });
+    describe.sequential(
+      `claude agent sdk ${scenario.version}`,
+      { tags },
+      () => {
+        defineClaudeAgentSDKInstrumentationAssertions({
+          assertLocalToolHandlerParenting: true,
+          expectTaskLifecycleDetails: scenario.expectTaskLifecycleDetails,
+          name: "scenario",
+          runScenario: async ({ runNodeScenarioDir }) => {
+            await runNodeScenarioDir({
+              entry: scenario.autoEntry,
+              nodeArgs: ["--import", "braintrust/hook.mjs"],
+              runContext: {
+                variantKey: scenario.snapshotName,
+                originalScenarioDir,
+              },
+              scenarioDir,
+              timeoutMs: TIMEOUT_MS,
+            });
+          },
+          snapshotName: scenario.snapshotName,
+          testFileUrl: import.meta.url,
+          timeoutMs: TIMEOUT_MS,
+        });
+      },
+    );
   }
 });