From ef6b9e94130bf0227dccb98aed7a5f38904e6124 Mon Sep 17 00:00:00 2001 From: Stephen Belanger Date: Wed, 6 May 2026 14:13:20 -0700 Subject: [PATCH 1/5] perf(e2e): parallelize scenario variants within test files Each multi-variant e2e scenario (e.g. anthropic: 6 SDK versions, ai-sdk: 4 versions) previously ran all describe blocks sequentially inside one Vitest file. Since each withScenarioHarness call starts an isolated mock server on its own ephemeral port with a unique testRunId, the tests are already concurrency-safe. Wrap the outer for-loops in describe.concurrent so all SDK versions run their beforeAll subprocess hooks in parallel (bounded by maxConcurrency: 5). Inner describe.sequential blocks keep "wrapped" and "auto-hook" variants for the same version sequential, avoiding snapshot write races on --update runs. Also parallelize test:external in js/: the isolated ai-sdk-v5, ai-sdk-v6, and claude-agent-sdk suites (each with their own node_modules) now run concurrently via a new cross-platform run-parallel.mjs script, while openai/anthropic/google-genai (shared node_modules + pnpm prune) remain sequential. Co-Authored-By: Claude Sonnet 4.6 --- .../ai-sdk-instrumentation/scenario.test.ts | 129 +++++++++--------- .../scenario.test.ts | 96 ++++++------- .../scenario.test.ts | 4 +- .../cohere-instrumentation/scenario.test.ts | 114 ++++++++-------- .../scenario.test.ts | 84 ++++++------ .../scenario.test.ts | 72 +++++----- .../scenario.test.ts | 72 +++++----- .../mistral-instrumentation/scenario.test.ts | 116 ++++++++-------- .../openai-instrumentation/scenario.test.ts | 90 ++++++------ .../scenario.test.ts | 84 ++++++------ e2e/vitest.config.mts | 4 + js/package.json | 3 +- js/scripts/run-parallel.mjs | 46 +++++++ 13 files changed, 496 insertions(+), 418 deletions(-) create mode 100644 js/scripts/run-parallel.mjs diff --git a/e2e/scenarios/ai-sdk-instrumentation/scenario.test.ts b/e2e/scenarios/ai-sdk-instrumentation/scenario.test.ts index daa037e9f..9efe4decd 100644 --- a/e2e/scenarios/ai-sdk-instrumentation/scenario.test.ts +++ b/e2e/scenarios/ai-sdk-instrumentation/scenario.test.ts @@ -30,69 +30,72 @@ function parseMajorVersion(version: string): number { return Number.isFinite(major) ? major : 0; } -for (const scenario of aiSDKScenarios) { - const sdkMajorVersion = parseMajorVersion(scenario.version); - const supportsRichInputScenarios = sdkMajorVersion >= 5; - const supportsOutputObjectScenario = supportsRichInputScenarios; - const supportsAttachmentScenario = supportsRichInputScenarios; +describe.concurrent("variants", () => { + for (const scenario of aiSDKScenarios) { + const sdkMajorVersion = parseMajorVersion(scenario.version); + const supportsRichInputScenarios = sdkMajorVersion >= 5; + const supportsOutputObjectScenario = supportsRichInputScenarios; + const supportsAttachmentScenario = supportsRichInputScenarios; + const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); - const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); - describe(`ai sdk ${scenario.version}`, { tags }, () => { - defineAISDKInstrumentationAssertions({ - agentSpanName: scenario.agentSpanName, - name: "wrapped instrumentation", - runScenario: async ({ runScenarioDir }) => { - await runScenarioDir({ - entry: scenario.wrapperEntry, - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, - }, - scenarioDir, - timeoutMs: AI_SDK_SCENARIO_TIMEOUT_MS, - }); - }, - snapshotName: scenario.snapshotName, - supportsAttachmentScenario, - supportsProviderCacheAssertions: scenario.supportsProviderCacheAssertions, - supportsDenyOutputOverrideScenario: supportsRichInputScenarios, - supportsGenerateObject: scenario.supportsGenerateObject, - supportsOutputObjectScenario, - supportsRerank: scenario.supportsRerank !== false, - supportsStreamObject: scenario.supportsStreamObject, - supportsToolExecution: scenario.supportsToolExecution, - sdkMajorVersion, - testFileUrl: import.meta.url, - timeoutMs: AI_SDK_SCENARIO_TIMEOUT_MS, - }); + describe.sequential(`ai sdk ${scenario.version}`, { tags }, () => { + defineAISDKInstrumentationAssertions({ + agentSpanName: scenario.agentSpanName, + name: "wrapped instrumentation", + runScenario: async ({ runScenarioDir }) => { + await runScenarioDir({ + entry: scenario.wrapperEntry, + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: AI_SDK_SCENARIO_TIMEOUT_MS, + }); + }, + snapshotName: scenario.snapshotName, + supportsAttachmentScenario, + supportsProviderCacheAssertions: scenario.supportsProviderCacheAssertions, + supportsDenyOutputOverrideScenario: supportsRichInputScenarios, + supportsGenerateObject: scenario.supportsGenerateObject, + supportsOutputObjectScenario, + supportsRerank: scenario.supportsRerank !== false, + supportsStreamObject: scenario.supportsStreamObject, + supportsToolExecution: scenario.supportsToolExecution, + sdkMajorVersion, + testFileUrl: import.meta.url, + timeoutMs: AI_SDK_SCENARIO_TIMEOUT_MS, + }); - defineAISDKInstrumentationAssertions({ - agentSpanName: scenario.agentSpanName, - name: "auto-hook instrumentation", - runScenario: async ({ runNodeScenarioDir }) => { - await runNodeScenarioDir({ - entry: scenario.autoEntry, - nodeArgs: ["--import", "braintrust/hook.mjs"], - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, - }, - scenarioDir, - timeoutMs: AI_SDK_SCENARIO_TIMEOUT_MS, - }); - }, - snapshotName: scenario.snapshotName, - supportsAttachmentScenario, - supportsProviderCacheAssertions: scenario.supportsProviderCacheAssertions, - supportsDenyOutputOverrideScenario: supportsRichInputScenarios, - supportsGenerateObject: scenario.supportsGenerateObject, - supportsOutputObjectScenario, - supportsRerank: scenario.supportsRerank !== false, - supportsStreamObject: scenario.supportsStreamObject, - supportsToolExecution: scenario.supportsToolExecution, - sdkMajorVersion, - testFileUrl: import.meta.url, - timeoutMs: AI_SDK_SCENARIO_TIMEOUT_MS, + defineAISDKInstrumentationAssertions({ + agentSpanName: scenario.agentSpanName, + name: "auto-hook instrumentation", + runScenario: async ({ runNodeScenarioDir }) => { + await runNodeScenarioDir({ + entry: scenario.autoEntry, + nodeArgs: ["--import", "braintrust/hook.mjs"], + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: AI_SDK_SCENARIO_TIMEOUT_MS, + }); + }, + snapshotName: scenario.snapshotName, + supportsAttachmentScenario, + supportsProviderCacheAssertions: + scenario.supportsProviderCacheAssertions, + supportsDenyOutputOverrideScenario: supportsRichInputScenarios, + supportsGenerateObject: scenario.supportsGenerateObject, + supportsOutputObjectScenario, + supportsRerank: scenario.supportsRerank !== false, + supportsStreamObject: scenario.supportsStreamObject, + supportsToolExecution: scenario.supportsToolExecution, + sdkMajorVersion, + testFileUrl: import.meta.url, + timeoutMs: AI_SDK_SCENARIO_TIMEOUT_MS, + }); }); - }); -} + } +}); diff --git a/e2e/scenarios/anthropic-instrumentation/scenario.test.ts b/e2e/scenarios/anthropic-instrumentation/scenario.test.ts index 007dced98..709344571 100644 --- a/e2e/scenarios/anthropic-instrumentation/scenario.test.ts +++ b/e2e/scenarios/anthropic-instrumentation/scenario.test.ts @@ -74,52 +74,54 @@ const anthropicScenarios = await Promise.all( })), ); -for (const scenario of anthropicScenarios) { - const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); - describe(`anthropic sdk ${scenario.version}`, { tags }, () => { - defineAnthropicInstrumentationAssertions({ - name: "wrapped instrumentation", - runScenario: async ({ runScenarioDir }) => { - await runScenarioDir({ - entry: scenario.wrapperEntry, - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, - }, - scenarioDir, - timeoutMs: TIMEOUT_MS, - }); - }, - snapshotName: scenario.snapshotName, - supportsBetaMessages: scenario.supportsBetaMessages, - supportsBetaToolRunner: scenario.supportsBetaToolRunner ?? true, - supportsServerToolUse: scenario.supportsServerToolUse ?? true, - supportsThinking: scenario.supportsThinking, - testFileUrl: import.meta.url, - timeoutMs: TIMEOUT_MS, - }); +describe.concurrent("variants", () => { + for (const scenario of anthropicScenarios) { + const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); + describe.sequential(`anthropic sdk ${scenario.version}`, { tags }, () => { + defineAnthropicInstrumentationAssertions({ + name: "wrapped instrumentation", + runScenario: async ({ runScenarioDir }) => { + await runScenarioDir({ + entry: scenario.wrapperEntry, + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); + }, + snapshotName: scenario.snapshotName, + supportsBetaMessages: scenario.supportsBetaMessages, + supportsBetaToolRunner: scenario.supportsBetaToolRunner ?? true, + supportsServerToolUse: scenario.supportsServerToolUse ?? true, + supportsThinking: scenario.supportsThinking, + testFileUrl: import.meta.url, + timeoutMs: TIMEOUT_MS, + }); - defineAnthropicInstrumentationAssertions({ - name: "auto-hook instrumentation", - runScenario: async ({ runNodeScenarioDir }) => { - await runNodeScenarioDir({ - entry: scenario.autoEntry, - nodeArgs: ["--import", "braintrust/hook.mjs"], - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, - }, - scenarioDir, - timeoutMs: TIMEOUT_MS, - }); - }, - snapshotName: scenario.snapshotName, - supportsBetaMessages: scenario.supportsBetaMessages, - supportsBetaToolRunner: scenario.supportsBetaToolRunner ?? true, - supportsServerToolUse: scenario.supportsServerToolUse ?? true, - supportsThinking: scenario.supportsThinking, - testFileUrl: import.meta.url, - timeoutMs: TIMEOUT_MS, + defineAnthropicInstrumentationAssertions({ + name: "auto-hook instrumentation", + runScenario: async ({ runNodeScenarioDir }) => { + await runNodeScenarioDir({ + entry: scenario.autoEntry, + nodeArgs: ["--import", "braintrust/hook.mjs"], + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); + }, + snapshotName: scenario.snapshotName, + supportsBetaMessages: scenario.supportsBetaMessages, + supportsBetaToolRunner: scenario.supportsBetaToolRunner ?? true, + supportsServerToolUse: scenario.supportsServerToolUse ?? true, + supportsThinking: scenario.supportsThinking, + testFileUrl: import.meta.url, + timeoutMs: TIMEOUT_MS, + }); }); - }); -} + } +}); diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts index 9670e4b5b..ed1088cc3 100644 --- a/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts @@ -53,7 +53,7 @@ const claudeAgentSDKScenarios = await Promise.all( }), ); -describe("wrapped instrumentation", () => { +describe.concurrent("wrapped instrumentation", () => { for (const scenario of claudeAgentSDKScenarios) { const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); describe(`claude agent sdk ${scenario.version}`, { tags }, () => { @@ -80,7 +80,7 @@ describe("wrapped instrumentation", () => { } }); -describe("auto-hook instrumentation", () => { +describe.concurrent("auto-hook instrumentation", () => { for (const scenario of claudeAgentSDKScenarios) { const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); describe(`claude agent sdk ${scenario.version}`, { tags }, () => { diff --git a/e2e/scenarios/cohere-instrumentation/scenario.test.ts b/e2e/scenarios/cohere-instrumentation/scenario.test.ts index 0dd756a62..95e446e8a 100644 --- a/e2e/scenarios/cohere-instrumentation/scenario.test.ts +++ b/e2e/scenarios/cohere-instrumentation/scenario.test.ts @@ -26,62 +26,64 @@ const cohereScenarios = await Promise.all( })), ); -for (const scenario of cohereScenarios) { - const supportsThinking = scenario.supportsThinking ?? true; - const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); +describe.concurrent("variants", () => { + for (const scenario of cohereScenarios) { + const supportsThinking = scenario.supportsThinking ?? true; + const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); - describe(`cohere sdk ${scenario.version}`, { tags }, () => { - defineCohereInstrumentationAssertions({ - name: "wrapped instrumentation", - runScenario: async ({ runScenarioDir }) => { - await runScenarioDir({ - entry: scenario.wrapperEntry, - env: { - COHERE_PACKAGE_NAME: scenario.dependencyName, - COHERE_SUPPORTS_THINKING: supportsThinking ? "1" : "0", - }, - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, - }, - scenarioDir, - timeoutMs: COHERE_SCENARIO_TIMEOUT_MS, - }); - }, - requireChatStreamOutput: scenario.snapshotName !== "cohere-v7-14-0", - snapshotName: - scenario.snapshotName === "cohere-v7-14-0" - ? "cohere-v7-14-0-wrapped" - : scenario.snapshotName, - supportsThinking, - testFileUrl: import.meta.url, - timeoutMs: COHERE_SCENARIO_TIMEOUT_MS, - useV2Namespace: scenario.useV2Namespace ?? false, - }); + describe.sequential(`cohere sdk ${scenario.version}`, { tags }, () => { + defineCohereInstrumentationAssertions({ + name: "wrapped instrumentation", + runScenario: async ({ runScenarioDir }) => { + await runScenarioDir({ + entry: scenario.wrapperEntry, + env: { + COHERE_PACKAGE_NAME: scenario.dependencyName, + COHERE_SUPPORTS_THINKING: supportsThinking ? "1" : "0", + }, + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: COHERE_SCENARIO_TIMEOUT_MS, + }); + }, + requireChatStreamOutput: scenario.snapshotName !== "cohere-v7-14-0", + snapshotName: + scenario.snapshotName === "cohere-v7-14-0" + ? "cohere-v7-14-0-wrapped" + : scenario.snapshotName, + supportsThinking, + testFileUrl: import.meta.url, + timeoutMs: COHERE_SCENARIO_TIMEOUT_MS, + useV2Namespace: scenario.useV2Namespace ?? false, + }); - defineCohereInstrumentationAssertions({ - name: "auto-hook instrumentation", - runScenario: async ({ runNodeScenarioDir }) => { - await runNodeScenarioDir({ - entry: scenario.autoEntry, - env: { - COHERE_PACKAGE_NAME: scenario.dependencyName, - COHERE_SUPPORTS_THINKING: supportsThinking ? "1" : "0", - }, - nodeArgs: ["--import", "braintrust/hook.mjs"], - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, - }, - scenarioDir, - timeoutMs: COHERE_SCENARIO_TIMEOUT_MS, - }); - }, - snapshotName: scenario.snapshotName, - supportsThinking, - testFileUrl: import.meta.url, - timeoutMs: COHERE_SCENARIO_TIMEOUT_MS, - useV2Namespace: scenario.useV2Namespace ?? false, + defineCohereInstrumentationAssertions({ + name: "auto-hook instrumentation", + runScenario: async ({ runNodeScenarioDir }) => { + await runNodeScenarioDir({ + entry: scenario.autoEntry, + env: { + COHERE_PACKAGE_NAME: scenario.dependencyName, + COHERE_SUPPORTS_THINKING: supportsThinking ? "1" : "0", + }, + nodeArgs: ["--import", "braintrust/hook.mjs"], + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: COHERE_SCENARIO_TIMEOUT_MS, + }); + }, + snapshotName: scenario.snapshotName, + supportsThinking, + testFileUrl: import.meta.url, + timeoutMs: COHERE_SCENARIO_TIMEOUT_MS, + useV2Namespace: scenario.useV2Namespace ?? false, + }); }); - }); -} + } +}); diff --git a/e2e/scenarios/google-adk-instrumentation/scenario.test.ts b/e2e/scenarios/google-adk-instrumentation/scenario.test.ts index db86866b1..87cfbd5be 100644 --- a/e2e/scenarios/google-adk-instrumentation/scenario.test.ts +++ b/e2e/scenarios/google-adk-instrumentation/scenario.test.ts @@ -35,47 +35,49 @@ const googleADKScenarios = await Promise.all( })), ); -for (const scenario of googleADKScenarios) { - const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); +describe.concurrent("variants", () => { + for (const scenario of googleADKScenarios) { + const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); - describe(`google adk sdk ${scenario.version}`, { tags }, () => { - defineGoogleADKInstrumentationAssertions({ - name: "wrapped instrumentation", - runScenario: async ({ runScenarioDir }) => { - await runScenarioDir({ - entry: scenario.wrapperEntry, - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, - }, - scenarioDir, - timeoutMs: TIMEOUT_MS, - }); - }, - expectLLMSpan: false, - snapshotName: scenario.snapshotName, - testFileUrl: import.meta.url, - timeoutMs: TIMEOUT_MS, - }); + describe.sequential(`google adk sdk ${scenario.version}`, { tags }, () => { + defineGoogleADKInstrumentationAssertions({ + name: "wrapped instrumentation", + runScenario: async ({ runScenarioDir }) => { + await runScenarioDir({ + entry: scenario.wrapperEntry, + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); + }, + expectLLMSpan: false, + snapshotName: scenario.snapshotName, + testFileUrl: import.meta.url, + timeoutMs: TIMEOUT_MS, + }); - defineGoogleADKInstrumentationAssertions({ - name: "auto-hook instrumentation", - runScenario: async ({ runNodeScenarioDir }) => { - await runNodeScenarioDir({ - entry: scenario.autoEntry, - nodeArgs: ["--import", "braintrust/hook.mjs"], - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, - }, - scenarioDir, - timeoutMs: TIMEOUT_MS, - }); - }, - expectLLMSpan: true, - snapshotName: scenario.snapshotName, - testFileUrl: import.meta.url, - timeoutMs: TIMEOUT_MS, + defineGoogleADKInstrumentationAssertions({ + name: "auto-hook instrumentation", + runScenario: async ({ runNodeScenarioDir }) => { + await runNodeScenarioDir({ + entry: scenario.autoEntry, + nodeArgs: ["--import", "braintrust/hook.mjs"], + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); + }, + expectLLMSpan: true, + snapshotName: scenario.snapshotName, + testFileUrl: import.meta.url, + timeoutMs: TIMEOUT_MS, + }); }); - }); -} + } +}); diff --git a/e2e/scenarios/google-genai-instrumentation/scenario.test.ts b/e2e/scenarios/google-genai-instrumentation/scenario.test.ts index 1cdf32bd7..418cd969c 100644 --- a/e2e/scenarios/google-genai-instrumentation/scenario.test.ts +++ b/e2e/scenarios/google-genai-instrumentation/scenario.test.ts @@ -47,45 +47,51 @@ const googleGenAIScenarios = await Promise.all( })), ); -for (const scenario of googleGenAIScenarios) { - const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); +describe.concurrent("variants", () => { + for (const scenario of googleGenAIScenarios) { + const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); - describe(`google genai sdk ${scenario.version}`, { tags }, () => { - defineGoogleGenAIInstrumentationAssertions({ - name: "wrapped instrumentation", - runScenario: async ({ runScenarioDir }) => { - await runScenarioDir({ - entry: scenario.wrapperEntry, - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, + describe.sequential( + `google genai sdk ${scenario.version}`, + { tags }, + () => { + defineGoogleGenAIInstrumentationAssertions({ + name: "wrapped instrumentation", + runScenario: async ({ runScenarioDir }) => { + await runScenarioDir({ + entry: scenario.wrapperEntry, + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); }, - scenarioDir, + snapshotName: scenario.snapshotName, + testFileUrl: import.meta.url, timeoutMs: TIMEOUT_MS, }); - }, - snapshotName: scenario.snapshotName, - testFileUrl: import.meta.url, - timeoutMs: TIMEOUT_MS, - }); - defineGoogleGenAIInstrumentationAssertions({ - name: "auto-hook instrumentation", - runScenario: async ({ runNodeScenarioDir }) => { - await runNodeScenarioDir({ - entry: scenario.autoEntry, - nodeArgs: ["--import", "braintrust/hook.mjs"], - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, + defineGoogleGenAIInstrumentationAssertions({ + name: "auto-hook instrumentation", + runScenario: async ({ runNodeScenarioDir }) => { + await runNodeScenarioDir({ + entry: scenario.autoEntry, + nodeArgs: ["--import", "braintrust/hook.mjs"], + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); }, - scenarioDir, + snapshotName: scenario.snapshotName, + testFileUrl: import.meta.url, timeoutMs: TIMEOUT_MS, }); }, - snapshotName: scenario.snapshotName, - testFileUrl: import.meta.url, - timeoutMs: TIMEOUT_MS, - }); - }); -} + ); + } +}); diff --git a/e2e/scenarios/huggingface-instrumentation/scenario.test.ts b/e2e/scenarios/huggingface-instrumentation/scenario.test.ts index 594594dd7..72ac69256 100644 --- a/e2e/scenarios/huggingface-instrumentation/scenario.test.ts +++ b/e2e/scenarios/huggingface-instrumentation/scenario.test.ts @@ -26,45 +26,51 @@ const huggingFaceScenarios = await Promise.all( })), ); -for (const scenario of huggingFaceScenarios) { - const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); +describe.concurrent("variants", () => { + for (const scenario of huggingFaceScenarios) { + const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); - describe(`huggingface inference sdk ${scenario.version}`, { tags }, () => { - defineHuggingFaceInstrumentationAssertions({ - name: "wrapped instrumentation", - runScenario: async ({ runScenarioDir }) => { - await runScenarioDir({ - entry: scenario.wrapperEntry, - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, + describe.sequential( + `huggingface inference sdk ${scenario.version}`, + { tags }, + () => { + defineHuggingFaceInstrumentationAssertions({ + name: "wrapped instrumentation", + runScenario: async ({ runScenarioDir }) => { + await runScenarioDir({ + entry: scenario.wrapperEntry, + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: HUGGINGFACE_SCENARIO_TIMEOUT_MS, + }); }, - scenarioDir, + snapshotName: scenario.snapshotName, + testFileUrl: import.meta.url, timeoutMs: HUGGINGFACE_SCENARIO_TIMEOUT_MS, }); - }, - snapshotName: scenario.snapshotName, - testFileUrl: import.meta.url, - timeoutMs: HUGGINGFACE_SCENARIO_TIMEOUT_MS, - }); - defineHuggingFaceInstrumentationAssertions({ - name: "auto-hook instrumentation", - runScenario: async ({ runNodeScenarioDir }) => { - await runNodeScenarioDir({ - entry: scenario.autoEntry, - nodeArgs: ["--import", "braintrust/hook.mjs"], - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, + defineHuggingFaceInstrumentationAssertions({ + name: "auto-hook instrumentation", + runScenario: async ({ runNodeScenarioDir }) => { + await runNodeScenarioDir({ + entry: scenario.autoEntry, + nodeArgs: ["--import", "braintrust/hook.mjs"], + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: HUGGINGFACE_SCENARIO_TIMEOUT_MS, + }); }, - scenarioDir, + snapshotName: scenario.snapshotName, + testFileUrl: import.meta.url, timeoutMs: HUGGINGFACE_SCENARIO_TIMEOUT_MS, }); }, - snapshotName: scenario.snapshotName, - testFileUrl: import.meta.url, - timeoutMs: HUGGINGFACE_SCENARIO_TIMEOUT_MS, - }); - }); -} + ); + } +}); diff --git a/e2e/scenarios/mistral-instrumentation/scenario.test.ts b/e2e/scenarios/mistral-instrumentation/scenario.test.ts index d7b6cf68f..e6ddddb0f 100644 --- a/e2e/scenarios/mistral-instrumentation/scenario.test.ts +++ b/e2e/scenarios/mistral-instrumentation/scenario.test.ts @@ -26,63 +26,65 @@ const mistralScenarios = await Promise.all( })), ); -for (const scenario of mistralScenarios) { - const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); +describe.concurrent("variants", () => { + for (const scenario of mistralScenarios) { + const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); - describe(`mistral sdk ${scenario.version}`, { tags }, () => { - defineMistralInstrumentationAssertions({ - name: "wrapped instrumentation", - runScenario: async ({ runScenarioDir }) => { - await runScenarioDir({ - entry: scenario.wrapperEntry, - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, - }, - scenarioDir, - timeoutMs: MISTRAL_SCENARIO_TIMEOUT_MS, - }); - }, - snapshotName: scenario.snapshotName, - ...(scenario.supportsThinkingStream === false - ? { supportsThinkingStream: false } - : {}), - ...(scenario.supportsClassifiers === false - ? { supportsClassifiers: false } - : {}), - ...(scenario.supportsClassify === false - ? { supportsClassify: false } - : {}), - testFileUrl: import.meta.url, - timeoutMs: MISTRAL_SCENARIO_TIMEOUT_MS, - }); + describe.sequential(`mistral sdk ${scenario.version}`, { tags }, () => { + defineMistralInstrumentationAssertions({ + name: "wrapped instrumentation", + runScenario: async ({ runScenarioDir }) => { + await runScenarioDir({ + entry: scenario.wrapperEntry, + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: MISTRAL_SCENARIO_TIMEOUT_MS, + }); + }, + snapshotName: scenario.snapshotName, + ...(scenario.supportsThinkingStream === false + ? { supportsThinkingStream: false } + : {}), + ...(scenario.supportsClassifiers === false + ? { supportsClassifiers: false } + : {}), + ...(scenario.supportsClassify === false + ? { supportsClassify: false } + : {}), + testFileUrl: import.meta.url, + timeoutMs: MISTRAL_SCENARIO_TIMEOUT_MS, + }); - defineMistralInstrumentationAssertions({ - name: "auto-hook instrumentation", - runScenario: async ({ runNodeScenarioDir }) => { - await runNodeScenarioDir({ - entry: scenario.autoEntry, - nodeArgs: ["--import", "braintrust/hook.mjs"], - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, - }, - scenarioDir, - timeoutMs: MISTRAL_SCENARIO_TIMEOUT_MS, - }); - }, - snapshotName: scenario.snapshotName, - ...(scenario.supportsThinkingStream === false - ? { supportsThinkingStream: false } - : {}), - ...(scenario.supportsClassifiers === false - ? { supportsClassifiers: false } - : {}), - ...(scenario.supportsClassify === false - ? { supportsClassify: false } - : {}), - testFileUrl: import.meta.url, - timeoutMs: MISTRAL_SCENARIO_TIMEOUT_MS, + defineMistralInstrumentationAssertions({ + name: "auto-hook instrumentation", + runScenario: async ({ runNodeScenarioDir }) => { + await runNodeScenarioDir({ + entry: scenario.autoEntry, + nodeArgs: ["--import", "braintrust/hook.mjs"], + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: MISTRAL_SCENARIO_TIMEOUT_MS, + }); + }, + snapshotName: scenario.snapshotName, + ...(scenario.supportsThinkingStream === false + ? { supportsThinkingStream: false } + : {}), + ...(scenario.supportsClassifiers === false + ? { supportsClassifiers: false } + : {}), + ...(scenario.supportsClassify === false + ? { supportsClassify: false } + : {}), + testFileUrl: import.meta.url, + timeoutMs: MISTRAL_SCENARIO_TIMEOUT_MS, + }); }); - }); -} + } +}); diff --git a/e2e/scenarios/openai-instrumentation/scenario.test.ts b/e2e/scenarios/openai-instrumentation/scenario.test.ts index 6438545bc..a2cba02d6 100644 --- a/e2e/scenarios/openai-instrumentation/scenario.test.ts +++ b/e2e/scenarios/openai-instrumentation/scenario.test.ts @@ -43,50 +43,52 @@ const openaiScenarios = await Promise.all( })), ); -for (const scenario of openaiScenarios) { - const assertPrivateFieldMethodsOperation = - !scenario.disablePrivateFieldMethodsAssertion; - const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); +describe.concurrent("variants", () => { + for (const scenario of openaiScenarios) { + const assertPrivateFieldMethodsOperation = + !scenario.disablePrivateFieldMethodsAssertion; + const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); - describe(`openai sdk ${scenario.version}`, { tags }, () => { - defineOpenAIInstrumentationAssertions({ - assertPrivateFieldMethodsOperation, - name: "wrapped instrumentation", - runScenario: async ({ runScenarioDir }) => { - await runScenarioDir({ - entry: scenario.wrapperEntry, - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, - }, - scenarioDir, - timeoutMs: TIMEOUT_MS, - }); - }, - snapshotName: scenario.snapshotName, - testFileUrl: import.meta.url, - timeoutMs: TIMEOUT_MS, - version: scenario.version, - }); + describe.sequential(`openai sdk ${scenario.version}`, { tags }, () => { + defineOpenAIInstrumentationAssertions({ + assertPrivateFieldMethodsOperation, + name: "wrapped instrumentation", + runScenario: async ({ runScenarioDir }) => { + await runScenarioDir({ + entry: scenario.wrapperEntry, + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); + }, + snapshotName: scenario.snapshotName, + testFileUrl: import.meta.url, + timeoutMs: TIMEOUT_MS, + version: scenario.version, + }); - defineOpenAIInstrumentationAssertions({ - name: "auto-hook instrumentation", - runScenario: async ({ runNodeScenarioDir }) => { - await runNodeScenarioDir({ - entry: scenario.autoEntry, - nodeArgs: ["--import", "braintrust/hook.mjs"], - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, - }, - scenarioDir, - timeoutMs: TIMEOUT_MS, - }); - }, - snapshotName: scenario.snapshotName, - testFileUrl: import.meta.url, - timeoutMs: TIMEOUT_MS, - version: scenario.version, + defineOpenAIInstrumentationAssertions({ + name: "auto-hook instrumentation", + runScenario: async ({ runNodeScenarioDir }) => { + await runNodeScenarioDir({ + entry: scenario.autoEntry, + nodeArgs: ["--import", "braintrust/hook.mjs"], + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); + }, + snapshotName: scenario.snapshotName, + testFileUrl: import.meta.url, + timeoutMs: TIMEOUT_MS, + version: scenario.version, + }); }); - }); -} + } +}); diff --git a/e2e/scenarios/openrouter-instrumentation/scenario.test.ts b/e2e/scenarios/openrouter-instrumentation/scenario.test.ts index 091c00c7a..6ff1a662d 100644 --- a/e2e/scenarios/openrouter-instrumentation/scenario.test.ts +++ b/e2e/scenarios/openrouter-instrumentation/scenario.test.ts @@ -37,47 +37,49 @@ const openRouterScenarios = await Promise.all( })), ); -for (const scenario of openRouterScenarios) { - const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); +describe.concurrent("variants", () => { + for (const scenario of openRouterScenarios) { + const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); - describe(`openrouter sdk ${scenario.version}`, { tags }, () => { - defineOpenRouterTraceAssertions({ - name: "wrapped instrumentation", - runScenario: async ({ runScenarioDir }) => { - await runScenarioDir({ - entry: scenario.wrapperEntry, - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, - }, - scenarioDir, - timeoutMs: TIMEOUT_MS, - }); - }, - snapshotName: scenario.snapshotName, - supportsRerank: scenario.supportsRerank, - testFileUrl: import.meta.url, - timeoutMs: TIMEOUT_MS, - }); + describe.sequential(`openrouter sdk ${scenario.version}`, { tags }, () => { + defineOpenRouterTraceAssertions({ + name: "wrapped instrumentation", + runScenario: async ({ runScenarioDir }) => { + await runScenarioDir({ + entry: scenario.wrapperEntry, + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); + }, + snapshotName: scenario.snapshotName, + supportsRerank: scenario.supportsRerank, + testFileUrl: import.meta.url, + timeoutMs: TIMEOUT_MS, + }); - defineOpenRouterTraceAssertions({ - name: "auto-hook instrumentation", - runScenario: async ({ runNodeScenarioDir }) => { - await runNodeScenarioDir({ - entry: scenario.autoEntry, - nodeArgs: ["--import", "braintrust/hook.mjs"], - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, - }, - scenarioDir, - timeoutMs: TIMEOUT_MS, - }); - }, - snapshotName: scenario.snapshotName, - supportsRerank: scenario.supportsRerank, - testFileUrl: import.meta.url, - timeoutMs: TIMEOUT_MS, + defineOpenRouterTraceAssertions({ + name: "auto-hook instrumentation", + runScenario: async ({ runNodeScenarioDir }) => { + await runNodeScenarioDir({ + entry: scenario.autoEntry, + nodeArgs: ["--import", "braintrust/hook.mjs"], + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); + }, + snapshotName: scenario.snapshotName, + supportsRerank: scenario.supportsRerank, + testFileUrl: import.meta.url, + timeoutMs: TIMEOUT_MS, + }); }); - }); -} + } +}); diff --git a/e2e/vitest.config.mts b/e2e/vitest.config.mts index 9f4e1b515..d6ee62af4 100644 --- a/e2e/vitest.config.mts +++ b/e2e/vitest.config.mts @@ -14,6 +14,10 @@ export default defineConfig({ slowTestThreshold: 120_000, // Default to one retry for provider/network flake in non-hermetic scenarios. retry: 1, + // Allow up to 5 describe blocks to run their beforeAll hooks concurrently + // within a file. Bounded to avoid overwhelming CI with too many subprocesses + // at once. Tune down if CI shows memory pressure or flaky timeouts. + maxConcurrency: 5, setupFiles: ["./vitest.setup.ts"], tags: [ { diff --git a/js/package.json b/js/package.json index 75c3ec39e..754be23d1 100644 --- a/js/package.json +++ b/js/package.json @@ -118,7 +118,8 @@ "test": "vitest run --exclude \"src/wrappers/**/*.test.ts\" --exclude \"src/otel/**/*.test.ts\" --exclude \"smoke/**/*.test.ts\" --exclude \"src/zod/**/*.test.ts\" --exclude \"tests/api-compatibility/**\"", "test:core": "pnpm prune && pnpm test", "test:checks": "pnpm run test:core && pnpm run test:vitest", - "test:external": "pnpm run test:external:openai && pnpm run test:external:anthropic && pnpm run test:external:google-genai && pnpm run test:external:ai-sdk && pnpm run test:external:claude-agent-sdk", + "test:external": "pnpm run test:external:sequential && node scripts/run-parallel.mjs test:external:ai-sdk-v5 test:external:ai-sdk-v6 test:external:claude-agent-sdk", + "test:external:sequential": "pnpm run test:external:openai && pnpm run test:external:anthropic && pnpm run test:external:google-genai", "test:external:openai": "bash scripts/test-provider.sh test:openai openai", "test:external:anthropic": "bash scripts/test-provider.sh test:anthropic @anthropic-ai/sdk", "test:external:google-genai": "bash scripts/test-provider.sh test:google-genai @google/genai", diff --git a/js/scripts/run-parallel.mjs b/js/scripts/run-parallel.mjs new file mode 100644 index 000000000..b9650460e --- /dev/null +++ b/js/scripts/run-parallel.mjs @@ -0,0 +1,46 @@ +#!/usr/bin/env node +// Run multiple pnpm scripts concurrently and exit non-zero if any fail. +// Cross-platform (works on Windows, macOS, Linux). +// +// Usage: node scripts/run-parallel.mjs [script2 ...] +import { spawn } from "node:child_process"; +import { dirname, join } from "node:path"; +import { fileURLToPath } from "node:url"; + +const scripts = process.argv.slice(2); +if (!scripts.length) { + console.error("Usage: run-parallel.mjs [script2 ...]"); + process.exit(1); +} + +const pnpm = process.platform === "win32" ? "pnpm.cmd" : "pnpm"; +const pkgDir = join(dirname(fileURLToPath(import.meta.url)), ".."); + +const results = await Promise.allSettled( + scripts.map( + (script) => + new Promise((resolve, reject) => { + const child = spawn(pnpm, ["run", script], { + cwd: pkgDir, + stdio: "inherit", + shell: false, + }); + child.on("error", reject); + child.on("close", (code) => { + if (code === 0) { + resolve(script); + } else { + reject(new Error(`${script} exited with code ${code}`)); + } + }); + }), + ), +); + +const failures = results.filter((r) => r.status === "rejected"); +for (const f of failures) { + console.error(f.reason.message); +} +if (failures.length > 0) { + process.exit(1); +} From b476f80782a61d64f070e2eba096aec055a3c1bc Mon Sep 17 00:00:00 2001 From: Stephen Belanger Date: Wed, 6 May 2026 14:25:10 -0700 Subject: [PATCH 2/5] chore: add changeset for e2e parallelization Co-Authored-By: Claude Sonnet 4.6 --- .changeset/parallelize-e2e-scenarios.md | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 .changeset/parallelize-e2e-scenarios.md diff --git a/.changeset/parallelize-e2e-scenarios.md b/.changeset/parallelize-e2e-scenarios.md new file mode 100644 index 000000000..cf9520f66 --- /dev/null +++ b/.changeset/parallelize-e2e-scenarios.md @@ -0,0 +1,5 @@ +--- +"braintrust": patch +--- + +perf(e2e): parallelize scenario variants within test files From f94b1a7559ffbfce49f57c743196c263b180cf6b Mon Sep 17 00:00:00 2001 From: Stephen Belanger Date: Wed, 6 May 2026 14:55:28 -0700 Subject: [PATCH 3/5] fix(e2e): use describe.sequential wrappers in claude-agent-sdk test and shell:true in run-parallel - Add describe.sequential around each defineClaudeAgentSDKInstrumentationAssertions call so toMatchFileSnapshot has a test context when describe.concurrent propagates concurrency into the test bodies - Use shell:true in run-parallel.mjs so pnpm.cmd works on Windows Co-Authored-By: Claude Sonnet 4.6 --- .../claude-agent-sdk-instrumentation/scenario.test.ts | 4 ++-- js/scripts/run-parallel.mjs | 5 ++--- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts index ed1088cc3..39761ac8d 100644 --- a/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts @@ -56,7 +56,7 @@ const claudeAgentSDKScenarios = await Promise.all( describe.concurrent("wrapped instrumentation", () => { for (const scenario of claudeAgentSDKScenarios) { const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); - describe(`claude agent sdk ${scenario.version}`, { tags }, () => { + describe.sequential(`claude agent sdk ${scenario.version}`, { tags }, () => { defineClaudeAgentSDKInstrumentationAssertions({ assertLocalToolHandlerParenting: true, expectTaskLifecycleDetails: scenario.expectTaskLifecycleDetails, @@ -83,7 +83,7 @@ describe.concurrent("wrapped instrumentation", () => { describe.concurrent("auto-hook instrumentation", () => { for (const scenario of claudeAgentSDKScenarios) { const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); - describe(`claude agent sdk ${scenario.version}`, { tags }, () => { + describe.sequential(`claude agent sdk ${scenario.version}`, { tags }, () => { defineClaudeAgentSDKInstrumentationAssertions({ assertLocalToolHandlerParenting: true, expectTaskLifecycleDetails: scenario.expectTaskLifecycleDetails, diff --git a/js/scripts/run-parallel.mjs b/js/scripts/run-parallel.mjs index b9650460e..49b23cb15 100644 --- a/js/scripts/run-parallel.mjs +++ b/js/scripts/run-parallel.mjs @@ -13,17 +13,16 @@ if (!scripts.length) { process.exit(1); } -const pnpm = process.platform === "win32" ? "pnpm.cmd" : "pnpm"; const pkgDir = join(dirname(fileURLToPath(import.meta.url)), ".."); const results = await Promise.allSettled( scripts.map( (script) => new Promise((resolve, reject) => { - const child = spawn(pnpm, ["run", script], { + const child = spawn("pnpm", ["run", script], { cwd: pkgDir, stdio: "inherit", - shell: false, + shell: true, }); child.on("error", reject); child.on("close", (code) => { From 27b75d0090ddc2bf707569d98835b25b3510d884 Mon Sep 17 00:00:00 2001 From: Stephen Belanger Date: Wed, 6 May 2026 17:36:37 -0700 Subject: [PATCH 4/5] fix(e2e): update google-adk snapshots for e.span.ended filter and sortBySpanDepth Adds sortBySpanDepth helper and switches both snapshot tests to filter by e.span.ended so wrapped mode (single flush) and auto-hook mode (early flushes from LLM spans) produce the same end-phase event set. Rewrites the four google-adk snapshot files to reflect 5 end-phase events with correct runner token metrics. Co-Authored-By: Claude Sonnet 4.6 --- .../google-adk-v061.log-payloads.json | 72 +++---------------- .../google-adk-v061.span-events.json | 52 +++----------- .../google-adk-v1000.log-payloads.json | 72 +++---------------- .../google-adk-v1000.span-events.json | 52 +++----------- .../google-adk-instrumentation/assertions.ts | 47 ++++++++---- 5 files changed, 75 insertions(+), 220 deletions(-) diff --git a/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061.log-payloads.json b/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061.log-payloads.json index 406961eca..7d8b910a3 100644 --- a/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061.log-payloads.json +++ b/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061.log-payloads.json @@ -4,6 +4,7 @@ "scenario": "google-adk-instrumentation" }, "metrics": { + "end": 0, "start": 0 }, "name": "google-adk-instrumentation-root", @@ -14,6 +15,7 @@ "operation": "simple-run" }, "metrics": { + "end": 0, "start": 0 }, "name": "adk-simple-run-operation", @@ -34,7 +36,12 @@ "provider": "google-adk" }, "metrics": { - "start": 0 + "completion_tokens": "", + "duration": 0, + "end": 0, + "prompt_tokens": "", + "start": 0, + "tokens": "" }, "name": "Google ADK Runner", "type": "task" @@ -46,6 +53,8 @@ "provider": "google-adk" }, "metrics": { + "duration": 0, + "end": 0, "start": 0 }, "name": "Agent: weather_agent", @@ -72,66 +81,5 @@ "temperature": 72 }, "type": "tool" - }, - { - "metadata": { - "google_adk.agent_name": "weather_agent", - "model": "gemini-2.5-flash-lite", - "provider": "google-adk" - }, - "metrics": { - "duration": 0, - "end": 0, - "start": 0 - }, - "name": "Agent: weather_agent", - "type": "task" - }, - { - "input": { - "messages": [ - { - "content": "What is the weather in Paris, France?", - "role": "user" - } - ] - }, - "metadata": { - "google_adk.session_id": "test-session-1", - "google_adk.user_id": "test-user", - "provider": "google-adk" - }, - "metrics": { - "completion_tokens": "", - "duration": 0, - "end": 0, - "prompt_tokens": "", - "start": 0, - "tokens": "" - }, - "name": "Google ADK Runner", - "type": "task" - }, - { - "metadata": { - "operation": "simple-run" - }, - "metrics": { - "end": 0, - "start": 0 - }, - "name": "adk-simple-run-operation", - "type": null - }, - { - "metadata": { - "scenario": "google-adk-instrumentation" - }, - "metrics": { - "end": 0, - "start": 0 - }, - "name": "google-adk-instrumentation-root", - "type": "task" } ] diff --git a/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061.span-events.json b/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061.span-events.json index 2bf665c39..2f9f5d9e3 100644 --- a/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061.span-events.json +++ b/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061.span-events.json @@ -34,7 +34,12 @@ "google_adk.user_id": "test-user", "provider": "google-adk" }, - "metric_keys": [], + "metric_keys": [ + "completion_tokens", + "duration", + "prompt_tokens", + "tokens" + ], "name": "Google ADK Runner", "root_span_id": "", "span_id": "", @@ -50,7 +55,9 @@ "model": "gemini-2.5-flash-lite", "provider": "google-adk" }, - "metric_keys": [], + "metric_keys": [ + "duration" + ], "name": "Agent: weather_agent", "root_span_id": "", "span_id": "", @@ -72,49 +79,10 @@ ], "name": "tool: get_weather", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], "type": "tool" - }, - { - "has_input": false, - "metadata": { - "google_adk.agent_name": "weather_agent", - "model": "gemini-2.5-flash-lite", - "provider": "google-adk" - }, - "metric_keys": [ - "duration" - ], - "name": "Agent: weather_agent", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "" - ], - "type": "task" - }, - { - "has_input": true, - "metadata": { - "google_adk.session_id": "test-session-1", - "google_adk.user_id": "test-user", - "provider": "google-adk" - }, - "metric_keys": [ - "completion_tokens", - "duration", - "prompt_tokens", - "tokens" - ], - "name": "Google ADK Runner", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "" - ], - "type": "task" } ] diff --git a/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v1000.log-payloads.json b/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v1000.log-payloads.json index 406961eca..7d8b910a3 100644 --- a/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v1000.log-payloads.json +++ b/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v1000.log-payloads.json @@ -4,6 +4,7 @@ "scenario": "google-adk-instrumentation" }, "metrics": { + "end": 0, "start": 0 }, "name": "google-adk-instrumentation-root", @@ -14,6 +15,7 @@ "operation": "simple-run" }, "metrics": { + "end": 0, "start": 0 }, "name": "adk-simple-run-operation", @@ -34,7 +36,12 @@ "provider": "google-adk" }, "metrics": { - "start": 0 + "completion_tokens": "", + "duration": 0, + "end": 0, + "prompt_tokens": "", + "start": 0, + "tokens": "" }, "name": "Google ADK Runner", "type": "task" @@ -46,6 +53,8 @@ "provider": "google-adk" }, "metrics": { + "duration": 0, + "end": 0, "start": 0 }, "name": "Agent: weather_agent", @@ -72,66 +81,5 @@ "temperature": 72 }, "type": "tool" - }, - { - "metadata": { - "google_adk.agent_name": "weather_agent", - "model": "gemini-2.5-flash-lite", - "provider": "google-adk" - }, - "metrics": { - "duration": 0, - "end": 0, - "start": 0 - }, - "name": "Agent: weather_agent", - "type": "task" - }, - { - "input": { - "messages": [ - { - "content": "What is the weather in Paris, France?", - "role": "user" - } - ] - }, - "metadata": { - "google_adk.session_id": "test-session-1", - "google_adk.user_id": "test-user", - "provider": "google-adk" - }, - "metrics": { - "completion_tokens": "", - "duration": 0, - "end": 0, - "prompt_tokens": "", - "start": 0, - "tokens": "" - }, - "name": "Google ADK Runner", - "type": "task" - }, - { - "metadata": { - "operation": "simple-run" - }, - "metrics": { - "end": 0, - "start": 0 - }, - "name": "adk-simple-run-operation", - "type": null - }, - { - "metadata": { - "scenario": "google-adk-instrumentation" - }, - "metrics": { - "end": 0, - "start": 0 - }, - "name": "google-adk-instrumentation-root", - "type": "task" } ] diff --git a/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v1000.span-events.json b/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v1000.span-events.json index 2bf665c39..2f9f5d9e3 100644 --- a/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v1000.span-events.json +++ b/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v1000.span-events.json @@ -34,7 +34,12 @@ "google_adk.user_id": "test-user", "provider": "google-adk" }, - "metric_keys": [], + "metric_keys": [ + "completion_tokens", + "duration", + "prompt_tokens", + "tokens" + ], "name": "Google ADK Runner", "root_span_id": "", "span_id": "", @@ -50,7 +55,9 @@ "model": "gemini-2.5-flash-lite", "provider": "google-adk" }, - "metric_keys": [], + "metric_keys": [ + "duration" + ], "name": "Agent: weather_agent", "root_span_id": "", "span_id": "", @@ -72,49 +79,10 @@ ], "name": "tool: get_weather", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], "type": "tool" - }, - { - "has_input": false, - "metadata": { - "google_adk.agent_name": "weather_agent", - "model": "gemini-2.5-flash-lite", - "provider": "google-adk" - }, - "metric_keys": [ - "duration" - ], - "name": "Agent: weather_agent", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "" - ], - "type": "task" - }, - { - "has_input": true, - "metadata": { - "google_adk.session_id": "test-session-1", - "google_adk.user_id": "test-user", - "provider": "google-adk" - }, - "metric_keys": [ - "completion_tokens", - "duration", - "prompt_tokens", - "tokens" - ], - "name": "Google ADK Runner", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "" - ], - "type": "task" } ] diff --git a/e2e/scenarios/google-adk-instrumentation/assertions.ts b/e2e/scenarios/google-adk-instrumentation/assertions.ts index 1e6d4c2b5..85d632335 100644 --- a/e2e/scenarios/google-adk-instrumentation/assertions.ts +++ b/e2e/scenarios/google-adk-instrumentation/assertions.ts @@ -125,6 +125,31 @@ function dedupeSnapshotItems(items: Json[]): Json[] { return deduped; } +function sortBySpanDepth(events: CapturedLogEvent[]): CapturedLogEvent[] { + const lastById = new Map(); + for (const event of events) { + if (event.span.id) { + lastById.set(event.span.id, event); + } + } + + const depthCache = new Map(); + function getDepth(spanId: string | undefined): number { + if (!spanId) return 0; + if (depthCache.has(spanId)) return depthCache.get(spanId)!; + const event = lastById.get(spanId); + if (!event || event.span.parentIds.length === 0) { + depthCache.set(spanId, 0); + return 0; + } + const depth = 1 + getDepth(event.span.parentIds[0]); + depthCache.set(spanId, depth); + return depth; + } + + return [...events].sort((a, b) => getDepth(a.span.id) - getDepth(b.span.id)); +} + function hasOptionalADKTaskOutput(event: CapturedLogEvent): boolean { return ( event.span.type === "task" && @@ -291,13 +316,11 @@ export function defineGoogleADKInstrumentationAssertions(options: { }); test("matches the shared span snapshot", testConfig, async () => { - const relevantEvents = events.filter( - (e) => - e.span.name !== undefined && - e.span.type !== "llm" && - // Wrapped mode logs an extra start-only tool row. Normalize to the - // terminal tool record so wrapped and auto-hook snapshots stay aligned. - (e.span.type !== "tool" || e.output !== undefined), + const relevantEvents = sortBySpanDepth( + events.filter( + (e) => + e.span.name !== undefined && e.span.type !== "llm" && e.span.ended, + ), ); const spanSummary = normalizeForSnapshot( dedupeSnapshotItems( @@ -311,11 +334,11 @@ export function defineGoogleADKInstrumentationAssertions(options: { }); test("matches the shared payload snapshot", testConfig, async () => { - const relevantEvents = events.filter( - (e) => - e.span.name !== undefined && - e.span.type !== "llm" && - (e.span.type !== "tool" || e.output !== undefined), + const relevantEvents = sortBySpanDepth( + events.filter( + (e) => + e.span.name !== undefined && e.span.type !== "llm" && e.span.ended, + ), ); const payloadSummary = normalizeForSnapshot( dedupeSnapshotItems( From fe2a4c83e265d6fb1fac75a8e57d5eb47e11fe2e Mon Sep 17 00:00:00 2001 From: Stephen Belanger Date: Wed, 6 May 2026 17:42:22 -0700 Subject: [PATCH 5/5] chore(e2e): apply prettier formatting to scenario test files Co-Authored-By: Claude Sonnet 4.6 --- .../ai-sdk-instrumentation/scenario.test.ts | 3 +- .../scenario.test.ts | 94 ++++++++++--------- 2 files changed, 53 insertions(+), 44 deletions(-) diff --git a/e2e/scenarios/ai-sdk-instrumentation/scenario.test.ts b/e2e/scenarios/ai-sdk-instrumentation/scenario.test.ts index 9efe4decd..6980f5c81 100644 --- a/e2e/scenarios/ai-sdk-instrumentation/scenario.test.ts +++ b/e2e/scenarios/ai-sdk-instrumentation/scenario.test.ts @@ -55,7 +55,8 @@ describe.concurrent("variants", () => { }, snapshotName: scenario.snapshotName, supportsAttachmentScenario, - supportsProviderCacheAssertions: scenario.supportsProviderCacheAssertions, + supportsProviderCacheAssertions: + scenario.supportsProviderCacheAssertions, supportsDenyOutputOverrideScenario: supportsRichInputScenarios, supportsGenerateObject: scenario.supportsGenerateObject, supportsOutputObjectScenario, diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts index 39761ac8d..d9a6402cc 100644 --- a/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts @@ -56,54 +56,62 @@ const claudeAgentSDKScenarios = await Promise.all( describe.concurrent("wrapped instrumentation", () => { for (const scenario of claudeAgentSDKScenarios) { const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); - describe.sequential(`claude agent sdk ${scenario.version}`, { tags }, () => { - defineClaudeAgentSDKInstrumentationAssertions({ - assertLocalToolHandlerParenting: true, - expectTaskLifecycleDetails: scenario.expectTaskLifecycleDetails, - name: "scenario", - runScenario: async ({ runScenarioDir }) => { - await runScenarioDir({ - entry: scenario.wrapperEntry, - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, - }, - scenarioDir, - timeoutMs: TIMEOUT_MS, - }); - }, - snapshotName: scenario.snapshotName, - testFileUrl: import.meta.url, - timeoutMs: TIMEOUT_MS, - }); - }); + describe.sequential( + `claude agent sdk ${scenario.version}`, + { tags }, + () => { + defineClaudeAgentSDKInstrumentationAssertions({ + assertLocalToolHandlerParenting: true, + expectTaskLifecycleDetails: scenario.expectTaskLifecycleDetails, + name: "scenario", + runScenario: async ({ runScenarioDir }) => { + await runScenarioDir({ + entry: scenario.wrapperEntry, + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); + }, + snapshotName: scenario.snapshotName, + testFileUrl: import.meta.url, + timeoutMs: TIMEOUT_MS, + }); + }, + ); } }); describe.concurrent("auto-hook instrumentation", () => { for (const scenario of claudeAgentSDKScenarios) { const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); - describe.sequential(`claude agent sdk ${scenario.version}`, { tags }, () => { - defineClaudeAgentSDKInstrumentationAssertions({ - assertLocalToolHandlerParenting: true, - expectTaskLifecycleDetails: scenario.expectTaskLifecycleDetails, - name: "scenario", - runScenario: async ({ runNodeScenarioDir }) => { - await runNodeScenarioDir({ - entry: scenario.autoEntry, - nodeArgs: ["--import", "braintrust/hook.mjs"], - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, - }, - scenarioDir, - timeoutMs: TIMEOUT_MS, - }); - }, - snapshotName: scenario.snapshotName, - testFileUrl: import.meta.url, - timeoutMs: TIMEOUT_MS, - }); - }); + describe.sequential( + `claude agent sdk ${scenario.version}`, + { tags }, + () => { + defineClaudeAgentSDKInstrumentationAssertions({ + assertLocalToolHandlerParenting: true, + expectTaskLifecycleDetails: scenario.expectTaskLifecycleDetails, + name: "scenario", + runScenario: async ({ runNodeScenarioDir }) => { + await runNodeScenarioDir({ + entry: scenario.autoEntry, + nodeArgs: ["--import", "braintrust/hook.mjs"], + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); + }, + snapshotName: scenario.snapshotName, + testFileUrl: import.meta.url, + timeoutMs: TIMEOUT_MS, + }); + }, + ); } });