diff --git a/.changeset/parallelize-e2e-scenarios.md b/.changeset/parallelize-e2e-scenarios.md new file mode 100644 index 000000000..cf9520f66 --- /dev/null +++ b/.changeset/parallelize-e2e-scenarios.md @@ -0,0 +1,5 @@ +--- +"braintrust": patch +--- + +perf(e2e): parallelize scenario variants within test files diff --git a/e2e/scenarios/ai-sdk-instrumentation/scenario.test.ts b/e2e/scenarios/ai-sdk-instrumentation/scenario.test.ts index daa037e9f..6980f5c81 100644 --- a/e2e/scenarios/ai-sdk-instrumentation/scenario.test.ts +++ b/e2e/scenarios/ai-sdk-instrumentation/scenario.test.ts @@ -30,69 +30,73 @@ function parseMajorVersion(version: string): number { return Number.isFinite(major) ? major : 0; } -for (const scenario of aiSDKScenarios) { - const sdkMajorVersion = parseMajorVersion(scenario.version); - const supportsRichInputScenarios = sdkMajorVersion >= 5; - const supportsOutputObjectScenario = supportsRichInputScenarios; - const supportsAttachmentScenario = supportsRichInputScenarios; +describe.concurrent("variants", () => { + for (const scenario of aiSDKScenarios) { + const sdkMajorVersion = parseMajorVersion(scenario.version); + const supportsRichInputScenarios = sdkMajorVersion >= 5; + const supportsOutputObjectScenario = supportsRichInputScenarios; + const supportsAttachmentScenario = supportsRichInputScenarios; + const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); - const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); - describe(`ai sdk ${scenario.version}`, { tags }, () => { - defineAISDKInstrumentationAssertions({ - agentSpanName: scenario.agentSpanName, - name: "wrapped instrumentation", - runScenario: async ({ runScenarioDir }) => { - await runScenarioDir({ - entry: scenario.wrapperEntry, - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, - }, - scenarioDir, - timeoutMs: AI_SDK_SCENARIO_TIMEOUT_MS, - }); - }, - snapshotName: scenario.snapshotName, - supportsAttachmentScenario, - 
supportsProviderCacheAssertions: scenario.supportsProviderCacheAssertions, - supportsDenyOutputOverrideScenario: supportsRichInputScenarios, - supportsGenerateObject: scenario.supportsGenerateObject, - supportsOutputObjectScenario, - supportsRerank: scenario.supportsRerank !== false, - supportsStreamObject: scenario.supportsStreamObject, - supportsToolExecution: scenario.supportsToolExecution, - sdkMajorVersion, - testFileUrl: import.meta.url, - timeoutMs: AI_SDK_SCENARIO_TIMEOUT_MS, - }); + describe.sequential(`ai sdk ${scenario.version}`, { tags }, () => { + defineAISDKInstrumentationAssertions({ + agentSpanName: scenario.agentSpanName, + name: "wrapped instrumentation", + runScenario: async ({ runScenarioDir }) => { + await runScenarioDir({ + entry: scenario.wrapperEntry, + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: AI_SDK_SCENARIO_TIMEOUT_MS, + }); + }, + snapshotName: scenario.snapshotName, + supportsAttachmentScenario, + supportsProviderCacheAssertions: + scenario.supportsProviderCacheAssertions, + supportsDenyOutputOverrideScenario: supportsRichInputScenarios, + supportsGenerateObject: scenario.supportsGenerateObject, + supportsOutputObjectScenario, + supportsRerank: scenario.supportsRerank !== false, + supportsStreamObject: scenario.supportsStreamObject, + supportsToolExecution: scenario.supportsToolExecution, + sdkMajorVersion, + testFileUrl: import.meta.url, + timeoutMs: AI_SDK_SCENARIO_TIMEOUT_MS, + }); - defineAISDKInstrumentationAssertions({ - agentSpanName: scenario.agentSpanName, - name: "auto-hook instrumentation", - runScenario: async ({ runNodeScenarioDir }) => { - await runNodeScenarioDir({ - entry: scenario.autoEntry, - nodeArgs: ["--import", "braintrust/hook.mjs"], - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, - }, - scenarioDir, - timeoutMs: AI_SDK_SCENARIO_TIMEOUT_MS, - }); - }, - snapshotName: scenario.snapshotName, - supportsAttachmentScenario, - 
supportsProviderCacheAssertions: scenario.supportsProviderCacheAssertions, - supportsDenyOutputOverrideScenario: supportsRichInputScenarios, - supportsGenerateObject: scenario.supportsGenerateObject, - supportsOutputObjectScenario, - supportsRerank: scenario.supportsRerank !== false, - supportsStreamObject: scenario.supportsStreamObject, - supportsToolExecution: scenario.supportsToolExecution, - sdkMajorVersion, - testFileUrl: import.meta.url, - timeoutMs: AI_SDK_SCENARIO_TIMEOUT_MS, + defineAISDKInstrumentationAssertions({ + agentSpanName: scenario.agentSpanName, + name: "auto-hook instrumentation", + runScenario: async ({ runNodeScenarioDir }) => { + await runNodeScenarioDir({ + entry: scenario.autoEntry, + nodeArgs: ["--import", "braintrust/hook.mjs"], + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: AI_SDK_SCENARIO_TIMEOUT_MS, + }); + }, + snapshotName: scenario.snapshotName, + supportsAttachmentScenario, + supportsProviderCacheAssertions: + scenario.supportsProviderCacheAssertions, + supportsDenyOutputOverrideScenario: supportsRichInputScenarios, + supportsGenerateObject: scenario.supportsGenerateObject, + supportsOutputObjectScenario, + supportsRerank: scenario.supportsRerank !== false, + supportsStreamObject: scenario.supportsStreamObject, + supportsToolExecution: scenario.supportsToolExecution, + sdkMajorVersion, + testFileUrl: import.meta.url, + timeoutMs: AI_SDK_SCENARIO_TIMEOUT_MS, + }); }); - }); -} + } +}); diff --git a/e2e/scenarios/anthropic-instrumentation/scenario.test.ts b/e2e/scenarios/anthropic-instrumentation/scenario.test.ts index 007dced98..709344571 100644 --- a/e2e/scenarios/anthropic-instrumentation/scenario.test.ts +++ b/e2e/scenarios/anthropic-instrumentation/scenario.test.ts @@ -74,52 +74,54 @@ const anthropicScenarios = await Promise.all( })), ); -for (const scenario of anthropicScenarios) { - const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); - 
describe(`anthropic sdk ${scenario.version}`, { tags }, () => { - defineAnthropicInstrumentationAssertions({ - name: "wrapped instrumentation", - runScenario: async ({ runScenarioDir }) => { - await runScenarioDir({ - entry: scenario.wrapperEntry, - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, - }, - scenarioDir, - timeoutMs: TIMEOUT_MS, - }); - }, - snapshotName: scenario.snapshotName, - supportsBetaMessages: scenario.supportsBetaMessages, - supportsBetaToolRunner: scenario.supportsBetaToolRunner ?? true, - supportsServerToolUse: scenario.supportsServerToolUse ?? true, - supportsThinking: scenario.supportsThinking, - testFileUrl: import.meta.url, - timeoutMs: TIMEOUT_MS, - }); +describe.concurrent("variants", () => { + for (const scenario of anthropicScenarios) { + const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); + describe.sequential(`anthropic sdk ${scenario.version}`, { tags }, () => { + defineAnthropicInstrumentationAssertions({ + name: "wrapped instrumentation", + runScenario: async ({ runScenarioDir }) => { + await runScenarioDir({ + entry: scenario.wrapperEntry, + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); + }, + snapshotName: scenario.snapshotName, + supportsBetaMessages: scenario.supportsBetaMessages, + supportsBetaToolRunner: scenario.supportsBetaToolRunner ?? true, + supportsServerToolUse: scenario.supportsServerToolUse ?? 
true, + supportsThinking: scenario.supportsThinking, + testFileUrl: import.meta.url, + timeoutMs: TIMEOUT_MS, + }); - defineAnthropicInstrumentationAssertions({ - name: "auto-hook instrumentation", - runScenario: async ({ runNodeScenarioDir }) => { - await runNodeScenarioDir({ - entry: scenario.autoEntry, - nodeArgs: ["--import", "braintrust/hook.mjs"], - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, - }, - scenarioDir, - timeoutMs: TIMEOUT_MS, - }); - }, - snapshotName: scenario.snapshotName, - supportsBetaMessages: scenario.supportsBetaMessages, - supportsBetaToolRunner: scenario.supportsBetaToolRunner ?? true, - supportsServerToolUse: scenario.supportsServerToolUse ?? true, - supportsThinking: scenario.supportsThinking, - testFileUrl: import.meta.url, - timeoutMs: TIMEOUT_MS, + defineAnthropicInstrumentationAssertions({ + name: "auto-hook instrumentation", + runScenario: async ({ runNodeScenarioDir }) => { + await runNodeScenarioDir({ + entry: scenario.autoEntry, + nodeArgs: ["--import", "braintrust/hook.mjs"], + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); + }, + snapshotName: scenario.snapshotName, + supportsBetaMessages: scenario.supportsBetaMessages, + supportsBetaToolRunner: scenario.supportsBetaToolRunner ?? true, + supportsServerToolUse: scenario.supportsServerToolUse ?? 
true, + supportsThinking: scenario.supportsThinking, + testFileUrl: import.meta.url, + timeoutMs: TIMEOUT_MS, + }); }); - }); -} + } +}); diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts index 9670e4b5b..d9a6402cc 100644 --- a/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts @@ -53,57 +53,65 @@ const claudeAgentSDKScenarios = await Promise.all( }), ); -describe("wrapped instrumentation", () => { +describe.concurrent("wrapped instrumentation", () => { for (const scenario of claudeAgentSDKScenarios) { const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); - describe(`claude agent sdk ${scenario.version}`, { tags }, () => { - defineClaudeAgentSDKInstrumentationAssertions({ - assertLocalToolHandlerParenting: true, - expectTaskLifecycleDetails: scenario.expectTaskLifecycleDetails, - name: "scenario", - runScenario: async ({ runScenarioDir }) => { - await runScenarioDir({ - entry: scenario.wrapperEntry, - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, - }, - scenarioDir, - timeoutMs: TIMEOUT_MS, - }); - }, - snapshotName: scenario.snapshotName, - testFileUrl: import.meta.url, - timeoutMs: TIMEOUT_MS, - }); - }); + describe.sequential( + `claude agent sdk ${scenario.version}`, + { tags }, + () => { + defineClaudeAgentSDKInstrumentationAssertions({ + assertLocalToolHandlerParenting: true, + expectTaskLifecycleDetails: scenario.expectTaskLifecycleDetails, + name: "scenario", + runScenario: async ({ runScenarioDir }) => { + await runScenarioDir({ + entry: scenario.wrapperEntry, + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); + }, + snapshotName: scenario.snapshotName, + testFileUrl: import.meta.url, + timeoutMs: TIMEOUT_MS, + }); + }, + ); } }); -describe("auto-hook 
instrumentation", () => { +describe.concurrent("auto-hook instrumentation", () => { for (const scenario of claudeAgentSDKScenarios) { const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); - describe(`claude agent sdk ${scenario.version}`, { tags }, () => { - defineClaudeAgentSDKInstrumentationAssertions({ - assertLocalToolHandlerParenting: true, - expectTaskLifecycleDetails: scenario.expectTaskLifecycleDetails, - name: "scenario", - runScenario: async ({ runNodeScenarioDir }) => { - await runNodeScenarioDir({ - entry: scenario.autoEntry, - nodeArgs: ["--import", "braintrust/hook.mjs"], - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, - }, - scenarioDir, - timeoutMs: TIMEOUT_MS, - }); - }, - snapshotName: scenario.snapshotName, - testFileUrl: import.meta.url, - timeoutMs: TIMEOUT_MS, - }); - }); + describe.sequential( + `claude agent sdk ${scenario.version}`, + { tags }, + () => { + defineClaudeAgentSDKInstrumentationAssertions({ + assertLocalToolHandlerParenting: true, + expectTaskLifecycleDetails: scenario.expectTaskLifecycleDetails, + name: "scenario", + runScenario: async ({ runNodeScenarioDir }) => { + await runNodeScenarioDir({ + entry: scenario.autoEntry, + nodeArgs: ["--import", "braintrust/hook.mjs"], + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); + }, + snapshotName: scenario.snapshotName, + testFileUrl: import.meta.url, + timeoutMs: TIMEOUT_MS, + }); + }, + ); } }); diff --git a/e2e/scenarios/cohere-instrumentation/scenario.test.ts b/e2e/scenarios/cohere-instrumentation/scenario.test.ts index 0dd756a62..95e446e8a 100644 --- a/e2e/scenarios/cohere-instrumentation/scenario.test.ts +++ b/e2e/scenarios/cohere-instrumentation/scenario.test.ts @@ -26,62 +26,64 @@ const cohereScenarios = await Promise.all( })), ); -for (const scenario of cohereScenarios) { - const supportsThinking = scenario.supportsThinking ?? 
true; - const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); +describe.concurrent("variants", () => { + for (const scenario of cohereScenarios) { + const supportsThinking = scenario.supportsThinking ?? true; + const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); - describe(`cohere sdk ${scenario.version}`, { tags }, () => { - defineCohereInstrumentationAssertions({ - name: "wrapped instrumentation", - runScenario: async ({ runScenarioDir }) => { - await runScenarioDir({ - entry: scenario.wrapperEntry, - env: { - COHERE_PACKAGE_NAME: scenario.dependencyName, - COHERE_SUPPORTS_THINKING: supportsThinking ? "1" : "0", - }, - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, - }, - scenarioDir, - timeoutMs: COHERE_SCENARIO_TIMEOUT_MS, - }); - }, - requireChatStreamOutput: scenario.snapshotName !== "cohere-v7-14-0", - snapshotName: - scenario.snapshotName === "cohere-v7-14-0" - ? "cohere-v7-14-0-wrapped" - : scenario.snapshotName, - supportsThinking, - testFileUrl: import.meta.url, - timeoutMs: COHERE_SCENARIO_TIMEOUT_MS, - useV2Namespace: scenario.useV2Namespace ?? false, - }); + describe.sequential(`cohere sdk ${scenario.version}`, { tags }, () => { + defineCohereInstrumentationAssertions({ + name: "wrapped instrumentation", + runScenario: async ({ runScenarioDir }) => { + await runScenarioDir({ + entry: scenario.wrapperEntry, + env: { + COHERE_PACKAGE_NAME: scenario.dependencyName, + COHERE_SUPPORTS_THINKING: supportsThinking ? "1" : "0", + }, + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: COHERE_SCENARIO_TIMEOUT_MS, + }); + }, + requireChatStreamOutput: scenario.snapshotName !== "cohere-v7-14-0", + snapshotName: + scenario.snapshotName === "cohere-v7-14-0" + ? "cohere-v7-14-0-wrapped" + : scenario.snapshotName, + supportsThinking, + testFileUrl: import.meta.url, + timeoutMs: COHERE_SCENARIO_TIMEOUT_MS, + useV2Namespace: scenario.useV2Namespace ?? 
false, + }); - defineCohereInstrumentationAssertions({ - name: "auto-hook instrumentation", - runScenario: async ({ runNodeScenarioDir }) => { - await runNodeScenarioDir({ - entry: scenario.autoEntry, - env: { - COHERE_PACKAGE_NAME: scenario.dependencyName, - COHERE_SUPPORTS_THINKING: supportsThinking ? "1" : "0", - }, - nodeArgs: ["--import", "braintrust/hook.mjs"], - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, - }, - scenarioDir, - timeoutMs: COHERE_SCENARIO_TIMEOUT_MS, - }); - }, - snapshotName: scenario.snapshotName, - supportsThinking, - testFileUrl: import.meta.url, - timeoutMs: COHERE_SCENARIO_TIMEOUT_MS, - useV2Namespace: scenario.useV2Namespace ?? false, + defineCohereInstrumentationAssertions({ + name: "auto-hook instrumentation", + runScenario: async ({ runNodeScenarioDir }) => { + await runNodeScenarioDir({ + entry: scenario.autoEntry, + env: { + COHERE_PACKAGE_NAME: scenario.dependencyName, + COHERE_SUPPORTS_THINKING: supportsThinking ? "1" : "0", + }, + nodeArgs: ["--import", "braintrust/hook.mjs"], + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: COHERE_SCENARIO_TIMEOUT_MS, + }); + }, + snapshotName: scenario.snapshotName, + supportsThinking, + testFileUrl: import.meta.url, + timeoutMs: COHERE_SCENARIO_TIMEOUT_MS, + useV2Namespace: scenario.useV2Namespace ?? 
false, + }); }); - }); -} + } +}); diff --git a/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061.log-payloads.json b/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061.log-payloads.json index 406961eca..7d8b910a3 100644 --- a/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061.log-payloads.json +++ b/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061.log-payloads.json @@ -4,6 +4,7 @@ "scenario": "google-adk-instrumentation" }, "metrics": { + "end": 0, "start": 0 }, "name": "google-adk-instrumentation-root", @@ -14,6 +15,7 @@ "operation": "simple-run" }, "metrics": { + "end": 0, "start": 0 }, "name": "adk-simple-run-operation", @@ -34,7 +36,12 @@ "provider": "google-adk" }, "metrics": { - "start": 0 + "completion_tokens": "", + "duration": 0, + "end": 0, + "prompt_tokens": "", + "start": 0, + "tokens": "" }, "name": "Google ADK Runner", "type": "task" @@ -46,6 +53,8 @@ "provider": "google-adk" }, "metrics": { + "duration": 0, + "end": 0, "start": 0 }, "name": "Agent: weather_agent", @@ -72,66 +81,5 @@ "temperature": 72 }, "type": "tool" - }, - { - "metadata": { - "google_adk.agent_name": "weather_agent", - "model": "gemini-2.5-flash-lite", - "provider": "google-adk" - }, - "metrics": { - "duration": 0, - "end": 0, - "start": 0 - }, - "name": "Agent: weather_agent", - "type": "task" - }, - { - "input": { - "messages": [ - { - "content": "What is the weather in Paris, France?", - "role": "user" - } - ] - }, - "metadata": { - "google_adk.session_id": "test-session-1", - "google_adk.user_id": "test-user", - "provider": "google-adk" - }, - "metrics": { - "completion_tokens": "", - "duration": 0, - "end": 0, - "prompt_tokens": "", - "start": 0, - "tokens": "" - }, - "name": "Google ADK Runner", - "type": "task" - }, - { - "metadata": { - "operation": "simple-run" - }, - "metrics": { - "end": 0, - "start": 0 - }, - "name": "adk-simple-run-operation", - "type": null - }, - { - "metadata": { - 
"scenario": "google-adk-instrumentation" - }, - "metrics": { - "end": 0, - "start": 0 - }, - "name": "google-adk-instrumentation-root", - "type": "task" } ] diff --git a/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061.span-events.json b/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061.span-events.json index 2bf665c39..2f9f5d9e3 100644 --- a/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061.span-events.json +++ b/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061.span-events.json @@ -34,7 +34,12 @@ "google_adk.user_id": "test-user", "provider": "google-adk" }, - "metric_keys": [], + "metric_keys": [ + "completion_tokens", + "duration", + "prompt_tokens", + "tokens" + ], "name": "Google ADK Runner", "root_span_id": "", "span_id": "", @@ -50,7 +55,9 @@ "model": "gemini-2.5-flash-lite", "provider": "google-adk" }, - "metric_keys": [], + "metric_keys": [ + "duration" + ], "name": "Agent: weather_agent", "root_span_id": "", "span_id": "", @@ -72,49 +79,10 @@ ], "name": "tool: get_weather", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], "type": "tool" - }, - { - "has_input": false, - "metadata": { - "google_adk.agent_name": "weather_agent", - "model": "gemini-2.5-flash-lite", - "provider": "google-adk" - }, - "metric_keys": [ - "duration" - ], - "name": "Agent: weather_agent", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "" - ], - "type": "task" - }, - { - "has_input": true, - "metadata": { - "google_adk.session_id": "test-session-1", - "google_adk.user_id": "test-user", - "provider": "google-adk" - }, - "metric_keys": [ - "completion_tokens", - "duration", - "prompt_tokens", - "tokens" - ], - "name": "Google ADK Runner", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "" - ], - "type": "task" } ] diff --git a/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v1000.log-payloads.json 
b/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v1000.log-payloads.json index 406961eca..7d8b910a3 100644 --- a/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v1000.log-payloads.json +++ b/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v1000.log-payloads.json @@ -4,6 +4,7 @@ "scenario": "google-adk-instrumentation" }, "metrics": { + "end": 0, "start": 0 }, "name": "google-adk-instrumentation-root", @@ -14,6 +15,7 @@ "operation": "simple-run" }, "metrics": { + "end": 0, "start": 0 }, "name": "adk-simple-run-operation", @@ -34,7 +36,12 @@ "provider": "google-adk" }, "metrics": { - "start": 0 + "completion_tokens": "", + "duration": 0, + "end": 0, + "prompt_tokens": "", + "start": 0, + "tokens": "" }, "name": "Google ADK Runner", "type": "task" @@ -46,6 +53,8 @@ "provider": "google-adk" }, "metrics": { + "duration": 0, + "end": 0, "start": 0 }, "name": "Agent: weather_agent", @@ -72,66 +81,5 @@ "temperature": 72 }, "type": "tool" - }, - { - "metadata": { - "google_adk.agent_name": "weather_agent", - "model": "gemini-2.5-flash-lite", - "provider": "google-adk" - }, - "metrics": { - "duration": 0, - "end": 0, - "start": 0 - }, - "name": "Agent: weather_agent", - "type": "task" - }, - { - "input": { - "messages": [ - { - "content": "What is the weather in Paris, France?", - "role": "user" - } - ] - }, - "metadata": { - "google_adk.session_id": "test-session-1", - "google_adk.user_id": "test-user", - "provider": "google-adk" - }, - "metrics": { - "completion_tokens": "", - "duration": 0, - "end": 0, - "prompt_tokens": "", - "start": 0, - "tokens": "" - }, - "name": "Google ADK Runner", - "type": "task" - }, - { - "metadata": { - "operation": "simple-run" - }, - "metrics": { - "end": 0, - "start": 0 - }, - "name": "adk-simple-run-operation", - "type": null - }, - { - "metadata": { - "scenario": "google-adk-instrumentation" - }, - "metrics": { - "end": 0, - "start": 0 - }, - "name": "google-adk-instrumentation-root", 
- "type": "task" } ] diff --git a/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v1000.span-events.json b/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v1000.span-events.json index 2bf665c39..2f9f5d9e3 100644 --- a/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v1000.span-events.json +++ b/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v1000.span-events.json @@ -34,7 +34,12 @@ "google_adk.user_id": "test-user", "provider": "google-adk" }, - "metric_keys": [], + "metric_keys": [ + "completion_tokens", + "duration", + "prompt_tokens", + "tokens" + ], "name": "Google ADK Runner", "root_span_id": "", "span_id": "", @@ -50,7 +55,9 @@ "model": "gemini-2.5-flash-lite", "provider": "google-adk" }, - "metric_keys": [], + "metric_keys": [ + "duration" + ], "name": "Agent: weather_agent", "root_span_id": "", "span_id": "", @@ -72,49 +79,10 @@ ], "name": "tool: get_weather", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], "type": "tool" - }, - { - "has_input": false, - "metadata": { - "google_adk.agent_name": "weather_agent", - "model": "gemini-2.5-flash-lite", - "provider": "google-adk" - }, - "metric_keys": [ - "duration" - ], - "name": "Agent: weather_agent", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "" - ], - "type": "task" - }, - { - "has_input": true, - "metadata": { - "google_adk.session_id": "test-session-1", - "google_adk.user_id": "test-user", - "provider": "google-adk" - }, - "metric_keys": [ - "completion_tokens", - "duration", - "prompt_tokens", - "tokens" - ], - "name": "Google ADK Runner", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "" - ], - "type": "task" } ] diff --git a/e2e/scenarios/google-adk-instrumentation/assertions.ts b/e2e/scenarios/google-adk-instrumentation/assertions.ts index 1e6d4c2b5..85d632335 100644 --- a/e2e/scenarios/google-adk-instrumentation/assertions.ts +++ 
b/e2e/scenarios/google-adk-instrumentation/assertions.ts @@ -125,6 +125,31 @@ function dedupeSnapshotItems(items: Json[]): Json[] { return deduped; } +function sortBySpanDepth(events: CapturedLogEvent[]): CapturedLogEvent[] { + const lastById = new Map(); + for (const event of events) { + if (event.span.id) { + lastById.set(event.span.id, event); + } + } + + const depthCache = new Map(); + function getDepth(spanId: string | undefined): number { + if (!spanId) return 0; + if (depthCache.has(spanId)) return depthCache.get(spanId)!; + const event = lastById.get(spanId); + if (!event || event.span.parentIds.length === 0) { + depthCache.set(spanId, 0); + return 0; + } + const depth = 1 + getDepth(event.span.parentIds[0]); + depthCache.set(spanId, depth); + return depth; + } + + return [...events].sort((a, b) => getDepth(a.span.id) - getDepth(b.span.id)); +} + function hasOptionalADKTaskOutput(event: CapturedLogEvent): boolean { return ( event.span.type === "task" && @@ -291,13 +316,11 @@ export function defineGoogleADKInstrumentationAssertions(options: { }); test("matches the shared span snapshot", testConfig, async () => { - const relevantEvents = events.filter( - (e) => - e.span.name !== undefined && - e.span.type !== "llm" && - // Wrapped mode logs an extra start-only tool row. Normalize to the - // terminal tool record so wrapped and auto-hook snapshots stay aligned. 
- (e.span.type !== "tool" || e.output !== undefined), + const relevantEvents = sortBySpanDepth( + events.filter( + (e) => + e.span.name !== undefined && e.span.type !== "llm" && e.span.ended, + ), ); const spanSummary = normalizeForSnapshot( dedupeSnapshotItems( @@ -311,11 +334,11 @@ export function defineGoogleADKInstrumentationAssertions(options: { }); test("matches the shared payload snapshot", testConfig, async () => { - const relevantEvents = events.filter( - (e) => - e.span.name !== undefined && - e.span.type !== "llm" && - (e.span.type !== "tool" || e.output !== undefined), + const relevantEvents = sortBySpanDepth( + events.filter( + (e) => + e.span.name !== undefined && e.span.type !== "llm" && e.span.ended, + ), ); const payloadSummary = normalizeForSnapshot( dedupeSnapshotItems( diff --git a/e2e/scenarios/google-adk-instrumentation/scenario.test.ts b/e2e/scenarios/google-adk-instrumentation/scenario.test.ts index db86866b1..87cfbd5be 100644 --- a/e2e/scenarios/google-adk-instrumentation/scenario.test.ts +++ b/e2e/scenarios/google-adk-instrumentation/scenario.test.ts @@ -35,47 +35,49 @@ const googleADKScenarios = await Promise.all( })), ); -for (const scenario of googleADKScenarios) { - const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); +describe.concurrent("variants", () => { + for (const scenario of googleADKScenarios) { + const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); - describe(`google adk sdk ${scenario.version}`, { tags }, () => { - defineGoogleADKInstrumentationAssertions({ - name: "wrapped instrumentation", - runScenario: async ({ runScenarioDir }) => { - await runScenarioDir({ - entry: scenario.wrapperEntry, - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, - }, - scenarioDir, - timeoutMs: TIMEOUT_MS, - }); - }, - expectLLMSpan: false, - snapshotName: scenario.snapshotName, - testFileUrl: import.meta.url, - timeoutMs: TIMEOUT_MS, - }); + describe.sequential(`google adk sdk 
${scenario.version}`, { tags }, () => { + defineGoogleADKInstrumentationAssertions({ + name: "wrapped instrumentation", + runScenario: async ({ runScenarioDir }) => { + await runScenarioDir({ + entry: scenario.wrapperEntry, + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); + }, + expectLLMSpan: false, + snapshotName: scenario.snapshotName, + testFileUrl: import.meta.url, + timeoutMs: TIMEOUT_MS, + }); - defineGoogleADKInstrumentationAssertions({ - name: "auto-hook instrumentation", - runScenario: async ({ runNodeScenarioDir }) => { - await runNodeScenarioDir({ - entry: scenario.autoEntry, - nodeArgs: ["--import", "braintrust/hook.mjs"], - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, - }, - scenarioDir, - timeoutMs: TIMEOUT_MS, - }); - }, - expectLLMSpan: true, - snapshotName: scenario.snapshotName, - testFileUrl: import.meta.url, - timeoutMs: TIMEOUT_MS, + defineGoogleADKInstrumentationAssertions({ + name: "auto-hook instrumentation", + runScenario: async ({ runNodeScenarioDir }) => { + await runNodeScenarioDir({ + entry: scenario.autoEntry, + nodeArgs: ["--import", "braintrust/hook.mjs"], + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); + }, + expectLLMSpan: true, + snapshotName: scenario.snapshotName, + testFileUrl: import.meta.url, + timeoutMs: TIMEOUT_MS, + }); }); - }); -} + } +}); diff --git a/e2e/scenarios/google-genai-instrumentation/scenario.test.ts b/e2e/scenarios/google-genai-instrumentation/scenario.test.ts index 1cdf32bd7..418cd969c 100644 --- a/e2e/scenarios/google-genai-instrumentation/scenario.test.ts +++ b/e2e/scenarios/google-genai-instrumentation/scenario.test.ts @@ -47,45 +47,51 @@ const googleGenAIScenarios = await Promise.all( })), ); -for (const scenario of googleGenAIScenarios) { - const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); 
+describe.concurrent("variants", () => { + for (const scenario of googleGenAIScenarios) { + const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); - describe(`google genai sdk ${scenario.version}`, { tags }, () => { - defineGoogleGenAIInstrumentationAssertions({ - name: "wrapped instrumentation", - runScenario: async ({ runScenarioDir }) => { - await runScenarioDir({ - entry: scenario.wrapperEntry, - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, + describe.sequential( + `google genai sdk ${scenario.version}`, + { tags }, + () => { + defineGoogleGenAIInstrumentationAssertions({ + name: "wrapped instrumentation", + runScenario: async ({ runScenarioDir }) => { + await runScenarioDir({ + entry: scenario.wrapperEntry, + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); }, - scenarioDir, + snapshotName: scenario.snapshotName, + testFileUrl: import.meta.url, timeoutMs: TIMEOUT_MS, }); - }, - snapshotName: scenario.snapshotName, - testFileUrl: import.meta.url, - timeoutMs: TIMEOUT_MS, - }); - defineGoogleGenAIInstrumentationAssertions({ - name: "auto-hook instrumentation", - runScenario: async ({ runNodeScenarioDir }) => { - await runNodeScenarioDir({ - entry: scenario.autoEntry, - nodeArgs: ["--import", "braintrust/hook.mjs"], - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, + defineGoogleGenAIInstrumentationAssertions({ + name: "auto-hook instrumentation", + runScenario: async ({ runNodeScenarioDir }) => { + await runNodeScenarioDir({ + entry: scenario.autoEntry, + nodeArgs: ["--import", "braintrust/hook.mjs"], + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); }, - scenarioDir, + snapshotName: scenario.snapshotName, + testFileUrl: import.meta.url, timeoutMs: TIMEOUT_MS, }); }, - snapshotName: scenario.snapshotName, - testFileUrl: import.meta.url, - 
timeoutMs: TIMEOUT_MS, - }); - }); -} + ); + } +}); diff --git a/e2e/scenarios/huggingface-instrumentation/scenario.test.ts b/e2e/scenarios/huggingface-instrumentation/scenario.test.ts index 594594dd7..72ac69256 100644 --- a/e2e/scenarios/huggingface-instrumentation/scenario.test.ts +++ b/e2e/scenarios/huggingface-instrumentation/scenario.test.ts @@ -26,45 +26,51 @@ const huggingFaceScenarios = await Promise.all( })), ); -for (const scenario of huggingFaceScenarios) { - const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); +describe.concurrent("variants", () => { + for (const scenario of huggingFaceScenarios) { + const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); - describe(`huggingface inference sdk ${scenario.version}`, { tags }, () => { - defineHuggingFaceInstrumentationAssertions({ - name: "wrapped instrumentation", - runScenario: async ({ runScenarioDir }) => { - await runScenarioDir({ - entry: scenario.wrapperEntry, - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, + describe.sequential( + `huggingface inference sdk ${scenario.version}`, + { tags }, + () => { + defineHuggingFaceInstrumentationAssertions({ + name: "wrapped instrumentation", + runScenario: async ({ runScenarioDir }) => { + await runScenarioDir({ + entry: scenario.wrapperEntry, + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: HUGGINGFACE_SCENARIO_TIMEOUT_MS, + }); }, - scenarioDir, + snapshotName: scenario.snapshotName, + testFileUrl: import.meta.url, timeoutMs: HUGGINGFACE_SCENARIO_TIMEOUT_MS, }); - }, - snapshotName: scenario.snapshotName, - testFileUrl: import.meta.url, - timeoutMs: HUGGINGFACE_SCENARIO_TIMEOUT_MS, - }); - defineHuggingFaceInstrumentationAssertions({ - name: "auto-hook instrumentation", - runScenario: async ({ runNodeScenarioDir }) => { - await runNodeScenarioDir({ - entry: scenario.autoEntry, - nodeArgs: ["--import", "braintrust/hook.mjs"], - runContext: { 
- variantKey: scenario.snapshotName, - originalScenarioDir, + defineHuggingFaceInstrumentationAssertions({ + name: "auto-hook instrumentation", + runScenario: async ({ runNodeScenarioDir }) => { + await runNodeScenarioDir({ + entry: scenario.autoEntry, + nodeArgs: ["--import", "braintrust/hook.mjs"], + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: HUGGINGFACE_SCENARIO_TIMEOUT_MS, + }); }, - scenarioDir, + snapshotName: scenario.snapshotName, + testFileUrl: import.meta.url, timeoutMs: HUGGINGFACE_SCENARIO_TIMEOUT_MS, }); }, - snapshotName: scenario.snapshotName, - testFileUrl: import.meta.url, - timeoutMs: HUGGINGFACE_SCENARIO_TIMEOUT_MS, - }); - }); -} + ); + } +}); diff --git a/e2e/scenarios/mistral-instrumentation/scenario.test.ts b/e2e/scenarios/mistral-instrumentation/scenario.test.ts index d7b6cf68f..e6ddddb0f 100644 --- a/e2e/scenarios/mistral-instrumentation/scenario.test.ts +++ b/e2e/scenarios/mistral-instrumentation/scenario.test.ts @@ -26,63 +26,65 @@ const mistralScenarios = await Promise.all( })), ); -for (const scenario of mistralScenarios) { - const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); +describe.concurrent("variants", () => { + for (const scenario of mistralScenarios) { + const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); - describe(`mistral sdk ${scenario.version}`, { tags }, () => { - defineMistralInstrumentationAssertions({ - name: "wrapped instrumentation", - runScenario: async ({ runScenarioDir }) => { - await runScenarioDir({ - entry: scenario.wrapperEntry, - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, - }, - scenarioDir, - timeoutMs: MISTRAL_SCENARIO_TIMEOUT_MS, - }); - }, - snapshotName: scenario.snapshotName, - ...(scenario.supportsThinkingStream === false - ? { supportsThinkingStream: false } - : {}), - ...(scenario.supportsClassifiers === false - ? 
{ supportsClassifiers: false } - : {}), - ...(scenario.supportsClassify === false - ? { supportsClassify: false } - : {}), - testFileUrl: import.meta.url, - timeoutMs: MISTRAL_SCENARIO_TIMEOUT_MS, - }); + describe.sequential(`mistral sdk ${scenario.version}`, { tags }, () => { + defineMistralInstrumentationAssertions({ + name: "wrapped instrumentation", + runScenario: async ({ runScenarioDir }) => { + await runScenarioDir({ + entry: scenario.wrapperEntry, + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: MISTRAL_SCENARIO_TIMEOUT_MS, + }); + }, + snapshotName: scenario.snapshotName, + ...(scenario.supportsThinkingStream === false + ? { supportsThinkingStream: false } + : {}), + ...(scenario.supportsClassifiers === false + ? { supportsClassifiers: false } + : {}), + ...(scenario.supportsClassify === false + ? { supportsClassify: false } + : {}), + testFileUrl: import.meta.url, + timeoutMs: MISTRAL_SCENARIO_TIMEOUT_MS, + }); - defineMistralInstrumentationAssertions({ - name: "auto-hook instrumentation", - runScenario: async ({ runNodeScenarioDir }) => { - await runNodeScenarioDir({ - entry: scenario.autoEntry, - nodeArgs: ["--import", "braintrust/hook.mjs"], - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, - }, - scenarioDir, - timeoutMs: MISTRAL_SCENARIO_TIMEOUT_MS, - }); - }, - snapshotName: scenario.snapshotName, - ...(scenario.supportsThinkingStream === false - ? { supportsThinkingStream: false } - : {}), - ...(scenario.supportsClassifiers === false - ? { supportsClassifiers: false } - : {}), - ...(scenario.supportsClassify === false - ? 
{ supportsClassify: false } - : {}), - testFileUrl: import.meta.url, - timeoutMs: MISTRAL_SCENARIO_TIMEOUT_MS, + defineMistralInstrumentationAssertions({ + name: "auto-hook instrumentation", + runScenario: async ({ runNodeScenarioDir }) => { + await runNodeScenarioDir({ + entry: scenario.autoEntry, + nodeArgs: ["--import", "braintrust/hook.mjs"], + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: MISTRAL_SCENARIO_TIMEOUT_MS, + }); + }, + snapshotName: scenario.snapshotName, + ...(scenario.supportsThinkingStream === false + ? { supportsThinkingStream: false } + : {}), + ...(scenario.supportsClassifiers === false + ? { supportsClassifiers: false } + : {}), + ...(scenario.supportsClassify === false + ? { supportsClassify: false } + : {}), + testFileUrl: import.meta.url, + timeoutMs: MISTRAL_SCENARIO_TIMEOUT_MS, + }); }); - }); -} + } +}); diff --git a/e2e/scenarios/openai-instrumentation/scenario.test.ts b/e2e/scenarios/openai-instrumentation/scenario.test.ts index 6438545bc..a2cba02d6 100644 --- a/e2e/scenarios/openai-instrumentation/scenario.test.ts +++ b/e2e/scenarios/openai-instrumentation/scenario.test.ts @@ -43,50 +43,52 @@ const openaiScenarios = await Promise.all( })), ); -for (const scenario of openaiScenarios) { - const assertPrivateFieldMethodsOperation = - !scenario.disablePrivateFieldMethodsAssertion; - const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); +describe.concurrent("variants", () => { + for (const scenario of openaiScenarios) { + const assertPrivateFieldMethodsOperation = + !scenario.disablePrivateFieldMethodsAssertion; + const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); - describe(`openai sdk ${scenario.version}`, { tags }, () => { - defineOpenAIInstrumentationAssertions({ - assertPrivateFieldMethodsOperation, - name: "wrapped instrumentation", - runScenario: async ({ runScenarioDir }) => { - await runScenarioDir({ - entry: scenario.wrapperEntry, - 
runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, - }, - scenarioDir, - timeoutMs: TIMEOUT_MS, - }); - }, - snapshotName: scenario.snapshotName, - testFileUrl: import.meta.url, - timeoutMs: TIMEOUT_MS, - version: scenario.version, - }); + describe.sequential(`openai sdk ${scenario.version}`, { tags }, () => { + defineOpenAIInstrumentationAssertions({ + assertPrivateFieldMethodsOperation, + name: "wrapped instrumentation", + runScenario: async ({ runScenarioDir }) => { + await runScenarioDir({ + entry: scenario.wrapperEntry, + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); + }, + snapshotName: scenario.snapshotName, + testFileUrl: import.meta.url, + timeoutMs: TIMEOUT_MS, + version: scenario.version, + }); - defineOpenAIInstrumentationAssertions({ - name: "auto-hook instrumentation", - runScenario: async ({ runNodeScenarioDir }) => { - await runNodeScenarioDir({ - entry: scenario.autoEntry, - nodeArgs: ["--import", "braintrust/hook.mjs"], - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, - }, - scenarioDir, - timeoutMs: TIMEOUT_MS, - }); - }, - snapshotName: scenario.snapshotName, - testFileUrl: import.meta.url, - timeoutMs: TIMEOUT_MS, - version: scenario.version, + defineOpenAIInstrumentationAssertions({ + name: "auto-hook instrumentation", + runScenario: async ({ runNodeScenarioDir }) => { + await runNodeScenarioDir({ + entry: scenario.autoEntry, + nodeArgs: ["--import", "braintrust/hook.mjs"], + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); + }, + snapshotName: scenario.snapshotName, + testFileUrl: import.meta.url, + timeoutMs: TIMEOUT_MS, + version: scenario.version, + }); }); - }); -} + } +}); diff --git a/e2e/scenarios/openrouter-instrumentation/scenario.test.ts b/e2e/scenarios/openrouter-instrumentation/scenario.test.ts index 091c00c7a..6ff1a662d 100644 
--- a/e2e/scenarios/openrouter-instrumentation/scenario.test.ts +++ b/e2e/scenarios/openrouter-instrumentation/scenario.test.ts @@ -37,47 +37,49 @@ const openRouterScenarios = await Promise.all( })), ); -for (const scenario of openRouterScenarios) { - const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); +describe.concurrent("variants", () => { + for (const scenario of openRouterScenarios) { + const tags = cassetteTagsFor(import.meta.url, scenario.snapshotName); - describe(`openrouter sdk ${scenario.version}`, { tags }, () => { - defineOpenRouterTraceAssertions({ - name: "wrapped instrumentation", - runScenario: async ({ runScenarioDir }) => { - await runScenarioDir({ - entry: scenario.wrapperEntry, - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, - }, - scenarioDir, - timeoutMs: TIMEOUT_MS, - }); - }, - snapshotName: scenario.snapshotName, - supportsRerank: scenario.supportsRerank, - testFileUrl: import.meta.url, - timeoutMs: TIMEOUT_MS, - }); + describe.sequential(`openrouter sdk ${scenario.version}`, { tags }, () => { + defineOpenRouterTraceAssertions({ + name: "wrapped instrumentation", + runScenario: async ({ runScenarioDir }) => { + await runScenarioDir({ + entry: scenario.wrapperEntry, + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); + }, + snapshotName: scenario.snapshotName, + supportsRerank: scenario.supportsRerank, + testFileUrl: import.meta.url, + timeoutMs: TIMEOUT_MS, + }); - defineOpenRouterTraceAssertions({ - name: "auto-hook instrumentation", - runScenario: async ({ runNodeScenarioDir }) => { - await runNodeScenarioDir({ - entry: scenario.autoEntry, - nodeArgs: ["--import", "braintrust/hook.mjs"], - runContext: { - variantKey: scenario.snapshotName, - originalScenarioDir, - }, - scenarioDir, - timeoutMs: TIMEOUT_MS, - }); - }, - snapshotName: scenario.snapshotName, - supportsRerank: scenario.supportsRerank, - testFileUrl: 
import.meta.url, - timeoutMs: TIMEOUT_MS, + defineOpenRouterTraceAssertions({ + name: "auto-hook instrumentation", + runScenario: async ({ runNodeScenarioDir }) => { + await runNodeScenarioDir({ + entry: scenario.autoEntry, + nodeArgs: ["--import", "braintrust/hook.mjs"], + runContext: { + variantKey: scenario.snapshotName, + originalScenarioDir, + }, + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); + }, + snapshotName: scenario.snapshotName, + supportsRerank: scenario.supportsRerank, + testFileUrl: import.meta.url, + timeoutMs: TIMEOUT_MS, + }); }); - }); -} + } +}); diff --git a/e2e/vitest.config.mts b/e2e/vitest.config.mts index 9f4e1b515..d6ee62af4 100644 --- a/e2e/vitest.config.mts +++ b/e2e/vitest.config.mts @@ -14,6 +14,10 @@ export default defineConfig({ slowTestThreshold: 120_000, // Default to one retry for provider/network flake in non-hermetic scenarios. retry: 1, + // Allow up to 5 describe blocks to run their beforeAll hooks concurrently + // within a file. Bounded to avoid overwhelming CI with too many subprocesses + // at once. Tune down if CI shows memory pressure or flaky timeouts. 
+ maxConcurrency: 5, setupFiles: ["./vitest.setup.ts"], tags: [ { diff --git a/js/package.json b/js/package.json index 75c3ec39e..754be23d1 100644 --- a/js/package.json +++ b/js/package.json @@ -118,7 +118,8 @@ "test": "vitest run --exclude \"src/wrappers/**/*.test.ts\" --exclude \"src/otel/**/*.test.ts\" --exclude \"smoke/**/*.test.ts\" --exclude \"src/zod/**/*.test.ts\" --exclude \"tests/api-compatibility/**\"", "test:core": "pnpm prune && pnpm test", "test:checks": "pnpm run test:core && pnpm run test:vitest", - "test:external": "pnpm run test:external:openai && pnpm run test:external:anthropic && pnpm run test:external:google-genai && pnpm run test:external:ai-sdk && pnpm run test:external:claude-agent-sdk", + "test:external": "pnpm run test:external:sequential && node scripts/run-parallel.mjs test:external:ai-sdk-v5 test:external:ai-sdk-v6 test:external:claude-agent-sdk", + "test:external:sequential": "pnpm run test:external:openai && pnpm run test:external:anthropic && pnpm run test:external:google-genai", "test:external:openai": "bash scripts/test-provider.sh test:openai openai", "test:external:anthropic": "bash scripts/test-provider.sh test:anthropic @anthropic-ai/sdk", "test:external:google-genai": "bash scripts/test-provider.sh test:google-genai @google/genai", diff --git a/js/scripts/run-parallel.mjs b/js/scripts/run-parallel.mjs new file mode 100644 index 000000000..49b23cb15 --- /dev/null +++ b/js/scripts/run-parallel.mjs @@ -0,0 +1,45 @@ +#!/usr/bin/env node +// Run multiple pnpm scripts concurrently and exit non-zero if any fail. +// Cross-platform (works on Windows, macOS, Linux). +// +// Usage: node scripts/run-parallel.mjs <script1> [script2 ...]
+import { spawn } from "node:child_process"; +import { dirname, join } from "node:path"; +import { fileURLToPath } from "node:url"; + +const scripts = process.argv.slice(2); +if (!scripts.length) { + console.error("Usage: run-parallel.mjs <script1> [script2 ...]"); + process.exit(1); +} + +const pkgDir = join(dirname(fileURLToPath(import.meta.url)), ".."); + +const results = await Promise.allSettled( + scripts.map( + (script) => + new Promise((resolve, reject) => { + const child = spawn("pnpm", ["run", script], { + cwd: pkgDir, + stdio: "inherit", + shell: true, + }); + child.on("error", reject); + child.on("close", (code) => { + if (code === 0) { + resolve(script); + } else { + reject(new Error(`${script} exited with code ${code}`)); + } + }); + }), + ), +); + +const failures = results.filter((r) => r.status === "rejected"); +for (const f of failures) { + console.error(f.reason.message); +} +if (failures.length > 0) { + process.exit(1); +}