From 5f87e25966370162cfe29eebada17c23b7cac2f3 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 20 Apr 2026 21:49:01 +0000 Subject: [PATCH 01/26] chore: Sync release branch to main (a6d62c4832d5) (#1875) Synchronizes the main branch with the release branch. (changed files should generally only be package versions, changeset files, and changelogs) --------- Co-authored-by: Luca Forstner Co-authored-by: github-actions[bot] <41898282+github-actions[bot]@users.noreply.github.com> --- .changeset/slimy-hands-run.md | 5 ----- js/CHANGELOG.md | 1 + 2 files changed, 1 insertion(+), 5 deletions(-) delete mode 100644 .changeset/slimy-hands-run.md diff --git a/.changeset/slimy-hands-run.md b/.changeset/slimy-hands-run.md deleted file mode 100644 index b2228fa68..000000000 --- a/.changeset/slimy-hands-run.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"braintrust": minor ---- - -feat: Instrument Anthropic SDK tool runner diff --git a/js/CHANGELOG.md b/js/CHANGELOG.md index 5730498bc..0a374e030 100644 --- a/js/CHANGELOG.md +++ b/js/CHANGELOG.md @@ -8,6 +8,7 @@ - feat: Add `cohere-ai` instrumentation ([#1781](https://github.com/braintrustdata/braintrust-sdk-javascript/issues/1781)) - feat: Add reranking instrumentation for AI SDK and Openrouter SDK [#1824](https://github.com/braintrustdata/braintrust-sdk-javascript/pull/1824) - feat: Instrument Google GenAI `embedContent` for text ([#1821](https://github.com/braintrustdata/braintrust-sdk-javascript/pull/1821)) +- feat: Instrument Anthropic SDK tool runner ([1833](https://github.com/braintrustdata/braintrust-sdk-javascript/pull/1833)) ### Other Changes From a25315ca47543a81ca2077c18fef59888923ba82 Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Tue, 21 Apr 2026 10:48:57 +0200 Subject: [PATCH 02/26] fix: Fix export map for bundler plugins (#1870) Ref https://linear.app/braintrustdata/issue/BT-4857/bundler-plugin-cjs-exports-map-may-be-broken-webpackjs-not-found-at --- 
.changeset/cold-geese-move.md | 5 +++++ js/package.json | 8 ++++---- 2 files changed, 9 insertions(+), 4 deletions(-) create mode 100644 .changeset/cold-geese-move.md diff --git a/.changeset/cold-geese-move.md b/.changeset/cold-geese-move.md new file mode 100644 index 000000000..77496c322 --- /dev/null +++ b/.changeset/cold-geese-move.md @@ -0,0 +1,5 @@ +--- +"braintrust": patch +--- + +fix: Fix export map for bundler plugins diff --git a/js/package.json b/js/package.json index f55991fe7..604affe2e 100644 --- a/js/package.json +++ b/js/package.json @@ -79,13 +79,13 @@ "types": "./dist/auto-instrumentations/bundler/vite.d.ts", "import": "./dist/auto-instrumentations/bundler/vite.mjs", "module": "./dist/auto-instrumentations/bundler/vite.mjs", - "require": "./dist/auto-instrumentations/bundler/vite.js" + "require": "./dist/auto-instrumentations/bundler/vite.cjs" }, "./webpack": { "types": "./dist/auto-instrumentations/bundler/webpack.d.ts", "import": "./dist/auto-instrumentations/bundler/webpack.mjs", "module": "./dist/auto-instrumentations/bundler/webpack.mjs", - "require": "./dist/auto-instrumentations/bundler/webpack.js" + "require": "./dist/auto-instrumentations/bundler/webpack.cjs" }, "./webpack-loader": { "types": "./dist/auto-instrumentations/bundler/webpack-loader.d.ts", @@ -95,13 +95,13 @@ "types": "./dist/auto-instrumentations/bundler/esbuild.d.ts", "import": "./dist/auto-instrumentations/bundler/esbuild.mjs", "module": "./dist/auto-instrumentations/bundler/esbuild.mjs", - "require": "./dist/auto-instrumentations/bundler/esbuild.js" + "require": "./dist/auto-instrumentations/bundler/esbuild.cjs" }, "./rollup": { "types": "./dist/auto-instrumentations/bundler/rollup.d.ts", "import": "./dist/auto-instrumentations/bundler/rollup.mjs", "module": "./dist/auto-instrumentations/bundler/rollup.mjs", - "require": "./dist/auto-instrumentations/bundler/rollup.js" + "require": "./dist/auto-instrumentations/bundler/rollup.cjs" } }, "files": [ From 
2eb3287660c1ff4f3a9700f6f9277720dc6209fb Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Tue, 21 Apr 2026 12:06:10 +0200 Subject: [PATCH 03/26] test: Don't use discontinued models (#1873) Moves us away from discontinued models for testing. --- .../ai-sdk-instrumentation/scenario.impl.mjs | 2 +- .../anthropic-v0273.log-payloads.json | 44 ++++----- .../anthropic-v0273.span-events.json | 12 +-- .../anthropic-v0390.log-payloads.json | 60 ++++++------ .../anthropic-v0390.span-events.json | 16 +-- .../anthropic-v0712.log-payloads.json | 98 ++++++++++--------- .../anthropic-v0712.span-events.json | 22 ++--- .../anthropic-v0730.log-payloads.json | 98 ++++++++++--------- .../anthropic-v0730.span-events.json | 22 ++--- .../anthropic-v0780.log-payloads.json | 98 ++++++++++--------- .../anthropic-v0780.span-events.json | 22 ++--- .../anthropic-v0800.log-payloads.json | 98 ++++++++++--------- .../anthropic-v0800.span-events.json | 22 ++--- .../anthropic-instrumentation/assertions.ts | 33 +++++-- .../scenario.impl.mjs | 2 +- .../claude-agent-sdk-v0.1.span-events.json | 16 +-- .../claude-agent-sdk-v0.2.76.span-events.json | 16 +-- .../claude-agent-sdk-v0.2.79.span-events.json | 16 +-- .../claude-agent-sdk-v0.2.81.span-events.json | 16 +-- .../assertions.ts | 6 ++ .../scenario.impl.mjs | 2 +- .../huggingface-v281.log-payloads.json | 18 ++-- .../huggingface-v3150.log-payloads.json | 18 ++-- .../huggingface-v41315.log-payloads.json | 18 ++-- .../huggingface-instrumentation/assertions.ts | 9 +- .../google-genai-example.js | 2 +- js/examples/claude-agent-sdk/async_iter.ts | 2 +- js/examples/claude-agent-sdk/index.ts | 2 +- js/src/wrappers/anthropic.test.ts | 17 ++-- .../claude-agent-sdk/claude-agent-sdk.test.ts | 4 +- js/src/wrappers/google-genai.test.ts | 2 +- 31 files changed, 443 insertions(+), 370 deletions(-) diff --git a/e2e/scenarios/ai-sdk-instrumentation/scenario.impl.mjs b/e2e/scenarios/ai-sdk-instrumentation/scenario.impl.mjs index aee9a0030..1f72e0804 100644 --- 
a/e2e/scenarios/ai-sdk-instrumentation/scenario.impl.mjs +++ b/e2e/scenarios/ai-sdk-instrumentation/scenario.impl.mjs @@ -143,7 +143,7 @@ async function runAISDKInstrumentationScenario( ) { const instrumentedAI = decorateAI ? decorateAI(options.ai) : options.ai; const openaiModel = options.openai("gpt-4o-mini-2024-07-18"); - const anthropicModel = options.anthropic?.("claude-3-haiku-20240307"); + const anthropicModel = options.anthropic?.("claude-haiku-4-5"); const openaiEmbeddingModel = options.openai.textEmbeddingModel( "text-embedding-3-small", ); diff --git a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0273.log-payloads.json b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0273.log-payloads.json index 69590d2f3..bd580cbc8 100644 --- a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0273.log-payloads.json +++ b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0273.log-payloads.json @@ -29,26 +29,26 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 5, + "completion_tokens": 4, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 12, "start": 0, "time_to_first_token": 0, - "tokens": 17 + "tokens": 16 }, "name": "anthropic.messages.create", "output": { "content": [ { - "text": "OK.", + "text": "OK", "type": "text" } ], @@ -93,20 +93,20 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 29, + "completion_tokens": "", "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 1389, "start": 0, "time_to_first_token": 0, - "tokens": 1418 + "tokens": "" }, "name": "anthropic.messages.create", "output": { @@ -139,23 +139,23 @@ } ], 
"metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 18, + "completion_tokens": 15, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 24, "start": 0, "time_to_first_token": 0, - "tokens": 42 + "tokens": 39 }, "name": "anthropic.messages.create", - "output": "1 - one\n2 - two\n3 - three", + "output": "1 one\n2 two\n3 three", "type": "llm" }, { @@ -177,23 +177,23 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 18, + "completion_tokens": 15, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 24, "start": 0, "time_to_first_token": 0, - "tokens": 42 + "tokens": 39 }, "name": "anthropic.messages.create", - "output": "1 - one\n2 - two\n3 - three", + "output": "1 one\n2 two\n3 three", "type": "llm" }, { @@ -215,7 +215,7 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "tool_use", "stop_sequence": null @@ -225,10 +225,10 @@ "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, - "prompt_tokens": 454, + "prompt_tokens": 687, "start": 0, "time_to_first_token": 0, - "tokens": 480 + "tokens": 713 }, "name": "anthropic.messages.create", "output": { @@ -265,20 +265,20 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "tool_use", "stop_sequence": null }, "metrics": { - "completion_tokens": 55, + "completion_tokens": 56, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, - "prompt_tokens": 357, + "prompt_tokens": 589, "start": 0, "time_to_first_token": 0, - "tokens": 412 + "tokens": 645 }, "name": 
"anthropic.messages.create", "output": { diff --git a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0273.span-events.json b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0273.span-events.json index 187e089f8..50ae8de47 100644 --- a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0273.span-events.json +++ b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0273.span-events.json @@ -31,7 +31,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -69,7 +69,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -107,7 +107,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -145,7 +145,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -183,7 +183,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -221,7 +221,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ diff --git a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0390.log-payloads.json b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0390.log-payloads.json index ba4f1d519..748d47113 100644 --- a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0390.log-payloads.json +++ b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0390.log-payloads.json @@ -29,26 +29,26 @@ } ], 
"metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 5, + "completion_tokens": 4, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 12, "start": 0, "time_to_first_token": 0, - "tokens": 17 + "tokens": 16 }, "name": "anthropic.messages.create", "output": { "content": [ { - "text": "OK.", + "text": "OK", "type": "text" } ], @@ -93,20 +93,20 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 29, + "completion_tokens": "", "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 1389, "start": 0, "time_to_first_token": 0, - "tokens": 1418 + "tokens": "" }, "name": "anthropic.messages.create", "output": { @@ -139,23 +139,23 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 18, + "completion_tokens": 15, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 24, "start": 0, "time_to_first_token": 0, - "tokens": 42 + "tokens": 39 }, "name": "anthropic.messages.create", - "output": "1 - one\n2 - two\n3 - three", + "output": "1 one\n2 two\n3 three", "type": "llm" }, { @@ -177,23 +177,23 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 18, + "completion_tokens": 15, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 24, "start": 0, "time_to_first_token": 0, - "tokens": 42 + "tokens": 39 }, "name": "anthropic.messages.create", - "output": "1 
- one\n2 - two\n3 - three", + "output": "1 one\n2 two\n3 three", "type": "llm" }, { @@ -215,7 +215,7 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "tool_use", "stop_sequence": null @@ -225,10 +225,10 @@ "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, - "prompt_tokens": 454, + "prompt_tokens": 687, "start": 0, "time_to_first_token": 0, - "tokens": 480 + "tokens": 713 }, "name": "anthropic.messages.create", "output": { @@ -265,20 +265,20 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "tool_use", "stop_sequence": null }, "metrics": { - "completion_tokens": 55, + "completion_tokens": 56, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, - "prompt_tokens": 357, + "prompt_tokens": 589, "start": 0, "time_to_first_token": 0, - "tokens": 412 + "tokens": 645 }, "name": "anthropic.messages.create", "output": { @@ -315,26 +315,26 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 6, + "completion_tokens": 5, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 13, "start": 0, "time_to_first_token": 0, - "tokens": 19 + "tokens": 18 }, "name": "anthropic.messages.create", "output": { "content": [ { - "text": "BETA.", + "text": "BETA", "type": "text" } ], @@ -361,23 +361,23 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 18, + "completion_tokens": 15, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 24, "start": 0, "time_to_first_token": 0, - "tokens": 42 + "tokens": 39 }, "name": 
"anthropic.messages.create", - "output": "1 - one\n2 - two\n3 - three", + "output": "1 one\n2 two\n3 three", "type": "llm" } ] diff --git a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0390.span-events.json b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0390.span-events.json index 0bfc89b4c..10d51bfdc 100644 --- a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0390.span-events.json +++ b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0390.span-events.json @@ -31,7 +31,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -69,7 +69,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -107,7 +107,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -145,7 +145,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -183,7 +183,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -221,7 +221,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -259,7 +259,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -297,7 +297,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" 
}, "metric_keys": [ diff --git a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0712.log-payloads.json b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0712.log-payloads.json index a782f61c0..7b7fd46d3 100644 --- a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0712.log-payloads.json +++ b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0712.log-payloads.json @@ -29,26 +29,26 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 5, + "completion_tokens": 4, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 12, "start": 0, "time_to_first_token": 0, - "tokens": 17 + "tokens": 16 }, "name": "anthropic.messages.create", "output": { "content": [ { - "text": "OK.", + "text": "OK", "type": "text" } ], @@ -93,20 +93,20 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 29, + "completion_tokens": "", "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 1389, "start": 0, "time_to_first_token": 0, - "tokens": 1418 + "tokens": "" }, "name": "anthropic.messages.create", "output": { @@ -139,23 +139,23 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 18, + "completion_tokens": 15, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 24, "start": 0, "time_to_first_token": 0, - "tokens": 42 + "tokens": 39 }, "name": "anthropic.messages.create", - "output": "1 - one\n2 - two\n3 - three", + "output": "1 one\n2 two\n3 three", "type": "llm" }, { @@ -177,23 
+177,23 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 18, + "completion_tokens": 15, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 24, "start": 0, "time_to_first_token": 0, - "tokens": 42 + "tokens": 39 }, "name": "anthropic.messages.create", - "output": "1 - one\n2 - two\n3 - three", + "output": "1 one\n2 two\n3 three", "type": "llm" }, { @@ -215,7 +215,7 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "tool_use", "stop_sequence": null @@ -225,10 +225,10 @@ "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, - "prompt_tokens": 454, + "prompt_tokens": 687, "start": 0, "time_to_first_token": 0, - "tokens": 480 + "tokens": 713 }, "name": "anthropic.messages.create", "output": { @@ -265,20 +265,20 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "tool_use", "stop_sequence": null }, "metrics": { - "completion_tokens": 55, + "completion_tokens": 56, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, - "prompt_tokens": 357, + "prompt_tokens": 589, "start": 0, "time_to_first_token": 0, - "tokens": 412 + "tokens": 645 }, "name": "anthropic.messages.create", "output": { @@ -365,26 +365,26 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 6, + "completion_tokens": 5, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 13, "start": 0, "time_to_first_token": 0, - "tokens": 19 + "tokens": 18 }, "name": "anthropic.messages.create", "output": { "content": [ { - "text": "BETA.", + "text": 
"BETA", "type": "text" } ], @@ -411,23 +411,23 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 18, + "completion_tokens": 15, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 24, "start": 0, "time_to_first_token": 0, - "tokens": 42 + "tokens": 39 }, "name": "anthropic.messages.create", - "output": "1 - one\n2 - two\n3 - three", + "output": "1 one\n2 two\n3 three", "type": "llm" }, { @@ -450,25 +450,30 @@ ], "metadata": { "anthropic_tool_runner_iterations": 2, - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "operation": "toolRunner", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 58, + "completion_tokens": 72, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, - "prompt_tokens": 829, + "prompt_tokens": 1294, "start": 0, "time_to_first_token": 0, - "tokens": 887 + "tokens": 1366 }, "name": "anthropic.beta.messages.toolRunner", "output": { - "content": [], + "content": [ + { + "text": "The weather in Paris, France is 18C and sunny.", + "type": "text" + } + ], "role": "assistant" }, "type": "task" @@ -505,20 +510,20 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "tool_use", "stop_sequence": null }, "metrics": { - "completion_tokens": 55, + "completion_tokens": 56, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, - "prompt_tokens": 375, + "prompt_tokens": 607, "start": 0, "time_to_first_token": 0, - "tokens": 430 + "tokens": 663 }, "name": "anthropic.messages.create", "output": { @@ -570,24 +575,29 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, 
"metrics": { - "completion_tokens": 3, + "completion_tokens": 16, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, - "prompt_tokens": 454, + "prompt_tokens": 687, "start": 0, "time_to_first_token": 0, - "tokens": 457 + "tokens": 703 }, "name": "anthropic.messages.create", "output": { - "content": [], + "content": [ + { + "text": "", + "type": "text" + } + ], "role": "assistant" }, "type": "llm" diff --git a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0712.span-events.json b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0712.span-events.json index 9cd6b6844..0acff38b6 100644 --- a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0712.span-events.json +++ b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0712.span-events.json @@ -31,7 +31,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -69,7 +69,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -107,7 +107,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -145,7 +145,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -183,7 +183,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -221,7 +221,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -297,7 +297,7 @@ "has_input": true, "has_output": 
true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -335,7 +335,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -374,7 +374,7 @@ "has_output": true, "metadata": { "anthropic_tool_runner_iterations": 2, - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "operation": "toolRunner", "provider": "anthropic" }, @@ -426,7 +426,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -449,7 +449,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ diff --git a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0730.log-payloads.json b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0730.log-payloads.json index a782f61c0..7b7fd46d3 100644 --- a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0730.log-payloads.json +++ b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0730.log-payloads.json @@ -29,26 +29,26 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 5, + "completion_tokens": 4, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 12, "start": 0, "time_to_first_token": 0, - "tokens": 17 + "tokens": 16 }, "name": "anthropic.messages.create", "output": { "content": [ { - "text": "OK.", + "text": "OK", "type": "text" } ], @@ -93,20 +93,20 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": 
"end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 29, + "completion_tokens": "", "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 1389, "start": 0, "time_to_first_token": 0, - "tokens": 1418 + "tokens": "" }, "name": "anthropic.messages.create", "output": { @@ -139,23 +139,23 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 18, + "completion_tokens": 15, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 24, "start": 0, "time_to_first_token": 0, - "tokens": 42 + "tokens": 39 }, "name": "anthropic.messages.create", - "output": "1 - one\n2 - two\n3 - three", + "output": "1 one\n2 two\n3 three", "type": "llm" }, { @@ -177,23 +177,23 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 18, + "completion_tokens": 15, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 24, "start": 0, "time_to_first_token": 0, - "tokens": 42 + "tokens": 39 }, "name": "anthropic.messages.create", - "output": "1 - one\n2 - two\n3 - three", + "output": "1 one\n2 two\n3 three", "type": "llm" }, { @@ -215,7 +215,7 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "tool_use", "stop_sequence": null @@ -225,10 +225,10 @@ "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, - "prompt_tokens": 454, + "prompt_tokens": 687, "start": 0, "time_to_first_token": 0, - "tokens": 480 + "tokens": 713 }, "name": "anthropic.messages.create", "output": { @@ -265,20 +265,20 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": 
"anthropic", "stop_reason": "tool_use", "stop_sequence": null }, "metrics": { - "completion_tokens": 55, + "completion_tokens": 56, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, - "prompt_tokens": 357, + "prompt_tokens": 589, "start": 0, "time_to_first_token": 0, - "tokens": 412 + "tokens": 645 }, "name": "anthropic.messages.create", "output": { @@ -365,26 +365,26 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 6, + "completion_tokens": 5, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 13, "start": 0, "time_to_first_token": 0, - "tokens": 19 + "tokens": 18 }, "name": "anthropic.messages.create", "output": { "content": [ { - "text": "BETA.", + "text": "BETA", "type": "text" } ], @@ -411,23 +411,23 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 18, + "completion_tokens": 15, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 24, "start": 0, "time_to_first_token": 0, - "tokens": 42 + "tokens": 39 }, "name": "anthropic.messages.create", - "output": "1 - one\n2 - two\n3 - three", + "output": "1 one\n2 two\n3 three", "type": "llm" }, { @@ -450,25 +450,30 @@ ], "metadata": { "anthropic_tool_runner_iterations": 2, - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "operation": "toolRunner", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 58, + "completion_tokens": 72, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, - "prompt_tokens": 829, + "prompt_tokens": 1294, "start": 0, "time_to_first_token": 0, - "tokens": 887 + "tokens": 1366 }, "name": 
"anthropic.beta.messages.toolRunner", "output": { - "content": [], + "content": [ + { + "text": "The weather in Paris, France is 18C and sunny.", + "type": "text" + } + ], "role": "assistant" }, "type": "task" @@ -505,20 +510,20 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "tool_use", "stop_sequence": null }, "metrics": { - "completion_tokens": 55, + "completion_tokens": 56, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, - "prompt_tokens": 375, + "prompt_tokens": 607, "start": 0, "time_to_first_token": 0, - "tokens": 430 + "tokens": 663 }, "name": "anthropic.messages.create", "output": { @@ -570,24 +575,29 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 3, + "completion_tokens": 16, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, - "prompt_tokens": 454, + "prompt_tokens": 687, "start": 0, "time_to_first_token": 0, - "tokens": 457 + "tokens": 703 }, "name": "anthropic.messages.create", "output": { - "content": [], + "content": [ + { + "text": "", + "type": "text" + } + ], "role": "assistant" }, "type": "llm" diff --git a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0730.span-events.json b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0730.span-events.json index 9cd6b6844..0acff38b6 100644 --- a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0730.span-events.json +++ b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0730.span-events.json @@ -31,7 +31,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -69,7 +69,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": 
"claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -107,7 +107,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -145,7 +145,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -183,7 +183,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -221,7 +221,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -297,7 +297,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -335,7 +335,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -374,7 +374,7 @@ "has_output": true, "metadata": { "anthropic_tool_runner_iterations": 2, - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "operation": "toolRunner", "provider": "anthropic" }, @@ -426,7 +426,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -449,7 +449,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ diff --git a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0780.log-payloads.json b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0780.log-payloads.json 
index a782f61c0..7b7fd46d3 100644 --- a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0780.log-payloads.json +++ b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0780.log-payloads.json @@ -29,26 +29,26 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 5, + "completion_tokens": 4, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 12, "start": 0, "time_to_first_token": 0, - "tokens": 17 + "tokens": 16 }, "name": "anthropic.messages.create", "output": { "content": [ { - "text": "OK.", + "text": "OK", "type": "text" } ], @@ -93,20 +93,20 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 29, + "completion_tokens": "", "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 1389, "start": 0, "time_to_first_token": 0, - "tokens": 1418 + "tokens": "" }, "name": "anthropic.messages.create", "output": { @@ -139,23 +139,23 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 18, + "completion_tokens": 15, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 24, "start": 0, "time_to_first_token": 0, - "tokens": 42 + "tokens": 39 }, "name": "anthropic.messages.create", - "output": "1 - one\n2 - two\n3 - three", + "output": "1 one\n2 two\n3 three", "type": "llm" }, { @@ -177,23 +177,23 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 
18, + "completion_tokens": 15, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 24, "start": 0, "time_to_first_token": 0, - "tokens": 42 + "tokens": 39 }, "name": "anthropic.messages.create", - "output": "1 - one\n2 - two\n3 - three", + "output": "1 one\n2 two\n3 three", "type": "llm" }, { @@ -215,7 +215,7 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "tool_use", "stop_sequence": null @@ -225,10 +225,10 @@ "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, - "prompt_tokens": 454, + "prompt_tokens": 687, "start": 0, "time_to_first_token": 0, - "tokens": 480 + "tokens": 713 }, "name": "anthropic.messages.create", "output": { @@ -265,20 +265,20 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "tool_use", "stop_sequence": null }, "metrics": { - "completion_tokens": 55, + "completion_tokens": 56, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, - "prompt_tokens": 357, + "prompt_tokens": 589, "start": 0, "time_to_first_token": 0, - "tokens": 412 + "tokens": 645 }, "name": "anthropic.messages.create", "output": { @@ -365,26 +365,26 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 6, + "completion_tokens": 5, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 13, "start": 0, "time_to_first_token": 0, - "tokens": 19 + "tokens": 18 }, "name": "anthropic.messages.create", "output": { "content": [ { - "text": "BETA.", + "text": "BETA", "type": "text" } ], @@ -411,23 +411,23 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, 
"metrics": { - "completion_tokens": 18, + "completion_tokens": 15, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 24, "start": 0, "time_to_first_token": 0, - "tokens": 42 + "tokens": 39 }, "name": "anthropic.messages.create", - "output": "1 - one\n2 - two\n3 - three", + "output": "1 one\n2 two\n3 three", "type": "llm" }, { @@ -450,25 +450,30 @@ ], "metadata": { "anthropic_tool_runner_iterations": 2, - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "operation": "toolRunner", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 58, + "completion_tokens": 72, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, - "prompt_tokens": 829, + "prompt_tokens": 1294, "start": 0, "time_to_first_token": 0, - "tokens": 887 + "tokens": 1366 }, "name": "anthropic.beta.messages.toolRunner", "output": { - "content": [], + "content": [ + { + "text": "The weather in Paris, France is 18C and sunny.", + "type": "text" + } + ], "role": "assistant" }, "type": "task" @@ -505,20 +510,20 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "tool_use", "stop_sequence": null }, "metrics": { - "completion_tokens": 55, + "completion_tokens": 56, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, - "prompt_tokens": 375, + "prompt_tokens": 607, "start": 0, "time_to_first_token": 0, - "tokens": 430 + "tokens": 663 }, "name": "anthropic.messages.create", "output": { @@ -570,24 +575,29 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 3, + "completion_tokens": 16, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, - "prompt_tokens": 454, + "prompt_tokens": 687, "start": 0, 
"time_to_first_token": 0, - "tokens": 457 + "tokens": 703 }, "name": "anthropic.messages.create", "output": { - "content": [], + "content": [ + { + "text": "", + "type": "text" + } + ], "role": "assistant" }, "type": "llm" diff --git a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0780.span-events.json b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0780.span-events.json index 9cd6b6844..0acff38b6 100644 --- a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0780.span-events.json +++ b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0780.span-events.json @@ -31,7 +31,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -69,7 +69,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -107,7 +107,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -145,7 +145,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -183,7 +183,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -221,7 +221,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -297,7 +297,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -335,7 +335,7 @@ "has_input": true, "has_output": true, 
"metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -374,7 +374,7 @@ "has_output": true, "metadata": { "anthropic_tool_runner_iterations": 2, - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "operation": "toolRunner", "provider": "anthropic" }, @@ -426,7 +426,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -449,7 +449,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ diff --git a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0800.log-payloads.json b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0800.log-payloads.json index a782f61c0..7b7fd46d3 100644 --- a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0800.log-payloads.json +++ b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0800.log-payloads.json @@ -29,26 +29,26 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 5, + "completion_tokens": 4, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 12, "start": 0, "time_to_first_token": 0, - "tokens": 17 + "tokens": 16 }, "name": "anthropic.messages.create", "output": { "content": [ { - "text": "OK.", + "text": "OK", "type": "text" } ], @@ -93,20 +93,20 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 29, + "completion_tokens": "", "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 
1389, "start": 0, "time_to_first_token": 0, - "tokens": 1418 + "tokens": "" }, "name": "anthropic.messages.create", "output": { @@ -139,23 +139,23 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 18, + "completion_tokens": 15, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 24, "start": 0, "time_to_first_token": 0, - "tokens": 42 + "tokens": 39 }, "name": "anthropic.messages.create", - "output": "1 - one\n2 - two\n3 - three", + "output": "1 one\n2 two\n3 three", "type": "llm" }, { @@ -177,23 +177,23 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 18, + "completion_tokens": 15, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 24, "start": 0, "time_to_first_token": 0, - "tokens": 42 + "tokens": 39 }, "name": "anthropic.messages.create", - "output": "1 - one\n2 - two\n3 - three", + "output": "1 one\n2 two\n3 three", "type": "llm" }, { @@ -215,7 +215,7 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "tool_use", "stop_sequence": null @@ -225,10 +225,10 @@ "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, - "prompt_tokens": 454, + "prompt_tokens": 687, "start": 0, "time_to_first_token": 0, - "tokens": 480 + "tokens": 713 }, "name": "anthropic.messages.create", "output": { @@ -265,20 +265,20 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "tool_use", "stop_sequence": null }, "metrics": { - "completion_tokens": 55, + "completion_tokens": 56, "end": 0, "prompt_cache_creation_tokens": 0, 
"prompt_cached_tokens": 0, - "prompt_tokens": 357, + "prompt_tokens": 589, "start": 0, "time_to_first_token": 0, - "tokens": 412 + "tokens": 645 }, "name": "anthropic.messages.create", "output": { @@ -365,26 +365,26 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 6, + "completion_tokens": 5, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 13, "start": 0, "time_to_first_token": 0, - "tokens": 19 + "tokens": 18 }, "name": "anthropic.messages.create", "output": { "content": [ { - "text": "BETA.", + "text": "BETA", "type": "text" } ], @@ -411,23 +411,23 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 18, + "completion_tokens": 15, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, "prompt_tokens": 24, "start": 0, "time_to_first_token": 0, - "tokens": 42 + "tokens": 39 }, "name": "anthropic.messages.create", - "output": "1 - one\n2 - two\n3 - three", + "output": "1 one\n2 two\n3 three", "type": "llm" }, { @@ -450,25 +450,30 @@ ], "metadata": { "anthropic_tool_runner_iterations": 2, - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "operation": "toolRunner", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 58, + "completion_tokens": 72, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, - "prompt_tokens": 829, + "prompt_tokens": 1294, "start": 0, "time_to_first_token": 0, - "tokens": 887 + "tokens": 1366 }, "name": "anthropic.beta.messages.toolRunner", "output": { - "content": [], + "content": [ + { + "text": "The weather in Paris, France is 18C and sunny.", + "type": "text" + } + ], "role": 
"assistant" }, "type": "task" @@ -505,20 +510,20 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "tool_use", "stop_sequence": null }, "metrics": { - "completion_tokens": 55, + "completion_tokens": 56, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, - "prompt_tokens": 375, + "prompt_tokens": 607, "start": 0, "time_to_first_token": 0, - "tokens": 430 + "tokens": 663 }, "name": "anthropic.messages.create", "output": { @@ -570,24 +575,29 @@ } ], "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic", "stop_reason": "end_turn", "stop_sequence": null }, "metrics": { - "completion_tokens": 3, + "completion_tokens": 16, "end": 0, "prompt_cache_creation_tokens": 0, "prompt_cached_tokens": 0, - "prompt_tokens": 454, + "prompt_tokens": 687, "start": 0, "time_to_first_token": 0, - "tokens": 457 + "tokens": 703 }, "name": "anthropic.messages.create", "output": { - "content": [], + "content": [ + { + "text": "", + "type": "text" + } + ], "role": "assistant" }, "type": "llm" diff --git a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0800.span-events.json b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0800.span-events.json index 9cd6b6844..0acff38b6 100644 --- a/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0800.span-events.json +++ b/e2e/scenarios/anthropic-instrumentation/__snapshots__/anthropic-v0800.span-events.json @@ -31,7 +31,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -69,7 +69,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -107,7 +107,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": 
"claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -145,7 +145,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -183,7 +183,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -221,7 +221,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -297,7 +297,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -335,7 +335,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -374,7 +374,7 @@ "has_output": true, "metadata": { "anthropic_tool_runner_iterations": 2, - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "operation": "toolRunner", "provider": "anthropic" }, @@ -426,7 +426,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ @@ -449,7 +449,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-3-haiku-20240307", + "model": "claude-haiku-4-5", "provider": "anthropic" }, "metric_keys": [ diff --git a/e2e/scenarios/anthropic-instrumentation/assertions.ts b/e2e/scenarios/anthropic-instrumentation/assertions.ts index 01974fca8..2401efc27 100644 --- a/e2e/scenarios/anthropic-instrumentation/assertions.ts +++ b/e2e/scenarios/anthropic-instrumentation/assertions.ts @@ -73,6 +73,23 @@ function pickMetadata( return Object.keys(picked).length > 0 ? 
(picked as Json) : null; } +function normalizeMetricValues( + metrics: Json, + keys: string[], + replacement: Json, +): void { + if (!metrics || typeof metrics !== "object" || Array.isArray(metrics)) { + return; + } + + const metricsRecord = metrics as Record; + for (const key of keys) { + if (typeof metricsRecord[key] === "number") { + metricsRecord[key] = replacement; + } + } +} + function summarizeAnthropicPayload(event: CapturedLogEvent): Json { const normalizeToolResultIds = ( messages: @@ -166,12 +183,11 @@ function summarizeAnthropicPayload(event: CapturedLogEvent): Json { // Thinking token counts vary per run (temperature=1, variable thinking depth). // Zero them out so the payload snapshot is stable. if (summary.metrics && typeof summary.metrics === "object") { - const metrics = summary.metrics as Record; - for (const key of ["completion_tokens", "tokens"]) { - if (key in metrics) { - metrics[key] = 0; - } - } + normalizeMetricValues( + summary.metrics, + ["completion_tokens", "tokens"], + 0, + ); } return summary; } @@ -216,6 +232,11 @@ function summarizeAnthropicPayload(event: CapturedLogEvent): Json { if (hasAttachmentInput && textBlock) { textBlock.text = ""; summary.output = output as Json; + normalizeMetricValues( + summary.metrics, + ["completion_tokens", "tokens"], + "", + ); } const hasToolResultInput = input?.some( diff --git a/e2e/scenarios/anthropic-instrumentation/scenario.impl.mjs b/e2e/scenarios/anthropic-instrumentation/scenario.impl.mjs index ab6d7f2ba..7bd22954a 100644 --- a/e2e/scenarios/anthropic-instrumentation/scenario.impl.mjs +++ b/e2e/scenarios/anthropic-instrumentation/scenario.impl.mjs @@ -6,7 +6,7 @@ import { runTracedScenario, } from "../../helpers/provider-runtime.mjs"; -const ANTHROPIC_MODEL = "claude-3-haiku-20240307"; +const ANTHROPIC_MODEL = "claude-haiku-4-5"; const ROOT_NAME = "anthropic-instrumentation-root"; const SCENARIO_NAME = "anthropic-instrumentation"; const WEATHER_TOOL = { diff --git 
a/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.1.span-events.json b/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.1.span-events.json index 68fbc65d5..78a9015fa 100644 --- a/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.1.span-events.json +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.1.span-events.json @@ -8,7 +8,7 @@ "Part 2" ], "metadata": { - "model": "claude-haiku-4-5-20251001" + "model": "claude-haiku-4-5" }, "metric_keys": [ "completion_tokens", @@ -46,7 +46,7 @@ "Part 2" ], "metadata": { - "model": "claude-haiku-4-5-20251001" + "model": "claude-haiku-4-5" }, "metric_keys": [], "name": "Claude Agent", @@ -66,7 +66,7 @@ "Use the calculator tool to multiply 15 by 7. Do not answer from memory." ], "metadata": { - "model": "claude-haiku-4-5-20251001" + "model": "claude-haiku-4-5" }, "metric_keys": [ "completion_tokens", @@ -100,7 +100,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-haiku-4-5-20251001" + "model": "claude-haiku-4-5" }, "metric_keys": [], "name": "Claude Agent", @@ -136,7 +136,7 @@ "Use the calculator tool to divide 2 by 0. Do not recover from the error." ], "metadata": { - "model": "claude-haiku-4-5-20251001" + "model": "claude-haiku-4-5" }, "metric_keys": [ "completion_tokens", @@ -170,7 +170,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-haiku-4-5-20251001" + "model": "claude-haiku-4-5" }, "metric_keys": [], "name": "Claude Agent", @@ -235,7 +235,7 @@ "Spawn a math-expert subagent to add 15 and 27 using the calculator tool. Report the result. Do not solve it yourself." 
], "metadata": { - "model": "claude-haiku-4-5-20251001" + "model": "claude-haiku-4-5" }, "metric_keys": [ "completion_tokens", @@ -286,7 +286,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-haiku-4-5-20251001" + "model": "claude-haiku-4-5" }, "metric_keys": [], "name": "Claude Agent", diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.76.span-events.json b/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.76.span-events.json index e947c64e5..1a12cdb33 100644 --- a/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.76.span-events.json +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.76.span-events.json @@ -8,7 +8,7 @@ "Part 2" ], "metadata": { - "model": "claude-haiku-4-5-20251001" + "model": "claude-haiku-4-5" }, "metric_keys": [ "completion_tokens", @@ -46,7 +46,7 @@ "Part 2" ], "metadata": { - "model": "claude-haiku-4-5-20251001" + "model": "claude-haiku-4-5" }, "metric_keys": [], "name": "Claude Agent", @@ -66,7 +66,7 @@ "Use the calculator tool to multiply 15 by 7. Do not answer from memory." ], "metadata": { - "model": "claude-haiku-4-5-20251001" + "model": "claude-haiku-4-5" }, "metric_keys": [ "completion_tokens", @@ -100,7 +100,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-haiku-4-5-20251001" + "model": "claude-haiku-4-5" }, "metric_keys": [], "name": "Claude Agent", @@ -136,7 +136,7 @@ "Use the calculator tool to divide 2 by 0. Do not recover from the error." 
], "metadata": { - "model": "claude-haiku-4-5-20251001" + "model": "claude-haiku-4-5" }, "metric_keys": [ "completion_tokens", @@ -170,7 +170,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-haiku-4-5-20251001" + "model": "claude-haiku-4-5" }, "metric_keys": [], "name": "Claude Agent", @@ -235,7 +235,7 @@ "Spawn a math-expert subagent to add 15 and 27 using the calculator tool. Report the result. Do not solve it yourself." ], "metadata": { - "model": "claude-haiku-4-5-20251001" + "model": "claude-haiku-4-5" }, "metric_keys": [ "completion_tokens", @@ -288,7 +288,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-haiku-4-5-20251001" + "model": "claude-haiku-4-5" }, "metric_keys": [], "name": "Claude Agent", diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.79.span-events.json b/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.79.span-events.json index e947c64e5..1a12cdb33 100644 --- a/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.79.span-events.json +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.79.span-events.json @@ -8,7 +8,7 @@ "Part 2" ], "metadata": { - "model": "claude-haiku-4-5-20251001" + "model": "claude-haiku-4-5" }, "metric_keys": [ "completion_tokens", @@ -46,7 +46,7 @@ "Part 2" ], "metadata": { - "model": "claude-haiku-4-5-20251001" + "model": "claude-haiku-4-5" }, "metric_keys": [], "name": "Claude Agent", @@ -66,7 +66,7 @@ "Use the calculator tool to multiply 15 by 7. Do not answer from memory." 
], "metadata": { - "model": "claude-haiku-4-5-20251001" + "model": "claude-haiku-4-5" }, "metric_keys": [ "completion_tokens", @@ -100,7 +100,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-haiku-4-5-20251001" + "model": "claude-haiku-4-5" }, "metric_keys": [], "name": "Claude Agent", @@ -136,7 +136,7 @@ "Use the calculator tool to divide 2 by 0. Do not recover from the error." ], "metadata": { - "model": "claude-haiku-4-5-20251001" + "model": "claude-haiku-4-5" }, "metric_keys": [ "completion_tokens", @@ -170,7 +170,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-haiku-4-5-20251001" + "model": "claude-haiku-4-5" }, "metric_keys": [], "name": "Claude Agent", @@ -235,7 +235,7 @@ "Spawn a math-expert subagent to add 15 and 27 using the calculator tool. Report the result. Do not solve it yourself." ], "metadata": { - "model": "claude-haiku-4-5-20251001" + "model": "claude-haiku-4-5" }, "metric_keys": [ "completion_tokens", @@ -288,7 +288,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-haiku-4-5-20251001" + "model": "claude-haiku-4-5" }, "metric_keys": [], "name": "Claude Agent", diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.81.span-events.json b/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.81.span-events.json index e947c64e5..1a12cdb33 100644 --- a/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.81.span-events.json +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.81.span-events.json @@ -8,7 +8,7 @@ "Part 2" ], "metadata": { - "model": "claude-haiku-4-5-20251001" + "model": "claude-haiku-4-5" }, "metric_keys": [ "completion_tokens", @@ -46,7 +46,7 @@ "Part 2" ], "metadata": { - "model": "claude-haiku-4-5-20251001" + "model": "claude-haiku-4-5" }, "metric_keys": [], "name": "Claude Agent", @@ -66,7 +66,7 @@ "Use the 
calculator tool to multiply 15 by 7. Do not answer from memory." ], "metadata": { - "model": "claude-haiku-4-5-20251001" + "model": "claude-haiku-4-5" }, "metric_keys": [ "completion_tokens", @@ -100,7 +100,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-haiku-4-5-20251001" + "model": "claude-haiku-4-5" }, "metric_keys": [], "name": "Claude Agent", @@ -136,7 +136,7 @@ "Use the calculator tool to divide 2 by 0. Do not recover from the error." ], "metadata": { - "model": "claude-haiku-4-5-20251001" + "model": "claude-haiku-4-5" }, "metric_keys": [ "completion_tokens", @@ -170,7 +170,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-haiku-4-5-20251001" + "model": "claude-haiku-4-5" }, "metric_keys": [], "name": "Claude Agent", @@ -235,7 +235,7 @@ "Spawn a math-expert subagent to add 15 and 27 using the calculator tool. Report the result. Do not solve it yourself." ], "metadata": { - "model": "claude-haiku-4-5-20251001" + "model": "claude-haiku-4-5" }, "metric_keys": [ "completion_tokens", @@ -288,7 +288,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "claude-haiku-4-5-20251001" + "model": "claude-haiku-4-5" }, "metric_keys": [], "name": "Claude Agent", diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/assertions.ts b/e2e/scenarios/claude-agent-sdk-instrumentation/assertions.ts index 11efcb451..ffc8e6b06 100644 --- a/e2e/scenarios/claude-agent-sdk-instrumentation/assertions.ts +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/assertions.ts @@ -101,6 +101,12 @@ function summarizeSpan( } if (summary.metadata && typeof summary.metadata === "object") { const metadata = summary.metadata as Record; + if ( + typeof metadata.model === "string" && + metadata.model.startsWith("claude-haiku-4-5-") + ) { + metadata.model = "claude-haiku-4-5"; + } if (typeof metadata["claude_agent_sdk.description"] === "string") { metadata["claude_agent_sdk.description"] = ""; } diff --git 
a/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.impl.mjs b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.impl.mjs index f9cd8d392..683918c11 100644 --- a/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.impl.mjs +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.impl.mjs @@ -6,7 +6,7 @@ import { } from "../../helpers/provider-runtime.mjs"; import { z } from "zod"; -const CLAUDE_AGENT_MODEL = "claude-haiku-4-5-20251001"; +const CLAUDE_AGENT_MODEL = "claude-haiku-4-5"; export const ROOT_NAME = "claude-agent-sdk-root"; export const SCENARIO_NAME = "claude-agent-sdk-traces"; diff --git a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v281.log-payloads.json b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v281.log-payloads.json index 0543ea40d..5d8ada30c 100644 --- a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v281.log-payloads.json +++ b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v281.log-payloads.json @@ -119,10 +119,10 @@ "completion_accepted_prediction_tokens": 0, "completion_reasoning_tokens": 0, "completion_rejected_prediction_tokens": 0, - "completion_tokens": 3, + "completion_tokens": "", "prompt_cached_tokens": 0, - "prompt_tokens": 40, - "tokens": 43 + "prompt_tokens": "", + "tokens": "" }, "project_id": "", "root_span_id": "", @@ -253,11 +253,11 @@ "completion_accepted_prediction_tokens": 0, "completion_reasoning_tokens": 0, "completion_rejected_prediction_tokens": 0, - "completion_tokens": 3, + "completion_tokens": "", "prompt_cached_tokens": 0, - "prompt_tokens": 40, + "prompt_tokens": "", "time_to_first_token": "", - "tokens": 43 + "tokens": "" }, "project_id": "", "root_span_id": "", @@ -381,10 +381,10 @@ "id": "", "log_id": "g", "metrics": { - "completion_tokens": 4, - "prompt_tokens": 6, + "completion_tokens": "", + "prompt_tokens": "", "time_to_first_token": "", - "tokens": 10 + "tokens": "" }, "project_id": "", 
"root_span_id": "", diff --git a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.log-payloads.json b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.log-payloads.json index 1e9d02b65..3ab9d6929 100644 --- a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.log-payloads.json +++ b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.log-payloads.json @@ -115,9 +115,9 @@ "id": "", "log_id": "g", "metrics": { - "completion_tokens": 1, - "prompt_tokens": 40, - "tokens": 41 + "completion_tokens": "", + "prompt_tokens": "", + "tokens": "" }, "project_id": "", "root_span_id": "", @@ -244,10 +244,10 @@ "id": "", "log_id": "g", "metrics": { - "completion_tokens": 1, - "prompt_tokens": 40, + "completion_tokens": "", + "prompt_tokens": "", "time_to_first_token": "", - "tokens": 41 + "tokens": "" }, "project_id": "", "root_span_id": "", @@ -475,10 +475,10 @@ "id": "", "log_id": "g", "metrics": { - "completion_tokens": 4, - "prompt_tokens": 5, + "completion_tokens": "", + "prompt_tokens": "", "time_to_first_token": "", - "tokens": 9 + "tokens": "" }, "project_id": "", "root_span_id": "", diff --git a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.log-payloads.json b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.log-payloads.json index 1e9d02b65..3ab9d6929 100644 --- a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.log-payloads.json +++ b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.log-payloads.json @@ -115,9 +115,9 @@ "id": "", "log_id": "g", "metrics": { - "completion_tokens": 1, - "prompt_tokens": 40, - "tokens": 41 + "completion_tokens": "", + "prompt_tokens": "", + "tokens": "" }, "project_id": "", "root_span_id": "", @@ -244,10 +244,10 @@ "id": "", "log_id": "g", "metrics": { - "completion_tokens": 1, - "prompt_tokens": 40, + "completion_tokens": "", + 
"prompt_tokens": "", "time_to_first_token": "", - "tokens": 41 + "tokens": "" }, "project_id": "", "root_span_id": "", @@ -475,10 +475,10 @@ "id": "", "log_id": "g", "metrics": { - "completion_tokens": 4, - "prompt_tokens": 5, + "completion_tokens": "", + "prompt_tokens": "", "time_to_first_token": "", - "tokens": 9 + "tokens": "" }, "project_id": "", "root_span_id": "", diff --git a/e2e/scenarios/huggingface-instrumentation/assertions.ts b/e2e/scenarios/huggingface-instrumentation/assertions.ts index 209587de7..60b9df63b 100644 --- a/e2e/scenarios/huggingface-instrumentation/assertions.ts +++ b/e2e/scenarios/huggingface-instrumentation/assertions.ts @@ -121,7 +121,14 @@ function normalizeMetrics(value: Json): Json { for (const [key, entry] of Object.entries(value)) { if ( typeof entry === "number" && - ["end", "start", "time_to_first_token"].includes(key) + [ + "completion_tokens", + "end", + "prompt_tokens", + "start", + "time_to_first_token", + "tokens", + ].includes(key) ) { normalized[key] = ""; continue; diff --git a/js/examples/auto-instrumentation/google-genai-example.js b/js/examples/auto-instrumentation/google-genai-example.js index 772b3ca3d..7aa7aaa63 100644 --- a/js/examples/auto-instrumentation/google-genai-example.js +++ b/js/examples/auto-instrumentation/google-genai-example.js @@ -33,7 +33,7 @@ async function main() { // This call will be automatically traced to Braintrust const response = await ai.models.generateContent({ - model: "gemini-2.0-flash-001", + model: "gemini-2.5-flash-lite", contents: "Explain what Gemini models are in one sentence.", }); diff --git a/js/examples/claude-agent-sdk/async_iter.ts b/js/examples/claude-agent-sdk/async_iter.ts index 26c2088ef..207e21b25 100644 --- a/js/examples/claude-agent-sdk/async_iter.ts +++ b/js/examples/claude-agent-sdk/async_iter.ts @@ -105,7 +105,7 @@ async function main() { for await (const message of query({ prompt: conversationPrompt, options: { - model: "claude-haiku-4-5-20251001", + model: 
"claude-haiku-4-5", permissionMode: "bypassPermissions", maxTurns: 1, }, diff --git a/js/examples/claude-agent-sdk/index.ts b/js/examples/claude-agent-sdk/index.ts index 2a7b1fa94..fcc807e96 100644 --- a/js/examples/claude-agent-sdk/index.ts +++ b/js/examples/claude-agent-sdk/index.ts @@ -117,7 +117,7 @@ Report all results.`; description: "Math specialist for calculations and explanations", prompt: "You are a math expert. Perform calculations step by step.", - model: "haiku", + model: "claude-haiku-4-5", }, }, mcpServers: { diff --git a/js/src/wrappers/anthropic.test.ts b/js/src/wrappers/anthropic.test.ts index 5002f6e3e..e8f1c3f5d 100644 --- a/js/src/wrappers/anthropic.test.ts +++ b/js/src/wrappers/anthropic.test.ts @@ -14,8 +14,8 @@ import { _exportsForTestingOnly, Attachment, initLogger } from "../logger"; import { configureNode } from "../node/config"; import { getCurrentUnixTimestamp } from "../util"; -// use the cheapest model for tests -const TEST_MODEL = "claude-3-haiku-20240307"; +// use a low-cost current Haiku alias for tests +const TEST_MODEL = "claude-haiku-4-5"; interface TextBlock { type: "text"; @@ -131,7 +131,7 @@ describe("anthropic client unit tests", { retry: 3 }, () => { expect(message.content[0].type).toBe("text"); const content = message.content[0] as unknown; if (typeof content === "object" && content !== null && "text" in content) { - expect(content.text).toContain("old pond"); + expect(content.text.toLowerCase()).toContain("old pond"); } else { throw new Error("Content is not a text block"); } @@ -387,12 +387,11 @@ describe("anthropic client unit tests", { retry: 3 }, () => { .toLowerCase() .replace(/\n/g, " ") .replace(/'/g, ""); - // Validate we collected all the text, so check the first, line, the last line - // and a few others too. + // Validate we collected the streamed text without relying on one exact phrasing. 
expect(output).toContain("shall i compare thee to a summers day"); - expect(output).toContain("too hot the eye of heaven shines"); - expect(output).toContain("so long as men can breathe or eyes can see"); - expect(output).toContain("so long lives this and this gives life to thee"); + expect(output).toContain("shakespeare"); + expect(output).toContain("summer"); + expect(output.length).toBeGreaterThan(200); expect(span["span_attributes"].type).toBe("llm"); expect(span["span_attributes"].name).toBe("anthropic.messages.create"); @@ -706,7 +705,7 @@ describe("anthropic client unit tests", { retry: 3 }, () => { "iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAYAAAAfFcSJAAAADUlEQVR42mP8z8DwHwAFBQIAX8jx0gAAAABJRU5ErkJggg=="; const response = await client.messages.create({ - model: "claude-haiku-4-5-20251001", + model: TEST_MODEL, messages: [ { role: "user", diff --git a/js/src/wrappers/claude-agent-sdk/claude-agent-sdk.test.ts b/js/src/wrappers/claude-agent-sdk/claude-agent-sdk.test.ts index 4fb558a38..8db8aebf4 100644 --- a/js/src/wrappers/claude-agent-sdk/claude-agent-sdk.test.ts +++ b/js/src/wrappers/claude-agent-sdk/claude-agent-sdk.test.ts @@ -631,7 +631,7 @@ try { // Initialize Braintrust state once per process } -const TEST_MODEL = "claude-haiku-4-5-20251001"; +const TEST_MODEL = "claude-haiku-4-5"; describe.skipIf(!claudeSDK)("claude-agent-sdk integration tests", () => { // eslint-disable-next-line @typescript-eslint/no-explicit-any @@ -919,7 +919,7 @@ describe.skipIf(!claudeSDK)("claude-agent-sdk integration tests", () => { "Math specialist. Use the calculator tool for calculations.", prompt: "You are a math expert. Use the calculator tool to perform the requested calculation. 
Be concise.", - model: "haiku", + model: TEST_MODEL, }, }, mcpServers: { diff --git a/js/src/wrappers/google-genai.test.ts b/js/src/wrappers/google-genai.test.ts index f6d3a3305..8723debfa 100644 --- a/js/src/wrappers/google-genai.test.ts +++ b/js/src/wrappers/google-genai.test.ts @@ -18,7 +18,7 @@ import { import { wrapGoogleGenAI } from "./google-genai"; import { getCurrentUnixTimestamp } from "../util"; -const TEST_MODEL = "gemini-2.0-flash-001"; +const TEST_MODEL = "gemini-2.5-flash-lite"; const TEST_SUITE_OPTIONS = { timeout: 10000, retry: 3 }; async function drainRawEvents(backgroundLogger: TestBackgroundLogger) { From 798763e834fa37aeac161d70d8faaf0403244a08 Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Tue, 21 Apr 2026 13:33:35 +0200 Subject: [PATCH 04/26] feat: Capture thinking with cohere (#1861) Fixes https://github.com/braintrustdata/braintrust-sdk-javascript/issues/1845 --- .changeset/honest-mails-show.md | 5 + .../cohere-v7-20-0.span-events.json | 54 ++++- .../cohere-v7-21-0.span-events.json | 54 ++++- .../__snapshots__/cohere-v7.span-events.json | 54 ++++- .../__snapshots__/cohere-v8.span-events.json | 54 ++++- .../cohere-instrumentation/assertions.ts | 90 +++++++- .../scenario.cohere-v7.mjs | 9 +- .../scenario.cohere-v7.ts | 9 +- .../cohere-instrumentation/scenario.impl.mjs | 58 ++++- .../cohere-instrumentation/scenario.test.ts | 10 + .../auto-instrumentations/configs/cohere.ts | 52 +++++ .../plugins/cohere-plugin.test.ts | 119 ++++++++++ .../instrumentation/plugins/cohere-plugin.ts | 206 +++++++++++++++++- js/src/vendor-sdk-types/cohere.ts | 10 + 14 files changed, 750 insertions(+), 34 deletions(-) create mode 100644 .changeset/honest-mails-show.md diff --git a/.changeset/honest-mails-show.md b/.changeset/honest-mails-show.md new file mode 100644 index 000000000..ca2cf8bbb --- /dev/null +++ b/.changeset/honest-mails-show.md @@ -0,0 +1,5 @@ +--- +"braintrust": patch +--- + +feat: Capture thinking with cohere diff --git 
a/e2e/scenarios/cohere-instrumentation/__snapshots__/cohere-v7-20-0.span-events.json b/e2e/scenarios/cohere-instrumentation/__snapshots__/cohere-v7-20-0.span-events.json index f31d4de70..72036b039 100644 --- a/e2e/scenarios/cohere-instrumentation/__snapshots__/cohere-v7-20-0.span-events.json +++ b/e2e/scenarios/cohere-instrumentation/__snapshots__/cohere-v7-20-0.span-events.json @@ -86,6 +86,48 @@ ], "type": "llm" }, + { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "chat-stream-thinking" + }, + "metric_keys": [], + "name": "cohere-chat-stream-thinking-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "model": "command-a-reasoning-08-2025", + "provider": "cohere", + "thinking": { + "tokenBudget": 128, + "type": "enabled" + } + }, + "metric_keys": [ + "completion_tokens", + "prompt_cached_tokens", + "prompt_tokens", + "reasoning_tokens", + "time_to_first_token", + "tokens" + ], + "name": "cohere.chatStream", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, { "has_input": false, "has_output": false, @@ -95,7 +137,7 @@ "metric_keys": [], "name": "cohere-embed-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -114,9 +156,9 @@ ], "name": "cohere.embed", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -129,7 +171,7 @@ "metric_keys": [], "name": "cohere-rerank-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -149,9 +191,9 @@ ], "name": "cohere.rerank", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" } diff --git a/e2e/scenarios/cohere-instrumentation/__snapshots__/cohere-v7-21-0.span-events.json b/e2e/scenarios/cohere-instrumentation/__snapshots__/cohere-v7-21-0.span-events.json index 
f31d4de70..72036b039 100644 --- a/e2e/scenarios/cohere-instrumentation/__snapshots__/cohere-v7-21-0.span-events.json +++ b/e2e/scenarios/cohere-instrumentation/__snapshots__/cohere-v7-21-0.span-events.json @@ -86,6 +86,48 @@ ], "type": "llm" }, + { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "chat-stream-thinking" + }, + "metric_keys": [], + "name": "cohere-chat-stream-thinking-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "model": "command-a-reasoning-08-2025", + "provider": "cohere", + "thinking": { + "tokenBudget": 128, + "type": "enabled" + } + }, + "metric_keys": [ + "completion_tokens", + "prompt_cached_tokens", + "prompt_tokens", + "reasoning_tokens", + "time_to_first_token", + "tokens" + ], + "name": "cohere.chatStream", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, { "has_input": false, "has_output": false, @@ -95,7 +137,7 @@ "metric_keys": [], "name": "cohere-embed-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -114,9 +156,9 @@ ], "name": "cohere.embed", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -129,7 +171,7 @@ "metric_keys": [], "name": "cohere-rerank-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -149,9 +191,9 @@ ], "name": "cohere.rerank", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" } diff --git a/e2e/scenarios/cohere-instrumentation/__snapshots__/cohere-v7.span-events.json b/e2e/scenarios/cohere-instrumentation/__snapshots__/cohere-v7.span-events.json index f31d4de70..72036b039 100644 --- a/e2e/scenarios/cohere-instrumentation/__snapshots__/cohere-v7.span-events.json +++ b/e2e/scenarios/cohere-instrumentation/__snapshots__/cohere-v7.span-events.json @@ 
-86,6 +86,48 @@ ], "type": "llm" }, + { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "chat-stream-thinking" + }, + "metric_keys": [], + "name": "cohere-chat-stream-thinking-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "model": "command-a-reasoning-08-2025", + "provider": "cohere", + "thinking": { + "tokenBudget": 128, + "type": "enabled" + } + }, + "metric_keys": [ + "completion_tokens", + "prompt_cached_tokens", + "prompt_tokens", + "reasoning_tokens", + "time_to_first_token", + "tokens" + ], + "name": "cohere.chatStream", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, { "has_input": false, "has_output": false, @@ -95,7 +137,7 @@ "metric_keys": [], "name": "cohere-embed-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -114,9 +156,9 @@ ], "name": "cohere.embed", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -129,7 +171,7 @@ "metric_keys": [], "name": "cohere-rerank-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -149,9 +191,9 @@ ], "name": "cohere.rerank", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" } diff --git a/e2e/scenarios/cohere-instrumentation/__snapshots__/cohere-v8.span-events.json b/e2e/scenarios/cohere-instrumentation/__snapshots__/cohere-v8.span-events.json index 9a11e3ffa..ac3ba97a0 100644 --- a/e2e/scenarios/cohere-instrumentation/__snapshots__/cohere-v8.span-events.json +++ b/e2e/scenarios/cohere-instrumentation/__snapshots__/cohere-v8.span-events.json @@ -86,6 +86,48 @@ ], "type": "llm" }, + { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "chat-stream-thinking" + }, + "metric_keys": [], + "name": 
"cohere-chat-stream-thinking-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "model": "command-a-reasoning-08-2025", + "provider": "cohere", + "thinking": { + "tokenBudget": 128, + "type": "enabled" + } + }, + "metric_keys": [ + "completion_tokens", + "prompt_cached_tokens", + "prompt_tokens", + "reasoning_tokens", + "time_to_first_token", + "tokens" + ], + "name": "cohere.chatStream", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, { "has_input": false, "has_output": false, @@ -95,7 +137,7 @@ "metric_keys": [], "name": "cohere-embed-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -115,9 +157,9 @@ ], "name": "cohere.embed", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -130,7 +172,7 @@ "metric_keys": [], "name": "cohere-rerank-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -150,9 +192,9 @@ ], "name": "cohere.rerank", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" } diff --git a/e2e/scenarios/cohere-instrumentation/assertions.ts b/e2e/scenarios/cohere-instrumentation/assertions.ts index 3394006a9..25932f96c 100644 --- a/e2e/scenarios/cohere-instrumentation/assertions.ts +++ b/e2e/scenarios/cohere-instrumentation/assertions.ts @@ -13,6 +13,7 @@ import { ROOT_NAME, SCENARIO_NAME } from "./constants.mjs"; type RunCohereScenario = (harness: { runNodeScenarioDir: (options: { entry: string; + env?: Record; nodeArgs: string[]; runContext?: { variantKey: string }; scenarioDir: string; @@ -20,6 +21,7 @@ type RunCohereScenario = (harness: { }) => Promise; runScenarioDir: (options: { entry: string; + env?: Record; runContext?: { variantKey: string }; scenarioDir: string; timeoutMs: number; @@ -35,16 +37,23 @@ function 
findCohereSpan( return spans.find((candidate) => candidate.output !== undefined) ?? spans[0]; } -function buildSpanSummary(events: CapturedLogEvent[]): Json { +function buildSpanSummary( + events: CapturedLogEvent[], + supportsThinking: boolean, +): Json { const chatOperation = findLatestSpan(events, "cohere-chat-operation"); const chatStreamOperation = findLatestSpan( events, "cohere-chat-stream-operation", ); + const chatStreamThinkingOperation = findLatestSpan( + events, + "cohere-chat-stream-thinking-operation", + ); const embedOperation = findLatestSpan(events, "cohere-embed-operation"); const rerankOperation = findLatestSpan(events, "cohere-rerank-operation"); - return [ + const summaryEvents = [ findLatestSpan(events, ROOT_NAME), chatOperation, findCohereSpan(events, chatOperation?.span.id, "cohere.chat"), @@ -54,7 +63,22 @@ function buildSpanSummary(events: CapturedLogEvent[]): Json { findCohereSpan(events, embedOperation?.span.id, "cohere.embed"), rerankOperation, findCohereSpan(events, rerankOperation?.span.id, "cohere.rerank"), - ].map((event) => + ]; + + if (supportsThinking) { + summaryEvents.splice( + 5, + 0, + chatStreamThinkingOperation, + findCohereSpan( + events, + chatStreamThinkingOperation?.span.id, + "cohere.chatStream", + ), + ); + } + + return summaryEvents.map((event) => summarizeWrapperContract(event!, [ "document_count", "inputType", @@ -62,6 +86,7 @@ function buildSpanSummary(events: CapturedLogEvent[]): Json { "operation", "provider", "scenario", + "thinking", "topN", ]), ) as Json; @@ -71,6 +96,7 @@ export function defineCohereInstrumentationAssertions(options: { name: string; runScenario: RunCohereScenario; snapshotName: string; + supportsThinking: boolean; testFileUrl: string; timeoutMs: number; }): void { @@ -132,6 +158,60 @@ export function defineCohereInstrumentationAssertions(options: { expect(chatStreamSpan?.output).toBeDefined(); }); + if (options.supportsThinking) { + test("captures reasoning content for chatStream", 
testConfig, () => { + const root = findLatestSpan(events, ROOT_NAME); + const operation = findLatestSpan( + events, + "cohere-chat-stream-thinking-operation", + ); + const span = findCohereSpan( + events, + operation?.span.id, + "cohere.chatStream", + ); + const output = span?.output as + | { + content?: Array<{ + text?: string; + thinking?: string; + type?: string; + }>; + } + | undefined; + const metrics = (span?.metrics ?? {}) as Record; + + expect(operation).toBeDefined(); + expect(span).toBeDefined(); + expect(operation?.span.parentIds).toEqual([root?.span.id ?? ""]); + expect(span?.row.metadata).toMatchObject({ + model: "command-a-reasoning-08-2025", + provider: "cohere", + thinking: { + tokenBudget: 128, + type: "enabled", + }, + }); + expect(metrics).toMatchObject({ + completion_tokens: expect.any(Number), + prompt_tokens: expect.any(Number), + reasoning_tokens: expect.any(Number), + time_to_first_token: expect.any(Number), + }); + expect( + output?.content?.some( + (block) => + block.type === "thinking" && typeof block.thinking === "string", + ), + ).toBe(true); + expect( + output?.content?.some( + (block) => block.type === "text" && typeof block.text === "string", + ), + ).toBe(true); + }); + } + test("captures embed span", testConfig, () => { const operation = findLatestSpan(events, "cohere-embed-operation"); const span = findCohereSpan(events, operation?.span.id, "cohere.embed"); @@ -164,7 +244,9 @@ export function defineCohereInstrumentationAssertions(options: { test("matches span snapshot", testConfig, async () => { await expect( - formatJsonFileSnapshot(buildSpanSummary(events)), + formatJsonFileSnapshot( + buildSpanSummary(events, options.supportsThinking), + ), ).toMatchFileSnapshot(spanSnapshotPath); }); }); diff --git a/e2e/scenarios/cohere-instrumentation/scenario.cohere-v7.mjs b/e2e/scenarios/cohere-instrumentation/scenario.cohere-v7.mjs index 04a97bcf5..6b0f4d5bd 100644 --- a/e2e/scenarios/cohere-instrumentation/scenario.cohere-v7.mjs +++ 
b/e2e/scenarios/cohere-instrumentation/scenario.cohere-v7.mjs @@ -1,9 +1,16 @@ -import { CohereClient as CohereClientV7 } from "cohere-sdk-v7"; +import { + CohereClient as CohereClientV7, + CohereClientV2 as CohereClientV7V2, +} from "cohere-sdk-v7"; import { runMain } from "../../helpers/provider-runtime.mjs"; import { runAutoCohereInstrumentation } from "./scenario.impl.mjs"; runMain(async () => runAutoCohereInstrumentation(CohereClientV7, { apiVersion: "v7", + ThinkingCohereClient: + process.env.COHERE_SUPPORTS_THINKING === "1" + ? CohereClientV7V2 + : undefined, }), ); diff --git a/e2e/scenarios/cohere-instrumentation/scenario.cohere-v7.ts b/e2e/scenarios/cohere-instrumentation/scenario.cohere-v7.ts index c7b926f6e..3a01391a8 100644 --- a/e2e/scenarios/cohere-instrumentation/scenario.cohere-v7.ts +++ b/e2e/scenarios/cohere-instrumentation/scenario.cohere-v7.ts @@ -1,5 +1,8 @@ import { wrapCohere } from "braintrust"; -import { CohereClient as CohereClientV7 } from "cohere-sdk-v7"; +import { + CohereClient as CohereClientV7, + CohereClientV2 as CohereClientV7V2, +} from "cohere-sdk-v7"; import { runMain } from "../../helpers/scenario-runtime"; import { runWrappedCohereInstrumentation } from "./scenario.impl.mjs"; @@ -7,5 +10,9 @@ runMain(async () => runWrappedCohereInstrumentation(CohereClientV7, { apiVersion: "v7", decorateClient: wrapCohere, + ThinkingCohereClient: + process.env.COHERE_SUPPORTS_THINKING === "1" + ? 
CohereClientV7V2 + : undefined, }), ); diff --git a/e2e/scenarios/cohere-instrumentation/scenario.impl.mjs b/e2e/scenarios/cohere-instrumentation/scenario.impl.mjs index 410919de0..b1e74a8a6 100644 --- a/e2e/scenarios/cohere-instrumentation/scenario.impl.mjs +++ b/e2e/scenarios/cohere-instrumentation/scenario.impl.mjs @@ -15,7 +15,7 @@ import { SCENARIO_NAME, } from "./constants.mjs"; -export const COHERE_SCENARIO_TIMEOUT_MS = 120_000; +export const COHERE_SCENARIO_TIMEOUT_MS = 240_000; export const COHERE_SCENARIO_SPECS = [ { @@ -23,6 +23,7 @@ export const COHERE_SCENARIO_SPECS = [ autoEntry: "scenario.cohere-v7.mjs", dependencyName: "cohere-sdk-v7-14-0", snapshotName: "cohere-v7-14-0", + supportsThinking: false, wrapperEntry: "scenario.cohere-v7.ts", }, { @@ -84,6 +85,36 @@ function getChatRequest(apiVersion, { stream = false } = {}) { }; } +function getThinkingChatRequest() { + return { + model: "command-a-reasoning-08-2025", + messages: [ + { + role: "user", + content: "What is 2+2? Reply with the number only.", + }, + ], + maxTokens: 256, + temperature: 0, + thinking: { + type: "enabled", + tokenBudget: 128, + }, + }; +} + +function shouldRunThinkingScenario(apiVersion) { + if (process.env.COHERE_SUPPORTS_THINKING === "1") { + return true; + } + + if (process.env.COHERE_SUPPORTS_THINKING === "0") { + return false; + } + + return apiVersion === "v8"; +} + function getEmbedRequest(apiVersion) { if (apiVersion === "v8") { return { @@ -128,7 +159,7 @@ function getRerankRequest(apiVersion) { async function runCohereInstrumentationScenario( CohereClient, - { apiVersion, decorateClient } = {}, + { apiVersion, decorateClient, ThinkingCohereClient } = {}, ) { const apiKey = getApiKey(); if (!apiKey) { @@ -139,6 +170,16 @@ async function runCohereInstrumentationScenario( token: apiKey, }); const client = decorateClient ? decorateClient(baseClient) : baseClient; + const thinkingClientClass = ThinkingCohereClient ?? 
CohereClient; + const thinkingBaseClient = + thinkingClientClass === CohereClient + ? baseClient + : new thinkingClientClass({ + token: apiKey, + }); + const thinkingClient = decorateClient + ? decorateClient(thinkingBaseClient) + : thinkingBaseClient; await runTracedScenario({ callback: async () => { @@ -157,6 +198,19 @@ async function runCohereInstrumentationScenario( }, ); + if (shouldRunThinkingScenario(apiVersion)) { + await runOperation( + "cohere-chat-stream-thinking-operation", + "chat-stream-thinking", + async () => { + const stream = await thinkingClient.chatStream( + getThinkingChatRequest(), + ); + await collectAsync(stream); + }, + ); + } + await runOperation("cohere-embed-operation", "embed", async () => { await client.embed(getEmbedRequest(apiVersion)); }); diff --git a/e2e/scenarios/cohere-instrumentation/scenario.test.ts b/e2e/scenarios/cohere-instrumentation/scenario.test.ts index 652733408..70c6ad6ea 100644 --- a/e2e/scenarios/cohere-instrumentation/scenario.test.ts +++ b/e2e/scenarios/cohere-instrumentation/scenario.test.ts @@ -25,18 +25,24 @@ const cohereScenarios = await Promise.all( ); for (const scenario of cohereScenarios) { + const supportsThinking = scenario.supportsThinking ?? true; + describe(`cohere sdk ${scenario.version}`, () => { defineCohereInstrumentationAssertions({ name: "wrapped instrumentation", runScenario: async ({ runScenarioDir }) => { await runScenarioDir({ entry: scenario.wrapperEntry, + env: { + COHERE_SUPPORTS_THINKING: supportsThinking ? "1" : "0", + }, runContext: { variantKey: scenario.snapshotName }, scenarioDir, timeoutMs: COHERE_SCENARIO_TIMEOUT_MS, }); }, snapshotName: scenario.snapshotName, + supportsThinking, testFileUrl: import.meta.url, timeoutMs: COHERE_SCENARIO_TIMEOUT_MS, }); @@ -46,6 +52,9 @@ for (const scenario of cohereScenarios) { runScenario: async ({ runNodeScenarioDir }) => { await runNodeScenarioDir({ entry: scenario.autoEntry, + env: { + COHERE_SUPPORTS_THINKING: supportsThinking ? 
"1" : "0", + }, nodeArgs: ["--import", "braintrust/hook.mjs"], runContext: { variantKey: scenario.snapshotName }, scenarioDir, @@ -53,6 +62,7 @@ for (const scenario of cohereScenarios) { }); }, snapshotName: scenario.snapshotName, + supportsThinking, testFileUrl: import.meta.url, timeoutMs: COHERE_SCENARIO_TIMEOUT_MS, }); diff --git a/js/src/auto-instrumentations/configs/cohere.ts b/js/src/auto-instrumentations/configs/cohere.ts index c12d0a594..955038c84 100644 --- a/js/src/auto-instrumentations/configs/cohere.ts +++ b/js/src/auto-instrumentations/configs/cohere.ts @@ -15,6 +15,19 @@ export const cohereConfigs: InstrumentationConfig[] = [ kind: "Async", }, }, + { + channelName: cohereChannels.chat.channelName, + module: { + name: "cohere-ai", + versionRange: ">=7.20.0 <8.0.0", + filePath: "api/resources/v2/client/Client.js", + }, + functionQuery: { + className: "V2Client", + methodName: "chat", + kind: "Async", + }, + }, { channelName: cohereChannels.chat.channelName, module: { @@ -54,6 +67,19 @@ export const cohereConfigs: InstrumentationConfig[] = [ kind: "Async", }, }, + { + channelName: cohereChannels.chatStream.channelName, + module: { + name: "cohere-ai", + versionRange: ">=7.20.0 <8.0.0", + filePath: "api/resources/v2/client/Client.js", + }, + functionQuery: { + className: "V2Client", + methodName: "chatStream", + kind: "Async", + }, + }, { channelName: cohereChannels.chatStream.channelName, module: { @@ -93,6 +119,19 @@ export const cohereConfigs: InstrumentationConfig[] = [ kind: "Async", }, }, + { + channelName: cohereChannels.embed.channelName, + module: { + name: "cohere-ai", + versionRange: ">=7.20.0 <8.0.0", + filePath: "api/resources/v2/client/Client.js", + }, + functionQuery: { + className: "V2Client", + methodName: "embed", + kind: "Async", + }, + }, { channelName: cohereChannels.embed.channelName, module: { @@ -132,6 +171,19 @@ export const cohereConfigs: InstrumentationConfig[] = [ kind: "Async", }, }, + { + channelName: 
cohereChannels.rerank.channelName, + module: { + name: "cohere-ai", + versionRange: ">=7.20.0 <8.0.0", + filePath: "api/resources/v2/client/Client.js", + }, + functionQuery: { + className: "V2Client", + methodName: "rerank", + kind: "Async", + }, + }, { channelName: cohereChannels.rerank.channelName, module: { diff --git a/js/src/instrumentation/plugins/cohere-plugin.test.ts b/js/src/instrumentation/plugins/cohere-plugin.test.ts index b760e2ae6..c2ea4fbf1 100644 --- a/js/src/instrumentation/plugins/cohere-plugin.test.ts +++ b/js/src/instrumentation/plugins/cohere-plugin.test.ts @@ -13,6 +13,7 @@ describe("parseCohereMetricsFromUsage", () => { tokens: { inputTokens: 10, outputTokens: 4, + reasoning_tokens: 6, }, cachedTokens: 3, }, @@ -25,6 +26,7 @@ describe("parseCohereMetricsFromUsage", () => { ).toEqual({ prompt_tokens: 10, completion_tokens: 4, + reasoning_tokens: 6, tokens: 14, prompt_cached_tokens: 3, search_units: 1, @@ -193,4 +195,121 @@ describe("aggregateCohereChatStreamChunks", () => { ], }); }); + + it("aggregates v8 thinking blocks and reasoning token metrics", () => { + const aggregated = aggregateCohereChatStreamChunks([ + { + type: "message-start", + id: "resp_reasoning", + delta: { + message: { + role: "assistant", + }, + }, + }, + { + type: "content-start", + index: 0, + delta: { + message: { + content: { + type: "thinking", + thinking: "", + }, + }, + }, + }, + { + type: "content-delta", + index: 0, + delta: { + message: { + content: { + thinking: "Let me think. 
", + }, + }, + }, + }, + { + type: "content-delta", + index: 0, + delta: { + message: { + content: { + thinking: "2 + 2 = 4.", + }, + }, + }, + }, + { + type: "content-start", + index: 1, + delta: { + message: { + content: { + type: "text", + text: "", + }, + }, + }, + }, + { + type: "content-delta", + index: 1, + delta: { + message: { + content: { + text: "4", + }, + }, + }, + }, + { + type: "tool-plan-delta", + delta: { + message: { + toolPlan: "Answer directly", + }, + }, + }, + { + type: "message-end", + delta: { + finishReason: "COMPLETE", + usage: { + tokens: { + inputTokens: 7, + outputTokens: 3, + reasoning_tokens: 11, + }, + }, + }, + }, + ]); + + expect(aggregated.metadata).toEqual({ + id: "resp_reasoning", + finish_reason: "COMPLETE", + }); + expect(aggregated.metrics).toEqual({ + prompt_tokens: 7, + completion_tokens: 3, + reasoning_tokens: 11, + tokens: 10, + }); + expect(aggregated.output).toEqual({ + role: "assistant", + toolPlan: "Answer directly", + content: [ + { + type: "thinking", + thinking: "Let me think. 2 + 2 = 4.", + }, + { + type: "text", + text: "4", + }, + ], + }); + }); }); diff --git a/js/src/instrumentation/plugins/cohere-plugin.ts b/js/src/instrumentation/plugins/cohere-plugin.ts index 5e337a200..afaea628c 100644 --- a/js/src/instrumentation/plugins/cohere-plugin.ts +++ b/js/src/instrumentation/plugins/cohere-plugin.ts @@ -124,6 +124,11 @@ const CHAT_REQUEST_METADATA_ALLOWLIST = new Set([ "strictTools", "strict_tools", "temperature", + "thinking", + "thinkingTokenBudget", + "thinkingType", + "thinking_token_budget", + "thinking_type", "toolChoice", "tool_choice", ]); @@ -428,6 +433,14 @@ function mergeUsageMetrics( "tokens", tokenContainer.totalTokens ?? tokenContainer.total_tokens, ); + setMetricIfNumber( + metrics, + "reasoning_tokens", + tokenContainer.reasoningTokens ?? + tokenContainer.reasoning_tokens ?? + tokenContainer.thinkingTokens ?? 
+ tokenContainer.thinking_tokens, + ); } const billedUnits = @@ -551,6 +564,129 @@ function extractV8DeltaText(chunk: CohereChatStreamEvent): string | undefined { return undefined; } +type CohereContentBlockType = "text" | "thinking"; + +type AggregatedCohereContentBlock = { + text: string; + thinking: string; + type?: CohereContentBlockType; +}; + +type SerializedCohereContentBlock = + | { + text: string; + type: "text"; + } + | { + thinking: string; + type: "thinking"; + }; + +function getV8ContentIndex(chunk: CohereChatStreamEvent): number { + return typeof chunk.index === "number" ? chunk.index : 0; +} + +function toContentBlockType( + value: unknown, +): CohereContentBlockType | undefined { + return value === "text" || value === "thinking" ? value : undefined; +} + +function getOrCreateContentBlock( + contentBlocksByIndex: Record, + contentBlockOrder: number[], + index: number, +): AggregatedCohereContentBlock { + if (!contentBlockOrder.includes(index)) { + contentBlockOrder.push(index); + } + + if (!(index in contentBlocksByIndex)) { + contentBlocksByIndex[index] = { + text: "", + thinking: "", + }; + } + + return contentBlocksByIndex[index]; +} + +function appendV8ContentBlock( + contentBlocksByIndex: Record, + contentBlockOrder: number[], + index: number, + content: unknown, +): void { + if (typeof content === "string") { + const block = getOrCreateContentBlock( + contentBlocksByIndex, + contentBlockOrder, + index, + ); + block.type ??= "text"; + block.text += content; + return; + } + + if (!isObject(content)) { + return; + } + + const block = getOrCreateContentBlock( + contentBlocksByIndex, + contentBlockOrder, + index, + ); + const contentType = toContentBlockType(content.type); + if (contentType) { + block.type = contentType; + } + + if (typeof content.text === "string") { + block.type ??= "text"; + block.text += content.text; + } + + if (typeof content.thinking === "string") { + block.type ??= "thinking"; + block.thinking += content.thinking; + } +} + 
+function serializeAggregatedContentBlocks( + contentBlocksByIndex: Record, + contentBlockOrder: number[], +): SerializedCohereContentBlock[] { + return contentBlockOrder + .sort((left, right) => left - right) + .flatMap((index) => { + const block = contentBlocksByIndex[index]; + if (!block) { + return []; + } + + if (block.type === "thinking" && block.thinking.length > 0) { + return [{ type: "thinking", thinking: block.thinking }]; + } + + if (block.text.length > 0) { + return [{ type: "text", text: block.text }]; + } + + if (block.thinking.length > 0) { + return [{ type: "thinking", thinking: block.thinking }]; + } + + return []; + }); +} + +function hasThinkingContent( + contentBlocks: Array<{ type: "text" | "thinking" }>, +): boolean { + return contentBlocks.some((block) => block.type === "thinking"); +} + export function aggregateCohereChatStreamChunks( chunks: CohereChatStreamEvent[], ): { @@ -559,11 +695,14 @@ export function aggregateCohereChatStreamChunks( metadata: Record; } { const textDeltas: string[] = []; + const contentBlocksByIndex: Record = {}; + const contentBlockOrder: number[] = []; const toolCallsByIndex: Record = {}; const toolCallOrder: number[] = []; let terminalResponse: CohereChatResponse | undefined; let role: string | undefined; let finishReason: string | undefined; + let toolPlan = ""; let metadata: Record = {}; let metrics: Record = {}; @@ -642,6 +781,14 @@ export function aggregateCohereChatStreamChunks( } if (eventType === "content-delta") { + appendV8ContentBlock( + contentBlocksByIndex, + contentBlockOrder, + getV8ContentIndex(chunk), + isObject(chunk.delta) && isObject(chunk.delta.message) + ? 
chunk.delta.message.content + : undefined, + ); const text = extractV8DeltaText(chunk); if (text) { textDeltas.push(text); @@ -649,6 +796,34 @@ export function aggregateCohereChatStreamChunks( continue; } + if (eventType === "content-start") { + appendV8ContentBlock( + contentBlocksByIndex, + contentBlockOrder, + getV8ContentIndex(chunk), + isObject(chunk.delta) && isObject(chunk.delta.message) + ? chunk.delta.message.content + : undefined, + ); + continue; + } + + if (eventType === "tool-plan-delta") { + if (isObject(chunk.delta) && isObject(chunk.delta.message)) { + const deltaToolPlan = + typeof chunk.delta.message.toolPlan === "string" + ? chunk.delta.message.toolPlan + : typeof chunk.delta.message.tool_plan === "string" + ? chunk.delta.message.tool_plan + : undefined; + + if (deltaToolPlan) { + toolPlan += deltaToolPlan; + } + } + continue; + } + if (eventType === "tool-call-start") { const toolCalls = isObject(chunk.delta) && isObject(chunk.delta.message) @@ -721,14 +896,41 @@ export function aggregateCohereChatStreamChunks( .sort((left, right) => left - right) .map((index) => toolCallsByIndex[index]) .filter((toolCall): toolCall is CohereToolCall => isObject(toolCall)); + const aggregatedContentBlocks = serializeAggregatedContentBlocks( + contentBlocksByIndex, + contentBlockOrder, + ); let output: unknown = extractCohereChatOutput(terminalResponse); if (output === undefined) { const mergedText = textDeltas.join(""); - if (mergedToolCalls.length > 0 || role || mergedText.length > 0) { + const shouldUseStructuredContent = + hasThinkingContent(aggregatedContentBlocks) || toolPlan.length > 0; + + if (shouldUseStructuredContent) { + output = { + ...(role ? { role } : {}), + ...(aggregatedContentBlocks.length > 0 + ? { content: aggregatedContentBlocks } + : {}), + ...(toolPlan.length > 0 ? { toolPlan } : {}), + ...(mergedToolCalls.length > 0 ? 
{ toolCalls: mergedToolCalls } : {}), + }; + } else if ( + mergedToolCalls.length > 0 || + role || + mergedText.length > 0 || + aggregatedContentBlocks.length > 0 + ) { + const textContent = + mergedText.length > 0 + ? mergedText + : aggregatedContentBlocks[0]?.type === "text" + ? aggregatedContentBlocks[0].text + : undefined; output = { ...(role ? { role } : {}), - ...(mergedText.length > 0 ? { content: mergedText } : {}), + ...(textContent ? { content: textContent } : {}), ...(mergedToolCalls.length > 0 ? { toolCalls: mergedToolCalls } : {}), }; } diff --git a/js/src/vendor-sdk-types/cohere.ts b/js/src/vendor-sdk-types/cohere.ts index fc6347c45..920c01f3b 100644 --- a/js/src/vendor-sdk-types/cohere.ts +++ b/js/src/vendor-sdk-types/cohere.ts @@ -3,6 +3,10 @@ export type CohereTokenUsage = { input_tokens?: number; outputTokens?: number; output_tokens?: number; + reasoningTokens?: number; + reasoning_tokens?: number; + thinkingTokens?: number; + thinking_tokens?: number; totalTokens?: number; total_tokens?: number; [key: string]: unknown; @@ -68,6 +72,8 @@ export type CohereChatResponse = { message?: { role?: string; content?: unknown; + toolPlan?: string; + tool_plan?: string; toolCalls?: CohereToolCall[]; tool_calls?: CohereToolCall[]; [key: string]: unknown; @@ -96,9 +102,13 @@ export type CohereChatStreamEvent = { usage?: CohereUsageLike; message?: { role?: string; + toolPlan?: string; + tool_plan?: string; content?: | string | { + type?: "text" | "thinking"; + thinking?: string; text?: string; [key: string]: unknown; } From 9ee6400d0539681d5fbb6441254da13969cd10ca Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Tue, 21 Apr 2026 16:44:05 +0200 Subject: [PATCH 05/26] fix(huggingface): Capture streamed tool calls (#1848) We were ignoring streamed tool calls. 
Fixes https://github.com/braintrustdata/braintrust-sdk-javascript/issues/1846 --- .changeset/sunny-cities-dream.md | 5 + .../huggingface-v281.log-payloads.json | 216 ++++++++++++--- .../huggingface-v281.span-events.json | 54 +++- .../huggingface-v3150.log-payloads.json | 246 ++++++++++++++---- .../huggingface-v3150.span-events.json | 55 +++- .../huggingface-v41315.log-payloads.json | 246 ++++++++++++++---- .../huggingface-v41315.span-events.json | 55 +++- .../huggingface-instrumentation/assertions.ts | 149 ++++++++++- .../scenario.impl.mjs | 40 +++ .../plugins/huggingface-plugin.test.ts | 148 +++++++++++ .../plugins/huggingface-plugin.ts | 116 ++++++++- 11 files changed, 1175 insertions(+), 155 deletions(-) create mode 100644 .changeset/sunny-cities-dream.md create mode 100644 js/src/instrumentation/plugins/huggingface-plugin.test.ts diff --git a/.changeset/sunny-cities-dream.md b/.changeset/sunny-cities-dream.md new file mode 100644 index 000000000..45a22fd0e --- /dev/null +++ b/.changeset/sunny-cities-dream.md @@ -0,0 +1,5 @@ +--- +"braintrust": patch +--- + +fix(huggingface): Capture streamed tool calls diff --git a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v281.log-payloads.json b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v281.log-payloads.json index 5d8ada30c..72d0b4046 100644 --- a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v281.log-payloads.json +++ b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v281.log-payloads.json @@ -120,7 +120,6 @@ "completion_reasoning_tokens": 0, "completion_rejected_prediction_tokens": 0, "completion_tokens": "", - "prompt_cached_tokens": 0, "prompt_tokens": "", "tokens": "" }, @@ -144,7 +143,7 @@ "output": [ { "content": "", - "finish_reason": "stop", + "finish_reason": "", "index": 0, "role": "assistant" } @@ -254,7 +253,6 @@ "completion_reasoning_tokens": 0, "completion_rejected_prediction_tokens": 0, "completion_tokens": "", - 
"prompt_cached_tokens": 0, "prompt_tokens": "", "time_to_first_token": "", "tokens": "" @@ -280,7 +278,7 @@ "choices": [ { "content": "", - "finish_reason": "stop", + "finish_reason": "", "index": 0, "role": "assistant" } @@ -318,7 +316,7 @@ "id": "", "log_id": "g", "metadata": { - "operation": "text-generation-stream", + "operation": "chat-stream-tool-call", "testRunId": "" }, "metrics": { @@ -328,7 +326,7 @@ "root_span_id": "", "span_attributes": { "exec_counter": 5, - "name": "huggingface-text-generation-stream-operation" + "name": "huggingface-chat-stream-tool-call-operation" }, "span_id": "", "span_parents": [ @@ -353,13 +351,41 @@ "context": {}, "created": "", "id": "", - "input": "The capital of France is", + "input": [ + { + "content": "What is the weather in San Francisco? Call the get_current_weather tool.", + "role": "user" + } + ], "log_id": "g", "metadata": { - "endpointUrl": "https://router.huggingface.co/featherless-ai/v1/completions", - "max_tokens": 4, - "model": "arcee-ai/Trinity-Large-Thinking", - "provider": "huggingface" + "endpointUrl": "https://router.huggingface.co", + "max_tokens": 64, + "model": "meta-llama/Llama-3.1-8B-Instruct", + "provider": "featherless-ai", + "temperature": 0, + "tool_choice": "required", + "tools": [ + { + "function": { + "description": "Get the current weather for a location.", + "name": "get_current_weather", + "parameters": { + "properties": { + "location": { + "description": "City and state or city and country.", + "type": "string" + } + }, + "required": [ + "location" + ], + "type": "object" + } + }, + "type": "function" + } + ] }, "metrics": { "start": "" @@ -368,7 +394,7 @@ "root_span_id": "", "span_attributes": { "exec_counter": 6, - "name": "huggingface.text_generation_stream", + "name": "huggingface.chat_completion_stream", "type": "llm" }, "span_id": "", @@ -381,6 +407,9 @@ "id": "", "log_id": "g", "metrics": { + "completion_accepted_prediction_tokens": 0, + "completion_reasoning_tokens": 0, + 
"completion_rejected_prediction_tokens": 0, "completion_tokens": "", "prompt_tokens": "", "time_to_first_token": "", @@ -398,11 +427,20 @@ "id": "", "log_id": "g", "metadata": { - "finish_reason": "length" + "created": 0, + "id": "", + "model": "llama3.1-8b", + "object": "chat.completion.chunk" }, "output": { - "finish_reason": "length", - "generated_text": "" + "choices": [ + { + "content": "", + "finish_reason": "", + "index": 0, + "role": "assistant" + } + ] }, "project_id": "", "root_span_id": "", @@ -433,10 +471,10 @@ "caller_lineno": 0 }, "created": "", - "id": "", + "id": "", "log_id": "g", "metadata": { - "operation": "feature-extraction", + "operation": "text-generation-stream", "testRunId": "" }, "metrics": { @@ -446,23 +484,141 @@ "root_span_id": "", "span_attributes": { "exec_counter": 7, + "name": "huggingface-text-generation-stream-operation" + }, + "span_id": "", + "span_parents": [ + "" + ] + }, + { + "_is_merge": true, + "id": "", + "log_id": "g", + "metrics": { + "end": "" + }, + "project_id": "", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ] + }, + { + "context": {}, + "created": "", + "id": "", + "input": "The capital of France is", + "log_id": "g", + "metadata": { + "endpointUrl": "https://router.huggingface.co/featherless-ai/v1/completions", + "max_tokens": 4, + "model": "arcee-ai/Trinity-Large-Thinking", + "provider": "huggingface" + }, + "metrics": { + "start": "" + }, + "project_id": "", + "root_span_id": "", + "span_attributes": { + "exec_counter": 8, + "name": "huggingface.text_generation_stream", + "type": "llm" + }, + "span_id": "", + "span_parents": [ + "" + ] + }, + { + "_is_merge": true, + "id": "", + "log_id": "g", + "metrics": { + "completion_tokens": "", + "prompt_tokens": "", + "time_to_first_token": "", + "tokens": "" + }, + "project_id": "", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ] + }, + { + "_is_merge": true, + "id": "", + "log_id": "g", + "metadata": { + "finish_reason": 
"length" + }, + "output": { + "finish_reason": "length", + "generated_text": "" + }, + "project_id": "", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ] + }, + { + "_is_merge": true, + "id": "", + "log_id": "g", + "metrics": { + "end": "" + }, + "project_id": "", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ] + }, + { + "_is_merge": false, + "context": { + "caller_filename": "/e2e/helpers/provider-runtime.mjs", + "caller_functionname": "runOperation", + "caller_lineno": 0 + }, + "created": "", + "id": "", + "log_id": "g", + "metadata": { + "operation": "feature-extraction", + "testRunId": "" + }, + "metrics": { + "start": "" + }, + "project_id": "", + "root_span_id": "", + "span_attributes": { + "exec_counter": 9, "name": "huggingface-feature-extraction-operation" }, - "span_id": "", + "span_id": "", "span_parents": [ "" ] }, { "_is_merge": true, - "id": "", + "id": "", "log_id": "g", "metrics": { "end": "" }, "project_id": "", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ] @@ -470,7 +626,7 @@ { "context": {}, "created": "", - "id": "", + "id": "", "input": "Paris France", "log_id": "g", "metadata": { @@ -484,18 +640,18 @@ "project_id": "", "root_span_id": "", "span_attributes": { - "exec_counter": 8, + "exec_counter": 10, "name": "huggingface.feature_extraction", "type": "llm" }, - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ] }, { "_is_merge": true, - "id": "", + "id": "", "log_id": "g", "metrics": {}, "output": { @@ -503,23 +659,23 @@ }, "project_id": "", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ] }, { "_is_merge": true, - "id": "", + "id": "", "log_id": "g", "metrics": { "end": "" }, "project_id": "", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ] } ] diff --git a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v281.span-events.json 
b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v281.span-events.json index 44005a464..ad0793623 100644 --- a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v281.span-events.json +++ b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v281.span-events.json @@ -102,6 +102,48 @@ ], "type": "llm" }, + { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "chat-stream-tool-call" + }, + "metric_keys": [], + "name": "huggingface-chat-stream-tool-call-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "endpointUrl": "https://router.huggingface.co", + "model": "llama3.1-8b", + "provider": "featherless-ai" + }, + "metric_keys": [ + "completion_accepted_prediction_tokens", + "completion_reasoning_tokens", + "completion_rejected_prediction_tokens", + "completion_tokens", + "prompt_cached_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens" + ], + "name": "huggingface.chat_completion_stream", + "output": null, + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, null, null, { @@ -113,7 +155,7 @@ "metric_keys": [], "name": "huggingface-text-generation-stream-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -140,9 +182,9 @@ "generated_text": "" }, "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -155,7 +197,7 @@ "metric_keys": [], "name": "huggingface-feature-extraction-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -175,9 +217,9 @@ "embedding_length": 1024 }, "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" } diff --git a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.log-payloads.json 
b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.log-payloads.json index 3ab9d6929..8c75b57ba 100644 --- a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.log-payloads.json +++ b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.log-payloads.json @@ -139,7 +139,7 @@ "output": [ { "content": "", - "finish_reason": "stop", + "finish_reason": "", "index": 0, "role": "assistant" } @@ -270,7 +270,7 @@ "choices": [ { "content": "", - "finish_reason": "stop", + "finish_reason": "", "index": 0, "role": "assistant" } @@ -308,7 +308,7 @@ "id": "", "log_id": "g", "metadata": { - "operation": "text-generation", + "operation": "chat-stream-tool-call", "testRunId": "" }, "metrics": { @@ -318,7 +318,7 @@ "root_span_id": "", "span_attributes": { "exec_counter": 5, - "name": "huggingface-text-generation-operation" + "name": "huggingface-chat-stream-tool-call-operation" }, "span_id": "", "span_parents": [ @@ -343,6 +343,160 @@ "context": {}, "created": "", "id": "", + "input": [ + { + "content": "What is the weather in San Francisco? 
Call the get_current_weather tool.", + "role": "user" + } + ], + "log_id": "g", + "metadata": { + "max_tokens": 64, + "model": "meta-llama/Llama-3.1-8B-Instruct", + "provider": "featherless-ai", + "temperature": 0, + "tool_choice": "required", + "tools": [ + { + "function": { + "description": "Get the current weather for a location.", + "name": "get_current_weather", + "parameters": { + "properties": { + "location": { + "description": "City and state or city and country.", + "type": "string" + } + }, + "required": [ + "location" + ], + "type": "object" + } + }, + "type": "function" + } + ] + }, + "metrics": { + "start": "" + }, + "project_id": "", + "root_span_id": "", + "span_attributes": { + "exec_counter": 6, + "name": "huggingface.chat_completion_stream", + "type": "llm" + }, + "span_id": "", + "span_parents": [ + "" + ] + }, + { + "_is_merge": true, + "id": "", + "log_id": "g", + "metrics": { + "completion_tokens": "", + "prompt_tokens": "", + "time_to_first_token": "", + "tokens": "" + }, + "project_id": "", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ] + }, + { + "_is_merge": true, + "id": "", + "log_id": "g", + "metadata": { + "created": 0, + "id": "", + "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk" + }, + "output": { + "choices": [ + { + "content": "", + "finish_reason": "", + "index": 0, + "role": "assistant" + } + ] + }, + "project_id": "", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ] + }, + { + "_is_merge": true, + "id": "", + "log_id": "g", + "metrics": { + "end": "" + }, + "project_id": "", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ] + }, + { + "_is_merge": false, + "context": { + "caller_filename": "/e2e/helpers/provider-runtime.mjs", + "caller_functionname": "runOperation", + "caller_lineno": 0 + }, + "created": "", + "id": "", + "log_id": "g", + "metadata": { + "operation": "text-generation", + "testRunId": "" + }, + "metrics": { + "start": 
"" + }, + "project_id": "", + "root_span_id": "", + "span_attributes": { + "exec_counter": 7, + "name": "huggingface-text-generation-operation" + }, + "span_id": "", + "span_parents": [ + "" + ] + }, + { + "_is_merge": true, + "id": "", + "log_id": "g", + "metrics": { + "end": "" + }, + "project_id": "", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ] + }, + { + "context": {}, + "created": "", + "id": "", "input": "The capital of France is", "log_id": "g", "metadata": { @@ -360,18 +514,18 @@ "project_id": "", "root_span_id": "", "span_attributes": { - "exec_counter": 6, + "exec_counter": 8, "name": "huggingface.text_generation", "type": "llm" }, - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ] }, { "_is_merge": true, - "id": "", + "id": "", "log_id": "g", "metrics": {}, "output": { @@ -379,23 +533,23 @@ }, "project_id": "", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ] }, { "_is_merge": true, - "id": "", + "id": "", "log_id": "g", "metrics": { "end": "" }, "project_id": "", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ] }, { @@ -406,7 +560,7 @@ "caller_lineno": 0 }, "created": "", - "id": "", + "id": "", "log_id": "g", "metadata": { "operation": "text-generation-stream", @@ -418,24 +572,24 @@ "project_id": "", "root_span_id": "", "span_attributes": { - "exec_counter": 7, + "exec_counter": 9, "name": "huggingface-text-generation-stream-operation" }, - "span_id": "", + "span_id": "", "span_parents": [ "" ] }, { "_is_merge": true, - "id": "", + "id": "", "log_id": "g", "metrics": { "end": "" }, "project_id": "", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ] @@ -443,7 +597,7 @@ { "context": {}, "created": "", - "id": "", + "id": "", "input": "The capital of France is", "log_id": "g", "metadata": { @@ -461,18 +615,18 @@ "project_id": "", "root_span_id": "", "span_attributes": { - "exec_counter": 8, + "exec_counter": 10, "name": 
"huggingface.text_generation_stream", "type": "llm" }, - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ] }, { "_is_merge": true, - "id": "", + "id": "", "log_id": "g", "metrics": { "completion_tokens": "", @@ -482,14 +636,14 @@ }, "project_id": "", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ] }, { "_is_merge": true, - "id": "", + "id": "", "log_id": "g", "metadata": { "finish_reason": "length" @@ -500,23 +654,23 @@ }, "project_id": "", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ] }, { "_is_merge": true, - "id": "", + "id": "", "log_id": "g", "metrics": { "end": "" }, "project_id": "", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ] }, { @@ -527,7 +681,7 @@ "caller_lineno": 0 }, "created": "", - "id": "", + "id": "", "log_id": "g", "metadata": { "operation": "feature-extraction", @@ -539,24 +693,24 @@ "project_id": "", "root_span_id": "", "span_attributes": { - "exec_counter": 9, + "exec_counter": 11, "name": "huggingface-feature-extraction-operation" }, - "span_id": "", + "span_id": "", "span_parents": [ "" ] }, { "_is_merge": true, - "id": "", + "id": "", "log_id": "g", "metrics": { "end": "" }, "project_id": "", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ] @@ -564,7 +718,7 @@ { "context": {}, "created": "", - "id": "", + "id": "", "input": "Paris France", "log_id": "g", "metadata": { @@ -577,18 +731,18 @@ "project_id": "", "root_span_id": "", "span_attributes": { - "exec_counter": 10, + "exec_counter": 12, "name": "huggingface.feature_extraction", "type": "llm" }, - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ] }, { "_is_merge": true, - "id": "", + "id": "", "log_id": "g", "metrics": {}, "output": { @@ -596,23 +750,23 @@ }, "project_id": "", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ] }, { "_is_merge": true, - "id": "", + "id": "", "log_id": "g", 
"metrics": { "end": "" }, "project_id": "", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ] } ] diff --git a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.span-events.json b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.span-events.json index 42b07d833..1fb8555b4 100644 --- a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.span-events.json +++ b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.span-events.json @@ -92,6 +92,43 @@ ], "type": "llm" }, + { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "chat-stream-tool-call" + }, + "metric_keys": [], + "name": "huggingface-chat-stream-tool-call-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "provider": "featherless-ai" + }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens" + ], + "name": "huggingface.chat_completion_stream", + "output": null, + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, { "has_input": false, "has_output": false, @@ -101,7 +138,7 @@ "metric_keys": [], "name": "huggingface-text-generation-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -120,9 +157,9 @@ "generated_text": "" }, "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -135,7 +172,7 @@ "metric_keys": [], "name": "huggingface-text-generation-stream-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -161,9 +198,9 @@ "generated_text": "" }, "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -176,7 +213,7 @@ "metric_keys": [], 
"name": "huggingface-feature-extraction-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -195,9 +232,9 @@ "embedding_length": 1024 }, "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" } diff --git a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.log-payloads.json b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.log-payloads.json index 3ab9d6929..8c75b57ba 100644 --- a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.log-payloads.json +++ b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.log-payloads.json @@ -139,7 +139,7 @@ "output": [ { "content": "", - "finish_reason": "stop", + "finish_reason": "", "index": 0, "role": "assistant" } @@ -270,7 +270,7 @@ "choices": [ { "content": "", - "finish_reason": "stop", + "finish_reason": "", "index": 0, "role": "assistant" } @@ -308,7 +308,7 @@ "id": "", "log_id": "g", "metadata": { - "operation": "text-generation", + "operation": "chat-stream-tool-call", "testRunId": "" }, "metrics": { @@ -318,7 +318,7 @@ "root_span_id": "", "span_attributes": { "exec_counter": 5, - "name": "huggingface-text-generation-operation" + "name": "huggingface-chat-stream-tool-call-operation" }, "span_id": "", "span_parents": [ @@ -343,6 +343,160 @@ "context": {}, "created": "", "id": "", + "input": [ + { + "content": "What is the weather in San Francisco? 
Call the get_current_weather tool.", + "role": "user" + } + ], + "log_id": "g", + "metadata": { + "max_tokens": 64, + "model": "meta-llama/Llama-3.1-8B-Instruct", + "provider": "featherless-ai", + "temperature": 0, + "tool_choice": "required", + "tools": [ + { + "function": { + "description": "Get the current weather for a location.", + "name": "get_current_weather", + "parameters": { + "properties": { + "location": { + "description": "City and state or city and country.", + "type": "string" + } + }, + "required": [ + "location" + ], + "type": "object" + } + }, + "type": "function" + } + ] + }, + "metrics": { + "start": "" + }, + "project_id": "", + "root_span_id": "", + "span_attributes": { + "exec_counter": 6, + "name": "huggingface.chat_completion_stream", + "type": "llm" + }, + "span_id": "", + "span_parents": [ + "" + ] + }, + { + "_is_merge": true, + "id": "", + "log_id": "g", + "metrics": { + "completion_tokens": "", + "prompt_tokens": "", + "time_to_first_token": "", + "tokens": "" + }, + "project_id": "", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ] + }, + { + "_is_merge": true, + "id": "", + "log_id": "g", + "metadata": { + "created": 0, + "id": "", + "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "object": "chat.completion.chunk" + }, + "output": { + "choices": [ + { + "content": "", + "finish_reason": "", + "index": 0, + "role": "assistant" + } + ] + }, + "project_id": "", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ] + }, + { + "_is_merge": true, + "id": "", + "log_id": "g", + "metrics": { + "end": "" + }, + "project_id": "", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ] + }, + { + "_is_merge": false, + "context": { + "caller_filename": "/e2e/helpers/provider-runtime.mjs", + "caller_functionname": "runOperation", + "caller_lineno": 0 + }, + "created": "", + "id": "", + "log_id": "g", + "metadata": { + "operation": "text-generation", + "testRunId": "" + }, + "metrics": { + "start": 
"" + }, + "project_id": "", + "root_span_id": "", + "span_attributes": { + "exec_counter": 7, + "name": "huggingface-text-generation-operation" + }, + "span_id": "", + "span_parents": [ + "" + ] + }, + { + "_is_merge": true, + "id": "", + "log_id": "g", + "metrics": { + "end": "" + }, + "project_id": "", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ] + }, + { + "context": {}, + "created": "", + "id": "", "input": "The capital of France is", "log_id": "g", "metadata": { @@ -360,18 +514,18 @@ "project_id": "", "root_span_id": "", "span_attributes": { - "exec_counter": 6, + "exec_counter": 8, "name": "huggingface.text_generation", "type": "llm" }, - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ] }, { "_is_merge": true, - "id": "", + "id": "", "log_id": "g", "metrics": {}, "output": { @@ -379,23 +533,23 @@ }, "project_id": "", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ] }, { "_is_merge": true, - "id": "", + "id": "", "log_id": "g", "metrics": { "end": "" }, "project_id": "", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ] }, { @@ -406,7 +560,7 @@ "caller_lineno": 0 }, "created": "", - "id": "", + "id": "", "log_id": "g", "metadata": { "operation": "text-generation-stream", @@ -418,24 +572,24 @@ "project_id": "", "root_span_id": "", "span_attributes": { - "exec_counter": 7, + "exec_counter": 9, "name": "huggingface-text-generation-stream-operation" }, - "span_id": "", + "span_id": "", "span_parents": [ "" ] }, { "_is_merge": true, - "id": "", + "id": "", "log_id": "g", "metrics": { "end": "" }, "project_id": "", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ] @@ -443,7 +597,7 @@ { "context": {}, "created": "", - "id": "", + "id": "", "input": "The capital of France is", "log_id": "g", "metadata": { @@ -461,18 +615,18 @@ "project_id": "", "root_span_id": "", "span_attributes": { - "exec_counter": 8, + "exec_counter": 10, "name": 
"huggingface.text_generation_stream", "type": "llm" }, - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ] }, { "_is_merge": true, - "id": "", + "id": "", "log_id": "g", "metrics": { "completion_tokens": "", @@ -482,14 +636,14 @@ }, "project_id": "", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ] }, { "_is_merge": true, - "id": "", + "id": "", "log_id": "g", "metadata": { "finish_reason": "length" @@ -500,23 +654,23 @@ }, "project_id": "", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ] }, { "_is_merge": true, - "id": "", + "id": "", "log_id": "g", "metrics": { "end": "" }, "project_id": "", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ] }, { @@ -527,7 +681,7 @@ "caller_lineno": 0 }, "created": "", - "id": "", + "id": "", "log_id": "g", "metadata": { "operation": "feature-extraction", @@ -539,24 +693,24 @@ "project_id": "", "root_span_id": "", "span_attributes": { - "exec_counter": 9, + "exec_counter": 11, "name": "huggingface-feature-extraction-operation" }, - "span_id": "", + "span_id": "", "span_parents": [ "" ] }, { "_is_merge": true, - "id": "", + "id": "", "log_id": "g", "metrics": { "end": "" }, "project_id": "", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ] @@ -564,7 +718,7 @@ { "context": {}, "created": "", - "id": "", + "id": "", "input": "Paris France", "log_id": "g", "metadata": { @@ -577,18 +731,18 @@ "project_id": "", "root_span_id": "", "span_attributes": { - "exec_counter": 10, + "exec_counter": 12, "name": "huggingface.feature_extraction", "type": "llm" }, - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ] }, { "_is_merge": true, - "id": "", + "id": "", "log_id": "g", "metrics": {}, "output": { @@ -596,23 +750,23 @@ }, "project_id": "", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ] }, { "_is_merge": true, - "id": "", + "id": "", "log_id": "g", 
"metrics": { "end": "" }, "project_id": "", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ] } ] diff --git a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.span-events.json b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.span-events.json index 42b07d833..1fb8555b4 100644 --- a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.span-events.json +++ b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.span-events.json @@ -92,6 +92,43 @@ ], "type": "llm" }, + { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "chat-stream-tool-call" + }, + "metric_keys": [], + "name": "huggingface-chat-stream-tool-call-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "provider": "featherless-ai" + }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens" + ], + "name": "huggingface.chat_completion_stream", + "output": null, + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, { "has_input": false, "has_output": false, @@ -101,7 +138,7 @@ "metric_keys": [], "name": "huggingface-text-generation-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -120,9 +157,9 @@ "generated_text": "" }, "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -135,7 +172,7 @@ "metric_keys": [], "name": "huggingface-text-generation-stream-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -161,9 +198,9 @@ "generated_text": "" }, "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -176,7 +213,7 @@ "metric_keys": [], 
"name": "huggingface-feature-extraction-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -195,9 +232,9 @@ "embedding_length": 1024 }, "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" } diff --git a/e2e/scenarios/huggingface-instrumentation/assertions.ts b/e2e/scenarios/huggingface-instrumentation/assertions.ts index 60b9df63b..b28787953 100644 --- a/e2e/scenarios/huggingface-instrumentation/assertions.ts +++ b/e2e/scenarios/huggingface-instrumentation/assertions.ts @@ -37,7 +37,13 @@ function isRecord(value: Json | undefined): value is Record { return typeof value === "object" && value !== null && !Array.isArray(value); } -function summarizeChatOutput(output: Json | undefined): Json { +function summarizeChatOutput( + output: Json | undefined, + options?: { + normalizeFinishReason?: boolean; + omitToolCalls?: boolean; + }, +): Json { if (!Array.isArray(output)) { return null; } @@ -51,17 +57,49 @@ function summarizeChatOutput(output: Json | undefined): Json { ? (choice.message as Record) : undefined; const content = message?.content; - return { + const toolCalls = + !options?.omitToolCalls && Array.isArray(message?.tool_calls) + ? message.tool_calls.map((toolCall) => { + if (!isRecord(toolCall as Json)) { + return toolCall as Json; + } + + const toolFunction = isRecord(toolCall.function as Json) + ? (toolCall.function as Record) + : undefined; + return { + id: toolCall.id ?? null, + index: toolCall.index ?? null, + name: toolFunction?.name ?? null, + type: toolCall.type ?? null, + arguments: + typeof toolFunction?.arguments === "string" + ? "" + : (toolFunction?.arguments ?? null), + } satisfies Json; + }) + : undefined; + const summary: Record = { content: typeof content === "string" ? "" : Array.isArray(content) ? "" : (content ?? null), - finish_reason: choice.finish_reason ?? 
null, + finish_reason: + options?.normalizeFinishReason && + typeof choice.finish_reason === "string" + ? "" + : (choice.finish_reason ?? null), index: choice.index ?? null, role: message?.role ?? null, - } satisfies Json; + }; + + if (toolCalls) { + summary.tool_calls = toolCalls; + } + + return summary satisfies Json; }); } @@ -119,6 +157,10 @@ function normalizeMetrics(value: Json): Json { const normalized: Record = {}; for (const [key, entry] of Object.entries(value)) { + if (key === "prompt_cached_tokens") { + continue; + } + if ( typeof entry === "number" && [ @@ -147,14 +189,23 @@ function normalizePayloadOutput(row: Json): Json { return "output" in row ? { ...row, - output: normalizeLoggedOutput(row.output), + output: normalizeLoggedOutput(row.output, { + normalizeFinishReason: true, + omitToolCalls: true, + }), } : row; } -function normalizeLoggedOutput(output: Json): Json { +function normalizeLoggedOutput( + output: Json, + options?: { + normalizeFinishReason?: boolean; + omitToolCalls?: boolean; + }, +): Json { if (Array.isArray(output)) { - return summarizeChatOutput(output); + return summarizeChatOutput(output, options); } if (!isRecord(output)) { @@ -168,7 +219,7 @@ function normalizeLoggedOutput(output: Json): Json { if (Array.isArray(output.choices)) { return { ...output, - choices: summarizeChatOutput(output.choices), + choices: summarizeChatOutput(output.choices, options), }; } @@ -182,6 +233,10 @@ function buildSpanSummary(events: CapturedLogEvent[]): Json { events, "huggingface-chat-stream-operation", ); + const chatStreamToolCallOperation = findLatestSpan( + events, + "huggingface-chat-stream-tool-call-operation", + ); const textGenerationOperation = findLatestSpan( events, "huggingface-text-generation-operation", @@ -221,6 +276,18 @@ function buildSpanSummary(events: CapturedLogEvent[]): Json { )!, ) : null, + chatStreamToolCallOperation + ? 
summarizeWrapperContract(chatStreamToolCallOperation, ["operation"]) + : null, + chatStreamToolCallOperation + ? summarizeProviderSpan( + findLatestChildSpan( + events, + "huggingface.chat_completion_stream", + chatStreamToolCallOperation.span.id, + )!, + ) + : null, textGenerationOperation ? summarizeWrapperContract(textGenerationOperation, ["operation"]) : null, @@ -324,5 +391,71 @@ export function defineHuggingFaceInstrumentationAssertions(options: { ); }, ); + + test( + "captures streamed tool calls and request tool metadata", + { timeout: options.timeoutMs }, + () => { + const operation = findLatestSpan( + events, + "huggingface-chat-stream-tool-call-operation", + ); + const span = findLatestChildSpan( + events, + "huggingface.chat_completion_stream", + operation?.span.id, + ); + const firstChoice = isRecord(span?.output as Json) + ? span?.output.choices + : undefined; + const firstMessage = + Array.isArray(firstChoice) && isRecord(firstChoice[0] as Json) + ? (((firstChoice[0] as Record).message as Json) ?? + null) + : null; + const message = isRecord(firstMessage) ? firstMessage : undefined; + const toolCalls = Array.isArray(message?.tool_calls) + ? message.tool_calls + : undefined; + const choice = + Array.isArray(firstChoice) && isRecord(firstChoice[0] as Json) + ? (firstChoice[0] as Record) + : undefined; + const finishReason = + typeof choice?.finish_reason === "string" + ? 
choice.finish_reason + : undefined; + + expect(span?.metadata).toMatchObject({ + model: expect.any(String), + provider: "featherless-ai", + tool_choice: "required", + tools: [ + { + function: { + name: "get_current_weather", + }, + type: "function", + }, + ], + }); + + if (toolCalls) { + expect(toolCalls).toEqual([ + expect.objectContaining({ + function: expect.objectContaining({ + arguments: expect.any(String), + name: "get_current_weather", + }), + type: "function", + }), + ]); + expect(finishReason).toBe("tool_calls"); + return; + } + + expect(finishReason).toEqual(expect.any(String)); + }, + ); }); } diff --git a/e2e/scenarios/huggingface-instrumentation/scenario.impl.mjs b/e2e/scenarios/huggingface-instrumentation/scenario.impl.mjs index 61e54e51a..aecb247e6 100644 --- a/e2e/scenarios/huggingface-instrumentation/scenario.impl.mjs +++ b/e2e/scenarios/huggingface-instrumentation/scenario.impl.mjs @@ -14,6 +14,24 @@ const SCENARIO_NAME = "huggingface-instrumentation"; const TEXT_GENERATION_MODEL = "meta-llama/Llama-3.1-8B"; const TEXT_GENERATION_PROVIDER = "featherless-ai"; const V2_TEXT_GENERATION_MODEL = "arcee-ai/Trinity-Large-Thinking"; +const TOOL_NAME = "get_current_weather"; +const CHAT_TOOL = { + type: "function", + function: { + name: TOOL_NAME, + description: "Get the current weather for a location.", + parameters: { + type: "object", + properties: { + location: { + type: "string", + description: "City and state or city and country.", + }, + }, + required: ["location"], + }, + }, +}; const HUGGINGFACE_SCENARIO_TIMEOUT_MS = 150_000; const V2_CHAT_ENDPOINT_URL = "https://router.huggingface.co"; const V2_FEATURE_EXTRACTION_ENDPOINT_URL = @@ -127,6 +145,28 @@ async function runHuggingFaceInstrumentationScenario(sdk, options = {}) { }, ); + await runOperation( + "huggingface-chat-stream-tool-call-operation", + "chat-stream-tool-call", + async () => { + const stream = client.chatCompletionStream({ + max_tokens: 64, + messages: [ + { + role: "user", + content: 
`What is the weather in San Francisco? Call the ${TOOL_NAME} tool.`, + }, + ], + model: CHAT_MODEL, + provider: CHAT_PROVIDER, + temperature: 0, + tool_choice: "required", + tools: [CHAT_TOOL], + }); + await collectAsync(stream); + }, + ); + if (capabilities.supportsLiveTextGeneration) { await runOperation( "huggingface-text-generation-operation", diff --git a/js/src/instrumentation/plugins/huggingface-plugin.test.ts b/js/src/instrumentation/plugins/huggingface-plugin.test.ts new file mode 100644 index 000000000..8b2b80286 --- /dev/null +++ b/js/src/instrumentation/plugins/huggingface-plugin.test.ts @@ -0,0 +1,148 @@ +import { describe, expect, it } from "vitest"; +import { + aggregateChatCompletionChunks, + extractResponseMetadata, +} from "./huggingface-plugin"; + +describe("extractResponseMetadata", () => { + it("keeps allowlisted response fields", () => { + expect( + extractResponseMetadata({ + id: "chatcmpl_123", + object: "chat.completion", + model: "meta-llama/Llama-3.1-8B-Instruct", + created: 123, + usage: { total_tokens: 10 }, + choices: [], + }), + ).toEqual({ + created: 123, + id: "chatcmpl_123", + model: "meta-llama/Llama-3.1-8B-Instruct", + object: "chat.completion", + }); + }); +}); + +describe("aggregateChatCompletionChunks", () => { + it("merges streamed tool call deltas by tool index", () => { + expect( + aggregateChatCompletionChunks([ + { + choices: [ + { + delta: { + role: "assistant", + tool_calls: [ + { + id: "call_1", + index: 0, + type: "function", + function: { + name: "get_current_weather", + }, + }, + ], + }, + }, + ], + }, + { + choices: [ + { + delta: { + tool_calls: [ + { + index: 0, + function: { + arguments: '{"location":"San Francisco"}', + }, + }, + ], + }, + finish_reason: "tool_calls", + }, + ], + }, + ]), + ).toEqual({ + choices: [ + { + finish_reason: "tool_calls", + index: 0, + message: { + content: "", + role: "assistant", + tool_calls: [ + { + id: "call_1", + type: "function", + function: { + name: "get_current_weather", + 
arguments: '{"location":"San Francisco"}', + }, + }, + ], + }, + }, + ], + }); + }); + + it("preserves full tool calls emitted in a single streamed chunk", () => { + expect( + aggregateChatCompletionChunks([ + { + choices: [ + { + delta: { + role: "assistant", + }, + }, + ], + }, + { + choices: [ + { + delta: { + tool_calls: [ + { + id: "call_1", + index: 0, + type: "function", + function: { + name: "get_current_weather", + arguments: '{"location":"San Francisco"}', + }, + }, + ], + }, + finish_reason: "tool_calls", + }, + ], + }, + ]), + ).toEqual({ + choices: [ + { + finish_reason: "tool_calls", + index: 0, + message: { + content: "", + role: "assistant", + tool_calls: [ + { + id: "call_1", + type: "function", + function: { + name: "get_current_weather", + arguments: '{"location":"San Francisco"}', + }, + }, + ], + }, + }, + ], + }); + }); +}); diff --git a/js/src/instrumentation/plugins/huggingface-plugin.ts b/js/src/instrumentation/plugins/huggingface-plugin.ts index 20e06823e..797c3bf82 100644 --- a/js/src/instrumentation/plugins/huggingface-plugin.ts +++ b/js/src/instrumentation/plugins/huggingface-plugin.ts @@ -29,6 +29,8 @@ const REQUEST_METADATA_ALLOWLIST = new Set([ "stop", "stream", "temperature", + "tool_choice", + "tools", "top_p", ]); @@ -395,13 +397,17 @@ function aggregateChatCompletionChunks( content: string; finish_reason?: string | null; role?: string; + toolCallsByIndex: Map>; } >(); for (const chunk of chunks) { for (const choice of chunk.choices ?? []) { const index = typeof choice.index === "number" ? choice.index : 0; - const existing = aggregatedChoices.get(index) ?? { content: "" }; + const existing = aggregatedChoices.get(index) ?? { + content: "", + toolCallsByIndex: new Map>(), + }; const delta = isObject(choice.delta) ? choice.delta : undefined; const message = isObject(choice.message) ? 
choice.message : undefined; @@ -421,6 +427,12 @@ function aggregateChatCompletionChunks( existing.finish_reason = choice.finish_reason; } + const toolCallDeltas = + getChatToolCallDeltas(delta) ?? getChatToolCallDeltas(message); + if (toolCallDeltas) { + mergeChatToolCallDeltas(existing.toolCallsByIndex, toolCallDeltas); + } + aggregatedChoices.set(index, existing); } } @@ -431,6 +443,13 @@ function aggregateChatCompletionChunks( message: { content: choice.content, role: choice.role ?? "assistant", + ...(choice.toolCallsByIndex.size > 0 + ? { + tool_calls: [...choice.toolCallsByIndex.entries()] + .sort(([leftIndex], [rightIndex]) => leftIndex - rightIndex) + .map(([, toolCall]) => toolCall), + } + : {}), }, ...(choice.finish_reason !== undefined ? { finish_reason: choice.finish_reason } @@ -439,6 +458,101 @@ function aggregateChatCompletionChunks( }; } +function getChatToolCallDeltas( + value: Record | undefined, +): Record[] | undefined { + if (!Array.isArray(value?.tool_calls)) { + return undefined; + } + + const toolCalls = value.tool_calls.filter((toolCall) => isObject(toolCall)); + return toolCalls.length > 0 ? toolCalls : undefined; +} + +function mergeChatToolCallDeltas( + toolCallsByIndex: Map>, + toolCallDeltas: Record[], +): void { + for (const toolDelta of toolCallDeltas) { + const toolIndex = + typeof toolDelta.index === "number" && toolDelta.index >= 0 + ? toolDelta.index + : 0; + const existing = toolCallsByIndex.get(toolIndex); + + if (!existing) { + toolCallsByIndex.set(toolIndex, createChatToolCall(toolDelta)); + continue; + } + + mergeChatToolCall(existing, toolDelta); + } +} + +function createChatToolCall( + toolDelta: Record, +): Record { + const toolFunction = isObject(toolDelta.function) ? toolDelta.function : {}; + const toolCallFunction: Record = { + arguments: + typeof toolFunction.arguments === "string" ? 
toolFunction.arguments : "", + }; + + if (typeof toolFunction.name === "string") { + toolCallFunction.name = toolFunction.name; + } + + const toolCall: Record = { + function: toolCallFunction, + }; + + if (typeof toolDelta.id === "string") { + toolCall.id = toolDelta.id; + } + if (typeof toolDelta.type === "string") { + toolCall.type = toolDelta.type; + } + + return toolCall; +} + +function mergeChatToolCall( + existing: Record, + toolDelta: Record, +): void { + const currentFunction = isObject(existing.function) ? existing.function : {}; + const deltaFunction = isObject(toolDelta.function) ? toolDelta.function : {}; + const currentArguments = + typeof currentFunction.arguments === "string" + ? currentFunction.arguments + : ""; + const deltaArguments = + typeof deltaFunction.arguments === "string" ? deltaFunction.arguments : ""; + + if (typeof toolDelta.id === "string" && typeof existing.id !== "string") { + existing.id = toolDelta.id; + } + if (typeof toolDelta.type === "string" && typeof existing.type !== "string") { + existing.type = toolDelta.type; + } + + const nextFunction: Record = { + ...currentFunction, + arguments: `${currentArguments}${deltaArguments}`, + }; + + if ( + typeof deltaFunction.name === "string" && + typeof currentFunction.name !== "string" + ) { + nextFunction.name = deltaFunction.name; + } + + existing.function = nextFunction; +} + +export { aggregateChatCompletionChunks, extractResponseMetadata }; + function aggregateTextGenerationStreamChunks( chunks: HuggingFaceTextGenerationStreamOutput[], ): { generated_text: string; finish_reason?: string | null } | undefined { From bff137a7d1b9bf7780ba25ac649b89b589632bc4 Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Tue, 21 Apr 2026 16:48:45 +0200 Subject: [PATCH 06/26] chore: Fix PR linking in changelog generation (#1880) --- .github/workflows/prepare-js-release.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/prepare-js-release.yaml 
b/.github/workflows/prepare-js-release.yaml index c5d58aebd..d074c96d1 100644 --- a/.github/workflows/prepare-js-release.yaml +++ b/.github/workflows/prepare-js-release.yaml @@ -54,6 +54,8 @@ jobs: echo "release_branch=$release_branch" } >> "$GITHUB_OUTPUT" - name: Run changeset version + env: + GITHUB_TOKEN: ${{ github.token }} run: pnpm exec changeset version - name: Create release commit run: | From 5cdb71f4b91fa45dcc1bbace917948835816113c Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Wed, 22 Apr 2026 23:44:59 +0200 Subject: [PATCH 07/26] chore: Stop using discontinued gemini model (#1882) --- e2e/scenarios/google-genai-instrumentation/scenario.impl.mjs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/e2e/scenarios/google-genai-instrumentation/scenario.impl.mjs b/e2e/scenarios/google-genai-instrumentation/scenario.impl.mjs index 8adf5ca01..8d7a5fdfb 100644 --- a/e2e/scenarios/google-genai-instrumentation/scenario.impl.mjs +++ b/e2e/scenarios/google-genai-instrumentation/scenario.impl.mjs @@ -8,7 +8,7 @@ import { const GOOGLE_MODEL = "gemini-2.5-flash-lite"; const GOOGLE_EMBEDDING_MODEL = "gemini-embedding-001"; -const GOOGLE_GROUNDING_MODEL = "gemini-2.0-flash"; +const GOOGLE_GROUNDING_MODEL = "gemini-2.5-flash"; const ROOT_NAME = "google-genai-instrumentation-root"; const SCENARIO_NAME = "google-genai-instrumentation"; const GOOGLE_GENAI_RETRY_OPTIONS = { From 1c7e82849be8ce6bd6b5c8bda0dcfd36832485f3 Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Wed, 22 Apr 2026 23:45:19 +0200 Subject: [PATCH 08/26] feat: Bump google ADK patching range to include new major `1.0.0` (#1885) Fixes https://github.com/braintrustdata/braintrust-sdk-javascript/issues/1884 --- .agents/skills/e2e-tests/SKILL.md | 1 + .changeset/eager-jobs-hammer.md | 5 + e2e/config/pr-comment-scenarios.json | 5 +- ...json => google-adk-v061.log-payloads.json} | 0 ....json => google-adk-v061.span-events.json} | 0 ...son => google-adk-v1000.log-payloads.json} | 15 --- ...json => 
google-adk-v1000.span-events.json} | 17 --- .../google-adk-instrumentation/assertions.ts | 12 +- .../google-adk-instrumentation/package.json | 3 +- .../google-adk-instrumentation/pnpm-lock.yaml | 110 +++++++++++++++++- .../scenario.google-adk-v061.mjs | 5 + .../scenario.google-adk-v061.ts | 5 + .../scenario.test.ts | 88 +++++++++----- .../configs/google-adk.ts | 98 ++++++++++++++-- 14 files changed, 288 insertions(+), 76 deletions(-) create mode 100644 .changeset/eager-jobs-hammer.md rename e2e/scenarios/google-adk-instrumentation/__snapshots__/{google-adk-v061-auto.log-payloads.json => google-adk-v061.log-payloads.json} (100%) rename e2e/scenarios/google-adk-instrumentation/__snapshots__/{google-adk-v061-auto.span-events.json => google-adk-v061.span-events.json} (100%) rename e2e/scenarios/google-adk-instrumentation/__snapshots__/{google-adk-v061-wrapped.log-payloads.json => google-adk-v1000.log-payloads.json} (90%) rename e2e/scenarios/google-adk-instrumentation/__snapshots__/{google-adk-v061-wrapped.span-events.json => google-adk-v1000.span-events.json} (87%) create mode 100644 e2e/scenarios/google-adk-instrumentation/scenario.google-adk-v061.mjs create mode 100644 e2e/scenarios/google-adk-instrumentation/scenario.google-adk-v061.ts diff --git a/.agents/skills/e2e-tests/SKILL.md b/.agents/skills/e2e-tests/SKILL.md index ffe3458c3..9b21f3219 100644 --- a/.agents/skills/e2e-tests/SKILL.md +++ b/.agents/skills/e2e-tests/SKILL.md @@ -45,6 +45,7 @@ Try not to use specific test narrowing commands unless hunting down a very nasty - Keep reusable logic in `e2e/helpers/`. Keep one-off fixtures and scenario-specific files inside the scenario directory. - Snapshot stable contracts, not raw noise. Use `normalizeForSnapshot(...)` before inline snapshots and `formatJsonFileSnapshot(...)` plus file snapshots for larger payloads or version matrices. 
- When a scenario family already has `assertions.ts`, keep version- or provider-specific test setup in `scenario.test.ts` and reuse the shared assertions file. +- Keep the CI e2e summary up to date. If a scenario version matrix or `variantKey` changes, update `e2e/config/pr-comment-scenarios.json` in the same change and follow the established pattern used by other versioned scenarios: one summary row per version, not separate wrapped/auto rows unless that pattern already exists for the scenario family. - Run new or updated scenarios three times in a row before considering snapshots stable. ## Scenario Patterns diff --git a/.changeset/eager-jobs-hammer.md b/.changeset/eager-jobs-hammer.md new file mode 100644 index 000000000..7dec269f8 --- /dev/null +++ b/.changeset/eager-jobs-hammer.md @@ -0,0 +1,5 @@ +--- +"braintrust": patch +--- + +feat: Bump google ADK patching range to include new major `1.0.0` diff --git a/e2e/config/pr-comment-scenarios.json b/e2e/config/pr-comment-scenarios.json index 7870cbcfe..210590c01 100644 --- a/e2e/config/pr-comment-scenarios.json +++ b/e2e/config/pr-comment-scenarios.json @@ -26,7 +26,10 @@ "scenarioDirName": "google-adk-instrumentation", "label": "Google ADK Instrumentation", "metadataScenario": "google-adk-instrumentation", - "variants": [{ "variantKey": "google-adk-v061", "label": "v0.6.1" }] + "variants": [ + { "variantKey": "google-adk-v061", "label": "v0.6.1" }, + { "variantKey": "google-adk-v1000", "label": "v1.0.0" } + ] }, { "scenarioDirName": "google-genai-instrumentation", diff --git a/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061-auto.log-payloads.json b/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061.log-payloads.json similarity index 100% rename from e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061-auto.log-payloads.json rename to e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061.log-payloads.json diff --git 
a/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061-auto.span-events.json b/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061.span-events.json similarity index 100% rename from e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061-auto.span-events.json rename to e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061.span-events.json diff --git a/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061-wrapped.log-payloads.json b/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v1000.log-payloads.json similarity index 90% rename from e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061-wrapped.log-payloads.json rename to e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v1000.log-payloads.json index cc629c7a4..0b9a92b5c 100644 --- a/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061-wrapped.log-payloads.json +++ b/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v1000.log-payloads.json @@ -51,21 +51,6 @@ "name": "Agent: weather_agent", "type": "task" }, - { - "input": { - "location": "Paris, France" - }, - "metadata": { - "google_adk.tool_call_id": "adk-", - "google_adk.tool_name": "get_weather", - "provider": "google-adk" - }, - "metrics": { - "start": 0 - }, - "name": "tool: get_weather", - "type": "tool" - }, { "input": { "location": "Paris, France" diff --git a/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061-wrapped.span-events.json b/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v1000.span-events.json similarity index 87% rename from e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061-wrapped.span-events.json rename to e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v1000.span-events.json index 71f5ce69e..cb8cddcb8 100644 --- 
a/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v061-wrapped.span-events.json +++ b/e2e/scenarios/google-adk-instrumentation/__snapshots__/google-adk-v1000.span-events.json @@ -61,23 +61,6 @@ ], "type": "task" }, - { - "has_input": true, - "has_output": false, - "metadata": { - "google_adk.tool_call_id": "adk-", - "google_adk.tool_name": "get_weather", - "provider": "google-adk" - }, - "metric_keys": [], - "name": "tool: get_weather", - "root_span_id": "", - "span_id": "", - "span_parents": [ - "" - ], - "type": "tool" - }, { "has_input": true, "has_output": true, diff --git a/e2e/scenarios/google-adk-instrumentation/assertions.ts b/e2e/scenarios/google-adk-instrumentation/assertions.ts index 39cc5a91c..c41f01a6f 100644 --- a/e2e/scenarios/google-adk-instrumentation/assertions.ts +++ b/e2e/scenarios/google-adk-instrumentation/assertions.ts @@ -258,7 +258,12 @@ export function defineGoogleADKInstrumentationAssertions(options: { test("matches the shared span snapshot", testConfig, async () => { const relevantEvents = events.filter( - (e) => e.span.name !== undefined && e.span.type !== "llm", + (e) => + e.span.name !== undefined && + e.span.type !== "llm" && + // Wrapped mode logs an extra start-only tool row. Normalize to the + // terminal tool record so wrapped and auto-hook snapshots stay aligned. 
+ (e.span.type !== "tool" || e.output !== undefined), ); const spanSummary = normalizeForSnapshot( dedupeSnapshotItems( @@ -287,7 +292,10 @@ export function defineGoogleADKInstrumentationAssertions(options: { test("matches the shared payload snapshot", testConfig, async () => { const relevantEvents = events.filter( - (e) => e.span.name !== undefined && e.span.type !== "llm", + (e) => + e.span.name !== undefined && + e.span.type !== "llm" && + (e.span.type !== "tool" || e.output !== undefined), ); const payloadSummary = normalizeForSnapshot( dedupeSnapshotItems( diff --git a/e2e/scenarios/google-adk-instrumentation/package.json b/e2e/scenarios/google-adk-instrumentation/package.json index 3b6424224..a93433113 100644 --- a/e2e/scenarios/google-adk-instrumentation/package.json +++ b/e2e/scenarios/google-adk-instrumentation/package.json @@ -9,6 +9,7 @@ } }, "dependencies": { - "@google/adk": "0.6.1" + "@google/adk": "1.0.0", + "google-adk-sdk-v061": "npm:@google/adk@0.6.1" } } diff --git a/e2e/scenarios/google-adk-instrumentation/pnpm-lock.yaml b/e2e/scenarios/google-adk-instrumentation/pnpm-lock.yaml index 1c5117849..65a291f2c 100644 --- a/e2e/scenarios/google-adk-instrumentation/pnpm-lock.yaml +++ b/e2e/scenarios/google-adk-instrumentation/pnpm-lock.yaml @@ -9,8 +9,11 @@ importers: .: dependencies: '@google/adk': - specifier: 0.6.1 - version: 0.6.1(71c6fbe96c14c583afc78db44c65d7e5) + specifier: 1.0.0 + version: 1.0.0(@grpc/grpc-js@1.14.3)(@mikro-orm/mariadb@6.6.12(@mikro-orm/core@6.6.12)(pg@8.20.0))(@mikro-orm/mssql@6.6.12(@azure/core-client@1.10.1)(@mikro-orm/core@6.6.12)(mariadb@3.4.5)(pg@8.20.0))(@mikro-orm/mysql@6.6.12(@mikro-orm/core@6.6.12)(@types/node@25.5.2)(mariadb@3.4.5)(pg@8.20.0))(@mikro-orm/postgresql@6.6.12(@mikro-orm/core@6.6.12)(mariadb@3.4.5))(@mikro-orm/sqlite@6.6.12(@mikro-orm/core@6.6.12)(mariadb@3.4.5)(pg@8.20.0))(@opentelemetry/core@2.6.1(@opentelemetry/api@1.9.0))(encoding@0.1.13) + google-adk-sdk-v061: + specifier: npm:@google/adk@0.6.1 + 
version: '@google/adk@0.6.1(71c6fbe96c14c583afc78db44c65d7e5)' packages: @@ -178,6 +181,15 @@ packages: '@opentelemetry/sdk-trace-base': ^2.1.0 '@opentelemetry/sdk-trace-node': ^2.1.0 + '@google/adk@1.0.0': + resolution: {integrity: sha512-BRUnfoArCbBmPn4pvTBOYxjQQyDvUR99v40Bl2P5L8G7rgP5NVzpE8HCeKSIziNoU4yW7lgor0GyzaPt0YHQeA==} + peerDependencies: + '@mikro-orm/mariadb': ^6.6.6 + '@mikro-orm/mssql': ^6.6.6 + '@mikro-orm/mysql': ^6.6.6 + '@mikro-orm/postgresql': ^6.6.6 + '@mikro-orm/sqlite': ^6.6.6 + '@google/genai@1.49.0': resolution: {integrity: sha512-hO69Zl0H3x+L0KL4stl1pLYgnqnwHoLqtKy6MRlNnW8TAxjqMdOUVafomKd4z1BePkzoxJWbYILny9a2Zk43VQ==} engines: {node: '>=20.0.0'} @@ -208,6 +220,18 @@ packages: '@js-sdsl/ordered-map@4.4.2': resolution: {integrity: sha512-iUKgm52T8HOE/makSxjqoWhe95ZJA1/G1sYsGev2JDKUSS14KAgg1LHb+Ba+IPow0xflbnSkOsZcO08C7w1gYw==} + '@jsep-plugin/assignment@1.3.0': + resolution: {integrity: sha512-VVgV+CXrhbMI3aSusQyclHkenWSAm95WaiKrMxRFam3JSUiIaQjoMIw2sEs/OX4XifnqeQUN4DYbJjlA8EfktQ==} + engines: {node: '>= 10.16.0'} + peerDependencies: + jsep: ^0.4.0||^1.0.0 + + '@jsep-plugin/regex@1.0.4': + resolution: {integrity: sha512-q7qL4Mgjs1vByCaTnDFcBnV9HS7GVPJX5vyVoCgZHNSC9rjwIlmbXG5sUuorR5ndfHAIlJ8pVStxvjXHbNvtUg==} + engines: {node: '>= 10.16.0'} + peerDependencies: + jsep: ^0.4.0||^1.0.0 + '@mikro-orm/core@6.6.12': resolution: {integrity: sha512-LgLfRfaGdRUNkJ457H1GsuzoiZJuBY3HKgP+BZMTaFr/l6ah6JbyubodbVXxH+Ffji62TtbHFFRr0tj4wNwLRg==} engines: {node: '>= 18.12.0'} @@ -541,6 +565,9 @@ packages: engines: {node: ^12.13.0 || ^14.15.0 || >=16.0.0} deprecated: This package is no longer supported. 
+ argparse@2.0.1: + resolution: {integrity: sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==} + array-flatten@1.1.1: resolution: {integrity: sha512-PCVAQswWemu6UdxsDFFX/+gVeYqKAod3D3UVm91jHwynguOwAvYPhx8nNlM++NqRcK6CxxpUafjmhIdKiHibqg==} @@ -1265,6 +1292,14 @@ packages: js-md4@0.3.2: resolution: {integrity: sha512-/GDnfQYsltsjRswQhN9fhv3EMw2sCpUdrdxyWDOUK7eyD++r3gRhzgiQgc/x4MAv2i1iuQ4lxO5mvqM3vj4bwA==} + js-yaml@4.1.1: + resolution: {integrity: sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==} + hasBin: true + + jsep@1.4.0: + resolution: {integrity: sha512-B7qPcEVE3NVkmSJbaYxvv4cHkVW7DQsZz13pUMrfS8z8Q/BuShN+gcTXrUlPiGqM2/t/EEaI030bpxMqY8gMlw==} + engines: {node: '>= 10.16.0'} + json-bigint@1.0.0: resolution: {integrity: sha512-SiPv/8VpZuWbvLSMtTDU8hEfrZWg/mH/nV/b4o0CYbSxu1UIQPLdwKOCIyLQX+VIPO5vrLX3i8qtqFyhdPSUSQ==} @@ -1277,6 +1312,11 @@ packages: jsonfile@6.2.0: resolution: {integrity: sha512-FGuPw30AdOIUTRMC2OMRtQV+jkVj2cfPqSeWXv1NEAJ1qZ5zb1X6z1mFhbfOB/iy3ssJCD+3KuZ8r8C3uVFlAg==} + jsonpath-plus@10.4.0: + resolution: {integrity: sha512-T92WWatJXmhBbKsgH/0hl+jxjdXrifi5IKeMY02DWggRxX0UElcbVzPlmgLTbvsPeW1PasQ6xE2Q75stkhGbsA==} + engines: {node: '>=18.0.0'} + hasBin: true + jsonwebtoken@9.0.3: resolution: {integrity: sha512-MT/xP0CrubFRNLNKvxJ2BYfy53Zkm++5bX9dtuPbqAeQpTVe0MQTFhao8+Cp//EmJp244xt6Drw/GVEGCUj40g==} engines: {node: '>=12', npm: '>=6'} @@ -2457,6 +2497,50 @@ snapshots: - supports-color - utf-8-validate + 
'@google/adk@1.0.0(@grpc/grpc-js@1.14.3)(@mikro-orm/mariadb@6.6.12(@mikro-orm/core@6.6.12)(pg@8.20.0))(@mikro-orm/mssql@6.6.12(@azure/core-client@1.10.1)(@mikro-orm/core@6.6.12)(mariadb@3.4.5)(pg@8.20.0))(@mikro-orm/mysql@6.6.12(@mikro-orm/core@6.6.12)(@types/node@25.5.2)(mariadb@3.4.5)(pg@8.20.0))(@mikro-orm/postgresql@6.6.12(@mikro-orm/core@6.6.12)(mariadb@3.4.5))(@mikro-orm/sqlite@6.6.12(@mikro-orm/core@6.6.12)(mariadb@3.4.5)(pg@8.20.0))(@opentelemetry/core@2.6.1(@opentelemetry/api@1.9.0))(encoding@0.1.13)': + dependencies: + '@a2a-js/sdk': 0.3.13(@grpc/grpc-js@1.14.3)(express@4.22.1) + '@google-cloud/opentelemetry-cloud-monitoring-exporter': 0.21.0(@opentelemetry/api@1.9.0)(@opentelemetry/core@2.6.1(@opentelemetry/api@1.9.0))(@opentelemetry/resources@2.6.1(@opentelemetry/api@1.9.0))(@opentelemetry/sdk-metrics@2.6.1(@opentelemetry/api@1.9.0))(encoding@0.1.13) + '@google-cloud/opentelemetry-cloud-trace-exporter': 3.0.0(@opentelemetry/api@1.9.0)(@opentelemetry/core@2.6.1(@opentelemetry/api@1.9.0))(@opentelemetry/resources@2.6.1(@opentelemetry/api@1.9.0))(@opentelemetry/sdk-trace-base@2.6.1(@opentelemetry/api@1.9.0))(encoding@0.1.13) + '@google-cloud/storage': 7.19.0(encoding@0.1.13) + '@google/genai': 1.49.0(@modelcontextprotocol/sdk@1.29.0(zod@4.3.6)) + '@mikro-orm/core': 6.6.12 + '@mikro-orm/mariadb': 6.6.12(@mikro-orm/core@6.6.12)(pg@8.20.0) + '@mikro-orm/mssql': 6.6.12(@azure/core-client@1.10.1)(@mikro-orm/core@6.6.12)(mariadb@3.4.5)(pg@8.20.0) + '@mikro-orm/mysql': 6.6.12(@mikro-orm/core@6.6.12)(@types/node@25.5.2)(mariadb@3.4.5)(pg@8.20.0) + '@mikro-orm/postgresql': 6.6.12(@mikro-orm/core@6.6.12)(mariadb@3.4.5) + '@mikro-orm/reflection': 6.6.12(@mikro-orm/core@6.6.12) + '@mikro-orm/sqlite': 6.6.12(@mikro-orm/core@6.6.12)(mariadb@3.4.5)(pg@8.20.0) + '@modelcontextprotocol/sdk': 1.29.0(zod@4.3.6) + '@opentelemetry/api': 1.9.0 + '@opentelemetry/api-logs': 0.205.0 + '@opentelemetry/exporter-logs-otlp-http': 0.205.0(@opentelemetry/api@1.9.0) + 
'@opentelemetry/exporter-metrics-otlp-http': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/exporter-trace-otlp-http': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/resource-detector-gcp': 0.40.3(@opentelemetry/api@1.9.0)(encoding@0.1.13) + '@opentelemetry/resources': 2.6.1(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-logs': 0.205.0(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-metrics': 2.6.1(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-trace-base': 2.6.1(@opentelemetry/api@1.9.0) + '@opentelemetry/sdk-trace-node': 2.6.1(@opentelemetry/api@1.9.0) + express: 4.22.1 + google-auth-library: 10.6.2 + js-yaml: 4.1.1 + jsonpath-plus: 10.4.0 + lodash-es: 4.18.1 + winston: 3.19.0 + zod: 4.3.6 + zod-to-json-schema: 3.25.2(zod@4.3.6) + transitivePeerDependencies: + - '@bufbuild/protobuf' + - '@cfworker/json-schema' + - '@grpc/grpc-js' + - '@opentelemetry/core' + - bufferutil + - encoding + - supports-color + - utf-8-validate + '@google/genai@1.49.0(@modelcontextprotocol/sdk@1.29.0(zod@4.3.6))': dependencies: google-auth-library: 10.6.2 @@ -2490,6 +2574,14 @@ snapshots: '@js-sdsl/ordered-map@4.4.2': {} + '@jsep-plugin/assignment@1.3.0(jsep@1.4.0)': + dependencies: + jsep: 1.4.0 + + '@jsep-plugin/regex@1.0.4(jsep@1.4.0)': + dependencies: + jsep: 1.4.0 + '@mikro-orm/core@6.6.12': dependencies: dataloader: 2.2.3 @@ -2984,6 +3076,8 @@ snapshots: readable-stream: 3.6.2 optional: true + argparse@2.0.1: {} + array-flatten@1.1.1: {} array-union@2.1.0: {} @@ -3819,6 +3913,12 @@ snapshots: js-md4@0.3.2: {} + js-yaml@4.1.1: + dependencies: + argparse: 2.0.1 + + jsep@1.4.0: {} + json-bigint@1.0.0: dependencies: bignumber.js: 9.3.1 @@ -3833,6 +3933,12 @@ snapshots: optionalDependencies: graceful-fs: 4.2.11 + jsonpath-plus@10.4.0: + dependencies: + '@jsep-plugin/assignment': 1.3.0(jsep@1.4.0) + '@jsep-plugin/regex': 1.0.4(jsep@1.4.0) + jsep: 1.4.0 + jsonwebtoken@9.0.3: dependencies: jws: 4.0.1 diff --git 
a/e2e/scenarios/google-adk-instrumentation/scenario.google-adk-v061.mjs b/e2e/scenarios/google-adk-instrumentation/scenario.google-adk-v061.mjs new file mode 100644 index 000000000..c38a65601 --- /dev/null +++ b/e2e/scenarios/google-adk-instrumentation/scenario.google-adk-v061.mjs @@ -0,0 +1,5 @@ +import * as adk from "google-adk-sdk-v061"; +import { runMain } from "../../helpers/provider-runtime.mjs"; +import { runAutoGoogleADKInstrumentation } from "./scenario.impl.mjs"; + +runMain(async () => runAutoGoogleADKInstrumentation(adk)); diff --git a/e2e/scenarios/google-adk-instrumentation/scenario.google-adk-v061.ts b/e2e/scenarios/google-adk-instrumentation/scenario.google-adk-v061.ts new file mode 100644 index 000000000..7f89e28ed --- /dev/null +++ b/e2e/scenarios/google-adk-instrumentation/scenario.google-adk-v061.ts @@ -0,0 +1,5 @@ +import * as adk from "google-adk-sdk-v061"; +import { runMain } from "../../helpers/scenario-runtime"; +import { runWrappedGoogleADKInstrumentation } from "./scenario.impl.mjs"; + +runMain(async () => runWrappedGoogleADKInstrumentation(adk)); diff --git a/e2e/scenarios/google-adk-instrumentation/scenario.test.ts b/e2e/scenarios/google-adk-instrumentation/scenario.test.ts index 80423925a..b12718a1d 100644 --- a/e2e/scenarios/google-adk-instrumentation/scenario.test.ts +++ b/e2e/scenarios/google-adk-instrumentation/scenario.test.ts @@ -1,6 +1,7 @@ import { describe } from "vitest"; import { prepareScenarioDir, + readInstalledPackageVersion, resolveScenarioDir, } from "../../helpers/scenario-harness"; import { defineGoogleADKInstrumentationAssertions } from "./assertions"; @@ -9,37 +10,62 @@ const scenarioDir = await prepareScenarioDir({ scenarioDir: resolveScenarioDir(import.meta.url), }); const TIMEOUT_MS = 90_000; -describe("google adk sdk 0.6.1", () => { - defineGoogleADKInstrumentationAssertions({ - name: "wrapped instrumentation", - runScenario: async ({ runScenarioDir }) => { - await runScenarioDir({ - entry: "scenario.ts", - 
runContext: { variantKey: "google-adk-v061-wrapped" }, - scenarioDir, - timeoutMs: TIMEOUT_MS, - }); +const googleADKScenarios = await Promise.all( + [ + { + autoEntry: "scenario.google-adk-v061.mjs", + dependencyName: "google-adk-sdk-v061", + snapshotName: "google-adk-v061", + wrapperEntry: "scenario.google-adk-v061.ts", }, - expectLLMSpan: false, - snapshotName: "google-adk-v061-wrapped", - testFileUrl: import.meta.url, - timeoutMs: TIMEOUT_MS, - }); - - defineGoogleADKInstrumentationAssertions({ - name: "auto-hook instrumentation", - runScenario: async ({ runNodeScenarioDir }) => { - await runNodeScenarioDir({ - entry: "scenario.mjs", - nodeArgs: ["--import", "braintrust/hook.mjs"], - runContext: { variantKey: "google-adk-v061-auto" }, - scenarioDir, - timeoutMs: TIMEOUT_MS, - }); + { + autoEntry: "scenario.mjs", + dependencyName: "@google/adk", + snapshotName: "google-adk-v1000", + wrapperEntry: "scenario.ts", }, - expectLLMSpan: true, - snapshotName: "google-adk-v061-auto", - testFileUrl: import.meta.url, - timeoutMs: TIMEOUT_MS, + ].map(async (scenario) => ({ + ...scenario, + version: await readInstalledPackageVersion( + scenarioDir, + scenario.dependencyName, + ), + })), +); + +for (const scenario of googleADKScenarios) { + describe(`google adk sdk ${scenario.version}`, () => { + defineGoogleADKInstrumentationAssertions({ + name: "wrapped instrumentation", + runScenario: async ({ runScenarioDir }) => { + await runScenarioDir({ + entry: scenario.wrapperEntry, + runContext: { variantKey: scenario.snapshotName }, + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); + }, + expectLLMSpan: false, + snapshotName: scenario.snapshotName, + testFileUrl: import.meta.url, + timeoutMs: TIMEOUT_MS, + }); + + defineGoogleADKInstrumentationAssertions({ + name: "auto-hook instrumentation", + runScenario: async ({ runNodeScenarioDir }) => { + await runNodeScenarioDir({ + entry: scenario.autoEntry, + nodeArgs: ["--import", "braintrust/hook.mjs"], + runContext: { variantKey: 
scenario.snapshotName }, + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); + }, + expectLLMSpan: true, + snapshotName: scenario.snapshotName, + testFileUrl: import.meta.url, + timeoutMs: TIMEOUT_MS, + }); }); -}); +} diff --git a/js/src/auto-instrumentations/configs/google-adk.ts b/js/src/auto-instrumentations/configs/google-adk.ts index 8da69df28..55b9ce662 100644 --- a/js/src/auto-instrumentations/configs/google-adk.ts +++ b/js/src/auto-instrumentations/configs/google-adk.ts @@ -2,7 +2,8 @@ import type { InstrumentationConfig } from "@apm-js-collab/code-transformer"; import { googleADKChannels } from "../../instrumentation/plugins/google-adk-channels"; const googleADKVersionRange = ">=0.1.0"; -const googleADKBundledIndexVersionRange = ">=0.6.1 <0.7.0"; +const googleADKBundledIndexV06VersionRange = ">=0.6.1 <0.7.0"; +const googleADKBundledIndexV1VersionRange = ">=1.0.0 <2.0.0"; /** * Instrumentation configurations for the Google ADK (@google/adk). @@ -42,7 +43,7 @@ export const googleADKConfigs: InstrumentationConfig[] = [ channelName: googleADKChannels.runnerRunAsync.channelName, module: { name: "@google/adk", - versionRange: googleADKBundledIndexVersionRange, + versionRange: googleADKBundledIndexV06VersionRange, filePath: "dist/cjs/index.js", }, functionQuery: { @@ -55,7 +56,7 @@ export const googleADKConfigs: InstrumentationConfig[] = [ channelName: googleADKChannels.runnerRunAsync.channelName, module: { name: "@google/adk", - versionRange: googleADKBundledIndexVersionRange, + versionRange: googleADKBundledIndexV06VersionRange, filePath: "dist/esm/index.js", }, functionQuery: { @@ -64,6 +65,34 @@ export const googleADKConfigs: InstrumentationConfig[] = [ index: 11, }, }, + // The 1.x bundled entrypoints still inline the runtime into index.js, but + // the minified method order changed. These indices are verified against 1.0.0. 
+ { + channelName: googleADKChannels.runnerRunAsync.channelName, + module: { + name: "@google/adk", + versionRange: googleADKBundledIndexV1VersionRange, + filePath: "dist/cjs/index.js", + }, + functionQuery: { + methodName: "runAsync", + kind: "Sync", + index: 12, + }, + }, + { + channelName: googleADKChannels.runnerRunAsync.channelName, + module: { + name: "@google/adk", + versionRange: googleADKBundledIndexV1VersionRange, + filePath: "dist/esm/index.js", + }, + functionQuery: { + methodName: "runAsync", + kind: "Sync", + index: 12, + }, + }, // --- BaseAgent.runAsync --- async generator, kind "Sync" + sync-stream channel @@ -91,7 +120,35 @@ export const googleADKConfigs: InstrumentationConfig[] = [ channelName: googleADKChannels.agentRunAsync.channelName, module: { name: "@google/adk", - versionRange: googleADKBundledIndexVersionRange, + versionRange: googleADKBundledIndexV06VersionRange, + filePath: "dist/cjs/index.js", + }, + functionQuery: { + methodName: "runAsync", + kind: "Sync", + index: 0, + }, + }, + { + channelName: googleADKChannels.agentRunAsync.channelName, + module: { + name: "@google/adk", + versionRange: googleADKBundledIndexV06VersionRange, + filePath: "dist/esm/index.js", + }, + functionQuery: { + methodName: "runAsync", + kind: "Sync", + index: 0, + }, + }, + // The 1.x bundled entrypoints keep BaseAgent.runAsync as the first bundled + // async-generator runAsync method in file order. 
+ { + channelName: googleADKChannels.agentRunAsync.channelName, + module: { + name: "@google/adk", + versionRange: googleADKBundledIndexV1VersionRange, filePath: "dist/cjs/index.js", }, functionQuery: { @@ -104,7 +161,7 @@ export const googleADKConfigs: InstrumentationConfig[] = [ channelName: googleADKChannels.agentRunAsync.channelName, module: { name: "@google/adk", - versionRange: googleADKBundledIndexVersionRange, + versionRange: googleADKBundledIndexV1VersionRange, filePath: "dist/esm/index.js", }, functionQuery: { @@ -140,7 +197,34 @@ export const googleADKConfigs: InstrumentationConfig[] = [ channelName: googleADKChannels.toolRunAsync.channelName, module: { name: "@google/adk", - versionRange: googleADKBundledIndexVersionRange, + versionRange: googleADKBundledIndexV06VersionRange, + filePath: "dist/cjs/index.js", + }, + functionQuery: { + methodName: "runAsync", + kind: "Async", + index: 1, + }, + }, + { + channelName: googleADKChannels.toolRunAsync.channelName, + module: { + name: "@google/adk", + versionRange: googleADKBundledIndexV06VersionRange, + filePath: "dist/esm/index.js", + }, + functionQuery: { + methodName: "runAsync", + kind: "Async", + index: 1, + }, + }, + // The 1.x bundle moves FunctionTool.runAsync behind one helper method. 
+ { + channelName: googleADKChannels.toolRunAsync.channelName, + module: { + name: "@google/adk", + versionRange: googleADKBundledIndexV1VersionRange, filePath: "dist/cjs/index.js", }, functionQuery: { @@ -153,7 +237,7 @@ export const googleADKConfigs: InstrumentationConfig[] = [ channelName: googleADKChannels.toolRunAsync.channelName, module: { name: "@google/adk", - versionRange: googleADKBundledIndexVersionRange, + versionRange: googleADKBundledIndexV1VersionRange, filePath: "dist/esm/index.js", }, functionQuery: { From 08f5c74d271b394537fc37945b89e7c987afac97 Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Wed, 22 Apr 2026 23:47:08 +0200 Subject: [PATCH 09/26] fix(claude-agent-sdk): Nest built-in tools under sub-agents (#1881) Now looks like this Screenshot 2026-04-22 at 13 32 24 --- .changeset/tall-nights-care.md | 5 + .../claude-agent-sdk-v0.1.span-events.json | 4 +- .../claude-agent-sdk-v0.2.76.span-events.json | 4 +- .../claude-agent-sdk-v0.2.79.span-events.json | 4 +- .../claude-agent-sdk-v0.2.81.span-events.json | 4 +- .../assertions.ts | 166 ++++++++++++++++-- .../scenario.impl.mjs | 28 +++ .../scenario.test.ts | 2 +- .../plugins/claude-agent-sdk-plugin.ts | 139 ++++++++++++--- js/src/vendor-sdk-types/claude-agent-sdk.ts | 1 + 10 files changed, 311 insertions(+), 46 deletions(-) create mode 100644 .changeset/tall-nights-care.md diff --git a/.changeset/tall-nights-care.md b/.changeset/tall-nights-care.md new file mode 100644 index 000000000..03e6b31f5 --- /dev/null +++ b/.changeset/tall-nights-care.md @@ -0,0 +1,5 @@ +--- +"braintrust": patch +--- + +fix(claude-agent-sdk): Nest built-in tools under sub-agents diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.1.span-events.json b/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.1.span-events.json index 78a9015fa..dc7c8bf54 100644 --- 
a/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.1.span-events.json +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.1.span-events.json @@ -291,7 +291,7 @@ "metric_keys": [], "name": "Claude Agent", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -307,7 +307,7 @@ "metric_keys": [], "name": "tool: calculator/calculator", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.76.span-events.json b/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.76.span-events.json index 1a12cdb33..0c96f2827 100644 --- a/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.76.span-events.json +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.76.span-events.json @@ -293,7 +293,7 @@ "metric_keys": [], "name": "Claude Agent", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -309,7 +309,7 @@ "metric_keys": [], "name": "tool: calculator/calculator", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.79.span-events.json b/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.79.span-events.json index 1a12cdb33..0c96f2827 100644 --- a/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.79.span-events.json +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.79.span-events.json @@ -293,7 +293,7 @@ "metric_keys": [], "name": "Claude Agent", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -309,7 +309,7 @@ "metric_keys": [], "name": "tool: calculator/calculator", "root_span_id": "", - "span_id": 
"", + "span_id": "", "span_parents": [ "" ], diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.81.span-events.json b/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.81.span-events.json index 1a12cdb33..0c96f2827 100644 --- a/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.81.span-events.json +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/__snapshots__/claude-agent-sdk-v0.2.81.span-events.json @@ -293,7 +293,7 @@ "metric_keys": [], "name": "Claude Agent", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -309,7 +309,7 @@ "metric_keys": [], "name": "tool: calculator/calculator", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/assertions.ts b/e2e/scenarios/claude-agent-sdk-instrumentation/assertions.ts index ffc8e6b06..f47ed9230 100644 --- a/e2e/scenarios/claude-agent-sdk-instrumentation/assertions.ts +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/assertions.ts @@ -175,6 +175,38 @@ function findSpanById( return events.find((event) => event.span.id === spanId); } +function isDescendantOf( + events: CapturedLogEvent[], + event: CapturedLogEvent | undefined, + ancestorId: string | undefined, +): boolean { + if (!event || !ancestorId) { + return false; + } + + const spanById = new Map(events.map((span) => [span.span.id, span] as const)); + const queue = [...event.span.parentIds]; + const visited = new Set(); + + while (queue.length > 0) { + const parentId = queue.shift(); + if (!parentId || visited.has(parentId)) { + continue; + } + if (parentId === ancestorId) { + return true; + } + + visited.add(parentId); + const parentSpan = spanById.get(parentId); + if (parentSpan) { + queue.push(...parentSpan.span.parentIds); + } + } + + return false; +} + function hasSubAgentHandoffToolName( event: CapturedLogEvent | undefined, ): 
boolean { @@ -191,10 +223,13 @@ function hasSubAgentHandoffToolName( function findSubAgentTaskSpan( events: CapturedLogEvent[], + ancestorId?: string, ): CapturedLogEvent | undefined { return events.find( (event) => - event.span.type === "task" && event.span.name?.startsWith("Agent:"), + event.span.type === "task" && + event.span.name?.startsWith("Agent:") && + (!ancestorId || isDescendantOf(events, event, ancestorId)), ); } @@ -208,6 +243,31 @@ function findSubAgentHandoffTool( return hasSubAgentHandoffToolName(parentSpan) ? parentSpan : undefined; } +function findLatestTaskLlmBeforeSpan( + events: CapturedLogEvent[], + taskId: string | undefined, + childStartTime: number | undefined, +): CapturedLogEvent | undefined { + return findChildSpans(events, "anthropic.messages.create", taskId) + .filter((event) => { + if (childStartTime === undefined) { + return true; + } + return ( + Number(event.metrics?.start ?? Number.NaN) <= Number(childStartTime) + ); + }) + .at(-1); +} + +function findOperationTaskRoot( + events: CapturedLogEvent[], + operationName: string, +): CapturedLogEvent | undefined { + const operation = findLatestSpan(events, operationName); + return findChildSpans(events, "Claude Agent", operation?.span.id).at(-1); +} + function buildSpanSummary(events: CapturedLogEvent[]): Json { const root = findLatestSpan(events, ROOT_NAME); const basicOperation = findLatestSpan(events, "claude-agent-basic-operation"); @@ -258,7 +318,7 @@ function buildSpanSummary(events: CapturedLogEvent[]): Json { const input = event.input as Array<{ content?: string }> | undefined; return Array.isArray(input) && input.some((item) => item.content); }); - const subAgentTask = findSubAgentTaskSpan(events); + const subAgentTask = findSubAgentTaskSpan(events, subAgentTaskRoot?.span.id); const subAgentLlm = findChildSpans( events, "anthropic.messages.create", @@ -274,9 +334,16 @@ function buildSpanSummary(events: CapturedLogEvent[]): Json { const basicTool = 
findToolSpanByLocalHandler(events, "calculator-local-handler-multiply") ?? findToolSpanByOperation(events, "multiply"); - const subAgentTool = + const subAgentToolCandidate = findToolSpanByLocalHandler(events, "calculator-local-handler-add") ?? findToolSpanByOperation(events, "add"); + const subAgentTool = isDescendantOf( + events, + subAgentToolCandidate, + subAgentTask?.span.id, + ) + ? subAgentToolCandidate + : undefined; const failureTool = findToolSpanByLocalHandler(events, "calculator-local-handler-divide") ?? findToolSpanByOperation(events, "divide"); @@ -453,16 +520,23 @@ export function defineClaudeAgentSDKInstrumentationAssertions(options: { events, "claude-agent-subagent-operation", ); - const taskRoot = findChildSpans( + const taskRoot = findOperationTaskRoot( events, - "Claude Agent", - operation?.span.id, - ).at(-1); - const llm = findAllSpans(events, "anthropic.messages.create").find( - (event) => event.span.parentIds.includes(taskRoot?.span.id ?? ""), + "claude-agent-subagent-operation", ); - const nestedTask = findSubAgentTaskSpan(events); + const nestedTask = findSubAgentTaskSpan(events, taskRoot?.span.id); const handoffTool = findSubAgentHandoffTool(events, nestedTask); + const llm = findLatestTaskLlmBeforeSpan( + events, + taskRoot?.span.id, + typeof handoffTool?.metrics?.start === "number" + ? handoffTool.metrics.start + : undefined, + ); + const handoffToolParent = findSpanById( + events, + handoffTool?.span.parentIds[0], + ); const nestedTaskLlm = findChildSpans( events, "anthropic.messages.create", @@ -490,6 +564,10 @@ export function defineClaudeAgentSDKInstrumentationAssertions(options: { } expect(handoffTool).toBeDefined(); expect(hasSubAgentHandoffToolName(handoffTool)).toBe(true); + expect(handoffToolParent?.span.id).toBe(taskRoot?.span.id); + expect(Number(llm?.metrics?.start ?? Number.NaN)).toBeLessThanOrEqual( + Number(handoffTool?.metrics?.start ?? Number.NaN), + ); expect(nestedTask?.span.parentIds).toEqual([handoffTool?.span.id ?? 
""]); expect(nestedTaskLlm).toBeDefined(); expect(nestedTaskLlm?.span.parentIds).toContain( @@ -508,6 +586,74 @@ export function defineClaudeAgentSDKInstrumentationAssertions(options: { } }); + if (options.expectTaskLifecycleDetails) { + test( + "orders built-in Agent and Bash after their llm siblings", + testConfig, + () => { + const operation = findLatestSpan( + events, + "claude-agent-subagent-built-in-tool-operation", + ); + const taskRoot = findOperationTaskRoot( + events, + "claude-agent-subagent-built-in-tool-operation", + ); + const nestedTask = findSubAgentTaskSpan(events, taskRoot?.span.id); + const handoffTool = findSubAgentHandoffTool(events, nestedTask); + const taskRootLlm = findLatestTaskLlmBeforeSpan( + events, + taskRoot?.span.id, + typeof handoffTool?.metrics?.start === "number" + ? handoffTool.metrics.start + : undefined, + ); + const handoffToolParent = findSpanById( + events, + handoffTool?.span.parentIds[0], + ); + const bashTool = findAllSpans(events, "tool: Bash").find((event) => + isDescendantOf(events, event, taskRoot?.span.id), + ); + const nestedTaskLlm = findLatestTaskLlmBeforeSpan( + events, + nestedTask?.span.id, + typeof bashTool?.metrics?.start === "number" + ? bashTool.metrics.start + : undefined, + ); + const bashToolParent = findSpanById( + events, + bashTool?.span.parentIds[0], + ); + + expect(operation).toBeDefined(); + expect(taskRoot).toBeDefined(); + expect(nestedTask).toBeDefined(); + expect(handoffTool).toBeDefined(); + expect(handoffToolParent?.span.id).toBe(taskRoot?.span.id); + expect( + Number(taskRootLlm?.metrics?.start ?? Number.NaN), + ).toBeLessThanOrEqual( + Number(handoffTool?.metrics?.start ?? Number.NaN), + ); + expect(nestedTaskLlm).toBeDefined(); + expect(bashTool).toBeDefined(); + expect(isDescendantOf(events, bashTool, nestedTask?.span.id)).toBe( + true, + ); + expect(bashTool?.span.parentIds).not.toContain( + taskRootLlm?.span.id ?? 
"", + ); + expect(bashToolParent?.span.type).toBe("task"); + expect(bashToolParent?.span.id).toBe(nestedTask?.span.id); + expect( + Number(nestedTaskLlm?.metrics?.start ?? Number.NaN), + ).toBeLessThanOrEqual(Number(bashTool?.metrics?.start ?? Number.NaN)); + }, + ); + } + test("captures tool failure details", testConfig, () => { const operation = findLatestSpan( events, diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.impl.mjs b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.impl.mjs index 683918c11..4d565d2a0 100644 --- a/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.impl.mjs +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.impl.mjs @@ -7,6 +7,7 @@ import { import { z } from "zod"; const CLAUDE_AGENT_MODEL = "claude-haiku-4-5"; +const CLAUDE_AGENT_TOP_LEVEL_MODEL = "claude-sonnet-4-5"; export const ROOT_NAME = "claude-agent-sdk-root"; export const SCENARIO_NAME = "claude-agent-sdk-traces"; @@ -139,6 +140,33 @@ async function runClaudeAgentSDKScenario({ decorateSDK, sdk }) { }, ); + await runOperation( + "claude-agent-subagent-built-in-tool-operation", + "subagent-built-in-tool", + async () => { + await collectAsync( + query({ + prompt: + 'You MUST call the Agent tool now with subagent_type="echo" and description "echo greeting". Do not call Bash yourself. 
Do not answer with text yourself; delegate to the echo sub-agent.', + options: { + agents: { + echo: { + description: "Runs one bash echo and reports back.", + model: CLAUDE_AGENT_MODEL, + prompt: + "Run `echo hello` via Bash exactly once, then reply with only the word done.", + tools: ["Bash"], + }, + }, + allowedTools: ["Agent", "Bash"], + model: CLAUDE_AGENT_TOP_LEVEL_MODEL, + permissionMode: "bypassPermissions", + }, + }), + ); + }, + ); + await runOperation( "claude-agent-failure-operation", "failure", diff --git a/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts index bbc10b088..2f3900430 100644 --- a/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts +++ b/e2e/scenarios/claude-agent-sdk-instrumentation/scenario.test.ts @@ -9,7 +9,7 @@ import { defineClaudeAgentSDKInstrumentationAssertions } from "./assertions"; const scenarioDir = await prepareScenarioDir({ scenarioDir: resolveScenarioDir(import.meta.url), }); -const TIMEOUT_MS = 120_000; +const TIMEOUT_MS = 180_000; const claudeAgentSDKScenarios = await Promise.all( [ { diff --git a/js/src/instrumentation/plugins/claude-agent-sdk-plugin.ts b/js/src/instrumentation/plugins/claude-agent-sdk-plugin.ts index 9458b6127..bf8bcaa9c 100644 --- a/js/src/instrumentation/plugins/claude-agent-sdk-plugin.ts +++ b/js/src/instrumentation/plugins/claude-agent-sdk-plugin.ts @@ -38,7 +38,10 @@ type ParsedToolName = { rawToolName: string; toolName: string; }; -type ParentSpanResolver = (toolUseID: string) => Promise; +type ParentSpanResolver = ( + toolUseID: string, + context?: { agentId?: string; preferTaskSiblingParent?: boolean }, +) => Promise; type LLMSpanResult = { finalMessage: ClaudeConversationMessage | undefined; spanExport: string; @@ -62,6 +65,10 @@ function isSubAgentDelegationToolName(toolName: string): boolean { return toolName === "Agent" || toolName === "Task"; } +function shouldParentToolAsTaskSibling(toolName: 
string): boolean { + return toolName === "Agent" || toolName === "Task" || toolName === "Bash"; +} + function filterSerializableOptions( options: ClaudeAgentSDKQueryOptions, ): Record { @@ -188,6 +195,18 @@ function resolveTaskToolUseId( return undefined; } +function seedTaskToolUseIdMapping( + taskIdToToolUseId: Map, + message: ClaudeAgentSDKMessage, +): void { + if ( + typeof message.task_id === "string" && + typeof message.tool_use_id === "string" + ) { + taskIdToToolUseId.set(message.task_id, message.tool_use_id); + } +} + function extractUsageFromMessage( message: ClaudeAgentSDKMessage, ): Record { @@ -479,7 +498,10 @@ function createToolTracingHooks( }, }, name: parsed.displayName, - parent: await resolveParentSpan(toolUseID), + parent: await resolveParentSpan(toolUseID, { + agentId: input.agent_id, + preferTaskSiblingParent: shouldParentToolAsTaskSibling(parsed.toolName), + }), spanAttributes: { type: SpanTypeAttribute.TOOL }, }); @@ -869,6 +891,8 @@ function maybeTrackToolUseContext( state: QueryState, message: ClaudeAgentSDKMessage, ): void { + seedTaskToolUseIdMapping(state.taskIdToToolUseId, message); + if ( message.type !== "assistant" || !Array.isArray(message.message?.content) @@ -961,6 +985,45 @@ async function ensureSubAgentSpan( return subAgentSpan; } +async function ensureActiveLlmSpanForParentToolUse( + rootSpan: Span, + activeLlmSpansByParentToolUse: Map, + subAgentDetailsByToolUseId: Map, + activeToolSpans: Map, + subAgentSpans: Map, + parentToolUseId: string | null, + startTime: number, +): Promise { + const parentKey = llmParentKey(parentToolUseId); + const existingLlmSpan = activeLlmSpansByParentToolUse.get(parentKey); + if (existingLlmSpan) { + return existingLlmSpan; + } + + let llmParentSpan = await rootSpan.export(); + if (parentToolUseId) { + const subAgentSpan = await ensureSubAgentSpan( + subAgentDetailsByToolUseId, + rootSpan, + activeToolSpans, + subAgentSpans, + parentToolUseId, + ); + llmParentSpan = await subAgentSpan.export(); + 
} + + const llmSpan = startSpan({ + name: "anthropic.messages.create", + parent: llmParentSpan, + spanAttributes: { + type: SpanTypeAttribute.LLM, + }, + startTime, + }); + activeLlmSpansByParentToolUse.set(parentKey, llmSpan); + return llmSpan; +} + async function maybeHandleTaskLifecycleMessage( state: QueryState, message: ClaudeAgentSDKMessage, @@ -1098,30 +1161,15 @@ async function handleStreamMessage( if (message.type === "assistant" && message.message?.usage) { const parentToolUseId = message.parent_tool_use_id ?? null; - const parentKey = llmParentKey(parentToolUseId); - if (!state.activeLlmSpansByParentToolUse.has(parentKey)) { - let llmParentSpan = await state.span.export(); - if (parentToolUseId) { - const subAgentSpan = await ensureSubAgentSpan( - state.subAgentDetailsByToolUseId, - state.span, - state.activeToolSpans, - state.subAgentSpans, - parentToolUseId, - ); - llmParentSpan = await subAgentSpan.export(); - } - - const llmSpan = startSpan({ - name: "anthropic.messages.create", - parent: llmParentSpan, - spanAttributes: { - type: SpanTypeAttribute.LLM, - }, - startTime: state.currentMessageStartTime, - }); - state.activeLlmSpansByParentToolUse.set(parentKey, llmSpan); - } + await ensureActiveLlmSpanForParentToolUse( + state.span, + state.activeLlmSpansByParentToolUse, + state.subAgentDetailsByToolUseId, + state.activeToolSpans, + state.subAgentSpans, + parentToolUseId, + state.currentMessageStartTime, + ); state.currentMessages.push(message); } @@ -1311,10 +1359,47 @@ export class ClaudeAgentSDKPlugin extends BasePlugin { hasLocalToolHandlers; const resolveToolUseParentSpan: ParentSpanResolver = async ( toolUseID, + context, ) => { - const parentToolUseId = toolUseToParent.get(toolUseID) ?? null; + const trackedParentToolUseId = toolUseToParent.get(toolUseID); + const parentToolUseId = + trackedParentToolUseId ?? + (context?.agentId + ? (taskIdToToolUseId.get(context.agentId) ?? 
null) + : null); const parentKey = llmParentKey(parentToolUseId); const activeLlmSpan = activeLlmSpansByParentToolUse.get(parentKey); + + if (context?.preferTaskSiblingParent) { + // Built-in Claude tools should be siblings of the driving LLM turn, + // but we still materialize that LLM span first so trace ordering + // reflects that the tool call was produced by the model. + if (!activeLlmSpan) { + await ensureActiveLlmSpanForParentToolUse( + span, + activeLlmSpansByParentToolUse, + subAgentDetailsByToolUseId, + activeToolSpans, + subAgentSpans, + parentToolUseId, + getCurrentUnixTimestamp(), + ); + } + + if (parentToolUseId) { + const subAgentSpan = await ensureSubAgentSpan( + subAgentDetailsByToolUseId, + span, + activeToolSpans, + subAgentSpans, + parentToolUseId, + ); + return subAgentSpan.export(); + } + + return span.export(); + } + if (activeLlmSpan) { return activeLlmSpan.export(); } diff --git a/js/src/vendor-sdk-types/claude-agent-sdk.ts b/js/src/vendor-sdk-types/claude-agent-sdk.ts index 48ebb5beb..0f7e80ce5 100644 --- a/js/src/vendor-sdk-types/claude-agent-sdk.ts +++ b/js/src/vendor-sdk-types/claude-agent-sdk.ts @@ -23,6 +23,7 @@ interface BaseHookInput { session_id: string; transcript_path: string; cwd: string; + agent_id?: string; permission_mode?: string; } From 968f534e9b3794b655719ccc51810b8a59141764 Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Wed, 22 Apr 2026 23:47:25 +0200 Subject: [PATCH 10/26] feat: Add instrumentation for `groq-sdk` (#1866) Fixes https://github.com/braintrustdata/braintrust-sdk-javascript/issues/1856 --- .changeset/fresh-crabs-dream.md | 5 + .env.example | 1 + .github/workflows/e2e-canary.yaml | 1 + .github/workflows/integration-tests.yaml | 2 + e2e/README.md | 1 + e2e/config/pr-comment-scenarios.json | 9 + e2e/helpers/scenario-installer.ts | 1 + .../groq-v1-auto.span-events.json | 138 +++++++++++++ .../groq-v1-wrapped.span-events.json | 138 +++++++++++++ .../groq-instrumentation/assertions.ts | 166 ++++++++++++++++ 
.../groq-instrumentation/constants.mjs | 3 + .../groq-instrumentation/package.json | 14 ++ .../groq-instrumentation/pnpm-lock.yaml | 23 +++ .../groq-instrumentation/scenario.impl.mjs | 106 ++++++++++ .../groq-instrumentation/scenario.mjs | 9 + .../groq-instrumentation/scenario.test.ts | 49 +++++ .../groq-instrumentation/scenario.ts | 9 + e2e/scripts/run-canary-tests-docker.mjs | 1 + .../auto-instrumentations/bundler/plugin.ts | 2 + .../bundler/webpack-loader.ts | 2 + js/src/auto-instrumentations/configs/groq.ts | 31 +++ js/src/auto-instrumentations/hook.mts | 2 + js/src/auto-instrumentations/index.ts | 1 + js/src/exports.ts | 1 + .../instrumentation/braintrust-plugin.test.ts | 44 ++++ js/src/instrumentation/braintrust-plugin.ts | 13 ++ .../instrumentation/plugins/groq-channels.ts | 33 +++ .../plugins/groq-plugin.test.ts | 34 ++++ js/src/instrumentation/plugins/groq-plugin.ts | 127 ++++++++++++ js/src/vendor-sdk-types/groq.ts | 104 ++++++++++ js/src/wrappers/groq.test.ts | 188 ++++++++++++++++++ js/src/wrappers/groq.ts | 159 +++++++++++++++ turbo.json | 8 + 33 files changed, 1425 insertions(+) create mode 100644 .changeset/fresh-crabs-dream.md create mode 100644 e2e/scenarios/groq-instrumentation/__snapshots__/groq-v1-auto.span-events.json create mode 100644 e2e/scenarios/groq-instrumentation/__snapshots__/groq-v1-wrapped.span-events.json create mode 100644 e2e/scenarios/groq-instrumentation/assertions.ts create mode 100644 e2e/scenarios/groq-instrumentation/constants.mjs create mode 100644 e2e/scenarios/groq-instrumentation/package.json create mode 100644 e2e/scenarios/groq-instrumentation/pnpm-lock.yaml create mode 100644 e2e/scenarios/groq-instrumentation/scenario.impl.mjs create mode 100644 e2e/scenarios/groq-instrumentation/scenario.mjs create mode 100644 e2e/scenarios/groq-instrumentation/scenario.test.ts create mode 100644 e2e/scenarios/groq-instrumentation/scenario.ts create mode 100644 js/src/auto-instrumentations/configs/groq.ts create mode 100644 
js/src/instrumentation/plugins/groq-channels.ts create mode 100644 js/src/instrumentation/plugins/groq-plugin.test.ts create mode 100644 js/src/instrumentation/plugins/groq-plugin.ts create mode 100644 js/src/vendor-sdk-types/groq.ts create mode 100644 js/src/wrappers/groq.test.ts create mode 100644 js/src/wrappers/groq.ts diff --git a/.changeset/fresh-crabs-dream.md b/.changeset/fresh-crabs-dream.md new file mode 100644 index 000000000..4f35ee20e --- /dev/null +++ b/.changeset/fresh-crabs-dream.md @@ -0,0 +1,5 @@ +--- +"braintrust": patch +--- + +feat: Add instrumentation for groq-sdk diff --git a/.env.example b/.env.example index a44259148..c9c314bb3 100644 --- a/.env.example +++ b/.env.example @@ -6,3 +6,4 @@ OPENROUTER_API_KEY= MISTRAL_API_KEY= HUGGINGFACE_API_KEY= COHERE_API_KEY= +GROQ_API_KEY= diff --git a/.github/workflows/e2e-canary.yaml b/.github/workflows/e2e-canary.yaml index 060e629df..12a287004 100644 --- a/.github/workflows/e2e-canary.yaml +++ b/.github/workflows/e2e-canary.yaml @@ -35,6 +35,7 @@ jobs: BRAINTRUST_E2E_PROJECT_NAME: ${{ vars.BRAINTRUST_E2E_PROJECT_NAME }} GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }} + GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }} MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }} diff --git a/.github/workflows/integration-tests.yaml b/.github/workflows/integration-tests.yaml index f8646475f..7b7b88ef3 100644 --- a/.github/workflows/integration-tests.yaml +++ b/.github/workflows/integration-tests.yaml @@ -58,6 +58,7 @@ jobs: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }} + GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }} MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }} @@ -109,6 +110,7 @@ jobs: 
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }} + GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }} MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }} diff --git a/e2e/README.md b/e2e/README.md index b165f3258..b4a15d56e 100644 --- a/e2e/README.md +++ b/e2e/README.md @@ -119,6 +119,7 @@ Non-hermetic scenarios require provider credentials in addition to the mock Brai - `MISTRAL_API_KEY` - `HUGGINGFACE_API_KEY` - `COHERE_API_KEY` +- `GROQ_API_KEY` `claude-agent-sdk-instrumentation` also uses `ANTHROPIC_API_KEY`, because it runs the real Claude Agent SDK against Anthropic in the same style as the existing live Anthropic wrapper coverage. diff --git a/e2e/config/pr-comment-scenarios.json b/e2e/config/pr-comment-scenarios.json index 210590c01..bee0040c5 100644 --- a/e2e/config/pr-comment-scenarios.json +++ b/e2e/config/pr-comment-scenarios.json @@ -42,6 +42,15 @@ { "variantKey": "google-genai-v1460", "label": "v1.46.0" } ] }, + { + "scenarioDirName": "groq-instrumentation", + "label": "Groq Instrumentation", + "metadataScenario": "groq-instrumentation", + "variants": [ + { "variantKey": "groq-v1-wrapped", "label": "Wrapped" }, + { "variantKey": "groq-v1-auto", "label": "Auto-hook" } + ] + }, { "scenarioDirName": "huggingface-instrumentation", "label": "HuggingFace Instrumentation", diff --git a/e2e/helpers/scenario-installer.ts b/e2e/helpers/scenario-installer.ts index 13062e02b..29318aeff 100644 --- a/e2e/helpers/scenario-installer.ts +++ b/e2e/helpers/scenario-installer.ts @@ -25,6 +25,7 @@ const INSTALL_SECRET_ENV_VARS = [ "GEMINI_API_KEY", "GITHUB_TOKEN", "GH_TOKEN", + "GROQ_API_KEY", "HUGGINGFACE_API_KEY", "OPENAI_API_KEY", "OPENROUTER_API_KEY", diff --git a/e2e/scenarios/groq-instrumentation/__snapshots__/groq-v1-auto.span-events.json 
b/e2e/scenarios/groq-instrumentation/__snapshots__/groq-v1-auto.span-events.json new file mode 100644 index 000000000..edc29e4f9 --- /dev/null +++ b/e2e/scenarios/groq-instrumentation/__snapshots__/groq-v1-auto.span-events.json @@ -0,0 +1,138 @@ +[ + { + "has_input": false, + "has_output": false, + "metadata": { + "scenario": "groq-instrumentation" + }, + "metric_keys": [], + "name": "groq-instrumentation-root", + "root_span_id": "", + "span_id": "", + "span_parents": [], + "type": "task" + }, + { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "chat" + }, + "metric_keys": [], + "name": "groq-chat-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "model": "llama-3.3-70b-versatile", + "provider": "groq", + "temperature": 0 + }, + "metric_keys": [ + "completion_time", + "completion_tokens", + "prompt_time", + "prompt_tokens", + "queue_time", + "time_to_first_token", + "tokens", + "total_time" + ], + "name": "groq.chat.completions.create", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "stream" + }, + "metric_keys": [], + "name": "groq-stream-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "model": "llama-3.3-70b-versatile", + "provider": "groq", + "temperature": 0 + }, + "metric_keys": [ + "completion_time", + "completion_tokens", + "prompt_time", + "prompt_tokens", + "queue_time", + "time_to_first_token", + "tokens", + "total_time" + ], + "name": "groq.chat.completions.create", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "tool" + }, + "metric_keys": [], + "name": 
"groq-tool-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "model": "llama-3.3-70b-versatile", + "provider": "groq", + "temperature": 0 + }, + "metric_keys": [ + "completion_time", + "completion_tokens", + "prompt_time", + "prompt_tokens", + "queue_time", + "time_to_first_token", + "tokens", + "total_time" + ], + "name": "groq.chat.completions.create", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + } +] diff --git a/e2e/scenarios/groq-instrumentation/__snapshots__/groq-v1-wrapped.span-events.json b/e2e/scenarios/groq-instrumentation/__snapshots__/groq-v1-wrapped.span-events.json new file mode 100644 index 000000000..edc29e4f9 --- /dev/null +++ b/e2e/scenarios/groq-instrumentation/__snapshots__/groq-v1-wrapped.span-events.json @@ -0,0 +1,138 @@ +[ + { + "has_input": false, + "has_output": false, + "metadata": { + "scenario": "groq-instrumentation" + }, + "metric_keys": [], + "name": "groq-instrumentation-root", + "root_span_id": "", + "span_id": "", + "span_parents": [], + "type": "task" + }, + { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "chat" + }, + "metric_keys": [], + "name": "groq-chat-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "model": "llama-3.3-70b-versatile", + "provider": "groq", + "temperature": 0 + }, + "metric_keys": [ + "completion_time", + "completion_tokens", + "prompt_time", + "prompt_tokens", + "queue_time", + "time_to_first_token", + "tokens", + "total_time" + ], + "name": "groq.chat.completions.create", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "stream" + }, + "metric_keys": [], + "name": 
"groq-stream-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "model": "llama-3.3-70b-versatile", + "provider": "groq", + "temperature": 0 + }, + "metric_keys": [ + "completion_time", + "completion_tokens", + "prompt_time", + "prompt_tokens", + "queue_time", + "time_to_first_token", + "tokens", + "total_time" + ], + "name": "groq.chat.completions.create", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "tool" + }, + "metric_keys": [], + "name": "groq-tool-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "model": "llama-3.3-70b-versatile", + "provider": "groq", + "temperature": 0 + }, + "metric_keys": [ + "completion_time", + "completion_tokens", + "prompt_time", + "prompt_tokens", + "queue_time", + "time_to_first_token", + "tokens", + "total_time" + ], + "name": "groq.chat.completions.create", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + } +] diff --git a/e2e/scenarios/groq-instrumentation/assertions.ts b/e2e/scenarios/groq-instrumentation/assertions.ts new file mode 100644 index 000000000..d3472114b --- /dev/null +++ b/e2e/scenarios/groq-instrumentation/assertions.ts @@ -0,0 +1,166 @@ +import { beforeAll, describe, expect, test } from "vitest"; +import type { Json } from "../../helpers/normalize"; +import type { CapturedLogEvent } from "../../helpers/mock-braintrust-server"; +import { + formatJsonFileSnapshot, + resolveFileSnapshotPath, +} from "../../helpers/file-snapshot"; +import { withScenarioHarness } from "../../helpers/scenario-harness"; +import { findChildSpans, findLatestSpan } from "../../helpers/trace-selectors"; +import { summarizeWrapperContract } from 
"../../helpers/wrapper-contract"; +import { ROOT_NAME, SCENARIO_NAME } from "./constants.mjs"; + +type RunGroqScenario = (harness: { + runNodeScenarioDir: (options: { + entry: string; + nodeArgs: string[]; + runContext?: { variantKey: string }; + scenarioDir: string; + timeoutMs: number; + }) => Promise; + runScenarioDir: (options: { + entry: string; + runContext?: { variantKey: string }; + scenarioDir: string; + timeoutMs: number; + }) => Promise; +}) => Promise; + +function findGroqSpan( + events: CapturedLogEvent[], + parentId: string | undefined, + spanName: string, +) { + const spans = findChildSpans(events, spanName, parentId); + return spans.find((candidate) => candidate.output !== undefined) ?? spans[0]; +} + +function buildSpanSummary(events: CapturedLogEvent[]): Json { + const chatOperation = findLatestSpan(events, "groq-chat-operation"); + const streamOperation = findLatestSpan(events, "groq-stream-operation"); + const toolOperation = findLatestSpan(events, "groq-tool-operation"); + + return [ + findLatestSpan(events, ROOT_NAME), + chatOperation, + findGroqSpan( + events, + chatOperation?.span.id, + "groq.chat.completions.create", + ), + streamOperation, + findGroqSpan( + events, + streamOperation?.span.id, + "groq.chat.completions.create", + ), + toolOperation, + findGroqSpan( + events, + toolOperation?.span.id, + "groq.chat.completions.create", + ), + ].map((event) => + summarizeWrapperContract(event!, [ + "model", + "operation", + "provider", + "scenario", + "temperature", + ]), + ) as Json; +} + +export function defineGroqInstrumentationAssertions(options: { + name: string; + runScenario: RunGroqScenario; + snapshotName: string; + testFileUrl: string; + timeoutMs: number; +}): void { + const spanSnapshotPath = resolveFileSnapshotPath( + options.testFileUrl, + `${options.snapshotName}.span-events.json`, + ); + const testConfig = { + timeout: options.timeoutMs, + }; + + describe(options.name, () => { + let events: CapturedLogEvent[] = []; + + 
beforeAll(async () => { + await withScenarioHarness(async (harness) => { + await options.runScenario(harness); + events = harness.events(); + }); + }, options.timeoutMs); + + test("captures the scenario root span", testConfig, () => { + const root = findLatestSpan(events, ROOT_NAME); + expect(root).toBeDefined(); + expect(root?.row.metadata).toMatchObject({ + scenario: SCENARIO_NAME, + }); + }); + + test("captures chat and stream spans", testConfig, () => { + const chatOperation = findLatestSpan(events, "groq-chat-operation"); + const chatSpan = findGroqSpan( + events, + chatOperation?.span.id, + "groq.chat.completions.create", + ); + const streamOperation = findLatestSpan(events, "groq-stream-operation"); + const streamSpan = findGroqSpan( + events, + streamOperation?.span.id, + "groq.chat.completions.create", + ); + + expect(chatSpan?.row.metadata).toMatchObject({ + provider: "groq", + }); + expect(chatSpan?.row.metadata?.model).toBeDefined(); + expect(chatSpan?.output).toBeDefined(); + + expect(streamSpan?.row.metadata).toMatchObject({ + provider: "groq", + }); + expect(streamSpan?.row.metadata?.model).toBeDefined(); + expect(streamSpan?.output).toBeDefined(); + expect(streamSpan?.metrics).toMatchObject({ + time_to_first_token: expect.any(Number), + }); + }); + + test("captures tool calling span", testConfig, () => { + const operation = findLatestSpan(events, "groq-tool-operation"); + const span = findGroqSpan( + events, + operation?.span.id, + "groq.chat.completions.create", + ); + + expect(span?.row.metadata).toMatchObject({ + provider: "groq", + }); + expect(span?.row.metadata?.model).toBeDefined(); + expect(span?.output?.[0]?.message?.tool_calls).toEqual( + expect.arrayContaining([ + expect.objectContaining({ + function: expect.objectContaining({ + name: "get_weather", + }), + }), + ]), + ); + }); + + test("matches span snapshot", testConfig, async () => { + await expect( + formatJsonFileSnapshot(buildSpanSummary(events)), + 
).toMatchFileSnapshot(spanSnapshotPath); + }); + }); +} diff --git a/e2e/scenarios/groq-instrumentation/constants.mjs b/e2e/scenarios/groq-instrumentation/constants.mjs new file mode 100644 index 000000000..967d98068 --- /dev/null +++ b/e2e/scenarios/groq-instrumentation/constants.mjs @@ -0,0 +1,3 @@ +export const CHAT_MODEL = "llama-3.3-70b-versatile"; +export const ROOT_NAME = "groq-instrumentation-root"; +export const SCENARIO_NAME = "groq-instrumentation"; diff --git a/e2e/scenarios/groq-instrumentation/package.json b/e2e/scenarios/groq-instrumentation/package.json new file mode 100644 index 000000000..1f410a4c4 --- /dev/null +++ b/e2e/scenarios/groq-instrumentation/package.json @@ -0,0 +1,14 @@ +{ + "name": "@braintrust/e2e-groq-instrumentation", + "private": true, + "braintrustScenario": { + "canary": { + "dependencies": { + "groq-sdk": "latest" + } + } + }, + "dependencies": { + "groq-sdk": "1.1.2" + } +} diff --git a/e2e/scenarios/groq-instrumentation/pnpm-lock.yaml b/e2e/scenarios/groq-instrumentation/pnpm-lock.yaml new file mode 100644 index 000000000..d81367d29 --- /dev/null +++ b/e2e/scenarios/groq-instrumentation/pnpm-lock.yaml @@ -0,0 +1,23 @@ +lockfileVersion: '9.0' + +settings: + autoInstallPeers: true + excludeLinksFromLockfile: false + +importers: + + .: + dependencies: + groq-sdk: + specifier: 1.1.2 + version: 1.1.2 + +packages: + + groq-sdk@1.1.2: + resolution: {integrity: sha512-CZO0XUQQDhn43ri1+lZHxZKpb+bGutgTvFmCJtooexiitGmPqhm1hntOT3nCoaq07e+OpeokVnfUs0i/oQuUaQ==} + hasBin: true + +snapshots: + + groq-sdk@1.1.2: {} diff --git a/e2e/scenarios/groq-instrumentation/scenario.impl.mjs b/e2e/scenarios/groq-instrumentation/scenario.impl.mjs new file mode 100644 index 000000000..5c6c79f27 --- /dev/null +++ b/e2e/scenarios/groq-instrumentation/scenario.impl.mjs @@ -0,0 +1,106 @@ +import { wrapGroq } from "braintrust"; +import { + collectAsync, + runOperation, + runTracedScenario, +} from "../../helpers/provider-runtime.mjs"; +import { CHAT_MODEL, 
ROOT_NAME, SCENARIO_NAME } from "./constants.mjs"; + +export const GROQ_SCENARIO_TIMEOUT_MS = 120_000; + +function getApiKey() { + return process.env.GROQ_API_KEY; +} + +function getWeatherToolDefinition() { + return { + type: "function", + function: { + name: "get_weather", + description: "Get the weather for a city.", + parameters: { + type: "object", + properties: { + location: { + type: "string", + description: "City name.", + }, + }, + required: ["location"], + }, + }, + }; +} + +export async function runGroqInstrumentationScenario(options) { + const apiKey = getApiKey(); + if (!apiKey) { + throw new Error("Expected GROQ_API_KEY to be set for e2e"); + } + + const baseClient = new options.Groq({ + apiKey, + }); + const client = options.decorateClient + ? options.decorateClient(baseClient) + : baseClient; + + await runTracedScenario({ + callback: async () => { + await runOperation("groq-chat-operation", "chat", async () => { + await client.chat.completions.create({ + max_completion_tokens: 12, + messages: [{ role: "user", content: "Reply with exactly OK." }], + model: CHAT_MODEL, + temperature: 0, + }); + }); + + await runOperation("groq-stream-operation", "stream", async () => { + const stream = await client.chat.completions.create({ + messages: [{ role: "user", content: "Reply with exactly STREAM." 
}], + model: CHAT_MODEL, + stream: true, + temperature: 0, + }); + await collectAsync(stream); + }); + + await runOperation("groq-tool-operation", "tool", async () => { + await client.chat.completions.create({ + messages: [ + { + role: "user", + content: "Check the weather in Vienna and use the weather tool.", + }, + ], + model: CHAT_MODEL, + temperature: 0, + tool_choice: { + type: "function", + function: { + name: "get_weather", + }, + }, + tools: [getWeatherToolDefinition()], + }); + }); + }, + metadata: { + scenario: SCENARIO_NAME, + }, + projectNameBase: "e2e-groq-instrumentation", + rootName: ROOT_NAME, + }); +} + +export async function runWrappedGroqInstrumentation(options) { + await runGroqInstrumentationScenario({ + decorateClient: wrapGroq, + ...options, + }); +} + +export async function runAutoGroqInstrumentation(options) { + await runGroqInstrumentationScenario(options); +} diff --git a/e2e/scenarios/groq-instrumentation/scenario.mjs b/e2e/scenarios/groq-instrumentation/scenario.mjs new file mode 100644 index 000000000..bdd181041 --- /dev/null +++ b/e2e/scenarios/groq-instrumentation/scenario.mjs @@ -0,0 +1,9 @@ +import Groq from "groq-sdk"; +import { runMain } from "../../helpers/provider-runtime.mjs"; +import { runAutoGroqInstrumentation } from "./scenario.impl.mjs"; + +runMain(async () => { + await runAutoGroqInstrumentation({ + Groq, + }); +}); diff --git a/e2e/scenarios/groq-instrumentation/scenario.test.ts b/e2e/scenarios/groq-instrumentation/scenario.test.ts new file mode 100644 index 000000000..849d6f5ec --- /dev/null +++ b/e2e/scenarios/groq-instrumentation/scenario.test.ts @@ -0,0 +1,49 @@ +import { describe } from "vitest"; +import { + prepareScenarioDir, + readInstalledPackageVersion, + resolveScenarioDir, +} from "../../helpers/scenario-harness"; +import { defineGroqInstrumentationAssertions } from "./assertions"; +import { GROQ_SCENARIO_TIMEOUT_MS } from "./scenario.impl.mjs"; + +const scenarioDir = await prepareScenarioDir({ + 
scenarioDir: resolveScenarioDir(import.meta.url), +}); +const groqSdkVersion = await readInstalledPackageVersion( + scenarioDir, + "groq-sdk", +); + +describe(`groq sdk ${groqSdkVersion}`, () => { + defineGroqInstrumentationAssertions({ + name: "wrapped instrumentation", + runScenario: async ({ runScenarioDir }) => { + await runScenarioDir({ + entry: "scenario.ts", + runContext: { variantKey: "groq-v1-wrapped" }, + scenarioDir, + timeoutMs: GROQ_SCENARIO_TIMEOUT_MS, + }); + }, + snapshotName: "groq-v1-wrapped", + testFileUrl: import.meta.url, + timeoutMs: GROQ_SCENARIO_TIMEOUT_MS, + }); + + defineGroqInstrumentationAssertions({ + name: "auto-hook instrumentation", + runScenario: async ({ runNodeScenarioDir }) => { + await runNodeScenarioDir({ + entry: "scenario.mjs", + nodeArgs: ["--import", "braintrust/hook.mjs"], + runContext: { variantKey: "groq-v1-auto" }, + scenarioDir, + timeoutMs: GROQ_SCENARIO_TIMEOUT_MS, + }); + }, + snapshotName: "groq-v1-auto", + testFileUrl: import.meta.url, + timeoutMs: GROQ_SCENARIO_TIMEOUT_MS, + }); +}); diff --git a/e2e/scenarios/groq-instrumentation/scenario.ts b/e2e/scenarios/groq-instrumentation/scenario.ts new file mode 100644 index 000000000..5a960d526 --- /dev/null +++ b/e2e/scenarios/groq-instrumentation/scenario.ts @@ -0,0 +1,9 @@ +import Groq from "groq-sdk"; +import { runMain } from "../../helpers/provider-runtime.mjs"; +import { runWrappedGroqInstrumentation } from "./scenario.impl.mjs"; + +runMain(async () => { + await runWrappedGroqInstrumentation({ + Groq, + }); +}); diff --git a/e2e/scripts/run-canary-tests-docker.mjs b/e2e/scripts/run-canary-tests-docker.mjs index a2d25868d..0635ba044 100644 --- a/e2e/scripts/run-canary-tests-docker.mjs +++ b/e2e/scripts/run-canary-tests-docker.mjs @@ -19,6 +19,7 @@ const ALLOWED_ENV_KEYS = [ "GEMINI_API_KEY", "GOOGLE_API_KEY", "COHERE_API_KEY", + "GROQ_API_KEY", "OPENAI_API_KEY", "OPENAI_BASE_URL", "OPENROUTER_API_KEY", diff --git a/js/src/auto-instrumentations/bundler/plugin.ts 
b/js/src/auto-instrumentations/bundler/plugin.ts index 508bb8ae3..8d38d7a57 100644 --- a/js/src/auto-instrumentations/bundler/plugin.ts +++ b/js/src/auto-instrumentations/bundler/plugin.ts @@ -30,6 +30,7 @@ import { openRouterAgentConfigs } from "../configs/openrouter-agent"; import { openRouterConfigs } from "../configs/openrouter"; import { mistralConfigs } from "../configs/mistral"; import { cohereConfigs } from "../configs/cohere"; +import { groqConfigs } from "../configs/groq"; export interface BundlerPluginOptions { /** @@ -81,6 +82,7 @@ export const unplugin = createUnplugin((options = {}) => { ...openRouterAgentConfigs, ...mistralConfigs, ...cohereConfigs, + ...groqConfigs, ...(options.instrumentations || []), ]; diff --git a/js/src/auto-instrumentations/bundler/webpack-loader.ts b/js/src/auto-instrumentations/bundler/webpack-loader.ts index fef1cf416..5d6c7e20b 100644 --- a/js/src/auto-instrumentations/bundler/webpack-loader.ts +++ b/js/src/auto-instrumentations/bundler/webpack-loader.ts @@ -39,6 +39,7 @@ import { openRouterAgentConfigs } from "../configs/openrouter-agent"; import { openRouterConfigs } from "../configs/openrouter"; import { mistralConfigs } from "../configs/mistral"; import { cohereConfigs } from "../configs/cohere"; +import { groqConfigs } from "../configs/groq"; import { type BundlerPluginOptions } from "./plugin"; /** @@ -76,6 +77,7 @@ function getMatcher(options: BundlerPluginOptions): InstrumentationMatcher { ...openRouterAgentConfigs, ...mistralConfigs, ...cohereConfigs, + ...groqConfigs, ...(options.instrumentations ?? []), ]; const dcModule = options.browser ? 
"dc-browser" : undefined; diff --git a/js/src/auto-instrumentations/configs/groq.ts b/js/src/auto-instrumentations/configs/groq.ts new file mode 100644 index 000000000..9528303de --- /dev/null +++ b/js/src/auto-instrumentations/configs/groq.ts @@ -0,0 +1,31 @@ +import type { InstrumentationConfig } from "@apm-js-collab/code-transformer"; +import { groqChannels } from "../../instrumentation/plugins/groq-channels"; + +export const groqConfigs: InstrumentationConfig[] = [ + { + channelName: groqChannels.chatCompletionsCreate.channelName, + module: { + name: "groq-sdk", + versionRange: ">=1.0.0", + filePath: "resources/chat/completions.mjs", + }, + functionQuery: { + className: "Completions", + methodName: "create", + kind: "Async", + }, + }, + { + channelName: groqChannels.embeddingsCreate.channelName, + module: { + name: "groq-sdk", + versionRange: ">=1.0.0", + filePath: "resources/embeddings.mjs", + }, + functionQuery: { + className: "Embeddings", + methodName: "create", + kind: "Async", + }, + }, +]; diff --git a/js/src/auto-instrumentations/hook.mts b/js/src/auto-instrumentations/hook.mts index 20d6f5a04..2c3622600 100644 --- a/js/src/auto-instrumentations/hook.mts +++ b/js/src/auto-instrumentations/hook.mts @@ -25,6 +25,7 @@ import { openRouterConfigs } from "./configs/openrouter.js"; import { mistralConfigs } from "./configs/mistral.js"; import { googleADKConfigs } from "./configs/google-adk.js"; import { cohereConfigs } from "./configs/cohere.js"; +import { groqConfigs } from "./configs/groq.js"; import { ModulePatch } from "./loader/cjs-patch.js"; import { patchTracingChannel } from "./patch-tracing-channel.js"; @@ -82,6 +83,7 @@ const allConfigs = [ ? [] : googleADKConfigs), ...(isDisabled(disabledIntegrations, "cohere") ? [] : cohereConfigs), + ...(isDisabled(disabledIntegrations, "groq", "groq-sdk") ? [] : groqConfigs), ]; // 1. 
Register ESM loader for ESM modules diff --git a/js/src/auto-instrumentations/index.ts b/js/src/auto-instrumentations/index.ts index b73a24a02..03fda75a1 100644 --- a/js/src/auto-instrumentations/index.ts +++ b/js/src/auto-instrumentations/index.ts @@ -39,6 +39,7 @@ export { openRouterConfigs } from "./configs/openrouter"; export { mistralConfigs } from "./configs/mistral"; export { googleADKConfigs } from "./configs/google-adk"; export { cohereConfigs } from "./configs/cohere"; +export { groqConfigs } from "./configs/groq"; // Re-export orchestrion configuration types // Note: ModuleMetadata and FunctionQuery are properties of InstrumentationConfig, diff --git a/js/src/exports.ts b/js/src/exports.ts index 158479847..f2d940d99 100644 --- a/js/src/exports.ts +++ b/js/src/exports.ts @@ -181,6 +181,7 @@ export { wrapOpenRouterAgent } from "./wrappers/openrouter-agent"; export { wrapOpenRouter } from "./wrappers/openrouter"; export { wrapMistral } from "./wrappers/mistral"; export { wrapCohere } from "./wrappers/cohere"; +export { wrapGroq } from "./wrappers/groq"; export { wrapVitest } from "./wrappers/vitest"; export { initNodeTestSuite } from "./wrappers/node-test"; diff --git a/js/src/instrumentation/braintrust-plugin.test.ts b/js/src/instrumentation/braintrust-plugin.test.ts index a3227c9e0..aec836b66 100644 --- a/js/src/instrumentation/braintrust-plugin.test.ts +++ b/js/src/instrumentation/braintrust-plugin.test.ts @@ -10,6 +10,7 @@ import { OpenRouterAgentPlugin } from "./plugins/openrouter-agent-plugin"; import { OpenRouterPlugin } from "./plugins/openrouter-plugin"; import { MistralPlugin } from "./plugins/mistral-plugin"; import { CoherePlugin } from "./plugins/cohere-plugin"; +import { GroqPlugin } from "./plugins/groq-plugin"; function createPluginClassMock() { return vi.fn(function MockPlugin(this: { @@ -68,6 +69,10 @@ vi.mock("./plugins/cohere-plugin", () => ({ CoherePlugin: createPluginClassMock(), })); +vi.mock("./plugins/groq-plugin", () => ({ + 
GroqPlugin: createPluginClassMock(), +})); + describe("BraintrustPlugin", () => { beforeEach(() => { vi.clearAllMocks(); @@ -167,6 +172,15 @@ describe("BraintrustPlugin", () => { expect(mockInstance.enable).toHaveBeenCalledTimes(1); }); + it("should create and enable Groq plugin by default", () => { + const plugin = new BraintrustPlugin(); + plugin.enable(); + + expect(GroqPlugin).toHaveBeenCalledTimes(1); + const mockInstance = vi.mocked(GroqPlugin).mock.results[0].value; + expect(mockInstance.enable).toHaveBeenCalledTimes(1); + }); + it("should create all plugins when enabled with no config", () => { const plugin = new BraintrustPlugin(); plugin.enable(); @@ -181,6 +195,7 @@ describe("BraintrustPlugin", () => { expect(OpenRouterAgentPlugin).toHaveBeenCalledTimes(1); expect(MistralPlugin).toHaveBeenCalledTimes(1); expect(CoherePlugin).toHaveBeenCalledTimes(1); + expect(GroqPlugin).toHaveBeenCalledTimes(1); }); it("should create all plugins when enabled with empty config", () => { @@ -197,6 +212,7 @@ describe("BraintrustPlugin", () => { expect(OpenRouterAgentPlugin).toHaveBeenCalledTimes(1); expect(MistralPlugin).toHaveBeenCalledTimes(1); expect(CoherePlugin).toHaveBeenCalledTimes(1); + expect(GroqPlugin).toHaveBeenCalledTimes(1); }); it("should create all plugins when enabled with empty integrations config", () => { @@ -213,6 +229,7 @@ describe("BraintrustPlugin", () => { expect(OpenRouterAgentPlugin).toHaveBeenCalledTimes(1); expect(MistralPlugin).toHaveBeenCalledTimes(1); expect(CoherePlugin).toHaveBeenCalledTimes(1); + expect(GroqPlugin).toHaveBeenCalledTimes(1); }); }); @@ -370,6 +387,22 @@ describe("BraintrustPlugin", () => { expect(MistralPlugin).toHaveBeenCalledTimes(1); }); + it("should not create Groq plugin when groq: false", () => { + const plugin = new BraintrustPlugin({ + integrations: { groq: false }, + }); + plugin.enable(); + + expect(GroqPlugin).not.toHaveBeenCalled(); + expect(OpenAIPlugin).toHaveBeenCalledTimes(1); + 
expect(AnthropicPlugin).toHaveBeenCalledTimes(1); + expect(AISDKPlugin).toHaveBeenCalledTimes(1); + expect(ClaudeAgentSDKPlugin).toHaveBeenCalledTimes(1); + expect(GoogleGenAIPlugin).toHaveBeenCalledTimes(1); + expect(OpenRouterPlugin).toHaveBeenCalledTimes(1); + expect(MistralPlugin).toHaveBeenCalledTimes(1); + }); + it("should not create OpenRouter Agent plugin when openrouterAgent: false", () => { const plugin = new BraintrustPlugin({ integrations: { openrouterAgent: false }, @@ -393,6 +426,7 @@ describe("BraintrustPlugin", () => { openrouterAgent: false, mistral: false, cohere: false, + groq: false, }, }); plugin.enable(); @@ -407,6 +441,7 @@ describe("BraintrustPlugin", () => { expect(OpenRouterAgentPlugin).not.toHaveBeenCalled(); expect(MistralPlugin).not.toHaveBeenCalled(); expect(CoherePlugin).not.toHaveBeenCalled(); + expect(GroqPlugin).not.toHaveBeenCalled(); }); it("should allow selective enabling of plugins", () => { @@ -545,6 +580,7 @@ describe("BraintrustPlugin", () => { .results[0].value; const mistralMock = vi.mocked(MistralPlugin).mock.results[0].value; const cohereMock = vi.mocked(CoherePlugin).mock.results[0].value; + const groqMock = vi.mocked(GroqPlugin).mock.results[0].value; expect(openaiMock.enable).toHaveBeenCalledTimes(1); expect(anthropicMock.enable).toHaveBeenCalledTimes(1); @@ -556,6 +592,7 @@ describe("BraintrustPlugin", () => { expect(openRouterAgentMock.enable).toHaveBeenCalledTimes(1); expect(mistralMock.enable).toHaveBeenCalledTimes(1); expect(cohereMock.enable).toHaveBeenCalledTimes(1); + expect(groqMock.enable).toHaveBeenCalledTimes(1); }); it("should disable and nullify all sub-plugins when disabled", () => { @@ -576,6 +613,7 @@ describe("BraintrustPlugin", () => { .results[0].value; const mistralMock = vi.mocked(MistralPlugin).mock.results[0].value; const cohereMock = vi.mocked(CoherePlugin).mock.results[0].value; + const groqMock = vi.mocked(GroqPlugin).mock.results[0].value; plugin.disable(); @@ -589,6 +627,7 @@ 
describe("BraintrustPlugin", () => { expect(openRouterAgentMock.disable).toHaveBeenCalledTimes(1); expect(mistralMock.disable).toHaveBeenCalledTimes(1); expect(cohereMock.disable).toHaveBeenCalledTimes(1); + expect(groqMock.disable).toHaveBeenCalledTimes(1); }); it("should be idempotent on multiple enable calls", () => { @@ -632,6 +671,7 @@ describe("BraintrustPlugin", () => { expect(OpenRouterAgentPlugin).not.toHaveBeenCalled(); expect(MistralPlugin).not.toHaveBeenCalled(); expect(CoherePlugin).not.toHaveBeenCalled(); + expect(GroqPlugin).not.toHaveBeenCalled(); }); it("should allow re-enabling after disable", () => { @@ -653,6 +693,7 @@ describe("BraintrustPlugin", () => { expect(OpenRouterAgentPlugin).toHaveBeenCalledTimes(1); expect(MistralPlugin).toHaveBeenCalledTimes(1); expect(CoherePlugin).toHaveBeenCalledTimes(1); + expect(GroqPlugin).toHaveBeenCalledTimes(1); }); it("should only disable plugins that were enabled", () => { @@ -668,6 +709,7 @@ describe("BraintrustPlugin", () => { openrouterAgent: true, mistral: false, cohere: false, + groq: true, }, }); plugin.enable(); @@ -681,6 +723,7 @@ describe("BraintrustPlugin", () => { const openRouterMock = vi.mocked(OpenRouterPlugin).mock.results[0].value; const openRouterAgentMock = vi.mocked(OpenRouterAgentPlugin).mock .results[0].value; + const groqMock = vi.mocked(GroqPlugin).mock.results[0].value; plugin.disable(); @@ -690,6 +733,7 @@ describe("BraintrustPlugin", () => { expect(huggingFaceMock.disable).toHaveBeenCalledTimes(1); expect(openRouterMock.disable).toHaveBeenCalledTimes(1); expect(openRouterAgentMock.disable).toHaveBeenCalledTimes(1); + expect(groqMock.disable).toHaveBeenCalledTimes(1); expect(MistralPlugin).not.toHaveBeenCalled(); expect(CoherePlugin).not.toHaveBeenCalled(); }); diff --git a/js/src/instrumentation/braintrust-plugin.ts b/js/src/instrumentation/braintrust-plugin.ts index 8f3919900..485d2d9bc 100644 --- a/js/src/instrumentation/braintrust-plugin.ts +++ 
b/js/src/instrumentation/braintrust-plugin.ts @@ -10,6 +10,7 @@ import { OpenRouterPlugin } from "./plugins/openrouter-plugin"; import { MistralPlugin } from "./plugins/mistral-plugin"; import { GoogleADKPlugin } from "./plugins/google-adk-plugin"; import { CoherePlugin } from "./plugins/cohere-plugin"; +import { GroqPlugin } from "./plugins/groq-plugin"; export interface BraintrustPluginConfig { integrations?: { @@ -26,6 +27,7 @@ export interface BraintrustPluginConfig { mistral?: boolean; googleADK?: boolean; cohere?: boolean; + groq?: boolean; }; } @@ -58,6 +60,7 @@ export class BraintrustPlugin extends BasePlugin { private mistralPlugin: MistralPlugin | null = null; private googleADKPlugin: GoogleADKPlugin | null = null; private coherePlugin: CoherePlugin | null = null; + private groqPlugin: GroqPlugin | null = null; constructor(config: BraintrustPluginConfig = {}) { super(); @@ -129,6 +132,11 @@ export class BraintrustPlugin extends BasePlugin { this.coherePlugin = new CoherePlugin(); this.coherePlugin.enable(); } + + if (integrations.groq !== false) { + this.groqPlugin = new GroqPlugin(); + this.groqPlugin.enable(); + } } protected onDisable(): void { @@ -186,6 +194,11 @@ export class BraintrustPlugin extends BasePlugin { this.coherePlugin.disable(); this.coherePlugin = null; } + + if (this.groqPlugin) { + this.groqPlugin.disable(); + this.groqPlugin = null; + } } } diff --git a/js/src/instrumentation/plugins/groq-channels.ts b/js/src/instrumentation/plugins/groq-channels.ts new file mode 100644 index 000000000..638afcbff --- /dev/null +++ b/js/src/instrumentation/plugins/groq-channels.ts @@ -0,0 +1,33 @@ +import { channel, defineChannels } from "../core/channel-definitions"; +import type { + GroqChatChoice, + GroqChatCompletion, + GroqChatCompletionChunk, + GroqChatCreateParams, + GroqChatStream, + GroqEmbeddingCreateParams, + GroqEmbeddingResponse, +} from "../../vendor-sdk-types/groq"; + +type GroqChatResult = GroqChatCompletion | GroqChatStream; + +export 
const groqChannels = defineChannels("groq-sdk", { + chatCompletionsCreate: channel< + [GroqChatCreateParams], + GroqChatResult, + Record, + GroqChatCompletionChunk + >({ + channelName: "chat.completions.create", + kind: "async", + }), + + embeddingsCreate: channel<[GroqEmbeddingCreateParams], GroqEmbeddingResponse>( + { + channelName: "embeddings.create", + kind: "async", + }, + ), +}); + +export type GroqChatResultChoice = GroqChatChoice; diff --git a/js/src/instrumentation/plugins/groq-plugin.test.ts b/js/src/instrumentation/plugins/groq-plugin.test.ts new file mode 100644 index 000000000..a63bab607 --- /dev/null +++ b/js/src/instrumentation/plugins/groq-plugin.test.ts @@ -0,0 +1,34 @@ +import { describe, expect, it } from "vitest"; +import { parseGroqMetrics } from "./groq-plugin"; + +describe("parseGroqMetrics", () => { + it("merges OpenAI-compatible usage metrics with Groq cache metrics", () => { + expect( + parseGroqMetrics({ + usage: { + prompt_tokens: 10, + completion_tokens: 4, + total_tokens: 14, + }, + x_groq: { + usage: { + dram_cached_tokens: 2, + sram_cached_tokens: 3, + }, + }, + }), + ).toEqual({ + completion_tokens: 4, + dram_cached_tokens: 2, + prompt_tokens: 10, + sram_cached_tokens: 3, + tokens: 14, + }); + }); + + it("returns an empty object for unknown values", () => { + expect(parseGroqMetrics(undefined)).toEqual({}); + expect(parseGroqMetrics(null)).toEqual({}); + expect(parseGroqMetrics({})).toEqual({}); + }); +}); diff --git a/js/src/instrumentation/plugins/groq-plugin.ts b/js/src/instrumentation/plugins/groq-plugin.ts new file mode 100644 index 000000000..777bc40a0 --- /dev/null +++ b/js/src/instrumentation/plugins/groq-plugin.ts @@ -0,0 +1,127 @@ +import { BasePlugin } from "../core"; +import { + traceAsyncChannel, + traceStreamingChannel, + unsubscribeAll, +} from "../core/channel-tracing"; +import { SpanTypeAttribute } from "../../../util/index"; +import { processInputAttachments } from "../../wrappers/attachment-utils"; +import { 
getCurrentUnixTimestamp } from "../../util"; +import { + aggregateChatCompletionChunks, + parseMetricsFromUsage, +} from "./openai-plugin"; +import { groqChannels } from "./groq-channels"; +import type { + GroqChatCompletion, + GroqChatCompletionChunk, +} from "../../vendor-sdk-types/groq"; + +export class GroqPlugin extends BasePlugin { + protected onEnable(): void { + this.unsubscribers.push( + traceStreamingChannel(groqChannels.chatCompletionsCreate, { + name: "groq.chat.completions.create", + type: SpanTypeAttribute.LLM, + extractInput: ([params]) => { + const { messages, ...metadata } = params; + return { + input: processInputAttachments(messages), + metadata: { ...metadata, provider: "groq" }, + }; + }, + extractOutput: (result) => result?.choices, + extractMetrics: (result, startTime) => { + const metrics = parseGroqMetrics(result); + if (startTime) { + metrics.time_to_first_token = getCurrentUnixTimestamp() - startTime; + } + return metrics; + }, + aggregateChunks: aggregateGroqChatCompletionChunks, + }), + ); + + this.unsubscribers.push( + traceAsyncChannel(groqChannels.embeddingsCreate, { + name: "groq.embeddings.create", + type: SpanTypeAttribute.LLM, + extractInput: ([params]) => { + const { input, ...metadata } = params; + return { + input, + metadata: { ...metadata, provider: "groq" }, + }; + }, + extractOutput: (result) => { + const embedding = result?.data?.[0]?.embedding; + return Array.isArray(embedding) + ? 
{ embedding_length: embedding.length } + : undefined; + }, + extractMetrics: (result) => parseGroqMetrics(result), + }), + ); + } + + protected onDisable(): void { + this.unsubscribers = unsubscribeAll(this.unsubscribers); + } +} + +export function parseGroqMetrics( + result: + | Pick + | { usage?: unknown; x_groq?: unknown } + | null + | undefined, +): Record { + const metrics = parseMetricsFromUsage(result?.usage); + const xGroq = result?.x_groq; + + if (!xGroq || typeof xGroq !== "object") { + return metrics; + } + + const extraUsage = "usage" in xGroq ? xGroq.usage : undefined; + + if (!extraUsage || typeof extraUsage !== "object") { + return metrics; + } + + const dramCachedTokens = (extraUsage as Record)[ + "dram_cached_tokens" + ]; + const sramCachedTokens = (extraUsage as Record)[ + "sram_cached_tokens" + ]; + + return { + ...metrics, + ...(typeof dramCachedTokens === "number" + ? { dram_cached_tokens: dramCachedTokens } + : {}), + ...(typeof sramCachedTokens === "number" + ? { sram_cached_tokens: sramCachedTokens } + : {}), + }; +} + +function aggregateGroqChatCompletionChunks( + chunks: GroqChatCompletionChunk[], + streamResult?: unknown, + endEvent?: unknown, +): { + metrics: Record; + output: GroqChatCompletion["choices"]; +} { + const aggregated = aggregateChatCompletionChunks( + chunks, + streamResult, + endEvent, + ); + return { + metrics: aggregated.metrics, + output: aggregated.output, + }; +} diff --git a/js/src/vendor-sdk-types/groq.ts b/js/src/vendor-sdk-types/groq.ts new file mode 100644 index 000000000..a59a72019 --- /dev/null +++ b/js/src/vendor-sdk-types/groq.ts @@ -0,0 +1,104 @@ +import type { + OpenAIAPIPromise, + OpenAIChatChoice, + OpenAIChatCompletionChunk, + OpenAIChatCreateParams, + OpenAIChatLogprobs, + OpenAIChatStream, + OpenAIEmbeddingCreateParams, + OpenAIUsage, +} from "./openai-common"; + +export interface GroqUsage extends OpenAIUsage { + queue_time?: number; +} + +export interface GroqChatCompletion { + choices: 
OpenAIChatChoice[]; + usage?: GroqUsage; + x_groq?: { + id?: string; + seed?: number | null; + usage?: { + dram_cached_tokens?: number; + sram_cached_tokens?: number; + [key: string]: number | undefined; + } | null; + [key: string]: unknown; + } | null; + [key: string]: unknown; +} + +export type GroqChatChoice = OpenAIChatChoice; +export type GroqChatLogprobs = OpenAIChatLogprobs; +export type GroqChatCompletionChunk = OpenAIChatCompletionChunk & { + usage?: GroqUsage; +}; +export type GroqChatCreateParams = OpenAIChatCreateParams; +export type GroqChatStream = OpenAIChatStream; + +export interface GroqEmbeddingCreateParams extends OpenAIEmbeddingCreateParams { + model?: string; +} + +export interface GroqEmbeddingResponse { + data?: Array<{ + embedding?: number[] | string; + [key: string]: unknown; + }>; + usage?: GroqUsage; + [key: string]: unknown; +} + +export interface GroqTranscriptionCreateParams { + file?: unknown; + language?: string | null; + model: string; + prompt?: string; + response_format?: string; + temperature?: number; + timestamp_granularities?: Array<"word" | "segment">; + url?: string; + [key: string]: unknown; +} + +export interface GroqTranscription { + text?: string; + [key: string]: unknown; +} + +export interface GroqChatCompletions { + create: ( + params: GroqChatCreateParams, + options?: unknown, + ) => OpenAIAPIPromise; +} + +export interface GroqChat { + completions: GroqChatCompletions; +} + +export interface GroqEmbeddings { + create: ( + params: GroqEmbeddingCreateParams, + options?: unknown, + ) => OpenAIAPIPromise; +} + +export interface GroqAudioTranscriptions { + create: ( + params: GroqTranscriptionCreateParams, + options?: unknown, + ) => OpenAIAPIPromise; +} + +export interface GroqAudio { + transcriptions: GroqAudioTranscriptions; +} + +export interface GroqClient { + audio?: GroqAudio; + chat?: GroqChat; + embeddings?: GroqEmbeddings; + [key: string]: unknown; +} diff --git a/js/src/wrappers/groq.test.ts 
b/js/src/wrappers/groq.test.ts new file mode 100644 index 000000000..976d727af --- /dev/null +++ b/js/src/wrappers/groq.test.ts @@ -0,0 +1,188 @@ +import { + afterEach, + beforeAll, + beforeEach, + describe, + expect, + test, + vi, +} from "vitest"; +import { configureNode } from "../node/config"; +import { _exportsForTestingOnly, initLogger } from "../logger"; +import { wrapGroq } from "./groq"; + +try { + configureNode(); +} catch { + // Best-effort initialization for test environments. +} + +describe("groq wrapper", () => { + let backgroundLogger: ReturnType< + typeof _exportsForTestingOnly.useTestBackgroundLogger + >; + + beforeAll(async () => { + await _exportsForTestingOnly.simulateLoginForTests(); + }); + + beforeEach(() => { + backgroundLogger = _exportsForTestingOnly.useTestBackgroundLogger(); + initLogger({ + projectId: "test-project-id", + projectName: "groq.test.ts", + }); + }); + + afterEach(() => { + _exportsForTestingOnly.clearTestBackgroundLogger(); + vi.restoreAllMocks(); + }); + + test("returns original object for unsupported clients", () => { + const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {}); + const invalid = { foo: "bar" }; + + expect(wrapGroq(invalid)).toBe(invalid); + expect(warnSpy).toHaveBeenCalledWith( + "Unsupported Groq library. 
Not wrapping.", + ); + }); + + test("wraps chat completions and embeddings", async () => { + async function* stream() { + yield { + choices: [{ delta: { role: "assistant" }, finish_reason: null }], + }; + yield { + choices: [ + { + delta: { content: "STREAM" }, + finish_reason: "stop", + }, + ], + usage: { + completion_tokens: 1, + prompt_tokens: 4, + total_tokens: 5, + }, + }; + } + + const wrapped = wrapGroq({ + chat: { + completions: { + create: vi.fn(async (request: Record) => { + if (request.stream) { + return stream(); + } + + return { + choices: [ + { + index: 0, + message: { + content: "OK", + role: "assistant", + }, + }, + ], + usage: { + completion_tokens: 2, + prompt_tokens: 5, + total_tokens: 7, + }, + x_groq: { + usage: { + dram_cached_tokens: 1, + sram_cached_tokens: 2, + }, + }, + }; + }), + }, + }, + embeddings: { + create: vi.fn(async () => ({ + data: [{ embedding: [0.1, 0.2, 0.3] }], + usage: { + prompt_tokens: 3, + total_tokens: 3, + }, + })), + }, + withOptions(options: unknown) { + return options ? this : null; + }, + }); + + expect(wrapped.withOptions({})).toBe(wrapped); + + await wrapped.chat.completions.create({ + max_completion_tokens: 12, + messages: [{ content: "Reply with exactly OK.", role: "user" }], + model: "llama-3.3-70b-versatile", + temperature: 0, + }); + + const streamed = await wrapped.chat.completions.create({ + messages: [{ content: "Reply with exactly STREAM.", role: "user" }], + model: "llama-3.3-70b-versatile", + stream: true, + }); + for await (const _chunk of streamed) { + // Consume the stream so chunk aggregation runs. 
+ } + + await wrapped.embeddings.create({ + input: "braintrust tracing", + model: "nomic-embed-text-v1_5", + }); + + const spans = await backgroundLogger.drain(); + expect(spans).toHaveLength(3); + + const chatSpan = spans.find( + (span: any) => + span.span_attributes?.name === "groq.chat.completions.create" && + span.output?.[0]?.message?.content === "OK", + ) as Record | undefined; + const streamSpan = spans.find( + (span: any) => + span.span_attributes?.name === "groq.chat.completions.create" && + span.output?.[0]?.message?.content === "STREAM", + ) as Record | undefined; + const embeddingSpan = spans.find( + (span: any) => span.span_attributes?.name === "groq.embeddings.create", + ) as Record | undefined; + + expect(chatSpan?.metadata).toMatchObject({ + model: "llama-3.3-70b-versatile", + provider: "groq", + temperature: 0, + }); + expect(chatSpan?.metrics).toMatchObject({ + completion_tokens: 2, + dram_cached_tokens: 1, + prompt_tokens: 5, + sram_cached_tokens: 2, + time_to_first_token: expect.any(Number), + tokens: 7, + }); + + expect(streamSpan?.metrics).toMatchObject({ + completion_tokens: 1, + prompt_tokens: 4, + time_to_first_token: expect.any(Number), + tokens: 5, + }); + + expect(embeddingSpan?.metadata).toMatchObject({ + model: "nomic-embed-text-v1_5", + provider: "groq", + }); + expect(embeddingSpan?.output).toEqual({ + embedding_length: 3, + }); + }); +}); diff --git a/js/src/wrappers/groq.ts b/js/src/wrappers/groq.ts new file mode 100644 index 000000000..68312fa81 --- /dev/null +++ b/js/src/wrappers/groq.ts @@ -0,0 +1,159 @@ +import { groqChannels } from "../instrumentation/plugins/groq-channels"; +import type { + GroqChat, + GroqChatCompletion, + GroqChatCreateParams, + GroqChatStream, + GroqClient, + GroqEmbeddingCreateParams, + GroqEmbeddingResponse, + GroqEmbeddings, +} from "../vendor-sdk-types/groq"; + +/** + * Wrap a Groq client (created with `new Groq(...)`) with Braintrust tracing. 
+ */ +export function wrapGroq(groq: T): T { + if (isSupportedGroqClient(groq)) { + return groqProxy(groq) as T; + } + + // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage. + console.warn("Unsupported Groq library. Not wrapping."); + return groq; +} + +function isRecord(value: unknown): value is Record { + return typeof value === "object" && value !== null; +} + +function hasFunction(value: unknown, methodName: string): boolean { + return ( + isRecord(value) && + methodName in value && + typeof value[methodName] === "function" + ); +} + +function hasChat(value: unknown): value is GroqChat { + return ( + isRecord(value) && + isRecord(value.completions) && + hasFunction(value.completions, "create") + ); +} + +function hasEmbeddings(value: unknown): value is GroqEmbeddings { + return hasFunction(value, "create"); +} + +function isSupportedGroqClient(value: unknown): value is GroqClient { + return ( + isRecord(value) && + ((value.chat !== undefined && hasChat(value.chat)) || + (value.embeddings !== undefined && hasEmbeddings(value.embeddings))) + ); +} + +function groqProxy(groq: GroqClient): GroqClient { + const privateMethodWorkaroundCache = new WeakMap< + (...args: unknown[]) => unknown, + (...args: unknown[]) => unknown + >(); + + const completionProxy = groq.chat?.completions + ? new Proxy(groq.chat.completions, { + get(target, prop, receiver) { + if (prop === "create") { + return wrapChatCompletionsCreate(target.create.bind(target)); + } + + return Reflect.get(target, prop, receiver); + }, + }) + : undefined; + + const chatProxy = groq.chat + ? new Proxy(groq.chat, { + get(target, prop, receiver) { + if (prop === "completions") { + return completionProxy ?? target.completions; + } + + return Reflect.get(target, prop, receiver); + }, + }) + : undefined; + + const embeddingsProxy = groq.embeddings + ? 
new Proxy(groq.embeddings, { + get(target, prop, receiver) { + if (prop === "create") { + return wrapEmbeddingsCreate(target.create.bind(target)); + } + + return Reflect.get(target, prop, receiver); + }, + }) + : undefined; + + const topLevelProxy: GroqClient = new Proxy(groq, { + get(target, prop, receiver) { + switch (prop) { + case "chat": + return chatProxy ?? target.chat; + case "embeddings": + return embeddingsProxy ?? target.embeddings; + } + + const value = Reflect.get(target, prop, target); + if (typeof value !== "function") { + return value; + } + + const cachedValue = privateMethodWorkaroundCache.get(value); + if (cachedValue) { + return cachedValue; + } + + const thisBoundValue = function ( + this: unknown, + ...args: unknown[] + ): unknown { + const thisArg = this === topLevelProxy ? target : this; + const output = Reflect.apply(value, thisArg, args); + return output === target ? topLevelProxy : output; + }; + + privateMethodWorkaroundCache.set(value, thisBoundValue); + return thisBoundValue; + }, + }); + + return topLevelProxy; +} + +function wrapChatCompletionsCreate( + create: ( + request: GroqChatCreateParams, + options?: unknown, + ) => Promise, +): GroqChat["completions"]["create"] { + return (request, options) => + groqChannels.chatCompletionsCreate.tracePromise( + () => create(request, options), + { arguments: [request] }, + ) as ReturnType; +} + +function wrapEmbeddingsCreate( + create: ( + request: GroqEmbeddingCreateParams, + options?: unknown, + ) => Promise, +): GroqEmbeddings["create"] { + return (request, options) => + groqChannels.embeddingsCreate.tracePromise(() => create(request, options), { + arguments: [request], + }) as ReturnType; +} diff --git a/turbo.json b/turbo.json index 22c028ca6..bad5b66ae 100644 --- a/turbo.json +++ b/turbo.json @@ -6,6 +6,7 @@ "ANTHROPIC_API_KEY", "GEMINI_API_KEY", "COHERE_API_KEY", + "GROQ_API_KEY", "OPENROUTER_API_KEY", "MISTRAL_API_KEY", "HUGGINGFACE_API_KEY" @@ -26,6 +27,7 @@ "BRAINTRUST_API_KEY", 
"GEMINI_API_KEY", "COHERE_API_KEY", + "GROQ_API_KEY", "OPENAI_API_KEY", "OPENAI_BASE_URL", "OPENROUTER_API_KEY", @@ -44,6 +46,7 @@ "BRAINTRUST_E2E_RUN_CONTEXT_DIR", "GEMINI_API_KEY", "COHERE_API_KEY", + "GROQ_API_KEY", "OPENAI_API_KEY", "OPENAI_BASE_URL", "OPENROUTER_API_KEY", @@ -67,6 +70,7 @@ "BRAINTRUST_E2E_RUN_CONTEXT_DIR", "GEMINI_API_KEY", "COHERE_API_KEY", + "GROQ_API_KEY", "OPENAI_API_KEY", "OPENAI_BASE_URL", "OPENROUTER_API_KEY", @@ -85,6 +89,7 @@ "BRAINTRUST_E2E_RUN_CONTEXT_DIR", "GEMINI_API_KEY", "COHERE_API_KEY", + "GROQ_API_KEY", "OPENAI_API_KEY", "OPENAI_BASE_URL", "OPENROUTER_API_KEY", @@ -117,6 +122,7 @@ "BRAINTRUST_API_KEY", "GEMINI_API_KEY", "COHERE_API_KEY", + "GROQ_API_KEY", "OPENAI_API_KEY", "OPENROUTER_API_KEY", "MISTRAL_API_KEY", @@ -131,6 +137,7 @@ "BRAINTRUST_API_KEY", "GEMINI_API_KEY", "COHERE_API_KEY", + "GROQ_API_KEY", "OPENAI_API_KEY", "OPENROUTER_API_KEY", "MISTRAL_API_KEY", @@ -145,6 +152,7 @@ "BRAINTRUST_API_KEY", "GEMINI_API_KEY", "COHERE_API_KEY", + "GROQ_API_KEY", "OPENAI_API_KEY", "OPENROUTER_API_KEY", "MISTRAL_API_KEY", From a898cddd17877981bd220afda5e4b536ee10a4e7 Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Wed, 22 Apr 2026 23:47:43 +0200 Subject: [PATCH 11/26] fix(auto-instrumentation): Skip over file transforms in bundler plugins when `id` is undefined (#1886) Ref https://linear.app/braintrustdata/issue/BT-4861/webpackplugin-crashes-on-nextjs-15-with-typeerror-cannot-read --- .changeset/chilly-sites-greet.md | 5 +++++ js/src/auto-instrumentations/bundler/plugin.ts | 5 +++++ 2 files changed, 10 insertions(+) create mode 100644 .changeset/chilly-sites-greet.md diff --git a/.changeset/chilly-sites-greet.md b/.changeset/chilly-sites-greet.md new file mode 100644 index 000000000..0b766015b --- /dev/null +++ b/.changeset/chilly-sites-greet.md @@ -0,0 +1,5 @@ +--- +"braintrust": patch +--- + +fix(auto-instrumentation): Skip over file transforms in bundler plugins when id is undefined diff --git 
a/js/src/auto-instrumentations/bundler/plugin.ts b/js/src/auto-instrumentations/bundler/plugin.ts index 8d38d7a57..3202d3dbc 100644 --- a/js/src/auto-instrumentations/bundler/plugin.ts +++ b/js/src/auto-instrumentations/bundler/plugin.ts @@ -96,6 +96,11 @@ export const unplugin = createUnplugin((options = {}) => { name: "code-transformer", enforce: "pre", transform(code: string, id: string) { + if (!id) { + // Some modules apparently don't have an id? + return null; + } + // Convert file:// URLs to regular paths at entry point // Node.js ESM loader hooks provide file:// URLs, but downstream code expects paths const filePath = id.startsWith("file:") ? fileURLToPath(id) : id; From 3500ec29c247f8d7021741a361580822df7239d0 Mon Sep 17 00:00:00 2001 From: max-braintrust Date: Thu, 23 Apr 2026 16:43:26 -0700 Subject: [PATCH 12/26] feat: dataset versioning (#1837) ### Summary This PR adds dataset snapshot and environment tag support to the JS SDK. See feature spec here: https://github.com/braintrustdata/braintrust-spec/pull/14 ### Background This change adds two friendlier ways to reference dataset versions: - Snapshots, which are stable human-readable names for a specific dataset version - Environment tags, which are movable aliases like ppe or production that can be repointed over time These are still just ways of referring to a concrete dataset version (xact_id). The SDK resolves snapshot names and environment tags down to the underlying xact_id before experiment or eval registration, so we keep the existing reproducibility guarantees while making version selection much easier to use. 
### This PR adds: - SDK support for initializing datasets by: - explicit version (xact_id) - snapshot name - environment tag - Resolution of snapshot and environment selectors to a concrete dataset version internally before eval / experiment registration - SDK helpers for dataset snapshots, including: - create - list - update via register/upsert for the current dataset version - patch snapshot metadata by id - delete - restore and restore/preview to return the dataset head to the state at a particular version - Dev server support for forwarding dataset version and environment when resolving datasets for remote evals - Tests and example coverage for the new version-selection paths --- .changeset/goofy-hotels-care.md | 69 ++ js/dev/server.ts | 4 + js/src/exports.ts | 3 + js/src/logger.test.ts | 799 ++++++++++++++++++ js/src/logger.ts | 546 +++++++++++- .../api-compatibility.test.ts | 534 +++++++++++- 6 files changed, 1916 insertions(+), 39 deletions(-) create mode 100644 .changeset/goofy-hotels-care.md diff --git a/.changeset/goofy-hotels-care.md b/.changeset/goofy-hotels-care.md new file mode 100644 index 000000000..ede100873 --- /dev/null +++ b/.changeset/goofy-hotels-care.md @@ -0,0 +1,69 @@ +--- +"braintrust": minor +--- + +Add dataset versioning support to `init()`, `initDataset()`, and dataset objects. 
+ +You can now pin dataset reads and experiment registration by explicit version, snapshot name, or environment tag: + +```ts +import { init, initDataset } from "braintrust"; + +const datasetByVersion = initDataset({ + project: "support-bot", + dataset: "production-cases", + version: "1234567890123456", +}); + +const datasetBySnapshot = initDataset({ + project: "support-bot", + dataset: "production-cases", + snapshotName: "baseline", +}); + +const datasetByEnvironment = initDataset({ + project: "support-bot", + dataset: "production-cases", + environment: "production", +}); + +init({ + project: "support-bot", + experiment: "baseline-eval", + dataset: { + id: "00000000-0000-0000-0000-000000000123", + snapshotName: "baseline", + }, +}); +``` + +Dataset objects now expose snapshot CRUD helpers, plus lookup by snapshot name or xact id: + +```ts +const dataset = initDataset({ + project: "support-bot", + dataset: "production-cases", +}); + +const snapshot = await dataset.createSnapshot({ + name: "baseline", + description: "Before the prompt rollout", +}); + +await dataset.updateSnapshot(snapshot.id, { + name: "baseline-v2", + description: null, +}); + +const snapshots = await dataset.listSnapshots(); +const byName = await dataset.getSnapshot({ + snapshotName: "baseline-v2", +}); +const byXactId = await dataset.getSnapshot({ + xactId: snapshot.xact_id, +}); + +await dataset.deleteSnapshot(snapshot.id); +``` + +`braintrust/dev` now also respects `dataset_version` and `dataset_environment` when resolving datasets for evals, so local eval runs match the pinned dataset selection used by the main SDK. diff --git a/js/dev/server.ts b/js/dev/server.ts index 5499fe208..ebce5c3f5 100644 --- a/js/dev/server.ts +++ b/js/dev/server.ts @@ -318,6 +318,8 @@ async function getDataset( state, project: data.project_name, dataset: data.dataset_name, + version: data.dataset_version ?? undefined, + environment: data.dataset_environment ?? undefined, _internal_btql: data._internal_btql ?? 
undefined, }); } else if ("dataset_id" in data) { @@ -329,6 +331,8 @@ async function getDataset( state, projectId: datasetInfo.projectId, dataset: datasetInfo.dataset, + version: data.dataset_version ?? undefined, + environment: data.dataset_environment ?? undefined, _internal_btql: data._internal_btql ?? undefined, }); } else { diff --git a/js/src/exports.ts b/js/src/exports.ts index f2d940d99..93a12bb05 100644 --- a/js/src/exports.ts +++ b/js/src/exports.ts @@ -8,6 +8,9 @@ export type { CompiledPromptParams, CompletionPrompt, ContextParentSpanIds, + DatasetRestorePreviewResult, + DatasetRestoreResult, + DatasetSnapshot, DataSummary, DatasetSummary, DefaultMetadataType, diff --git a/js/src/logger.test.ts b/js/src/logger.test.ts index fc4f14f7b..c4536bb78 100644 --- a/js/src/logger.test.ts +++ b/js/src/logger.test.ts @@ -4,6 +4,7 @@ import { vi, expect, test, describe, beforeEach, afterEach } from "vitest"; import { _exportsForTestingOnly, init, + initDataset, initLogger, Prompt, BraintrustState, @@ -453,6 +454,804 @@ test("init accepts dataset with id and version", () => { expect(datasetWithVersion.version).toBe("v2"); }); +test("init accepts dataset with id and environment", () => { + const datasetWithEnvironment = { + id: "dataset-id-123", + environment: "production", + }; + + expect(datasetWithEnvironment.id).toBe("dataset-id-123"); + expect(datasetWithEnvironment.environment).toBe("production"); +}); + +test("init accepts dataset with id and snapshotName", () => { + const datasetWithSnapshot = { + id: "dataset-id-123", + snapshotName: "123", + }; + + expect(datasetWithSnapshot.id).toBe("dataset-id-123"); + expect(datasetWithSnapshot.snapshotName).toBe("123"); +}); + +function mockInitGitMetadata() { + vi.spyOn(_exportsForTestingOnly.isomorph, "getRepoInfo").mockResolvedValue( + undefined, + ); + vi.spyOn( + _exportsForTestingOnly.isomorph, + "getPastNAncestors", + ).mockResolvedValue([]); +} + +test("initDataset prefers version over environment in eval data", 
async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + vi.spyOn(state, "login").mockResolvedValue(state); + vi.spyOn(state.appConn(), "post_json").mockResolvedValue({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + version: "123", + environment: "production", + state, + }); + + await expect(dataset.toEvalData()).resolves.toEqual({ + dataset_id: "00000000-0000-0000-0000-000000000002", + dataset_version: "123", + }); + + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); +}); + +test("dataset.toEvalData preserves dataset_environment", async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + vi.spyOn(state, "login").mockResolvedValue(state); + vi.spyOn(state.apiConn(), "get_json").mockResolvedValue({ + object_version: "123", + }); + vi.spyOn(state.appConn(), "post_json").mockResolvedValue({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + environment: "production", + state, + }); + + await expect(dataset.toEvalData()).resolves.toEqual({ + dataset_id: "00000000-0000-0000-0000-000000000002", + dataset_environment: "production", + }); + + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); +}); + +test("dataset.toEvalData preserves dataset_snapshot_name", async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + vi.spyOn(state, "login").mockResolvedValue(state); + const postJson = vi + .spyOn(state.appConn(), "post_json") + .mockResolvedValueOnce({ + project: { + id: 
"00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }) + .mockResolvedValueOnce([ + { + id: "00000000-0000-0000-0000-000000000004", + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "123", + description: null, + xact_id: "456", + created: "2026-03-31T00:00:00.000Z", + }, + ]); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + snapshotName: "123", + state, + }); + + await expect(dataset.toEvalData()).resolves.toEqual({ + dataset_id: "00000000-0000-0000-0000-000000000002", + dataset_snapshot_name: "123", + }); + expect(postJson).toHaveBeenNthCalledWith(2, "api/dataset_snapshot/get", { + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "123", + }); + + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); +}); + +test("dataset.version preserves pinned-version fast path", async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + const login = vi.spyOn(state, "login").mockResolvedValue(state); + const postJson = vi.spyOn(state.appConn(), "post_json"); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + version: "123", + state, + }); + + await expect(dataset.version()).resolves.toBe("123"); + expect(login).not.toHaveBeenCalled(); + expect(postJson).not.toHaveBeenCalled(); + + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); +}); + +test("dataset.createSnapshot forwards update when requested", async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + vi.spyOn(state, "login").mockResolvedValue(state); + const postJson = vi + .spyOn(state.appConn(), "post_json") + .mockResolvedValueOnce({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }) + 
.mockResolvedValueOnce({ + dataset_snapshot: { + id: "00000000-0000-0000-0000-000000000004", + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "snapshot", + description: "updated description", + xact_id: "123", + created: "2026-03-31T00:00:00.000Z", + }, + found_existing: true, + }); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + version: "123", + state, + }); + + await expect( + dataset.createSnapshot({ + name: "snapshot", + description: "updated description", + update: true, + }), + ).resolves.toMatchObject({ + id: "00000000-0000-0000-0000-000000000004", + xact_id: "123", + }); + + expect(postJson).toHaveBeenNthCalledWith(2, "api/dataset_snapshot/register", { + dataset_id: "00000000-0000-0000-0000-000000000002", + dataset_snapshot_name: "snapshot", + description: "updated description", + xact_id: "123", + update: true, + }); + + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); +}); + +test("dataset.getSnapshot looks up snapshots by name", async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + vi.spyOn(state, "login").mockResolvedValue(state); + const postJson = vi + .spyOn(state.appConn(), "post_json") + .mockResolvedValueOnce({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }) + .mockResolvedValueOnce([ + { + id: "00000000-0000-0000-0000-000000000004", + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "snapshot", + description: null, + xact_id: "123", + created: "2026-03-31T00:00:00.000Z", + }, + ]); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + state, + }); + + await expect( + dataset.getSnapshot({ + snapshotName: "snapshot", + }), + ).resolves.toMatchObject({ + id: "00000000-0000-0000-0000-000000000004", + name: "snapshot", + xact_id: "123", + }); + + 
expect(postJson).toHaveBeenNthCalledWith(2, "api/dataset_snapshot/get", { + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "snapshot", + }); + + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); +}); + +test("dataset.getSnapshot looks up snapshots by xact id", async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + vi.spyOn(state, "login").mockResolvedValue(state); + const postJson = vi + .spyOn(state.appConn(), "post_json") + .mockResolvedValueOnce({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }) + .mockResolvedValueOnce([ + { + id: "00000000-0000-0000-0000-000000000004", + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "snapshot", + description: null, + xact_id: "123", + created: "2026-03-31T00:00:00.000Z", + }, + ]); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + state, + }); + + await expect( + dataset.getSnapshot({ + xactId: "123", + }), + ).resolves.toMatchObject({ + id: "00000000-0000-0000-0000-000000000004", + name: "snapshot", + xact_id: "123", + }); + + expect(postJson).toHaveBeenNthCalledWith(2, "api/dataset_snapshot/get", { + dataset_id: "00000000-0000-0000-0000-000000000002", + xact_id: "123", + }); + + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); +}); + +test("dataset.updateSnapshot patches snapshot metadata by id", async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + vi.spyOn(state, "login").mockResolvedValue(state); + const postJson = vi + .spyOn(state.appConn(), "post_json") + .mockResolvedValueOnce({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }) + .mockResolvedValueOnce({ + id: 
"00000000-0000-0000-0000-000000000004", + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "renamed snapshot", + description: null, + xact_id: "123", + created: "2026-03-31T00:00:00.000Z", + }); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + state, + }); + + await expect( + dataset.updateSnapshot("00000000-0000-0000-0000-000000000004", { + name: "renamed snapshot", + description: null, + }), + ).resolves.toMatchObject({ + id: "00000000-0000-0000-0000-000000000004", + name: "renamed snapshot", + description: null, + }); + + expect(postJson).toHaveBeenNthCalledWith(2, "api/dataset_snapshot/patch_id", { + id: "00000000-0000-0000-0000-000000000004", + name: "renamed snapshot", + description: null, + }); + + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); +}); + +test("dataset.restorePreview posts restore preview request", async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + vi.spyOn(state, "login").mockResolvedValue(state); + vi.spyOn(state.appConn(), "post_json").mockResolvedValueOnce({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }); + const postJson = vi + .spyOn(state.apiConn(), "post_json") + .mockResolvedValueOnce({ + rows_to_restore: 3, + rows_to_delete: 1, + }); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + state, + }); + + await expect( + dataset.restorePreview({ + version: "123", + }), + ).resolves.toEqual({ + rows_to_restore: 3, + rows_to_delete: 1, + }); + + expect(postJson).toHaveBeenNthCalledWith( + 1, + "v1/dataset/00000000-0000-0000-0000-000000000002/restore/preview", + { + version: "123", + }, + ); + + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); +}); + +test("dataset.restore posts restore request", async () => { + const state = await 
_exportsForTestingOnly.simulateLoginForTests(); + vi.spyOn(state, "login").mockResolvedValue(state); + vi.spyOn(state.appConn(), "post_json").mockResolvedValueOnce({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }); + const postJson = vi + .spyOn(state.apiConn(), "post_json") + .mockResolvedValueOnce({ + xact_id: "456", + rows_restored: 3, + rows_deleted: 1, + }); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + state, + }); + + await expect( + dataset.restore({ + version: "123", + }), + ).resolves.toEqual({ + xact_id: "456", + rows_restored: 3, + rows_deleted: 1, + }); + + expect(postJson).toHaveBeenNthCalledWith( + 1, + "v1/dataset/00000000-0000-0000-0000-000000000002/restore", + { + version: "123", + }, + ); + + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); +}); + +test("init keeps plain dataset refs attached to the experiment", async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + vi.spyOn(state, "login").mockResolvedValue(state); + mockInitGitMetadata(); + vi.spyOn(state.appConn(), "post_json").mockResolvedValue({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + experiment: { + id: "00000000-0000-0000-0000-000000000003", + project_id: "00000000-0000-0000-0000-000000000001", + name: "test-experiment", + public: false, + }, + }); + + const experiment = init({ + project: "test-project", + experiment: "test-experiment", + dataset: { + id: "00000000-0000-0000-0000-000000000002", + }, + setCurrent: false, + state, + }); + + await experiment.id; + + expect(experiment.dataset).toMatchObject({ + id: "00000000-0000-0000-0000-000000000002", + }); + + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); +}); + +test("init resolves dataset version from Dataset instances before 
experiment registration", async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + vi.spyOn(state, "login").mockResolvedValue(state); + mockInitGitMetadata(); + const postJson = vi + .spyOn(state.appConn(), "post_json") + .mockResolvedValueOnce({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }) + .mockResolvedValueOnce({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + experiment: { + id: "00000000-0000-0000-0000-000000000003", + project_id: "00000000-0000-0000-0000-000000000001", + name: "test-experiment", + public: false, + }, + }); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + state, + }); + const version = vi.spyOn(dataset, "version").mockResolvedValue("123"); + + const experiment = init({ + project: "test-project", + experiment: "test-experiment", + dataset, + setCurrent: false, + state, + }); + + await experiment.id; + + expect(version).toHaveBeenCalled(); + expect(postJson).toHaveBeenNthCalledWith( + 2, + "api/experiment/register", + expect.objectContaining({ + dataset_id: "00000000-0000-0000-0000-000000000002", + dataset_version: "123", + }), + ); + + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); +}); + +test("init resolves dataset environment before experiment registration", async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + vi.spyOn(state, "login").mockResolvedValue(state); + mockInitGitMetadata(); + const getJson = vi.spyOn(state.apiConn(), "get_json").mockResolvedValue({ + object_version: "123", + }); + const postJson = vi.spyOn(state.appConn(), "post_json").mockResolvedValue({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + experiment: { + id: "00000000-0000-0000-0000-000000000003", + project_id: 
"00000000-0000-0000-0000-000000000001", + name: "test-experiment", + public: false, + }, + }); + + const experiment = init({ + project: "test-project", + experiment: "test-experiment", + dataset: { + id: "00000000-0000-0000-0000-000000000002", + environment: "production", + }, + setCurrent: false, + state, + }); + + await experiment.id; + + expect(getJson).toHaveBeenCalledWith( + "environment-object/dataset/00000000-0000-0000-0000-000000000002/production", + { + org_name: "test-org-name", + }, + ); + expect(experiment.dataset).toMatchObject({ + id: "00000000-0000-0000-0000-000000000002", + environment: "production", + }); + expect(postJson).toHaveBeenCalledWith( + "api/experiment/register", + expect.objectContaining({ + dataset_id: "00000000-0000-0000-0000-000000000002", + dataset_version: "123", + }), + ); + + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); +}); + +test("init resolves dataset environment without org_name when orgName is unset", async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + state.orgName = null; + vi.spyOn(state, "login").mockResolvedValue(state); + mockInitGitMetadata(); + const getJson = vi.spyOn(state.apiConn(), "get_json").mockResolvedValue({ + object_version: "123", + }); + const postJson = vi.spyOn(state.appConn(), "post_json").mockResolvedValue({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + experiment: { + id: "00000000-0000-0000-0000-000000000003", + project_id: "00000000-0000-0000-0000-000000000001", + name: "test-experiment", + public: false, + }, + }); + + const experiment = init({ + project: "test-project", + experiment: "test-experiment", + dataset: { + id: "00000000-0000-0000-0000-000000000002", + environment: "production", + }, + setCurrent: false, + state, + }); + + await experiment.id; + + expect(getJson).toHaveBeenCalledWith( + "environment-object/dataset/00000000-0000-0000-0000-000000000002/production", + ); + 
expect(postJson).toHaveBeenCalledWith( + "api/experiment/register", + expect.objectContaining({ + dataset_id: "00000000-0000-0000-0000-000000000002", + dataset_version: "123", + }), + ); + + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); +}); + +test("init prefers dataset version over environment before experiment registration", async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + vi.spyOn(state, "login").mockResolvedValue(state); + mockInitGitMetadata(); + const getJson = vi.spyOn(state.apiConn(), "get_json"); + const postJson = vi.spyOn(state.appConn(), "post_json").mockResolvedValue({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + experiment: { + id: "00000000-0000-0000-0000-000000000003", + project_id: "00000000-0000-0000-0000-000000000001", + name: "test-experiment", + public: false, + }, + }); + + const experiment = init({ + project: "test-project", + experiment: "test-experiment", + dataset: { + id: "00000000-0000-0000-0000-000000000002", + version: "123", + environment: "production", + }, + setCurrent: false, + state, + }); + + await experiment.id; + + expect(getJson).not.toHaveBeenCalled(); + expect(postJson).toHaveBeenCalledWith( + "api/experiment/register", + expect.objectContaining({ + dataset_id: "00000000-0000-0000-0000-000000000002", + dataset_version: "123", + }), + ); + + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); +}); + +test("init resolves dataset snapshots before experiment registration", async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + vi.spyOn(state, "login").mockResolvedValue(state); + mockInitGitMetadata(); + const postJson = vi + .spyOn(state.appConn(), "post_json") + .mockResolvedValueOnce([ + { + id: "00000000-0000-0000-0000-000000000004", + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "123", + description: null, + xact_id: "456", + created: 
"2026-03-31T00:00:00.000Z", + }, + ]) + .mockResolvedValueOnce({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + experiment: { + id: "00000000-0000-0000-0000-000000000003", + project_id: "00000000-0000-0000-0000-000000000001", + name: "test-experiment", + public: false, + }, + }); + + const experiment = init({ + project: "test-project", + experiment: "test-experiment", + dataset: { + id: "00000000-0000-0000-0000-000000000002", + snapshotName: "123", + }, + setCurrent: false, + state, + }); + + await experiment.id; + + expect(postJson).toHaveBeenNthCalledWith(1, "api/dataset_snapshot/get", { + dataset_id: "00000000-0000-0000-0000-000000000002", + name: "123", + }); + expect(postJson).toHaveBeenNthCalledWith( + 2, + "api/experiment/register", + expect.objectContaining({ + dataset_id: "00000000-0000-0000-0000-000000000002", + dataset_version: "456", + }), + ); + + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); +}); + +test("init surfaces dataset environment lookup errors instead of falling back to latest", async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + vi.spyOn(state, "login").mockResolvedValue(state); + mockInitGitMetadata(); + vi.spyOn(state.apiConn(), "get_json").mockRejectedValue( + new Error("environment lookup failed"), + ); + const postJson = vi.spyOn(state.appConn(), "post_json"); + + const experiment = init({ + project: "test-project", + experiment: "test-experiment", + dataset: { + id: "00000000-0000-0000-0000-000000000002", + environment: "production", + }, + setCurrent: false, + state, + }); + + await expect(experiment.id).rejects.toThrow("environment lookup failed"); + expect(postJson).not.toHaveBeenCalled(); + + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); +}); + describe("loader version precedence", () => { let state: BraintrustState; let getJson: ReturnType; diff --git a/js/src/logger.ts b/js/src/logger.ts index 
2dd3d5150..ef8ed3a65 100644 --- a/js/src/logger.ts +++ b/js/src/logger.ts @@ -69,6 +69,8 @@ import { type GitMetadataSettingsType as GitMetadataSettings, type ChatCompletionMessageParamType as Message, type ChatCompletionOpenAIMessageParamType as OpenAIMessage, + DatasetSnapshot as datasetSnapshotSchema, + type DatasetSnapshotType as DatasetSnapshot, PromptData as promptDataSchema, type PromptDataType as PromptData, Prompt as promptSchema, @@ -90,6 +92,28 @@ const RESET_CONTEXT_MANAGER_STATE = Symbol.for( // 6 MB for the AWS lambda gateway (from our own testing). export const DEFAULT_MAX_REQUEST_SIZE = 6 * 1024 * 1024; +export type { DatasetSnapshot }; + +const datasetSnapshotRegisterResponseSchema = z.object({ + dataset_snapshot: datasetSnapshotSchema, + found_existing: z.boolean().optional(), +}); + +const datasetRestorePreviewResultSchema = z.object({ + rows_to_restore: z.number(), + rows_to_delete: z.number(), +}); +export type DatasetRestorePreviewResult = z.infer< + typeof datasetRestorePreviewResultSchema +>; + +const datasetRestoreResultSchema = z.object({ + xact_id: z.string().nullable(), + rows_restored: z.number(), + rows_deleted: z.number(), +}); +export type DatasetRestoreResult = z.infer; + const parametersRowSchema = z.object({ id: z.string().uuid(), _xact_id: z.string(), @@ -3390,13 +3414,55 @@ type InitOpenOption = { }; /** - * Reference to a dataset by ID and optional version. + * Reference to a dataset by ID and optional explicit selector. 
*/ -export interface DatasetRef { - id: string; +type DatasetSelection = { version?: string; + environment?: string; + snapshotName?: string; +}; + +type DatasetSnapshotNameLookup = { + snapshotName: string; + xactId?: never; +}; + +type DatasetSnapshotXactLookup = { + snapshotName?: never; + xactId: string; +}; + +export type DatasetSnapshotLookup = + | DatasetSnapshotNameLookup + | DatasetSnapshotXactLookup; + +function isDatasetSnapshotNameLookup( + lookup: DatasetSnapshotLookup, +): lookup is DatasetSnapshotNameLookup { + return "snapshotName" in lookup; } +function assertDatasetSnapshotLookup(lookup: { + snapshotName?: string; + xactId?: string; +}): asserts lookup is DatasetSnapshotLookup { + const hasSnapshotName = lookup.snapshotName !== undefined; + const hasXactId = lookup.xactId !== undefined; + if (hasSnapshotName === hasXactId) { + throw new Error("Exactly one of snapshotName or xactId must be provided"); + } +} + +type DatasetPinState = { + lazyPinnedVersion?: LazyValue; + pinnedEnvironment?: string; + pinnedSnapshotName?: string; +}; + +export type DatasetRef = { + id: string; +} & DatasetSelection; + export interface ParametersRef { id: string; version?: string; @@ -3621,20 +3687,13 @@ export function init( } if (dataset !== undefined) { - if ( - "id" in dataset && - typeof dataset.id === "string" && - !("__braintrust_dataset_marker" in dataset) - ) { - // Simple {id: ..., version?: ...} object - args["dataset_id"] = dataset.id; - if ("version" in dataset && dataset.version !== undefined) { - args["dataset_version"] = dataset.version; - } - } else { - // Full Dataset object - args["dataset_id"] = await (dataset as AnyDataset).id; - args["dataset_version"] = await (dataset as AnyDataset).version(); + const datasetSelection = await serializeDatasetForExperiment({ + dataset, + state, + }); + args["dataset_id"] = datasetSelection.datasetId; + if (datasetSelection.datasetVersion !== undefined) { + args["dataset_version"] = 
datasetSelection.datasetVersion; } } @@ -3704,9 +3763,7 @@ export function init( const ret = new Experiment( state, lazyMetadata, - dataset !== undefined && "version" in dataset - ? (dataset as AnyDataset) - : undefined, + dataset !== undefined ? (dataset as AnyDataset) : undefined, ); if (options.setCurrent ?? true) { state.currentExperiment = ret; @@ -3795,6 +3852,8 @@ export type InitDatasetOptions = dataset?: string; description?: string; version?: string; + environment?: string; + snapshotName?: string; projectId?: string; metadata?: Record; state?: BraintrustState; @@ -3805,6 +3864,212 @@ export type FullInitDatasetOptions = { project?: string; } & InitDatasetOptions; +async function getDatasetSnapshots( + params: + | { + state: BraintrustState; + datasetId: string; + } + | ({ + state: BraintrustState; + datasetId: string; + } & DatasetSnapshotLookup), +): Promise { + const { state, datasetId } = params; + return datasetSnapshotSchema.array().parse( + await state.appConn().post_json("api/dataset_snapshot/get", { + dataset_id: datasetId, + ...("snapshotName" in params ? { name: params.snapshotName } : {}), + ...("xactId" in params ? { xact_id: params.xactId } : {}), + }), + ); +} + +async function getDatasetSnapshot( + params: { + state: BraintrustState; + datasetId: string; + } & DatasetSnapshotLookup, +): Promise { + assertDatasetSnapshotLookup(params); + const snapshots = await getDatasetSnapshots(params); + if (snapshots.length > 1) { + throw new Error( + isDatasetSnapshotNameLookup(params) + ? 
`Expected a unique dataset snapshot named "${params.snapshotName}" for ${params.datasetId}` + : `Expected a unique dataset snapshot for xact_id "${params.xactId}" in ${params.datasetId}`, + ); + } + return snapshots[0]; +} + +function normalizeDatasetSelection({ + version, + environment, + snapshotName, +}: DatasetSelection): DatasetSelection { + if (version !== undefined) { + return { version }; + } + + if (snapshotName !== undefined) { + return { snapshotName }; + } + + if (environment !== undefined) { + return { environment }; + } + + return {}; +} + +async function resolveDatasetSnapshotName({ + state, + datasetId, + snapshotName, +}: { + state: BraintrustState; + datasetId: string; + snapshotName: string; +}): Promise { + const match = await getDatasetSnapshot({ + state, + datasetId, + snapshotName, + }); + if (match === undefined) { + throw new Error( + `Dataset snapshot "${snapshotName}" not found for ${datasetId}`, + ); + } + return match.xact_id; +} + +async function resolveDatasetSnapshotNameForMetadata({ + state, + lazyMetadata, + snapshotName, +}: { + state: BraintrustState; + lazyMetadata: LazyValue; + snapshotName: string; +}): Promise { + const metadata = await lazyMetadata.get(); + return await resolveDatasetSnapshotName({ + state, + datasetId: metadata.dataset.id, + snapshotName, + }); +} + +async function resolveDatasetEnvironment({ + state, + datasetId, + environment, +}: { + state: BraintrustState; + datasetId: string; + environment: string; +}): Promise { + const environmentObjectPath = `environment-object/dataset/${datasetId}/${encodeURIComponent(environment)}`; + const response = + state.orgName == null + ? 
await state.apiConn().get_json(environmentObjectPath) + : await state.apiConn().get_json(environmentObjectPath, { + org_name: state.orgName, + }); + return z.object({ object_version: z.string() }).parse(response) + .object_version; +} + +async function resolveDatasetEnvironmentForMetadata({ + state, + lazyMetadata, + environment, +}: { + state: BraintrustState; + lazyMetadata: LazyValue; + environment: string; +}): Promise { + const metadata = await lazyMetadata.get(); + return await resolveDatasetEnvironment({ + state, + datasetId: metadata.dataset.id, + environment, + }); +} + +async function serializeDatasetForExperiment({ + dataset, + state, +}: { + dataset: AnyDataset | DatasetRef; + state: BraintrustState; +}): Promise<{ datasetId: string; datasetVersion?: string }> { + if (!Dataset.isDataset(dataset)) { + const selection = normalizeDatasetSelection(dataset); + + if (selection.version !== undefined) { + return { + datasetId: dataset.id, + datasetVersion: selection.version, + }; + } + + if (selection.snapshotName !== undefined) { + return { + datasetId: dataset.id, + datasetVersion: await resolveDatasetSnapshotName({ + state, + datasetId: dataset.id, + snapshotName: selection.snapshotName, + }), + }; + } + + if (selection.environment !== undefined) { + return { + datasetId: dataset.id, + datasetVersion: await resolveDatasetEnvironment({ + state, + datasetId: dataset.id, + environment: selection.environment, + }), + }; + } + + return { + datasetId: dataset.id, + }; + } + + const evalData = await dataset.toEvalData(); + const selection = normalizeDatasetSelection({ + version: evalData.dataset_version, + environment: evalData.dataset_environment, + snapshotName: evalData.dataset_snapshot_name, + }); + + if (selection.version !== undefined) { + return { + datasetId: evalData.dataset_id, + datasetVersion: selection.version, + }; + } + + const datasetVersion = await dataset.version(); + if (datasetVersion !== undefined) { + return { + datasetId: evalData.dataset_id, 
+ datasetVersion, + }; + } + + return { + datasetId: evalData.dataset_id, + }; +} + /** * Create a new dataset in a specified project. If the project does not exist, it will be created. * @@ -3812,6 +4077,9 @@ export type FullInitDatasetOptions = { * @param options.project The name of the project to create the dataset in. Must specify at least one of `project` or `projectId`. * @param options.dataset The name of the dataset to create. If not specified, a name will be generated automatically. * @param options.description An optional description of the dataset. + * @param options.version Pin the dataset to a specific version xact_id. If `snapshotName` or `environment` are also provided, `version` takes precedence. + * @param options.snapshotName Pin the dataset to the version captured by this named snapshot. If `environment` is also provided, `snapshotName` takes precedence. + * @param options.environment Pin the dataset to the version tagged with this environment slug. * @param options.appUrl The URL of the Braintrust App. Defaults to https://www.braintrust.dev. * @param options.apiKey The API key to use. If the parameter is not specified, will try to use the `BRAINTRUST_API_KEY` environment variable. If no API key is specified, will prompt the user to login. * @param options.orgName (Optional) The name of a specific organization to connect to. This is useful if you belong to multiple. @@ -3868,6 +4136,8 @@ export function initDataset< dataset, description, version, + snapshotName, + environment, appUrl, apiKey, orgName, @@ -3879,6 +4149,14 @@ export function initDataset< state: stateArg, _internal_btql, } = options; + const selection = normalizeDatasetSelection({ + version, + environment, + snapshotName, + }); + const normalizedVersion = selection.version; + const normalizedEnvironment = selection.environment; + const normalizedSnapshotName = selection.snapshotName; const state = stateArg ?? 
_globalState; @@ -3919,13 +4197,57 @@ export function initDataset< }, ); - return new Dataset( + const resolvedVersion = + normalizedVersion !== undefined + ? normalizedVersion + : normalizedSnapshotName !== undefined + ? new LazyValue(async () => { + return await resolveDatasetSnapshotNameForMetadata({ + state, + lazyMetadata, + snapshotName: normalizedSnapshotName, + }); + }) + : normalizedEnvironment !== undefined + ? new LazyValue(async () => { + return await resolveDatasetEnvironmentForMetadata({ + state, + lazyMetadata, + environment: normalizedEnvironment, + }); + }) + : undefined; + + const datasetObject = new Dataset( stateArg ?? _globalState, lazyMetadata, - version, + typeof resolvedVersion === "string" ? resolvedVersion : undefined, legacy, _internal_btql, + resolvedVersion instanceof LazyValue || + normalizedEnvironment !== undefined || + normalizedSnapshotName !== undefined + ? { + ...(resolvedVersion instanceof LazyValue + ? { + lazyPinnedVersion: resolvedVersion, + } + : {}), + ...(normalizedEnvironment !== undefined + ? { + pinnedEnvironment: normalizedEnvironment, + } + : {}), + ...(normalizedSnapshotName !== undefined + ? 
{ + pinnedSnapshotName: normalizedSnapshotName, + } + : {}), + } + : undefined, ); + + return datasetObject; } /** @@ -5706,6 +6028,18 @@ export class ObjectFetcher implements AsyncIterable< throw new Error("ObjectFetcher subclasses must have a 'getState' method"); } + protected getPinnedVersion(): string | undefined { + return this.pinnedVersion; + } + + protected setPinnedVersion(pinnedVersion: string | undefined): void { + this.pinnedVersion = pinnedVersion; + } + + protected getInternalBtql(): Record | undefined { + return this._internal_btql; + } + private async *fetchRecordsFromApi( batchSize: number | undefined, ): AsyncGenerator> { @@ -6842,6 +7176,9 @@ export class Dataset< private readonly lazyMetadata: LazyValue; private readonly __braintrust_dataset_marker = true; private newRecords = 0; + private lazyPinnedVersion?: LazyValue; + private pinnedEnvironment?: string; + private pinnedSnapshotName?: string; constructor( private state: BraintrustState, @@ -6849,6 +7186,7 @@ export class Dataset< pinnedVersion?: string, legacy?: IsLegacyDataset, _internal_btql?: Record, + pinState?: DatasetPinState, ) { // eslint-disable-next-line @typescript-eslint/consistent-type-assertions const isLegacyDataset = (legacy ?? 
@@ -6872,6 +7210,9 @@ export class Dataset< _internal_btql, ); this.lazyMetadata = lazyMetadata; + this.lazyPinnedVersion = pinState?.lazyPinnedVersion; + this.pinnedEnvironment = pinState?.pinnedEnvironment; + this.pinnedSnapshotName = pinState?.pinnedSnapshotName; } public get id(): Promise { @@ -6896,12 +7237,63 @@ export class Dataset< return this.state; } + public async toEvalData(): Promise<{ + dataset_id: string; + dataset_version?: string; + dataset_environment?: string; + dataset_snapshot_name?: string; + _internal_btql?: Record; + }> { + await this.getState(); + const metadata = await this.lazyMetadata.get(); + const pinnedVersion = this.getPinnedVersion(); + const internalBtql = this.getInternalBtql(); + + return { + dataset_id: metadata.dataset.id, + ...(this.pinnedEnvironment !== undefined + ? { + dataset_environment: this.pinnedEnvironment, + } + : {}), + ...(this.pinnedEnvironment === undefined && + this.pinnedSnapshotName !== undefined + ? { + dataset_snapshot_name: this.pinnedSnapshotName, + } + : {}), + ...(this.pinnedEnvironment === undefined && + this.pinnedSnapshotName === undefined && + pinnedVersion !== undefined + ? { + dataset_version: pinnedVersion, + } + : {}), + ...(internalBtql !== undefined ? { _internal_btql: internalBtql } : {}), + }; + } + protected async getState(): Promise { // Ensure the login state is populated by awaiting lazyMetadata. 
await this.lazyMetadata.get(); + if ( + this.lazyPinnedVersion !== undefined && + this.getPinnedVersion() === undefined + ) { + this.setPinnedVersion(await this.lazyPinnedVersion.get()); + } return this.state; } + public override async version(options?: { batchSize?: number }) { + const pinnedVersion = this.getPinnedVersion(); + if (pinnedVersion !== undefined) { + return pinnedVersion; + } + await this.getState(); + return await super.version(options); + } + private validateEvent({ metadata, expected, @@ -7078,6 +7470,116 @@ export class Dataset< return id; } + public async createSnapshot({ + name, + description, + update, + }: { + readonly name: string; + readonly description?: string; + readonly update?: boolean; + }): Promise { + await this.flush(); + const state = await this.getState(); + const datasetId = await this.id; + const currentVersion = await this.version(); + if (currentVersion === undefined) { + throw new Error("Cannot create snapshot: dataset has no version"); + } + const response = await state + .appConn() + .post_json("api/dataset_snapshot/register", { + dataset_id: datasetId, + dataset_snapshot_name: name, + description, + xact_id: currentVersion, + update, + }); + return datasetSnapshotRegisterResponseSchema.parse(response) + .dataset_snapshot; + } + + public async listSnapshots(): Promise { + const state = await this.getState(); + return await getDatasetSnapshots({ + state, + datasetId: await this.id, + }); + } + + public async getSnapshot( + lookup: DatasetSnapshotLookup, + ): Promise { + const state = await this.getState(); + const datasetId = await this.id; + return await getDatasetSnapshot({ + state, + datasetId, + ...lookup, + }); + } + + public async updateSnapshot( + snapshotId: string, + { + name, + description, + }: { + readonly name?: string; + readonly description?: string | null; + }, + ): Promise { + const state = await this.getState(); + return datasetSnapshotSchema.parse( + await 
state.appConn().post_json("api/dataset_snapshot/patch_id", { + id: snapshotId, + name, + description, + }), + ); + } + + public async deleteSnapshot(snapshotId: string): Promise { + const state = await this.getState(); + return datasetSnapshotSchema.parse( + await state.appConn().post_json("api/dataset_snapshot/delete_id", { + id: snapshotId, + }), + ); + } + + public async restorePreview({ + version, + }: { + readonly version: string; + }): Promise { + await this.flush(); + const state = await this.getState(); + const datasetId = await this.id; + return datasetRestorePreviewResultSchema.parse( + await state + .apiConn() + .post_json(`v1/dataset/${datasetId}/restore/preview`, { + version, + }), + ); + } + + public async restore({ + version, + }: { + readonly version: string; + }): Promise { + await this.flush(); + const state = await this.getState(); + const datasetId = await this.id; + return datasetRestoreResultSchema.parse( + await state.apiConn().post_json(`v1/dataset/${datasetId}/restore`, { + version, + }), + ); + } + /** * Summarize the dataset, including high level metrics about its size and other metadata. * @param summarizeData Whether to summarize the data. If false, only the metadata will be returned. diff --git a/js/tests/api-compatibility/api-compatibility.test.ts b/js/tests/api-compatibility/api-compatibility.test.ts index 80d15c45f..2fa625c1c 100644 --- a/js/tests/api-compatibility/api-compatibility.test.ts +++ b/js/tests/api-compatibility/api-compatibility.test.ts @@ -1492,8 +1492,8 @@ function areZodSchemaSignaturesCompatible( } /** - * Compares class signatures by extracting and comparing individual methods - * This handles cases where methods gain optional parameters or optional fields + * Compares class signatures using a cheap heuristic first and a parsed + * TypeScript AST fallback when the heuristic rejects the change. 
*/ function areClassSignaturesCompatible( oldClass: string, @@ -1533,21 +1533,8 @@ function areClassSignaturesCompatible( // Also check normalized versions (removing optional markers and defaults) // If normalized versions are similar, the changes are likely just optionality - const normalizeClass = (classText: string): string => { - let normalized = classText; - // Remove optional markers: field?: Type -> field: Type - normalized = normalized.replace( - /([a-zA-Z_$][a-zA-Z0-9_$]*)\?:\s*/g, - "$1: ", - ); - // Remove default values - normalized = normalized.replace(/=\s*\{[^}]*\}/g, "= {}"); - normalized = normalized.replace(/=\s*[^,)}]+/g, ""); - return normalized; - }; - - const oldNormalized = normalizeClass(oldClass); - const newNormalized = normalizeClass(newClass); + const oldNormalized = normalizeClassForFastPath(oldClass); + const newNormalized = normalizeClassForFastPath(newClass); // Check if normalized versions are similar (one contains significant portion of the other) const similarityThreshold = Math.min(500, oldNormalized.length * 0.5); @@ -1555,6 +1542,432 @@ function areClassSignaturesCompatible( return true; } + const oldParsed = parseClassSignature(oldClass); + const newParsed = parseClassSignature(newClass); + if (!oldParsed || !newParsed) { + return false; + } + + return areParsedClassSignaturesCompatible(oldParsed, newParsed); +} + +interface ParsedClassSignature { + readonly name: string; + readonly typeParameters: string; + readonly heritage: string; + readonly isAbstract: boolean; + readonly members: Map; +} + +interface ParsedClassParameter { + readonly type: string; + readonly optional: boolean; + readonly rest: boolean; +} + +interface ParsedClassCallableMember { + readonly kind: "callable"; + readonly key: string; + readonly typeParameters: string; + readonly params: ParsedClassParameter[]; + readonly returnType: string; +} + +interface ParsedClassPropertyMember { + readonly kind: "property"; + readonly key: string; + readonly type: 
string; + readonly optional: boolean; + readonly isReadonly: boolean; +} + +type ParsedClassMember = ParsedClassCallableMember | ParsedClassPropertyMember; + +function normalizeClassForFastPath(classText: string): string { + let normalized = classText; + normalized = normalized.replace(/([a-zA-Z_$][a-zA-Z0-9_$]*)\?:\s*/g, "$1: "); + normalized = normalized.replace(/=\s*\{[^}]*\}/g, "= {}"); + normalized = normalized.replace(/=\s*[^,)}]+/g, ""); + return normalized; +} + +function normalizeClassFragment(fragment: string): string { + return normalizeTypeReference(fragment.replace(/\s+/g, " ").trim()); +} + +function hasModifier(node: ts.Node, kind: ts.SyntaxKind): boolean { + if (!ts.canHaveModifiers(node)) { + return false; + } + + return !!ts.getModifiers(node)?.some((modifier) => modifier.kind === kind); +} + +function getClassMemberVisibility( + node: ts.Node, +): "public" | "protected" | "private" { + if (hasModifier(node, ts.SyntaxKind.PrivateKeyword)) { + return "private"; + } + + if (hasModifier(node, ts.SyntaxKind.ProtectedKeyword)) { + return "protected"; + } + + return "public"; +} + +function isPrivateClassMember(member: ts.ClassElement): boolean { + if (ts.isConstructorDeclaration(member)) { + return false; + } + + if ( + (ts.isPropertyDeclaration(member) || + ts.isMethodDeclaration(member) || + ts.isGetAccessorDeclaration(member) || + ts.isSetAccessorDeclaration(member)) && + member.name && + ts.isPrivateIdentifier(member.name) + ) { + return true; + } + + return getClassMemberVisibility(member) === "private"; +} + +function isParameterProperty(parameter: ts.ParameterDeclaration): boolean { + if (!ts.canHaveModifiers(parameter)) { + return false; + } + + return !!ts.getModifiers(parameter)?.some((modifier) => { + return ( + modifier.kind === ts.SyntaxKind.PublicKeyword || + modifier.kind === ts.SyntaxKind.ProtectedKeyword || + modifier.kind === ts.SyntaxKind.PrivateKeyword || + modifier.kind === ts.SyntaxKind.ReadonlyKeyword + ); + }); +} + +function 
addParsedClassMember( + members: Map, + member: ParsedClassMember, +): void { + const existing = members.get(member.key); + if (existing) { + existing.push(member); + return; + } + + members.set(member.key, [member]); +} + +function parseClassParameters( + parameters: readonly ts.ParameterDeclaration[], + sourceFile: ts.SourceFile, +): ParsedClassParameter[] { + return parameters.map((parameter) => ({ + type: normalizeClassFragment(parameter.type?.getText(sourceFile) ?? ""), + optional: !!parameter.questionToken || parameter.initializer !== undefined, + rest: parameter.dotDotDotToken !== undefined, + })); +} + +function parseClassCallableMember( + key: string, + declaration: ts.SignatureDeclarationBase, + sourceFile: ts.SourceFile, + returnType: string, +): ParsedClassCallableMember { + return { + kind: "callable", + key, + typeParameters: normalizeClassFragment( + declaration.typeParameters + ?.map((typeParameter) => typeParameter.getText(sourceFile)) + .join(", ") ?? "", + ), + params: parseClassParameters(declaration.parameters, sourceFile), + returnType: normalizeClassFragment(returnType), + }; +} + +function parseClassPropertyMember( + key: string, + type: string, + optional: boolean, + isReadonly: boolean, +): ParsedClassPropertyMember { + return { + kind: "property", + key, + type: normalizeClassFragment(type), + optional, + isReadonly, + }; +} + +function parseConstructorParameterProperties( + constructor: ts.ConstructorDeclaration, + sourceFile: ts.SourceFile, +): ParsedClassPropertyMember[] { + const properties: ParsedClassPropertyMember[] = []; + + for (const parameter of constructor.parameters) { + if (!isParameterProperty(parameter)) { + continue; + } + + const visibility = getClassMemberVisibility(parameter); + if (visibility === "private") { + continue; + } + + properties.push( + parseClassPropertyMember( + `property:false:${visibility}:false:${parameter.name.getText(sourceFile)}`, + parameter.type?.getText(sourceFile) ?? 
"", + !!parameter.questionToken || parameter.initializer !== undefined, + hasModifier(parameter, ts.SyntaxKind.ReadonlyKeyword), + ), + ); + } + + return properties; +} + +function parseClassSignature(classText: string): ParsedClassSignature | null { + const sourceFile = ts.createSourceFile( + "class.ts", + classText, + ts.ScriptTarget.Latest, + true, + ts.ScriptKind.TS, + ); + + let declaration: ts.ClassDeclaration | undefined; + ts.forEachChild(sourceFile, (node) => { + if (ts.isClassDeclaration(node) && !declaration) { + declaration = node; + } + }); + + if (!declaration || !declaration.name) { + return null; + } + + const members = new Map(); + + for (const member of declaration.members) { + if (ts.isConstructorDeclaration(member)) { + addParsedClassMember( + members, + parseClassCallableMember( + `constructor:${getClassMemberVisibility(member)}`, + member, + sourceFile, + "", + ), + ); + for (const property of parseConstructorParameterProperties( + member, + sourceFile, + )) { + addParsedClassMember(members, property); + } + continue; + } + + if (isPrivateClassMember(member)) { + continue; + } + + const isStatic = hasModifier(member, ts.SyntaxKind.StaticKeyword); + const visibility = getClassMemberVisibility(member); + const isAbstract = hasModifier(member, ts.SyntaxKind.AbstractKeyword); + + if (ts.isPropertyDeclaration(member) && member.name) { + addParsedClassMember( + members, + parseClassPropertyMember( + `property:${isStatic}:${visibility}:${isAbstract}:${member.name.getText(sourceFile)}`, + member.type?.getText(sourceFile) ?? "", + !!member.questionToken, + hasModifier(member, ts.SyntaxKind.ReadonlyKeyword), + ), + ); + continue; + } + + if (ts.isMethodDeclaration(member)) { + addParsedClassMember( + members, + parseClassCallableMember( + `method:${isStatic}:${visibility}:${isAbstract}:${member.name.getText(sourceFile)}`, + member, + sourceFile, + member.type?.getText(sourceFile) ?? 
"", + ), + ); + continue; + } + + if (ts.isGetAccessorDeclaration(member)) { + addParsedClassMember( + members, + parseClassCallableMember( + `get:${isStatic}:${visibility}:${isAbstract}:${member.name.getText(sourceFile)}`, + member, + sourceFile, + member.type?.getText(sourceFile) ?? "", + ), + ); + continue; + } + + if (ts.isSetAccessorDeclaration(member)) { + addParsedClassMember( + members, + parseClassCallableMember( + `set:${isStatic}:${visibility}:${isAbstract}:${member.name.getText(sourceFile)}`, + member, + sourceFile, + "", + ), + ); + continue; + } + + if (ts.isIndexSignatureDeclaration(member)) { + addParsedClassMember( + members, + parseClassCallableMember( + `index:${visibility}:${isAbstract}`, + member, + sourceFile, + member.type?.getText(sourceFile) ?? "", + ), + ); + } + } + + return { + name: declaration.name.text, + typeParameters: normalizeClassFragment( + declaration.typeParameters + ?.map((typeParameter) => typeParameter.getText(sourceFile)) + .join(", ") ?? "", + ), + heritage: normalizeClassFragment( + declaration.heritageClauses + ?.map((heritageClause) => heritageClause.getText(sourceFile)) + .join(" ") ?? 
"", + ), + isAbstract: hasModifier(declaration, ts.SyntaxKind.AbstractKeyword), + members, + }; +} + +function areParsedClassSignaturesCompatible( + oldParsed: ParsedClassSignature, + newParsed: ParsedClassSignature, +): boolean { + if ( + oldParsed.name !== newParsed.name || + oldParsed.typeParameters !== newParsed.typeParameters || + oldParsed.heritage !== newParsed.heritage || + oldParsed.isAbstract !== newParsed.isAbstract + ) { + return false; + } + + for (const [key, oldMembers] of oldParsed.members) { + const newMembers = newParsed.members.get(key); + if (!newMembers) { + return false; + } + + const matchedNewMembers = new Set(); + for (const oldMember of oldMembers) { + const matchIndex = newMembers.findIndex((newMember, index) => { + return ( + !matchedNewMembers.has(index) && + areParsedClassMembersCompatible(oldMember, newMember) + ); + }); + + if (matchIndex === -1) { + return false; + } + + matchedNewMembers.add(matchIndex); + } + } + + return true; +} + +function areParsedClassMembersCompatible( + oldMember: ParsedClassMember, + newMember: ParsedClassMember, +): boolean { + if (oldMember.kind !== newMember.kind) { + return false; + } + + if (oldMember.kind === "property" && newMember.kind === "property") { + if (oldMember.isReadonly !== newMember.isReadonly) { + return false; + } + + if (oldMember.optional !== newMember.optional) { + return false; + } + + if (oldMember.type === newMember.type) { + return true; + } + + return isUnionTypeWidening(oldMember.type, newMember.type); + } + + if (oldMember.kind === "callable" && newMember.kind === "callable") { + if ( + oldMember.typeParameters !== newMember.typeParameters || + oldMember.returnType !== newMember.returnType + ) { + return false; + } + + for (let i = 0; i < oldMember.params.length; i++) { + const oldParam = oldMember.params[i]; + const newParam = newMember.params[i]; + + if (!newParam) { + return false; + } + + if (oldParam.type !== newParam.type || oldParam.rest !== newParam.rest) { + return 
false; + } + + if (oldParam.optional && !newParam.optional) { + return false; + } + } + + for (let i = oldMember.params.length; i < newMember.params.length; i++) { + const newParam = newMember.params[i]; + if (!newParam.optional && !newParam.rest) { + return false; + } + } + + return true; + } + return false; } @@ -1832,6 +2245,93 @@ describe("areInterfaceSignaturesCompatible", () => { }); }); +describe("areClassSignaturesCompatible", () => { + test("should allow adding private members and an optional constructor parameter", () => { + const oldClass = `export declare class Dataset { + private readonly lazyMetadata; + private readonly __braintrust_dataset_marker; + constructor( + private state: BraintrustState, + lazyMetadata: LazyValue, + pinnedVersion?: string, + ); + version(options?: { batchSize?: number }): Promise; + }`; + const newClass = `export declare class Dataset { + private readonly lazyMetadata; + private lazyPinnedVersion?: LazyValue; + private pinnedEnvironment?: string; + private pinnedSnapshotName?: string; + private readonly __braintrust_dataset_marker; + constructor( + private state: BraintrustState, + lazyMetadata: LazyValue, + pinnedVersion?: string, + pinState?: DatasetPinState, + ); + version(options?: { batchSize?: number }): Promise; + }`; + + expect(areClassSignaturesCompatible(oldClass, newClass)).toBe(true); + }); + + test("should allow adding an optional method parameter", () => { + const oldClass = `export declare class Example { + run(input: string): Promise; + }`; + const newClass = `export declare class Example { + run(input: string, options?: { retries?: number }): Promise; + }`; + + expect(areClassSignaturesCompatible(oldClass, newClass)).toBe(true); + }); + + test("should use parsed fallback when private members shift public member positions", () => { + const oldClass = `export declare class Example { + alpha(first: string, second: number): Promise<{ ok: true }>; + beta(value?: boolean): void; + }`; + const newClass = `export 
declare class Example { + private cacheOne?: string; + private cacheTwo?: string; + private cacheThree?: string; + alpha(first: string, second: number): Promise<{ ok: true }>; + beta(value?: boolean): void; + }`; + + expect(areClassSignaturesCompatible(oldClass, newClass)).toBe(true); + }); + + test("should ignore class member ordering", () => { + const oldClass = `export declare class Example { + alpha(): void; + protected beta(value: string): number; + }`; + const newClass = `export declare class Example { + protected beta(value: string): number; + alpha(): void; + }`; + + expect(areClassSignaturesCompatible(oldClass, newClass)).toBe(true); + }); + + test("should reject adding a required constructor parameter when fallback runs", () => { + const oldClass = `export declare class Example { + constructor(value: string); + run(input: string): void; + }`; + const newClass = `export declare class Example { + private cacheOne?: string; + private cacheTwo?: string; + private cacheThree?: string; + constructor(value: string, version: number); + run(input: string): void; + }`; + + expect(areClassSignaturesCompatible(oldClass, newClass)).toBe(false); + }); +}); + describe("areFunctionSignaturesCompatible", () => { test("should allow adding optional parameter at end", () => { const oldFn = `export function foo(a: string): void`; From fbd8d215698ab3767bfbdfabb0a7ad21a7a25d27 Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Fri, 24 Apr 2026 12:16:11 +0200 Subject: [PATCH 13/26] fix: Capture reasoning in mistral (#1863) Fixes https://github.com/braintrustdata/braintrust-sdk-javascript/issues/1857 --- .changeset/lemon-regions-shine.md | 5 + .../mistral-v1-10-0.log-payloads.json | 121 ++++++++-- .../mistral-v1-10-0.span-events.json | 123 ++++++++-- .../mistral-v1-14-1.log-payloads.json | 121 ++++++++-- .../mistral-v1-14-1.span-events.json | 123 ++++++++-- .../mistral-v1-15-1.log-payloads.json | 121 ++++++++-- .../mistral-v1-15-1.span-events.json | 123 ++++++++-- 
.../mistral-v1-3-4.log-payloads.json | 71 ++++-- .../mistral-v1-3-4.span-events.json | 87 +++++-- .../mistral-v1.log-payloads.json | 121 ++++++++-- .../__snapshots__/mistral-v1.span-events.json | 123 ++++++++-- .../mistral-v2.log-payloads.json | 123 ++++++++-- .../__snapshots__/mistral-v2.span-events.json | 125 ++++++++-- .../mistral-instrumentation/assertions.ts | 224 +++++++++++++++--- .../mistral-instrumentation/constants.mjs | 4 + .../mistral-instrumentation/scenario.impl.mjs | 95 ++++++-- .../scenario.mistral-v1-3-4.mjs | 6 +- .../scenario.mistral-v1-3-4.ts | 6 +- .../mistral-instrumentation/scenario.test.ts | 6 + .../plugins/mistral-plugin.test.ts | 70 ++++++ .../instrumentation/plugins/mistral-plugin.ts | 172 +++++++++++++- js/src/vendor-sdk-types/mistral.ts | 20 +- 22 files changed, 1686 insertions(+), 304 deletions(-) create mode 100644 .changeset/lemon-regions-shine.md diff --git a/.changeset/lemon-regions-shine.md b/.changeset/lemon-regions-shine.md new file mode 100644 index 000000000..ec49649aa --- /dev/null +++ b/.changeset/lemon-regions-shine.md @@ -0,0 +1,5 @@ +--- +"braintrust": patch +--- + +fix: Capture reasoning in mistral diff --git a/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-10-0.log-payloads.json b/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-10-0.log-payloads.json index e02cea9c9..fe7fc96b4 100644 --- a/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-10-0.log-payloads.json +++ b/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-10-0.log-payloads.json @@ -98,12 +98,101 @@ "has_output": false, "input": null, "metadata": { - "operation": "chat-tool-call" + "operation": "chat-stream-reasoning" }, "metric_keys": [], "output": null, "span_id": "" }, + { + "has_input": true, + "has_output": true, + "input": { + "item_count": 1, + "roles": [ + "user" + ], + "type": "messages" + }, + "metadata": { + "model": "mistral-small-latest", + "provider": "mistral", + "reasoning_effort": "high" 
+ }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens" + ], + "output": { + "choice_count": 1, + "finish_reason": null, + "has_content": true, + "role": "assistant", + "tool_call_count": 0, + "type": "choices" + }, + "span_id": "" + }, + { + "has_input": false, + "has_output": false, + "input": null, + "metadata": { + "operation": "chat-stream-thinking" + }, + "metric_keys": [], + "output": null, + "span_id": "" + }, + { + "has_input": true, + "has_output": true, + "input": { + "item_count": 1, + "roles": [ + "user" + ], + "type": "messages" + }, + "metadata": { + "model": "magistral-small-latest", + "provider": "mistral" + }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens" + ], + "output": { + "choice_count": 1, + "content_part_types": [ + "thinking", + "text" + ], + "finish_reason": null, + "has_content": true, + "role": "assistant", + "text_block_count": 1, + "thinking_block_count": 1, + "tool_call_count": 0, + "type": "choices" + }, + "span_id": "" + }, + { + "has_input": false, + "has_output": false, + "input": null, + "metadata": { + "operation": "chat-tool-call" + }, + "metric_keys": [], + "output": null, + "span_id": "" + }, { "has_input": true, "has_output": true, @@ -132,7 +221,7 @@ "tool_call_count": 1, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -162,7 +251,7 @@ "tool_call_count": 1, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -193,7 +282,7 @@ "tool_call_count": 2, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -204,7 +293,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -228,7 +317,7 @@ "tool_call_count": 0, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -239,7 +328,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ 
-263,7 +352,7 @@ "tool_call_count": 0, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -274,7 +363,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -304,7 +393,7 @@ "tool_call_count": 0, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -315,7 +404,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -345,7 +434,7 @@ "tool_call_count": 1, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -356,7 +445,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -386,7 +475,7 @@ "tool_call_count": 0, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -397,7 +486,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -416,6 +505,6 @@ "embedding_length": "", "type": "embedding" }, - "span_id": "" + "span_id": "" } ] diff --git a/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-10-0.span-events.json b/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-10-0.span-events.json index ab1e0df51..837183598 100644 --- a/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-10-0.span-events.json +++ b/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-10-0.span-events.json @@ -84,6 +84,79 @@ ], "type": "llm" }, + { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "chat-stream-reasoning" + }, + "metric_keys": [], + "name": "mistral-chat-reasoning-stream-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "model": "mistral-small-latest", + "provider": "mistral", + "reasoning_effort": "high" + }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "time_to_first_token", + 
"tokens" + ], + "name": "mistral.chat.stream", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "chat-stream-thinking" + }, + "metric_keys": [], + "name": "mistral-chat-thinking-stream-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "model": "magistral-small-latest", + "provider": "mistral" + }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens" + ], + "name": "mistral.chat.stream", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, { "has_input": false, "has_output": false, @@ -93,7 +166,7 @@ "metric_keys": [], "name": "mistral-chat-tool-call-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -114,9 +187,9 @@ ], "name": "mistral.chat.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -135,9 +208,9 @@ ], "name": "mistral.chat.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -156,9 +229,9 @@ ], "name": "mistral.chat.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -171,7 +244,7 @@ "metric_keys": [], "name": "mistral-fim-complete-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -192,9 +265,9 @@ ], "name": "mistral.fim.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -207,7 +280,7 @@ "metric_keys": [], "name": "mistral-fim-stream-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -228,9 +301,9 @@ ], "name": "mistral.fim.stream", "root_span_id": "", - "span_id": 
"", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -243,7 +316,7 @@ "metric_keys": [], "name": "mistral-agents-complete-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -264,9 +337,9 @@ ], "name": "mistral.agents.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -279,7 +352,7 @@ "metric_keys": [], "name": "mistral-agents-tool-call-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -300,9 +373,9 @@ ], "name": "mistral.agents.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -315,7 +388,7 @@ "metric_keys": [], "name": "mistral-agents-stream-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -336,9 +409,9 @@ ], "name": "mistral.agents.stream", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -351,7 +424,7 @@ "metric_keys": [], "name": "mistral-embeddings-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -371,9 +444,9 @@ ], "name": "mistral.embeddings.create", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" } diff --git a/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-14-1.log-payloads.json b/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-14-1.log-payloads.json index e02cea9c9..fe7fc96b4 100644 --- a/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-14-1.log-payloads.json +++ b/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-14-1.log-payloads.json @@ -98,12 +98,101 @@ "has_output": false, "input": null, "metadata": { - "operation": "chat-tool-call" + "operation": "chat-stream-reasoning" }, "metric_keys": [], "output": null, "span_id": "" }, + { + "has_input": true, + "has_output": true, 
+ "input": { + "item_count": 1, + "roles": [ + "user" + ], + "type": "messages" + }, + "metadata": { + "model": "mistral-small-latest", + "provider": "mistral", + "reasoning_effort": "high" + }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens" + ], + "output": { + "choice_count": 1, + "finish_reason": null, + "has_content": true, + "role": "assistant", + "tool_call_count": 0, + "type": "choices" + }, + "span_id": "" + }, + { + "has_input": false, + "has_output": false, + "input": null, + "metadata": { + "operation": "chat-stream-thinking" + }, + "metric_keys": [], + "output": null, + "span_id": "" + }, + { + "has_input": true, + "has_output": true, + "input": { + "item_count": 1, + "roles": [ + "user" + ], + "type": "messages" + }, + "metadata": { + "model": "magistral-small-latest", + "provider": "mistral" + }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens" + ], + "output": { + "choice_count": 1, + "content_part_types": [ + "thinking", + "text" + ], + "finish_reason": null, + "has_content": true, + "role": "assistant", + "text_block_count": 1, + "thinking_block_count": 1, + "tool_call_count": 0, + "type": "choices" + }, + "span_id": "" + }, + { + "has_input": false, + "has_output": false, + "input": null, + "metadata": { + "operation": "chat-tool-call" + }, + "metric_keys": [], + "output": null, + "span_id": "" + }, { "has_input": true, "has_output": true, @@ -132,7 +221,7 @@ "tool_call_count": 1, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -162,7 +251,7 @@ "tool_call_count": 1, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -193,7 +282,7 @@ "tool_call_count": 2, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -204,7 +293,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -228,7 +317,7 @@ "tool_call_count": 0, "type": 
"choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -239,7 +328,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -263,7 +352,7 @@ "tool_call_count": 0, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -274,7 +363,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -304,7 +393,7 @@ "tool_call_count": 0, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -315,7 +404,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -345,7 +434,7 @@ "tool_call_count": 1, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -356,7 +445,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -386,7 +475,7 @@ "tool_call_count": 0, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -397,7 +486,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -416,6 +505,6 @@ "embedding_length": "", "type": "embedding" }, - "span_id": "" + "span_id": "" } ] diff --git a/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-14-1.span-events.json b/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-14-1.span-events.json index ab1e0df51..837183598 100644 --- a/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-14-1.span-events.json +++ b/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-14-1.span-events.json @@ -84,6 +84,79 @@ ], "type": "llm" }, + { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "chat-stream-reasoning" + }, + "metric_keys": [], + "name": "mistral-chat-reasoning-stream-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + { + "has_input": true, + "has_output": true, + "metadata": { + 
"model": "mistral-small-latest", + "provider": "mistral", + "reasoning_effort": "high" + }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens" + ], + "name": "mistral.chat.stream", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "chat-stream-thinking" + }, + "metric_keys": [], + "name": "mistral-chat-thinking-stream-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "model": "magistral-small-latest", + "provider": "mistral" + }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens" + ], + "name": "mistral.chat.stream", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, { "has_input": false, "has_output": false, @@ -93,7 +166,7 @@ "metric_keys": [], "name": "mistral-chat-tool-call-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -114,9 +187,9 @@ ], "name": "mistral.chat.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -135,9 +208,9 @@ ], "name": "mistral.chat.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -156,9 +229,9 @@ ], "name": "mistral.chat.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -171,7 +244,7 @@ "metric_keys": [], "name": "mistral-fim-complete-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -192,9 +265,9 @@ ], "name": "mistral.fim.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -207,7 +280,7 @@ "metric_keys": [], "name": 
"mistral-fim-stream-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -228,9 +301,9 @@ ], "name": "mistral.fim.stream", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -243,7 +316,7 @@ "metric_keys": [], "name": "mistral-agents-complete-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -264,9 +337,9 @@ ], "name": "mistral.agents.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -279,7 +352,7 @@ "metric_keys": [], "name": "mistral-agents-tool-call-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -300,9 +373,9 @@ ], "name": "mistral.agents.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -315,7 +388,7 @@ "metric_keys": [], "name": "mistral-agents-stream-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -336,9 +409,9 @@ ], "name": "mistral.agents.stream", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -351,7 +424,7 @@ "metric_keys": [], "name": "mistral-embeddings-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -371,9 +444,9 @@ ], "name": "mistral.embeddings.create", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" } diff --git a/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-15-1.log-payloads.json b/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-15-1.log-payloads.json index e02cea9c9..fe7fc96b4 100644 --- a/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-15-1.log-payloads.json +++ b/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-15-1.log-payloads.json @@ -98,12 +98,101 @@ "has_output": false, 
"input": null, "metadata": { - "operation": "chat-tool-call" + "operation": "chat-stream-reasoning" }, "metric_keys": [], "output": null, "span_id": "" }, + { + "has_input": true, + "has_output": true, + "input": { + "item_count": 1, + "roles": [ + "user" + ], + "type": "messages" + }, + "metadata": { + "model": "mistral-small-latest", + "provider": "mistral", + "reasoning_effort": "high" + }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens" + ], + "output": { + "choice_count": 1, + "finish_reason": null, + "has_content": true, + "role": "assistant", + "tool_call_count": 0, + "type": "choices" + }, + "span_id": "" + }, + { + "has_input": false, + "has_output": false, + "input": null, + "metadata": { + "operation": "chat-stream-thinking" + }, + "metric_keys": [], + "output": null, + "span_id": "" + }, + { + "has_input": true, + "has_output": true, + "input": { + "item_count": 1, + "roles": [ + "user" + ], + "type": "messages" + }, + "metadata": { + "model": "magistral-small-latest", + "provider": "mistral" + }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens" + ], + "output": { + "choice_count": 1, + "content_part_types": [ + "thinking", + "text" + ], + "finish_reason": null, + "has_content": true, + "role": "assistant", + "text_block_count": 1, + "thinking_block_count": 1, + "tool_call_count": 0, + "type": "choices" + }, + "span_id": "" + }, + { + "has_input": false, + "has_output": false, + "input": null, + "metadata": { + "operation": "chat-tool-call" + }, + "metric_keys": [], + "output": null, + "span_id": "" + }, { "has_input": true, "has_output": true, @@ -132,7 +221,7 @@ "tool_call_count": 1, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -162,7 +251,7 @@ "tool_call_count": 1, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -193,7 +282,7 @@ "tool_call_count": 2, "type": "choices" }, - "span_id": "" + 
"span_id": "" }, { "has_input": false, @@ -204,7 +293,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -228,7 +317,7 @@ "tool_call_count": 0, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -239,7 +328,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -263,7 +352,7 @@ "tool_call_count": 0, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -274,7 +363,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -304,7 +393,7 @@ "tool_call_count": 0, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -315,7 +404,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -345,7 +434,7 @@ "tool_call_count": 1, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -356,7 +445,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -386,7 +475,7 @@ "tool_call_count": 0, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -397,7 +486,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -416,6 +505,6 @@ "embedding_length": "", "type": "embedding" }, - "span_id": "" + "span_id": "" } ] diff --git a/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-15-1.span-events.json b/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-15-1.span-events.json index ab1e0df51..837183598 100644 --- a/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-15-1.span-events.json +++ b/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-15-1.span-events.json @@ -84,6 +84,79 @@ ], "type": "llm" }, + { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "chat-stream-reasoning" + }, + "metric_keys": [], + 
"name": "mistral-chat-reasoning-stream-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "model": "mistral-small-latest", + "provider": "mistral", + "reasoning_effort": "high" + }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens" + ], + "name": "mistral.chat.stream", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "chat-stream-thinking" + }, + "metric_keys": [], + "name": "mistral-chat-thinking-stream-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "model": "magistral-small-latest", + "provider": "mistral" + }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens" + ], + "name": "mistral.chat.stream", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, { "has_input": false, "has_output": false, @@ -93,7 +166,7 @@ "metric_keys": [], "name": "mistral-chat-tool-call-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -114,9 +187,9 @@ ], "name": "mistral.chat.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -135,9 +208,9 @@ ], "name": "mistral.chat.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -156,9 +229,9 @@ ], "name": "mistral.chat.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -171,7 +244,7 @@ "metric_keys": [], "name": "mistral-fim-complete-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -192,9 +265,9 
@@ ], "name": "mistral.fim.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -207,7 +280,7 @@ "metric_keys": [], "name": "mistral-fim-stream-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -228,9 +301,9 @@ ], "name": "mistral.fim.stream", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -243,7 +316,7 @@ "metric_keys": [], "name": "mistral-agents-complete-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -264,9 +337,9 @@ ], "name": "mistral.agents.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -279,7 +352,7 @@ "metric_keys": [], "name": "mistral-agents-tool-call-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -300,9 +373,9 @@ ], "name": "mistral.agents.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -315,7 +388,7 @@ "metric_keys": [], "name": "mistral-agents-stream-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -336,9 +409,9 @@ ], "name": "mistral.agents.stream", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -351,7 +424,7 @@ "metric_keys": [], "name": "mistral-embeddings-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -371,9 +444,9 @@ ], "name": "mistral.embeddings.create", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" } diff --git a/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-3-4.log-payloads.json b/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-3-4.log-payloads.json index 6fde9fa53..089c640c1 100644 --- 
a/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-3-4.log-payloads.json +++ b/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-3-4.log-payloads.json @@ -95,12 +95,51 @@ "has_output": false, "input": null, "metadata": { - "operation": "chat-tool-call" + "operation": "chat-stream-reasoning" }, "metric_keys": [], "output": null, "span_id": "" }, + { + "has_input": true, + "has_output": true, + "input": { + "item_count": 1, + "roles": [ + "user" + ], + "type": "messages" + }, + "metadata": { + "model": "mistral-small-latest", + "provider": "mistral", + "reasoning_effort": "high" + }, + "metric_keys": [ + "time_to_first_token" + ], + "output": { + "choice_count": 1, + "finish_reason": null, + "has_content": true, + "role": "assistant", + "tool_call_count": 0, + "type": "choices" + }, + "span_id": "" + }, + { + "has_input": false, + "has_output": false, + "input": null, + "metadata": { + "operation": "chat-tool-call" + }, + "metric_keys": [], + "output": null, + "span_id": "" + }, { "has_input": true, "has_output": true, @@ -129,7 +168,7 @@ "tool_call_count": 1, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -159,7 +198,7 @@ "tool_call_count": 1, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -190,7 +229,7 @@ "tool_call_count": 2, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -201,7 +240,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -225,7 +264,7 @@ "tool_call_count": 0, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -236,7 +275,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -257,7 +296,7 @@ "tool_call_count": 0, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -268,7 +307,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, 
@@ -298,7 +337,7 @@ "tool_call_count": 0, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -309,7 +348,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -339,7 +378,7 @@ "tool_call_count": 1, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -350,7 +389,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -380,7 +419,7 @@ "tool_call_count": 0, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -391,7 +430,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -410,6 +449,6 @@ "embedding_length": "", "type": "embedding" }, - "span_id": "" + "span_id": "" } ] diff --git a/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-3-4.span-events.json b/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-3-4.span-events.json index 09caba8c2..e9b4e1042 100644 --- a/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-3-4.span-events.json +++ b/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1-3-4.span-events.json @@ -84,6 +84,43 @@ ], "type": "llm" }, + { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "chat-stream-reasoning" + }, + "metric_keys": [], + "name": "mistral-chat-reasoning-stream-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "model": "mistral-small-latest", + "provider": "mistral", + "reasoning_effort": "high" + }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens" + ], + "name": "mistral.chat.stream", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, { "has_input": false, "has_output": false, @@ -93,7 +130,7 @@ "metric_keys": [], "name": 
"mistral-chat-tool-call-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -114,9 +151,9 @@ ], "name": "mistral.chat.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -135,9 +172,9 @@ ], "name": "mistral.chat.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -156,9 +193,9 @@ ], "name": "mistral.chat.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -171,7 +208,7 @@ "metric_keys": [], "name": "mistral-fim-complete-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -192,9 +229,9 @@ ], "name": "mistral.fim.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -207,7 +244,7 @@ "metric_keys": [], "name": "mistral-fim-stream-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -225,9 +262,9 @@ ], "name": "mistral.fim.stream", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -240,7 +277,7 @@ "metric_keys": [], "name": "mistral-agents-complete-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -261,9 +298,9 @@ ], "name": "mistral.agents.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -276,7 +313,7 @@ "metric_keys": [], "name": "mistral-agents-tool-call-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -297,9 +334,9 @@ ], "name": "mistral.agents.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -312,7 +349,7 @@ "metric_keys": [], "name": "mistral-agents-stream-operation", "root_span_id": "", - "span_id": "", + "span_id": "", 
"span_parents": [ "" ], @@ -333,9 +370,9 @@ ], "name": "mistral.agents.stream", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -348,7 +385,7 @@ "metric_keys": [], "name": "mistral-embeddings-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -368,9 +405,9 @@ ], "name": "mistral.embeddings.create", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" } diff --git a/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1.log-payloads.json b/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1.log-payloads.json index e02cea9c9..fe7fc96b4 100644 --- a/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1.log-payloads.json +++ b/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1.log-payloads.json @@ -98,12 +98,101 @@ "has_output": false, "input": null, "metadata": { - "operation": "chat-tool-call" + "operation": "chat-stream-reasoning" }, "metric_keys": [], "output": null, "span_id": "" }, + { + "has_input": true, + "has_output": true, + "input": { + "item_count": 1, + "roles": [ + "user" + ], + "type": "messages" + }, + "metadata": { + "model": "mistral-small-latest", + "provider": "mistral", + "reasoning_effort": "high" + }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens" + ], + "output": { + "choice_count": 1, + "finish_reason": null, + "has_content": true, + "role": "assistant", + "tool_call_count": 0, + "type": "choices" + }, + "span_id": "" + }, + { + "has_input": false, + "has_output": false, + "input": null, + "metadata": { + "operation": "chat-stream-thinking" + }, + "metric_keys": [], + "output": null, + "span_id": "" + }, + { + "has_input": true, + "has_output": true, + "input": { + "item_count": 1, + "roles": [ + "user" + ], + "type": "messages" + }, + "metadata": { + "model": "magistral-small-latest", + "provider": "mistral" + 
}, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens" + ], + "output": { + "choice_count": 1, + "content_part_types": [ + "thinking", + "text" + ], + "finish_reason": null, + "has_content": true, + "role": "assistant", + "text_block_count": 1, + "thinking_block_count": 1, + "tool_call_count": 0, + "type": "choices" + }, + "span_id": "" + }, + { + "has_input": false, + "has_output": false, + "input": null, + "metadata": { + "operation": "chat-tool-call" + }, + "metric_keys": [], + "output": null, + "span_id": "" + }, { "has_input": true, "has_output": true, @@ -132,7 +221,7 @@ "tool_call_count": 1, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -162,7 +251,7 @@ "tool_call_count": 1, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -193,7 +282,7 @@ "tool_call_count": 2, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -204,7 +293,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -228,7 +317,7 @@ "tool_call_count": 0, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -239,7 +328,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -263,7 +352,7 @@ "tool_call_count": 0, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -274,7 +363,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -304,7 +393,7 @@ "tool_call_count": 0, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -315,7 +404,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -345,7 +434,7 @@ "tool_call_count": 1, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -356,7 +445,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { 
"has_input": true, @@ -386,7 +475,7 @@ "tool_call_count": 0, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -397,7 +486,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -416,6 +505,6 @@ "embedding_length": "", "type": "embedding" }, - "span_id": "" + "span_id": "" } ] diff --git a/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1.span-events.json b/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1.span-events.json index ab1e0df51..837183598 100644 --- a/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1.span-events.json +++ b/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v1.span-events.json @@ -84,6 +84,79 @@ ], "type": "llm" }, + { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "chat-stream-reasoning" + }, + "metric_keys": [], + "name": "mistral-chat-reasoning-stream-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "model": "mistral-small-latest", + "provider": "mistral", + "reasoning_effort": "high" + }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens" + ], + "name": "mistral.chat.stream", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "chat-stream-thinking" + }, + "metric_keys": [], + "name": "mistral-chat-thinking-stream-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "model": "magistral-small-latest", + "provider": "mistral" + }, + "metric_keys": [ + "completion_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens" + ], + "name": "mistral.chat.stream", + "root_span_id": "", + "span_id": 
"", + "span_parents": [ + "" + ], + "type": "llm" + }, { "has_input": false, "has_output": false, @@ -93,7 +166,7 @@ "metric_keys": [], "name": "mistral-chat-tool-call-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -114,9 +187,9 @@ ], "name": "mistral.chat.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -135,9 +208,9 @@ ], "name": "mistral.chat.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -156,9 +229,9 @@ ], "name": "mistral.chat.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -171,7 +244,7 @@ "metric_keys": [], "name": "mistral-fim-complete-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -192,9 +265,9 @@ ], "name": "mistral.fim.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -207,7 +280,7 @@ "metric_keys": [], "name": "mistral-fim-stream-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -228,9 +301,9 @@ ], "name": "mistral.fim.stream", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -243,7 +316,7 @@ "metric_keys": [], "name": "mistral-agents-complete-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -264,9 +337,9 @@ ], "name": "mistral.agents.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -279,7 +352,7 @@ "metric_keys": [], "name": "mistral-agents-tool-call-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -300,9 +373,9 @@ ], "name": "mistral.agents.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": 
"llm" }, @@ -315,7 +388,7 @@ "metric_keys": [], "name": "mistral-agents-stream-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -336,9 +409,9 @@ ], "name": "mistral.agents.stream", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -351,7 +424,7 @@ "metric_keys": [], "name": "mistral-embeddings-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -371,9 +444,9 @@ ], "name": "mistral.embeddings.create", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" } diff --git a/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v2.log-payloads.json b/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v2.log-payloads.json index dec74bdb6..f549ead00 100644 --- a/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v2.log-payloads.json +++ b/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v2.log-payloads.json @@ -100,12 +100,103 @@ "has_output": false, "input": null, "metadata": { - "operation": "chat-tool-call" + "operation": "chat-stream-reasoning" }, "metric_keys": [], "output": null, "span_id": "" }, + { + "has_input": true, + "has_output": true, + "input": { + "item_count": 1, + "roles": [ + "user" + ], + "type": "messages" + }, + "metadata": { + "model": "mistral-small-latest", + "provider": "mistral", + "reasoning_effort": "high" + }, + "metric_keys": [ + "completion_tokens", + "prompt_cached_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens" + ], + "output": { + "choice_count": 1, + "finish_reason": null, + "has_content": true, + "role": "assistant", + "tool_call_count": 0, + "type": "choices" + }, + "span_id": "" + }, + { + "has_input": false, + "has_output": false, + "input": null, + "metadata": { + "operation": "chat-stream-thinking" + }, + "metric_keys": [], + "output": null, + "span_id": "" + }, + { + "has_input": true, + 
"has_output": true, + "input": { + "item_count": 1, + "roles": [ + "user" + ], + "type": "messages" + }, + "metadata": { + "model": "magistral-small-latest", + "provider": "mistral" + }, + "metric_keys": [ + "completion_tokens", + "prompt_cached_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens" + ], + "output": { + "choice_count": 1, + "content_part_types": [ + "thinking", + "text" + ], + "finish_reason": null, + "has_content": true, + "role": "assistant", + "text_block_count": 1, + "thinking_block_count": 1, + "tool_call_count": 0, + "type": "choices" + }, + "span_id": "" + }, + { + "has_input": false, + "has_output": false, + "input": null, + "metadata": { + "operation": "chat-tool-call" + }, + "metric_keys": [], + "output": null, + "span_id": "" + }, { "has_input": true, "has_output": true, @@ -135,7 +226,7 @@ "tool_call_count": 1, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -166,7 +257,7 @@ "tool_call_count": 1, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -198,7 +289,7 @@ "tool_call_count": 2, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -209,7 +300,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -234,7 +325,7 @@ "tool_call_count": 0, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -245,7 +336,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -270,7 +361,7 @@ "tool_call_count": 0, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -281,7 +372,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -312,7 +403,7 @@ "tool_call_count": 0, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -323,7 +414,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ 
-354,7 +445,7 @@ "tool_call_count": 1, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -365,7 +456,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -396,7 +487,7 @@ "tool_call_count": 0, "type": "choices" }, - "span_id": "" + "span_id": "" }, { "has_input": false, @@ -407,7 +498,7 @@ }, "metric_keys": [], "output": null, - "span_id": "" + "span_id": "" }, { "has_input": true, @@ -426,6 +517,6 @@ "embedding_length": "", "type": "embedding" }, - "span_id": "" + "span_id": "" } ] diff --git a/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v2.span-events.json b/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v2.span-events.json index 1f09eecd5..df04617bf 100644 --- a/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v2.span-events.json +++ b/e2e/scenarios/mistral-instrumentation/__snapshots__/mistral-v2.span-events.json @@ -86,6 +86,81 @@ ], "type": "llm" }, + { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "chat-stream-reasoning" + }, + "metric_keys": [], + "name": "mistral-chat-reasoning-stream-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + { + "has_input": true, + "has_output": true, + "metadata": { + "model": "mistral-small-latest", + "provider": "mistral", + "reasoning_effort": "high" + }, + "metric_keys": [ + "completion_tokens", + "prompt_cached_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens" + ], + "name": "mistral.chat.stream", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, + { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "chat-stream-thinking" + }, + "metric_keys": [], + "name": "mistral-chat-thinking-stream-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + { + "has_input": true, + "has_output": true, + "metadata": 
{ + "model": "magistral-small-latest", + "provider": "mistral" + }, + "metric_keys": [ + "completion_tokens", + "prompt_cached_tokens", + "prompt_tokens", + "time_to_first_token", + "tokens" + ], + "name": "mistral.chat.stream", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "llm" + }, { "has_input": false, "has_output": false, @@ -95,7 +170,7 @@ "metric_keys": [], "name": "mistral-chat-tool-call-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -117,9 +192,9 @@ ], "name": "mistral.chat.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -139,9 +214,9 @@ ], "name": "mistral.chat.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -161,9 +236,9 @@ ], "name": "mistral.chat.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -176,7 +251,7 @@ "metric_keys": [], "name": "mistral-fim-complete-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -198,9 +273,9 @@ ], "name": "mistral.fim.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -213,7 +288,7 @@ "metric_keys": [], "name": "mistral-fim-stream-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -235,9 +310,9 @@ ], "name": "mistral.fim.stream", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -250,7 +325,7 @@ "metric_keys": [], "name": "mistral-agents-complete-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -272,9 +347,9 @@ ], "name": "mistral.agents.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -287,7 +362,7 @@ "metric_keys": [], "name": 
"mistral-agents-tool-call-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -309,9 +384,9 @@ ], "name": "mistral.agents.complete", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -324,7 +399,7 @@ "metric_keys": [], "name": "mistral-agents-stream-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -346,9 +421,9 @@ ], "name": "mistral.agents.stream", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" }, @@ -361,7 +436,7 @@ "metric_keys": [], "name": "mistral-embeddings-operation", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ "" ], @@ -381,9 +456,9 @@ ], "name": "mistral.embeddings.create", "root_span_id": "", - "span_id": "", + "span_id": "", "span_parents": [ - "" + "" ], "type": "llm" } diff --git a/e2e/scenarios/mistral-instrumentation/assertions.ts b/e2e/scenarios/mistral-instrumentation/assertions.ts index 13f31b1b1..0f8077ec1 100644 --- a/e2e/scenarios/mistral-instrumentation/assertions.ts +++ b/e2e/scenarios/mistral-instrumentation/assertions.ts @@ -17,10 +17,12 @@ import { summarizeWrapperContract, } from "../../helpers/wrapper-contract"; import { + ADJUSTABLE_REASONING_MODEL, FIM_MODEL, CHAT_MODEL, AGENT_MODEL, EMBEDDING_MODEL, + NATIVE_REASONING_MODEL, ROOT_NAME, SCENARIO_NAME, } from "./constants.mjs"; @@ -145,18 +147,43 @@ function summarizeOutput(output: unknown): Json { (Array.isArray(message.tool_calls) && message.tool_calls) || (Array.isArray(message.toolCalls) && message.toolCalls) || []; - - return { + const contentParts = Array.isArray(message.content) ? message.content : []; + const contentPartTypes = contentParts + .map((part) => + isRecord(part) && typeof part.type === "string" ? 
part.type : null, + ) + .filter((part): part is string => part !== null); + const summary = { choice_count: output.length, finish_reason: finishReason, has_content: typeof message.content === "string" ? message.content.length > 0 - : false, + : contentPartTypes.includes("text") + ? true + : false, role: typeof message.role === "string" ? message.role : null, tool_call_count: toolCalls.length, type: "choices", }; + + if (Array.isArray(message.content)) { + const hasThinkingContent = contentPartTypes.includes("thinking"); + + return { + ...summary, + content_part_types: contentPartTypes, + finish_reason: hasThinkingContent ? null : finishReason, + thinking_block_count: contentPartTypes.filter( + (partType) => partType === "thinking", + ).length, + text_block_count: contentPartTypes.filter( + (partType) => partType === "text", + ).length, + }; + } + + return summary; } if (!isRecord(output as Json)) { @@ -189,7 +216,7 @@ function summarizePayloadRow(row: CapturedLogRow): Json { input: summarizeInput(row.input), metadata: pickMetadata( row.metadata as Record | undefined, - ["model", "operation", "provider", "scenario"], + ["model", "operation", "provider", "reasoning_effort", "scenario"], ), metric_keys: Object.keys(metrics) .filter((key) => key !== "start" && key !== "end") @@ -236,29 +263,52 @@ function normalizeLegacyV134PayloadSummaryRow( "mistral.chat.stream", "mistral.fim.stream", ]); + let normalizedSummaryRow = summaryRow; + if ( - snapshotName !== "mistral-v1-3-4" || - !spanName || - !unstableLegacyV134SpanNames.has(spanName) || - !isRecord(summaryRow) + snapshotName === "mistral-v1-3-4" && + spanName && + unstableLegacyV134SpanNames.has(spanName) && + isRecord(summaryRow) ) { - return summaryRow; + const output = isRecord(summaryRow.output) ? summaryRow.output : null; + + normalizedSummaryRow = { + ...summaryRow, + metric_keys: normalizeLegacyV134MetricKeys(summaryRow.metric_keys), + ...(output + ? 
{ + output: { + ...output, + finish_reason: null, + }, + } + : {}), + }; + } + + if (!isRecord(normalizedSummaryRow)) { + return normalizedSummaryRow; } - const output = isRecord(summaryRow.output) ? summaryRow.output : null; + const output = isRecord(normalizedSummaryRow.output) + ? normalizedSummaryRow.output + : null; + const metadata = isRecord(normalizedSummaryRow.metadata) + ? normalizedSummaryRow.metadata + : null; - return { - ...summaryRow, - metric_keys: normalizeLegacyV134MetricKeys(summaryRow.metric_keys), - ...(output - ? { - output: { - ...output, - finish_reason: null, - }, - } - : {}), - }; + if (output && metadata?.reasoning_effort === "high") { + return { + ...normalizedSummaryRow, + output: { + ...output, + finish_reason: null, + }, + }; + } + + return normalizedSummaryRow; } function mergeRecordValues( @@ -341,6 +391,14 @@ function buildSpanSummary( events, "mistral-chat-stream-operation", ); + const chatReasoningStreamOperation = findLatestSpan( + events, + "mistral-chat-reasoning-stream-operation", + ); + const chatThinkingStreamOperation = findLatestSpan( + events, + "mistral-chat-thinking-stream-operation", + ); const chatToolCallOperation = findLatestSpan( events, "mistral-chat-tool-call-operation", @@ -393,6 +451,14 @@ function buildSpanSummary( findMistralSpan(events, chatStreamOperation?.span.id, [ "mistral.chat.stream", ]), + chatReasoningStreamOperation, + findMistralSpan(events, chatReasoningStreamOperation?.span.id, [ + "mistral.chat.stream", + ]), + chatThinkingStreamOperation, + findMistralSpan(events, chatThinkingStreamOperation?.span.id, [ + "mistral.chat.stream", + ]), chatToolCallOperation, ...selectedChatToolCallSpans, fimCompleteOperation, @@ -417,17 +483,20 @@ function buildSpanSummary( findMistralSpan(events, embeddingsOperation?.span.id, [ "mistral.embeddings.create", ]), - ].map((event) => - normalizeLegacyV134SpanSummaryRow( - summarizeWrapperContract(event!, [ - "model", - "operation", - "provider", - "scenario", - ]), 
- snapshotName, - ), - ) as Json, + ] + .filter((event): event is CapturedLogEvent => event !== undefined) + .map((event) => + normalizeLegacyV134SpanSummaryRow( + summarizeWrapperContract(event, [ + "model", + "operation", + "provider", + "reasoning_effort", + "scenario", + ]), + snapshotName, + ), + ) as Json, ); } @@ -521,6 +590,7 @@ export function defineMistralInstrumentationAssertions(options: { name: string; runScenario: RunMistralScenario; snapshotName: string; + supportsThinkingStream?: boolean; testFileUrl: string; timeoutMs: number; }): void { @@ -532,6 +602,7 @@ export function defineMistralInstrumentationAssertions(options: { options.testFileUrl, `${options.snapshotName}.log-payloads.json`, ); + const supportsThinkingStream = options.supportsThinkingStream ?? true; const testConfig = { timeout: options.timeoutMs, }; @@ -597,6 +668,93 @@ export function defineMistralInstrumentationAssertions(options: { expect(span?.output).toBeDefined(); }); + test( + "captures trace for chat.stream() reasoning metadata", + testConfig, + () => { + const root = findLatestSpan(events, ROOT_NAME); + const operation = findLatestSpan( + events, + "mistral-chat-reasoning-stream-operation", + ); + const span = findMistralSpan(events, operation?.span.id, [ + "mistral.chat.stream", + ]); + const metadata = span?.row.metadata as + | Record + | undefined; + + expect(operation).toBeDefined(); + expect(span).toBeDefined(); + expect(operation?.span.parentIds).toEqual([root?.span.id ?? 
""]); + expect(span?.span.type).toBe("llm"); + expect(metadata).toMatchObject({ + model: ADJUSTABLE_REASONING_MODEL, + provider: "mistral", + reasoning_effort: "high", + }); + expect(span?.metrics).toMatchObject({ + time_to_first_token: expect.any(Number), + prompt_tokens: expect.any(Number), + completion_tokens: expect.any(Number), + }); + expect(span?.output).toBeDefined(); + }, + ); + + if (supportsThinkingStream) { + test( + "captures trace for chat.stream() thinking content", + testConfig, + () => { + const root = findLatestSpan(events, ROOT_NAME); + const operation = findLatestSpan( + events, + "mistral-chat-thinking-stream-operation", + ); + const span = findMistralSpan(events, operation?.span.id, [ + "mistral.chat.stream", + ]); + const metadata = span?.row.metadata as + | Record + | undefined; + const output = span?.output as + | Array<{ + message?: { + content?: + | Array<{ + thinking?: Array<{ text?: string; type?: string }>; + text?: string; + type?: string; + }> + | string + | null; + }; + }> + | undefined; + const content = Array.isArray(output?.[0]?.message?.content) + ? output[0].message.content + : []; + + expect(operation).toBeDefined(); + expect(span).toBeDefined(); + expect(operation?.span.parentIds).toEqual([root?.span.id ?? 
""]); + expect(span?.span.type).toBe("llm"); + expect(metadata).toMatchObject({ + model: NATIVE_REASONING_MODEL, + provider: "mistral", + }); + expect(span?.metrics).toMatchObject({ + time_to_first_token: expect.any(Number), + prompt_tokens: expect.any(Number), + completion_tokens: expect.any(Number), + }); + expect(content.some((part) => part.type === "thinking")).toBe(true); + expect(content.some((part) => part.type === "text")).toBe(true); + }, + ); + } + test("captures trace for chat.complete() tool calling", testConfig, () => { const root = findLatestSpan(events, ROOT_NAME); const operation = findLatestSpan( diff --git a/e2e/scenarios/mistral-instrumentation/constants.mjs b/e2e/scenarios/mistral-instrumentation/constants.mjs index c3266b467..e57644f74 100644 --- a/e2e/scenarios/mistral-instrumentation/constants.mjs +++ b/e2e/scenarios/mistral-instrumentation/constants.mjs @@ -1,4 +1,6 @@ const CHAT_MODEL = "mistral-small-2506"; +const ADJUSTABLE_REASONING_MODEL = "mistral-small-latest"; +const NATIVE_REASONING_MODEL = "magistral-small-latest"; const EMBEDDING_MODEL = "mistral-embed"; const FIM_MODEL = "codestral-2508"; const AGENT_MODEL = CHAT_MODEL; @@ -6,10 +8,12 @@ const ROOT_NAME = "mistral-root"; const SCENARIO_NAME = "mistral-instrumentation"; export { + ADJUSTABLE_REASONING_MODEL, AGENT_MODEL, CHAT_MODEL, EMBEDDING_MODEL, FIM_MODEL, + NATIVE_REASONING_MODEL, ROOT_NAME, SCENARIO_NAME, }; diff --git a/e2e/scenarios/mistral-instrumentation/scenario.impl.mjs b/e2e/scenarios/mistral-instrumentation/scenario.impl.mjs index 47d0fe2e4..d8b73adf8 100644 --- a/e2e/scenarios/mistral-instrumentation/scenario.impl.mjs +++ b/e2e/scenarios/mistral-instrumentation/scenario.impl.mjs @@ -5,10 +5,12 @@ import { runTracedScenario, } from "../../helpers/provider-runtime.mjs"; import { + ADJUSTABLE_REASONING_MODEL, AGENT_MODEL, CHAT_MODEL, EMBEDDING_MODEL, FIM_MODEL, + NATIVE_REASONING_MODEL, ROOT_NAME, SCENARIO_NAME, } from "./constants.mjs"; @@ -20,43 +22,55 @@ const 
MISTRAL_REQUEST_RETRY_OPTIONS = { delayMs: 2_000, maxDelayMs: 10_000, }; + +const MISTRAL_THINKING_STREAM_OPTOUTS = new Set(["mistral-sdk-v1-3-4"]); + +function createMistralScenarioSpec(spec) { + return { + ...spec, + ...(MISTRAL_THINKING_STREAM_OPTOUTS.has(spec.dependencyName) + ? { supportsThinkingStream: false } + : {}), + }; +} + export const MISTRAL_SCENARIO_SPECS = [ - { + createMistralScenarioSpec({ autoEntry: "scenario.mistral-v1-3-4.mjs", dependencyName: "mistral-sdk-v1-3-4", snapshotName: "mistral-v1-3-4", wrapperEntry: "scenario.mistral-v1-3-4.ts", - }, - { + }), + createMistralScenarioSpec({ autoEntry: "scenario.mistral-v1-10-0.mjs", dependencyName: "mistral-sdk-v1-10-0", snapshotName: "mistral-v1-10-0", wrapperEntry: "scenario.mistral-v1-10-0.ts", - }, - { + }), + createMistralScenarioSpec({ autoEntry: "scenario.mistral-v1-14-1.mjs", dependencyName: "mistral-sdk-v1-14-1", snapshotName: "mistral-v1-14-1", wrapperEntry: "scenario.mistral-v1-14-1.ts", - }, - { + }), + createMistralScenarioSpec({ autoEntry: "scenario.mistral-v1-15-1.mjs", dependencyName: "mistral-sdk-v1-15-1", snapshotName: "mistral-v1-15-1", wrapperEntry: "scenario.mistral-v1-15-1.ts", - }, - { + }), + createMistralScenarioSpec({ autoEntry: "scenario.mistral-v1.mjs", dependencyName: "mistral-sdk-v1", snapshotName: "mistral-v1", wrapperEntry: "scenario.mistral-v1.ts", - }, - { + }), + createMistralScenarioSpec({ autoEntry: "scenario.mjs", dependencyName: "mistral-sdk-v2", snapshotName: "mistral-v2", wrapperEntry: "scenario.ts", - }, + }), ]; function nonEmptyString(value) { @@ -340,7 +354,7 @@ async function resolveAgentRuntime(client) { async function runMistralInstrumentationScenario( Mistral, - { decorateClient } = {}, + { decorateClient, supportsThinkingStream = true } = {}, ) { const baseClient = new Mistral({ apiKey: process.env.MISTRAL_API_KEY, @@ -400,6 +414,54 @@ async function runMistralInstrumentationScenario( }, ); + await runOperation( + 
"mistral-chat-reasoning-stream-operation", + "chat-stream-reasoning", + async () => { + await withRetry(async () => { + const stream = await client.chat.stream({ + model: ADJUSTABLE_REASONING_MODEL, + messages: [ + { + role: "user", + content: + "John is one of 4 children. The first sister is 4 years old. Next year, the second sister will be twice as old as the first sister. The third sister is two years older than the second sister. The third sister is half the age of her older brother. How old is John? Reply with just the number.", + }, + ], + maxTokens: 256, + reasoning_effort: "high", + stream: true, + temperature: 0, + }); + await collectAsync(stream); + }, MISTRAL_REQUEST_RETRY_OPTIONS); + }, + ); + + if (supportsThinkingStream) { + await runOperation( + "mistral-chat-thinking-stream-operation", + "chat-stream-thinking", + async () => { + await withRetry(async () => { + const stream = await client.chat.stream({ + model: NATIVE_REASONING_MODEL, + messages: [ + { + role: "user", + content: "What is 2+2? 
Reply with just the number.", + }, + ], + maxTokens: 1024, + stream: true, + temperature: 0, + }); + await collectAsync(stream); + }, MISTRAL_REQUEST_RETRY_OPTIONS); + }, + ); + } + await runOperation( "mistral-chat-tool-call-operation", "chat-tool-call", @@ -670,12 +732,13 @@ async function runMistralInstrumentationScenario( } } -export async function runWrappedMistralInstrumentation(Mistral) { +export async function runWrappedMistralInstrumentation(Mistral, options) { await runMistralInstrumentationScenario(Mistral, { + ...options, decorateClient: wrapMistral, }); } -export async function runAutoMistralInstrumentation(Mistral) { - await runMistralInstrumentationScenario(Mistral); +export async function runAutoMistralInstrumentation(Mistral, options) { + await runMistralInstrumentationScenario(Mistral, options); } diff --git a/e2e/scenarios/mistral-instrumentation/scenario.mistral-v1-3-4.mjs b/e2e/scenarios/mistral-instrumentation/scenario.mistral-v1-3-4.mjs index 4f772e22d..3ecf3d47f 100644 --- a/e2e/scenarios/mistral-instrumentation/scenario.mistral-v1-3-4.mjs +++ b/e2e/scenarios/mistral-instrumentation/scenario.mistral-v1-3-4.mjs @@ -2,4 +2,8 @@ import { Mistral } from "mistral-sdk-v1-3-4"; import { runMain } from "../../helpers/provider-runtime.mjs"; import { runAutoMistralInstrumentation } from "./scenario.impl.mjs"; -runMain(async () => runAutoMistralInstrumentation(Mistral)); +runMain(async () => + runAutoMistralInstrumentation(Mistral, { + supportsThinkingStream: false, + }), +); diff --git a/e2e/scenarios/mistral-instrumentation/scenario.mistral-v1-3-4.ts b/e2e/scenarios/mistral-instrumentation/scenario.mistral-v1-3-4.ts index 1db41c17f..349d45244 100644 --- a/e2e/scenarios/mistral-instrumentation/scenario.mistral-v1-3-4.ts +++ b/e2e/scenarios/mistral-instrumentation/scenario.mistral-v1-3-4.ts @@ -2,4 +2,8 @@ import { Mistral } from "mistral-sdk-v1-3-4"; import { runMain } from "../../helpers/scenario-runtime"; import { runWrappedMistralInstrumentation } 
from "./scenario.impl.mjs"; -runMain(async () => runWrappedMistralInstrumentation(Mistral)); +runMain(async () => + runWrappedMistralInstrumentation(Mistral, { + supportsThinkingStream: false, + }), +); diff --git a/e2e/scenarios/mistral-instrumentation/scenario.test.ts b/e2e/scenarios/mistral-instrumentation/scenario.test.ts index 77b1da4f6..f12eed393 100644 --- a/e2e/scenarios/mistral-instrumentation/scenario.test.ts +++ b/e2e/scenarios/mistral-instrumentation/scenario.test.ts @@ -37,6 +37,9 @@ for (const scenario of mistralScenarios) { }); }, snapshotName: scenario.snapshotName, + ...(scenario.supportsThinkingStream === false + ? { supportsThinkingStream: false } + : {}), testFileUrl: import.meta.url, timeoutMs: MISTRAL_SCENARIO_TIMEOUT_MS, }); @@ -53,6 +56,9 @@ for (const scenario of mistralScenarios) { }); }, snapshotName: scenario.snapshotName, + ...(scenario.supportsThinkingStream === false + ? { supportsThinkingStream: false } + : {}), testFileUrl: import.meta.url, timeoutMs: MISTRAL_SCENARIO_TIMEOUT_MS, }); diff --git a/js/src/instrumentation/plugins/mistral-plugin.test.ts b/js/src/instrumentation/plugins/mistral-plugin.test.ts index e73d63076..96d410440 100644 --- a/js/src/instrumentation/plugins/mistral-plugin.test.ts +++ b/js/src/instrumentation/plugins/mistral-plugin.test.ts @@ -12,6 +12,7 @@ describe("extractMistralRequestMetadata", () => { extractMistralRequestMetadata({ model: "mistral-large-latest", maxTokens: 128, + reasoning_effort: "high", temperature: 0.4, n: 2, safe_prompt: true, @@ -24,6 +25,7 @@ describe("extractMistralRequestMetadata", () => { ).toEqual({ model: "mistral-large-latest", maxTokens: 128, + reasoning_effort: "high", temperature: 0.4, n: 2, safe_prompt: true, @@ -367,4 +369,72 @@ describe("aggregateMistralStreamChunks", () => { }, ]); }); + + it("preserves thinking content blocks from reasoning streams", () => { + const aggregated = aggregateMistralStreamChunks([ + { + data: { + choices: [ + { + delta: { + role: "assistant", + 
content: [ + { + type: "thinking", + thinking: [{ type: "text", text: "Let me" }], + }, + ], + }, + }, + ], + }, + }, + { + data: { + choices: [ + { + delta: { + content: [ + { + type: "thinking", + thinking: [{ type: "text", text: " think this through." }], + }, + { + type: "text", + text: "4", + }, + ], + }, + finishReason: "stop", + }, + ], + }, + }, + ]); + + expect(aggregated.output).toEqual([ + { + index: 0, + message: { + role: "assistant", + content: [ + { + type: "thinking", + thinking: [ + { + type: "text", + text: "Let me think this through.", + }, + ], + }, + { + type: "text", + text: "4", + }, + ], + }, + finishReason: "stop", + }, + ]); + }); }); diff --git a/js/src/instrumentation/plugins/mistral-plugin.ts b/js/src/instrumentation/plugins/mistral-plugin.ts index 88ae25f46..da9f122b3 100644 --- a/js/src/instrumentation/plugins/mistral-plugin.ts +++ b/js/src/instrumentation/plugins/mistral-plugin.ts @@ -13,6 +13,9 @@ import type { MistralChatCompletionChunkChoice, MistralChatCompletionEvent, MistralChatCompletionResponse, + MistralContentPart, + MistralTextContentPart, + MistralThinkingContentPart, MistralToolCallDelta, } from "../../vendor-sdk-types/mistral"; @@ -166,6 +169,8 @@ const MISTRAL_REQUEST_METADATA_ALLOWLIST = new Set([ "presence_penalty", "randomSeed", "random_seed", + "reasoningEffort", + "reasoning_effort", "responseFormat", "response_format", "safePrompt", @@ -377,6 +382,165 @@ function extractDeltaText(content: unknown): string | undefined { return textParts.length > 0 ? 
textParts.join("") : undefined; } +function normalizeMistralTextContentPart( + part: unknown, +): MistralTextContentPart | undefined { + if ( + !isObject(part) || + part.type !== "text" || + typeof part.text !== "string" + ) { + return undefined; + } + + return { + type: "text", + text: part.text, + }; +} + +function normalizeMistralThinkingContentPart( + part: unknown, +): MistralThinkingContentPart | undefined { + if (!isObject(part) || part.type !== "thinking") { + return undefined; + } + + const thinking = Array.isArray(part.thinking) + ? part.thinking + .map((thinkingPart) => normalizeMistralTextContentPart(thinkingPart)) + .filter( + (thinkingPart): thinkingPart is MistralTextContentPart => + thinkingPart !== undefined && typeof thinkingPart.text === "string", + ) + : []; + + return { + type: "thinking", + thinking, + }; +} + +function normalizeMistralContentParts(content: unknown): MistralContentPart[] { + if (!Array.isArray(content)) { + return []; + } + + return content + .map((part) => { + return ( + normalizeMistralTextContentPart(part) || + normalizeMistralThinkingContentPart(part) + ); + }) + .filter((part): part is MistralContentPart => part !== undefined); +} + +function mergeMistralTextSegments( + left: MistralTextContentPart[], + right: MistralTextContentPart[], +): MistralTextContentPart[] { + const merged = left.map((part) => ({ ...part })); + + for (const part of right) { + const lastPart = merged[merged.length - 1]; + if ( + lastPart && + lastPart.type === "text" && + typeof lastPart.text === "string" && + typeof part.text === "string" + ) { + lastPart.text += part.text; + continue; + } + + merged.push({ ...part }); + } + + return merged; +} + +function mergeMistralContentParts( + left: MistralContentPart[] | undefined, + right: MistralContentPart[], +): MistralContentPart[] { + const merged = [...(left || []).map((part) => structuredClone(part))]; + + for (const part of right) { + const lastPart = merged[merged.length - 1]; + if ( + part.type 
=== "text" && + lastPart?.type === "text" && + typeof lastPart.text === "string" && + typeof part.text === "string" + ) { + lastPart.text += part.text; + continue; + } + + if ( + part.type === "thinking" && + lastPart?.type === "thinking" && + Array.isArray(lastPart.thinking) && + Array.isArray(part.thinking) + ) { + lastPart.thinking = mergeMistralTextSegments( + lastPart.thinking, + part.thinking, + ); + continue; + } + + merged.push(structuredClone(part)); + } + + return merged; +} + +function appendMistralContent( + accumulator: MistralChoiceAccumulator, + content: unknown, +): void { + if (typeof content === "string") { + if (accumulator.contentParts) { + accumulator.contentParts = mergeMistralContentParts( + accumulator.contentParts, + [{ type: "text", text: content }], + ); + return; + } + + accumulator.content = `${accumulator.content || ""}${content}`; + return; + } + + const normalizedContentParts = normalizeMistralContentParts(content); + if (normalizedContentParts.length === 0) { + return; + } + + const hasStructuredContent = normalizedContentParts.some( + (part) => part.type !== "text", + ); + + if (!accumulator.contentParts && !hasStructuredContent) { + const text = extractDeltaText(content); + if (text) { + accumulator.content = `${accumulator.content || ""}${text}`; + } + return; + } + + accumulator.contentParts = mergeMistralContentParts( + accumulator.contentParts || + (accumulator.content + ? 
[{ type: "text", text: accumulator.content }] + : []), + normalizedContentParts, + ); + delete accumulator.content; +} + function getDeltaToolCalls( delta: Record, ): MistralToolCallDelta[] { @@ -522,6 +686,7 @@ function getChoiceFinishReason( type MistralChoiceAccumulator = { content?: string; + contentParts?: MistralContentPart[]; finishReason?: string | null; index: number; order: number; @@ -642,10 +807,7 @@ export function aggregateMistralStreamChunks( accumulator.role = delta.role; } - const deltaText = extractDeltaText(delta.content); - if (deltaText) { - accumulator.content = `${accumulator.content || ""}${deltaText}`; - } + appendMistralContent(accumulator, delta.content); accumulator.toolCalls = mergeToolCallDeltas( accumulator.toolCalls, @@ -670,7 +832,7 @@ export function aggregateMistralStreamChunks( index: choice.index, message: { ...(choice.role ? { role: choice.role } : {}), - content: choice.content ?? null, + content: choice.contentParts ?? choice.content ?? null, ...(choice.toolCalls ? 
{ toolCalls: choice.toolCalls } : {}), }, ...(choice.finishReason !== undefined diff --git a/js/src/vendor-sdk-types/mistral.ts b/js/src/vendor-sdk-types/mistral.ts index 90fd390fd..1a7edc8d8 100644 --- a/js/src/vendor-sdk-types/mistral.ts +++ b/js/src/vendor-sdk-types/mistral.ts @@ -8,9 +8,25 @@ export type MistralToolCallDelta = { [key: string]: unknown; }; +export type MistralTextContentPart = { + type: "text"; + text?: string; + [key: string]: unknown; +}; + +export type MistralThinkingContentPart = { + type: "thinking"; + thinking?: MistralTextContentPart[] | null; + [key: string]: unknown; +}; + +export type MistralContentPart = + | MistralTextContentPart + | MistralThinkingContentPart; + export type MistralChatMessageDelta = { role?: string; - content?: string | null; + content?: string | MistralContentPart[] | null; toolCalls?: MistralToolCallDelta[] | null; tool_calls?: MistralToolCallDelta[] | null; [key: string]: unknown; @@ -20,7 +36,7 @@ export type MistralChatCompletionChoice = { index?: number; message?: { role?: string; - content?: string | null; + content?: string | MistralContentPart[] | null; toolCalls?: unknown; tool_calls?: unknown; }; From 1cb8b437e460203a81836603c8ddb8b087dc8b07 Mon Sep 17 00:00:00 2001 From: Manu Goyal Date: Fri, 24 Apr 2026 11:05:29 -0700 Subject: [PATCH 14/26] Fix clean command (#1905) We should also wipe `util/dist` --- js/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/js/package.json b/js/package.json index 604affe2e..f83a7f3ce 100644 --- a/js/package.json +++ b/js/package.json @@ -113,7 +113,7 @@ "build": "cross-env NODE_OPTIONS=\"--max-old-space-size=8192\" tsup", "check:typings": "tsc --noEmit", "watch": "tsup --watch", - "clean": "rm -r dist/* && rm -r dev/dist/*", + "clean": "rm -r dist/* && rm -r dev/dist/* && rm -r util/dist/*", "docs": "typedoc --options typedoc.json src/node/index.ts", "test": "vitest run --exclude \"src/wrappers/**/*.test.ts\" --exclude \"src/otel/**/*.test.ts\" 
--exclude \"smoke/**/*.test.ts\" --exclude \"src/zod/**/*.test.ts\" --exclude \"tests/api-compatibility/**\"", "test:core": "pnpm prune && pnpm test", From 7bf80340ba3a74e638f812564693111813c2c088 Mon Sep 17 00:00:00 2001 From: max-braintrust Date: Fri, 24 Apr 2026 16:50:32 -0700 Subject: [PATCH 15/26] feat: Add experiment dataset filter to experiment metadata (#1898) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Stores _internal_btql filters for experiment datasets in the experiment metadata. Right now we don’t persist those filter options, so we lose the ability to reconstruct the exact subset of rows an experiment ran against. If we save them with the experiment, we can recreate the same row set later instead of having to guess. This unlocks a few useful things: - Re-running an experiment on the exact same data it originally saw. - Showing the BTQL filter used by an experiment in the Braintrust UI. - Anything else that depends on reconstructing the precise rows that were initially fed into an experiment. 
--- .changeset/yellow-crabs-attend.md | 5 + js/src/logger.test.ts | 151 +++++++++++++++++++++++++++++- js/src/logger.ts | 42 ++++++++- 3 files changed, 194 insertions(+), 4 deletions(-) create mode 100644 .changeset/yellow-crabs-attend.md diff --git a/.changeset/yellow-crabs-attend.md b/.changeset/yellow-crabs-attend.md new file mode 100644 index 000000000..c36539820 --- /dev/null +++ b/.changeset/yellow-crabs-attend.md @@ -0,0 +1,5 @@ +--- +"braintrust": minor +--- + +(feat) Add experiment dataset filters to experiment metadata diff --git a/js/src/logger.test.ts b/js/src/logger.test.ts index c4536bb78..21231d8e0 100644 --- a/js/src/logger.test.ts +++ b/js/src/logger.test.ts @@ -484,6 +484,156 @@ function mockInitGitMetadata() { ).mockResolvedValue([]); } +test("init forwards dataset _internal_btql to experiment register", async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + + try { + vi.spyOn(state, "login").mockResolvedValue(state); + mockInitGitMetadata(); + + const datasetFilter = { + filter: [ + { + op: "eq", + left: { op: "ident", name: ["metadata", "model"] }, + right: { op: "literal", value: "gpt-5-mini" }, + }, + { + op: "isnotnull", + expr: { op: "ident", name: ["expected"] }, + }, + ], + }; + + let experimentRegisterBody: unknown; + vi.spyOn(state.appConn(), "post_json") + .mockResolvedValueOnce({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }) + .mockImplementationOnce(async (_path, body) => { + experimentRegisterBody = body; + return { + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + experiment: { + id: "00000000-0000-0000-0000-000000000003", + project_id: "00000000-0000-0000-0000-000000000001", + name: "test-experiment", + public: false, + }, + }; + }); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + 
version: "123", + _internal_btql: datasetFilter, + state, + }); + + const experiment = init({ + project: "test-project", + experiment: "test-experiment", + dataset, + setCurrent: false, + state, + }); + + await experiment.id; + + expect(experimentRegisterBody).toEqual( + expect.objectContaining({ + internal_metadata: { + dataset_filter: datasetFilter, + }, + }), + ); + } finally { + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); + } +}); + +test("dataset fetch forwards _internal_btql filter arrays to btql", async () => { + const state = await _exportsForTestingOnly.simulateLoginForTests(); + + try { + vi.spyOn(state, "login").mockResolvedValue(state); + + const datasetFilter = { + filter: [ + { + op: "eq", + left: { op: "ident", name: ["metadata", "model"] }, + right: { op: "literal", value: "gpt-5-mini" }, + }, + { + op: "isnotnull", + expr: { op: "ident", name: ["expected"] }, + }, + ], + limit: 5, + }; + + vi.spyOn(state.appConn(), "post_json").mockResolvedValue({ + project: { + id: "00000000-0000-0000-0000-000000000001", + name: "test-project", + }, + dataset: { + id: "00000000-0000-0000-0000-000000000002", + name: "test-dataset", + }, + }); + + let btqlBody: unknown; + vi.spyOn(state.apiConn(), "post").mockImplementation( + async (_path, body) => { + btqlBody = body; + return new Response(JSON.stringify({ data: [] }), { + status: 200, + headers: { "Content-Type": "application/json" }, + }); + }, + ); + + const dataset = initDataset({ + project: "test-project", + dataset: "test-dataset", + _internal_btql: datasetFilter, + state, + }); + + const rows: unknown[] = []; + for await (const row of dataset) { + rows.push(row); + } + + expect(rows).toEqual([]); + expect(btqlBody).toEqual( + expect.objectContaining({ + query: expect.objectContaining({ + filter: datasetFilter.filter, + limit: 5, + }), + }), + ); + } finally { + _exportsForTestingOnly.simulateLogoutForTests(); + vi.restoreAllMocks(); + } +}); + test("initDataset prefers version 
over environment in eval data", async () => { const state = await _exportsForTestingOnly.simulateLoginForTests(); vi.spyOn(state, "login").mockResolvedValue(state); @@ -948,7 +1098,6 @@ test("init keeps plain dataset refs attached to the experiment", async () => { }); await experiment.id; - expect(experiment.dataset).toMatchObject({ id: "00000000-0000-0000-0000-000000000002", }); diff --git a/js/src/logger.ts b/js/src/logger.ts index ef8ed3a65..81da807fe 100644 --- a/js/src/logger.ts +++ b/js/src/logger.ts @@ -3472,6 +3472,7 @@ export type InitOptions = FullLoginOptions & { experiment?: string; description?: string; dataset?: AnyDataset | DatasetRef; + _internal_btql?: Record; parameters?: ParametersRef | RemoteEvalParameters; update?: boolean; baseExperiment?: string; @@ -3490,6 +3491,32 @@ export type FullInitOptions = { project?: string; } & InitOptions; +function getExperimentDatasetFilter({ + dataset, + _internal_btql, +}: { + dataset?: AnyDataset | DatasetRef; + _internal_btql?: Record; +}): Record | undefined { + if (_internal_btql !== undefined) { + return _internal_btql; + } + + if (!(dataset instanceof Dataset)) { + return undefined; + } + + const datasetFilter = Reflect.get(dataset, "_internal_btql"); + return isObject(datasetFilter) ? datasetFilter : undefined; +} + +function getInternalBtqlLimit( + internalBtql?: Record, +): number | undefined { + const limit = internalBtql?.["limit"]; + return typeof limit === "number" ? limit : undefined; +} + type InitializedExperiment = IsOpen extends true ? 
ReadonlyExperiment : Experiment; @@ -3556,6 +3583,7 @@ export function init( experiment, description, dataset, + _internal_btql, parameters, baseExperiment, isPublic, @@ -3697,6 +3725,16 @@ export function init( } } + const datasetFilter = getExperimentDatasetFilter({ + dataset, + _internal_btql, + }); + if (datasetFilter !== undefined) { + args["internal_metadata"] = { + dataset_filter: datasetFilter, + }; + } + if (parameters !== undefined) { if (RemoteEvalParameters.isParameters(parameters)) { args["parameters_id"] = parameters.id; @@ -6046,9 +6084,7 @@ export class ObjectFetcher implements AsyncIterable< const state = await this.getState(); const objectId = await this.id; const batchLimit = batchSize ?? DEFAULT_FETCH_BATCH_SIZE; - const internalLimit = ( - this._internal_btql as { limit?: number } | undefined - )?.limit; + const internalLimit = getInternalBtqlLimit(this._internal_btql); const limit = batchSize !== undefined ? batchSize : (internalLimit ?? batchLimit); const internalBtqlWithoutReservedQueryKeys = Object.fromEntries( From 9be62fce67812750097fee3cc8ee518ff138dbc8 Mon Sep 17 00:00:00 2001 From: Ankur Goyal Date: Sat, 25 Apr 2026 11:47:13 -0700 Subject: [PATCH 16/26] Rm eval status page (#1908) --- generated_types.json | 156 ++++++++++++++------------------------ js/src/generated_types.ts | 62 +++++++-------- 2 files changed, 87 insertions(+), 131 deletions(-) diff --git a/generated_types.json b/generated_types.json index 549ae84c9..5393031a2 100644 --- a/generated_types.json +++ b/generated_types.json @@ -1432,6 +1432,21 @@ }, "required": ["type", "index"], "title": "scorer" + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["classifier"] + }, + "index": { + "type": "integer", + "minimum": 0 + } + }, + "required": ["type", "index"], + "title": "classifier" } ] } @@ -1841,102 +1856,6 @@ }, "required": ["id", "object_type", "object_id", "name"] }, - "EvalStatusPage": { - "type": "object", - "properties": { - 
"id": { - "type": "string", - "format": "uuid", - "description": "Unique identifier for the eval status page" - }, - "project_id": { - "type": "string", - "format": "uuid", - "description": "Unique identifier for the project that the eval status page belongs under" - }, - "user_id": { - "type": ["string", "null"], - "format": "uuid", - "description": "Identifies the user who created the eval status page" - }, - "created": { - "type": ["string", "null"], - "format": "date-time", - "description": "Date of eval status page creation" - }, - "deleted_at": { - "type": ["string", "null"], - "format": "date-time", - "description": "Date of eval status page deletion, or null if the eval status page is still active" - }, - "name": { - "type": "string", - "description": "Name of the eval status page" - }, - "description": { - "type": ["string", "null"], - "description": "Textual description of the eval status page" - }, - "logo_url": { - "type": ["string", "null"], - "description": "URL of the logo to display on the page" - }, - "theme": { - "$ref": "#/components/schemas/EvalStatusPageTheme" - }, - "config": { - "$ref": "#/components/schemas/EvalStatusPageConfig" - } - }, - "required": ["id", "project_id", "name", "theme", "config"], - "description": "A public eval status page that displays aggregate experiment results" - }, - "EvalStatusPageConfig": { - "type": "object", - "properties": { - "score_columns": { - "type": ["array", "null"], - "items": { - "type": "string" - }, - "description": "The score columns to display on the page" - }, - "metric_columns": { - "type": ["array", "null"], - "items": { - "type": "string" - }, - "description": "The metric columns to display on the page" - }, - "grouping_field": { - "type": ["string", "null"], - "description": "The metadata field to use for grouping experiments (model)" - }, - "filter": { - "type": ["string", "null"], - "description": "BTQL filter to apply to experiment data" - }, - "sort_by": { - "type": ["string", "null"], - 
"description": "Field to sort results by (format: 'score:' or 'metric:')" - }, - "sort_order": { - "type": ["string", "null"], - "enum": ["asc", "desc"], - "description": "Sort order (ascending or descending)" - }, - "api_key": { - "type": ["string", "null"], - "description": "The API key used for fetching experiment data" - } - }, - "description": "Configuration for what data to display" - }, - "EvalStatusPageTheme": { - "type": "string", - "enum": ["light", "dark"], - "description": "The theme for the page" - }, "Experiment": { "type": "object", "properties": { @@ -1989,6 +1908,18 @@ "type": ["string", "null"], "description": "Version number of the linked dataset the experiment was run against. This can be used to reproduce the experiment after the dataset has been modified." }, + "internal_metadata": { + "type": ["object", "null"], + "properties": { + "dataset_filter": { + "type": ["object", "null"], + "additionalProperties": {}, + "description": "BTQL filter payload used to evaluate a subset of a linked dataset." + } + }, + "additionalProperties": {}, + "description": "Braintrust-controlled metadata about the experiment." + }, "parameters_id": { "type": ["string", "null"], "format": "uuid", @@ -4159,6 +4090,23 @@ ], "description": "The definition of what to export" }, + "scope": { + "anyOf": [ + { + "$ref": "#/components/schemas/SpanScope" + }, + { + "$ref": "#/components/schemas/TraceScope" + }, + { + "$ref": "#/components/schemas/GroupScope" + }, + { + "type": "null" + } + ], + "description": "Execution scope for export automation. Defaults to span-level execution." + }, "export_path": { "type": "string", "description": "The path to export the results to. It should include the storage protocol and prefix, e.g. 
s3://bucket-name/path/to/export" @@ -4193,6 +4141,20 @@ } }, "required": ["type", "role_arn", "external_id"] + }, + { + "type": "object", + "properties": { + "type": { + "type": "string", + "enum": ["gcp_service_account"] + }, + "service_account_email": { + "type": "string", + "description": "The GCP service account email to impersonate" + } + }, + "required": ["type", "service_account_email"] } ] }, @@ -6615,7 +6577,7 @@ }, "queryShape": { "type": ["string", "null"], - "enum": ["traces", "spans"] + "enum": ["traces", "spans", "topics"] }, "cluster": { "type": ["string", "null"] diff --git a/js/src/generated_types.ts b/js/src/generated_types.ts index b2ca027e0..7326dfffc 100644 --- a/js/src/generated_types.ts +++ b/js/src/generated_types.ts @@ -1,4 +1,4 @@ -// Auto-generated file (content hash 0a2373633d8deec4) -- do not modify +// Auto-generated file (content hash 87cf242c359a07f8) -- do not modify import { z } from "zod/v3"; @@ -677,33 +677,6 @@ export const EnvVar = z.object({ .default("env_var"), }); export type EnvVarType = z.infer; -export const EvalStatusPageTheme = z.enum(["light", "dark"]); -export type EvalStatusPageThemeType = z.infer; -export const EvalStatusPageConfig = z - .object({ - score_columns: z.union([z.array(z.string()), z.null()]), - metric_columns: z.union([z.array(z.string()), z.null()]), - grouping_field: z.union([z.string(), z.null()]), - filter: z.union([z.string(), z.null()]), - sort_by: z.union([z.string(), z.null()]), - sort_order: z.union([z.enum(["asc", "desc"]), z.null()]), - api_key: z.union([z.string(), z.null()]), - }) - .partial(); -export type EvalStatusPageConfigType = z.infer; -export const EvalStatusPage = z.object({ - id: z.string().uuid(), - project_id: z.string().uuid(), - user_id: z.union([z.string(), z.null()]).optional(), - created: z.union([z.string(), z.null()]).optional(), - deleted_at: z.union([z.string(), z.null()]).optional(), - name: z.string(), - description: z.union([z.string(), z.null()]).optional(), - 
logo_url: z.union([z.string(), z.null()]).optional(), - theme: EvalStatusPageTheme, - config: EvalStatusPageConfig, -}); -export type EvalStatusPageType = z.infer; export const RepoInfo = z.union([ z .object({ @@ -733,6 +706,20 @@ export const Experiment = z.object({ deleted_at: z.union([z.string(), z.null()]).optional(), dataset_id: z.union([z.string(), z.null()]).optional(), dataset_version: z.union([z.string(), z.null()]).optional(), + internal_metadata: z + .union([ + z + .object({ + dataset_filter: z.union([ + z.object({}).partial().passthrough(), + z.null(), + ]), + }) + .partial() + .passthrough(), + z.null(), + ]) + .optional(), parameters_id: z.union([z.string(), z.null()]).optional(), parameters_version: z.union([z.string(), z.null()]).optional(), public: z.boolean(), @@ -1609,14 +1596,21 @@ export const ProjectAutomation = z.object({ z.object({ type: z.literal("log_spans") }), z.object({ type: z.literal("btql_query"), btql_query: z.string() }), ]), + scope: z.union([SpanScope, TraceScope, GroupScope, z.null()]).optional(), export_path: z.string(), format: z.enum(["jsonl", "parquet"]), interval_seconds: z.number().gte(1).lte(2592000), - credentials: z.object({ - type: z.literal("aws_iam"), - role_arn: z.string(), - external_id: z.string(), - }), + credentials: z.union([ + z.object({ + type: z.literal("aws_iam"), + role_arn: z.string(), + external_id: z.string(), + }), + z.object({ + type: z.literal("gcp_service_account"), + service_account_email: z.string(), + }), + ]), batch_size: z.union([z.number(), z.null()]).optional(), }), z.object({ @@ -2053,7 +2047,7 @@ export const ViewOptions = z.union([ z.object({ from: z.string(), to: z.string() }), z.null(), ]), - queryShape: z.union([z.enum(["traces", "spans"]), z.null()]), + queryShape: z.union([z.enum(["traces", "spans", "topics"]), z.null()]), cluster: z.union([z.string(), z.null()]), freezeColumns: z.union([z.boolean(), z.null()]), }) From 714ee221f6da8352a50823403737bff681033b46 Mon Sep 17 00:00:00 2001 
From: Luca Forstner Date: Tue, 28 Apr 2026 16:01:09 +0200 Subject: [PATCH 17/26] chore: Bump `@braintrust/browser` to `0.0.3` (#1913) It being a prerelease actually tripped up our release process because we didn't define a tag. In general we should probably not have rc versions in the package jsons too. --- integrations/browser-js/package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integrations/browser-js/package.json b/integrations/browser-js/package.json index 5095ec926..2cc972fe7 100644 --- a/integrations/browser-js/package.json +++ b/integrations/browser-js/package.json @@ -1,6 +1,6 @@ { "name": "@braintrust/browser", - "version": "0.0.2-rc.4", + "version": "0.0.3", "description": "Braintrust SDK for browser environments with AsyncLocalStorage polyfill", "type": "module", "module": "./dist/index.js", From f6426a1e31475af6d3c39c3d418fb73d4be49d7e Mon Sep 17 00:00:00 2001 From: "braintrust-bot[bot]" <215900051+braintrust-bot[bot]@users.noreply.github.com> Date: Tue, 28 Apr 2026 13:42:24 -0400 Subject: [PATCH 18/26] chore: generated SDK types (#1910) Automated regeneration of SDK types. Co-authored-by: braintrust-bot[bot] <215900051+braintrust-bot[bot]@users.noreply.github.com> --- generated_types.json | 4 ++++ js/src/generated_types.ts | 3 ++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/generated_types.json b/generated_types.json index 5393031a2..51e2a803c 100644 --- a/generated_types.json +++ b/generated_types.json @@ -6097,6 +6097,10 @@ "generation_settings": { "$ref": "#/components/schemas/TopicMapGenerationSettings" }, + "disable_reconciliation": { + "type": "boolean", + "description": "Whether new topic generation should ignore the previously saved report during reconciliation. Defaults to false when omitted." + }, "distance_threshold": { "type": "number", "description": "Maximum distance to nearest centroid. If exceeded, returns no_match." 
diff --git a/js/src/generated_types.ts b/js/src/generated_types.ts index 7326dfffc..609438488 100644 --- a/js/src/generated_types.ts +++ b/js/src/generated_types.ts @@ -1,4 +1,4 @@ -// Auto-generated file (content hash 87cf242c359a07f8) -- do not modify +// Auto-generated file (content hash 6a3eaf06ccb50b7d) -- do not modify import { z } from "zod/v3"; @@ -271,6 +271,7 @@ export const TopicMapData = z.object({ report_key: z.string().optional(), topic_names: z.record(z.string()).optional(), generation_settings: TopicMapGenerationSettings.optional(), + disable_reconciliation: z.boolean().optional(), distance_threshold: z.number().optional(), }); export type TopicMapDataType = z.infer; From 732d1a18f93f02f494f736b5bcb6dadfd68d5532 Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Thu, 30 Apr 2026 17:02:55 +0200 Subject: [PATCH 19/26] deps: Add deno to `mise.toml` (#1925) --- mise.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/mise.toml b/mise.toml index 352bcde34..bb90c5250 100644 --- a/mise.toml +++ b/mise.toml @@ -11,6 +11,7 @@ _.file = ".env" nodejs = "24.15.0" npm = "11.12.1" pnpm = "10.33.0" +deno = "2.7.6" [hooks] postinstall = "pnpm install --frozen-lockfile" From dc24f2f735bb879e38646405dcf63e64ccbb6169 Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Thu, 30 Apr 2026 17:03:43 +0200 Subject: [PATCH 20/26] ci: Remove api compatibility check from required checks (#1926) It's too inconsistent. 
--- .github/workflows/checks.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/checks.yaml b/.github/workflows/checks.yaml index bbdc3d1dc..bec88aa51 100644 --- a/.github/workflows/checks.yaml +++ b/.github/workflows/checks.yaml @@ -459,7 +459,6 @@ jobs: - js-test - js-build - e2e-hermetic - - js-api-compatibility - js-smoke-discover - js-smoke-test - temporal-js @@ -492,7 +491,6 @@ jobs: check_result "js-test" "${{ needs.js-test.result }}" check_result "js-build" "${{ needs.js-build.result }}" check_result "e2e-hermetic" "${{ needs.e2e-hermetic.result }}" - check_result "js-api-compatibility" "${{ needs.js-api-compatibility.result }}" check_result "js-smoke-discover" "${{ needs.js-smoke-discover.result }}" check_result "js-smoke-test" "${{ needs.js-smoke-test.result }}" check_result "temporal-js" "${{ needs.temporal-js.result }}" From eb224140af2ed045eec2a3605660b2b52cc2c522 Mon Sep 17 00:00:00 2001 From: Abhijeet Prasad Date: Thu, 30 Apr 2026 16:31:34 -0400 Subject: [PATCH 21/26] fix(cli): Use correct filename for eval.js (#1928) `..eval.js` -> `.eval.js` --- .changeset/fuzzy-evals-warn.md | 5 +++++ js/src/cli/index.ts | 2 +- 2 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 .changeset/fuzzy-evals-warn.md diff --git a/.changeset/fuzzy-evals-warn.md b/.changeset/fuzzy-evals-warn.md new file mode 100644 index 000000000..5000a0551 --- /dev/null +++ b/.changeset/fuzzy-evals-warn.md @@ -0,0 +1,5 @@ +--- +"braintrust": patch +--- + +fix: Correct the eval file extension shown in CLI directory warnings diff --git a/js/src/cli/index.ts b/js/src/cli/index.ts index 863f19987..fcfb0fefc 100755 --- a/js/src/cli/index.ts +++ b/js/src/cli/index.ts @@ -719,7 +719,7 @@ async function collectFiles( console.warn( warning( `Reading ${inputPath} because it was specified directly. 
Rename it to end in ${prefix}.ts or ` + - `.${prefix}.js to include it automatically when you specify a directory.`, + `${prefix}.js to include it automatically when you specify a directory.`, ), ); } From 9d347e9b9f95a0f2e3686e0f69f3e6616b485c93 Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Mon, 4 May 2026 18:17:48 +0200 Subject: [PATCH 22/26] chore: Fix huggingface canary snapshot drift (#1924) --- .../__snapshots__/huggingface-v3150.log-payloads.json | 6 +++--- .../__snapshots__/huggingface-v3150.span-events.json | 6 +++--- .../__snapshots__/huggingface-v41315.log-payloads.json | 6 +++--- .../__snapshots__/huggingface-v41315.span-events.json | 6 +++--- .../__snapshots__/openrouter-v0123.span-events.json | 8 ++++++++ e2e/scenarios/openrouter-instrumentation/package.json | 2 +- .../openrouter-instrumentation/pnpm-lock.yaml | 10 +++++----- 7 files changed, 26 insertions(+), 18 deletions(-) diff --git a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.log-payloads.json b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.log-payloads.json index 8c75b57ba..2d4df7a8e 100644 --- a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.log-payloads.json +++ b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.log-payloads.json @@ -133,7 +133,7 @@ "metadata": { "created": 0, "id": "", - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion" }, "output": [ @@ -263,7 +263,7 @@ "metadata": { "created": 0, "id": "", - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk" }, "output": { @@ -417,7 +417,7 @@ "metadata": { "created": 0, "id": "", - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk" }, "output": { diff --git 
a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.span-events.json b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.span-events.json index 1fb8555b4..c7316d5dd 100644 --- a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.span-events.json +++ b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v3150.span-events.json @@ -31,7 +31,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "meta-llama/Llama-3.1-8B-Instruct", "provider": "featherless-ai" }, "metric_keys": [ @@ -74,7 +74,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "meta-llama/Llama-3.1-8B-Instruct", "provider": "featherless-ai" }, "metric_keys": [ @@ -111,7 +111,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "meta-llama/Llama-3.1-8B-Instruct", "provider": "featherless-ai" }, "metric_keys": [ diff --git a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.log-payloads.json b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.log-payloads.json index 8c75b57ba..2d4df7a8e 100644 --- a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.log-payloads.json +++ b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.log-payloads.json @@ -133,7 +133,7 @@ "metadata": { "created": 0, "id": "", - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion" }, "output": [ @@ -263,7 +263,7 @@ "metadata": { "created": 0, "id": "", - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk" }, "output": { @@ -417,7 +417,7 @@ "metadata": { "created": 0, "id": "", - "model": 
"meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "meta-llama/Llama-3.1-8B-Instruct", "object": "chat.completion.chunk" }, "output": { diff --git a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.span-events.json b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.span-events.json index 1fb8555b4..c7316d5dd 100644 --- a/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.span-events.json +++ b/e2e/scenarios/huggingface-instrumentation/__snapshots__/huggingface-v41315.span-events.json @@ -31,7 +31,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "meta-llama/Llama-3.1-8B-Instruct", "provider": "featherless-ai" }, "metric_keys": [ @@ -74,7 +74,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "meta-llama/Llama-3.1-8B-Instruct", "provider": "featherless-ai" }, "metric_keys": [ @@ -111,7 +111,7 @@ "has_input": true, "has_output": true, "metadata": { - "model": "meta-llama/Meta-Llama-3.1-8B-Instruct", + "model": "meta-llama/Llama-3.1-8B-Instruct", "provider": "featherless-ai" }, "metric_keys": [ diff --git a/e2e/scenarios/openrouter-instrumentation/__snapshots__/openrouter-v0123.span-events.json b/e2e/scenarios/openrouter-instrumentation/__snapshots__/openrouter-v0123.span-events.json index 09b6b3146..3f18e51b3 100644 --- a/e2e/scenarios/openrouter-instrumentation/__snapshots__/openrouter-v0123.span-events.json +++ b/e2e/scenarios/openrouter-instrumentation/__snapshots__/openrouter-v0123.span-events.json @@ -38,6 +38,10 @@ "completion_audio_tokens", "completion_reasoning_tokens", "completion_tokens", + "cost", + "cost_upstream_inference_completions_cost", + "cost_upstream_inference_cost", + "cost_upstream_inference_prompt_cost", "prompt_audio_tokens", "prompt_cache_write_tokens", "prompt_cached_tokens", @@ -80,6 +84,10 @@ "completion_audio_tokens", 
"completion_reasoning_tokens", "completion_tokens", + "cost", + "cost_upstream_inference_completions_cost", + "cost_upstream_inference_cost", + "cost_upstream_inference_prompt_cost", "prompt_audio_tokens", "prompt_cache_write_tokens", "prompt_cached_tokens", diff --git a/e2e/scenarios/openrouter-instrumentation/package.json b/e2e/scenarios/openrouter-instrumentation/package.json index e42f8016b..790dd41af 100644 --- a/e2e/scenarios/openrouter-instrumentation/package.json +++ b/e2e/scenarios/openrouter-instrumentation/package.json @@ -9,7 +9,7 @@ } }, "dependencies": { - "@openrouter/sdk": "0.12.3", + "@openrouter/sdk": "0.12.22", "openrouter-sdk-v0911": "npm:@openrouter/sdk@0.9.11", "zod": "4.1.11" } diff --git a/e2e/scenarios/openrouter-instrumentation/pnpm-lock.yaml b/e2e/scenarios/openrouter-instrumentation/pnpm-lock.yaml index b38cd7491..8d025aa0d 100644 --- a/e2e/scenarios/openrouter-instrumentation/pnpm-lock.yaml +++ b/e2e/scenarios/openrouter-instrumentation/pnpm-lock.yaml @@ -9,8 +9,8 @@ importers: .: dependencies: '@openrouter/sdk': - specifier: 0.12.3 - version: 0.12.3 + specifier: 0.12.22 + version: 0.12.22 openrouter-sdk-v0911: specifier: npm:@openrouter/sdk@0.9.11 version: '@openrouter/sdk@0.9.11' @@ -20,8 +20,8 @@ importers: packages: - '@openrouter/sdk@0.12.3': - resolution: {integrity: sha512-qpzxyNS3ikz7BQ4vDvSlheJ33oM/34l103M1+k1E9yjOmXK5abz5LHCLNhOEmbLNg4MvZG9rg7TPLsGgREe0Ow==} + '@openrouter/sdk@0.12.22': + resolution: {integrity: sha512-I+1C/6b5PL98Uc2OQeQy0dWc+MJ9AcUwGByjWSf7AoKk5VSVrDueAfsC65I+lakeUiTlFMCLGFEWVj2pF1OWdw==} '@openrouter/sdk@0.9.11': resolution: {integrity: sha512-BgFu6NcIJO4a9aVjr04y3kZ8pyM71j15I+bzfVAGEvxnj+KQNIkBYQGgwrG3D+aT1QpDKLki8btcQmpaxUas6A==} @@ -31,7 +31,7 @@ packages: snapshots: - '@openrouter/sdk@0.12.3': + '@openrouter/sdk@0.12.22': dependencies: zod: 4.1.11 From 94ed6b368c26b766bdbe72a2e909cf9da7496f7a Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Mon, 4 May 2026 19:00:51 +0200 Subject: [PATCH 23/26] chore: 
Make anthropic test less brittle (#1936) for some reason the model started respond weirdly --- js/src/wrappers/anthropic.test.ts | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) diff --git a/js/src/wrappers/anthropic.test.ts b/js/src/wrappers/anthropic.test.ts index e8f1c3f5d..aad51db97 100644 --- a/js/src/wrappers/anthropic.test.ts +++ b/js/src/wrappers/anthropic.test.ts @@ -357,12 +357,11 @@ describe("anthropic client unit tests", { retry: 3 }, () => { messages: [ { role: "user", - content: "What is Shakespeare's sonnet 18?", + content: "Reply exactly with BTANTHROPICSTREAMOK", }, ], - max_tokens: 1000, - system: - "No punctuation, newlines or non-alphanumeric characters. Just the poem.", + max_tokens: 20, + system: "No punctuation or newlines.", temperature: 0.01, stream: true, }); @@ -382,16 +381,13 @@ describe("anthropic client unit tests", { retry: 3 }, () => { const span = spans[0] as any; expect(span.input).toBeDefined(); - // clean up the output to make it easier to spot check + // Clean up the output to make the live provider assertion robust to casing + // and incidental spacing while still proving the streamed text was logged. const output = span.output .toLowerCase() - .replace(/\n/g, " ") + .replace(/\s/g, "") .replace(/'/g, ""); - // Validate we collected the streamed text without relying on one exact phrasing. 
- expect(output).toContain("shall i compare thee to a summers day"); - expect(output).toContain("shakespeare"); - expect(output).toContain("summer"); - expect(output.length).toBeGreaterThan(200); + expect(output).toContain("btanthropicstreamok"); expect(span["span_attributes"].type).toBe("llm"); expect(span["span_attributes"].name).toBe("anthropic.messages.create"); From 406a4b5c400de815d0bd543eec7d393756af6fea Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Mon, 4 May 2026 20:13:05 +0200 Subject: [PATCH 24/26] ci: Set reporters to default (#1937) I don't like that vitest is spamming ci summaries --- e2e/vitest.config.mts | 1 + integrations/browser-js/vitest.config.ts | 1 + integrations/langchain-js/vitest.config.ts | 1 + integrations/openai-agents-js/vitest.config.ts | 1 + integrations/otel-js/otel-v1/vitest.config.ts | 1 + integrations/otel-js/otel-v2/vitest.config.ts | 1 + integrations/otel-js/vitest.config.ts | 1 + integrations/templates-nunjucks/vitest.config.ts | 1 + integrations/temporal-js/vitest.config.ts | 1 + integrations/vercel-ai-sdk/vitest.config.ts | 1 + js/examples/vitest/vitest.config.js | 1 + js/src/wrappers/ai-sdk/tests/v5/vitest.config.js | 1 + js/src/wrappers/ai-sdk/tests/v6/vitest.config.js | 1 + js/src/wrappers/claude-agent-sdk/vitest.config.js | 1 + js/src/wrappers/vitest/vitest.config.js | 7 +++++++ js/vitest.config.js | 1 + 16 files changed, 22 insertions(+) create mode 100644 js/src/wrappers/vitest/vitest.config.js diff --git a/e2e/vitest.config.mts b/e2e/vitest.config.mts index c02ba045f..d5a6f59a1 100644 --- a/e2e/vitest.config.mts +++ b/e2e/vitest.config.mts @@ -3,6 +3,7 @@ import { E2E_TAGS } from "./helpers/tags"; export default defineConfig({ test: { + reporters: ["default"], include: ["scenarios/**/*.test.ts"], // We run the scenarios in the hooks. 
hookTimeout: 120_000, diff --git a/integrations/browser-js/vitest.config.ts b/integrations/browser-js/vitest.config.ts index 73a26a648..744610c82 100644 --- a/integrations/browser-js/vitest.config.ts +++ b/integrations/browser-js/vitest.config.ts @@ -2,6 +2,7 @@ import { defineConfig } from "vitest/config"; export default defineConfig({ test: { + reporters: ["default"], include: ["tests/**/*.test.ts", "src/**/*.test.ts"], }, }); diff --git a/integrations/langchain-js/vitest.config.ts b/integrations/langchain-js/vitest.config.ts index d16bcb8cc..9c517c86c 100644 --- a/integrations/langchain-js/vitest.config.ts +++ b/integrations/langchain-js/vitest.config.ts @@ -2,6 +2,7 @@ import { defineConfig } from "vitest/config"; export default defineConfig({ test: { + reporters: ["default"], setupFiles: ["./src/test/setup.ts"], include: ["tests/**/*.test.ts", "src/**/*.test.ts"], }, diff --git a/integrations/openai-agents-js/vitest.config.ts b/integrations/openai-agents-js/vitest.config.ts index 8e30c5974..9b3c6bb43 100644 --- a/integrations/openai-agents-js/vitest.config.ts +++ b/integrations/openai-agents-js/vitest.config.ts @@ -2,6 +2,7 @@ import { defineConfig } from "vitest/config"; export default defineConfig({ test: { + reporters: ["default"], include: ["tests/**/*.test.ts", "src/**/*.test.ts"], // Add any specific test configuration if needed }, diff --git a/integrations/otel-js/otel-v1/vitest.config.ts b/integrations/otel-js/otel-v1/vitest.config.ts index fd400cc49..0ef9b7448 100644 --- a/integrations/otel-js/otel-v1/vitest.config.ts +++ b/integrations/otel-js/otel-v1/vitest.config.ts @@ -18,6 +18,7 @@ export default defineConfig({ } : {}, test: { + reporters: ["default"], include: ["tests/**/*.test.ts", "src/**/*.test.ts"], }, }); diff --git a/integrations/otel-js/otel-v2/vitest.config.ts b/integrations/otel-js/otel-v2/vitest.config.ts index fd400cc49..0ef9b7448 100644 --- a/integrations/otel-js/otel-v2/vitest.config.ts +++ 
b/integrations/otel-js/otel-v2/vitest.config.ts @@ -18,6 +18,7 @@ export default defineConfig({ } : {}, test: { + reporters: ["default"], include: ["tests/**/*.test.ts", "src/**/*.test.ts"], }, }); diff --git a/integrations/otel-js/vitest.config.ts b/integrations/otel-js/vitest.config.ts index 73a26a648..744610c82 100644 --- a/integrations/otel-js/vitest.config.ts +++ b/integrations/otel-js/vitest.config.ts @@ -2,6 +2,7 @@ import { defineConfig } from "vitest/config"; export default defineConfig({ test: { + reporters: ["default"], include: ["tests/**/*.test.ts", "src/**/*.test.ts"], }, }); diff --git a/integrations/templates-nunjucks/vitest.config.ts b/integrations/templates-nunjucks/vitest.config.ts index 73a26a648..744610c82 100644 --- a/integrations/templates-nunjucks/vitest.config.ts +++ b/integrations/templates-nunjucks/vitest.config.ts @@ -2,6 +2,7 @@ import { defineConfig } from "vitest/config"; export default defineConfig({ test: { + reporters: ["default"], include: ["tests/**/*.test.ts", "src/**/*.test.ts"], }, }); diff --git a/integrations/temporal-js/vitest.config.ts b/integrations/temporal-js/vitest.config.ts index 73a26a648..744610c82 100644 --- a/integrations/temporal-js/vitest.config.ts +++ b/integrations/temporal-js/vitest.config.ts @@ -2,6 +2,7 @@ import { defineConfig } from "vitest/config"; export default defineConfig({ test: { + reporters: ["default"], include: ["tests/**/*.test.ts", "src/**/*.test.ts"], }, }); diff --git a/integrations/vercel-ai-sdk/vitest.config.ts b/integrations/vercel-ai-sdk/vitest.config.ts index 73a26a648..744610c82 100644 --- a/integrations/vercel-ai-sdk/vitest.config.ts +++ b/integrations/vercel-ai-sdk/vitest.config.ts @@ -2,6 +2,7 @@ import { defineConfig } from "vitest/config"; export default defineConfig({ test: { + reporters: ["default"], include: ["tests/**/*.test.ts", "src/**/*.test.ts"], }, }); diff --git a/js/examples/vitest/vitest.config.js b/js/examples/vitest/vitest.config.js index b904a4718..fc1bef90a 100644 
--- a/js/examples/vitest/vitest.config.js +++ b/js/examples/vitest/vitest.config.js @@ -2,6 +2,7 @@ import { defineConfig } from "vitest/config"; export default defineConfig({ test: { + reporters: ["default"], include: ["**/*.test.ts", "**/*.eval.ts"], exclude: ["**/node_modules/**", "**/dist/**"], testTimeout: 15000, diff --git a/js/src/wrappers/ai-sdk/tests/v5/vitest.config.js b/js/src/wrappers/ai-sdk/tests/v5/vitest.config.js index f677c040a..b5e6dc600 100644 --- a/js/src/wrappers/ai-sdk/tests/v5/vitest.config.js +++ b/js/src/wrappers/ai-sdk/tests/v5/vitest.config.js @@ -14,6 +14,7 @@ const config = { }, }, test: { + reporters: ["default"], include: ["../../ai-sdk.test.ts", "./ai-sdk.v5.test.ts"], exclude: ["**/node_modules/**", "**/dist/**"], }, diff --git a/js/src/wrappers/ai-sdk/tests/v6/vitest.config.js b/js/src/wrappers/ai-sdk/tests/v6/vitest.config.js index 78eafbd78..ef6448d7d 100644 --- a/js/src/wrappers/ai-sdk/tests/v6/vitest.config.js +++ b/js/src/wrappers/ai-sdk/tests/v6/vitest.config.js @@ -14,6 +14,7 @@ const config = { }, }, test: { + reporters: ["default"], include: ["../../ai-sdk.test.ts", "./ai-sdk.v6.test.ts"], exclude: ["**/node_modules/**", "**/dist/**"], }, diff --git a/js/src/wrappers/claude-agent-sdk/vitest.config.js b/js/src/wrappers/claude-agent-sdk/vitest.config.js index 206bf0b1c..e0dc2213a 100644 --- a/js/src/wrappers/claude-agent-sdk/vitest.config.js +++ b/js/src/wrappers/claude-agent-sdk/vitest.config.js @@ -28,6 +28,7 @@ const config = { exclude: ["vendor/**", "**/vendor/**"], }, test: { + reporters: ["default"], exclude: [ // Default vitest exclusions "**/node_modules/**", diff --git a/js/src/wrappers/vitest/vitest.config.js b/js/src/wrappers/vitest/vitest.config.js new file mode 100644 index 000000000..4142755f8 --- /dev/null +++ b/js/src/wrappers/vitest/vitest.config.js @@ -0,0 +1,7 @@ +import { defineConfig } from "vitest/config"; + +export default defineConfig({ + test: { + reporters: ["default"], + }, +}); diff --git 
a/js/vitest.config.js b/js/vitest.config.js index 089f86712..d9c34decb 100644 --- a/js/vitest.config.js +++ b/js/vitest.config.js @@ -27,6 +27,7 @@ const config = { exclude: ["vendor/**"], }, test: { + reporters: ["default"], exclude: [ // Default vitest exclusions "**/node_modules/**", From 0a2e9115c308aaa7ba1219af73a0b569f09dd1b7 Mon Sep 17 00:00:00 2001 From: Luca Forstner Date: Mon, 4 May 2026 20:24:11 +0200 Subject: [PATCH 25/26] feat: Add `@cursor/sdk` instrumentation (#1923) Fixes https://github.com/braintrustdata/braintrust-sdk-javascript/issues/1919 Can probably still be improved but a first iteration. Screenshot 2026-05-04 at 10 58 19 --- .changeset/short-moles-punch.md | 5 + .env.example | 1 + .github/workflows/e2e-canary.yaml | 1 + .github/workflows/integration-tests.yaml | 3 + e2e/README.md | 1 + e2e/config/pr-comment-scenarios.json | 6 + e2e/helpers/scenario-installer.ts | 37 +- .../cursor-sdk-v1-auto-hook.span-events.json | 234 ++++ .../cursor-sdk-v1-wrapped.span-events.json | 234 ++++ .../cursor-sdk-instrumentation/assertions.ts | 301 +++++ .../cursor-sdk-instrumentation/package.json | 19 + .../cursor-sdk-instrumentation/pnpm-lock.yaml | 1159 ++++++++++++++++ .../scenario.cursor-sdk-v1.mjs | 5 + .../scenario.cursor-sdk-v1.ts | 5 + .../scenario.impl.mjs | 136 ++ .../scenario.test.ts | 56 + e2e/scripts/run-canary-tests-docker.mjs | 1 + .../auto-instrumentations/bundler/plugin.ts | 2 + .../bundler/webpack-loader.ts | 2 + .../configs/cursor-sdk.ts | 49 + js/src/auto-instrumentations/hook.mts | 4 + js/src/auto-instrumentations/index.ts | 1 + js/src/exports.ts | 1 + js/src/instrumentation/braintrust-plugin.ts | 14 + .../plugins/cursor-sdk-channels.ts | 47 + .../plugins/cursor-sdk-plugin.test.ts | 259 ++++ .../plugins/cursor-sdk-plugin.ts | 1179 +++++++++++++++++ js/src/instrumentation/registry.ts | 10 +- js/src/vendor-sdk-types/cursor-sdk.ts | 337 +++++ js/src/wrappers/cursor-sdk.test.ts | 145 ++ js/src/wrappers/cursor-sdk.ts | 182 +++ turbo.json | 8 + 32 
files changed, 4411 insertions(+), 33 deletions(-) create mode 100644 .changeset/short-moles-punch.md create mode 100644 e2e/scenarios/cursor-sdk-instrumentation/__snapshots__/cursor-sdk-v1-auto-hook.span-events.json create mode 100644 e2e/scenarios/cursor-sdk-instrumentation/__snapshots__/cursor-sdk-v1-wrapped.span-events.json create mode 100644 e2e/scenarios/cursor-sdk-instrumentation/assertions.ts create mode 100644 e2e/scenarios/cursor-sdk-instrumentation/package.json create mode 100644 e2e/scenarios/cursor-sdk-instrumentation/pnpm-lock.yaml create mode 100644 e2e/scenarios/cursor-sdk-instrumentation/scenario.cursor-sdk-v1.mjs create mode 100644 e2e/scenarios/cursor-sdk-instrumentation/scenario.cursor-sdk-v1.ts create mode 100644 e2e/scenarios/cursor-sdk-instrumentation/scenario.impl.mjs create mode 100644 e2e/scenarios/cursor-sdk-instrumentation/scenario.test.ts create mode 100644 js/src/auto-instrumentations/configs/cursor-sdk.ts create mode 100644 js/src/instrumentation/plugins/cursor-sdk-channels.ts create mode 100644 js/src/instrumentation/plugins/cursor-sdk-plugin.test.ts create mode 100644 js/src/instrumentation/plugins/cursor-sdk-plugin.ts create mode 100644 js/src/vendor-sdk-types/cursor-sdk.ts create mode 100644 js/src/wrappers/cursor-sdk.test.ts create mode 100644 js/src/wrappers/cursor-sdk.ts diff --git a/.changeset/short-moles-punch.md b/.changeset/short-moles-punch.md new file mode 100644 index 000000000..a7a2b2261 --- /dev/null +++ b/.changeset/short-moles-punch.md @@ -0,0 +1,5 @@ +--- +"braintrust": minor +--- + +feat: Add @cursor/sdk instrumentation diff --git a/.env.example b/.env.example index c9c314bb3..bff0c17ab 100644 --- a/.env.example +++ b/.env.example @@ -2,6 +2,7 @@ BRAINTRUST_API_KEY= OPENAI_API_KEY= ANTHROPIC_API_KEY= GEMINI_API_KEY= +CURSOR_API_KEY= OPENROUTER_API_KEY= MISTRAL_API_KEY= HUGGINGFACE_API_KEY= diff --git a/.github/workflows/e2e-canary.yaml b/.github/workflows/e2e-canary.yaml index 12a287004..728c4f069 100644 --- 
a/.github/workflows/e2e-canary.yaml +++ b/.github/workflows/e2e-canary.yaml @@ -35,6 +35,7 @@ jobs: BRAINTRUST_E2E_PROJECT_NAME: ${{ vars.BRAINTRUST_E2E_PROJECT_NAME }} GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }} + CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }} GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }} diff --git a/.github/workflows/integration-tests.yaml b/.github/workflows/integration-tests.yaml index 7b7b88ef3..5e5cfcbbe 100644 --- a/.github/workflows/integration-tests.yaml +++ b/.github/workflows/integration-tests.yaml @@ -29,6 +29,7 @@ jobs: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} + CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }} OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }} MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }} HUGGINGFACE_API_KEY: ${{ secrets.HUGGINGFACE_API_KEY }} @@ -58,6 +59,7 @@ jobs: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }} + CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }} GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }} @@ -110,6 +112,7 @@ jobs: ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }} COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }} + CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }} GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }} OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }} diff --git a/e2e/README.md b/e2e/README.md index b4a15d56e..3ed3d4b1b 100644 --- a/e2e/README.md +++ b/e2e/README.md @@ -115,6 +115,7 @@ Non-hermetic scenarios require provider credentials in addition to the mock Brai - 
`OPENAI_API_KEY` - `ANTHROPIC_API_KEY` - `GEMINI_API_KEY` or `GOOGLE_API_KEY` +- `CURSOR_API_KEY` - `OPENROUTER_API_KEY` - `MISTRAL_API_KEY` - `HUGGINGFACE_API_KEY` diff --git a/e2e/config/pr-comment-scenarios.json b/e2e/config/pr-comment-scenarios.json index bee0040c5..0108b9e78 100644 --- a/e2e/config/pr-comment-scenarios.json +++ b/e2e/config/pr-comment-scenarios.json @@ -133,5 +133,11 @@ "label": "v0.2.81" } ] + }, + { + "scenarioDirName": "cursor-sdk-instrumentation", + "label": "Cursor SDK Instrumentation", + "metadataScenario": "cursor-sdk-instrumentation", + "variants": [{ "variantKey": "cursor-sdk-v1", "label": "v1" }] } ] diff --git a/e2e/helpers/scenario-installer.ts b/e2e/helpers/scenario-installer.ts index 29318aeff..9eb7b19c2 100644 --- a/e2e/helpers/scenario-installer.ts +++ b/e2e/helpers/scenario-installer.ts @@ -22,6 +22,7 @@ const INSTALL_SECRET_ENV_VARS = [ "ANTHROPIC_API_KEY", "BRAINTRUST_API_KEY", "COHERE_API_KEY", + "CURSOR_API_KEY", "GEMINI_API_KEY", "GITHUB_TOKEN", "GH_TOKEN", @@ -37,10 +38,9 @@ let cleanupRegistered = false; type CanaryDependencyRule = { packageName: string; - query: string; + version: string; }; -const canaryVersionCache = new Map(); const HELPERS_DIR = path.dirname(fileURLToPath(import.meta.url)); const E2E_ROOT = path.resolve(HELPERS_DIR, ".."); @@ -155,32 +155,6 @@ function packageSpecifier( : `npm:${packageName}@${version}`; } -async function resolveCanaryVersion( - rule: CanaryDependencyRule, -): Promise { - const cacheKey = rule.query; - const cached = canaryVersionCache.get(cacheKey); - if (cached) { - return cached; - } - - const output = await spawnOrThrow( - PNPM_COMMAND, - ["view", rule.query, "version", "--json"], - process.cwd(), - installEnv(), - ); - const parsed = JSON.parse(output) as string | string[]; - const version = Array.isArray(parsed) ? 
parsed.at(-1) : parsed; - - if (typeof version !== "string") { - throw new Error(`Could not resolve canary version for ${rule.query}`); - } - - canaryVersionCache.set(cacheKey, version); - return version; -} - function parseCanaryDependencyRule( dependencyName: string, rawRule: string, @@ -195,7 +169,7 @@ function parseCanaryDependencyRule( if (rawRule === "latest") { return { packageName: dependencyName, - query: dependencyName, + version: "latest", }; } @@ -208,7 +182,7 @@ function parseCanaryDependencyRule( return { packageName: rawRule.slice(0, versionSeparator), - query: rawRule, + version: rawRule.slice(versionSeparator + 1), }; } @@ -230,11 +204,10 @@ async function rewriteManifestForCanary(scenarioDir: string): Promise { rawRule, scenarioDir, ); - const version = await resolveCanaryVersion(rule); dependencies[dependencyName] = packageSpecifier( dependencyName, rule.packageName, - version, + rule.version, ); updated = true; } diff --git a/e2e/scenarios/cursor-sdk-instrumentation/__snapshots__/cursor-sdk-v1-auto-hook.span-events.json b/e2e/scenarios/cursor-sdk-instrumentation/__snapshots__/cursor-sdk-v1-auto-hook.span-events.json new file mode 100644 index 000000000..1308581f3 --- /dev/null +++ b/e2e/scenarios/cursor-sdk-instrumentation/__snapshots__/cursor-sdk-v1-auto-hook.span-events.json @@ -0,0 +1,234 @@ +{ + "conversation": { + "operation": { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "resume-conversation" + }, + "metric_keys": [], + "name": "cursor-sdk-resume-conversation-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + "task": { + "has_input": true, + "has_output": true, + "metadata": { + "cursor_sdk.agent_id": "", + "cursor_sdk.model": "composer-2", + "cursor_sdk.operation": "agent.send", + "cursor_sdk.run_id": "", + "cursor_sdk.status": "finished", + "model": "composer-2", + "provider": "cursor" + }, + "metric_keys": [ + "duration" + ], + "name": "Cursor Agent", + 
"root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + } + }, + "prompt": { + "operation": { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "prompt" + }, + "metric_keys": [], + "name": "cursor-sdk-prompt-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + "task": { + "has_input": true, + "has_output": true, + "metadata": { + "cursor_sdk.model": "composer-2", + "cursor_sdk.operation": "Agent.prompt", + "cursor_sdk.run_id": "", + "cursor_sdk.runtime": "local", + "cursor_sdk.status": "finished", + "model": "composer-2", + "provider": "cursor" + }, + "metric_keys": [ + "duration" + ], + "name": "Cursor Agent", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + } + }, + "root": { + "has_input": false, + "has_output": false, + "metadata": { + "scenario": "cursor-sdk-instrumentation" + }, + "metric_keys": [], + "name": "cursor-sdk-root", + "root_span_id": "", + "span_id": "", + "span_parents": [], + "type": "task" + }, + "stream": { + "operation": { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "stream" + }, + "metric_keys": [], + "name": "cursor-sdk-stream-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + "subagent_task": { + "has_input": true, + "has_output": true, + "metadata": { + "cursor_sdk.tool.status": "completed", + "gen_ai.tool.name": "task" + }, + "metric_keys": [], + "name": "Agent: ", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + }, + "subagent_tool": { + "has_input": true, + "has_output": true, + "metadata": { + "cursor_sdk.tool.status": "completed", + "gen_ai.tool.name": "task" + }, + "metric_keys": [], + "name": "tool: task", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "tool" + }, + "task": { + "has_input": true, + "has_output": true, + "metadata": { + 
"cursor_sdk.agent_id": "", + "cursor_sdk.model": "composer-2", + "cursor_sdk.operation": "agent.send", + "cursor_sdk.run_id": "", + "cursor_sdk.status": "finished", + "model": "composer-2", + "provider": "cursor" + }, + "metric_keys": [ + "duration" + ], + "name": "Cursor Agent", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + }, + "tool": { + "has_input": true, + "has_output": true, + "metadata": { + "cursor_sdk.tool.status": "completed", + "gen_ai.tool.name": "shell" + }, + "metric_keys": [], + "name": "tool: shell", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "tool" + } + }, + "wait": { + "operation": { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "wait" + }, + "metric_keys": [], + "name": "cursor-sdk-wait-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + "task": { + "has_input": true, + "has_output": true, + "metadata": { + "cursor_sdk.agent_id": "", + "cursor_sdk.model": "composer-2", + "cursor_sdk.operation": "agent.send", + "cursor_sdk.run_id": "", + "cursor_sdk.status": "finished", + "cursor_sdk.step_types": [ + "assistantMessage" + ], + "model": "composer-2", + "provider": "cursor" + }, + "metric_keys": [ + "completion_tokens", + "cursor_sdk.delta_tokens", + "cursor_sdk.step_duration_ms", + "cursor_sdk.steps", + "duration", + "prompt_cache_creation_tokens", + "prompt_cached_tokens", + "prompt_tokens", + "tokens" + ], + "name": "Cursor Agent", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + } + } +} diff --git a/e2e/scenarios/cursor-sdk-instrumentation/__snapshots__/cursor-sdk-v1-wrapped.span-events.json b/e2e/scenarios/cursor-sdk-instrumentation/__snapshots__/cursor-sdk-v1-wrapped.span-events.json new file mode 100644 index 000000000..1308581f3 --- /dev/null +++ b/e2e/scenarios/cursor-sdk-instrumentation/__snapshots__/cursor-sdk-v1-wrapped.span-events.json @@ 
-0,0 +1,234 @@ +{ + "conversation": { + "operation": { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "resume-conversation" + }, + "metric_keys": [], + "name": "cursor-sdk-resume-conversation-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + "task": { + "has_input": true, + "has_output": true, + "metadata": { + "cursor_sdk.agent_id": "", + "cursor_sdk.model": "composer-2", + "cursor_sdk.operation": "agent.send", + "cursor_sdk.run_id": "", + "cursor_sdk.status": "finished", + "model": "composer-2", + "provider": "cursor" + }, + "metric_keys": [ + "duration" + ], + "name": "Cursor Agent", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + } + }, + "prompt": { + "operation": { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "prompt" + }, + "metric_keys": [], + "name": "cursor-sdk-prompt-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + "task": { + "has_input": true, + "has_output": true, + "metadata": { + "cursor_sdk.model": "composer-2", + "cursor_sdk.operation": "Agent.prompt", + "cursor_sdk.run_id": "", + "cursor_sdk.runtime": "local", + "cursor_sdk.status": "finished", + "model": "composer-2", + "provider": "cursor" + }, + "metric_keys": [ + "duration" + ], + "name": "Cursor Agent", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + } + }, + "root": { + "has_input": false, + "has_output": false, + "metadata": { + "scenario": "cursor-sdk-instrumentation" + }, + "metric_keys": [], + "name": "cursor-sdk-root", + "root_span_id": "", + "span_id": "", + "span_parents": [], + "type": "task" + }, + "stream": { + "operation": { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "stream" + }, + "metric_keys": [], + "name": "cursor-sdk-stream-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" 
+ ], + "type": null + }, + "subagent_task": { + "has_input": true, + "has_output": true, + "metadata": { + "cursor_sdk.tool.status": "completed", + "gen_ai.tool.name": "task" + }, + "metric_keys": [], + "name": "Agent: ", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + }, + "subagent_tool": { + "has_input": true, + "has_output": true, + "metadata": { + "cursor_sdk.tool.status": "completed", + "gen_ai.tool.name": "task" + }, + "metric_keys": [], + "name": "tool: task", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "tool" + }, + "task": { + "has_input": true, + "has_output": true, + "metadata": { + "cursor_sdk.agent_id": "", + "cursor_sdk.model": "composer-2", + "cursor_sdk.operation": "agent.send", + "cursor_sdk.run_id": "", + "cursor_sdk.status": "finished", + "model": "composer-2", + "provider": "cursor" + }, + "metric_keys": [ + "duration" + ], + "name": "Cursor Agent", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + }, + "tool": { + "has_input": true, + "has_output": true, + "metadata": { + "cursor_sdk.tool.status": "completed", + "gen_ai.tool.name": "shell" + }, + "metric_keys": [], + "name": "tool: shell", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "tool" + } + }, + "wait": { + "operation": { + "has_input": false, + "has_output": false, + "metadata": { + "operation": "wait" + }, + "metric_keys": [], + "name": "cursor-sdk-wait-operation", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": null + }, + "task": { + "has_input": true, + "has_output": true, + "metadata": { + "cursor_sdk.agent_id": "", + "cursor_sdk.model": "composer-2", + "cursor_sdk.operation": "agent.send", + "cursor_sdk.run_id": "", + "cursor_sdk.status": "finished", + "cursor_sdk.step_types": [ + "assistantMessage" + ], + "model": "composer-2", + "provider": "cursor" + }, + "metric_keys": [ + "completion_tokens", + 
"cursor_sdk.delta_tokens", + "cursor_sdk.step_duration_ms", + "cursor_sdk.steps", + "duration", + "prompt_cache_creation_tokens", + "prompt_cached_tokens", + "prompt_tokens", + "tokens" + ], + "name": "Cursor Agent", + "root_span_id": "", + "span_id": "", + "span_parents": [ + "" + ], + "type": "task" + } + } +} diff --git a/e2e/scenarios/cursor-sdk-instrumentation/assertions.ts b/e2e/scenarios/cursor-sdk-instrumentation/assertions.ts new file mode 100644 index 000000000..85016fb83 --- /dev/null +++ b/e2e/scenarios/cursor-sdk-instrumentation/assertions.ts @@ -0,0 +1,301 @@ +import { beforeAll, describe, expect, test } from "vitest"; +import { normalizeForSnapshot, type Json } from "../../helpers/normalize"; +import type { CapturedLogEvent } from "../../helpers/mock-braintrust-server"; +import { + formatJsonFileSnapshot, + resolveFileSnapshotPath, +} from "../../helpers/file-snapshot"; +import { withScenarioHarness } from "../../helpers/scenario-harness"; +import { + findAllSpans, + findChildSpans, + findLatestSpan, +} from "../../helpers/trace-selectors"; +import { summarizeWrapperContract } from "../../helpers/wrapper-contract"; +import { ROOT_NAME, SCENARIO_NAME } from "./scenario.impl.mjs"; + +type RunCursorSDKScenario = (harness: { + runNodeScenarioDir: (options: { + entry: string; + nodeArgs: string[]; + runContext?: { variantKey: string }; + scenarioDir: string; + timeoutMs: number; + }) => Promise; + runScenarioDir: (options: { + entry: string; + runContext?: { variantKey: string }; + scenarioDir: string; + timeoutMs: number; + }) => Promise; +}) => Promise; + +const METADATA_KEYS = [ + "provider", + "model", + "operation", + "scenario", + "gen_ai.tool.name", + "cursor_sdk.model", + "cursor_sdk.operation", + "cursor_sdk.agent_id", + "cursor_sdk.run_id", + "cursor_sdk.runtime", + "cursor_sdk.status", + "cursor_sdk.duration_ms", + "cursor_sdk.step_types", + "cursor_sdk.tool.status", +] as const; + +function summarizeSpan(event: CapturedLogEvent | undefined): 
Json { + if (!event) { + return null; + } + const summary = summarizeWrapperContract(event, [...METADATA_KEYS]) as Record< + string, + Json + >; + if (summary.metadata && typeof summary.metadata === "object") { + const metadata = summary.metadata as Record; + if (typeof metadata["cursor_sdk.agent_id"] === "string") { + metadata["cursor_sdk.agent_id"] = ""; + } + if (typeof metadata["cursor_sdk.run_id"] === "string") { + metadata["cursor_sdk.run_id"] = ""; + } + if (typeof metadata["cursor_sdk.duration_ms"] === "number") { + metadata["cursor_sdk.duration_ms"] = 1; + } + } + if (typeof event.row.error === "string") { + summary.error = event.row.error; + } + if (typeof summary.name === "string" && summary.name.startsWith("Agent:")) { + summary.name = "Agent: "; + } + return summary; +} + +function findOperation(events: CapturedLogEvent[], name: string) { + return findLatestSpan(events, name); +} + +function findCursorTask(events: CapturedLogEvent[], operationName: string) { + const operation = findOperation(events, operationName); + return findChildSpans(events, "Cursor Agent", operation?.span.id).at(-1); +} + +function findSubagentTool( + events: CapturedLogEvent[], + parentId: string | undefined, +) { + if (!parentId) { + return undefined; + } + return [...events] + .reverse() + .find( + (event) => + event.span.type === "tool" && + event.span.parentIds.includes(parentId) && + ["tool: Agent", "tool: Task", "tool: task"].includes( + event.span.name ?? "", + ), + ); +} + +function findSubagentTask( + events: CapturedLogEvent[], + parentId: string | undefined, +) { + if (!parentId) { + return undefined; + } + return [...events] + .reverse() + .find( + (event) => + event.span.type === "task" && + event.span.parentIds.includes(parentId) && + event.span.name?.startsWith("Agent:"), + ); +} + +function outputText(event: CapturedLogEvent | undefined): string { + return typeof event?.output === "string" ? 
event.output : ""; +} + +function summarize(events: CapturedLogEvent[]): Json { + const promptTask = findCursorTask(events, "cursor-sdk-prompt-operation"); + const streamTask = findCursorTask(events, "cursor-sdk-stream-operation"); + const waitTask = findCursorTask(events, "cursor-sdk-wait-operation"); + const conversationTask = findCursorTask( + events, + "cursor-sdk-resume-conversation-operation", + ); + const tool = findAllSpans(events, "tool: shell").at(-1); + const subagentTool = findSubagentTool(events, streamTask?.span.id); + const subagentTask = findSubagentTask(events, subagentTool?.span.id); + + return normalizeForSnapshot({ + conversation: { + operation: summarizeSpan( + findOperation(events, "cursor-sdk-resume-conversation-operation"), + ), + task: summarizeSpan(conversationTask), + }, + prompt: { + operation: summarizeSpan( + findOperation(events, "cursor-sdk-prompt-operation"), + ), + task: summarizeSpan(promptTask), + }, + root: summarizeSpan(findLatestSpan(events, ROOT_NAME)), + stream: { + operation: summarizeSpan( + findOperation(events, "cursor-sdk-stream-operation"), + ), + subagent_task: summarizeSpan(subagentTask), + subagent_tool: summarizeSpan(subagentTool), + task: summarizeSpan(streamTask), + tool: summarizeSpan(tool), + }, + wait: { + operation: summarizeSpan( + findOperation(events, "cursor-sdk-wait-operation"), + ), + task: summarizeSpan(waitTask), + }, + } as Json); +} + +export function defineCursorSDKInstrumentationAssertions(options: { + name: string; + runScenario: RunCursorSDKScenario; + snapshotName: string; + testFileUrl: string; + timeoutMs: number; +}): void { + const snapshotPath = resolveFileSnapshotPath( + options.testFileUrl, + `${options.snapshotName}.span-events.json`, + ); + const testConfig = { timeout: options.timeoutMs }; + + describe(options.name, () => { + let events: CapturedLogEvent[] = []; + + beforeAll(async () => { + await withScenarioHarness(async (harness) => { + await options.runScenario(harness); + events 
= harness.events(); + }); + }, options.timeoutMs); + + test("captures the root trace", testConfig, () => { + const root = findLatestSpan(events, ROOT_NAME); + + expect(root).toBeDefined(); + expect(root?.row.metadata).toMatchObject({ scenario: SCENARIO_NAME }); + }); + + test( + "captures Cursor Agent task spans for run-producing APIs", + testConfig, + () => { + for (const operationName of [ + "cursor-sdk-prompt-operation", + "cursor-sdk-stream-operation", + "cursor-sdk-wait-operation", + "cursor-sdk-resume-conversation-operation", + ]) { + const operation = findOperation(events, operationName); + const task = findCursorTask(events, operationName); + + expect(operation).toBeDefined(); + expect(task).toBeDefined(); + expect(task?.span.parentIds).toEqual([operation?.span.id ?? ""]); + expect(task?.row.metadata).toMatchObject({ + provider: "cursor", + }); + } + }, + ); + + test( + "captures tool spans when Cursor surfaces tool calls", + testConfig, + () => { + const streamTask = findCursorTask( + events, + "cursor-sdk-stream-operation", + ); + const toolSpans = events.filter( + (event) => + event.span.type === "tool" && + event.span.parentIds.includes(streamTask?.span.id ?? 
""), + ); + + expect(toolSpans.length).toBeGreaterThan(0); + expect( + toolSpans.some( + (event) => + event.input !== undefined && + event.output !== undefined && + event.metadata?.["cursor_sdk.tool.status"] === "completed", + ), + ).toBe(true); + expect( + JSON.stringify(toolSpans.map((event) => event.output)), + ).toContain("cursor_tool_ok"); + }, + ); + + test("captures subagent spans when Cursor uses agents", testConfig, () => { + const streamTask = findCursorTask(events, "cursor-sdk-stream-operation"); + const subagentTool = findSubagentTool(events, streamTask?.span.id); + const subagentTask = findSubagentTask(events, subagentTool?.span.id); + + expect(subagentTool).toBeDefined(); + expect(subagentTool?.metadata).toMatchObject({ + "cursor_sdk.tool.status": "completed", + }); + expect(subagentTask).toBeDefined(); + expect(subagentTask?.span.rootId).toBe(streamTask?.span.rootId); + expect(subagentTask?.metadata).toMatchObject({ + "cursor_sdk.tool.status": "completed", + }); + expect(subagentTask?.output).toBeDefined(); + }); + + test("preserves user onDelta/onStep callbacks", testConfig, () => { + expect(findLatestSpan(events, "cursor-sdk-user-on-delta")).toBeDefined(); + expect(findLatestSpan(events, "cursor-sdk-user-on-step")).toBeDefined(); + + const waitTask = findCursorTask(events, "cursor-sdk-wait-operation"); + expect(waitTask?.metrics).toMatchObject({ + completion_tokens: expect.any(Number), + prompt_tokens: expect.any(Number), + }); + expect(waitTask?.metrics?.["cursor_sdk.step_duration_ms"]).toEqual( + expect.any(Number), + ); + expect(outputText(waitTask)).toContain("CURSOR_WAIT_OK"); + }); + + test("captures conversation output text", testConfig, () => { + const conversationTask = findCursorTask( + events, + "cursor-sdk-resume-conversation-operation", + ); + + expect(outputText(conversationTask)).toContain("CURSOR_CONVERSATION_OK"); + }); + + test("matches the shared span snapshot", testConfig, async () => { + await expect( + 
formatJsonFileSnapshot(summarize(events)), + ).toMatchFileSnapshot(snapshotPath); + }); + }); +} diff --git a/e2e/scenarios/cursor-sdk-instrumentation/package.json b/e2e/scenarios/cursor-sdk-instrumentation/package.json new file mode 100644 index 000000000..44193c30f --- /dev/null +++ b/e2e/scenarios/cursor-sdk-instrumentation/package.json @@ -0,0 +1,19 @@ +{ + "name": "@braintrust/e2e-cursor-sdk-instrumentation", + "private": true, + "braintrustScenario": { + "canary": { + "dependencies": { + "cursor-sdk-v1": "@cursor/sdk@latest" + } + } + }, + "dependencies": { + "cursor-sdk-v1": "npm:@cursor/sdk@1.0.10" + }, + "pnpm": { + "onlyBuiltDependencies": [ + "sqlite3" + ] + } +} diff --git a/e2e/scenarios/cursor-sdk-instrumentation/pnpm-lock.yaml b/e2e/scenarios/cursor-sdk-instrumentation/pnpm-lock.yaml new file mode 100644 index 000000000..2917de861 --- /dev/null +++ b/e2e/scenarios/cursor-sdk-instrumentation/pnpm-lock.yaml @@ -0,0 +1,1159 @@ +lockfileVersion: '9.0' + +settings: + autoInstallPeers: true + excludeLinksFromLockfile: false + +importers: + + .: + dependencies: + cursor-sdk-v1: + specifier: npm:@cursor/sdk@1.0.10 + version: '@cursor/sdk@1.0.10' + +packages: + + '@bufbuild/protobuf@1.10.0': + resolution: {integrity: sha512-QDdVFLoN93Zjg36NoQPZfsVH9tZew7wKDKyV5qRdj8ntT4wQCOradQjRaTdwMhWUYsgKsvCINKKm87FdEk96Ag==} + + '@connectrpc/connect-node@1.7.0': + resolution: {integrity: sha512-6vaPIkG/NyhxlYgytLoR9KYbPhczEboFB2OYWkA9qvUz1K7efXfeGrlRxoLtpa+r8VxyIOw73w5ktNe743nD+A==} + engines: {node: '>=16.0.0'} + peerDependencies: + '@bufbuild/protobuf': ^1.10.0 + '@connectrpc/connect': 1.7.0 + + '@connectrpc/connect@1.7.0': + resolution: {integrity: sha512-iNKdJRi69YP3mq6AePRT8F/HrxWCewrhxnLMNm0vpqXAR8biwzRtO6Hjx80C6UvtKJ5sFmffQT7I4Baecz389w==} + peerDependencies: + '@bufbuild/protobuf': ^1.10.0 + + '@cursor/sdk-darwin-arm64@1.0.10': + resolution: {integrity: sha512-uwNhyH2fyJhiSHhgWlozeuelBMyjotVN7jmqrPxaBR2Qii4JYmuhlNvo4fiNhojvLjC5EMF1pnM5tr+Uyt/G1g==} + cpu: [arm64] 
+ os: [darwin] + + '@cursor/sdk-darwin-x64@1.0.10': + resolution: {integrity: sha512-Z0IVJB5cfyQ3lHz9MEjyH8bnmpaLRx/eh1E6MKC95lLr5K+1jPITsKgK3P9NwhIl1kc0NEA/z90mXxDOWoc2fg==} + cpu: [x64] + os: [darwin] + + '@cursor/sdk-linux-arm64@1.0.10': + resolution: {integrity: sha512-443sB9wDmlsdMDSgcGbmaNf5H+3IoIFhnmxSACFXbdFYNYj4U6e1TWxJqpl/FI/MjTodQFNQBvGbUo6SUcwj8w==} + cpu: [arm64] + os: [linux] + + '@cursor/sdk-linux-x64@1.0.10': + resolution: {integrity: sha512-elRt/lsH6xw1LyD4HcPAJINk5q7Apj4F68lmemb0UZOC01w5PfHsjUkURg7CkPWL7PmNgUjxXTaQe3EdEq8now==} + cpu: [x64] + os: [linux] + + '@cursor/sdk-win32-x64@1.0.10': + resolution: {integrity: sha512-5Fyb7aZYnSPRQPg/reHpwEw8SDhJHg1W+ARyDCByysI2II59RFqqBdlDay7iwUCKaziemuebFK5KNSVt8WlYTA==} + cpu: [x64] + os: [win32] + + '@cursor/sdk@1.0.10': + resolution: {integrity: sha512-j2y2sbDBgxMPZqXWUyCRfzatpD4h0Vg4SLvVLBV+j65A8m+e9gTdrSUK3eaUdIs9IAAZe1gngP2aOKMw6/tq+Q==} + engines: {node: '>=18'} + + '@fastify/busboy@2.1.1': + resolution: {integrity: sha512-vBZP4NlzfOlerQTnba4aqZoMhE/a9HY7HRqoOPaETQcSQuWEIyZMHGfVu6w9wGtGK5fED5qRs2DteVCjOH60sA==} + engines: {node: '>=14'} + + '@gar/promisify@1.1.3': + resolution: {integrity: sha512-k2Ty1JcVojjJFwrg/ThKi2ujJ7XNLYaFGNB/bWT9wGR+oSMJHMa5w+CUq6p/pVrKeNNgA7pCqEcjSnHVoqJQFw==} + + '@npmcli/fs@1.1.1': + resolution: {integrity: sha512-8KG5RD0GVP4ydEzRn/I4BNDuxDtqVbOdm8675T49OIG/NGhaK0pjPX7ZcDlvKYbA+ulvVK3ztfcF4uBdOxuJbQ==} + + '@npmcli/move-file@1.1.2': + resolution: {integrity: sha512-1SUf/Cg2GzGDyaf15aR9St9TWlb+XvbZXWpDx8YKs7MLzMH/BCeopv+y9vzrzgkfykCGuWOlSu3mZhj2+FQcrg==} + engines: {node: '>=10'} + deprecated: This functionality has been moved to @npmcli/fs + + '@statsig/client-core@3.31.0': + resolution: {integrity: sha512-SuxQD6TmVszPG7FoMKwTk/uyBuVFk7XnxI3T/E0uyb7PL7GNjONtfsoh+NqBBVUJVse0CUeSFfgJPoZy1ZOslQ==} + + '@statsig/js-client@3.31.0': + resolution: {integrity: sha512-LFa5E0LjT6sTfZv3sNGoyRLSZ1078+agdgOA+Vm1ecjG+KbSOfBLTW7hMwimrJ29slRwbYDzbtKaPJo/R37N2g==} + + 
'@tootallnate/once@1.1.2': + resolution: {integrity: sha512-RbzJvlNzmRq5c3O09UipeuXno4tA1FE6ikOjxZK0tuxVv3412l64l5t1W5pj4+rJq9vpkm/kwiR07aZXnsKPxw==} + engines: {node: '>= 6'} + + abbrev@1.1.1: + resolution: {integrity: sha512-nne9/IiQ/hzIhY6pdDnbBtz7DjPTKrY00P/zvPSm5pOFkl6xuGrGnXn/VtTNNfNtAfZ9/1RtehkszU9qcTii0Q==} + + agent-base@6.0.2: + resolution: {integrity: sha512-RZNwNclF7+MS/8bDg70amg32dyeZGZxiDuQmZxKLAlQjr3jGyLx+4Kkk58UO7D2QdgFIQCovuSuZESne6RG6XQ==} + engines: {node: '>= 6.0.0'} + + agentkeepalive@4.6.0: + resolution: {integrity: sha512-kja8j7PjmncONqaTsB8fQ+wE2mSU2DJ9D4XKoJ5PFWIdRMa6SLSN1ff4mOr4jCbfRSsxR4keIiySJU0N9T5hIQ==} + engines: {node: '>= 8.0.0'} + + aggregate-error@3.1.0: + resolution: {integrity: sha512-4I7Td01quW/RpocfNayFdFVk1qSuoh0E7JrbRJ16nH01HhKFQ88INq9Sd+nd72zqRySlr9BmDA8xlEJ6vJMrYA==} + engines: {node: '>=8'} + + ansi-regex@5.0.1: + resolution: {integrity: sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==} + engines: {node: '>=8'} + + aproba@2.1.0: + resolution: {integrity: sha512-tLIEcj5GuR2RSTnxNKdkK0dJ/GrC7P38sUkiDmDuHfsHmbagTFAxDVIBltoklXEVIQ/f14IL8IMJ5pn9Hez1Ew==} + + are-we-there-yet@3.0.1: + resolution: {integrity: sha512-QZW4EDmGwlYur0Yyf/b2uGucHQMa8aFUP7eu9ddR73vvhFyt4V0Vl3QHPcTNJ8l6qYOBdxgXdnBXQrHilfRQBg==} + engines: {node: ^12.13.0 || ^14.15.0 || >=16.0.0} + deprecated: This package is no longer supported. 
+ + balanced-match@1.0.2: + resolution: {integrity: sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==} + + base64-js@1.5.1: + resolution: {integrity: sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==} + + bindings@1.5.0: + resolution: {integrity: sha512-p2q/t/mhvuOj/UeLlV6566GD/guowlr0hHxClI0W9m7MWYkL1F0hLo+0Aexs9HSPCtR1SXQ0TD3MMKrXZajbiQ==} + + bl@4.1.0: + resolution: {integrity: sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w==} + + brace-expansion@1.1.14: + resolution: {integrity: sha512-MWPGfDxnyzKU7rNOW9SP/c50vi3xrmrua/+6hfPbCS2ABNWfx24vPidzvC7krjU/RTo235sV776ymlsMtGKj8g==} + + buffer@5.7.1: + resolution: {integrity: sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==} + + cacache@15.3.0: + resolution: {integrity: sha512-VVdYzXEn+cnbXpFgWs5hTT7OScegHVmLhJIR8Ufqk3iFD6A6j5iSX1KuBTfNEv4tdJWE2PzA6IVFtcLC7fN9wQ==} + engines: {node: '>= 10'} + + chownr@1.1.4: + resolution: {integrity: sha512-jJ0bqzaylmJtVnNgzTeSOs8DPavpbYgEr/b0YL8/2GO3xJEhInFmhKMUnEJQjZumK7KXGFhUy89PrsJWlakBVg==} + + chownr@2.0.0: + resolution: {integrity: sha512-bIomtDF5KGpdogkLd9VspvFzk9KfpyyGlS8YFVZl7TGPBHL5snIOnxeshwVgPteQ9b4Eydl+pVbIyE1DcvCWgQ==} + engines: {node: '>=10'} + + clean-stack@2.2.0: + resolution: {integrity: sha512-4diC9HaTE+KRAMWhDhrGOECgWZxoevMc5TlkObMqNSsVU62PYzXZ/SMTjzyGAFF1YusgxGcSWTEXBhp0CPwQ1A==} + engines: {node: '>=6'} + + color-support@1.1.3: + resolution: {integrity: sha512-qiBjkpbMLO/HL68y+lh4q0/O1MZFj2RX6X/KmMa3+gJD3z+WwI1ZzDHysvqHGS3mP6mznPckpXmw1nI9cJjyRg==} + hasBin: true + + concat-map@0.0.1: + resolution: {integrity: sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==} + + console-control-strings@1.1.0: + resolution: {integrity: sha512-ty/fTekppD2fIwRvnZAVdeOiGd1c7YXEixbgJTNzqcxJWKQnjJ/V1bNEEE6hygpM3WjwHFUVK6HTjWSzV4a8sQ==} + + debug@4.4.3: + 
resolution: {integrity: sha512-RGwwWnwQvkVfavKVt22FGLw+xYSdzARwm0ru6DhTVA3umU5hZc28V3kO4stgYryrTlLpuvgI9GiijltAjNbcqA==} + engines: {node: '>=6.0'} + peerDependencies: + supports-color: '*' + peerDependenciesMeta: + supports-color: + optional: true + + decompress-response@6.0.0: + resolution: {integrity: sha512-aW35yZM6Bb/4oJlZncMH2LCoZtJXTRxES17vE3hoRiowU2kWHaJKFkSBDnDR+cm9J+9QhXmREyIfv0pji9ejCQ==} + engines: {node: '>=10'} + + deep-extend@0.6.0: + resolution: {integrity: sha512-LOHxIOaPYdHlJRtCQfDIVZtfw/ufM8+rVj649RIHzcm/vGwQRXFt6OPqIFWsm2XEMrNIEtWR64sY1LEKD2vAOA==} + engines: {node: '>=4.0.0'} + + delegates@1.0.0: + resolution: {integrity: sha512-bd2L678uiWATM6m5Z1VzNCErI3jiGzt6HGY8OVICs40JQq/HALfbyNJmp0UDakEY4pMMaN0Ly5om/B1VI/+xfQ==} + + detect-libc@2.1.2: + resolution: {integrity: sha512-Btj2BOOO83o3WyH59e8MgXsxEQVcarkUOpEYrubB0urwnN10yQ364rsiByU11nZlqWYZm05i/of7io4mzihBtQ==} + engines: {node: '>=8'} + + emoji-regex@8.0.0: + resolution: {integrity: sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==} + + encoding@0.1.13: + resolution: {integrity: sha512-ETBauow1T35Y/WZMkio9jiM0Z5xjHHmJ4XmjZOq1l/dXz3lr2sRn87nJy20RupqSh1F2m3HHPSp8ShIPQJrJ3A==} + + end-of-stream@1.4.5: + resolution: {integrity: sha512-ooEGc6HP26xXq/N+GCGOT0JKCLDGrq2bQUZrQ7gyrJiZANJ/8YDTxTpQBXGMn+WbIQXNVpyWymm7KYVICQnyOg==} + + env-paths@2.2.1: + resolution: {integrity: sha512-+h1lkLKhZMTYjog1VEpJNG7NZJWcuc2DDk/qsqSTRRCOXiLjeQ1d1/udrUGhqMxUgAlwKNZ0cf2uqan5GLuS2A==} + engines: {node: '>=6'} + + err-code@2.0.3: + resolution: {integrity: sha512-2bmlRpNKBxT/CRmPOlyISQpNj+qSeYvcym/uT0Jx2bMOlKLtSy1ZmLuVxSEKKyor/N5yhvp/ZiG1oE3DEYMSFA==} + + expand-template@2.0.3: + resolution: {integrity: sha512-XYfuKMvj4O35f/pOXLObndIRvyQ+/+6AhODh+OKWj9S9498pHHn/IMszH+gt0fBCRWMNfk1ZSp5x3AifmnI2vg==} + engines: {node: '>=6'} + + file-uri-to-path@1.0.0: + resolution: {integrity: sha512-0Zt+s3L7Vf1biwWZ29aARiVYLx7iMGnEUl9x33fbB/j3jR81u/O2LbqK+Bm1CDSNDKVtJ/YjwY7TUd5SkeLQLw==} + + 
fs-constants@1.0.0: + resolution: {integrity: sha512-y6OAwoSIf7FyjMIv94u+b5rdheZEjzR63GTyZJm5qh4Bi+2YgwLCcI/fPFZkL5PSixOt6ZNKm+w+Hfp/Bciwow==} + + fs-minipass@2.1.0: + resolution: {integrity: sha512-V/JgOLFCS+R6Vcq0slCuaeWEdNC3ouDlJMNIsacH2VtALiu9mV4LPrHc5cDl8k5aw6J8jwgWWpiTo5RYhmIzvg==} + engines: {node: '>= 8'} + + fs.realpath@1.0.0: + resolution: {integrity: sha512-OO0pH2lK6a0hZnAdau5ItzHPI6pUlvI7jMVnxUQRtw4owF2wk8lOSabtGDCTP4Ggrg2MbGnWO9X8K1t4+fGMDw==} + + gauge@4.0.4: + resolution: {integrity: sha512-f9m+BEN5jkg6a0fZjleidjN51VE1X+mPFQ2DJ0uv1V39oCLCbsGe6yjbBnp7eK7z/+GAon99a3nHuqbuuthyPg==} + engines: {node: ^12.13.0 || ^14.15.0 || >=16.0.0} + deprecated: This package is no longer supported. + + github-from-package@0.0.0: + resolution: {integrity: sha512-SyHy3T1v2NUXn29OsWdxmK6RwHD+vkj3v8en8AOBZ1wBQ/hCAQ5bAQTD02kW4W9tUp/3Qh6J8r9EvntiyCmOOw==} + + glob@7.2.3: + resolution: {integrity: sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q==} + deprecated: Old versions of glob are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. 
Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me + + graceful-fs@4.2.11: + resolution: {integrity: sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==} + + has-unicode@2.0.1: + resolution: {integrity: sha512-8Rf9Y83NBReMnx0gFzA8JImQACstCYWUplepDa9xprwwtmgEZUF0h/i5xSA625zB/I37EtrswSST6OXxwaaIJQ==} + + http-cache-semantics@4.2.0: + resolution: {integrity: sha512-dTxcvPXqPvXBQpq5dUr6mEMJX4oIEFv6bwom3FDwKRDsuIjjJGANqhBuoAn9c1RQJIdAKav33ED65E2ys+87QQ==} + + http-proxy-agent@4.0.1: + resolution: {integrity: sha512-k0zdNgqWTGA6aeIRVpvfVob4fL52dTfaehylg0Y4UvSySvOq/Y+BOyPrgpUrA7HylqvU8vIZGsRuXmspskV0Tg==} + engines: {node: '>= 6'} + + https-proxy-agent@5.0.1: + resolution: {integrity: sha512-dFcAjpTQFgoLMzC2VwU+C/CbS7uRL0lWmxDITmqm7C+7F0Odmj6s9l6alZc6AELXhrnggM2CeWSXHGOdX2YtwA==} + engines: {node: '>= 6'} + + humanize-ms@1.2.1: + resolution: {integrity: sha512-Fl70vYtsAFb/C06PTS9dZBo7ihau+Tu/DNCk/OyHhea07S+aeMWpFFkUaXRa8fI+ScZbEI8dfSxwY7gxZ9SAVQ==} + + iconv-lite@0.6.3: + resolution: {integrity: sha512-4fCk79wshMdzMp2rH06qWrJE4iolqLhCUH+OiuIgU++RB0+94NlDL81atO7GX55uUKueo0txHNtvEyI6D7WdMw==} + engines: {node: '>=0.10.0'} + + ieee754@1.2.1: + resolution: {integrity: sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==} + + imurmurhash@0.1.4: + resolution: {integrity: sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA==} + engines: {node: '>=0.8.19'} + + indent-string@4.0.0: + resolution: {integrity: sha512-EdDDZu4A2OyIK7Lr/2zG+w5jmbuk1DVBnEwREQvBzspBJkCEbRa8GxU1lghYcaGJCnRWibjDXlq779X1/y5xwg==} + engines: {node: '>=8'} + + infer-owner@1.0.4: + resolution: {integrity: sha512-IClj+Xz94+d7irH5qRyfJonOdfTzuDaifE6ZPWfx0N0+/ATZCbuTPq2prFl526urkQd90WyUKIh1DfBQ2hMz9A==} + + inflight@1.0.6: + resolution: {integrity: sha512-k92I/b08q4wvFscXCLvqfsHCrjrF7yiXsQuIVvVE7N82W3+aqpzuUdBbfhWcy/FZR3/4IgflMgKLOsvPDrGCJA==} + 
deprecated: This module is not supported, and leaks memory. Do not use it. Check out lru-cache if you want a good and tested way to coalesce async requests by a key value, which is much more comprehensive and powerful. + + inherits@2.0.4: + resolution: {integrity: sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ==} + + ini@1.3.8: + resolution: {integrity: sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==} + + ip-address@10.1.1: + resolution: {integrity: sha512-1FMu8/N15Ck1BL551Jf42NYIoin2unWjLQ2Fze/DXryJRl5twqtwNHlO39qERGbIOcKYWHdgRryhOC+NG4eaLw==} + engines: {node: '>= 12'} + + is-fullwidth-code-point@3.0.0: + resolution: {integrity: sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==} + engines: {node: '>=8'} + + is-lambda@1.0.1: + resolution: {integrity: sha512-z7CMFGNrENq5iFB9Bqo64Xk6Y9sg+epq1myIcdHaGnbMTYOxvzsEtdYqQUylB7LxfkvgrrjP32T6Ywciio9UIQ==} + + isexe@2.0.0: + resolution: {integrity: sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==} + + lru-cache@6.0.0: + resolution: {integrity: sha512-Jo6dJ04CmSjuznwJSS3pUeWmd/H0ffTlkXXgwZi+eq1UCmqQwCh+eLsYOYCwY991i2Fah4h1BEMCx4qThGbsiA==} + engines: {node: '>=10'} + + make-fetch-happen@9.1.0: + resolution: {integrity: sha512-+zopwDy7DNknmwPQplem5lAZX/eCOzSvSNNcSKm5eVwTkOBzoktEfXsa9L23J/GIRhxRsaxzkPEhrJEpE2F4Gg==} + engines: {node: '>= 10'} + + mimic-response@3.1.0: + resolution: {integrity: sha512-z0yWI+4FDrrweS8Zmt4Ej5HdJmky15+L2e6Wgn3+iK5fWzb6T3fhNFq2+MeTRb064c6Wr4N/wv0DzQTjNzHNGQ==} + engines: {node: '>=10'} + + minimatch@3.1.5: + resolution: {integrity: sha512-VgjWUsnnT6n+NUk6eZq77zeFdpW2LWDzP6zFGrCbHXiYNul5Dzqk2HHQ5uFH2DNW5Xbp8+jVzaeNt94ssEEl4w==} + + minimist@1.2.8: + resolution: {integrity: sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==} + + minipass-collect@1.0.2: + resolution: 
{integrity: sha512-6T6lH0H8OG9kITm/Jm6tdooIbogG9e0tLgpY6mphXSm/A9u8Nq1ryBG+Qspiub9LjWlBPsPS3tWQ/Botq4FdxA==} + engines: {node: '>= 8'} + + minipass-fetch@1.4.1: + resolution: {integrity: sha512-CGH1eblLq26Y15+Azk7ey4xh0J/XfJfrCox5LDJiKqI2Q2iwOLOKrlmIaODiSQS8d18jalF6y2K2ePUm0CmShw==} + engines: {node: '>=8'} + + minipass-flush@1.0.7: + resolution: {integrity: sha512-TbqTz9cUwWyHS2Dy89P3ocAGUGxKjjLuR9z8w4WUTGAVgEj17/4nhgo2Du56i0Fm3Pm30g4iA8Lcqctc76jCzA==} + engines: {node: '>= 8'} + + minipass-pipeline@1.2.4: + resolution: {integrity: sha512-xuIq7cIOt09RPRJ19gdi4b+RiNvDFYe5JH+ggNvBqGqpQXcru3PcRmOZuHBKWK1Txf9+cQ+HMVN4d6z46LZP7A==} + engines: {node: '>=8'} + + minipass-sized@1.0.3: + resolution: {integrity: sha512-MbkQQ2CTiBMlA2Dm/5cY+9SWFEN8pzzOXi6rlM5Xxq0Yqbda5ZQy9sU75a673FE9ZK0Zsbr6Y5iP6u9nktfg2g==} + engines: {node: '>=8'} + + minipass@3.3.6: + resolution: {integrity: sha512-DxiNidxSEK+tHG6zOIklvNOwm3hvCrbUrdtzY74U6HKTJxvIDfOUL5W5P2Ghd3DTkhhKPYGqeNUIh5qcM4YBfw==} + engines: {node: '>=8'} + + minipass@5.0.0: + resolution: {integrity: sha512-3FnjYuehv9k6ovOEbyOswadCDPX1piCfhV8ncmYtHOjuPwylVWsghTLo7rabjC3Rx5xD4HDx8Wm1xnMF7S5qFQ==} + engines: {node: '>=8'} + + minizlib@2.1.2: + resolution: {integrity: sha512-bAxsR8BVfj60DWXHE3u30oHzfl4G7khkSuPW+qvpd7jFRHm7dLxOjUk1EHACJ/hxLY8phGJ0YhYHZo7jil7Qdg==} + engines: {node: '>= 8'} + + mkdirp-classic@0.5.3: + resolution: {integrity: sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==} + + mkdirp@1.0.4: + resolution: {integrity: sha512-vVqVZQyf3WLx2Shd0qJ9xuvqgAyKPLAiqITEtqW0oIUjzo3PePDd6fW9iFz30ef7Ysp/oiWqbhszeGWW2T6Gzw==} + engines: {node: '>=10'} + hasBin: true + + ms@2.1.3: + resolution: {integrity: sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==} + + napi-build-utils@2.0.0: + resolution: {integrity: sha512-GEbrYkbfF7MoNaoh2iGG84Mnf/WZfB0GdGEsM8wz7Expx/LlWf5U8t9nvJKXSp3qr5IsEbK04cBGhol/KwOsWA==} + + negotiator@0.6.4: + resolution: 
{integrity: sha512-myRT3DiWPHqho5PrJaIRyaMv2kgYf0mUVgBNOYMuCH5Ki1yEiQaf/ZJuQ62nvpc44wL5WDbTX7yGJi1Neevw8w==} + engines: {node: '>= 0.6'} + + node-abi@3.89.0: + resolution: {integrity: sha512-6u9UwL0HlAl21+agMN3YAMXcKByMqwGx+pq+P76vii5f7hTPtKDp08/H9py6DY+cfDw7kQNTGEj/rly3IgbNQA==} + engines: {node: '>=10'} + + node-addon-api@7.1.1: + resolution: {integrity: sha512-5m3bsyrjFWE1xf7nz7YXdN4udnVtXK6/Yfgn5qnahL6bCkf2yKt4k3nuTKAtT4r3IG8JNR2ncsIMdZuAzJjHQQ==} + + node-gyp@8.4.1: + resolution: {integrity: sha512-olTJRgUtAb/hOXG0E93wZDs5YiJlgbXxTwQAFHyNlRsXQnYzUaF2aGgujZbw+hR8aF4ZG/rST57bWMWD16jr9w==} + engines: {node: '>= 10.12.0'} + hasBin: true + + nopt@5.0.0: + resolution: {integrity: sha512-Tbj67rffqceeLpcRXrT7vKAN8CwfPeIBgM7E6iBkmKLV7bEMwpGgYLGv0jACUsECaa/vuxP0IjEont6umdMgtQ==} + engines: {node: '>=6'} + hasBin: true + + npmlog@6.0.2: + resolution: {integrity: sha512-/vBvz5Jfr9dT/aFWd0FIRf+T/Q2WBsLENygUaFUqstqsycmZAP/t5BvFJTK0viFmSUxiUKTUplWy5vt+rvKIxg==} + engines: {node: ^12.13.0 || ^14.15.0 || >=16.0.0} + deprecated: This package is no longer supported. + + once@1.4.0: + resolution: {integrity: sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==} + + p-map@4.0.0: + resolution: {integrity: sha512-/bjOqmgETBYB5BoEeGVea8dmvHb2m9GLy1E9W43yeyfP6QQCZGFNa+XRceJEuDB6zqr+gKpIAmlLebMpykw/MQ==} + engines: {node: '>=10'} + + path-is-absolute@1.0.1: + resolution: {integrity: sha512-AVbw3UJ2e9bq64vSaS9Am0fje1Pa8pbGqTTsmXfaIiMpnr5DlDhfJOuLj9Sf95ZPVDAUerDfEk88MPmPe7UCQg==} + engines: {node: '>=0.10.0'} + + prebuild-install@7.1.3: + resolution: {integrity: sha512-8Mf2cbV7x1cXPUILADGI3wuhfqWvtiLA1iclTDbFRZkgRQS0NqsPZphna9V+HyTEadheuPmjaJMsbzKQFOzLug==} + engines: {node: '>=10'} + deprecated: No longer maintained. Please contact the author of the relevant native addon; alternatives are available. 
+ hasBin: true + + promise-inflight@1.0.1: + resolution: {integrity: sha512-6zWPyEOFaQBJYcGMHBKTKJ3u6TBsnMFOIZSa6ce1e/ZrrsOlnHRHbabMjLiBYKp+n44X9eUI6VUPaukCXHuG4g==} + peerDependencies: + bluebird: '*' + peerDependenciesMeta: + bluebird: + optional: true + + promise-retry@2.0.1: + resolution: {integrity: sha512-y+WKFlBR8BGXnsNlIHFGPZmyDf3DFMoLhaflAnyZgV6rG6xu+JwesTo2Q9R6XwYmtmwAFCkAk3e35jEdoeh/3g==} + engines: {node: '>=10'} + + pump@3.0.4: + resolution: {integrity: sha512-VS7sjc6KR7e1ukRFhQSY5LM2uBWAUPiOPa/A3mkKmiMwSmRFUITt0xuj+/lesgnCv+dPIEYlkzrcyXgquIHMcA==} + + rc@1.2.8: + resolution: {integrity: sha512-y3bGgqKj3QBdxLbLkomlohkvsA8gdAiUQlSBJnBhfn+BPxg4bc62d8TcBW15wavDfgexCgccckhcZvywyQYPOw==} + hasBin: true + + readable-stream@3.6.2: + resolution: {integrity: sha512-9u/sniCrY3D5WdsERHzHE4G2YCXqoG5FTHUiCC4SIbr6XcLZBY05ya9EKjYek9O5xOAwjGq+1JdGBAS7Q9ScoA==} + engines: {node: '>= 6'} + + retry@0.12.0: + resolution: {integrity: sha512-9LkiTwjUh6rT555DtE9rTX+BKByPfrMzEAtnlEtdEwr3Nkffwiihqe2bWADg+OQRjt9gl6ICdmB/ZFDCGAtSow==} + engines: {node: '>= 4'} + + rimraf@3.0.2: + resolution: {integrity: sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA==} + deprecated: Rimraf versions prior to v4 are no longer supported + hasBin: true + + safe-buffer@5.2.1: + resolution: {integrity: sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ==} + + safer-buffer@2.1.2: + resolution: {integrity: sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg==} + + semver@7.7.4: + resolution: {integrity: sha512-vFKC2IEtQnVhpT78h1Yp8wzwrf8CM+MzKMHGJZfBtzhZNycRFnXsHk6E5TxIkkMsgNS7mdX3AGB7x2QM2di4lA==} + engines: {node: '>=10'} + hasBin: true + + set-blocking@2.0.0: + resolution: {integrity: sha512-KiKBS8AnWGEyLzofFfmvKwpdPzqiy16LvQfK3yv/fVH7Bj13/wl3JSR1J+rfgRE9q7xUJK4qvgS8raSOeLUehw==} + + signal-exit@3.0.7: + resolution: {integrity: 
sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ==} + + simple-concat@1.0.1: + resolution: {integrity: sha512-cSFtAPtRhljv69IK0hTVZQ+OfE9nePi/rtJmw5UjHeVyVroEqJXP1sFztKUy1qU+xvz3u/sfYJLa947b7nAN2Q==} + + simple-get@4.0.1: + resolution: {integrity: sha512-brv7p5WgH0jmQJr1ZDDfKDOSeWWg+OVypG99A/5vYGPqJ6pxiaHLy8nxtFjBA7oMa01ebA9gfh1uMCFqOuXxvA==} + + smart-buffer@4.2.0: + resolution: {integrity: sha512-94hK0Hh8rPqQl2xXc3HsaBoOXKV20MToPkcXvwbISWLEs+64sBq5kFgn2kJDHb1Pry9yrP0dxrCI9RRci7RXKg==} + engines: {node: '>= 6.0.0', npm: '>= 3.0.0'} + + socks-proxy-agent@6.2.1: + resolution: {integrity: sha512-a6KW9G+6B3nWZ1yB8G7pJwL3ggLy1uTzKAgCb7ttblwqdz9fMGJUuTy3uFzEP48FAs9FLILlmzDlE2JJhVQaXQ==} + engines: {node: '>= 10'} + + socks@2.8.8: + resolution: {integrity: sha512-NlGELfPrgX2f1TAAcz0WawlLn+0r3FyhhCRpFFK2CemXenPYvzMWWZINv3eDNo9ucdwme7oCHRY0Jnbs4aIkog==} + engines: {node: '>= 10.0.0', npm: '>= 3.0.0'} + + sqlite3@5.1.7: + resolution: {integrity: sha512-GGIyOiFaG+TUra3JIfkI/zGP8yZYLPQ0pl1bH+ODjiX57sPhrLU5sQJn1y9bDKZUFYkX1crlrPfSYt0BKKdkog==} + + ssri@8.0.1: + resolution: {integrity: sha512-97qShzy1AiyxvPNIkLWoGua7xoQzzPjQ0HAH4B0rWKo7SZ6USuPcrUiAFrws0UH8RrbWmgq3LMTObhPIHbbBeQ==} + engines: {node: '>= 8'} + + string-width@4.2.3: + resolution: {integrity: sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==} + engines: {node: '>=8'} + + string_decoder@1.3.0: + resolution: {integrity: sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA==} + + strip-ansi@6.0.1: + resolution: {integrity: sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==} + engines: {node: '>=8'} + + strip-json-comments@2.0.1: + resolution: {integrity: sha512-4gB8na07fecVVkOI6Rs4e7T6NOTki5EmL7TUduTs6bu3EdnSycntVJ4re8kgZA+wx9IueI2Y11bfbgwtzuE0KQ==} + engines: {node: '>=0.10.0'} + + tar-fs@2.1.4: + resolution: {integrity: 
sha512-mDAjwmZdh7LTT6pNleZ05Yt65HC3E+NiQzl672vQG38jIrehtJk/J3mNwIg+vShQPcLF/LV7CMnDW6vjj6sfYQ==} + + tar-stream@2.2.0: + resolution: {integrity: sha512-ujeqbceABgwMZxEJnk2HDY2DlnUZ+9oEcb1KzTVfYHio0UE6dG71n60d8D2I4qNvleWrrXpmjpt7vZeF1LnMZQ==} + engines: {node: '>=6'} + + tar@6.2.1: + resolution: {integrity: sha512-DZ4yORTwrbTj/7MZYq2w+/ZFdI6OZ/f9SFHR+71gIVUZhOQPHzVCLpvRnPgyaMpfWxxk/4ONva3GQSyNIKRv6A==} + engines: {node: '>=10'} + deprecated: Old versions of tar are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me + + tunnel-agent@0.6.0: + resolution: {integrity: sha512-McnNiV1l8RYeY8tBgEpuodCC1mLUdbSN+CYBL7kJsJNInOP8UjDDEwdk6Mw60vdLLrr5NHKZhMAOSrR2NZuQ+w==} + + undici@5.29.0: + resolution: {integrity: sha512-raqeBD6NQK4SkWhQzeYKd1KmIG6dllBOTt55Rmkt4HtI9mwdWtJljnrXjAFUBLTSN67HWrOIZ3EPF4kjUw80Bg==} + engines: {node: '>=14.0'} + + unique-filename@1.1.1: + resolution: {integrity: sha512-Vmp0jIp2ln35UTXuryvjzkjGdRyf9b2lTXuSYUiPmzRcl3FDtYqAwOnTJkAngD9SWhnoJzDbTKwaOrZ+STtxNQ==} + + unique-slug@2.0.2: + resolution: {integrity: sha512-zoWr9ObaxALD3DOPfjPSqxt4fnZiWblxHIgeWqW8x7UqDzEtHEQLzji2cuJYQFCU6KmoJikOYAZlrTHHebjx2w==} + + util-deprecate@1.0.2: + resolution: {integrity: sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==} + + which@2.0.2: + resolution: {integrity: sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==} + engines: {node: '>= 8'} + hasBin: true + + wide-align@1.1.5: + resolution: {integrity: sha512-eDMORYaPNZ4sQIuuYPDHdQvf4gyCF9rEEV/yPxGfwPkRodwEgiMUUXTx/dex+Me0wxx53S+NgUHaP7y3MGlDmg==} + + wrappy@1.0.2: + resolution: {integrity: sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==} + + yallist@4.0.0: + resolution: {integrity: 
sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A==} + + zod@3.25.76: + resolution: {integrity: sha512-gzUt/qt81nXsFGKIFcC3YnfEAx5NkunCfnDlvuBSSFS02bcXu4Lmea0AFIUwbLWxWPx3d9p8S5QoaujKcNQxcQ==} + +snapshots: + + '@bufbuild/protobuf@1.10.0': {} + + '@connectrpc/connect-node@1.7.0(@bufbuild/protobuf@1.10.0)(@connectrpc/connect@1.7.0(@bufbuild/protobuf@1.10.0))': + dependencies: + '@bufbuild/protobuf': 1.10.0 + '@connectrpc/connect': 1.7.0(@bufbuild/protobuf@1.10.0) + undici: 5.29.0 + + '@connectrpc/connect@1.7.0(@bufbuild/protobuf@1.10.0)': + dependencies: + '@bufbuild/protobuf': 1.10.0 + + '@cursor/sdk-darwin-arm64@1.0.10': + optional: true + + '@cursor/sdk-darwin-x64@1.0.10': + optional: true + + '@cursor/sdk-linux-arm64@1.0.10': + optional: true + + '@cursor/sdk-linux-x64@1.0.10': + optional: true + + '@cursor/sdk-win32-x64@1.0.10': + optional: true + + '@cursor/sdk@1.0.10': + dependencies: + '@bufbuild/protobuf': 1.10.0 + '@connectrpc/connect': 1.7.0(@bufbuild/protobuf@1.10.0) + '@connectrpc/connect-node': 1.7.0(@bufbuild/protobuf@1.10.0)(@connectrpc/connect@1.7.0(@bufbuild/protobuf@1.10.0)) + '@statsig/js-client': 3.31.0 + sqlite3: 5.1.7 + zod: 3.25.76 + optionalDependencies: + '@cursor/sdk-darwin-arm64': 1.0.10 + '@cursor/sdk-darwin-x64': 1.0.10 + '@cursor/sdk-linux-arm64': 1.0.10 + '@cursor/sdk-linux-x64': 1.0.10 + '@cursor/sdk-win32-x64': 1.0.10 + transitivePeerDependencies: + - bluebird + - supports-color + + '@fastify/busboy@2.1.1': {} + + '@gar/promisify@1.1.3': + optional: true + + '@npmcli/fs@1.1.1': + dependencies: + '@gar/promisify': 1.1.3 + semver: 7.7.4 + optional: true + + '@npmcli/move-file@1.1.2': + dependencies: + mkdirp: 1.0.4 + rimraf: 3.0.2 + optional: true + + '@statsig/client-core@3.31.0': {} + + '@statsig/js-client@3.31.0': + dependencies: + '@statsig/client-core': 3.31.0 + + '@tootallnate/once@1.1.2': + optional: true + + abbrev@1.1.1: + optional: true + + agent-base@6.0.2: + dependencies: + 
debug: 4.4.3 + transitivePeerDependencies: + - supports-color + optional: true + + agentkeepalive@4.6.0: + dependencies: + humanize-ms: 1.2.1 + optional: true + + aggregate-error@3.1.0: + dependencies: + clean-stack: 2.2.0 + indent-string: 4.0.0 + optional: true + + ansi-regex@5.0.1: + optional: true + + aproba@2.1.0: + optional: true + + are-we-there-yet@3.0.1: + dependencies: + delegates: 1.0.0 + readable-stream: 3.6.2 + optional: true + + balanced-match@1.0.2: + optional: true + + base64-js@1.5.1: {} + + bindings@1.5.0: + dependencies: + file-uri-to-path: 1.0.0 + + bl@4.1.0: + dependencies: + buffer: 5.7.1 + inherits: 2.0.4 + readable-stream: 3.6.2 + + brace-expansion@1.1.14: + dependencies: + balanced-match: 1.0.2 + concat-map: 0.0.1 + optional: true + + buffer@5.7.1: + dependencies: + base64-js: 1.5.1 + ieee754: 1.2.1 + + cacache@15.3.0: + dependencies: + '@npmcli/fs': 1.1.1 + '@npmcli/move-file': 1.1.2 + chownr: 2.0.0 + fs-minipass: 2.1.0 + glob: 7.2.3 + infer-owner: 1.0.4 + lru-cache: 6.0.0 + minipass: 3.3.6 + minipass-collect: 1.0.2 + minipass-flush: 1.0.7 + minipass-pipeline: 1.2.4 + mkdirp: 1.0.4 + p-map: 4.0.0 + promise-inflight: 1.0.1 + rimraf: 3.0.2 + ssri: 8.0.1 + tar: 6.2.1 + unique-filename: 1.1.1 + transitivePeerDependencies: + - bluebird + optional: true + + chownr@1.1.4: {} + + chownr@2.0.0: {} + + clean-stack@2.2.0: + optional: true + + color-support@1.1.3: + optional: true + + concat-map@0.0.1: + optional: true + + console-control-strings@1.1.0: + optional: true + + debug@4.4.3: + dependencies: + ms: 2.1.3 + optional: true + + decompress-response@6.0.0: + dependencies: + mimic-response: 3.1.0 + + deep-extend@0.6.0: {} + + delegates@1.0.0: + optional: true + + detect-libc@2.1.2: {} + + emoji-regex@8.0.0: + optional: true + + encoding@0.1.13: + dependencies: + iconv-lite: 0.6.3 + optional: true + + end-of-stream@1.4.5: + dependencies: + once: 1.4.0 + + env-paths@2.2.1: + optional: true + + err-code@2.0.3: + optional: true + + 
expand-template@2.0.3: {} + + file-uri-to-path@1.0.0: {} + + fs-constants@1.0.0: {} + + fs-minipass@2.1.0: + dependencies: + minipass: 3.3.6 + + fs.realpath@1.0.0: + optional: true + + gauge@4.0.4: + dependencies: + aproba: 2.1.0 + color-support: 1.1.3 + console-control-strings: 1.1.0 + has-unicode: 2.0.1 + signal-exit: 3.0.7 + string-width: 4.2.3 + strip-ansi: 6.0.1 + wide-align: 1.1.5 + optional: true + + github-from-package@0.0.0: {} + + glob@7.2.3: + dependencies: + fs.realpath: 1.0.0 + inflight: 1.0.6 + inherits: 2.0.4 + minimatch: 3.1.5 + once: 1.4.0 + path-is-absolute: 1.0.1 + optional: true + + graceful-fs@4.2.11: + optional: true + + has-unicode@2.0.1: + optional: true + + http-cache-semantics@4.2.0: + optional: true + + http-proxy-agent@4.0.1: + dependencies: + '@tootallnate/once': 1.1.2 + agent-base: 6.0.2 + debug: 4.4.3 + transitivePeerDependencies: + - supports-color + optional: true + + https-proxy-agent@5.0.1: + dependencies: + agent-base: 6.0.2 + debug: 4.4.3 + transitivePeerDependencies: + - supports-color + optional: true + + humanize-ms@1.2.1: + dependencies: + ms: 2.1.3 + optional: true + + iconv-lite@0.6.3: + dependencies: + safer-buffer: 2.1.2 + optional: true + + ieee754@1.2.1: {} + + imurmurhash@0.1.4: + optional: true + + indent-string@4.0.0: + optional: true + + infer-owner@1.0.4: + optional: true + + inflight@1.0.6: + dependencies: + once: 1.4.0 + wrappy: 1.0.2 + optional: true + + inherits@2.0.4: {} + + ini@1.3.8: {} + + ip-address@10.1.1: + optional: true + + is-fullwidth-code-point@3.0.0: + optional: true + + is-lambda@1.0.1: + optional: true + + isexe@2.0.0: + optional: true + + lru-cache@6.0.0: + dependencies: + yallist: 4.0.0 + optional: true + + make-fetch-happen@9.1.0: + dependencies: + agentkeepalive: 4.6.0 + cacache: 15.3.0 + http-cache-semantics: 4.2.0 + http-proxy-agent: 4.0.1 + https-proxy-agent: 5.0.1 + is-lambda: 1.0.1 + lru-cache: 6.0.0 + minipass: 3.3.6 + minipass-collect: 1.0.2 + minipass-fetch: 1.4.1 + minipass-flush: 
1.0.7 + minipass-pipeline: 1.2.4 + negotiator: 0.6.4 + promise-retry: 2.0.1 + socks-proxy-agent: 6.2.1 + ssri: 8.0.1 + transitivePeerDependencies: + - bluebird + - supports-color + optional: true + + mimic-response@3.1.0: {} + + minimatch@3.1.5: + dependencies: + brace-expansion: 1.1.14 + optional: true + + minimist@1.2.8: {} + + minipass-collect@1.0.2: + dependencies: + minipass: 3.3.6 + optional: true + + minipass-fetch@1.4.1: + dependencies: + minipass: 3.3.6 + minipass-sized: 1.0.3 + minizlib: 2.1.2 + optionalDependencies: + encoding: 0.1.13 + optional: true + + minipass-flush@1.0.7: + dependencies: + minipass: 3.3.6 + optional: true + + minipass-pipeline@1.2.4: + dependencies: + minipass: 3.3.6 + optional: true + + minipass-sized@1.0.3: + dependencies: + minipass: 3.3.6 + optional: true + + minipass@3.3.6: + dependencies: + yallist: 4.0.0 + + minipass@5.0.0: {} + + minizlib@2.1.2: + dependencies: + minipass: 3.3.6 + yallist: 4.0.0 + + mkdirp-classic@0.5.3: {} + + mkdirp@1.0.4: {} + + ms@2.1.3: + optional: true + + napi-build-utils@2.0.0: {} + + negotiator@0.6.4: + optional: true + + node-abi@3.89.0: + dependencies: + semver: 7.7.4 + + node-addon-api@7.1.1: {} + + node-gyp@8.4.1: + dependencies: + env-paths: 2.2.1 + glob: 7.2.3 + graceful-fs: 4.2.11 + make-fetch-happen: 9.1.0 + nopt: 5.0.0 + npmlog: 6.0.2 + rimraf: 3.0.2 + semver: 7.7.4 + tar: 6.2.1 + which: 2.0.2 + transitivePeerDependencies: + - bluebird + - supports-color + optional: true + + nopt@5.0.0: + dependencies: + abbrev: 1.1.1 + optional: true + + npmlog@6.0.2: + dependencies: + are-we-there-yet: 3.0.1 + console-control-strings: 1.1.0 + gauge: 4.0.4 + set-blocking: 2.0.0 + optional: true + + once@1.4.0: + dependencies: + wrappy: 1.0.2 + + p-map@4.0.0: + dependencies: + aggregate-error: 3.1.0 + optional: true + + path-is-absolute@1.0.1: + optional: true + + prebuild-install@7.1.3: + dependencies: + detect-libc: 2.1.2 + expand-template: 2.0.3 + github-from-package: 0.0.0 + minimist: 1.2.8 + 
mkdirp-classic: 0.5.3 + napi-build-utils: 2.0.0 + node-abi: 3.89.0 + pump: 3.0.4 + rc: 1.2.8 + simple-get: 4.0.1 + tar-fs: 2.1.4 + tunnel-agent: 0.6.0 + + promise-inflight@1.0.1: + optional: true + + promise-retry@2.0.1: + dependencies: + err-code: 2.0.3 + retry: 0.12.0 + optional: true + + pump@3.0.4: + dependencies: + end-of-stream: 1.4.5 + once: 1.4.0 + + rc@1.2.8: + dependencies: + deep-extend: 0.6.0 + ini: 1.3.8 + minimist: 1.2.8 + strip-json-comments: 2.0.1 + + readable-stream@3.6.2: + dependencies: + inherits: 2.0.4 + string_decoder: 1.3.0 + util-deprecate: 1.0.2 + + retry@0.12.0: + optional: true + + rimraf@3.0.2: + dependencies: + glob: 7.2.3 + optional: true + + safe-buffer@5.2.1: {} + + safer-buffer@2.1.2: + optional: true + + semver@7.7.4: {} + + set-blocking@2.0.0: + optional: true + + signal-exit@3.0.7: + optional: true + + simple-concat@1.0.1: {} + + simple-get@4.0.1: + dependencies: + decompress-response: 6.0.0 + once: 1.4.0 + simple-concat: 1.0.1 + + smart-buffer@4.2.0: + optional: true + + socks-proxy-agent@6.2.1: + dependencies: + agent-base: 6.0.2 + debug: 4.4.3 + socks: 2.8.8 + transitivePeerDependencies: + - supports-color + optional: true + + socks@2.8.8: + dependencies: + ip-address: 10.1.1 + smart-buffer: 4.2.0 + optional: true + + sqlite3@5.1.7: + dependencies: + bindings: 1.5.0 + node-addon-api: 7.1.1 + prebuild-install: 7.1.3 + tar: 6.2.1 + optionalDependencies: + node-gyp: 8.4.1 + transitivePeerDependencies: + - bluebird + - supports-color + + ssri@8.0.1: + dependencies: + minipass: 3.3.6 + optional: true + + string-width@4.2.3: + dependencies: + emoji-regex: 8.0.0 + is-fullwidth-code-point: 3.0.0 + strip-ansi: 6.0.1 + optional: true + + string_decoder@1.3.0: + dependencies: + safe-buffer: 5.2.1 + + strip-ansi@6.0.1: + dependencies: + ansi-regex: 5.0.1 + optional: true + + strip-json-comments@2.0.1: {} + + tar-fs@2.1.4: + dependencies: + chownr: 1.1.4 + mkdirp-classic: 0.5.3 + pump: 3.0.4 + tar-stream: 2.2.0 + + tar-stream@2.2.0: + 
dependencies: + bl: 4.1.0 + end-of-stream: 1.4.5 + fs-constants: 1.0.0 + inherits: 2.0.4 + readable-stream: 3.6.2 + + tar@6.2.1: + dependencies: + chownr: 2.0.0 + fs-minipass: 2.1.0 + minipass: 5.0.0 + minizlib: 2.1.2 + mkdirp: 1.0.4 + yallist: 4.0.0 + + tunnel-agent@0.6.0: + dependencies: + safe-buffer: 5.2.1 + + undici@5.29.0: + dependencies: + '@fastify/busboy': 2.1.1 + + unique-filename@1.1.1: + dependencies: + unique-slug: 2.0.2 + optional: true + + unique-slug@2.0.2: + dependencies: + imurmurhash: 0.1.4 + optional: true + + util-deprecate@1.0.2: {} + + which@2.0.2: + dependencies: + isexe: 2.0.0 + optional: true + + wide-align@1.1.5: + dependencies: + string-width: 4.2.3 + optional: true + + wrappy@1.0.2: {} + + yallist@4.0.0: {} + + zod@3.25.76: {} diff --git a/e2e/scenarios/cursor-sdk-instrumentation/scenario.cursor-sdk-v1.mjs b/e2e/scenarios/cursor-sdk-instrumentation/scenario.cursor-sdk-v1.mjs new file mode 100644 index 000000000..d7df96baa --- /dev/null +++ b/e2e/scenarios/cursor-sdk-instrumentation/scenario.cursor-sdk-v1.mjs @@ -0,0 +1,5 @@ +import * as cursorSDK from "cursor-sdk-v1"; +import { runMain } from "../../helpers/provider-runtime.mjs"; +import { runAutoCursorSDKInstrumentation } from "./scenario.impl.mjs"; + +runMain(async () => runAutoCursorSDKInstrumentation(cursorSDK)); diff --git a/e2e/scenarios/cursor-sdk-instrumentation/scenario.cursor-sdk-v1.ts b/e2e/scenarios/cursor-sdk-instrumentation/scenario.cursor-sdk-v1.ts new file mode 100644 index 000000000..4fb25222d --- /dev/null +++ b/e2e/scenarios/cursor-sdk-instrumentation/scenario.cursor-sdk-v1.ts @@ -0,0 +1,5 @@ +import * as cursorSDK from "cursor-sdk-v1"; +import { runMain } from "../../helpers/provider-runtime.mjs"; +import { runWrappedCursorSDKInstrumentation } from "./scenario.impl.mjs"; + +runMain(async () => runWrappedCursorSDKInstrumentation(cursorSDK)); diff --git a/e2e/scenarios/cursor-sdk-instrumentation/scenario.impl.mjs 
b/e2e/scenarios/cursor-sdk-instrumentation/scenario.impl.mjs new file mode 100644 index 000000000..4dc909eb9 --- /dev/null +++ b/e2e/scenarios/cursor-sdk-instrumentation/scenario.impl.mjs @@ -0,0 +1,136 @@ +import { traced, wrapCursorSDK } from "braintrust"; +import { + collectAsync, + runOperation, + runTracedScenario, +} from "../../helpers/provider-runtime.mjs"; + +const CURSOR_MODEL = "composer-2"; + +export const ROOT_NAME = "cursor-sdk-root"; +export const SCENARIO_NAME = "cursor-sdk-instrumentation"; + +function cursorOptions() { + return { + apiKey: process.env.CURSOR_API_KEY, + local: { + cwd: process.cwd(), + sandboxOptions: { enabled: false }, + }, + model: { id: CURSOR_MODEL }, + }; +} + +async function disposeAgent(agent) { + if (agent?.[Symbol.asyncDispose]) { + await agent[Symbol.asyncDispose](); + } else if (agent?.close) { + agent.close(); + } +} + +async function runCursorSDKScenario({ decorateSDK, sdk }) { + if (!process.env.CURSOR_API_KEY) { + throw new Error( + "CURSOR_API_KEY is required for cursor-sdk-instrumentation", + ); + } + + const instrumentedSDK = decorateSDK ? decorateSDK(sdk) : sdk; + const { Agent } = instrumentedSDK; + let reusableAgent; + + await runTracedScenario({ + callback: async () => { + await runOperation("cursor-sdk-prompt-operation", "prompt", async () => { + await Agent.prompt( + "Reply with exactly: CURSOR_PROMPT_OK. Do not modify files.", + cursorOptions(), + ); + }); + + await runOperation("cursor-sdk-stream-operation", "stream", async () => { + reusableAgent = await Agent.create({ + ...cursorOptions(), + agents: { + reviewer: { + description: "Reads the request and replies briefly.", + model: "inherit", + prompt: "Reply concisely. Do not modify files.", + }, + }, + }); + const run = await reusableAgent.send( + "First use the reviewer subagent to confirm exactly CURSOR_SUBAGENT_OK. Then run the shell command `printf cursor_tool_ok` and report the output. 
Do not edit files.", + ); + await collectAsync(run.stream()); + }); + + await runOperation("cursor-sdk-wait-operation", "wait", async () => { + const agent = await Agent.create(cursorOptions()); + try { + const run = await agent.send( + "Reply with exactly: CURSOR_WAIT_OK. Do not modify files.", + { + onDelta: async ({ update }) => { + await traced(async () => update.type, { + name: "cursor-sdk-user-on-delta", + }); + }, + onStep: async ({ step }) => { + await traced(async () => step.type, { + name: "cursor-sdk-user-on-step", + }); + }, + }, + ); + await run.wait(); + } finally { + await disposeAgent(agent); + } + }); + + await runOperation( + "cursor-sdk-resume-conversation-operation", + "resume-conversation", + async () => { + const agentId = reusableAgent?.agentId; + if (!agentId) { + throw new Error("Expected reusable Cursor agent id"); + } + const agent = await Agent.resume(agentId, cursorOptions()); + try { + const run = await agent.send( + "Reply with exactly: CURSOR_CONVERSATION_OK. 
Do not modify files.", + ); + await run.conversation(); + } finally { + await disposeAgent(agent); + } + }, + ); + }, + flushCount: 2, + flushDelayMs: 250, + metadata: { + scenario: SCENARIO_NAME, + }, + projectNameBase: "e2e-cursor-sdk-instrumentation", + rootName: ROOT_NAME, + }); + + await disposeAgent(reusableAgent); +} + +export async function runWrappedCursorSDKInstrumentation(sdk) { + await runCursorSDKScenario({ + decorateSDK: wrapCursorSDK, + sdk, + }); +} + +export async function runAutoCursorSDKInstrumentation(sdk) { + await runCursorSDKScenario({ + sdk, + }); +} diff --git a/e2e/scenarios/cursor-sdk-instrumentation/scenario.test.ts b/e2e/scenarios/cursor-sdk-instrumentation/scenario.test.ts new file mode 100644 index 000000000..6b5d497d9 --- /dev/null +++ b/e2e/scenarios/cursor-sdk-instrumentation/scenario.test.ts @@ -0,0 +1,56 @@ +import { describe } from "vitest"; +import { + prepareScenarioDir, + readInstalledPackageVersion, + resolveScenarioDir, +} from "../../helpers/scenario-harness"; +import { defineCursorSDKInstrumentationAssertions } from "./assertions"; + +const scenarioDir = await prepareScenarioDir({ + scenarioDir: resolveScenarioDir(import.meta.url), +}); +const TIMEOUT_MS = 240_000; +const cursorSDKScenario = { + autoEntry: "scenario.cursor-sdk-v1.mjs", + autoSnapshotName: "cursor-sdk-v1-auto-hook", + dependencyName: "cursor-sdk-v1", + version: await readInstalledPackageVersion(scenarioDir, "cursor-sdk-v1"), + wrapperEntry: "scenario.cursor-sdk-v1.ts", + wrapperSnapshotName: "cursor-sdk-v1-wrapped", + variantKey: "cursor-sdk-v1", +}; + +describe("wrapped instrumentation", () => { + defineCursorSDKInstrumentationAssertions({ + name: `cursor sdk ${cursorSDKScenario.version}`, + runScenario: async ({ runScenarioDir }) => { + await runScenarioDir({ + entry: cursorSDKScenario.wrapperEntry, + runContext: { variantKey: cursorSDKScenario.variantKey }, + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); + }, + snapshotName: 
cursorSDKScenario.wrapperSnapshotName, + testFileUrl: import.meta.url, + timeoutMs: TIMEOUT_MS, + }); +}); + +describe("auto-hook instrumentation", () => { + defineCursorSDKInstrumentationAssertions({ + name: `cursor sdk ${cursorSDKScenario.version}`, + runScenario: async ({ runNodeScenarioDir }) => { + await runNodeScenarioDir({ + entry: cursorSDKScenario.autoEntry, + nodeArgs: ["--import", "braintrust/hook.mjs"], + runContext: { variantKey: cursorSDKScenario.variantKey }, + scenarioDir, + timeoutMs: TIMEOUT_MS, + }); + }, + snapshotName: cursorSDKScenario.autoSnapshotName, + testFileUrl: import.meta.url, + timeoutMs: TIMEOUT_MS, + }); +}); diff --git a/e2e/scripts/run-canary-tests-docker.mjs b/e2e/scripts/run-canary-tests-docker.mjs index 0635ba044..9b75bb175 100644 --- a/e2e/scripts/run-canary-tests-docker.mjs +++ b/e2e/scripts/run-canary-tests-docker.mjs @@ -19,6 +19,7 @@ const ALLOWED_ENV_KEYS = [ "GEMINI_API_KEY", "GOOGLE_API_KEY", "COHERE_API_KEY", + "CURSOR_API_KEY", "GROQ_API_KEY", "OPENAI_API_KEY", "OPENAI_BASE_URL", diff --git a/js/src/auto-instrumentations/bundler/plugin.ts b/js/src/auto-instrumentations/bundler/plugin.ts index 3202d3dbc..2462c5faf 100644 --- a/js/src/auto-instrumentations/bundler/plugin.ts +++ b/js/src/auto-instrumentations/bundler/plugin.ts @@ -24,6 +24,7 @@ import { openaiConfigs } from "../configs/openai"; import { anthropicConfigs } from "../configs/anthropic"; import { aiSDKConfigs } from "../configs/ai-sdk"; import { claudeAgentSDKConfigs } from "../configs/claude-agent-sdk"; +import { cursorSDKConfigs } from "../configs/cursor-sdk"; import { googleGenAIConfigs } from "../configs/google-genai"; import { huggingFaceConfigs } from "../configs/huggingface"; import { openRouterAgentConfigs } from "../configs/openrouter-agent"; @@ -76,6 +77,7 @@ export const unplugin = createUnplugin((options = {}) => { ...anthropicConfigs, ...aiSDKConfigs, ...claudeAgentSDKConfigs, + ...cursorSDKConfigs, ...googleGenAIConfigs, ...huggingFaceConfigs, 
...openRouterConfigs, diff --git a/js/src/auto-instrumentations/bundler/webpack-loader.ts b/js/src/auto-instrumentations/bundler/webpack-loader.ts index 5d6c7e20b..966c659aa 100644 --- a/js/src/auto-instrumentations/bundler/webpack-loader.ts +++ b/js/src/auto-instrumentations/bundler/webpack-loader.ts @@ -33,6 +33,7 @@ import { openaiConfigs } from "../configs/openai"; import { anthropicConfigs } from "../configs/anthropic"; import { aiSDKConfigs } from "../configs/ai-sdk"; import { claudeAgentSDKConfigs } from "../configs/claude-agent-sdk"; +import { cursorSDKConfigs } from "../configs/cursor-sdk"; import { googleGenAIConfigs } from "../configs/google-genai"; import { huggingFaceConfigs } from "../configs/huggingface"; import { openRouterAgentConfigs } from "../configs/openrouter-agent"; @@ -71,6 +72,7 @@ function getMatcher(options: BundlerPluginOptions): InstrumentationMatcher { ...anthropicConfigs, ...aiSDKConfigs, ...claudeAgentSDKConfigs, + ...cursorSDKConfigs, ...googleGenAIConfigs, ...huggingFaceConfigs, ...openRouterConfigs, diff --git a/js/src/auto-instrumentations/configs/cursor-sdk.ts b/js/src/auto-instrumentations/configs/cursor-sdk.ts new file mode 100644 index 000000000..4aa8ed631 --- /dev/null +++ b/js/src/auto-instrumentations/configs/cursor-sdk.ts @@ -0,0 +1,49 @@ +import type { InstrumentationConfig } from "@apm-js-collab/code-transformer"; +import { cursorSDKChannels } from "../../instrumentation/plugins/cursor-sdk-channels"; + +const cursorSDKVersionRange = ">=1.0.7 <2.0.0"; + +const cursorSDKEntrypoints = ["dist/esm/index.js", "dist/cjs/index.js"]; + +export const cursorSDKConfigs: InstrumentationConfig[] = + cursorSDKEntrypoints.flatMap((filePath) => [ + { + channelName: cursorSDKChannels.create.channelName, + module: { + name: "@cursor/sdk", + versionRange: cursorSDKVersionRange, + filePath, + }, + functionQuery: { + className: "Agent", + methodName: "create", + kind: "Async", + }, + }, + { + channelName: 
cursorSDKChannels.resume.channelName, + module: { + name: "@cursor/sdk", + versionRange: cursorSDKVersionRange, + filePath, + }, + functionQuery: { + className: "Agent", + methodName: "resume", + kind: "Async", + }, + }, + { + channelName: cursorSDKChannels.prompt.channelName, + module: { + name: "@cursor/sdk", + versionRange: cursorSDKVersionRange, + filePath, + }, + functionQuery: { + className: "Agent", + methodName: "prompt", + kind: "Async", + }, + }, + ]); diff --git a/js/src/auto-instrumentations/hook.mts b/js/src/auto-instrumentations/hook.mts index 2c3622600..46fe0f0d2 100644 --- a/js/src/auto-instrumentations/hook.mts +++ b/js/src/auto-instrumentations/hook.mts @@ -18,6 +18,7 @@ import { openaiConfigs } from "./configs/openai.js"; import { anthropicConfigs } from "./configs/anthropic.js"; import { aiSDKConfigs } from "./configs/ai-sdk.js"; import { claudeAgentSDKConfigs } from "./configs/claude-agent-sdk.js"; +import { cursorSDKConfigs } from "./configs/cursor-sdk.js"; import { googleGenAIConfigs } from "./configs/google-genai.js"; import { huggingFaceConfigs } from "./configs/huggingface.js"; import { openRouterAgentConfigs } from "./configs/openrouter-agent.js"; @@ -68,6 +69,9 @@ const allConfigs = [ ...(isDisabled(disabledIntegrations, "claudeagentsdk", "claude-agent-sdk") ? [] : claudeAgentSDKConfigs), + ...(isDisabled(disabledIntegrations, "cursor", "cursor-sdk") + ? [] + : cursorSDKConfigs), ...(isDisabled(disabledIntegrations, "google", "google-genai") ? 
[] : googleGenAIConfigs), diff --git a/js/src/auto-instrumentations/index.ts b/js/src/auto-instrumentations/index.ts index 03fda75a1..bdac954ab 100644 --- a/js/src/auto-instrumentations/index.ts +++ b/js/src/auto-instrumentations/index.ts @@ -32,6 +32,7 @@ export { openaiConfigs } from "./configs/openai"; export { anthropicConfigs } from "./configs/anthropic"; export { aiSDKConfigs } from "./configs/ai-sdk"; export { claudeAgentSDKConfigs } from "./configs/claude-agent-sdk"; +export { cursorSDKConfigs } from "./configs/cursor-sdk"; export { googleGenAIConfigs } from "./configs/google-genai"; export { huggingFaceConfigs } from "./configs/huggingface"; export { openRouterAgentConfigs } from "./configs/openrouter-agent"; diff --git a/js/src/exports.ts b/js/src/exports.ts index 93a12bb05..02dadbf66 100644 --- a/js/src/exports.ts +++ b/js/src/exports.ts @@ -177,6 +177,7 @@ export { export { wrapAnthropic } from "./wrappers/anthropic"; export { wrapMastraAgent } from "./wrappers/mastra"; export { wrapClaudeAgentSDK } from "./wrappers/claude-agent-sdk/claude-agent-sdk"; +export { wrapCursorSDK } from "./wrappers/cursor-sdk"; export { wrapGoogleGenAI } from "./wrappers/google-genai"; export { wrapGoogleADK } from "./wrappers/google-adk"; export { wrapHuggingFace } from "./wrappers/huggingface"; diff --git a/js/src/instrumentation/braintrust-plugin.ts b/js/src/instrumentation/braintrust-plugin.ts index 485d2d9bc..5db01b441 100644 --- a/js/src/instrumentation/braintrust-plugin.ts +++ b/js/src/instrumentation/braintrust-plugin.ts @@ -3,6 +3,7 @@ import { OpenAIPlugin } from "./plugins/openai-plugin"; import { AnthropicPlugin } from "./plugins/anthropic-plugin"; import { AISDKPlugin } from "./plugins/ai-sdk-plugin"; import { ClaudeAgentSDKPlugin } from "./plugins/claude-agent-sdk-plugin"; +import { CursorSDKPlugin } from "./plugins/cursor-sdk-plugin"; import { GoogleGenAIPlugin } from "./plugins/google-genai-plugin"; import { HuggingFacePlugin } from 
"./plugins/huggingface-plugin"; import { OpenRouterAgentPlugin } from "./plugins/openrouter-agent-plugin"; @@ -22,6 +23,8 @@ export interface BraintrustPluginConfig { googleGenAI?: boolean; huggingface?: boolean; claudeAgentSDK?: boolean; + cursor?: boolean; + cursorSDK?: boolean; openrouter?: boolean; openrouterAgent?: boolean; mistral?: boolean; @@ -53,6 +56,7 @@ export class BraintrustPlugin extends BasePlugin { private anthropicPlugin: AnthropicPlugin | null = null; private aiSDKPlugin: AISDKPlugin | null = null; private claudeAgentSDKPlugin: ClaudeAgentSDKPlugin | null = null; + private cursorSDKPlugin: CursorSDKPlugin | null = null; private googleGenAIPlugin: GoogleGenAIPlugin | null = null; private huggingFacePlugin: HuggingFacePlugin | null = null; private openRouterPlugin: OpenRouterPlugin | null = null; @@ -95,6 +99,11 @@ export class BraintrustPlugin extends BasePlugin { this.claudeAgentSDKPlugin.enable(); } + if (integrations.cursorSDK !== false && integrations.cursor !== false) { + this.cursorSDKPlugin = new CursorSDKPlugin(); + this.cursorSDKPlugin.enable(); + } + // Enable Google GenAI integration (default: true) // Support both 'googleGenAI' and legacy 'google' config keys if (integrations.googleGenAI !== false && integrations.google !== false) { @@ -160,6 +169,11 @@ export class BraintrustPlugin extends BasePlugin { this.claudeAgentSDKPlugin = null; } + if (this.cursorSDKPlugin) { + this.cursorSDKPlugin.disable(); + this.cursorSDKPlugin = null; + } + if (this.googleGenAIPlugin) { this.googleGenAIPlugin.disable(); this.googleGenAIPlugin = null; diff --git a/js/src/instrumentation/plugins/cursor-sdk-channels.ts b/js/src/instrumentation/plugins/cursor-sdk-channels.ts new file mode 100644 index 000000000..b38f7246a --- /dev/null +++ b/js/src/instrumentation/plugins/cursor-sdk-channels.ts @@ -0,0 +1,47 @@ +import { channel, defineChannels } from "../core/channel-definitions"; +import type { + CursorSDKAgent, + CursorSDKAgentOptions, + CursorSDKRun, + 
CursorSDKRunResult, + CursorSDKSendOptions, + CursorSDKUserMessage, +} from "../../vendor-sdk-types/cursor-sdk"; + +export const cursorSDKChannels = defineChannels("@cursor/sdk", { + create: channel< + [CursorSDKAgentOptions], + CursorSDKAgent, + Record + >({ + channelName: "Agent.create", + kind: "async", + }), + resume: channel< + [string, Partial | undefined], + CursorSDKAgent, + Record + >({ + channelName: "Agent.resume", + kind: "async", + }), + prompt: channel< + [string | CursorSDKUserMessage, CursorSDKAgentOptions | undefined], + CursorSDKRunResult, + Record + >({ + channelName: "Agent.prompt", + kind: "async", + }), + send: channel< + [string | CursorSDKUserMessage, CursorSDKSendOptions | undefined], + CursorSDKRun, + { + agent?: CursorSDKAgent; + operation?: "send"; + } + >({ + channelName: "agent.send", + kind: "async", + }), +}); diff --git a/js/src/instrumentation/plugins/cursor-sdk-plugin.test.ts b/js/src/instrumentation/plugins/cursor-sdk-plugin.test.ts new file mode 100644 index 000000000..909fceb02 --- /dev/null +++ b/js/src/instrumentation/plugins/cursor-sdk-plugin.test.ts @@ -0,0 +1,259 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from "vitest"; + +const { mockStartSpan } = vi.hoisted(() => ({ + mockStartSpan: vi.fn(), +})); + +vi.mock("../../isomorph", () => ({ + default: { + newTracingChannel: vi.fn(), + }, +})); + +vi.mock("../../logger", () => ({ + startSpan: (...args: unknown[]) => mockStartSpan(...args), +})); + +import iso from "../../isomorph"; +import { CursorSDKPlugin } from "./cursor-sdk-plugin"; + +const mockNewTracingChannel = iso.newTracingChannel as ReturnType; + +describe("CursorSDKPlugin", () => { + let handlersByName: Map; + let spans: Array<{ + end: ReturnType; + export: ReturnType; + log: ReturnType; + name?: string; + }>; + + beforeEach(() => { + handlersByName = new Map(); + spans = []; + mockNewTracingChannel.mockImplementation((name: string) => ({ + subscribe: vi.fn((handlers) => handlersByName.set(name, 
handlers)), + tracePromise: vi.fn((fn) => fn()), + unsubscribe: vi.fn(), + })); + mockStartSpan.mockImplementation((args: any) => { + const span = { + end: vi.fn(), + export: vi.fn(async () => `${args.name}-export-${spans.length}`), + log: vi.fn(), + name: args.name, + }; + if (args.event) { + span.log(args.event); + } + spans.push(span); + return span; + }); + }); + + afterEach(() => { + vi.clearAllMocks(); + }); + + it("subscribes to Cursor SDK channels", () => { + const plugin = new CursorSDKPlugin(); + + plugin.enable(); + + expect(handlersByName.has("orchestrion:@cursor/sdk:Agent.create")).toBe( + true, + ); + expect(handlersByName.has("orchestrion:@cursor/sdk:Agent.resume")).toBe( + true, + ); + expect(handlersByName.has("orchestrion:@cursor/sdk:Agent.prompt")).toBe( + true, + ); + expect(handlersByName.has("orchestrion:@cursor/sdk:agent.send")).toBe(true); + }); + + it("patches agents returned by Agent.create and traces send/wait", async () => { + const plugin = new CursorSDKPlugin(); + plugin.enable(); + + const createHandlers = handlersByName.get( + "orchestrion:@cursor/sdk:Agent.create", + ); + const sendHandlers = handlersByName.get( + "orchestrion:@cursor/sdk:agent.send", + ); + const run = makeRun(); + const originalSend = vi.fn(async () => run); + const agent = { + agentId: "agent-1", + send: originalSend, + }; + + createHandlers.asyncEnd({ + arguments: [{ local: { cwd: "/tmp/repo" } }], + result: agent, + }); + + const patchedRun = await agent.send("use a tool", {}); + expect(patchedRun).toBe(run); + expect(originalSend).toHaveBeenCalledTimes(1); + + const sendEvent = { + agent, + arguments: ["use a tool", {}], + result: run, + }; + sendHandlers.start(sendEvent); + sendHandlers.asyncEnd(sendEvent); + + await run.wait(); + + const rootSpan = spans.find((span) => span.name === "Cursor Agent"); + expect(rootSpan?.log).toHaveBeenCalledWith( + expect.objectContaining({ + input: "use a tool", + metadata: expect.objectContaining({ + "cursor_sdk.agent_id": 
"agent-1", + provider: "cursor", + }), + }), + ); + expect(rootSpan?.log).toHaveBeenCalledWith( + expect.objectContaining({ + output: "done", + metadata: expect.objectContaining({ + "cursor_sdk.run_id": "run-1", + "cursor_sdk.status": "finished", + }), + }), + ); + expect(rootSpan?.end).toHaveBeenCalledTimes(1); + }); + + it("captures stream tool calls and usage", async () => { + const plugin = new CursorSDKPlugin(); + plugin.enable(); + + const sendHandlers = handlersByName.get( + "orchestrion:@cursor/sdk:agent.send", + ); + const run = makeRun([ + { + type: "tool_call", + call_id: "call-1", + name: "shell", + status: "running", + args: { command: "echo hi" }, + }, + { + type: "tool_call", + call_id: "call-1", + name: "shell", + status: "completed", + result: { stdout: "hi\n" }, + }, + { + type: "assistant", + message: { content: [{ type: "text", text: "done" }] }, + }, + ]); + const event = { + agent: { agentId: "agent-1" }, + arguments: [ + "hello", + { + onDelta: vi.fn(), + }, + ], + result: run, + }; + + sendHandlers.start(event); + await event.arguments[1].onDelta({ + update: { + type: "turn-ended", + usage: { + inputTokens: 3, + outputTokens: 4, + cacheReadTokens: 1, + cacheWriteTokens: 2, + }, + }, + }); + sendHandlers.asyncEnd(event); + + const chunks = []; + for await (const chunk of run.stream()) { + chunks.push(chunk); + } + + expect(chunks).toHaveLength(3); + const toolSpan = spans.find((span) => span.name === "tool: shell"); + expect(toolSpan?.log).toHaveBeenCalledWith( + expect.objectContaining({ + input: { command: "echo hi" }, + }), + ); + expect(toolSpan?.log).toHaveBeenCalledWith( + expect.objectContaining({ + output: { stdout: "hi\n" }, + }), + ); + const rootSpan = spans.find((span) => span.name === "Cursor Agent"); + expect(rootSpan?.log).toHaveBeenCalledWith( + expect.objectContaining({ + metrics: expect.objectContaining({ + completion_tokens: 4, + prompt_cache_creation_tokens: 2, + prompt_cached_tokens: 1, + prompt_tokens: 3, + }), + output: 
"done", + }), + ); + }); + + it("traces Agent.prompt without a nested send span", () => { + const plugin = new CursorSDKPlugin(); + plugin.enable(); + + const promptHandlers = handlersByName.get( + "orchestrion:@cursor/sdk:Agent.prompt", + ); + const sendHandlers = handlersByName.get( + "orchestrion:@cursor/sdk:agent.send", + ); + const promptEvent = { arguments: ["hello", { local: { cwd: "/tmp" } }] }; + + promptHandlers.start(promptEvent); + sendHandlers.start({ arguments: ["nested", {}] }); + promptHandlers.asyncEnd({ + ...promptEvent, + result: { id: "run-1", result: "done", status: "finished" }, + }); + + expect(spans.filter((span) => span.name === "Cursor Agent")).toHaveLength( + 1, + ); + }); +}); + +function makeRun(messages: unknown[] = []) { + return { + agentId: "agent-1", + async conversation() { + return []; + }, + id: "run-1", + result: "done", + status: "finished", + stream: async function* () { + for (const message of messages) { + yield message; + } + }, + async wait() { + return { id: "run-1", result: "done", status: "finished" }; + }, + }; +} diff --git a/js/src/instrumentation/plugins/cursor-sdk-plugin.ts b/js/src/instrumentation/plugins/cursor-sdk-plugin.ts new file mode 100644 index 000000000..5f45bf440 --- /dev/null +++ b/js/src/instrumentation/plugins/cursor-sdk-plugin.ts @@ -0,0 +1,1179 @@ +import { BasePlugin } from "../core"; +import type { ChannelMessage } from "../core/channel-definitions"; +import type { IsoChannelHandlers } from "../../isomorph"; +import { debugLogger } from "../../debug-logger"; +import { startSpan } from "../../logger"; +import type { Span } from "../../logger"; +import { getCurrentUnixTimestamp } from "../../util"; +import { SpanTypeAttribute } from "../../../util/index"; +import { cursorSDKChannels } from "./cursor-sdk-channels"; +import type { + CursorSDKAgent, + CursorSDKAgentOptions, + CursorSDKConversationStep, + CursorSDKConversationTurn, + CursorSDKInteractionUpdate, + CursorSDKMessage, + 
CursorSDKModelSelection, + CursorSDKRun, + CursorSDKRunGitInfo, + CursorSDKRunResult, + CursorSDKSendOptions, + CursorSDKToolCall, + CursorSDKToolUseMessage, + CursorSDKUsage, + CursorSDKUserMessage, +} from "../../vendor-sdk-types/cursor-sdk"; + +const PATCHED_AGENT = Symbol.for("braintrust.cursor-sdk.auto-patched-agent"); +const PATCHED_RUN = Symbol.for("braintrust.cursor-sdk.patched-run"); +const WRAPPED_AGENT = Symbol.for("braintrust.cursor-sdk.wrapped-agent"); + +type ToolState = { + span: Span; + subAgentSpan?: Span; +}; + +type CursorRunState = { + activeToolSpans: Map; + agent?: CursorSDKAgent; + deltaText: string[]; + streamText: string[]; + stepText: string[]; + conversationOutput?: unknown; + conversationText: string[]; + finalized: boolean; + input: string | CursorSDKUserMessage; + lastResult?: CursorSDKRunResult; + metadata: Record; + metrics: Record; + run?: CursorSDKRun; + span: Span; + startTime: number; + streamMessages: CursorSDKMessage[]; + taskText: string[]; +}; + +type PromptState = { + metadata: Record; + span: Span; + startTime: number; +}; + +export class CursorSDKPlugin extends BasePlugin { + private promptDepth = 0; + + protected onEnable(): void { + this.subscribeToAgentFactories(); + this.subscribeToPrompt(); + this.subscribeToSend(); + } + + protected onDisable(): void { + for (const unsubscribe of this.unsubscribers) { + unsubscribe(); + } + this.unsubscribers = []; + this.promptDepth = 0; + } + + private subscribeToAgentFactories(): void { + this.subscribeToAgentFactory(cursorSDKChannels.create); + this.subscribeToAgentFactory(cursorSDKChannels.resume); + } + + private subscribeToAgentFactory( + channel: typeof cursorSDKChannels.create | typeof cursorSDKChannels.resume, + ): void { + const tracingChannel = channel.tracingChannel(); + const handlers: IsoChannelHandlers> = { + asyncEnd: (event) => { + patchCursorAgentInPlace(event.result); + }, + error: () => {}, + }; + + tracingChannel.subscribe(handlers); + this.unsubscribers.push(() 
=> { + tracingChannel.unsubscribe(handlers); + }); + } + + private subscribeToPrompt(): void { + const channel = cursorSDKChannels.prompt.tracingChannel(); + const states = new WeakMap(); + + const handlers: IsoChannelHandlers< + ChannelMessage + > = { + start: (event) => { + this.promptDepth += 1; + const message = event.arguments[0]; + const options = event.arguments[1]; + const metadata = { + ...extractAgentOptionsMetadata(options), + "cursor_sdk.operation": "Agent.prompt", + provider: "cursor", + ...(event.moduleVersion + ? { "cursor_sdk.version": event.moduleVersion } + : {}), + }; + const span = startSpan({ + name: "Cursor Agent", + spanAttributes: { type: SpanTypeAttribute.TASK }, + }); + const startTime = getCurrentUnixTimestamp(); + safeLog(span, { + input: sanitizeUserMessage(message), + metadata, + }); + states.set(event, { metadata, span, startTime }); + }, + asyncEnd: (event) => { + this.promptDepth = Math.max(0, this.promptDepth - 1); + const state = states.get(event); + if (!state) { + return; + } + try { + safeLog(state.span, { + metadata: { + ...state.metadata, + ...extractRunResultMetadata(event.result), + }, + metrics: buildDurationMetrics(state.startTime), + output: event.result?.result ?? 
event.result, + }); + } finally { + state.span.end(); + states.delete(event); + } + }, + error: (event) => { + this.promptDepth = Math.max(0, this.promptDepth - 1); + const state = states.get(event); + if (!state || !event.error) { + return; + } + safeLog(state.span, { error: event.error.message }); + state.span.end(); + states.delete(event); + }, + }; + + channel.subscribe(handlers); + this.unsubscribers.push(() => { + channel.unsubscribe(handlers); + }); + } + + private subscribeToSend(): void { + const channel = cursorSDKChannels.send.tracingChannel(); + const states = new WeakMap(); + + const handlers: IsoChannelHandlers< + ChannelMessage + > = { + start: (event) => { + if (this.promptDepth > 0) { + return; + } + + const message = event.arguments[0]; + const sendOptions = event.arguments[1]; + const agent = event.agent; + const metadata = { + ...extractSendMetadata(sendOptions), + ...(agent ? extractAgentMetadata(agent) : {}), + "cursor_sdk.operation": "agent.send", + provider: "cursor", + ...(event.moduleVersion + ? 
{ "cursor_sdk.version": event.moduleVersion } + : {}), + }; + const span = startSpan({ + name: "Cursor Agent", + spanAttributes: { type: SpanTypeAttribute.TASK }, + }); + const startTime = getCurrentUnixTimestamp(); + safeLog(span, { + input: sanitizeUserMessage(message), + metadata, + }); + + const state: CursorRunState = { + activeToolSpans: new Map(), + agent, + conversationText: [], + deltaText: [], + finalized: false, + input: message, + metadata, + metrics: {}, + span, + startTime, + streamMessages: [], + streamText: [], + stepText: [], + taskText: [], + }; + + if (hasCursorCallbacks(sendOptions)) { + event.arguments[1] = wrapSendOptionsCallbacks(sendOptions, state); + } + states.set(event, state); + }, + asyncEnd: (event) => { + const state = states.get(event); + if (!state) { + return; + } + + if (!event.result) { + return; + } + state.run = event.result; + state.metadata = { + ...state.metadata, + ...extractRunMetadata(event.result), + }; + patchCursorRun(event.result, state); + }, + error: (event) => { + const state = states.get(event); + if (!state || !event.error) { + return; + } + safeLog(state.span, { error: event.error.message }); + endOpenToolSpans(state, event.error.message); + state.span.end(); + state.finalized = true; + states.delete(event); + }, + }; + + channel.subscribe(handlers); + this.unsubscribers.push(() => { + channel.unsubscribe(handlers); + }); + } +} + +function patchCursorAgentInPlace(agent: unknown): void { + if (!agent || typeof agent !== "object") { + return; + } + const agentRecord = agent as CursorSDKAgent & Record; + if ( + agentRecord[PATCHED_AGENT] || + agentRecord[WRAPPED_AGENT] || + typeof agentRecord.send !== "function" + ) { + return; + } + + const originalSend = agentRecord.send.bind(agentRecord); + try { + Object.defineProperty(agentRecord, PATCHED_AGENT, { + configurable: false, + enumerable: false, + value: true, + }); + Object.defineProperty(agentRecord, "send", { + configurable: true, + value( + message: string | 
CursorSDKUserMessage, + options?: CursorSDKSendOptions, + ) { + const args = [message, options] as [ + string | CursorSDKUserMessage, + CursorSDKSendOptions | undefined, + ]; + return cursorSDKChannels.send.tracePromise( + () => originalSend(...args), + { + agent: agentRecord, + arguments: args, + operation: "send", + } as never, + ); + }, + writable: true, + }); + } catch { + // Frozen/sealed agents cannot be patched. Leave user behavior untouched. + } +} + +function wrapSendOptionsCallbacks( + options: CursorSDKSendOptions, + state: CursorRunState, +): CursorSDKSendOptions { + const originalOnDelta = options.onDelta; + const originalOnStep = options.onStep; + + return { + ...options, + async onDelta(args) { + try { + await handleInteractionUpdate(state, args.update); + } catch (error) { + logInstrumentationError("Cursor SDK onDelta", error); + } + if (originalOnDelta) { + return originalOnDelta(args); + } + }, + async onStep(args) { + try { + handleStepUpdate(state, args.step); + } catch (error) { + logInstrumentationError("Cursor SDK onStep", error); + } + if (originalOnStep) { + return originalOnStep(args); + } + }, + }; +} + +function hasCursorCallbacks( + options: CursorSDKSendOptions | undefined, +): options is CursorSDKSendOptions { + return ( + !!options && + (typeof options.onDelta === "function" || + typeof options.onStep === "function") + ); +} + +function patchCursorRun(run: CursorSDKRun, state: CursorRunState): void { + if (!run || typeof run !== "object") { + return; + } + const runRecord = run as CursorSDKRun & Record; + if (runRecord[PATCHED_RUN]) { + return; + } + + try { + Object.defineProperty(runRecord, PATCHED_RUN, { + configurable: false, + enumerable: false, + value: true, + }); + + if (typeof runRecord.stream === "function") { + const originalStream = runRecord.stream.bind(runRecord); + Object.defineProperty(runRecord, "stream", { + configurable: true, + value() { + const stream = originalStream(); + return patchCursorStream(stream, state); 
+ }, + writable: true, + }); + } + + if (typeof runRecord.wait === "function") { + const originalWait = runRecord.wait.bind(runRecord); + Object.defineProperty(runRecord, "wait", { + configurable: true, + async value() { + try { + const result = await originalWait(); + state.lastResult = result; + await finalizeCursorRun(state, { result }); + return result; + } catch (error) { + await finalizeCursorRun(state, { error }); + throw error; + } + }, + writable: true, + }); + } + + if (typeof runRecord.conversation === "function") { + const originalConversation = runRecord.conversation.bind(runRecord); + Object.defineProperty(runRecord, "conversation", { + configurable: true, + async value() { + try { + const conversation = await originalConversation(); + await handleConversation(state, conversation); + await finalizeCursorRun(state); + return conversation; + } catch (error) { + await finalizeCursorRun(state, { error }); + throw error; + } + }, + writable: true, + }); + } + } catch { + // If the Run object is not patchable, finish the span with available data. + void finalizeCursorRun(state, { output: run }); + } +} + +async function* patchCursorStream( + stream: AsyncGenerator, + state: CursorRunState, +): AsyncGenerator { + try { + for await (const message of stream) { + try { + await handleStreamMessage(state, message); + } catch (error) { + logInstrumentationError("Cursor SDK stream", error); + } + yield message; + } + await finalizeCursorRun(state); + } catch (error) { + await finalizeCursorRun(state, { error }); + throw error; + } +} + +async function handleInteractionUpdate( + state: CursorRunState, + update: CursorSDKInteractionUpdate, +): Promise { + switch (update.type) { + case "text-delta": + if (typeof update.text === "string") { + state.deltaText.push(update.text); + } + return; + case "token-delta": + if (typeof update.tokens === "number") { + state.metrics["cursor_sdk.delta_tokens"] = + (state.metrics["cursor_sdk.delta_tokens"] ?? 
0) + update.tokens; + } + return; + case "tool-call-started": + case "partial-tool-call": + case "tool-call-completed": + await handleToolUpdate( + state, + update as Extract< + CursorSDKInteractionUpdate, + { + type: + | "tool-call-started" + | "partial-tool-call" + | "tool-call-completed"; + } + >, + ); + return; + case "turn-ended": + addUsageMetrics( + state.metrics, + (update as { usage?: CursorSDKUsage }).usage, + ); + return; + case "summary": + if (typeof update.summary === "string") { + state.taskText.push(update.summary); + } + return; + case "step-completed": + if (typeof update.stepDurationMs === "number") { + state.metrics["cursor_sdk.step_duration_ms"] = + (state.metrics["cursor_sdk.step_duration_ms"] ?? 0) + + update.stepDurationMs; + } + state.metrics["cursor_sdk.steps"] = + (state.metrics["cursor_sdk.steps"] ?? 0) + 1; + return; + default: + return; + } +} + +async function handleToolUpdate( + state: CursorRunState, + update: Extract< + CursorSDKInteractionUpdate, + { type: "tool-call-started" | "partial-tool-call" | "tool-call-completed" } + >, +): Promise { + const callId = update.callId; + if (!callId) { + return; + } + + const toolCall = update.toolCall; + const name = extractToolName(toolCall); + const args = extractToolArgs(toolCall); + const result = extractToolResult(toolCall); + + if ( + update.type === "tool-call-started" || + update.type === "partial-tool-call" + ) { + if (!state.activeToolSpans.has(callId)) { + state.activeToolSpans.set( + callId, + await startToolSpan(state, { + args, + callId, + name, + status: "running", + toolCall, + }), + ); + } + return; + } + + const toolState = + state.activeToolSpans.get(callId) ?? + (await startToolSpan(state, { + args, + callId, + name, + status: "completed", + toolCall, + })); + + finishToolSpan(toolState, { + error: toolCall?.status === "error" ? stringifyUnknown(result) : undefined, + metadata: { + "cursor_sdk.tool.status": toolCall?.status ?? 
"completed", + }, + output: result, + }); + state.activeToolSpans.delete(callId); +} + +async function handleStreamMessage( + state: CursorRunState, + message: CursorSDKMessage, +): Promise { + state.streamMessages.push(message); + if (message.type === "system") { + const systemMessage = message as Extract< + CursorSDKMessage, + { type: "system" } + >; + state.metadata = { + ...state.metadata, + ...extractModelMetadata(systemMessage.model), + ...(systemMessage.agent_id + ? { "cursor_sdk.agent_id": systemMessage.agent_id } + : {}), + ...(systemMessage.run_id + ? { "cursor_sdk.run_id": systemMessage.run_id } + : {}), + ...(systemMessage.tools + ? { "cursor_sdk.tools": systemMessage.tools } + : {}), + }; + return; + } + + if (message.type === "assistant") { + const assistantMessage = message as Extract< + CursorSDKMessage, + { type: "assistant" } + >; + for (const block of assistantMessage.message?.content ?? []) { + if (block?.type === "text" && typeof block.text === "string") { + state.streamText.push(block.text); + } else if (block?.type === "tool_use" && block.id) { + state.activeToolSpans.set( + block.id, + await startToolSpan(state, { + args: block.input, + callId: block.id, + name: block.name, + status: "running", + }), + ); + } + } + return; + } + + if (message.type === "tool_call") { + await handleToolMessage( + state, + message as Extract, + ); + return; + } + + if (message.type === "task" && typeof message.text === "string") { + state.taskText.push(message.text); + return; + } + + if (message.type === "status" && message.status) { + state.metadata["cursor_sdk.status"] = message.status; + } +} + +async function handleToolMessage( + state: CursorRunState, + message: CursorSDKToolUseMessage, +): Promise { + const callId = message.call_id; + if (!callId) { + return; + } + + if (message.status === "running") { + if (!state.activeToolSpans.has(callId)) { + state.activeToolSpans.set( + callId, + await startToolSpan(state, { + args: message.args, + callId, + name: 
message.name, + status: message.status, + truncated: message.truncated, + }), + ); + } + return; + } + + const toolState = + state.activeToolSpans.get(callId) ?? + (await startToolSpan(state, { + args: message.args, + callId, + name: message.name, + status: message.status, + truncated: message.truncated, + })); + finishToolSpan(toolState, { + error: + message.status === "error" ? stringifyUnknown(message.result) : undefined, + metadata: { + "cursor_sdk.tool.status": message.status, + }, + output: message.result, + }); + state.activeToolSpans.delete(callId); +} + +async function handleConversation( + state: CursorRunState, + turns: CursorSDKConversationTurn[], +): Promise { + state.conversationOutput = turns; + for (const turn of turns) { + if (turn.type === "agentConversationTurn") { + for (const step of turn.turn?.steps ?? []) { + await handleConversationStep(state, step); + } + } else if (turn.type === "shellConversationTurn") { + const command = turn.turn?.shellCommand?.command; + if (command) { + const callId = `shell:${state.activeToolSpans.size}:${command}`; + const toolState = await startToolSpan(state, { + args: turn.turn?.shellCommand, + callId, + name: "shell", + status: "completed", + }); + finishToolSpan(toolState, { + metadata: { "cursor_sdk.tool.status": "completed" }, + output: turn.turn?.shellOutput, + }); + } + } + } +} + +async function handleConversationStep( + state: CursorRunState, + step: CursorSDKConversationStep, +): Promise { + if ( + step.type === "assistantMessage" && + typeof step.message?.text === "string" + ) { + state.conversationText.push(step.message.text); + return; + } + + if (step.type !== "toolCall") { + return; + } + + const toolCall = step.message; + const callId = + typeof toolCall?.callId === "string" + ? 
toolCall.callId + : `conversation-tool:${state.activeToolSpans.size}`; + const toolState = await startToolSpan(state, { + args: extractToolArgs(toolCall), + callId, + name: extractToolName(toolCall), + status: toolCall?.status, + toolCall, + }); + finishToolSpan(toolState, { + error: + toolCall?.status === "error" + ? stringifyUnknown(toolCall.result) + : undefined, + metadata: { + "cursor_sdk.tool.status": toolCall?.status ?? "completed", + }, + output: extractToolResult(toolCall), + }); +} + +function handleStepUpdate( + state: CursorRunState, + step: CursorSDKConversationStep, +): void { + state.metrics["cursor_sdk.steps"] = + (state.metrics["cursor_sdk.steps"] ?? 0) + 1; + if (step.type) { + const stepTypes = state.metadata["cursor_sdk.step_types"]; + if (Array.isArray(stepTypes)) { + if (!stepTypes.includes(step.type)) { + stepTypes.push(step.type); + } + } else { + state.metadata["cursor_sdk.step_types"] = [step.type]; + } + } + if ( + step.type === "assistantMessage" && + typeof step.message?.text === "string" + ) { + state.stepText.push(step.message.text); + } +} + +async function startToolSpan( + state: CursorRunState, + args: { + args?: unknown; + callId: string; + name?: string; + status?: string; + toolCall?: CursorSDKToolCall; + truncated?: { args?: boolean; result?: boolean }; + }, +): Promise { + const name = args.name || "unknown"; + const metadata: Record = { + "cursor_sdk.tool.status": args.status, + "gen_ai.tool.call.id": args.callId, + "gen_ai.tool.name": name, + }; + if (args.truncated?.args !== undefined) { + metadata["cursor_sdk.tool.args_truncated"] = args.truncated.args; + } + if (args.truncated?.result !== undefined) { + metadata["cursor_sdk.tool.result_truncated"] = args.truncated.result; + } + + const span = startSpan({ + event: { + input: args.args, + metadata, + }, + name: `tool: ${name}`, + parent: await state.span.export(), + spanAttributes: { type: SpanTypeAttribute.TOOL }, + }); + + let subAgentSpan: Span | undefined; + if 
(isSubAgentToolName(name)) { + subAgentSpan = startSpan({ + event: { + input: args.args, + metadata: { + "cursor_sdk.subagent.tool_call_id": args.callId, + "gen_ai.tool.name": name, + }, + }, + name: formatSubAgentSpanName(args.toolCall, args.args), + parent: await span.export(), + spanAttributes: { type: SpanTypeAttribute.TASK }, + }); + } + + return { span, subAgentSpan }; +} + +function finishToolSpan( + toolState: ToolState, + result: { + error?: string; + metadata?: Record; + output?: unknown; + }, +): void { + try { + if (result.error) { + safeLog(toolState.span, { + error: result.error, + metadata: result.metadata, + output: result.output, + }); + if (toolState.subAgentSpan) { + safeLog(toolState.subAgentSpan, { + error: result.error, + metadata: result.metadata, + output: result.output, + }); + } + } else { + safeLog(toolState.span, { + metadata: result.metadata, + output: result.output, + }); + if (toolState.subAgentSpan) { + safeLog(toolState.subAgentSpan, { + metadata: result.metadata, + output: result.output, + }); + } + } + } finally { + toolState.subAgentSpan?.end(); + toolState.span.end(); + } +} + +async function finalizeCursorRun( + state: CursorRunState, + params: { + error?: unknown; + output?: unknown; + result?: CursorSDKRunResult; + } = {}, +): Promise { + if (state.finalized) { + return; + } + state.finalized = true; + + const error = params.error; + const result = params.result ?? state.lastResult; + const output = + params.output ?? + result?.result ?? + state.run?.result ?? + (state.conversationText.length > 0 + ? state.conversationText.join("\n") + : undefined) ?? + state.conversationOutput ?? + (state.streamText.length > 0 ? state.streamText.join("") : undefined) ?? + (state.deltaText.length > 0 ? state.deltaText.join("") : undefined) ?? + (state.stepText.length > 0 ? state.stepText.join("\n") : undefined) ?? + (state.taskText.length > 0 ? 
state.taskText.join("\n") : undefined); + + try { + const metadata = { + ...state.metadata, + ...(state.run ? extractRunMetadata(state.run) : {}), + ...(result ? extractRunResultMetadata(result) : {}), + }; + if (error) { + safeLog(state.span, { + error: error instanceof Error ? error.message : String(error), + metadata, + metrics: { + ...cleanMetrics(state.metrics), + ...buildDurationMetrics(state.startTime), + }, + output, + }); + } else { + safeLog(state.span, { + metadata, + metrics: { + ...cleanMetrics(state.metrics), + ...buildDurationMetrics(state.startTime), + }, + output, + }); + } + } finally { + endOpenToolSpans(state); + state.span.end(); + } +} + +function endOpenToolSpans(state: CursorRunState, error?: string): void { + for (const [, toolState] of state.activeToolSpans) { + finishToolSpan(toolState, { error }); + } + state.activeToolSpans.clear(); +} + +function sanitizeUserMessage( + message: string | CursorSDKUserMessage | undefined, +): unknown { + if (typeof message === "string" || message === undefined) { + return message; + } + return { + ...message, + images: message.images?.map((image) => { + const imageRecord = image as Record; + return { + ...(typeof imageRecord.url === "string" + ? { url: imageRecord.url } + : {}), + ...(typeof imageRecord.mimeType === "string" + ? { mimeType: imageRecord.mimeType } + : {}), + ...(image.dimension ? { dimension: image.dimension } : {}), + hasData: typeof imageRecord.data === "string", + }; + }), + }; +} + +function extractAgentOptionsMetadata( + options: CursorSDKAgentOptions | Partial | undefined, +): Record { + if (!options) { + return {}; + } + + return { + ...extractModelMetadata(options.model), + ...(options.agentId ? { "cursor_sdk.agent_id": options.agentId } : {}), + ...(options.name ? { "cursor_sdk.agent_name": options.name } : {}), + ...(options.local + ? { + "cursor_sdk.runtime": "local", + "cursor_sdk.local.cwd": Array.isArray(options.local.cwd) + ? 
options.local.cwd.join(",") + : options.local.cwd, + } + : {}), + ...(options.cloud + ? { + "cursor_sdk.runtime": "cloud", + "cursor_sdk.cloud.auto_create_pr": options.cloud.autoCreatePR, + "cursor_sdk.cloud.env_type": options.cloud.env?.type, + "cursor_sdk.cloud.env_name": options.cloud.env?.name, + "cursor_sdk.cloud.repos": options.cloud.repos + ?.map((repo) => repo.url) + .filter((url): url is string => typeof url === "string"), + } + : {}), + }; +} + +function extractSendMetadata( + options: CursorSDKSendOptions | undefined, +): Record { + if (!options) { + return {}; + } + return { + ...extractModelMetadata(options.model), + ...(options.local?.force !== undefined + ? { "cursor_sdk.local.force": options.local.force } + : {}), + }; +} + +function extractAgentMetadata(agent: CursorSDKAgent): Record { + return { + ...(agent.agentId ? { "cursor_sdk.agent_id": agent.agentId } : {}), + ...extractModelMetadata(agent.model), + }; +} + +function extractRunMetadata( + run: CursorSDKRun | undefined, +): Record { + if (!run) { + return {}; + } + return { + ...(run.id ? { "cursor_sdk.run_id": run.id } : {}), + ...(run.agentId ? { "cursor_sdk.agent_id": run.agentId } : {}), + ...(run.status ? { "cursor_sdk.status": run.status } : {}), + ...(run.durationMs !== undefined + ? { "cursor_sdk.duration_ms": run.durationMs } + : {}), + ...extractModelMetadata(run.model), + ...extractGitMetadata(run.git), + }; +} + +function extractRunResultMetadata( + result: CursorSDKRunResult | undefined, +): Record { + if (!result) { + return {}; + } + return { + ...(result.id ? { "cursor_sdk.run_id": result.id } : {}), + ...(result.status ? { "cursor_sdk.status": result.status } : {}), + ...(result.durationMs !== undefined + ? 
{ "cursor_sdk.duration_ms": result.durationMs } + : {}), + ...extractModelMetadata(result.model), + ...extractGitMetadata(result.git), + }; +} + +function extractGitMetadata( + git: CursorSDKRunGitInfo | undefined, +): Record { + const branches = git?.branches; + if (!branches || branches.length === 0) { + return {}; + } + return { + "cursor_sdk.git.branches": branches.map((branch) => ({ + branch: branch.branch, + prUrl: branch.prUrl, + repoUrl: branch.repoUrl, + })), + }; +} + +function extractModelMetadata( + model: CursorSDKModelSelection | undefined, +): Record { + if (!model?.id) { + return {}; + } + return { + model: model.id, + "cursor_sdk.model": model.id, + ...(model.params ? { "cursor_sdk.model.params": model.params } : {}), + }; +} + +function addUsageMetrics( + metrics: Record, + usage: CursorSDKUsage | undefined, +): void { + if (!usage) { + return; + } + if (usage.inputTokens !== undefined) { + metrics.prompt_tokens = (metrics.prompt_tokens ?? 0) + usage.inputTokens; + } + if (usage.outputTokens !== undefined) { + metrics.completion_tokens = + (metrics.completion_tokens ?? 0) + usage.outputTokens; + } + if (usage.cacheReadTokens !== undefined) { + metrics.prompt_cached_tokens = + (metrics.prompt_cached_tokens ?? 0) + usage.cacheReadTokens; + } + if (usage.cacheWriteTokens !== undefined) { + metrics.prompt_cache_creation_tokens = + (metrics.prompt_cache_creation_tokens ?? 0) + usage.cacheWriteTokens; + } + metrics.tokens = + (metrics.prompt_tokens ?? 0) + + (metrics.completion_tokens ?? 0) + + (metrics.prompt_cached_tokens ?? 0) + + (metrics.prompt_cache_creation_tokens ?? 
0); +} + +function buildDurationMetrics(startTime: number): Record { + const end = getCurrentUnixTimestamp(); + return { + duration: end - startTime, + end, + start: startTime, + }; +} + +function extractToolName(toolCall: CursorSDKToolCall | undefined): string { + if (!toolCall) { + return "unknown"; + } + if (typeof toolCall.name === "string") { + return toolCall.name; + } + if (typeof toolCall.type === "string") { + return toolCall.type; + } + return "unknown"; +} + +function extractToolArgs(toolCall: CursorSDKToolCall | undefined): unknown { + return toolCall && "args" in toolCall ? toolCall.args : undefined; +} + +function extractToolResult(toolCall: CursorSDKToolCall | undefined): unknown { + return toolCall && "result" in toolCall ? toolCall.result : undefined; +} + +function isSubAgentToolName(name: string): boolean { + return name === "Agent" || name === "Task" || name === "task"; +} + +function formatSubAgentSpanName( + toolCall: CursorSDKToolCall | undefined, + args: unknown, +): string { + const details = (toolCall ?? args) as Record | undefined; + const description = + getString(details, "description") ?? + getString(details, "subagent_type") ?? + getString(details, "type") ?? + getString(details, "name"); + return description ? `Agent: ${description}` : "Agent: sub-agent"; +} + +function getString( + obj: Record | undefined, + key: string, +): string | undefined { + const value = obj?.[key]; + return typeof value === "string" ? 
value : undefined; +} + +function stringifyUnknown(value: unknown): string { + if (value instanceof Error) { + return value.message; + } + if (typeof value === "string") { + return value; + } + try { + return JSON.stringify(value); + } catch { + return String(value); + } +} + +function safeLog(span: Span, event: Parameters[0]): void { + try { + span.log(event); + } catch (error) { + logInstrumentationError("Cursor SDK span log", error); + } +} + +function logInstrumentationError(context: string, error: unknown): void { + debugLogger.error(`Error processing ${context}:`, error); +} + +function cleanMetrics(metrics: Record): Record { + const cleaned: Record = {}; + for (const [key, value] of Object.entries(metrics)) { + if (value !== undefined && Number.isFinite(value)) { + cleaned[key] = value; + } + } + return cleaned; +} diff --git a/js/src/instrumentation/registry.ts b/js/src/instrumentation/registry.ts index 0d3111db4..8e09f214a 100644 --- a/js/src/instrumentation/registry.ts +++ b/js/src/instrumentation/registry.ts @@ -21,6 +21,8 @@ export interface InstrumentationConfig { google?: boolean; huggingface?: boolean; claudeAgentSDK?: boolean; + cursor?: boolean; + cursorSDK?: boolean; openrouter?: boolean; openrouterAgent?: boolean; mistral?: boolean; @@ -111,6 +113,8 @@ class PluginRegistry { google: true, huggingface: true, claudeAgentSDK: true, + cursor: true, + cursorSDK: true, openrouter: true, openrouterAgent: true, mistral: true, @@ -133,7 +137,11 @@ class PluginRegistry { .filter((s) => s.length > 0); for (const sdk of disabled) { - integrations[sdk] = false; + if (sdk === "cursor-sdk") { + integrations.cursorSDK = false; + } else { + integrations[sdk] = false; + } } } diff --git a/js/src/vendor-sdk-types/cursor-sdk.ts b/js/src/vendor-sdk-types/cursor-sdk.ts new file mode 100644 index 000000000..a8bf1bb70 --- /dev/null +++ b/js/src/vendor-sdk-types/cursor-sdk.ts @@ -0,0 +1,337 @@ +/** + * Vendored types for @cursor/sdk used by Braintrust instrumentation. 
+ * + * Keep this surface intentionally narrow. These types are not exported to SDK + * users and should only cover fields we read, wrap, or log. + */ + +export interface CursorSDKModule { + Agent: CursorSDKAgentClass; + [key: string]: unknown; +} + +export interface CursorSDKAgentClass { + create(options: CursorSDKAgentOptions): Promise; + resume( + agentId: string, + options?: Partial, + ): Promise; + prompt( + message: string | CursorSDKUserMessage, + options?: CursorSDKAgentOptions, + ): Promise; + [key: string]: unknown; +} + +export interface CursorSDKAgent { + readonly agentId?: string; + readonly model?: CursorSDKModelSelection; + send( + message: string | CursorSDKUserMessage, + options?: CursorSDKSendOptions, + ): Promise; + close?: () => void; + reload?: () => Promise; + [Symbol.asyncDispose]?: () => Promise; + [key: string | symbol]: unknown; +} + +export interface CursorSDKAgentOptions { + agentId?: string; + apiKey?: string; + model?: CursorSDKModelSelection; + name?: string; + local?: { + cwd?: string | string[]; + settingSources?: string[]; + sandboxOptions?: { enabled?: boolean }; + }; + cloud?: { + env?: { type?: "cloud" | "pool" | "machine"; name?: string }; + repos?: Array<{ url?: string; startingRef?: string; prUrl?: string }>; + autoCreatePR?: boolean; + workOnCurrentBranch?: boolean; + skipReviewerRequest?: boolean; + }; + mcpServers?: Record; + agents?: Record; + [key: string]: unknown; +} + +export interface CursorSDKAgentDefinition { + description?: string; + prompt?: string; + model?: CursorSDKModelSelection | "inherit"; + mcpServers?: unknown[]; +} + +export interface CursorSDKSendOptions { + model?: CursorSDKModelSelection; + mcpServers?: Record; + onDelta?: (args: { + update: CursorSDKInteractionUpdate; + }) => void | Promise; + onStep?: (args: { step: CursorSDKConversationStep }) => void | Promise; + local?: { + force?: boolean; + }; + [key: string]: unknown; +} + +export interface CursorSDKModelSelection { + id?: string; + params?: 
Array<{ id?: string; value?: string }>; +} + +export interface CursorSDKUserMessage { + text?: string; + images?: CursorSDKImage[]; + [key: string]: unknown; +} + +export type CursorSDKImage = + | { + url?: string; + dimension?: CursorSDKImageDimension; + [key: string]: unknown; + } + | { + data?: string; + mimeType?: string; + dimension?: CursorSDKImageDimension; + [key: string]: unknown; + }; + +export interface CursorSDKImageDimension { + width?: number; + height?: number; +} + +export type CursorSDKRunStatus = "running" | "finished" | "error" | "cancelled"; + +export type CursorSDKRunOperation = + | "stream" + | "wait" + | "cancel" + | "conversation"; + +export interface CursorSDKRun { + readonly id?: string; + readonly agentId?: string; + readonly status?: CursorSDKRunStatus; + readonly result?: string; + readonly model?: CursorSDKModelSelection; + readonly durationMs?: number; + readonly git?: CursorSDKRunGitInfo; + readonly createdAt?: number; + stream(): AsyncGenerator; + wait(): Promise; + conversation(): Promise; + cancel?: () => Promise; + supports?: (operation: CursorSDKRunOperation) => boolean; + unsupportedReason?: (operation: CursorSDKRunOperation) => string | undefined; + onDidChangeStatus?: ( + listener: (status: CursorSDKRunStatus) => void, + ) => () => void; + [key: string]: unknown; +} + +export interface CursorSDKRunResult { + id?: string; + status?: Exclude; + result?: string; + model?: CursorSDKModelSelection; + durationMs?: number; + git?: CursorSDKRunGitInfo; + [key: string]: unknown; +} + +export interface CursorSDKRunGitInfo { + branches?: Array<{ repoUrl?: string; branch?: string; prUrl?: string }>; +} + +export type CursorSDKMessage = + | CursorSDKSystemMessage + | CursorSDKUserMessageEvent + | CursorSDKAssistantMessage + | CursorSDKThinkingMessage + | CursorSDKToolUseMessage + | CursorSDKStatusMessage + | CursorSDKTaskMessage + | CursorSDKRequestMessage + | { type?: string; [key: string]: unknown }; + +export interface 
CursorSDKSystemMessage { + type: "system"; + subtype?: "init"; + agent_id?: string; + run_id?: string; + model?: CursorSDKModelSelection; + tools?: string[]; +} + +export interface CursorSDKUserMessageEvent { + type: "user"; + agent_id?: string; + run_id?: string; + message?: { role?: "user"; content?: CursorSDKTextBlock[] }; +} + +export interface CursorSDKAssistantMessage { + type: "assistant"; + agent_id?: string; + run_id?: string; + message?: { + role?: "assistant"; + content?: Array; + }; +} + +export interface CursorSDKThinkingMessage { + type: "thinking"; + agent_id?: string; + run_id?: string; + text?: string; + thinking_duration_ms?: number; +} + +export interface CursorSDKToolUseMessage { + type: "tool_call"; + agent_id?: string; + run_id?: string; + call_id?: string; + name?: string; + status?: "running" | "completed" | "error"; + args?: unknown; + result?: unknown; + truncated?: { args?: boolean; result?: boolean }; +} + +export interface CursorSDKStatusMessage { + type: "status"; + agent_id?: string; + run_id?: string; + status?: string; + message?: string; +} + +export interface CursorSDKTaskMessage { + type: "task"; + agent_id?: string; + run_id?: string; + status?: string; + text?: string; +} + +export interface CursorSDKRequestMessage { + type: "request"; + agent_id?: string; + run_id?: string; + request_id?: string; +} + +export interface CursorSDKTextBlock { + type?: "text"; + text?: string; +} + +export interface CursorSDKToolUseBlock { + type?: "tool_use"; + id?: string; + name?: string; + input?: unknown; +} + +export type CursorSDKInteractionUpdate = + | { + type: "text-delta" | "thinking-delta"; + text?: string; + [key: string]: unknown; + } + | { + type: "thinking-completed"; + thinkingDurationMs?: number; + [key: string]: unknown; + } + | { + type: "tool-call-started" | "partial-tool-call" | "tool-call-completed"; + callId?: string; + toolCall?: CursorSDKToolCall; + modelCallId?: string; + status?: string; + [key: string]: unknown; + } + 
| { + type: "token-delta"; + tokens?: number; + [key: string]: unknown; + } + | { + type: "turn-ended"; + usage?: CursorSDKUsage; + [key: string]: unknown; + } + | { + type: + | "step-started" + | "step-completed" + | "user-message-appended" + | "summary" + | "summary-started" + | "summary-completed" + | "shell-output-delta"; + [key: string]: unknown; + } + | { type?: string; [key: string]: unknown }; + +export interface CursorSDKUsage { + inputTokens?: number; + outputTokens?: number; + cacheReadTokens?: number; + cacheWriteTokens?: number; +} + +export interface CursorSDKToolCall { + type?: string; + name?: string; + args?: unknown; + result?: unknown; + truncated?: { args?: boolean; result?: boolean }; + status?: "running" | "completed" | "error"; + [key: string]: unknown; +} + +export type CursorSDKConversationTurn = + | { + type?: "agentConversationTurn"; + turn?: { + userMessage?: { text?: string }; + steps?: CursorSDKConversationStep[]; + }; + [key: string]: unknown; + } + | { + type?: "shellConversationTurn"; + turn?: { + shellCommand?: { command?: string; workingDirectory?: string }; + shellOutput?: { stdout?: string; stderr?: string; exitCode?: number }; + }; + [key: string]: unknown; + }; + +export type CursorSDKConversationStep = + | { + type?: "assistantMessage"; + message?: { text?: string }; + [key: string]: unknown; + } + | { + type?: "thinkingMessage"; + message?: { text?: string; thinkingDurationMs?: number }; + [key: string]: unknown; + } + | { + type?: "toolCall"; + message?: CursorSDKToolCall; + [key: string]: unknown; + }; diff --git a/js/src/wrappers/cursor-sdk.test.ts b/js/src/wrappers/cursor-sdk.test.ts new file mode 100644 index 000000000..28ef5a0fe --- /dev/null +++ b/js/src/wrappers/cursor-sdk.test.ts @@ -0,0 +1,145 @@ +import { afterEach, describe, expect, it, vi } from "vitest"; + +const { tracePromise } = vi.hoisted(() => ({ + tracePromise: vi.fn((fn: () => Promise) => fn()), +})); + +vi.mock("../isomorph", () => ({ + default: { + 
newTracingChannel: vi.fn(() => ({ + subscribe: vi.fn(), + tracePromise, + unsubscribe: vi.fn(), + })), + }, +})); + +import { wrapCursorSDK } from "./cursor-sdk"; + +describe("wrapCursorSDK", () => { + afterEach(() => { + vi.clearAllMocks(); + }); + + it("returns invalid modules unchanged", () => { + const warnSpy = vi.spyOn(console, "warn").mockImplementation(() => {}); + const sdk = { Cursor: {} }; + + expect(wrapCursorSDK(sdk)).toBe(sdk); + expect(warnSpy).toHaveBeenCalledWith( + "Unsupported Cursor SDK. Not wrapping.", + ); + + warnSpy.mockRestore(); + }); + + it("wraps Agent.create and returned agent.send", async () => { + const run = makeRun(); + const agent = { + agentId: "agent-1", + send: vi.fn(async () => run), + }; + const sdk = { + Agent: class { + static async create() { + return agent; + } + }, + }; + + const wrapped = wrapCursorSDK(sdk); + const created = await wrapped.Agent.create({ local: { cwd: "/tmp/repo" } }); + const result = await created.send("hello", { + onDelta: vi.fn(), + onStep: vi.fn(), + }); + + expect(result).toBe(run); + expect(agent.send).toHaveBeenCalledWith("hello", expect.any(Object)); + expect(tracePromise).toHaveBeenCalledTimes(2); + }); + + it("wraps Agent.resume and preserves private-field-safe method binding", async () => { + class PrivateAgent { + #run = makeRun(); + agentId = "agent-2"; + + async send() { + return this.#run; + } + + async [Symbol.asyncDispose]() { + return undefined; + } + } + + const sdk = { + Agent: class { + static async resume() { + return new PrivateAgent(); + } + }, + }; + + const wrapped = wrapCursorSDK(sdk); + const agent = await wrapped.Agent.resume("agent-2"); + + await expect(agent.send("hello")).resolves.toMatchObject({ id: "run-1" }); + await expect(agent[Symbol.asyncDispose]()).resolves.toBeUndefined(); + }); + + it("wraps Agent.prompt", async () => { + const sdk = { + Agent: class { + static async prompt(message: string) { + return { id: "run-1", result: message, status: "finished" }; + } + 
}, + }; + + const wrapped = wrapCursorSDK(sdk); + await expect(wrapped.Agent.prompt("hello")).resolves.toMatchObject({ + result: "hello", + }); + expect(tracePromise).toHaveBeenCalledTimes(1); + }); + + it("handles module namespace-like objects", async () => { + const Agent = class { + static async prompt() { + return { status: "finished" }; + } + }; + const sdk = Object.defineProperty({}, "Agent", { + configurable: false, + enumerable: true, + value: Agent, + writable: false, + }); + + const wrapped = wrapCursorSDK(sdk as { Agent: typeof Agent }); + + await expect(wrapped.Agent.prompt("hello")).resolves.toMatchObject({ + status: "finished", + }); + }); +}); + +function makeRun() { + return { + agentId: "agent-1", + async conversation() { + return []; + }, + id: "run-1", + stream: async function* () { + yield { + type: "assistant", + message: { content: [{ text: "hello", type: "text" }] }, + }; + }, + async wait() { + return { id: "run-1", result: "hello", status: "finished" }; + }, + }; +} diff --git a/js/src/wrappers/cursor-sdk.ts b/js/src/wrappers/cursor-sdk.ts new file mode 100644 index 000000000..a35d0f090 --- /dev/null +++ b/js/src/wrappers/cursor-sdk.ts @@ -0,0 +1,182 @@ +import { cursorSDKChannels } from "../instrumentation/plugins/cursor-sdk-channels"; +import type { + CursorSDKAgent, + CursorSDKAgentClass, + CursorSDKAgentOptions, + CursorSDKModule, + CursorSDKRunResult, + CursorSDKSendOptions, + CursorSDKUserMessage, +} from "../vendor-sdk-types/cursor-sdk"; + +const WRAPPED_AGENT = Symbol.for("braintrust.cursor-sdk.wrapped-agent"); + +/** + * Wraps the Cursor TypeScript SDK with Braintrust tracing. The wrapper emits + * diagnostics-channel events; the Cursor SDK plugin owns span lifecycle. 
+ */ +export function wrapCursorSDK(sdk: T): T { + if (!sdk || typeof sdk !== "object") { + return sdk; + } + + const maybeSDK = sdk as Record; + if (!maybeSDK.Agent || typeof maybeSDK.Agent !== "function") { + // eslint-disable-next-line no-restricted-properties -- preserving intentional console usage. + console.warn("Unsupported Cursor SDK. Not wrapping."); + return sdk; + } + + const target = isModuleNamespace(sdk) + ? Object.setPrototypeOf({}, sdk) + : (sdk as Record); + + return new Proxy(target, { + get(target, prop, receiver) { + const value = Reflect.get(target, prop, receiver); + if (prop === "Agent" && typeof value === "function") { + return wrapCursorAgentClass(value as unknown as CursorSDKAgentClass); + } + if (typeof value === "function") { + return value.bind(target); + } + return value; + }, + }) as T; +} + +function isModuleNamespace(obj: unknown): boolean { + if (!obj || typeof obj !== "object") { + return false; + } + if (obj.constructor?.name === "Module") { + return true; + } + const keys = Object.keys(obj); + if (keys.length === 0) { + return false; + } + const descriptor = Object.getOwnPropertyDescriptor(obj, keys[0]); + return descriptor ? 
!descriptor.configurable && !descriptor.writable : false; +} + +function wrapCursorAgentClass(Agent: CursorSDKAgentClass): CursorSDKAgentClass { + const cache = new Map(); + + return new Proxy(Agent, { + get(target, prop, receiver) { + if (cache.has(prop)) { + return cache.get(prop); + } + + const value = Reflect.get(target, prop, receiver); + if (prop === "create" && typeof value === "function") { + const wrapped = async function ( + options: CursorSDKAgentOptions, + ): Promise { + const args = [options] as [CursorSDKAgentOptions]; + return cursorSDKChannels.create.tracePromise( + async () => + wrapCursorAgent(await Reflect.apply(value, target, args)), + { arguments: args } as never, + ); + }; + cache.set(prop, wrapped); + return wrapped; + } + + if (prop === "resume" && typeof value === "function") { + const wrapped = async function ( + agentId: string, + options?: Partial, + ): Promise { + const args = [agentId, options] as [ + string, + Partial | undefined, + ]; + return cursorSDKChannels.resume.tracePromise( + async () => + wrapCursorAgent(await Reflect.apply(value, target, args)), + { arguments: args } as never, + ); + }; + cache.set(prop, wrapped); + return wrapped; + } + + if (prop === "prompt" && typeof value === "function") { + const wrapped = async function ( + message: string | CursorSDKUserMessage, + options?: CursorSDKAgentOptions, + ): Promise { + const args = [message, options] as [ + string | CursorSDKUserMessage, + CursorSDKAgentOptions | undefined, + ]; + return cursorSDKChannels.prompt.tracePromise( + () => Reflect.apply(value, target, args), + { arguments: args } as never, + ); + }; + cache.set(prop, wrapped); + return wrapped; + } + + if (typeof value === "function") { + const bound = value.bind(target); + cache.set(prop, bound); + return bound; + } + + return value; + }, + }) as CursorSDKAgentClass; +} + +function wrapCursorAgent(agent: CursorSDKAgent): CursorSDKAgent { + if (!agent || typeof agent !== "object") { + return agent; + } + if 
((agent as Record)[WRAPPED_AGENT]) { + return agent; + } + + const proxy = new Proxy(agent, { + get(target, prop, receiver) { + if (prop === WRAPPED_AGENT) { + return true; + } + + const value = Reflect.get(target, prop, receiver); + if (prop === "send" && typeof value === "function") { + return function ( + message: string | CursorSDKUserMessage, + options?: CursorSDKSendOptions, + ) { + const args = [message, options] as [ + string | CursorSDKUserMessage, + CursorSDKSendOptions | undefined, + ]; + return cursorSDKChannels.send.tracePromise( + () => Reflect.apply(value, target, args), + { + agent: target, + arguments: args, + operation: "send", + } as never, + ); + }; + } + + if (typeof value === "function") { + return value.bind(target); + } + + return value; + }, + }); + + return proxy as CursorSDKAgent; +} + +export type { CursorSDKModule }; diff --git a/turbo.json b/turbo.json index bad5b66ae..809a67834 100644 --- a/turbo.json +++ b/turbo.json @@ -6,6 +6,7 @@ "ANTHROPIC_API_KEY", "GEMINI_API_KEY", "COHERE_API_KEY", + "CURSOR_API_KEY", "GROQ_API_KEY", "OPENROUTER_API_KEY", "MISTRAL_API_KEY", @@ -27,6 +28,7 @@ "BRAINTRUST_API_KEY", "GEMINI_API_KEY", "COHERE_API_KEY", + "CURSOR_API_KEY", "GROQ_API_KEY", "OPENAI_API_KEY", "OPENAI_BASE_URL", @@ -46,6 +48,7 @@ "BRAINTRUST_E2E_RUN_CONTEXT_DIR", "GEMINI_API_KEY", "COHERE_API_KEY", + "CURSOR_API_KEY", "GROQ_API_KEY", "OPENAI_API_KEY", "OPENAI_BASE_URL", @@ -70,6 +73,7 @@ "BRAINTRUST_E2E_RUN_CONTEXT_DIR", "GEMINI_API_KEY", "COHERE_API_KEY", + "CURSOR_API_KEY", "GROQ_API_KEY", "OPENAI_API_KEY", "OPENAI_BASE_URL", @@ -89,6 +93,7 @@ "BRAINTRUST_E2E_RUN_CONTEXT_DIR", "GEMINI_API_KEY", "COHERE_API_KEY", + "CURSOR_API_KEY", "GROQ_API_KEY", "OPENAI_API_KEY", "OPENAI_BASE_URL", @@ -122,6 +127,7 @@ "BRAINTRUST_API_KEY", "GEMINI_API_KEY", "COHERE_API_KEY", + "CURSOR_API_KEY", "GROQ_API_KEY", "OPENAI_API_KEY", "OPENROUTER_API_KEY", @@ -137,6 +143,7 @@ "BRAINTRUST_API_KEY", "GEMINI_API_KEY", "COHERE_API_KEY", + "CURSOR_API_KEY", 
"GROQ_API_KEY", "OPENAI_API_KEY", "OPENROUTER_API_KEY", @@ -152,6 +159,7 @@ "BRAINTRUST_API_KEY", "GEMINI_API_KEY", "COHERE_API_KEY", + "CURSOR_API_KEY", "GROQ_API_KEY", "OPENAI_API_KEY", "OPENROUTER_API_KEY", From 4a1c2293b301c96e9994938517ecfbdda0501088 Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Mon, 4 May 2026 21:19:38 +0000 Subject: [PATCH 26/26] chore: Prepare changelog --- .changeset/chilly-sites-greet.md | 5 --- .changeset/cold-geese-move.md | 5 --- .changeset/eager-jobs-hammer.md | 5 --- .changeset/fresh-crabs-dream.md | 5 --- .changeset/fuzzy-evals-warn.md | 5 --- .changeset/goofy-hotels-care.md | 69 ------------------------------- .changeset/honest-mails-show.md | 5 --- .changeset/lemon-regions-shine.md | 5 --- .changeset/short-moles-punch.md | 5 --- .changeset/sunny-cities-dream.md | 5 --- .changeset/tall-nights-care.md | 5 --- .changeset/yellow-crabs-attend.md | 5 --- js/CHANGELOG.md | 20 +++++++++ js/package.json | 2 +- 14 files changed, 21 insertions(+), 125 deletions(-) delete mode 100644 .changeset/chilly-sites-greet.md delete mode 100644 .changeset/cold-geese-move.md delete mode 100644 .changeset/eager-jobs-hammer.md delete mode 100644 .changeset/fresh-crabs-dream.md delete mode 100644 .changeset/fuzzy-evals-warn.md delete mode 100644 .changeset/goofy-hotels-care.md delete mode 100644 .changeset/honest-mails-show.md delete mode 100644 .changeset/lemon-regions-shine.md delete mode 100644 .changeset/short-moles-punch.md delete mode 100644 .changeset/sunny-cities-dream.md delete mode 100644 .changeset/tall-nights-care.md delete mode 100644 .changeset/yellow-crabs-attend.md diff --git a/.changeset/chilly-sites-greet.md b/.changeset/chilly-sites-greet.md deleted file mode 100644 index 0b766015b..000000000 --- a/.changeset/chilly-sites-greet.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"braintrust": patch ---- - -fix(auto-instrumentation): Skip over file transforms in bundler plugins when 
id is undefined diff --git a/.changeset/cold-geese-move.md b/.changeset/cold-geese-move.md deleted file mode 100644 index 77496c322..000000000 --- a/.changeset/cold-geese-move.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"braintrust": patch ---- - -fix: Fix export map for bundler plugins diff --git a/.changeset/eager-jobs-hammer.md b/.changeset/eager-jobs-hammer.md deleted file mode 100644 index 7dec269f8..000000000 --- a/.changeset/eager-jobs-hammer.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"braintrust": patch ---- - -feat: Bump google ADK patching range to include new major `1.0.0` diff --git a/.changeset/fresh-crabs-dream.md b/.changeset/fresh-crabs-dream.md deleted file mode 100644 index 4f35ee20e..000000000 --- a/.changeset/fresh-crabs-dream.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"braintrust": patch ---- - -feat: Add instrumentation for groq-sdk diff --git a/.changeset/fuzzy-evals-warn.md b/.changeset/fuzzy-evals-warn.md deleted file mode 100644 index 5000a0551..000000000 --- a/.changeset/fuzzy-evals-warn.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"braintrust": patch ---- - -fix: Correct the eval file extension shown in CLI directory warnings diff --git a/.changeset/goofy-hotels-care.md b/.changeset/goofy-hotels-care.md deleted file mode 100644 index ede100873..000000000 --- a/.changeset/goofy-hotels-care.md +++ /dev/null @@ -1,69 +0,0 @@ ---- -"braintrust": minor ---- - -Add dataset versioning support to `init()`, `initDataset()`, and dataset objects. 
- -You can now pin dataset reads and experiment registration by explicit version, snapshot name, or environment tag: - -```ts -import { init, initDataset } from "braintrust"; - -const datasetByVersion = initDataset({ - project: "support-bot", - dataset: "production-cases", - version: "1234567890123456", -}); - -const datasetBySnapshot = initDataset({ - project: "support-bot", - dataset: "production-cases", - snapshotName: "baseline", -}); - -const datasetByEnvironment = initDataset({ - project: "support-bot", - dataset: "production-cases", - environment: "production", -}); - -init({ - project: "support-bot", - experiment: "baseline-eval", - dataset: { - id: "00000000-0000-0000-0000-000000000123", - snapshotName: "baseline", - }, -}); -``` - -Dataset objects now expose snapshot CRUD helpers, plus lookup by snapshot name or xact id: - -```ts -const dataset = initDataset({ - project: "support-bot", - dataset: "production-cases", -}); - -const snapshot = await dataset.createSnapshot({ - name: "baseline", - description: "Before the prompt rollout", -}); - -await dataset.updateSnapshot(snapshot.id, { - name: "baseline-v2", - description: null, -}); - -const snapshots = await dataset.listSnapshots(); -const byName = await dataset.getSnapshot({ - snapshotName: "baseline-v2", -}); -const byXactId = await dataset.getSnapshot({ - xactId: snapshot.xact_id, -}); - -await dataset.deleteSnapshot(snapshot.id); -``` - -`braintrust/dev` now also respects `dataset_version` and `dataset_environment` when resolving datasets for evals, so local eval runs match the pinned dataset selection used by the main SDK. 
diff --git a/.changeset/honest-mails-show.md b/.changeset/honest-mails-show.md deleted file mode 100644 index ca2cf8bbb..000000000 --- a/.changeset/honest-mails-show.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"braintrust": patch ---- - -feat: Capture thinking with cohere diff --git a/.changeset/lemon-regions-shine.md b/.changeset/lemon-regions-shine.md deleted file mode 100644 index ec49649aa..000000000 --- a/.changeset/lemon-regions-shine.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"braintrust": patch ---- - -fix: Capture reasoning in mistral diff --git a/.changeset/short-moles-punch.md b/.changeset/short-moles-punch.md deleted file mode 100644 index a7a2b2261..000000000 --- a/.changeset/short-moles-punch.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"braintrust": minor ---- - -feat: Add @cursor/sdk instrumentation diff --git a/.changeset/sunny-cities-dream.md b/.changeset/sunny-cities-dream.md deleted file mode 100644 index 45a22fd0e..000000000 --- a/.changeset/sunny-cities-dream.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"braintrust": patch ---- - -fix(huggingface): Capture streamed tool calls diff --git a/.changeset/tall-nights-care.md b/.changeset/tall-nights-care.md deleted file mode 100644 index 03e6b31f5..000000000 --- a/.changeset/tall-nights-care.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"braintrust": patch ---- - -fix(claude-agent-sdk): Nest built-in tools under sub-agents diff --git a/.changeset/yellow-crabs-attend.md b/.changeset/yellow-crabs-attend.md deleted file mode 100644 index c36539820..000000000 --- a/.changeset/yellow-crabs-attend.md +++ /dev/null @@ -1,5 +0,0 @@ ---- -"braintrust": minor ---- - -(feat) Add experiment dataset filters to experiment metadata diff --git a/js/CHANGELOG.md b/js/CHANGELOG.md index 0a374e030..c89eed322 100644 --- a/js/CHANGELOG.md +++ b/js/CHANGELOG.md @@ -1,5 +1,25 @@ # braintrust +## 3.10.0 + +### Minor Changes + +- Add dataset versioning support to `init()`, `initDataset()`, and dataset objects. 
You can now pin dataset reads and experiment registration by explicit version, snapshot name, or environment tag: `ts import { init, initDataset } from "braintrust"; const datasetByVersion = initDataset({ project: "support-bot", dataset: "production-cases", version: "1234567890123456", }); const datasetBySnapshot = initDataset({ project: "support-bot", dataset: "production-cases", snapshotName: "baseline", }); const datasetByEnvironment = initDataset({ project: "support-bot", dataset: "production-cases", environment: "production", }); init({ project: "support-bot", experiment: "baseline-eval", dataset: { id: "00000000-0000-0000-0000-000000000123", snapshotName: "baseline", }, }); ` Dataset objects now expose snapshot CRUD helpers, plus lookup by snapshot name or xact id: `ts const dataset = initDataset({ project: "support-bot", dataset: "production-cases", }); const snapshot = await dataset.createSnapshot({ name: "baseline", description: "Before the prompt rollout", }); await dataset.updateSnapshot(snapshot.id, { name: "baseline-v2", description: null, }); const snapshots = await dataset.listSnapshots(); const byName = await dataset.getSnapshot({ snapshotName: "baseline-v2", }); const byXactId = await dataset.getSnapshot({ xactId: snapshot.xact_id, }); await dataset.deleteSnapshot(snapshot.id); ` `braintrust/dev` now also respects `dataset_version` and `dataset_environment` when resolving datasets for evals, so local eval runs match the pinned dataset selection used by the main SDK. Thanks @max-braintrust! (https://github.com/braintrustdata/braintrust-sdk-javascript/pull/1837) +- feat: Add @cursor/sdk instrumentation (https://github.com/braintrustdata/braintrust-sdk-javascript/pull/1923) +- feat: Add experiment dataset filters to experiment metadata Thanks @max-braintrust! 
(https://github.com/braintrustdata/braintrust-sdk-javascript/pull/1898) + +### Patch Changes + +- fix(auto-instrumentation): Skip over file transforms in bundler plugins when id is undefined (https://github.com/braintrustdata/braintrust-sdk-javascript/pull/1886) +- fix: Fix export map for bundler plugins (https://github.com/braintrustdata/braintrust-sdk-javascript/pull/1870) +- feat: Bump google ADK patching range to include new major `1.0.0` (https://github.com/braintrustdata/braintrust-sdk-javascript/pull/1885) +- feat: Add instrumentation for groq-sdk (https://github.com/braintrustdata/braintrust-sdk-javascript/pull/1866) +- fix: Correct the eval file extension shown in CLI directory warnings (https://github.com/braintrustdata/braintrust-sdk-javascript/pull/1928) +- feat: Capture thinking with cohere (https://github.com/braintrustdata/braintrust-sdk-javascript/pull/1861) +- fix: Capture reasoning in mistral (https://github.com/braintrustdata/braintrust-sdk-javascript/pull/1863) +- fix(huggingface): Capture streamed tool calls (https://github.com/braintrustdata/braintrust-sdk-javascript/pull/1848) +- fix(claude-agent-sdk): Nest built-in tools under sub-agents (https://github.com/braintrustdata/braintrust-sdk-javascript/pull/1881) + ## 3.9.0 ### Notable Changes diff --git a/js/package.json b/js/package.json index f83a7f3ce..75c3ec39e 100644 --- a/js/package.json +++ b/js/package.json @@ -1,6 +1,6 @@ { "name": "braintrust", - "version": "3.9.0", + "version": "3.10.0", "description": "SDK for integrating Braintrust", "repository": { "type": "git",