Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
27 commits
Select commit Hold shift + click to select a range
5f87e25
chore: Sync release branch to main (a6d62c4832d5) (#1875)
github-actions[bot] Apr 20, 2026
a25315c
fix: Fix export map for bundler plugins (#1870)
lforst Apr 21, 2026
2eb3287
test: Don't use discontinued models (#1873)
lforst Apr 21, 2026
798763e
feat: Capture thinking with cohere (#1861)
lforst Apr 21, 2026
9ee6400
fix(huggingface): Capture streamed tool calls (#1848)
lforst Apr 21, 2026
bff137a
chore: Fix PR linking in changelog generation (#1880)
lforst Apr 21, 2026
5cdb71f
chore: Stop using discontinued gemini model (#1882)
lforst Apr 22, 2026
1c7e828
feat: Bump google ADK patching range to include new major `1.0.0` (#1…
lforst Apr 22, 2026
08f5c74
fix(claude-agent-sdk): Nest built-in tools under sub-agents (#1881)
lforst Apr 22, 2026
968f534
feat: Add instrumentation for `groq-sdk` (#1866)
lforst Apr 22, 2026
a898cdd
fix(auto-instrumentation): Skip over file transforms in bundler plugi…
lforst Apr 22, 2026
3500ec2
feat: dataset versioning (#1837)
max-braintrust Apr 23, 2026
fbd8d21
fix: Capture reasoning in mistral (#1863)
lforst Apr 24, 2026
1cb8b43
Fix clean command (#1905)
manugoyal Apr 24, 2026
7bf8034
feat: Add experiment dataset filter to experiment metadata (#1898)
max-braintrust Apr 24, 2026
9be62fc
Rm eval status page (#1908)
ankrgyl Apr 25, 2026
714ee22
chore: Bump `@braintrust/browser` to `0.0.3` (#1913)
lforst Apr 28, 2026
f6426a1
chore: generated SDK types (#1910)
braintrust-bot[bot] Apr 28, 2026
732d1a1
deps: Add deno to `mise.toml` (#1925)
lforst Apr 30, 2026
dc24f2f
ci: Remove api compatibility check from required checks (#1926)
lforst Apr 30, 2026
eb22414
fix(cli): Use correct filename for eval.js (#1928)
AbhiPrasad Apr 30, 2026
9d347e9
chore: Fix huggingface canary snapshot drift (#1924)
lforst May 4, 2026
94ed6b3
chore: Make anthropic test less brittle (#1936)
lforst May 4, 2026
406a4b5
ci: Set reporters to default (#1937)
lforst May 4, 2026
0a2e911
feat: Add `@cursor/sdk` instrumentation (#1923)
lforst May 4, 2026
4a1c229
chore: Prepare changelog
github-actions[bot] May 4, 2026
a8b1850
Merge branch 'release' into prepare-release/0a2e9115c308
lforst May 4, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .agents/skills/e2e-tests/SKILL.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ Try not to use specific test narrowing commands unless hunting down a very nasty
- Keep reusable logic in `e2e/helpers/`. Keep one-off fixtures and scenario-specific files inside the scenario directory.
- Snapshot stable contracts, not raw noise. Use `normalizeForSnapshot(...)` before inline snapshots and `formatJsonFileSnapshot(...)` plus file snapshots for larger payloads or version matrices.
- When a scenario family already has `assertions.ts`, keep version- or provider-specific test setup in `scenario.test.ts` and reuse the shared assertions file.
- Keep the CI e2e summary up to date. If a scenario version matrix or `variantKey` changes, update `e2e/config/pr-comment-scenarios.json` in the same change and follow the established pattern used by other versioned scenarios: one summary row per version, not separate wrapped/auto rows unless that pattern already exists for the scenario family.
- Run new or updated scenarios three times in a row before considering snapshots stable.

## Scenario Patterns
Expand Down
2 changes: 2 additions & 0 deletions .env.example
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,9 @@ BRAINTRUST_API_KEY=
OPENAI_API_KEY=
ANTHROPIC_API_KEY=
GEMINI_API_KEY=
CURSOR_API_KEY=
OPENROUTER_API_KEY=
MISTRAL_API_KEY=
HUGGINGFACE_API_KEY=
COHERE_API_KEY=
GROQ_API_KEY=
2 changes: 0 additions & 2 deletions .github/workflows/checks.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -459,7 +459,6 @@ jobs:
- js-test
- js-build
- e2e-hermetic
- js-api-compatibility
- js-smoke-discover
- js-smoke-test
- temporal-js
Expand Down Expand Up @@ -492,7 +491,6 @@ jobs:
check_result "js-test" "${{ needs.js-test.result }}"
check_result "js-build" "${{ needs.js-build.result }}"
check_result "e2e-hermetic" "${{ needs.e2e-hermetic.result }}"
check_result "js-api-compatibility" "${{ needs.js-api-compatibility.result }}"
check_result "js-smoke-discover" "${{ needs.js-smoke-discover.result }}"
check_result "js-smoke-test" "${{ needs.js-smoke-test.result }}"
check_result "temporal-js" "${{ needs.temporal-js.result }}"
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/e2e-canary.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,8 @@ jobs:
BRAINTRUST_E2E_PROJECT_NAME: ${{ vars.BRAINTRUST_E2E_PROJECT_NAME }}
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }}
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
Expand Down
5 changes: 5 additions & 0 deletions .github/workflows/integration-tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ jobs:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }}
OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
HUGGINGFACE_API_KEY: ${{ secrets.HUGGINGFACE_API_KEY }}
Expand Down Expand Up @@ -58,6 +59,8 @@ jobs:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }}
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
Expand Down Expand Up @@ -109,6 +112,8 @@ jobs:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }}
GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
Expand Down
2 changes: 2 additions & 0 deletions .github/workflows/prepare-js-release.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,8 @@ jobs:
echo "release_branch=$release_branch"
} >> "$GITHUB_OUTPUT"
- name: Run changeset version
env:
GITHUB_TOKEN: ${{ github.token }}
run: pnpm exec changeset version
- name: Create release commit
run: |
Expand Down
2 changes: 2 additions & 0 deletions e2e/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -115,10 +115,12 @@ Non-hermetic scenarios require provider credentials in addition to the mock Brai
- `OPENAI_API_KEY`
- `ANTHROPIC_API_KEY`
- `GEMINI_API_KEY` or `GOOGLE_API_KEY`
- `CURSOR_API_KEY`
- `OPENROUTER_API_KEY`
- `MISTRAL_API_KEY`
- `HUGGINGFACE_API_KEY`
- `COHERE_API_KEY`
- `GROQ_API_KEY`

`claude-agent-sdk-instrumentation` also uses `ANTHROPIC_API_KEY`, because it runs the real Claude Agent SDK against Anthropic in the same style as the existing live Anthropic wrapper coverage.

Expand Down
20 changes: 19 additions & 1 deletion e2e/config/pr-comment-scenarios.json
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,10 @@
"scenarioDirName": "google-adk-instrumentation",
"label": "Google ADK Instrumentation",
"metadataScenario": "google-adk-instrumentation",
"variants": [{ "variantKey": "google-adk-v061", "label": "v0.6.1" }]
"variants": [
{ "variantKey": "google-adk-v061", "label": "v0.6.1" },
{ "variantKey": "google-adk-v1000", "label": "v1.0.0" }
]
},
{
"scenarioDirName": "google-genai-instrumentation",
Expand All @@ -39,6 +42,15 @@
{ "variantKey": "google-genai-v1460", "label": "v1.46.0" }
]
},
{
"scenarioDirName": "groq-instrumentation",
"label": "Groq Instrumentation",
"metadataScenario": "groq-instrumentation",
"variants": [
{ "variantKey": "groq-v1-wrapped", "label": "Wrapped" },
{ "variantKey": "groq-v1-auto", "label": "Auto-hook" }
]
},
{
"scenarioDirName": "huggingface-instrumentation",
"label": "HuggingFace Instrumentation",
Expand Down Expand Up @@ -121,5 +133,11 @@
"label": "v0.2.81"
}
]
},
{
"scenarioDirName": "cursor-sdk-instrumentation",
"label": "Cursor SDK Instrumentation",
"metadataScenario": "cursor-sdk-instrumentation",
"variants": [{ "variantKey": "cursor-sdk-v1", "label": "v1" }]
}
]
38 changes: 6 additions & 32 deletions e2e/helpers/scenario-installer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,11 @@ const INSTALL_SECRET_ENV_VARS = [
"ANTHROPIC_API_KEY",
"BRAINTRUST_API_KEY",
"COHERE_API_KEY",
"CURSOR_API_KEY",
"GEMINI_API_KEY",
"GITHUB_TOKEN",
"GH_TOKEN",
"GROQ_API_KEY",
"HUGGINGFACE_API_KEY",
"OPENAI_API_KEY",
"OPENROUTER_API_KEY",
Expand All @@ -36,10 +38,9 @@ let cleanupRegistered = false;

type CanaryDependencyRule = {
packageName: string;
query: string;
version: string;
};

const canaryVersionCache = new Map<string, string>();
const HELPERS_DIR = path.dirname(fileURLToPath(import.meta.url));
const E2E_ROOT = path.resolve(HELPERS_DIR, "..");

Expand Down Expand Up @@ -154,32 +155,6 @@ function packageSpecifier(
: `npm:${packageName}@${version}`;
}

/**
 * Resolves a canary dependency rule to a single concrete version string.
 *
 * Runs `PNPM_COMMAND view <rule.query> version --json` and memoizes the
 * result in `canaryVersionCache`, keyed by `rule.query`, so a given query is
 * only looked up once per process.
 *
 * @param rule - Canary rule whose `query` is passed to the `view` command.
 * @returns The resolved version string.
 * @throws Error if the command output does not yield a string version.
 */
async function resolveCanaryVersion(
rule: CanaryDependencyRule,
): Promise<string> {
const cacheKey = rule.query;
const cached = canaryVersionCache.get(cacheKey);
// Truthiness check: a cache hit short-circuits the subprocess call.
if (cached) {
return cached;
}

// Query the registry from the current working directory using the install
// environment produced by installEnv().
const output = await spawnOrThrow(
PNPM_COMMAND,
["view", rule.query, "version", "--json"],
process.cwd(),
installEnv(),
);
// The JSON output is either a single version string or an array of versions.
const parsed = JSON.parse(output) as string | string[];
// For an array, take the last entry.
// NOTE(review): presumably the last entry is the newest match — confirm ordering.
const version = Array.isArray(parsed) ? parsed.at(-1) : parsed;

// Covers an empty array (`at(-1)` is undefined) and any non-string payload.
if (typeof version !== "string") {
throw new Error(`Could not resolve canary version for ${rule.query}`);
}

canaryVersionCache.set(cacheKey, version);
return version;
}

function parseCanaryDependencyRule(
dependencyName: string,
rawRule: string,
Expand All @@ -194,7 +169,7 @@ function parseCanaryDependencyRule(
if (rawRule === "latest") {
return {
packageName: dependencyName,
query: dependencyName,
version: "latest",
};
}

Expand All @@ -207,7 +182,7 @@ function parseCanaryDependencyRule(

return {
packageName: rawRule.slice(0, versionSeparator),
query: rawRule,
version: rawRule.slice(versionSeparator + 1),
};
}

Expand All @@ -229,11 +204,10 @@ async function rewriteManifestForCanary(scenarioDir: string): Promise<void> {
rawRule,
scenarioDir,
);
const version = await resolveCanaryVersion(rule);
dependencies[dependencyName] = packageSpecifier(
dependencyName,
rule.packageName,
version,
rule.version,
);
updated = true;
}
Expand Down
2 changes: 1 addition & 1 deletion e2e/scenarios/ai-sdk-instrumentation/scenario.impl.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,7 @@ async function runAISDKInstrumentationScenario(
) {
const instrumentedAI = decorateAI ? decorateAI(options.ai) : options.ai;
const openaiModel = options.openai("gpt-4o-mini-2024-07-18");
const anthropicModel = options.anthropic?.("claude-3-haiku-20240307");
const anthropicModel = options.anthropic?.("claude-haiku-4-5");
const openaiEmbeddingModel = options.openai.textEmbeddingModel(
"text-embedding-3-small",
);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,26 +29,26 @@
}
],
"metadata": {
"model": "claude-3-haiku-20240307",
"model": "claude-haiku-4-5",
"provider": "anthropic",
"stop_reason": "end_turn",
"stop_sequence": null
},
"metrics": {
"completion_tokens": 5,
"completion_tokens": 4,
"end": 0,
"prompt_cache_creation_tokens": 0,
"prompt_cached_tokens": 0,
"prompt_tokens": 12,
"start": 0,
"time_to_first_token": 0,
"tokens": 17
"tokens": 16
},
"name": "anthropic.messages.create",
"output": {
"content": [
{
"text": "OK.",
"text": "OK",
"type": "text"
}
],
Expand Down Expand Up @@ -93,20 +93,20 @@
}
],
"metadata": {
"model": "claude-3-haiku-20240307",
"model": "claude-haiku-4-5",
"provider": "anthropic",
"stop_reason": "end_turn",
"stop_sequence": null
},
"metrics": {
"completion_tokens": 29,
"completion_tokens": "<number>",
"end": 0,
"prompt_cache_creation_tokens": 0,
"prompt_cached_tokens": 0,
"prompt_tokens": 1389,
"start": 0,
"time_to_first_token": 0,
"tokens": 1418
"tokens": "<number>"
},
"name": "anthropic.messages.create",
"output": {
Expand Down Expand Up @@ -139,23 +139,23 @@
}
],
"metadata": {
"model": "claude-3-haiku-20240307",
"model": "claude-haiku-4-5",
"provider": "anthropic",
"stop_reason": "end_turn",
"stop_sequence": null
},
"metrics": {
"completion_tokens": 18,
"completion_tokens": 15,
"end": 0,
"prompt_cache_creation_tokens": 0,
"prompt_cached_tokens": 0,
"prompt_tokens": 24,
"start": 0,
"time_to_first_token": 0,
"tokens": 42
"tokens": 39
},
"name": "anthropic.messages.create",
"output": "1 - one\n2 - two\n3 - three",
"output": "1 one\n2 two\n3 three",
"type": "llm"
},
{
Expand All @@ -177,23 +177,23 @@
}
],
"metadata": {
"model": "claude-3-haiku-20240307",
"model": "claude-haiku-4-5",
"provider": "anthropic",
"stop_reason": "end_turn",
"stop_sequence": null
},
"metrics": {
"completion_tokens": 18,
"completion_tokens": 15,
"end": 0,
"prompt_cache_creation_tokens": 0,
"prompt_cached_tokens": 0,
"prompt_tokens": 24,
"start": 0,
"time_to_first_token": 0,
"tokens": 42
"tokens": 39
},
"name": "anthropic.messages.create",
"output": "1 - one\n2 - two\n3 - three",
"output": "1 one\n2 two\n3 three",
"type": "llm"
},
{
Expand All @@ -215,7 +215,7 @@
}
],
"metadata": {
"model": "claude-3-haiku-20240307",
"model": "claude-haiku-4-5",
"provider": "anthropic",
"stop_reason": "tool_use",
"stop_sequence": null
Expand All @@ -225,10 +225,10 @@
"end": 0,
"prompt_cache_creation_tokens": 0,
"prompt_cached_tokens": 0,
"prompt_tokens": 454,
"prompt_tokens": 687,
"start": 0,
"time_to_first_token": 0,
"tokens": 480
"tokens": 713
},
"name": "anthropic.messages.create",
"output": {
Expand Down Expand Up @@ -265,20 +265,20 @@
}
],
"metadata": {
"model": "claude-3-haiku-20240307",
"model": "claude-haiku-4-5",
"provider": "anthropic",
"stop_reason": "tool_use",
"stop_sequence": null
},
"metrics": {
"completion_tokens": 55,
"completion_tokens": 56,
"end": 0,
"prompt_cache_creation_tokens": 0,
"prompt_cached_tokens": 0,
"prompt_tokens": 357,
"prompt_tokens": 589,
"start": 0,
"time_to_first_token": 0,
"tokens": 412
"tokens": 645
},
"name": "anthropic.messages.create",
"output": {
Expand Down
Loading
Loading