Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/tall-nights-care.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"braintrust": patch
---

fix(claude-agent-sdk): Nest built-in tools under sub-agents
Original file line number Diff line number Diff line change
Expand Up @@ -291,7 +291,7 @@
"metric_keys": [],
"name": "Claude Agent",
"root_span_id": "<span:1>",
"span_id": "<span:20>",
"span_id": "<span:16>",
"span_parents": [
"<span:19>"
],
Expand All @@ -307,7 +307,7 @@
"metric_keys": [],
"name": "tool: calculator/calculator",
"root_span_id": "<span:1>",
"span_id": "<span:21>",
"span_id": "<span:20>",
"span_parents": [
"<span:17>"
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@
"metric_keys": [],
"name": "Claude Agent",
"root_span_id": "<span:1>",
"span_id": "<span:20>",
"span_id": "<span:16>",
"span_parents": [
"<span:19>"
],
Expand All @@ -309,7 +309,7 @@
"metric_keys": [],
"name": "tool: calculator/calculator",
"root_span_id": "<span:1>",
"span_id": "<span:21>",
"span_id": "<span:20>",
"span_parents": [
"<span:17>"
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@
"metric_keys": [],
"name": "Claude Agent",
"root_span_id": "<span:1>",
"span_id": "<span:20>",
"span_id": "<span:16>",
"span_parents": [
"<span:19>"
],
Expand All @@ -309,7 +309,7 @@
"metric_keys": [],
"name": "tool: calculator/calculator",
"root_span_id": "<span:1>",
"span_id": "<span:21>",
"span_id": "<span:20>",
"span_parents": [
"<span:17>"
],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -293,7 +293,7 @@
"metric_keys": [],
"name": "Claude Agent",
"root_span_id": "<span:1>",
"span_id": "<span:20>",
"span_id": "<span:16>",
"span_parents": [
"<span:19>"
],
Expand All @@ -309,7 +309,7 @@
"metric_keys": [],
"name": "tool: calculator/calculator",
"root_span_id": "<span:1>",
"span_id": "<span:21>",
"span_id": "<span:20>",
"span_parents": [
"<span:17>"
],
Expand Down
166 changes: 156 additions & 10 deletions e2e/scenarios/claude-agent-sdk-instrumentation/assertions.ts
Original file line number Diff line number Diff line change
Expand Up @@ -175,6 +175,38 @@ function findSpanById(
return events.find((event) => event.span.id === spanId);
}

/**
 * Reports whether `ancestorId` appears anywhere in the parent chain of `event`.
 *
 * The walk starts from the event's own `span.parentIds` and follows the
 * `parentIds` of every parent that can be found in `events`, breadth-first.
 * Each id is expanded at most once, so cyclic parent links terminate.
 *
 * @param events - All captured events; used to resolve a parent id to its span.
 * @param event - The span whose ancestry is inspected.
 * @param ancestorId - The span id to look for among the ancestors.
 * @returns `true` when `ancestorId` is reached; `false` otherwise, including
 *   when `event` or `ancestorId` is missing/empty.
 */
function isDescendantOf(
  events: CapturedLogEvent[],
  event: CapturedLogEvent | undefined,
  ancestorId: string | undefined,
): boolean {
  if (!event) {
    return false;
  }
  if (!ancestorId) {
    return false;
  }

  const lookup = new Map(
    events.map((candidate) => [candidate.span.id, candidate] as const),
  );
  const seen = new Set<string>();
  // `pending` grows while it is scanned; `cursor` marks the next id to visit.
  const pending = [...event.span.parentIds];

  for (let cursor = 0; cursor < pending.length; cursor += 1) {
    const currentId = pending[cursor];
    // Skip empty ids and ids that were already expanded.
    if (!currentId || seen.has(currentId)) {
      continue;
    }
    if (currentId === ancestorId) {
      return true;
    }

    seen.add(currentId);
    const resolved = lookup.get(currentId);
    if (resolved) {
      pending.push(...resolved.span.parentIds);
    }
  }

  return false;
}

function hasSubAgentHandoffToolName(
event: CapturedLogEvent | undefined,
): boolean {
Expand All @@ -191,10 +223,13 @@ function hasSubAgentHandoffToolName(

/**
 * Finds the first captured span that represents a sub-agent task.
 *
 * A span matches when its type is "task" and its name starts with "Agent:".
 * When `ancestorId` is supplied, the span must additionally descend from that
 * span id, so a caller can restrict the search to one subtree.
 *
 * @param events - All captured events, searched in array order.
 * @param ancestorId - Optional span id the match has to sit beneath; when
 *   omitted (or empty) no ancestry check is performed.
 * @returns The first matching event, or `undefined` when none matches.
 */
function findSubAgentTaskSpan(
  events: CapturedLogEvent[],
  ancestorId?: string,
): CapturedLogEvent | undefined {
  return events.find(
    (event) =>
      event.span.type === "task" &&
      event.span.name?.startsWith("Agent:") &&
      // Only consult the ancestry helper when a scope was requested.
      (!ancestorId || isDescendantOf(events, event, ancestorId)),
  );
}

Expand All @@ -208,6 +243,31 @@ function findSubAgentHandoffTool(
return hasSubAgentHandoffToolName(parentSpan) ? parentSpan : undefined;
}

/**
 * Picks the last "anthropic.messages.create" child of a task whose start
 * metric is not later than a given child span's start time.
 *
 * @param events - All captured events.
 * @param taskId - Span id of the task whose LLM children are considered.
 * @param childStartTime - Start time of the child span to compare against;
 *   when `undefined`, no time filtering is applied.
 * @returns The last qualifying LLM span in the order returned by
 *   `findChildSpans`, or `undefined` when there is none.
 */
function findLatestTaskLlmBeforeSpan(
  events: CapturedLogEvent[],
  taskId: string | undefined,
  childStartTime: number | undefined,
): CapturedLogEvent | undefined {
  const llmSpans = findChildSpans(events, "anthropic.messages.create", taskId);
  if (childStartTime === undefined) {
    return llmSpans.at(-1);
  }

  const cutoff = Number(childStartTime);
  // A span without a start metric compares as NaN and is therefore excluded.
  const startedInTime = llmSpans.filter(
    (candidate) => Number(candidate.metrics?.start ?? Number.NaN) <= cutoff,
  );
  return startedInTime.at(-1);
}

/**
 * Resolves the "Claude Agent" span that sits directly under a named operation.
 *
 * @param events - All captured events.
 * @param operationName - Name of the operation span to look up.
 * @returns The last "Claude Agent" child of the latest span named
 *   `operationName`, or `undefined` when no such child is found.
 */
function findOperationTaskRoot(
  events: CapturedLogEvent[],
  operationName: string,
): CapturedLogEvent | undefined {
  const operationSpan = findLatestSpan(events, operationName);
  const agentChildren = findChildSpans(
    events,
    "Claude Agent",
    operationSpan?.span.id,
  );
  return agentChildren.at(-1);
}

function buildSpanSummary(events: CapturedLogEvent[]): Json {
const root = findLatestSpan(events, ROOT_NAME);
const basicOperation = findLatestSpan(events, "claude-agent-basic-operation");
Expand Down Expand Up @@ -258,7 +318,7 @@ function buildSpanSummary(events: CapturedLogEvent[]): Json {
const input = event.input as Array<{ content?: string }> | undefined;
return Array.isArray(input) && input.some((item) => item.content);
});
const subAgentTask = findSubAgentTaskSpan(events);
const subAgentTask = findSubAgentTaskSpan(events, subAgentTaskRoot?.span.id);
const subAgentLlm = findChildSpans(
events,
"anthropic.messages.create",
Expand All @@ -274,9 +334,16 @@ function buildSpanSummary(events: CapturedLogEvent[]): Json {
const basicTool =
findToolSpanByLocalHandler(events, "calculator-local-handler-multiply") ??
findToolSpanByOperation(events, "multiply");
const subAgentTool =
const subAgentToolCandidate =
findToolSpanByLocalHandler(events, "calculator-local-handler-add") ??
findToolSpanByOperation(events, "add");
const subAgentTool = isDescendantOf(
events,
subAgentToolCandidate,
subAgentTask?.span.id,
)
? subAgentToolCandidate
: undefined;
const failureTool =
findToolSpanByLocalHandler(events, "calculator-local-handler-divide") ??
findToolSpanByOperation(events, "divide");
Expand Down Expand Up @@ -453,16 +520,23 @@ export function defineClaudeAgentSDKInstrumentationAssertions(options: {
events,
"claude-agent-subagent-operation",
);
const taskRoot = findChildSpans(
const taskRoot = findOperationTaskRoot(
events,
"Claude Agent",
operation?.span.id,
).at(-1);
const llm = findAllSpans(events, "anthropic.messages.create").find(
(event) => event.span.parentIds.includes(taskRoot?.span.id ?? ""),
"claude-agent-subagent-operation",
);
const nestedTask = findSubAgentTaskSpan(events);
const nestedTask = findSubAgentTaskSpan(events, taskRoot?.span.id);
const handoffTool = findSubAgentHandoffTool(events, nestedTask);
const llm = findLatestTaskLlmBeforeSpan(
events,
taskRoot?.span.id,
typeof handoffTool?.metrics?.start === "number"
? handoffTool.metrics.start
: undefined,
);
const handoffToolParent = findSpanById(
events,
handoffTool?.span.parentIds[0],
);
const nestedTaskLlm = findChildSpans(
events,
"anthropic.messages.create",
Expand Down Expand Up @@ -490,6 +564,10 @@ export function defineClaudeAgentSDKInstrumentationAssertions(options: {
}
expect(handoffTool).toBeDefined();
expect(hasSubAgentHandoffToolName(handoffTool)).toBe(true);
expect(handoffToolParent?.span.id).toBe(taskRoot?.span.id);
expect(Number(llm?.metrics?.start ?? Number.NaN)).toBeLessThanOrEqual(
Number(handoffTool?.metrics?.start ?? Number.NaN),
);
expect(nestedTask?.span.parentIds).toEqual([handoffTool?.span.id ?? ""]);
expect(nestedTaskLlm).toBeDefined();
expect(nestedTaskLlm?.span.parentIds).toContain(
Expand All @@ -508,6 +586,74 @@ export function defineClaudeAgentSDKInstrumentationAssertions(options: {
}
});

// Registered only when the caller opts into task lifecycle details. Verifies
// the span tree produced by the built-in Agent -> Bash delegation scenario:
// the handoff tool hangs off the top-level task, the Bash tool hangs off the
// nested sub-agent task, and each tool starts no earlier than an LLM span of
// the task that owns it.
if (options.expectTaskLifecycleDetails) {
test(
"orders built-in Agent and Bash after their llm siblings",
testConfig,
() => {
const operation = findLatestSpan(
events,
"claude-agent-subagent-built-in-tool-operation",
);
// Last "Claude Agent" span under the operation above.
const taskRoot = findOperationTaskRoot(
events,
"claude-agent-subagent-built-in-tool-operation",
);
// Sub-agent task ("Agent:" prefix) restricted to this operation's subtree.
const nestedTask = findSubAgentTaskSpan(events, taskRoot?.span.id);
const handoffTool = findSubAgentHandoffTool(events, nestedTask);
// Last top-level LLM span that started no later than the handoff tool; the
// time filter is skipped when the tool has no numeric start metric.
const taskRootLlm = findLatestTaskLlmBeforeSpan(
events,
taskRoot?.span.id,
typeof handoffTool?.metrics?.start === "number"
? handoffTool.metrics.start
: undefined,
);
const handoffToolParent = findSpanById(
events,
handoffTool?.span.parentIds[0],
);
// First "tool: Bash" span anywhere beneath the top-level task.
const bashTool = findAllSpans(events, "tool: Bash").find((event) =>
isDescendantOf(events, event, taskRoot?.span.id),
);
// Last sub-agent LLM span that started no later than the Bash tool.
const nestedTaskLlm = findLatestTaskLlmBeforeSpan(
events,
nestedTask?.span.id,
typeof bashTool?.metrics?.start === "number"
? bashTool.metrics.start
: undefined,
);
const bashToolParent = findSpanById(
events,
bashTool?.span.parentIds[0],
);

expect(operation).toBeDefined();
expect(taskRoot).toBeDefined();
expect(nestedTask).toBeDefined();
expect(handoffTool).toBeDefined();
// The handoff tool must be a direct child of the top-level task.
expect(handoffToolParent?.span.id).toBe(taskRoot?.span.id);
// A missing start metric becomes NaN, which makes this comparison fail
// instead of passing vacuously.
expect(
Number(taskRootLlm?.metrics?.start ?? Number.NaN),
).toBeLessThanOrEqual(
Number(handoffTool?.metrics?.start ?? Number.NaN),
);
expect(nestedTaskLlm).toBeDefined();
expect(bashTool).toBeDefined();
expect(isDescendantOf(events, bashTool, nestedTask?.span.id)).toBe(
true,
);
// Bash must not be parented to the top-level LLM span ...
expect(bashTool?.span.parentIds).not.toContain(
taskRootLlm?.span.id ?? "",
);
// ... its first parent has to be the nested sub-agent task itself.
expect(bashToolParent?.span.type).toBe("task");
expect(bashToolParent?.span.id).toBe(nestedTask?.span.id);
expect(
Number(nestedTaskLlm?.metrics?.start ?? Number.NaN),
).toBeLessThanOrEqual(Number(bashTool?.metrics?.start ?? Number.NaN));
},
);
}

test("captures tool failure details", testConfig, () => {
const operation = findLatestSpan(
events,
Expand Down
28 changes: 28 additions & 0 deletions e2e/scenarios/claude-agent-sdk-instrumentation/scenario.impl.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import {
import { z } from "zod";

const CLAUDE_AGENT_MODEL = "claude-haiku-4-5";
const CLAUDE_AGENT_TOP_LEVEL_MODEL = "claude-sonnet-4-5";

export const ROOT_NAME = "claude-agent-sdk-root";
export const SCENARIO_NAME = "claude-agent-sdk-traces";
Expand Down Expand Up @@ -139,6 +140,33 @@ async function runClaudeAgentSDKScenario({ decorateSDK, sdk }) {
},
);

// Built-in tool delegation scenario: the top-level query (run on
// CLAUDE_AGENT_TOP_LEVEL_MODEL) is prompted to hand off to the "echo"
// sub-agent via the Agent tool, and that sub-agent (run on CLAUDE_AGENT_MODEL,
// limited to the Bash tool) is prompted to run a single `echo hello`.
await runOperation(
"claude-agent-subagent-built-in-tool-operation",
"subagent-built-in-tool",
async () => {
await collectAsync(
query({
prompt:
'You MUST call the Agent tool now with subagent_type="echo" and description "echo greeting". Do not call Bash yourself. Do not answer with text yourself; delegate to the echo sub-agent.',
options: {
agents: {
echo: {
description: "Runs one bash echo and reports back.",
model: CLAUDE_AGENT_MODEL,
prompt:
"Run `echo hello` via Bash exactly once, then reply with only the word done.",
tools: ["Bash"],
},
},
// Both tools are allowed for the query as a whole; the prompt above tells
// the top-level agent not to call Bash itself.
allowedTools: ["Agent", "Bash"],
model: CLAUDE_AGENT_TOP_LEVEL_MODEL,
permissionMode: "bypassPermissions",
},
}),
);
},
);

await runOperation(
"claude-agent-failure-operation",
"failure",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ import { defineClaudeAgentSDKInstrumentationAssertions } from "./assertions";
const scenarioDir = await prepareScenarioDir({
scenarioDir: resolveScenarioDir(import.meta.url),
});
const TIMEOUT_MS = 120_000;
const TIMEOUT_MS = 180_000;
const claudeAgentSDKScenarios = await Promise.all(
[
{
Expand Down
Loading
Loading