From 52a53e4122bebb988a1fe9e21b9562b8f8f3e307 Mon Sep 17 00:00:00 2001 From: Abhijeet Prasad Date: Wed, 11 Mar 2026 10:31:46 -0400 Subject: [PATCH 1/5] feat: Improve claude agent sdk spans --- .../wrappers/claude_agent_sdk/__init__.py | 7 - .../wrappers/claude_agent_sdk/_constants.py | 71 ++ .../wrappers/claude_agent_sdk/_wrapper.py | 626 +++++++++++--- ...ubagent_creates_task_span__sdk_0_1_48.json | 696 ++++++++++++++++ .../wrappers/claude_agent_sdk/test_wrapper.py | 783 ++++++++++++++++-- 5 files changed, 2007 insertions(+), 176 deletions(-) create mode 100644 py/src/braintrust/wrappers/claude_agent_sdk/_constants.py create mode 100644 py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_bundled_subagent_creates_task_span__sdk_0_1_48.json diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/__init__.py b/py/src/braintrust/wrappers/claude_agent_sdk/__init__.py index 870ec0e1..9c45bf7d 100644 --- a/py/src/braintrust/wrappers/claude_agent_sdk/__init__.py +++ b/py/src/braintrust/wrappers/claude_agent_sdk/__init__.py @@ -66,39 +66,32 @@ def setup_claude_agent_sdk( import claude_agent_sdk - # Store original classes before patching original_client = claude_agent_sdk.ClaudeSDKClient if hasattr(claude_agent_sdk, "ClaudeSDKClient") else None original_tool_class = claude_agent_sdk.SdkMcpTool if hasattr(claude_agent_sdk, "SdkMcpTool") else None original_tool_fn = claude_agent_sdk.tool if hasattr(claude_agent_sdk, "tool") else None - # Patch ClaudeSDKClient if original_client: wrapped_client = _create_client_wrapper_class(original_client) claude_agent_sdk.ClaudeSDKClient = wrapped_client - # Update all modules that already imported ClaudeSDKClient for module in list(sys.modules.values()): if module and hasattr(module, "ClaudeSDKClient"): if getattr(module, "ClaudeSDKClient", None) is original_client: setattr(module, "ClaudeSDKClient", wrapped_client) - # Patch SdkMcpTool if original_tool_class: wrapped_tool_class = _create_tool_wrapper_class(original_tool_class) claude_agent_sdk.SdkMcpTool = wrapped_tool_class - # Update all modules that already imported SdkMcpTool for module in list(sys.modules.values()): if module and hasattr(module, "SdkMcpTool"): if getattr(module, "SdkMcpTool", None) is original_tool_class: setattr(module, "SdkMcpTool", wrapped_tool_class) - # Patch tool() decorator if original_tool_fn: wrapped_tool_fn = _wrap_tool_factory(original_tool_fn) claude_agent_sdk.tool = wrapped_tool_fn - # Update all modules that already imported tool for module in list(sys.modules.values()): if module and hasattr(module, "tool"): if getattr(module, "tool", None) is original_tool_fn: diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/_constants.py b/py/src/braintrust/wrappers/claude_agent_sdk/_constants.py new file mode 100644 index 00000000..23b3b299 --- /dev/null +++ b/py/src/braintrust/wrappers/claude_agent_sdk/_constants.py @@ -0,0 +1,71 @@ +from dataclasses import dataclass +from enum import Enum +from types import MappingProxyType +from typing import Final, Mapping + + +class MessageClassName(str, Enum): + ASSISTANT = "AssistantMessage" + USER = "UserMessage" + RESULT = "ResultMessage" + SYSTEM = "SystemMessage" + TASK_STARTED = "TaskStartedMessage" + TASK_PROGRESS = "TaskProgressMessage" + TASK_NOTIFICATION = "TaskNotificationMessage" + + +class BlockClassName(str, Enum): + TEXT = "TextBlock" + TOOL_USE = "ToolUseBlock" + TOOL_RESULT = "ToolResultBlock" + + +class SerializedContentType(str, Enum): + TEXT = "text" + TOOL_USE = "tool_use" + TOOL_RESULT = "tool_result" + + +@dataclass(frozen=True) +class ToolMetadataKeys: + tool_name: str = "gen_ai.tool.name" + tool_call_id: str = "gen_ai.tool.call.id" + raw_tool_name: str = "raw_tool_name" + operation_name: str = "gen_ai.operation.name" + mcp_method_name: str = "mcp.method.name" + mcp_server: str = "mcp.server" + + +@dataclass(frozen=True) +class MCPToolMetadataValues: + operation_name: str = "execute_tool" + method_name: str = "tools/call" + + +DEFAULT_TOOL_NAME: Final[str] = "unknown" + +CLAUDE_AGENT_TASK_SPAN_NAME: Final[str] = "Claude Agent" +ANTHROPIC_MESSAGES_CREATE_SPAN_NAME: Final[str] = "anthropic.messages.create" + +MCP_TOOL_PREFIX: Final[str] = "mcp__" +MCP_TOOL_NAME_DELIMITER: Final[str] = "__" + +TOOL_METADATA: Final[ToolMetadataKeys] = ToolMetadataKeys() +MCP_TOOL_METADATA: Final[MCPToolMetadataValues] = MCPToolMetadataValues() + +SERIALIZED_CONTENT_TYPE_BY_BLOCK_CLASS: Final[Mapping[str, SerializedContentType]] = MappingProxyType( + { + BlockClassName.TEXT: SerializedContentType.TEXT, + BlockClassName.TOOL_USE: SerializedContentType.TOOL_USE, + BlockClassName.TOOL_RESULT: SerializedContentType.TOOL_RESULT, + } +) + +SYSTEM_MESSAGE_TYPES: Final[frozenset[MessageClassName]] = frozenset( + { + MessageClassName.SYSTEM, + MessageClassName.TASK_STARTED, + MessageClassName.TASK_PROGRESS, + MessageClassName.TASK_NOTIFICATION, + } +) diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py b/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py index 75d6d03e..cb7d4728 100644 --- a/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py +++ b/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py @@ -2,23 +2,169 @@ import logging import threading import time -from collections.abc import AsyncGenerator, AsyncIterable, Callable +from collections.abc import AsyncGenerator, AsyncIterable from typing import Any from braintrust.logger import start_span from braintrust.span_types import SpanTypeAttribute from braintrust.wrappers._anthropic_utils import Wrapper, extract_anthropic_usage, finalize_anthropic_tokens +from braintrust.wrappers.claude_agent_sdk._constants import ( + ANTHROPIC_MESSAGES_CREATE_SPAN_NAME, + CLAUDE_AGENT_TASK_SPAN_NAME, + DEFAULT_TOOL_NAME, + MCP_TOOL_METADATA, + MCP_TOOL_NAME_DELIMITER, + MCP_TOOL_PREFIX, + SERIALIZED_CONTENT_TYPE_BY_BLOCK_CLASS, + SYSTEM_MESSAGE_TYPES, + TOOL_METADATA, + BlockClassName, + MessageClassName, + SerializedContentType, +) log = logging.getLogger(__name__) - -# Thread-local storage to propagate parent span export to tool handlers -# The Claude Agent SDK may execute tools in separate async contexts that don't -# preserve contextvars, so we use threading.local() _thread_local = threading.local() +@dataclasses.dataclass(frozen=True) +class ParsedToolName: + raw_name: str + display_name: str + is_mcp: bool = False + mcp_server: str | None = None + + +@dataclasses.dataclass +class _ActiveToolSpan: + span: Any + raw_name: str + display_name: str + input: Any + handler_active: bool = False + + @property + def has_span(self) -> bool: + return True + + def activate(self) -> None: + self.handler_active = True + self.span.set_current() + + def log_error(self, exc: Exception) -> None: + self.span.log(error=str(exc)) + + def release(self) -> None: + if not self.handler_active: + return + + self.handler_active = False + self.span.unset_current() + + +class _NoopActiveToolSpan: + @property + def has_span(self) -> bool: + return False + + def log_error(self, exc: Exception) -> None: + del exc + + def release(self) -> None: + return + + +_NOOP_ACTIVE_TOOL_SPAN = _NoopActiveToolSpan() + + +def _log_tracing_warning(exc: Exception) -> None: + log.warning("Error in tracing code", exc_info=exc) + +def _parse_tool_name(tool_name: Any) -> ParsedToolName: + raw_name = str(tool_name) if tool_name is not None else DEFAULT_TOOL_NAME + + if not raw_name.startswith(MCP_TOOL_PREFIX): + return ParsedToolName(raw_name=raw_name, display_name=raw_name) + + remainder = raw_name[len(MCP_TOOL_PREFIX) :] + if not remainder: + return ParsedToolName(raw_name=raw_name, display_name=raw_name) + + server_and_tool = remainder.rsplit(MCP_TOOL_NAME_DELIMITER, 1) + if len(server_and_tool) != 2: + return ParsedToolName(raw_name=raw_name, display_name=raw_name) + + server_name, tool_display_name = server_and_tool + if not server_name or not tool_display_name: + return ParsedToolName(raw_name=raw_name, display_name=raw_name) + + return ParsedToolName( + raw_name=raw_name, + display_name=tool_display_name, + is_mcp=True, + mcp_server=server_name, + ) + + +def _serialize_tool_result_content(content: Any) -> Any: + if dataclasses.is_dataclass(content): + serialized_content = _serialize_content_blocks([content]) + return serialized_content[0] if serialized_content else None + + if not isinstance(content, list): + return content + + serialized_content = _serialize_content_blocks(content) + if ( + isinstance(serialized_content, list) + and len(serialized_content) == 1 + and isinstance(serialized_content[0], dict) + and serialized_content[0].get("type") == SerializedContentType.TEXT + and SerializedContentType.TEXT in serialized_content[0] + ): + return serialized_content[0][SerializedContentType.TEXT] + + return serialized_content + + +def _serialize_tool_result_output(tool_result_block: Any) -> dict[str, Any]: + output = {"content": _serialize_tool_result_content(getattr(tool_result_block, "content", None))} + + if getattr(tool_result_block, "is_error", None) is True: + output["is_error"] = True + + return output + +def _serialize_system_message(message: Any) -> dict[str, Any]: + serialized = {"subtype": getattr(message, "subtype", None)} + + for field_name in ( + "task_id", + "description", + "uuid", + "session_id", + "tool_use_id", + "task_type", + "status", + "output_file", + "summary", + "last_tool_name", + "usage", + ): + value = getattr(message, field_name, None) + if value is not None: + serialized[field_name] = value + + if len(serialized) == 1: + data = getattr(message, "data", None) + if data: + serialized["data"] = data + + return serialized + + def _create_tool_wrapper_class(original_tool_class: Any) -> Any: - """Creates a wrapper class for SdkMcpTool that wraps handlers.""" + """Creates a wrapper class for SdkMcpTool that re-enters active TOOL spans.""" class WrappedSdkMcpTool(original_tool_class): # type: ignore[valid-type,misc] def __init__( @@ -32,32 +178,24 @@ def __init__( wrapped_handler = _wrap_tool_handler(handler, name) super().__init__(name, description, input_schema, wrapped_handler, **kwargs) # type: ignore[call-arg] - # Preserve generic typing support __class_getitem__ = classmethod(lambda cls, params: cls) # type: ignore[assignment] return WrappedSdkMcpTool -def _wrap_tool_factory(tool_fn: Any) -> Callable[..., Any]: - """Wraps the tool() factory function to return wrapped tools.""" +def _wrap_tool_factory(tool_fn: Any) -> Any: + """Wrap the tool() factory so decorated handlers inherit the active TOOL span.""" def wrapped_tool(*args: Any, **kwargs: Any) -> Any: result = tool_fn(*args, **kwargs) - - # The tool() function returns a decorator, not a tool definition - # We need to wrap the decorator to intercept the final tool definition if not callable(result): return result def wrapped_decorator(handler_fn: Any) -> Any: tool_def = result(handler_fn) - - # Now we have the actual tool definition, wrap its handler if tool_def and hasattr(tool_def, "handler"): - tool_name = getattr(tool_def, "name", "unknown") - original_handler = tool_def.handler - tool_def.handler = _wrap_tool_handler(original_handler, tool_name) - + tool_name = getattr(tool_def, "name", DEFAULT_TOOL_NAME) + tool_def.handler = _wrap_tool_handler(tool_def.handler, tool_name) return tool_def return wrapped_decorator @@ -65,91 +203,329 @@ def wrapped_decorator(handler_fn: Any) -> Any: return wrapped_tool -def _wrap_tool_handler(handler: Any, tool_name: Any) -> Callable[..., Any]: - """Wraps a tool handler to add tracing. - - Uses start_span context manager which automatically: - - Handles exceptions and logs them to the span - - Sets the span as current for nested operations - - Nests under the parent span (TASK span) via the parent parameter - - The Claude Agent SDK may execute tool handlers in a separate async context, - so we try the context variable first, then fall back to current_span export. - """ - # Check if already wrapped to prevent double-wrapping +def _wrap_tool_handler(handler: Any, tool_name: Any) -> Any: + """Wrap a tool handler so nested spans execute under the stream-based TOOL span.""" if hasattr(handler, "_braintrust_wrapped"): return handler async def wrapped_handler(args: Any) -> Any: - # Get parent span export from thread-local storage - parent_export = getattr(_thread_local, "parent_span_export", None) - - with start_span( - name=str(tool_name), - span_attributes={"type": SpanTypeAttribute.TOOL}, - input=args, - parent=parent_export, - ) as span: - result = await handler(args) - span.log(output=result) - return result + active_tool_span = _activate_tool_span_for_handler(tool_name, args) + if not active_tool_span.has_span: + with start_span( + name=str(tool_name), + span_attributes={"type": SpanTypeAttribute.TOOL}, + input=args, + ) as span: + result = await handler(args) + span.log(output=result) + return result + + try: + return await handler(args) + except Exception as exc: + active_tool_span.log_error(exc) + raise + finally: + active_tool_span.release() - # Mark as wrapped to prevent double-wrapping wrapped_handler._braintrust_wrapped = True # type: ignore[attr-defined] return wrapped_handler -def _create_client_wrapper_class(original_client_class: Any) -> Any: - """Creates a wrapper class for ClaudeSDKClient that wraps query and receive_response.""" +class ToolSpanTracker: + def __init__(self): + self._active_spans: dict[str, _ActiveToolSpan] = {} + + def start_tool_spans(self, message: Any, llm_span_export: str | None) -> None: + if llm_span_export is None or not hasattr(message, "content"): + return + + for block in message.content: + if type(block).__name__ != BlockClassName.TOOL_USE: + continue + + tool_use_id = getattr(block, "id", None) + if not tool_use_id: + continue + + tool_use_id = str(tool_use_id) + if tool_use_id in self._active_spans: + self._end_tool_span(tool_use_id) + + parsed_tool_name = _parse_tool_name(getattr(block, "name", None)) + metadata = { + TOOL_METADATA.tool_name: parsed_tool_name.display_name, + TOOL_METADATA.tool_call_id: tool_use_id, + } + if parsed_tool_name.raw_name != parsed_tool_name.display_name: + metadata[TOOL_METADATA.raw_tool_name] = parsed_tool_name.raw_name + if parsed_tool_name.is_mcp: + metadata[TOOL_METADATA.operation_name] = MCP_TOOL_METADATA.operation_name + metadata[TOOL_METADATA.mcp_method_name] = MCP_TOOL_METADATA.method_name + if parsed_tool_name.mcp_server: + metadata[TOOL_METADATA.mcp_server] = parsed_tool_name.mcp_server + + tool_span = start_span( + name=parsed_tool_name.display_name, + span_attributes={"type": SpanTypeAttribute.TOOL}, + input=getattr(block, "input", None), + metadata=metadata, + parent=llm_span_export, + ) + self._active_spans[tool_use_id] = _ActiveToolSpan( + span=tool_span, + raw_name=parsed_tool_name.raw_name, + display_name=parsed_tool_name.display_name, + input=getattr(block, "input", None), + ) - class LLMSpanTracker: - """Manages LLM span lifecycle for Claude Agent SDK message streams. + def finish_tool_spans(self, message: Any) -> None: + if not hasattr(message, "content"): + return - Message flow per turn: - 1. UserMessage (tool results) → mark the time when next LLM will start - 2. AssistantMessage - LLM response arrives → create span with the marked start time, ending previous span - 3. ResultMessage - usage metrics → log to span + for block in message.content: + if type(block).__name__ != BlockClassName.TOOL_RESULT: + continue - We end the previous span when the next AssistantMessage arrives, using the marked - start time to ensure sequential timing (no overlapping LLM spans). - """ + tool_use_id = getattr(block, "tool_use_id", None) + if tool_use_id is None: + continue - def __init__(self, query_start_time: float | None = None): - self.current_span: Any | None = None - self.next_start_time: float | None = query_start_time + self._end_tool_span(str(tool_use_id), tool_result_block=block) - def start_llm_span( - self, message: Any, prompt: Any, conversation_history: list[dict[str, Any]] - ) -> dict[str, Any] | None: - """Start a new LLM span, ending the previous one if it exists.""" - # Use the marked start time, or current time as fallback - start_time = self.next_start_time if self.next_start_time is not None else time.time() + def cleanup(self, end_time: float | None = None) -> None: + for tool_use_id in list(self._active_spans): + self._end_tool_span(tool_use_id, end_time=end_time) - # End the previous span at this start time to ensure sequential spans - if self.current_span: - self.current_span.end(end_time=start_time) + @property + def has_active_spans(self) -> bool: + return bool(self._active_spans) - final_content, span = _create_llm_span_for_messages( - [message], prompt, conversation_history, start_time=start_time - ) - self.current_span = span - self.next_start_time = None # Reset for next span - return final_content + def acquire_span_for_handler(self, tool_name: Any, args: Any) -> _ActiveToolSpan | None: + parsed_tool_name = _parse_tool_name(tool_name) + candidate_names = list(dict.fromkeys((parsed_tool_name.raw_name, parsed_tool_name.display_name, str(tool_name)))) + + candidates = [ + active_tool_span + for active_tool_span in self._active_spans.values() + if not active_tool_span.handler_active + and (active_tool_span.raw_name in candidate_names or active_tool_span.display_name in candidate_names) + ] + + matched_span = _match_tool_span_for_handler(candidates, args) + if matched_span is None: + return None + + matched_span.activate() + return matched_span + + def _end_tool_span(self, tool_use_id: str, tool_result_block: Any | None = None, end_time: float | None = None) -> None: + active_tool_span = self._active_spans.pop(tool_use_id, None) + if active_tool_span is None: + return + + if tool_result_block is None: + active_tool_span.span.end(end_time=end_time) + return + + output = _serialize_tool_result_output(tool_result_block) + log_event: dict[str, Any] = {"output": output} + if getattr(tool_result_block, "is_error", None) is True: + log_event["error"] = str(output["content"]) + active_tool_span.span.log(**log_event) + active_tool_span.span.end(end_time=end_time) + + def get_span_export(self, tool_use_id: Any) -> str | None: + if tool_use_id is None: + return None + + active_tool_span = self._active_spans.get(str(tool_use_id)) + if active_tool_span is None: + return None + + return active_tool_span.span.export() + + +def _match_tool_span_for_handler(candidates: list[_ActiveToolSpan], args: Any) -> _ActiveToolSpan | None: + if not candidates: + return None + + exact_input_matches = [candidate for candidate in candidates if candidate.input == args] + if exact_input_matches: + return exact_input_matches[0] + + if len(candidates) == 1: + return candidates[0] - def mark_next_llm_start(self) -> None: - """Mark when the next LLM call will start (after tool results).""" - self.next_start_time = time.time() + for active_tool_span in candidates: + if active_tool_span.input is None: + return active_tool_span - def log_usage(self, usage_metrics: dict[str, float]) -> None: - """Log usage metrics to the current LLM span.""" - if self.current_span and usage_metrics: - self.current_span.log(metrics=usage_metrics) + return candidates[0] - def cleanup(self) -> None: - """End any unclosed spans.""" - if self.current_span: - self.current_span.end() - self.current_span = None + +def _activate_tool_span_for_handler(tool_name: Any, args: Any) -> _ActiveToolSpan | _NoopActiveToolSpan: + tool_span_tracker = getattr(_thread_local, "tool_span_tracker", None) + if tool_span_tracker is None: + return _NOOP_ACTIVE_TOOL_SPAN + + return tool_span_tracker.acquire_span_for_handler(tool_name, args) or _NOOP_ACTIVE_TOOL_SPAN + + +class LLMSpanTracker: + """Manages LLM span lifecycle for Claude Agent SDK message streams. + + Message flow per turn: + 1. UserMessage (tool results) -> mark the time when next LLM will start + 2. AssistantMessage - LLM response arrives -> create span with the marked start time, ending previous span + 3. ResultMessage - usage metrics -> log to span + + We end the previous span when the next AssistantMessage arrives, using the marked + start time to ensure sequential spans (no overlapping LLM spans). + """ + + def __init__(self, query_start_time: float | None = None): + self.current_span: Any | None = None + self.current_span_export: str | None = None + self.next_start_time: float | None = query_start_time + + def get_next_start_time(self) -> float: + return self.next_start_time if self.next_start_time is not None else time.time() + + def start_llm_span( + self, + message: Any, + prompt: Any, + conversation_history: list[dict[str, Any]], + start_time: float | None = None, + ) -> dict[str, Any] | None: + """Start a new LLM span, ending the previous one if it exists.""" + resolved_start_time = start_time if start_time is not None else self.get_next_start_time() + first_token_time = time.time() + + if self.current_span: + self.current_span.end(end_time=resolved_start_time) + + final_content, span = _create_llm_span_for_messages( + [message], prompt, conversation_history, start_time=resolved_start_time + ) + if span is not None: + span.log(metrics={"time_to_first_token": max(0.0, first_token_time - resolved_start_time)}) + self.current_span = span + self.current_span_export = span.export() if span else None + self.next_start_time = None + return final_content + + def mark_next_llm_start(self) -> None: + """Mark when the next LLM call will start (after tool results).""" + self.next_start_time = time.time() + + def log_usage(self, usage_metrics: dict[str, float]) -> None: + """Log usage metrics to the current LLM span.""" + if self.current_span and usage_metrics: + self.current_span.log(metrics=usage_metrics) + + def cleanup(self) -> None: + """End any unclosed spans.""" + if self.current_span: + self.current_span.end() + self.current_span = None + self.current_span_export = None + + +class TaskEventSpanTracker: + def __init__(self, root_span_export: str, tool_tracker: ToolSpanTracker): + self._root_span_export = root_span_export + self._tool_tracker = tool_tracker + self._active_spans: dict[str, Any] = {} + + def process(self, message: Any) -> None: + task_id = getattr(message, "task_id", None) + if task_id is None: + return + + task_id = str(task_id) + message_type = type(message).__name__ + task_span = self._active_spans.get(task_id) + + if task_span is None: + task_span = start_span( + name=self._span_name(message, task_id), + span_attributes={"type": SpanTypeAttribute.TASK}, + metadata=self._metadata(message), + parent=self._parent_export(message), + ) + self._active_spans[task_id] = task_span + else: + update: dict[str, Any] = {} + metadata = self._metadata(message) + if metadata: + update["metadata"] = metadata + + output = self._output(message) + if output is not None: + update["output"] = output + + if update: + task_span.log(**update) + + if self._should_end(message_type): + task_span.end() + del self._active_spans[task_id] + + def cleanup(self) -> None: + for task_id, span in list(self._active_spans.items()): + span.end() + del self._active_spans[task_id] + + def _parent_export(self, message: Any) -> str: + return self._tool_tracker.get_span_export(getattr(message, "tool_use_id", None)) or self._root_span_export + + def _span_name(self, message: Any, task_id: str) -> str: + return ( + getattr(message, "description", None) + or getattr(message, "task_type", None) + or f"Task {task_id}" + ) + + def _metadata(self, message: Any) -> dict[str, Any]: + metadata = { + k: v + for k, v in { + "task_id": getattr(message, "task_id", None), + "session_id": getattr(message, "session_id", None), + "tool_use_id": getattr(message, "tool_use_id", None), + "task_type": getattr(message, "task_type", None), + "status": getattr(message, "status", None), + "last_tool_name": getattr(message, "last_tool_name", None), + "usage": getattr(message, "usage", None), + }.items() + if v is not None + } + return metadata + + def _output(self, message: Any) -> dict[str, Any] | None: + summary = getattr(message, "summary", None) + output_file = getattr(message, "output_file", None) + + if summary is None and output_file is None: + return None + + return { + k: v + for k, v in { + "summary": summary, + "output_file": output_file, + }.items() + if v is not None + } + + def _should_end(self, message_type: str) -> bool: + return message_type == MessageClassName.TASK_NOTIFICATION + + +def _create_client_wrapper_class(original_client_class: Any) -> Any: + """Creates a wrapper class for ClaudeSDKClient that wraps query and receive_response.""" class WrappedClaudeSDKClient(Wrapper): def __init__(self, *args: Any, **kwargs: Any): @@ -208,40 +584,49 @@ async def receive_response(self) -> AsyncGenerator[Any, None]: initial_input = self.__last_prompt if self.__last_prompt else None with start_span( - name="Claude Agent", + name=CLAUDE_AGENT_TASK_SPAN_NAME, span_attributes={"type": SpanTypeAttribute.TASK}, input=initial_input, ) as span: # If we're capturing async messages, we'll update input after they're consumed input_needs_update = self.__captured_messages is not None - # Store the parent span export in thread-local storage for tool handlers - _thread_local.parent_span_export = span.export() final_results: list[dict[str, Any]] = [] + task_events: list[dict[str, Any]] = [] llm_tracker = LLMSpanTracker(query_start_time=self.__query_start_time) + tool_tracker = ToolSpanTracker() + task_event_span_tracker = TaskEventSpanTracker(span.export(), tool_tracker) + _thread_local.tool_span_tracker = tool_tracker try: async for message in generator: # Update input from captured async messages (once, after they're consumed) - if input_needs_update and self.__captured_messages: - captured_input = _format_captured_messages(self.__captured_messages) + if input_needs_update: + captured_input = self.__captured_messages if self.__captured_messages else [] if captured_input: span.log(input=captured_input) input_needs_update = False message_type = type(message).__name__ - if message_type == "AssistantMessage": - final_content = llm_tracker.start_llm_span(message, self.__last_prompt, final_results) + if message_type == MessageClassName.ASSISTANT: + if llm_tracker.current_span and tool_tracker.has_active_spans: + tool_tracker.cleanup(end_time=llm_tracker.get_next_start_time()) + final_content = llm_tracker.start_llm_span( + message, + self.__last_prompt, + final_results, + ) + tool_tracker.start_tool_spans(message, llm_tracker.current_span_export) if final_content: final_results.append(final_content) - elif message_type == "UserMessage": + elif message_type == MessageClassName.USER: + tool_tracker.finish_tool_spans(message) if hasattr(message, "content"): content = _serialize_content_blocks(message.content) final_results.append({"content": content, "role": "user"}) - llm_tracker.mark_next_llm_start() - elif message_type == "ResultMessage": + elif message_type == MessageClassName.RESULT: if hasattr(message, "usage"): usage_metrics = _extract_usage_from_result_message(message) llm_tracker.log_usage(usage_metrics) @@ -254,17 +639,25 @@ async def receive_response(self) -> AsyncGenerator[Any, None]: }.items() if v is not None } - if result_metadata: - span.log(metadata=result_metadata) + span.log(metadata=result_metadata) + elif message_type in SYSTEM_MESSAGE_TYPES: + task_event_span_tracker.process(message) + task_events.append(_serialize_system_message(message)) yield message - span.log(output=final_results[-1] if final_results else None) - except Exception as e: - log.warning("Error in tracing code", exc_info=e) + except Exception: + raise + else: + if final_results: + span.log(output=final_results[-1]) finally: + if task_events: + span.log(metadata={"task_events": task_events}) + task_event_span_tracker.cleanup() + tool_tracker.cleanup() llm_tracker.cleanup() - if hasattr(_thread_local, "parent_span_export"): - delattr(_thread_local, "parent_span_export") + if hasattr(_thread_local, "tool_span_tracker"): + delattr(_thread_local, "tool_span_tracker") async def __aenter__(self) -> "WrappedClaudeSDKClient": await self.__client.__aenter__() @@ -296,7 +689,7 @@ def _create_llm_span_for_messages( return None, None last_message = messages[-1] - if type(last_message).__name__ != "AssistantMessage": + if type(last_message).__name__ != MessageClassName.ASSISTANT: return None, None model = getattr(last_message, "model", None) input_messages = _build_llm_input(prompt, conversation_history) @@ -308,7 +701,7 @@ def _create_llm_span_for_messages( outputs.append({"content": content, "role": "assistant"}) llm_span = start_span( - name="anthropic.messages.create", + name=ANTHROPIC_MESSAGES_CREATE_SPAN_NAME, span_attributes={"type": SpanTypeAttribute.LLM}, input=input_messages, output=outputs, @@ -337,18 +730,20 @@ def _serialize_content_blocks(content: Any) -> Any: serialized = dataclasses.asdict(block) block_type = type(block).__name__ - if block_type == "TextBlock": - serialized["type"] = "text" - elif block_type == "ToolUseBlock": - serialized["type"] = "tool_use" - elif block_type == "ToolResultBlock": - serialized["type"] = "tool_result" + serialized_type = SERIALIZED_CONTENT_TYPE_BY_BLOCK_CLASS.get(block_type) + if serialized_type is not None: + serialized["type"] = serialized_type + if block_type == BlockClassName.TOOL_RESULT: content_value = serialized.get("content") if isinstance(content_value, list) and len(content_value) == 1: item = content_value[0] - if isinstance(item, dict) and item.get("type") == "text" and "text" in item: - serialized["content"] = item["text"] + if ( + isinstance(item, dict) + and item.get("type") == SerializedContentType.TEXT + and SerializedContentType.TEXT in item + ): + serialized["content"] = item[SerializedContentType.TEXT] if "is_error" in serialized and serialized["is_error"] is None: del serialized["is_error"] @@ -391,12 +786,3 @@ def _build_llm_input(prompt: Any, conversation_history: list[dict[str, Any]]) -> return [{"content": prompt, "role": "user"}] + conversation_history return conversation_history if conversation_history else None - - -def _format_captured_messages(messages: list[dict[str, Any]]) -> list[dict[str, Any]]: - """Formats captured async generator messages into structured input. - - Returns the messages as-is to preserve structure for tracing. - Empty list returns empty list. - """ - return messages if messages else [] diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_bundled_subagent_creates_task_span__sdk_0_1_48.json b/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_bundled_subagent_creates_task_span__sdk_0_1_48.json new file mode 100644 index 00000000..f00186b0 --- /dev/null +++ b/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_bundled_subagent_creates_task_span__sdk_0_1_48.json @@ -0,0 +1,696 @@ +{ + "cassette_name": "test_bundled_subagent_creates_task_span", + "events": [ + { + "op": "write", + "payload": { + "kind": "json", + "value": { + "request": { + "hooks": null, + "subtype": "initialize" + }, + "request_id": "req_1_e25e9f3a", + "type": "control_request" + } + } + }, + { + "op": "read", + "payload": { + "response": { + "request_id": "req_1_e25e9f3a", + "response": { + "account": { + "apiKeySource": "ANTHROPIC_API_KEY", + "tokenSource": "none" + }, + "agents": [ + { + "description": "General-purpose agent for researching complex questions, searching for code, and executing multi-step tasks. When you are searching for a keyword or file and are not confident that you will find the right match in the first few tries use this agent to perform the search for you.", + "name": "general-purpose" + }, + { + "description": "Use this agent to configure the user's Claude Code status line setting.", + "model": "sonnet", + "name": "statusline-setup" + }, + { + "description": "Fast agent specialized for exploring codebases. Use this when you need to quickly find files by patterns (eg. \"src/components/**/*.tsx\"), search code for keywords (eg. \"API endpoints\"), or answer questions about the codebase (eg. \"how do API endpoints work?\"). When calling this agent, specify the desired thoroughness level: \"quick\" for basic searches, \"medium\" for moderate exploration, or \"very thorough\" for comprehensive analysis across multiple locations and naming conventions.", + "model": "haiku", + "name": "Explore" + }, + { + "description": "Software architect agent for designing implementation plans. Use this when you need to plan the implementation strategy for a task. Returns step-by-step plans, identifies critical files, and considers architectural trade-offs.", + "name": "Plan" + } + ], + "available_output_styles": [ + "default", + "Explanatory", + "Learning" + ], + "commands": [ + { + "argumentHint": "", + "description": "Use when the user wants to customize keyboard shortcuts, rebind keys, add chord bindings, or modify ~/.claude/keybindings.json. Examples: \"rebind ctrl+s\", \"add a chord shortcut\", \"change the submit key\", \"customize keybindings\". (bundled)", + "name": "keybindings-help" + }, + { + "argumentHint": "[issue description]", + "description": "Enable debug logging for this session and help diagnose issues (bundled)", + "name": "debug" + }, + { + "argumentHint": "", + "description": "Review changed code for reuse, quality, and efficiency, then fix any issues found. (bundled)", + "name": "simplify" + }, + { + "argumentHint": "", + "description": "Research and plan a large-scale change, then execute it in parallel across 5\u201330 isolated worktree agents that each open a PR. (bundled)", + "name": "batch" + }, + { + "argumentHint": "[interval] ", + "description": "Run a prompt or slash command on a recurring interval (e.g. /loop 5m /foo, defaults to 10m) (bundled)", + "name": "loop" + }, + { + "argumentHint": "", + "description": "Build apps with the Claude API or Anthropic SDK.\nTRIGGER when: code imports `anthropic`/`@anthropic-ai/sdk`/`claude_agent_sdk`, or user asks to use Claude API, Anthropic SDKs, or Agent SDK.\nDO NOT TRIGGER when: code imports `openai`/other AI SDK, general programming, or ML/data-science tasks. (bundled)", + "name": "claude-api" + }, + { + "argumentHint": "", + "description": "Clear conversation history but keep a summary in context. Optional: /compact [instructions for summarization]", + "name": "compact" + }, + { + "argumentHint": "", + "description": "Show current context usage", + "name": "context" + }, + { + "argumentHint": "", + "description": "Show the total cost and duration of the current session", + "name": "cost" + }, + { + "argumentHint": "", + "description": "Dump the JS heap to ~/Desktop", + "name": "heapdump" + }, + { + "argumentHint": "", + "description": "Initialize a new CLAUDE.md file with codebase documentation", + "name": "init" + }, + { + "argumentHint": "", + "description": "Get comments from a GitHub pull request", + "name": "pr-comments" + }, + { + "argumentHint": "", + "description": "View release notes", + "name": "release-notes" + }, + { + "argumentHint": "", + "description": "Review a pull request", + "name": "review" + }, + { + "argumentHint": "", + "description": "Complete a security review of the pending changes on the current branch", + "name": "security-review" + }, + { + "argumentHint": "", + "description": "Generate a report analyzing your Claude Code sessions", + "name": "insights" + } + ], + "models": [ + { + "description": "Use the default model (currently Sonnet 4.6) \u00b7 $3/$15 per Mtok", + "displayName": "Default (recommended)", + "supportedEffortLevels": [ + "low", + "medium", + "high", + "max" + ], + "supportsAdaptiveThinking": true, + "supportsEffort": true, + "value": "default" + }, + { + "description": "Sonnet 4.6 for long sessions \u00b7 $6/$22.50 per Mtok", + "displayName": "Sonnet (1M context)", + "supportedEffortLevels": [ + "low", + "medium", + "high", + "max" + ], + "supportsAdaptiveThinking": true, + "supportsEffort": true, + "value": "sonnet[1m]" + }, + { + "description": "Opus 4.6 \u00b7 Most capable for complex work \u00b7 $5/$25 per Mtok", + "displayName": "Opus", + "supportedEffortLevels": [ + "low", + "medium", + "high", + "max" + ], + "supportsAdaptiveThinking": true, + "supportsEffort": true, + "supportsFastMode": true, + "value": "opus" + }, + { + "description": "Opus 4.6 for long sessions \u00b7 $10/$37.50 per Mtok", + "displayName": "Opus (1M context)", + "supportedEffortLevels": [ + "low", + "medium", + "high", + "max" + ], + "supportsAdaptiveThinking": true, + "supportsEffort": true, + "supportsFastMode": true, + "value": "opus[1m]" + }, + { + "description": "Haiku 4.5 \u00b7 Fastest for quick answers \u00b7 $1/$5 per Mtok", + "displayName": "Haiku", + "value": "haiku" + }, + { + "description": "claude-haiku-4-5-20251001", + "displayName": "Haiku 4.5", + "value": "claude-haiku-4-5-20251001" + } + ], + "output_style": "default", + "pid": 87088 + }, + "subtype": "success" + }, + "type": "control_response" + } + }, + { + "op": "write", + "payload": { + "kind": "json", + "value": { + "message": { + "content": "You must delegate this task to the bundled general-purpose agent. Have that agent inspect the current repository and reply with only the repository name. Do not answer directly without using the subagent.", + "role": "user" + }, + "parent_tool_use_id": null, + "session_id": "default", + "type": "user" + } + } + }, + { + "op": "read", + "payload": { + "agents": [ + "general-purpose", + "statusline-setup", + "Explore", + "Plan" + ], + "apiKeySource": "ANTHROPIC_API_KEY", + "claude_code_version": "2.1.71", + "cwd": "", + "fast_mode_state": "off", + "mcp_servers": [], + "model": "claude-haiku-4-5-20251001", + "output_style": "default", + "permissionMode": "bypassPermissions", + "plugins": [], + "session_id": "20233b8f-16f5-4e9a-a947-ea211e476dce", + "skills": [ + "keybindings-help", + "debug", + "simplify", + "batch", + "loop", + "claude-api" + ], + "slash_commands": [ + "keybindings-help", + "debug", + "simplify", + "batch", + "loop", + "claude-api", + "compact", + "context", + "cost", + "heapdump", + "init", + "pr-comments", + "release-notes", + "review", + "security-review", + "insights" + ], + "subtype": "init", + "tools": [ + "Task", + "TaskOutput", + "Bash", + "Glob", + "Grep", + "ExitPlanMode", + "Read", + "Edit", + "Write", + "NotebookEdit", + "WebFetch", + "TodoWrite", + "WebSearch", + "TaskStop", + "AskUserQuestion", + "Skill", + "EnterPlanMode", + "EnterWorktree", + "CronCreate", + "CronDelete", + "CronList", + "ToolSearch" + ], + "type": "system", + "uuid": "31a1f676-a444-48d2-9850-0f9a04f201ff" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "signature": "", + "thinking": "The user wants me to delegate a task to the general-purpose agent to inspect the current repository and reply with only the repository name. I should use the Agent tool with the \"general-purpose\" subagent type.\n\nThe task is to inspect the current repository and reply with the repository name. I'll ask the agent to do this.", + "type": "thinking" + } + ], + "context_management": null, + "id": "msg_01EKkgN3VHXAzhbB9rcWPDnY", + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_reason": null, + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 4210 + }, + "cache_creation_input_tokens": 4210, + "cache_read_input_tokens": 13723, + "inference_geo": "not_available", + "input_tokens": 10, + "output_tokens": 1, + "service_tier": "standard" + } + }, + "parent_tool_use_id": null, + "session_id": "20233b8f-16f5-4e9a-a947-ea211e476dce", + "type": "assistant", + "uuid": "ea022d57-eaf5-4cec-af5c-8a1532c050e2" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "caller": { + "type": "direct" + }, + "id": "toolu_01DiTaCFdU8v7PtqKwcKiVfC", + "input": { + "description": "Inspect repository and return name", + "prompt": "Inspect the current git repository and determine its name. Use git commands to find the repository name (you can check the git config, the .git directory, or the remote URLs). Reply with ONLY the repository name, nothing else.", + "subagent_type": "general-purpose" + }, + "name": "Agent", + "type": "tool_use" + } + ], + "context_management": null, + "id": "msg_01EKkgN3VHXAzhbB9rcWPDnY", + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_reason": null, + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 4210 + }, + "cache_creation_input_tokens": 4210, + "cache_read_input_tokens": 13723, + "inference_geo": "not_available", + "input_tokens": 10, + "output_tokens": 1, + "service_tier": "standard" + } + }, + "parent_tool_use_id": null, + "session_id": "20233b8f-16f5-4e9a-a947-ea211e476dce", + "type": "assistant", + "uuid": "e7806e52-0636-4302-aaa1-39937f78fd28" + } + }, + { + "op": "read", + "payload": { + "description": "Inspect repository and return name", + "prompt": "Inspect the current git repository and determine its name. Use git commands to find the repository name (you can check the git config, the .git directory, or the remote URLs). Reply with ONLY the repository name, nothing else.", + "session_id": "20233b8f-16f5-4e9a-a947-ea211e476dce", + "subtype": "task_started", + "task_id": "adc4e28af324cf3b8", + "task_type": "local_agent", + "tool_use_id": "toolu_01DiTaCFdU8v7PtqKwcKiVfC", + "type": "system", + "uuid": "28578d9d-1a26-4102-a023-8112d75cf795" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "text": "Inspect the current git repository and determine its name. Use git commands to find the repository name (you can check the git config, the .git directory, or the remote URLs). Reply with ONLY the repository name, nothing else.", + "type": "text" + } + ], + "role": "user" + }, + "parent_tool_use_id": "toolu_01DiTaCFdU8v7PtqKwcKiVfC", + "session_id": "20233b8f-16f5-4e9a-a947-ea211e476dce", + "type": "user", + "uuid": "547509d4-14ac-4b66-a879-ca5a515aeede" + } + }, + { + "op": "read", + "payload": { + "description": "Running Get the remote URL to determine repository name", + "last_tool_name": "Bash", + "session_id": "20233b8f-16f5-4e9a-a947-ea211e476dce", + "subtype": "task_progress", + "task_id": "adc4e28af324cf3b8", + "tool_use_id": "toolu_01DiTaCFdU8v7PtqKwcKiVfC", + "type": "system", + "usage": { + "duration_ms": 1606, + "tool_uses": 1, + "total_tokens": 13352 + }, + "uuid": "0e4dd812-c727-45ca-9116-df155c0af17a" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "caller": { + "type": "direct" + }, + "id": "toolu_015sKk5zQ5seoMWtsJdWWeKn", + "input": { + "command": "git config --get remote.origin.url", + "description": "Get the remote URL to determine repository name" + }, + "name": "Bash", + "type": "tool_use" + } + ], + "context_management": null, + "id": "msg_01BULgW4AiYyLLva26eT6xbg", + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_reason": null, + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 13322 + }, + "cache_creation_input_tokens": 13322, + "cache_read_input_tokens": 0, + "inference_geo": "not_available", + "input_tokens": 3, + "output_tokens": 27, + "service_tier": "standard" + } + }, + "parent_tool_use_id": "toolu_01DiTaCFdU8v7PtqKwcKiVfC", + "session_id": "20233b8f-16f5-4e9a-a947-ea211e476dce", + "type": "assistant", + "uuid": "1d8ecc87-127f-411a-833d-b7b887974003" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "content": "git@github.com:braintrustdata/braintrust-sdk-python.git", + "is_error": false, + "tool_use_id": "toolu_015sKk5zQ5seoMWtsJdWWeKn", + "type": "tool_result" + } + ], + "role": "user" + }, + "parent_tool_use_id": "toolu_01DiTaCFdU8v7PtqKwcKiVfC", + "session_id": "20233b8f-16f5-4e9a-a947-ea211e476dce", + "type": "user", + "uuid": "d9eb9d0a-f360-4b4d-9589-9791208fba3a" + } + }, + { + "op": "read", + "payload": { + "output_file": "", + "session_id": "20233b8f-16f5-4e9a-a947-ea211e476dce", + "status": "completed", + "subtype": "task_notification", + "summary": "Inspect repository and return name", + "task_id": "adc4e28af324cf3b8", + "tool_use_id": "toolu_01DiTaCFdU8v7PtqKwcKiVfC", + "type": "system", + "usage": { + "duration_ms": 2656, + "tool_uses": 1, + "total_tokens": 13471 + }, + "uuid": "b7227885-b5d2-4cab-b5c0-62abefc765ff" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "content": [ + { + "text": "braintrust-sdk-python", + "type": "text" + }, + { + "text": "agentId: adc4e28af324cf3b8 (for resuming to continue this agent's work if needed)\ntotal_tokens: 13453\ntool_uses: 1\nduration_ms: 2657", + "type": "text" + } + ], + "tool_use_id": "toolu_01DiTaCFdU8v7PtqKwcKiVfC", + "type": "tool_result" + } + ], + "role": "user" + }, + "parent_tool_use_id": null, + "session_id": "20233b8f-16f5-4e9a-a947-ea211e476dce", + "tool_use_result": { + "agentId": "adc4e28af324cf3b8", + "content": [ + { + "text": "braintrust-sdk-python", + "type": "text" + } + ], + "prompt": "Inspect the current git repository and determine its name. Use git commands to find the repository name (you can check the git config, the .git directory, or the remote URLs). Reply with ONLY the repository name, nothing else.", + "status": "completed", + "totalDurationMs": 2657, + "totalTokens": 13453, + "totalToolUseCount": 1, + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 115 + }, + "cache_creation_input_tokens": 115, + "cache_read_input_tokens": 13322, + "inference_geo": "", + "input_tokens": 6, + "iterations": [], + "output_tokens": 10, + "server_tool_use": { + "web_fetch_requests": 0, + "web_search_requests": 0 + }, + "service_tier": "standard", + "speed": "standard" + } + }, + "type": "user", + "uuid": "60e92138-c01c-4eda-b0e7-869f641bbc9b" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "signature": "", + "thinking": "The agent has returned \"braintrust-sdk-python\" as the repository name. The user asked me to delegate the task and have the agent reply with only the repository name. I should now provide that result to the user.", + "type": "thinking" + } + ], + "context_management": null, + "id": "msg_015Hg13AzAA4XGSKmkzhT2UH", + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_reason": null, + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 306 + }, + "cache_creation_input_tokens": 306, + "cache_read_input_tokens": 17933, + "inference_geo": "not_available", + "input_tokens": 8, + "output_tokens": 5, + "service_tier": "standard" + } + }, + "parent_tool_use_id": null, + "session_id": "20233b8f-16f5-4e9a-a947-ea211e476dce", + "type": "assistant", + "uuid": "ec279b2d-f5be-4f3f-8a38-8ae546eb478f" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "text": "braintrust-sdk-python", + "type": "text" + } + ], + "context_management": null, + "id": "msg_015Hg13AzAA4XGSKmkzhT2UH", + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_reason": null, + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 306 + }, + "cache_creation_input_tokens": 306, + "cache_read_input_tokens": 17933, + "inference_geo": "not_available", + "input_tokens": 8, + "output_tokens": 5, + "service_tier": "standard" + } + }, + "parent_tool_use_id": null, + "session_id": "20233b8f-16f5-4e9a-a947-ea211e476dce", + "type": "assistant", + "uuid": "83984605-5460-415f-9cf9-39a0c626a801" + } + }, + { + "op": "read", + "payload": { + "duration_api_ms": 6581, + "duration_ms": 6955, + "fast_mode_state": "off", + "is_error": false, + "modelUsage": { + "claude-haiku-4-5-20251001": { + "cacheCreationInputTokens": 17953, + "cacheReadInputTokens": 44978, + "contextWindow": 200000, + "costUSD": 0.028971049999999998, + "inputTokens": 27, + "maxOutputTokens": 32000, + "outputTokens": 401, + "webSearchRequests": 0 + } + }, + "num_turns": 2, + "permission_denials": [], + "result": "braintrust-sdk-python", + "session_id": "20233b8f-16f5-4e9a-a947-ea211e476dce", + "stop_reason": "end_turn", + "subtype": "success", + "total_cost_usd": 0.028971049999999998, + "type": "result", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 4516 + }, + "cache_creation_input_tokens": 4516, + "cache_read_input_tokens": 31656, + "inference_geo": "", + "input_tokens": 18, + "iterations": [], + "output_tokens": 285, + "server_tool_use": { + "web_fetch_requests": 0, + "web_search_requests": 0 + }, + "service_tier": "standard", + "speed": "standard" + }, + "uuid": "616f9447-8d7f-4fc8-821b-35f66e301c2e" + } + } + ], + "sdk_version": "0.1.48" +} diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py b/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py index b86cad42..9b9ef037 100644 --- a/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py +++ b/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py @@ -1,15 +1,12 @@ -""" -Integration tests for the Claude Agent SDK wrapper. - -These tests verify the wrapper creates the correct span hierarchy when used with -the actual Claude Agent SDK. -""" +"""Tests for the Claude Agent SDK wrapper.""" import asyncio +import dataclasses import gc import sys import types -from typing import Type +from pathlib import Path +from typing import Any, Type import pytest @@ -23,18 +20,28 @@ print("Claude Agent SDK not installed, skipping integration tests") from braintrust import logger +from braintrust.logger import start_span from braintrust.span_types import SpanTypeAttribute from braintrust.test_helpers import init_test_logger from braintrust.wrappers.claude_agent_sdk import setup_claude_agent_sdk from braintrust.wrappers.claude_agent_sdk._test_transport import make_cassette_transport from braintrust.wrappers.claude_agent_sdk._wrapper import ( + ToolSpanTracker, + _build_llm_input, _create_client_wrapper_class, _create_tool_wrapper_class, + _extract_usage_from_result_message, + _parse_tool_name, + _serialize_content_blocks, + _serialize_system_message, + _serialize_tool_result_output, + _thread_local, ) from braintrust.wrappers.test_utils import verify_autoinstrument_script PROJECT_NAME = "test-claude-agent-sdk" TEST_MODEL = "claude-haiku-4-5-20251001" +REPO_ROOT = Path(__file__).resolve().parents[5] @pytest.fixture @@ -48,21 +55,11 @@ def memory_logger(): @pytest.mark.skipif(not CLAUDE_SDK_AVAILABLE, reason="Claude Agent SDK not installed") @pytest.mark.asyncio async def test_calculator_with_multiple_operations(memory_logger): - """Test claude_agent.py example - calculator with multiple operations. - - This integration test verifies: - - Task span is created for the overall agent interaction - - LLM spans are created for each message group - - Tool spans are created for calculator calls - - Span hierarchy is correct (children reference parent) - - Metrics are properly extracted and logged - """ + """Test claude_agent.py example - calculator with multiple operations.""" assert not memory_logger.pop() - # Patch claude_agent_sdk for tracing (logger already initialized by fixture) original_client = claude_agent_sdk.ClaudeSDKClient original_tool_class = claude_agent_sdk.SdkMcpTool - claude_agent_sdk.ClaudeSDKClient = _create_client_wrapper_class(original_client) claude_agent_sdk.SdkMcpTool = _create_tool_wrapper_class(original_tool_class) @@ -161,6 +158,8 @@ async def calculator_handler(args): llm_spans = [s for s in spans if s["span_attributes"]["type"] == SpanTypeAttribute.LLM] assert len(llm_spans) >= 1, f"Should have at least one LLM span, got {len(llm_spans)}" + llm_span_ids = {span["span_id"] for span in llm_spans} + _assert_llm_spans_have_time_to_first_token(llm_spans) llm_spans_with_metrics = [s for s in llm_spans if "prompt_tokens" in s.get("metrics", {})] assert len(llm_spans_with_metrics) >= 1, "At least one LLM span should have token metrics" @@ -169,22 +168,24 @@ async def calculator_handler(args): assert llm_span["span_attributes"]["name"] == "anthropic.messages.create" assert isinstance(llm_span["output"], list) assert len(llm_span["output"]) > 0 - - last_llm_span = llm_spans[-1] - assert last_llm_span["metrics"]["prompt_tokens"] > 0 - assert last_llm_span["metrics"]["completion_tokens"] > 0 - + for metric_name in ("prompt_tokens", "completion_tokens", "tokens"): + if metric_name in llm_span.get("metrics", {}): + assert llm_span["metrics"][metric_name] > 0 tool_spans = [s for s in spans if s["span_attributes"]["type"] == SpanTypeAttribute.TOOL] for tool_span in tool_spans: assert tool_span["span_attributes"]["name"] == "calculator" assert tool_span["input"] is not None assert tool_span["output"] is not None + assert any(parent_id in llm_span_ids for parent_id in tool_span["span_parents"]) root_span_id = task_span["span_id"] - for span in spans: - if span["span_id"] != root_span_id: - assert span["root_span_id"] == root_span_id - assert root_span_id in span["span_parents"] + for llm_span in llm_spans: + assert llm_span["root_span_id"] == root_span_id + assert root_span_id in llm_span["span_parents"] + + for tool_span in tool_spans: + assert tool_span["root_span_id"] == root_span_id + assert any(parent_id in llm_span_ids for parent_id in tool_span["span_parents"]) def _make_message(content: str) -> dict: @@ -199,6 +200,23 @@ def _assert_structured_input(task_span: dict, expected_contents: list[str]) -> N assert [x["message"]["content"] for x in inp] == expected_contents +def _assert_llm_spans_have_time_to_first_token(llm_spans: list[dict[str, Any]]) -> None: + assert llm_spans, "Expected at least one LLM span" + for llm_span in llm_spans: + assert "time_to_first_token" in llm_span.get("metrics", {}) + assert llm_span["metrics"]["time_to_first_token"] >= 0 + + +def _sdk_version_at_least(version: str) -> bool: + if not CLAUDE_SDK_AVAILABLE: + return False + + def parse(value: str) -> tuple[int, ...]: + return tuple(int(part) for part in value.split(".") if part.isdigit()) + + return parse(getattr(claude_agent_sdk, "__version__", "0")) >= parse(version) + + class CustomAsyncIterable: """Custom AsyncIterable class (not a generator) for testing.""" @@ -272,7 +290,6 @@ async def test_query_async_iterable(memory_logger, cassette_name, input_factory, async for message in client.receive_response(): if type(message).__name__ == "ResultMessage": break - finally: claude_agent_sdk.ClaudeSDKClient = original_client @@ -287,6 +304,72 @@ async def test_query_async_iterable(memory_logger, cassette_name, input_factory, ) _assert_structured_input(task_span, expected_contents) + llm_spans = [s for s in spans if s["span_attributes"]["type"] == SpanTypeAttribute.LLM] + _assert_llm_spans_have_time_to_first_token(llm_spans) + + +@pytest.mark.skipif(not CLAUDE_SDK_AVAILABLE, reason="Claude Agent SDK not installed") +@pytest.mark.asyncio +async def test_bundled_subagent_creates_task_span(memory_logger): + assert not memory_logger.pop() + if not _sdk_version_at_least("0.1.48"): + pytest.skip("Bundled subagent task events were not observed on older Claude Agent SDK versions") + + original_client = claude_agent_sdk.ClaudeSDKClient + claude_agent_sdk.ClaudeSDKClient = _create_client_wrapper_class(original_client) + + try: + options = claude_agent_sdk.ClaudeAgentOptions( + model=TEST_MODEL, + cwd=REPO_ROOT, + permission_mode="bypassPermissions", + max_turns=8, + ) + transport = make_cassette_transport( + cassette_name="test_bundled_subagent_creates_task_span", + prompt="", + options=options, + ) + + async with claude_agent_sdk.ClaudeSDKClient(options=options, transport=transport) as client: + await client.query( + "You must delegate this task to the bundled general-purpose agent. " + "Have that agent inspect the current repository and reply with only the repository name. " + "Do not answer directly without using the subagent." + ) + async for message in client.receive_response(): + if type(message).__name__ == "ResultMessage": + break + finally: + claude_agent_sdk.ClaudeSDKClient = original_client + + spans = memory_logger.pop() + + task_spans = [s for s in spans if s["span_attributes"]["type"] == SpanTypeAttribute.TASK] + assert len(task_spans) >= 2, f"Expected root task span and subagent span, got {len(task_spans)}" + + root_task_span = _find_span_by_name(task_spans, "Claude Agent") + subagent_spans = [s for s in task_spans if s["span_attributes"]["name"] != "Claude Agent"] + tool_spans = [s for s in spans if s["span_attributes"]["type"] == SpanTypeAttribute.TOOL] + assert subagent_spans, "Expected at least one subagent task span" + assert any(s.get("metadata", {}).get("task_id") for s in subagent_spans) + for subagent_span in subagent_spans: + assert subagent_span["root_span_id"] == root_task_span["span_id"] + parents = set(subagent_span["span_parents"]) + tool_use_id = subagent_span.get("metadata", {}).get("tool_use_id") + matching_tool_span = next( + (s for s in tool_spans if s.get("metadata", {}).get("gen_ai.tool.call.id") == tool_use_id), + None, + ) + if matching_tool_span is not None: + assert matching_tool_span["span_id"] in parents + else: + assert root_task_span["span_id"] in parents + + assert root_task_span.get("metadata", {}).get("task_events"), "Expected task events on root task span" + + llm_spans = [s for s in spans if s["span_attributes"]["type"] == SpanTypeAttribute.LLM] + _assert_llm_spans_have_time_to_first_token(llm_spans) async def _single_message_generator(): """Generator yielding a single message.""" @@ -299,6 +382,596 @@ async def _multi_message_generator(): yield _make_message("Part 2") +@dataclasses.dataclass +class TextBlock: + text: str + + +@dataclasses.dataclass +class ToolUseBlock: + id: str + name: str + input: dict[str, Any] + + +@dataclasses.dataclass +class ToolResultBlock: + tool_use_id: str + content: Any + is_error: bool | None = None + + +@dataclasses.dataclass +class AssistantMessage: + content: list[Any] + model: str = TEST_MODEL + + +@dataclasses.dataclass +class UserMessage: + content: list[Any] + + +@dataclasses.dataclass +class TaskStartedMessage: + subtype: str + data: dict[str, Any] + task_id: str + description: str + uuid: str + session_id: str + tool_use_id: str | None = None + task_type: str | None = None + + +@dataclasses.dataclass +class TaskProgressMessage: + subtype: str + data: dict[str, Any] + task_id: str + description: str + usage: dict[str, Any] + uuid: str + session_id: str + tool_use_id: str | None = None + last_tool_name: str | None = None + + +@dataclasses.dataclass +class TaskNotificationMessage: + subtype: str + data: dict[str, Any] + task_id: str + status: str + output_file: str + summary: str + uuid: str + session_id: str + tool_use_id: str | None = None + usage: dict[str, Any] | None = None + + +class ResultMessage: + def __init__( + self, + *, + input_tokens: int = 1, + output_tokens: int = 1, + cache_creation_input_tokens: int = 0, + num_turns: int = 1, + session_id: str = "session-123", + ): + self.usage = types.SimpleNamespace( + input_tokens=input_tokens, + output_tokens=output_tokens, + cache_creation_input_tokens=cache_creation_input_tokens, + ) + self.num_turns = num_turns + self.session_id = session_id + + +def _make_fake_sdk_mcp_tool_class(): + class FakeSdkMcpTool: + def __init__(self, name, description, input_schema, handler, **kwargs): + del kwargs + self.name = name + self.description = description + self.input_schema = input_schema + self.handler = handler + + return FakeSdkMcpTool + + +def _find_spans_by_type(spans: list[dict[str, Any]], span_type: str) -> list[dict[str, Any]]: + return [span for span in spans if span.get("span_attributes", {}).get("type") == span_type] + + +def _find_span_by_name(spans: list[dict[str, Any]], name: str) -> dict[str, Any]: + for span in spans: + if span["span_attributes"]["name"] == name: + return span + + available_names = [span["span_attributes"]["name"] for span in spans] + raise AssertionError(f"Expected span named {name!r}. Available spans: {available_names}") + + +def _clear_tool_span_tracker() -> None: + if hasattr(_thread_local, "tool_span_tracker"): + delattr(_thread_local, "tool_span_tracker") + + +@pytest.mark.parametrize( + "tool_name,expected", + [ + pytest.param( + "calculator", + { + "raw_name": "calculator", + "display_name": "calculator", + "is_mcp": False, + "mcp_server": None, + }, + id="plain", + ), + pytest.param( + "mcp__filesystem__team__read_file", + { + "raw_name": "mcp__filesystem__team__read_file", + "display_name": "read_file", + "is_mcp": True, + "mcp_server": "filesystem__team", + }, + id="mcp_with_embedded_delimiters", + ), + ], +) +def test_parse_tool_name(tool_name, expected): + parsed = _parse_tool_name(tool_name) + + assert parsed.raw_name == expected["raw_name"] + assert parsed.display_name == expected["display_name"] + assert parsed.is_mcp == expected["is_mcp"] + assert parsed.mcp_server == expected["mcp_server"] + + +def test_tool_span_tracker_lifecycle(memory_logger): + assert not memory_logger.pop() + + tracker = ToolSpanTracker() + + with start_span(name="Claude Agent", type=SpanTypeAttribute.TASK) as task_span: + llm_span = start_span( + name="anthropic.messages.create", + type=SpanTypeAttribute.LLM, + parent=task_span.export(), + ) + tracker.start_tool_spans( + AssistantMessage( + content=[ + TextBlock("Let me calculate that."), + ToolUseBlock(id="call-4", name="calculator", input={"operation": "multiply", "a": 6, "b": 7}), + ] + ), + llm_span.export(), + ) + tracker.finish_tool_spans( + UserMessage(content=[ToolResultBlock(tool_use_id="call-4", content=[TextBlock("42")])]) + ) + llm_span.end() + + spans = memory_logger.pop() + llm_span_log = _find_span_by_name(spans, "anthropic.messages.create") + tool_span = _find_span_by_name(spans, "calculator") + + assert tool_span["input"] == {"operation": "multiply", "a": 6, "b": 7} + assert tool_span["output"] == {"content": "42"} + assert tool_span["metadata"]["gen_ai.tool.name"] == "calculator" + assert tool_span["metadata"]["gen_ai.tool.call.id"] == "call-4" + assert llm_span_log["span_id"] in tool_span["span_parents"] + + +def test_tool_span_tracker_logs_errors(memory_logger): + assert not memory_logger.pop() + + tracker = ToolSpanTracker() + + with start_span(name="Claude Agent", type=SpanTypeAttribute.TASK) as task_span: + llm_span = start_span( + name="anthropic.messages.create", + type=SpanTypeAttribute.LLM, + parent=task_span.export(), + ) + tracker.start_tool_spans( + AssistantMessage(content=[ToolUseBlock(id="call-err", name="calculator", input={"a": 1, "b": 0})]), + llm_span.export(), + ) + tracker.finish_tool_spans( + UserMessage( + content=[ToolResultBlock(tool_use_id="call-err", content=[TextBlock("Division by zero")], is_error=True)] + ) + ) + llm_span.end() + + spans = memory_logger.pop() + tool_span = _find_span_by_name(spans, "calculator") + + assert tool_span["output"] == {"content": "Division by zero", "is_error": True} + assert tool_span["error"] == "Division by zero" + + +def test_tool_span_tracker_cleanup_closes_unmatched_spans(memory_logger): + assert not memory_logger.pop() + + tracker = ToolSpanTracker() + + with start_span(name="Claude Agent", type=SpanTypeAttribute.TASK) as task_span: + llm_span = start_span( + name="anthropic.messages.create", + type=SpanTypeAttribute.LLM, + parent=task_span.export(), + ) + tracker.start_tool_spans( + AssistantMessage(content=[ToolUseBlock(id="call-dangling", name="weather", input={"city": "Toronto"})]), + llm_span.export(), + ) + tracker.cleanup() + llm_span.end() + + spans = memory_logger.pop() + tool_span = _find_span_by_name(spans, "weather") + + assert tool_span["input"] == {"city": "Toronto"} + assert tool_span.get("output") is None + + +def test_serialize_content_blocks_keeps_malformed_text_block_payload(): + malformed_tool_result = ToolResultBlock( + tool_use_id="call-malformed", + content=[{"type": "text"}], + ) + + serialized = _serialize_content_blocks([malformed_tool_result]) + + assert serialized == [ + { + "tool_use_id": "call-malformed", + "content": [{"type": "text"}], + "type": "tool_result", + } + ] + + +@pytest.mark.asyncio +async def test_wrapped_tool_handler_creates_fallback_tool_span_without_active_stream(memory_logger): + assert not memory_logger.pop() + + wrapped_tool_class = _create_tool_wrapper_class(_make_fake_sdk_mcp_tool_class()) + + async def calculator_handler(args): + return {"content": [{"type": "text", "text": f"{args['a'] * args['b']}"}]} + + calculator_tool = wrapped_tool_class( + name="calculator", + description="Multiply two numbers", + input_schema={"type": "object"}, + handler=calculator_handler, + ) + + with start_span(name="Claude Agent", type=SpanTypeAttribute.TASK): + result = await calculator_tool.handler({"operation": "multiply", "a": 6, "b": 7}) + + assert result == {"content": [{"type": "text", "text": "42"}]} + + spans = memory_logger.pop() + tool_span = _find_span_by_name(_find_spans_by_type(spans, SpanTypeAttribute.TOOL), "calculator") + + assert tool_span["input"] == {"operation": "multiply", "a": 6, "b": 7} + assert tool_span["output"] == {"content": [{"type": "text", "text": "42"}]} + + +def test_serialize_tool_result_output_flattens_text_blocks_and_errors(): + tool_result = ToolResultBlock( + tool_use_id="call-err", + content=[TextBlock("Division by zero")], + is_error=True, + ) + + output = _serialize_tool_result_output(tool_result) + + assert output == {"content": "Division by zero", "is_error": True} + + +@pytest.mark.parametrize( + "message,expected", + [ + pytest.param( + TaskStartedMessage( + subtype="task_started", + data={"subtype": "task_started", "task_id": "task-1"}, + task_id="task-1", + description="Inspect the repository", + uuid="msg-start", + session_id="session-123", + task_type="general-purpose", + ), + { + "subtype": "task_started", + "task_id": "task-1", + "description": "Inspect the repository", + "uuid": "msg-start", + "session_id": "session-123", + "task_type": "general-purpose", + }, + id="task_started", + ), + pytest.param( + TaskProgressMessage( + subtype="task_progress", + data={"subtype": "task_progress", "task_id": "task-1"}, + task_id="task-1", + description="Running Bash", + usage={"total_tokens": 11, "tool_uses": 1, "duration_ms": 250}, + uuid="msg-progress", + session_id="session-123", + tool_use_id="call-bash", + last_tool_name="Bash", + ), + { + "subtype": "task_progress", + "task_id": "task-1", + "description": "Running Bash", + "uuid": "msg-progress", + "session_id": "session-123", + "tool_use_id": "call-bash", + "last_tool_name": "Bash", + "usage": {"total_tokens": 11, "tool_uses": 1, "duration_ms": 250}, + }, + id="task_progress", + ), + pytest.param( + TaskNotificationMessage( + subtype="task_notification", + data={"subtype": "task_notification", "task_id": "task-1"}, + task_id="task-1", + status="completed", + output_file="/tmp/report.txt", + summary="Repository inspection completed", + uuid="msg-notify", + session_id="session-123", + tool_use_id="call-bash", + usage={"total_tokens": 15, "tool_uses": 1, "duration_ms": 400}, + ), + { + "subtype": "task_notification", + "task_id": "task-1", + "uuid": "msg-notify", + "session_id": "session-123", + "tool_use_id": "call-bash", + "status": "completed", + "output_file": "/tmp/report.txt", + "summary": "Repository inspection completed", + "usage": {"total_tokens": 15, "tool_uses": 1, "duration_ms": 400}, + }, + id="task_notification", + ), + ], +) +def test_serialize_system_message_extracts_known_fields(message, expected): + assert _serialize_system_message(message) == expected + + +def test_extract_usage_from_result_message_normalizes_anthropic_tokens(): + metrics = _extract_usage_from_result_message(ResultMessage(input_tokens=5, output_tokens=3, cache_creation_input_tokens=2)) + + assert metrics == { + "prompt_tokens": 7.0, + "completion_tokens": 3.0, + "prompt_cache_creation_tokens": 2.0, + "tokens": 10.0, + } + + +@pytest.mark.parametrize( + "prompt,conversation_history,expected", + [ + pytest.param( + "What is 2 + 2?", + [], + [{"content": "What is 2 + 2?", "role": "user"}], + id="prompt_only", + ), + pytest.param( + "What is 2 + 2?", + [ + {"role": "assistant", "content": "Let me calculate that."}, + {"role": "user", "content": "Please continue."}, + ], + [ + {"content": "What is 2 + 2?", "role": "user"}, + {"role": "assistant", "content": "Let me calculate that."}, + {"role": "user", "content": "Please continue."}, + ], + id="prompt_with_history", + ), + pytest.param( + None, + [ + {"role": "assistant", "content": "Let me calculate that."}, + {"role": "user", "content": "Please continue."}, + ], + [ + {"role": "assistant", "content": "Let me calculate that."}, + {"role": "user", "content": "Please continue."}, + ], + id="history_only", + ), + ], +) +def test_build_llm_input(prompt, conversation_history, expected): + assert _build_llm_input(prompt, conversation_history) == expected + + +def test_tool_span_tracker_records_mcp_metadata(memory_logger): + assert not memory_logger.pop() + + tracker = ToolSpanTracker() + + with start_span(name="Claude Agent", type=SpanTypeAttribute.TASK) as task_span: + llm_span = start_span( + name="anthropic.messages.create", + type=SpanTypeAttribute.LLM, + parent=task_span.export(), + ) + tracker.start_tool_spans( + AssistantMessage( + content=[ + ToolUseBlock( + id="call-mcp", + name="mcp__filesystem__team__read_file", + input={"path": "/tmp/test.txt"}, + ) + ] + ), + llm_span.export(), + ) + tracker.finish_tool_spans( + UserMessage(content=[ToolResultBlock(tool_use_id="call-mcp", content=[TextBlock("file contents")])]) + ) + llm_span.end() + + spans = memory_logger.pop() + tool_span = _find_span_by_name(spans, "read_file") + + assert tool_span["input"] == {"path": "/tmp/test.txt"} + assert tool_span["output"] == {"content": "file contents"} + assert tool_span["metadata"]["gen_ai.tool.name"] == "read_file" + assert tool_span["metadata"]["gen_ai.tool.call.id"] == "call-mcp" + assert tool_span["metadata"]["gen_ai.operation.name"] == "execute_tool" + assert tool_span["metadata"]["mcp.method.name"] == "tools/call" + assert tool_span["metadata"]["mcp.server"] == "filesystem__team" + assert tool_span["metadata"]["raw_tool_name"] == "mcp__filesystem__team__read_file" + + +@pytest.mark.asyncio +async def test_wrapped_tool_handler_keeps_nested_traces_under_stream_tool_span(memory_logger): + assert not memory_logger.pop() + + wrapped_tool_class = _create_tool_wrapper_class(_make_fake_sdk_mcp_tool_class()) + + async def calculator_handler(args): + nested_span = start_span(name="nested_tool_work") + nested_span.log(input=args) + nested_span.end() + return {"content": [{"type": "text", "text": "42"}]} + + calculator_tool = wrapped_tool_class( + name="calculator", + description="Multiply two numbers", + input_schema={"type": "object"}, + handler=calculator_handler, + ) + + tracker = ToolSpanTracker() + with start_span(name="Claude Agent", type=SpanTypeAttribute.TASK) as task_span: + llm_span = start_span( + name="anthropic.messages.create", + type=SpanTypeAttribute.LLM, + parent=task_span.export(), + ) + tracker.start_tool_spans( + AssistantMessage( + content=[ + ToolUseBlock(id="call-4", name="calculator", input={"operation": "multiply", "a": 6, "b": 7}), + ] + ), + llm_span.export(), + ) + _thread_local.tool_span_tracker = tracker + try: + result = await calculator_tool.handler({"operation": "multiply", "a": 6, "b": 7}) + tracker.finish_tool_spans( + UserMessage(content=[ToolResultBlock(tool_use_id="call-4", content=[TextBlock("42")])]) + ) + finally: + _clear_tool_span_tracker() + tracker.cleanup() + llm_span.end() + + assert result == {"content": [{"type": "text", "text": "42"}]} + + spans = memory_logger.pop() + tool_span = _find_span_by_name(_find_spans_by_type(spans, SpanTypeAttribute.TOOL), "calculator") + nested_span = _find_span_by_name(spans, "nested_tool_work") + + assert tool_span["span_id"] in nested_span["span_parents"] + + +@pytest.mark.asyncio +async def test_wrapped_tool_handler_matches_same_name_tool_spans_by_input(memory_logger): + assert not memory_logger.pop() + + wrapped_tool_class = _create_tool_wrapper_class(_make_fake_sdk_mcp_tool_class()) + + async def calculator_handler(args): + nested_span = start_span(name=f"nested_tool_work_{args['a']}") + nested_span.log(input=args) + nested_span.end() + return {"content": [{"type": "text", "text": str(args['a'] + args['b'])}]} + + calculator_tool = wrapped_tool_class( + name="calculator", + description="Add two numbers", + input_schema={"type": "object"}, + handler=calculator_handler, + ) + + tracker = ToolSpanTracker() + with start_span(name="Claude Agent", type=SpanTypeAttribute.TASK) as task_span: + llm_span = start_span( + name="anthropic.messages.create", + type=SpanTypeAttribute.LLM, + parent=task_span.export(), + ) + tracker.start_tool_spans( + AssistantMessage( + content=[ + ToolUseBlock(id="call-1", name="calculator", input={"operation": "add", "a": 2, "b": 3}), + ToolUseBlock(id="call-2", name="calculator", input={"operation": "add", "a": 10, "b": 5}), + ] + ), + llm_span.export(), + ) + _thread_local.tool_span_tracker = tracker + try: + await calculator_tool.handler({"operation": "add", "a": 10, "b": 5}) + await calculator_tool.handler({"operation": "add", "a": 2, "b": 3}) + tracker.finish_tool_spans( + UserMessage( + content=[ + ToolResultBlock(tool_use_id="call-1", content=[TextBlock("5")]), + ToolResultBlock(tool_use_id="call-2", content=[TextBlock("15")]), + ] + ) + ) + finally: + _clear_tool_span_tracker() + tracker.cleanup() + llm_span.end() + + spans = memory_logger.pop() + calculator_spans = [ + span + for span in _find_spans_by_type(spans, SpanTypeAttribute.TOOL) + if span["span_attributes"]["name"] == "calculator" + ] + tool_span_by_input = {tuple(sorted(span["input"].items())): span for span in calculator_spans} + nested_span_first = _find_span_by_name(spans, "nested_tool_work_2") + nested_span_second = _find_span_by_name(spans, "nested_tool_work_10") + + assert tool_span_by_input[(("a", 2), ("b", 3), ("operation", "add"))]["span_id"] in nested_span_first["span_parents"] + assert tool_span_by_input[(("a", 10), ("b", 5), ("operation", "add"))]["span_id"] in nested_span_second["span_parents"] + + class TestAutoInstrumentClaudeAgentSDK: """Tests for auto_instrument() with Claude Agent SDK.""" @@ -309,27 +982,15 @@ def test_auto_instrument_claude_agent_sdk(self): class _FakeClaudeAgentOptions: - def __init__(self, model, permission_mode=None): + def __init__(self, model, permission_mode=None, response_events=None): self.model = model self.permission_mode = permission_mode - - -class _FakeMessage: - def __init__(self, content): - self.content = content - - -class _FakeResultMessage: - def __init__(self): - self.usage = types.SimpleNamespace(input_tokens=1, output_tokens=1, cache_creation_input_tokens=0) - self.num_turns = 1 - self.session_id = "session-123" + self.response_events = response_events class _FakeClaudeSDKClient: def __init__(self, options): self.options = options - self._prompt = None async def __aenter__(self): return self @@ -338,19 +999,29 @@ async def __aexit__(self, *args): return None async def query(self, prompt): - self._prompt = prompt + del prompt async def receive_response(self): - yield _FakeMessage("Hello") - await asyncio.sleep(0) - yield _FakeResultMessage() + response_events = self.options.response_events or [ + AssistantMessage(content=[TextBlock("Hello")]), + ResultMessage(), + ] + + for event in response_events: + if callable(event): + maybe_awaitable = event() + if hasattr(maybe_awaitable, "__await__"): + await maybe_awaitable + continue + yield event + await asyncio.sleep(0) class _FakeClaudeSdkModule(types.ModuleType): ClaudeSDKClient: Type[_FakeClaudeSDKClient] ClaudeAgentOptions: Type[_FakeClaudeAgentOptions] - SdkMcpTool = None - tool = None + SdkMcpTool: Any + tool: Any class _FakeConsumerModule(types.ModuleType): @@ -362,6 +1033,15 @@ def _install_fake_claude_sdk(monkeypatch): fake_module = _FakeClaudeSdkModule("claude_agent_sdk") fake_module.ClaudeSDKClient = _FakeClaudeSDKClient fake_module.ClaudeAgentOptions = _FakeClaudeAgentOptions + fake_module.SdkMcpTool = _make_fake_sdk_mcp_tool_class() + + def fake_tool(*args, **kwargs): + def decorator(handler_fn): + return types.SimpleNamespace(handler=handler_fn, name=kwargs.get("name", "unknown"), args=args) + + return decorator + + fake_module.tool = fake_tool monkeypatch.setitem(sys.modules, "claude_agent_sdk", fake_module) return fake_module @@ -372,6 +1052,9 @@ async def test_setup_claude_agent_sdk_repro_import_before_setup(memory_logger, m assert not memory_logger.pop() fake_sdk = _install_fake_claude_sdk(monkeypatch) + original_tool_class = fake_sdk.SdkMcpTool + original_tool_fn = fake_sdk.tool + consumer_module_name = "test_issue7_repro_module" consumer_module = _FakeConsumerModule(consumer_module_name) consumer_module.ClaudeSDKClient = fake_sdk.ClaudeSDKClient @@ -380,6 +1063,8 @@ async def test_setup_claude_agent_sdk_repro_import_before_setup(memory_logger, m assert setup_claude_agent_sdk(project=PROJECT_NAME, api_key=logger.TEST_API_KEY) assert consumer_module.ClaudeSDKClient is not _FakeClaudeSDKClient + assert fake_sdk.SdkMcpTool is not original_tool_class + assert fake_sdk.tool is not original_tool_fn loop_errors = [] received_types = [] @@ -404,7 +1089,7 @@ async def main(): await main() assert loop_errors == [] - assert received_types == ["_FakeMessage", "_FakeResultMessage"] + assert received_types == ["AssistantMessage", "ResultMessage"] spans = memory_logger.pop() task_spans = [s for s in spans if s["span_attributes"]["type"] == SpanTypeAttribute.TASK] From 5b2c5c564aaa92c64c61eee4806e468719ff9780 Mon Sep 17 00:00:00 2001 From: Abhijeet Prasad Date: Wed, 11 Mar 2026 12:21:43 -0400 Subject: [PATCH 2/5] clean up tests --- .../wrappers/claude_agent_sdk/test_wrapper.py | 142 ++++++------------ 1 file changed, 43 insertions(+), 99 deletions(-) diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py b/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py index 9b9ef037..10776f8c 100644 --- a/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py +++ b/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py @@ -2,11 +2,10 @@ import asyncio import dataclasses -import gc import sys import types from pathlib import Path -from typing import Any, Type +from typing import Any import pytest @@ -980,119 +979,64 @@ def test_auto_instrument_claude_agent_sdk(self): """Test auto_instrument patches Claude Agent SDK and creates spans.""" verify_autoinstrument_script("test_auto_claude_agent_sdk.py") - -class _FakeClaudeAgentOptions: - def __init__(self, model, permission_mode=None, response_events=None): - self.model = model - self.permission_mode = permission_mode - self.response_events = response_events - - -class _FakeClaudeSDKClient: - def __init__(self, options): - self.options = options - - async def __aenter__(self): - return self - - async def __aexit__(self, *args): - return None - - async def query(self, prompt): - del prompt - - async def receive_response(self): - response_events = self.options.response_events or [ - AssistantMessage(content=[TextBlock("Hello")]), - ResultMessage(), - ] - - for event in response_events: - if callable(event): - maybe_awaitable = event() - if hasattr(maybe_awaitable, "__await__"): - await maybe_awaitable - continue - yield event - await asyncio.sleep(0) - - -class _FakeClaudeSdkModule(types.ModuleType): - ClaudeSDKClient: Type[_FakeClaudeSDKClient] - ClaudeAgentOptions: Type[_FakeClaudeAgentOptions] - SdkMcpTool: Any - tool: Any - - -class _FakeConsumerModule(types.ModuleType): - ClaudeSDKClient: Type[_FakeClaudeSDKClient] - ClaudeAgentOptions: Type[_FakeClaudeAgentOptions] - - -def _install_fake_claude_sdk(monkeypatch): - fake_module = _FakeClaudeSdkModule("claude_agent_sdk") - fake_module.ClaudeSDKClient = _FakeClaudeSDKClient - fake_module.ClaudeAgentOptions = _FakeClaudeAgentOptions - fake_module.SdkMcpTool = _make_fake_sdk_mcp_tool_class() - - def fake_tool(*args, **kwargs): - def decorator(handler_fn): - return types.SimpleNamespace(handler=handler_fn, name=kwargs.get("name", "unknown"), args=args) - - return decorator - - fake_module.tool = fake_tool - monkeypatch.setitem(sys.modules, "claude_agent_sdk", fake_module) - return fake_module - - +@pytest.mark.skipif(not CLAUDE_SDK_AVAILABLE, reason="Claude Agent SDK not installed") @pytest.mark.asyncio async def test_setup_claude_agent_sdk_repro_import_before_setup(memory_logger, monkeypatch): """Regression test for https://github.com/braintrustdata/braintrust-sdk-python/issues/7.""" assert not memory_logger.pop() - - fake_sdk = _install_fake_claude_sdk(monkeypatch) - original_tool_class = fake_sdk.SdkMcpTool - original_tool_fn = fake_sdk.tool + original_client = claude_agent_sdk.ClaudeSDKClient + original_tool_class = claude_agent_sdk.SdkMcpTool + original_tool_fn = claude_agent_sdk.tool consumer_module_name = "test_issue7_repro_module" - consumer_module = _FakeConsumerModule(consumer_module_name) - consumer_module.ClaudeSDKClient = fake_sdk.ClaudeSDKClient - consumer_module.ClaudeAgentOptions = fake_sdk.ClaudeAgentOptions + consumer_module = types.ModuleType(consumer_module_name) + consumer_module.ClaudeSDKClient = original_client + consumer_module.ClaudeAgentOptions = claude_agent_sdk.ClaudeAgentOptions + consumer_module.SdkMcpTool = original_tool_class + consumer_module.tool = original_tool_fn monkeypatch.setitem(sys.modules, consumer_module_name, consumer_module) - assert setup_claude_agent_sdk(project=PROJECT_NAME, api_key=logger.TEST_API_KEY) - assert consumer_module.ClaudeSDKClient is not _FakeClaudeSDKClient - assert fake_sdk.SdkMcpTool is not original_tool_class - assert fake_sdk.tool is not original_tool_fn - loop_errors = [] received_types = [] - async def main(): - loop = asyncio.get_running_loop() - loop.set_exception_handler(lambda loop, ctx: loop_errors.append(ctx.get("exception") or ctx.get("message"))) - - options = consumer_module.ClaudeAgentOptions( - model="claude-sonnet-4-20250514", - permission_mode="bypassPermissions", - ) - async with consumer_module.ClaudeSDKClient(options=options) as client: - await client.query("Hello") - async for message in client.receive_response(): - received_types.append(type(message).__name__) - - await asyncio.sleep(0) - gc.collect() - await asyncio.sleep(0.01) + try: + assert setup_claude_agent_sdk(project=PROJECT_NAME, api_key=logger.TEST_API_KEY) + assert consumer_module.ClaudeSDKClient is not original_client + assert consumer_module.SdkMcpTool is not original_tool_class + assert consumer_module.tool is not original_tool_fn + assert claude_agent_sdk.SdkMcpTool is not original_tool_class + assert claude_agent_sdk.tool is not original_tool_fn + + async def main() -> None: + loop = asyncio.get_running_loop() + loop.set_exception_handler(lambda loop, ctx: loop_errors.append(ctx.get("exception") or ctx.get("message"))) + + options = consumer_module.ClaudeAgentOptions( + model="claude-3-5-haiku-20241022", + permission_mode="bypassPermissions", + ) + transport = make_cassette_transport( + cassette_name="test_auto_claude_agent_sdk", + prompt="", + options=options, + ) + async with consumer_module.ClaudeSDKClient(options=options, transport=transport) as client: + await client.query("Say hi") + async for message in client.receive_response(): + received_types.append(type(message).__name__) - await main() + await main() + finally: + claude_agent_sdk.ClaudeSDKClient = original_client + claude_agent_sdk.SdkMcpTool = original_tool_class + claude_agent_sdk.tool = original_tool_fn assert loop_errors == [] - assert received_types == ["AssistantMessage", "ResultMessage"] + assert "AssistantMessage" in received_types + assert received_types[-1] == "ResultMessage" spans = memory_logger.pop() task_spans = [s for s in spans if s["span_attributes"]["type"] == SpanTypeAttribute.TASK] assert len(task_spans) == 1 assert task_spans[0]["span_attributes"]["name"] == "Claude Agent" - assert task_spans[0]["input"] == "Hello" + assert task_spans[0]["input"] == "Say hi" From 8719ad0466f535ea877a67d1fd52905b15db69b5 Mon Sep 17 00:00:00 2001 From: Abhijeet Prasad Date: Wed, 11 Mar 2026 12:37:48 -0400 Subject: [PATCH 3/5] fix pylint --- py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py b/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py index 10776f8c..0add38bd 100644 --- a/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py +++ b/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py @@ -5,16 +5,18 @@ import sys import types from pathlib import Path -from typing import Any +from typing import Any, cast import pytest # Try to import the Claude Agent SDK - skip tests if not available try: - import claude_agent_sdk + import claude_agent_sdk as _claude_agent_sdk + claude_agent_sdk = cast(Any, _claude_agent_sdk) CLAUDE_SDK_AVAILABLE = True except ImportError: + claude_agent_sdk = cast(Any, None) CLAUDE_SDK_AVAILABLE = False print("Claude Agent SDK not installed, skipping integration tests") From cff315380051ad5f71a0e395af888e77cf06c095 Mon Sep 17 00:00:00 2001 From: Abhijeet Prasad Date: Wed, 11 Mar 2026 12:38:26 -0400 Subject: [PATCH 4/5] more pylint --- .../wrappers/claude_agent_sdk/test_wrapper.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py b/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py index 0add38bd..8ba914a3 100644 --- a/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py +++ b/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py @@ -1003,9 +1003,9 @@ async def test_setup_claude_agent_sdk_repro_import_before_setup(memory_logger, m try: assert setup_claude_agent_sdk(project=PROJECT_NAME, api_key=logger.TEST_API_KEY) - assert consumer_module.ClaudeSDKClient is not original_client - assert consumer_module.SdkMcpTool is not original_tool_class - assert consumer_module.tool is not original_tool_fn + assert getattr(consumer_module, "ClaudeSDKClient") is not original_client + assert getattr(consumer_module, "SdkMcpTool") is not original_tool_class + assert getattr(consumer_module, "tool") is not original_tool_fn assert claude_agent_sdk.SdkMcpTool is not original_tool_class assert claude_agent_sdk.tool is not original_tool_fn @@ -1013,7 +1013,7 @@ async def main() -> None: loop = asyncio.get_running_loop() loop.set_exception_handler(lambda loop, ctx: loop_errors.append(ctx.get("exception") or ctx.get("message"))) - options = consumer_module.ClaudeAgentOptions( + options = getattr(consumer_module, "ClaudeAgentOptions")( model="claude-3-5-haiku-20241022", permission_mode="bypassPermissions", ) @@ -1022,7 +1022,7 @@ async def main() -> None: prompt="", options=options, ) - async with consumer_module.ClaudeSDKClient(options=options, transport=transport) as client: + async with getattr(consumer_module, "ClaudeSDKClient")(options=options, transport=transport) as client: await client.query("Say hi") async for message in client.receive_response(): received_types.append(type(message).__name__) From aede03c00c2f432abd718067dc40b3ef18ecf0b0 Mon Sep 17 00:00:00 2001 From: Abhijeet Prasad Date: Wed, 11 Mar 2026 18:29:28 -0400 Subject: [PATCH 5/5] make more improvements to the instrumentation --- .../claude_agent_sdk/_test_transport.py | 43 +- .../wrappers/claude_agent_sdk/_wrapper.py | 139 ++- .../cassettes/test_auto_claude_agent_sdk.json | 210 ++++ ...est_auto_claude_agent_sdk__sdk_0_1_10.json | 253 ----- ...est_auto_claude_agent_sdk__sdk_0_1_48.json | 381 ------- ...st_bundled_subagent_creates_task_span.json | 525 +++++++++ ...ubagent_creates_task_span__sdk_0_1_48.json | 696 ------------ ..._calculator_with_multiple_operations.json} | 455 ++++---- ..._with_multiple_operations__sdk_0_1_48.json | 1006 ----------------- ...nts_keep_outer_orchestration_separate.json | 793 +++++++++++++ ...c_iterable_asyncgen_multi__sdk_0_1_10.json | 271 ----- ...c_iterable_asyncgen_multi__sdk_0_1_48.json | 434 ------- ..._iterable_asyncgen_single__sdk_0_1_10.json | 257 ----- ..._iterable_asyncgen_single__sdk_0_1_48.json | 420 ------- ...ble_custom_async_iterable__sdk_0_1_10.json | 271 ----- ...ble_custom_async_iterable__sdk_0_1_48.json | 434 ------- .../wrappers/claude_agent_sdk/test_wrapper.py | 672 ++++++++++- 17 files changed, 2513 insertions(+), 4747 deletions(-) create mode 100644 py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_auto_claude_agent_sdk.json delete mode 100644 py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_auto_claude_agent_sdk__sdk_0_1_10.json delete mode 100644 py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_auto_claude_agent_sdk__sdk_0_1_48.json create mode 100644 py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_bundled_subagent_creates_task_span.json delete mode 100644 py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_bundled_subagent_creates_task_span__sdk_0_1_48.json rename py/src/braintrust/wrappers/claude_agent_sdk/cassettes/{test_calculator_with_multiple_operations__sdk_0_1_10.json => test_calculator_with_multiple_operations.json} (61%) delete mode 100644 py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_calculator_with_multiple_operations__sdk_0_1_48.json create mode 100644 py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_multiple_bundled_subagents_keep_outer_orchestration_separate.json delete mode 100644 py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_query_async_iterable_asyncgen_multi__sdk_0_1_10.json delete mode 100644 py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_query_async_iterable_asyncgen_multi__sdk_0_1_48.json delete mode 100644 py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_query_async_iterable_asyncgen_single__sdk_0_1_10.json delete mode 100644 py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_query_async_iterable_asyncgen_single__sdk_0_1_48.json delete mode 100644 py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_query_async_iterable_custom_async_iterable__sdk_0_1_10.json delete mode 100644 py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_query_async_iterable_custom_async_iterable__sdk_0_1_48.json diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/_test_transport.py b/py/src/braintrust/wrappers/claude_agent_sdk/_test_transport.py index 5d73f9fb..f8786ead 100644 --- a/py/src/braintrust/wrappers/claude_agent_sdk/_test_transport.py +++ b/py/src/braintrust/wrappers/claude_agent_sdk/_test_transport.py @@ -44,11 +44,6 @@ def get_record_mode() -> str: return "once" -def _version_suffix() -> str: - version = getattr(claude_agent_sdk, "__version__", "unknown") - return version.replace(".", "_") - - def _require_sdk() -> None: if _CLAUDE_AGENT_SDK_IMPORT_ERROR is not None: raise ImportError( @@ -57,7 +52,7 @@ def _require_sdk() -> None: def cassette_path(name: str) -> Path: - return CASSETTES_DIR / f"{name}__sdk_{_version_suffix()}.json" + return CASSETTES_DIR / f"{name}.json" def _normalize_write(data: str, *, sanitize: bool = False) -> dict[str, Any]: @@ -91,12 +86,48 @@ def _sanitize_json_for_storage(value: Any) -> Any: if isinstance(value, list): return [_sanitize_json_for_storage(item) for item in value] if isinstance(value, dict): + value = _compact_initialize_message_for_storage(value) return {key: _sanitize_field_for_storage(key, item) for key, item in value.items()} if isinstance(value, str): return _sanitize_string_for_storage(value) return value +def _compact_initialize_message_for_storage(value: dict[str, Any]) -> dict[str, Any]: + if value.get("type") != "control_response": + return value + + response = value.get("response") + if not isinstance(response, dict) or response.get("subtype") != "success": + return value + + result = response.get("response") + if not isinstance(result, dict) or not _looks_like_initialize_response(result): + return value + + compact_result: dict[str, Any] = {} + if "account" in result: + compact_result["account"] = result["account"] + + for key in ("available_output_styles", "commands", "models", "agents"): + if key in result: + compact_result[key] = [] + + return { + **value, + "response": { + **response, + "response": compact_result, + }, + } + + +def _looks_like_initialize_response(value: dict[str, Any]) -> bool: + return "account" in value and any( + key in value for key in ("available_output_styles", "commands", "models", "agents") + ) + + def _sanitize_field_for_storage(key: str, value: Any) -> Any: if not isinstance(value, str): return _sanitize_json_for_storage(value) diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py b/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py index cb7d4728..4e418d2d 100644 --- a/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py +++ b/py/src/braintrust/wrappers/claude_agent_sdk/_wrapper.py @@ -293,8 +293,10 @@ def finish_tool_spans(self, message: Any) -> None: self._end_tool_span(str(tool_use_id), tool_result_block=block) - def cleanup(self, end_time: float | None = None) -> None: + def cleanup(self, end_time: float | None = None, exclude_tool_use_ids: frozenset[str] | None = None) -> None: for tool_use_id in list(self._active_spans): + if exclude_tool_use_ids and tool_use_id in exclude_tool_use_ids: + continue self._end_tool_span(tool_use_id, end_time=end_time) @property @@ -387,6 +389,8 @@ class LLMSpanTracker: def __init__(self, query_start_time: float | None = None): self.current_span: Any | None = None self.current_span_export: str | None = None + self.current_parent_export: str | None = None + self.current_output: list[dict[str, Any]] | None = None self.next_start_time: float | None = query_start_time def get_next_start_time(self) -> float: @@ -397,9 +401,27 @@ def start_llm_span( message: Any, prompt: Any, conversation_history: list[dict[str, Any]], + parent_export: str | None = None, start_time: float | None = None, - ) -> dict[str, Any] | None: + ) -> tuple[dict[str, Any] | None, bool]: """Start a new LLM span, ending the previous one if it exists.""" + current_message = _serialize_assistant_message(message) + + if ( + self.current_span + and self.next_start_time is None + and self.current_parent_export == parent_export + and current_message is not None + ): + merged_message = _merge_assistant_messages( + self.current_output[0] if self.current_output else None, + current_message, + ) + if merged_message is not None: + self.current_output = [merged_message] + self.current_span.log(output=self.current_output) + return merged_message, True + resolved_start_time = start_time if start_time is not None else self.get_next_start_time() first_token_time = time.time() @@ -407,14 +429,20 @@ def start_llm_span( self.current_span.end(end_time=resolved_start_time) final_content, span = _create_llm_span_for_messages( - [message], prompt, conversation_history, start_time=resolved_start_time + [message], + prompt, + conversation_history, + parent=parent_export, + start_time=resolved_start_time, ) if span is not None: span.log(metrics={"time_to_first_token": max(0.0, first_token_time - resolved_start_time)}) self.current_span = span self.current_span_export = span.export() if span else None + self.current_parent_export = parent_export + self.current_output = [final_content] if final_content is not None else None self.next_start_time = None - return final_content + return final_content, False def mark_next_llm_start(self) -> None: """Mark when the next LLM call will start (after tool results).""" @@ -431,6 +459,8 @@ def cleanup(self) -> None: self.current_span.end() self.current_span = None self.current_span_export = None + self.current_parent_export = None + self.current_output = None class TaskEventSpanTracker: @@ -438,6 +468,8 @@ def __init__(self, root_span_export: str, tool_tracker: ToolSpanTracker): self._root_span_export = root_span_export self._tool_tracker = tool_tracker self._active_spans: dict[str, Any] = {} + self._task_span_by_tool_use_id: dict[str, Any] = {} + self._active_task_order: list[str] = [] def process(self, message: Any) -> None: task_id = getattr(message, "task_id", None) @@ -456,6 +488,10 @@ def process(self, message: Any) -> None: parent=self._parent_export(message), ) self._active_spans[task_id] = task_span + self._active_task_order.append(task_id) + tool_use_id = getattr(message, "tool_use_id", None) + if tool_use_id is not None: + self._task_span_by_tool_use_id[str(tool_use_id)] = task_span else: update: dict[str, Any] = {} metadata = self._metadata(message) @@ -470,13 +506,46 @@ def process(self, message: Any) -> None: task_span.log(**update) if self._should_end(message_type): + tool_use_id = getattr(message, "tool_use_id", None) + if tool_use_id is not None: + self._task_span_by_tool_use_id.pop(str(tool_use_id), None) task_span.end() del self._active_spans[task_id] + self._active_task_order = [active_task_id for active_task_id in self._active_task_order if active_task_id != task_id] + + @property + def active_tool_use_ids(self) -> frozenset[str]: + return frozenset(self._task_span_by_tool_use_id.keys()) def cleanup(self) -> None: for task_id, span in list(self._active_spans.items()): span.end() del self._active_spans[task_id] + self._task_span_by_tool_use_id.clear() + self._active_task_order.clear() + + def parent_export_for_message(self, message: Any, fallback_export: str) -> str: + parent_tool_use_id = getattr(message, "parent_tool_use_id", None) + if parent_tool_use_id is None: + if _message_starts_subagent_tool(message): + return fallback_export + active_task_export = self._latest_active_task_export() + return active_task_export or fallback_export + + task_span = self._task_span_by_tool_use_id.get(str(parent_tool_use_id)) + if task_span is not None: + return task_span.export() + + active_task_export = self._latest_active_task_export() + return active_task_export or fallback_export + + def _latest_active_task_export(self) -> str | None: + for task_id in reversed(self._active_task_order): + task_span = self._active_spans.get(task_id) + if task_span is not None: + return task_span.export() + + return None def _parent_export(self, message: Any) -> str: return self._tool_tracker.get_span_export(getattr(message, "tool_use_id", None)) or self._root_span_export @@ -524,6 +593,19 @@ def _should_end(self, message_type: str) -> bool: return message_type == MessageClassName.TASK_NOTIFICATION +def _message_starts_subagent_tool(message: Any) -> bool: + if not hasattr(message, "content"): + return False + + for block in message.content: + if type(block).__name__ != BlockClassName.TOOL_USE: + continue + if getattr(block, "name", None) == "Agent": + return True + + return False + + def _create_client_wrapper_class(original_client_class: Any) -> Any: """Creates a wrapper class for ClaudeSDKClient that wraps query and receive_response.""" @@ -611,21 +693,38 @@ async def receive_response(self) -> AsyncGenerator[Any, None]: if message_type == MessageClassName.ASSISTANT: if llm_tracker.current_span and tool_tracker.has_active_spans: - tool_tracker.cleanup(end_time=llm_tracker.get_next_start_time()) - final_content = llm_tracker.start_llm_span( + tool_tracker.cleanup( + end_time=llm_tracker.get_next_start_time(), + exclude_tool_use_ids=task_event_span_tracker.active_tool_use_ids, + ) + llm_parent_export = task_event_span_tracker.parent_export_for_message( + message, + span.export(), + ) + final_content, extended_existing_span = llm_tracker.start_llm_span( message, self.__last_prompt, final_results, + parent_export=llm_parent_export, ) tool_tracker.start_tool_spans(message, llm_tracker.current_span_export) if final_content: - final_results.append(final_content) + if extended_existing_span and final_results and final_results[-1].get("role") == "assistant": + final_results[-1] = final_content + else: + final_results.append(final_content) elif message_type == MessageClassName.USER: tool_tracker.finish_tool_spans(message) + has_tool_results = False if hasattr(message, "content"): + has_tool_results = any( + type(block).__name__ == BlockClassName.TOOL_RESULT + for block in message.content + ) content = _serialize_content_blocks(message.content) final_results.append({"content": content, "role": "user"}) - llm_tracker.mark_next_llm_start() + if has_tool_results: + llm_tracker.mark_next_llm_start() elif message_type == MessageClassName.RESULT: if hasattr(message, "usage"): usage_metrics = _extract_usage_from_result_message(message) @@ -673,6 +772,7 @@ def _create_llm_span_for_messages( messages: list[Any], # List of AssistantMessage objects prompt: Any, conversation_history: list[dict[str, Any]], + parent: str | None = None, start_time: float | None = None, ) -> tuple[dict[str, Any] | None, Any | None]: """Creates an LLM span for a group of AssistantMessage objects. @@ -706,6 +806,7 @@ def _create_llm_span_for_messages( input=input_messages, output=outputs, metadata={"model": model} if model else None, + parent=parent, start_time=start_time, ) @@ -717,6 +818,28 @@ def _create_llm_span_for_messages( return None, llm_span +def _serialize_assistant_message(message: Any) -> dict[str, Any] | None: + if not hasattr(message, "content"): + return None + + return {"content": _serialize_content_blocks(message.content), "role": "assistant"} + + +def _merge_assistant_messages(existing_message: dict[str, Any] | None, new_message: dict[str, Any]) -> dict[str, Any]: + if existing_message is None: + return new_message + + existing_content = existing_message.get("content") + new_content = new_message.get("content") + if isinstance(existing_content, list) and isinstance(new_content, list): + return { + "role": "assistant", + "content": [*existing_content, *new_content], + } + + return new_message + + def _serialize_content_blocks(content: Any) -> Any: """Converts content blocks to a serializable format with proper type fields. diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_auto_claude_agent_sdk.json b/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_auto_claude_agent_sdk.json new file mode 100644 index 00000000..3043ade7 --- /dev/null +++ b/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_auto_claude_agent_sdk.json @@ -0,0 +1,210 @@ +{ + "cassette_name": "test_auto_claude_agent_sdk", + "events": [ + { + "op": "write", + "payload": { + "kind": "json", + "value": { + "request": { + "hooks": null, + "subtype": "initialize" + }, + "request_id": "req_1_352812de", + "type": "control_request" + } + } + }, + { + "op": "read", + "payload": { + "response": { + "request_id": "req_1_352812de", + "response": { + "account": { + "apiKeySource": "ANTHROPIC_API_KEY", + "tokenSource": "none" + }, + "agents": [], + "available_output_styles": [], + "commands": [], + "models": [] + }, + "subtype": "success" + }, + "type": "control_response" + } + }, + { + "op": "write", + "payload": { + "kind": "json", + "value": { + "message": { + "content": "Say hi", + "role": "user" + }, + "parent_tool_use_id": null, + "session_id": "default", + "type": "user" + } + } + }, + { + "op": "read", + "payload": { + "agents": [ + "general-purpose", + "statusline-setup", + "Explore", + "Plan" + ], + "apiKeySource": "ANTHROPIC_API_KEY", + "claude_code_version": "2.1.71", + "cwd": "", + "fast_mode_state": "off", + "mcp_servers": [], + "model": "claude-3-5-haiku-20241022", + "output_style": "default", + "permissionMode": "bypassPermissions", + "plugins": [], + "session_id": "715aa7ea-4a5b-4189-ba6d-289eb5cbc314", + "skills": [ + "keybindings-help", + "debug", + "simplify", + "batch", + "loop", + "claude-api" + ], + "slash_commands": [ + "keybindings-help", + "debug", + "simplify", + "batch", + "loop", + "claude-api", + "compact", + "context", + "cost", + "heapdump", + "init", + "pr-comments", + "release-notes", + "review", + "security-review", + "insights" + ], + "subtype": "init", + "tools": [ + "Task", + "TaskOutput", + "Bash", + "Glob", + "Grep", + "ExitPlanMode", + "Read", + "Edit", + "Write", + "NotebookEdit", + "WebFetch", + "TodoWrite", + "WebSearch", + "TaskStop", + "AskUserQuestion", + "Skill", + "EnterPlanMode", + "EnterWorktree", + "CronCreate", + "CronDelete", + "CronList", + "ToolSearch" + ], + "type": "system", + "uuid": "04580fb4-5ab0-4597-a527-a77f1235c768" + } + }, + { + "op": "read", + "payload": { + "error": "invalid_request", + "message": { + "container": null, + "content": [ + { + "text": "There's an issue with the selected model (claude-3-5-haiku-20241022). It may not exist or you may not have access to it. Run --model to pick a different model.", + "type": "text" + } + ], + "context_management": null, + "id": "35b7f708-5311-4ecc-b95f-ec16a0fa5cc0", + "model": "", + "role": "assistant", + "stop_reason": "stop_sequence", + "stop_sequence": "", + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 0 + }, + "cache_creation_input_tokens": 0, + "cache_read_input_tokens": 0, + "inference_geo": null, + "input_tokens": 0, + "iterations": null, + "output_tokens": 0, + "server_tool_use": { + "web_fetch_requests": 0, + "web_search_requests": 0 + }, + "service_tier": null, + "speed": null + } + }, + "parent_tool_use_id": null, + "session_id": "715aa7ea-4a5b-4189-ba6d-289eb5cbc314", + "type": "assistant", + "uuid": "1b1fdd63-27cd-40b9-999e-05773744c670" + } + }, + { + "op": "read", + "payload": { + "duration_api_ms": 0, + "duration_ms": 424, + "fast_mode_state": "off", + "is_error": true, + "modelUsage": {}, + "num_turns": 1, + "permission_denials": [], + "result": "There's an issue with the selected model (claude-3-5-haiku-20241022). It may not exist or you may not have access to it. Run --model to pick a different model.", + "session_id": "715aa7ea-4a5b-4189-ba6d-289eb5cbc314", + "stop_reason": "stop_sequence", + "subtype": "success", + "total_cost_usd": 0, + "type": "result", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 0 + }, + "cache_creation_input_tokens": 0, + "cache_read_input_tokens": 0, + "inference_geo": "", + "input_tokens": 0, + "iterations": [], + "output_tokens": 0, + "server_tool_use": { + "web_fetch_requests": 0, + "web_search_requests": 0 + }, + "service_tier": "standard", + "speed": "standard" + }, + "uuid": "c4f6b54e-7792-4bf6-80b6-47133b56d787" + } + } + ], + "sdk_version": "0.1.48" +} diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_auto_claude_agent_sdk__sdk_0_1_10.json b/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_auto_claude_agent_sdk__sdk_0_1_10.json deleted file mode 100644 index b33d7ed0..00000000 --- a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_auto_claude_agent_sdk__sdk_0_1_10.json +++ /dev/null @@ -1,253 +0,0 @@ -{ - "cassette_name": "test_auto_claude_agent_sdk", - "events": [ - { - "op": "write", - "payload": { - "kind": "json", - "value": { - "request": { - "hooks": null, - "subtype": "initialize" - }, - "request_id": "req_1_d4224e8b", - "type": "control_request" - } - } - }, - { - "op": "read", - "payload": { - "response": { - "request_id": "req_1_d4224e8b", - "response": { - "account": { - "apiKeySource": "ANTHROPIC_API_KEY", - "tokenSource": "none" - }, - "available_output_styles": [ - "default", - "Explanatory", - "Learning" - ], - "commands": [ - { - "argumentHint": "", - "description": "Clear conversation history but keep a summary in context. Optional: /compact [instructions for summarization]", - "name": "compact" - }, - { - "argumentHint": "", - "description": "Visualize current context usage as a colored grid", - "name": "context" - }, - { - "argumentHint": "", - "description": "Show the total cost and duration of the current session", - "name": "cost" - }, - { - "argumentHint": "", - "description": "Initialize a new CLAUDE.md file with codebase documentation", - "name": "init" - }, - { - "argumentHint": "", - "description": "Get comments from a GitHub pull request", - "name": "pr-comments" - }, - { - "argumentHint": "", - "description": "View release notes", - "name": "release-notes" - }, - { - "argumentHint": "", - "description": "List current todo items", - "name": "todos" - }, - { - "argumentHint": "", - "description": "Review a pull request", - "name": "review" - }, - { - "argumentHint": "", - "description": "Complete a security review of the pending changes on the current branch", - "name": "security-review" - } - ], - "models": [ - { - "description": "Use the default model (currently Sonnet 4.5) \u00b7 $3/$15 per Mtok", - "displayName": "Default (recommended)", - "value": "default" - }, - { - "description": "Opus 4.5 \u00b7 Most capable for complex work \u00b7 $5/$25 per Mtok", - "displayName": "Opus", - "value": "opus" - }, - { - "description": "Haiku 4.5 \u00b7 Fastest for quick answers \u00b7 $1/$5 per Mtok", - "displayName": "Haiku", - "value": "haiku" - }, - { - "description": "Custom model", - "displayName": "claude-3-5-haiku-20241022", - "value": "claude-3-5-haiku-20241022" - } - ], - "output_style": "default" - }, - "subtype": "success" - }, - "type": "control_response" - } - }, - { - "op": "write", - "payload": { - "kind": "json", - "value": { - "message": { - "content": "Say hi", - "role": "user" - }, - "parent_tool_use_id": null, - "session_id": "default", - "type": "user" - } - } - }, - { - "op": "read", - "payload": { - "agents": [ - "general-purpose", - "statusline-setup", - "Explore", - "Plan" - ], - "apiKeySource": "ANTHROPIC_API_KEY", - "claude_code_version": "2.0.53", - "cwd": "", - "mcp_servers": [], - "model": "claude-3-5-haiku-20241022", - "output_style": "default", - "permissionMode": "bypassPermissions", - "plugins": [], - "session_id": "bd0f8f04-78fd-44fc-8aea-1b11a87cd63d", - "skills": [], - "slash_commands": [ - "compact", - "context", - "cost", - "init", - "pr-comments", - "release-notes", - "todos", - "review", - "security-review" - ], - "subtype": "init", - "tools": [ - "Task", - "Bash", - "Glob", - "Grep", - "ExitPlanMode", - "Read", - "Edit", - "Write", - "NotebookEdit", - "WebFetch", - "TodoWrite", - "WebSearch", - "BashOutput", - "KillShell", - "Skill", - "SlashCommand", - "EnterPlanMode" - ], - "type": "system", - "uuid": "54e0b263-025a-4de8-b4b5-d09783b053a4" - } - }, - { - "op": "read", - "payload": { - "error": "unknown", - "message": { - "container": null, - "content": [ - { - "text": "API Error: 404 {\"type\":\"error\",\"error\":{\"type\":\"not_found_error\",\"message\":\"model: claude-3-5-haiku-20241022\"},\"request_id\":\"req_011CYvJGzWBz7oifGm9o8gpg\"}", - "type": "text" - } - ], - "context_management": null, - "id": "3cce00aa-b48d-4674-8be9-4502bbecdf57", - "model": "", - "role": "assistant", - "stop_reason": "stop_sequence", - "stop_sequence": "", - "type": "message", - "usage": { - "cache_creation": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 0 - }, - "cache_creation_input_tokens": 0, - "cache_read_input_tokens": 0, - "input_tokens": 0, - "output_tokens": 0, - "server_tool_use": { - "web_fetch_requests": 0, - "web_search_requests": 0 - }, - "service_tier": null - } - }, - "parent_tool_use_id": null, - "session_id": "bd0f8f04-78fd-44fc-8aea-1b11a87cd63d", - "type": "assistant", - "uuid": "2530bd82-4687-409b-a1b5-f38c57ac06de" - } - }, - { - "op": "read", - "payload": { - "duration_api_ms": 0, - "duration_ms": 415, - "is_error": true, - "modelUsage": {}, - "num_turns": 1, - "permission_denials": [], - "result": "API Error: 404 {\"type\":\"error\",\"error\":{\"type\":\"not_found_error\",\"message\":\"model: claude-3-5-haiku-20241022\"},\"request_id\":\"req_011CYvJGzWBz7oifGm9o8gpg\"}", - "session_id": "bd0f8f04-78fd-44fc-8aea-1b11a87cd63d", - "subtype": "success", - "total_cost_usd": 0, - "type": "result", - "usage": { - "cache_creation": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 0 - }, - "cache_creation_input_tokens": 0, - "cache_read_input_tokens": 0, - "input_tokens": 0, - "output_tokens": 0, - "server_tool_use": { - "web_fetch_requests": 0, - "web_search_requests": 0 - }, - "service_tier": "standard" - }, - "uuid": "9a8bad13-6c95-4e9b-be7f-18c5b1287e9b" - } - } - ], - "sdk_version": "0.1.10" -} diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_auto_claude_agent_sdk__sdk_0_1_48.json b/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_auto_claude_agent_sdk__sdk_0_1_48.json deleted file mode 100644 index 83672567..00000000 --- a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_auto_claude_agent_sdk__sdk_0_1_48.json +++ /dev/null @@ -1,381 +0,0 @@ -{ - "cassette_name": "test_auto_claude_agent_sdk", - "events": [ - { - "op": "write", - "payload": { - "kind": "json", - "value": { - "request": { - "hooks": null, - "subtype": "initialize" - }, - "request_id": "req_1_25b53356", - "type": "control_request" - } - } - }, - { - "op": "read", - "payload": { - "response": { - "request_id": "req_1_25b53356", - "response": { - "account": { - "apiKeySource": "ANTHROPIC_API_KEY", - "tokenSource": "none" - }, - "agents": [ - { - "description": "General-purpose agent for researching complex questions, searching for code, and executing multi-step tasks. When you are searching for a keyword or file and are not confident that you will find the right match in the first few tries use this agent to perform the search for you.", - "name": "general-purpose" - }, - { - "description": "Use this agent to configure the user's Claude Code status line setting.", - "model": "sonnet", - "name": "statusline-setup" - }, - { - "description": "Fast agent specialized for exploring codebases. Use this when you need to quickly find files by patterns (eg. \"src/components/**/*.tsx\"), search code for keywords (eg. \"API endpoints\"), or answer questions about the codebase (eg. \"how do API endpoints work?\"). When calling this agent, specify the desired thoroughness level: \"quick\" for basic searches, \"medium\" for moderate exploration, or \"very thorough\" for comprehensive analysis across multiple locations and naming conventions.", - "model": "haiku", - "name": "Explore" - }, - { - "description": "Software architect agent for designing implementation plans. Use this when you need to plan the implementation strategy for a task. Returns step-by-step plans, identifies critical files, and considers architectural trade-offs.", - "name": "Plan" - } - ], - "available_output_styles": [ - "default", - "Explanatory", - "Learning" - ], - "commands": [ - { - "argumentHint": "", - "description": "Use when the user wants to customize keyboard shortcuts, rebind keys, add chord bindings, or modify ~/.claude/keybindings.json. Examples: \"rebind ctrl+s\", \"add a chord shortcut\", \"change the submit key\", \"customize keybindings\". (bundled)", - "name": "keybindings-help" - }, - { - "argumentHint": "[issue description]", - "description": "Enable debug logging for this session and help diagnose issues (bundled)", - "name": "debug" - }, - { - "argumentHint": "", - "description": "Review changed code for reuse, quality, and efficiency, then fix any issues found. (bundled)", - "name": "simplify" - }, - { - "argumentHint": "", - "description": "Research and plan a large-scale change, then execute it in parallel across 5\u201330 isolated worktree agents that each open a PR. (bundled)", - "name": "batch" - }, - { - "argumentHint": "[interval] ", - "description": "Run a prompt or slash command on a recurring interval (e.g. /loop 5m /foo, defaults to 10m) (bundled)", - "name": "loop" - }, - { - "argumentHint": "", - "description": "Build apps with the Claude API or Anthropic SDK.\nTRIGGER when: code imports `anthropic`/`@anthropic-ai/sdk`/`claude_agent_sdk`, or user asks to use Claude API, Anthropic SDKs, or Agent SDK.\nDO NOT TRIGGER when: code imports `openai`/other AI SDK, general programming, or ML/data-science tasks. (bundled)", - "name": "claude-api" - }, - { - "argumentHint": "", - "description": "Clear conversation history but keep a summary in context. Optional: /compact [instructions for summarization]", - "name": "compact" - }, - { - "argumentHint": "", - "description": "Show current context usage", - "name": "context" - }, - { - "argumentHint": "", - "description": "Show the total cost and duration of the current session", - "name": "cost" - }, - { - "argumentHint": "", - "description": "Dump the JS heap to ~/Desktop", - "name": "heapdump" - }, - { - "argumentHint": "", - "description": "Initialize a new CLAUDE.md file with codebase documentation", - "name": "init" - }, - { - "argumentHint": "", - "description": "Get comments from a GitHub pull request", - "name": "pr-comments" - }, - { - "argumentHint": "", - "description": "View release notes", - "name": "release-notes" - }, - { - "argumentHint": "", - "description": "Review a pull request", - "name": "review" - }, - { - "argumentHint": "", - "description": "Complete a security review of the pending changes on the current branch", - "name": "security-review" - }, - { - "argumentHint": "", - "description": "Generate a report analyzing your Claude Code sessions", - "name": "insights" - } - ], - "models": [ - { - "description": "Use the default model (currently Sonnet 4.6) \u00b7 $3/$15 per Mtok", - "displayName": "Default (recommended)", - "supportedEffortLevels": [ - "low", - "medium", - "high", - "max" - ], - "supportsAdaptiveThinking": true, - "supportsEffort": true, - "value": "default" - }, - { - "description": "Sonnet 4.6 for long sessions \u00b7 $6/$22.50 per Mtok", - "displayName": "Sonnet (1M context)", - "supportedEffortLevels": [ - "low", - "medium", - "high", - "max" - ], - "supportsAdaptiveThinking": true, - "supportsEffort": true, - "value": "sonnet[1m]" - }, - { - "description": "Opus 4.6 \u00b7 Most capable for complex work \u00b7 $5/$25 per Mtok", - "displayName": "Opus", - "supportedEffortLevels": [ - "low", - "medium", - "high", - "max" - ], - "supportsAdaptiveThinking": true, - "supportsEffort": true, - "supportsFastMode": true, - "value": "opus" - }, - { - "description": "Opus 4.6 for long sessions \u00b7 $10/$37.50 per Mtok", - "displayName": "Opus (1M context)", - "supportedEffortLevels": [ - "low", - "medium", - "high", - "max" - ], - "supportsAdaptiveThinking": true, - "supportsEffort": true, - "supportsFastMode": true, - "value": "opus[1m]" - }, - { - "description": "Haiku 4.5 \u00b7 Fastest for quick answers \u00b7 $1/$5 per Mtok", - "displayName": "Haiku", - "value": "haiku" - }, - { - "description": "Newer version available \u00b7 select Haiku for Haiku 4.5", - "displayName": "Claude 3.5 Haiku", - "value": "claude-3-5-haiku-20241022" - } - ], - "output_style": "default", - "pid": 521 - }, - "subtype": "success" - }, - "type": "control_response" - } - }, - { - "op": "write", - "payload": { - "kind": "json", - "value": { - "message": { - "content": "Say hi", - "role": "user" - }, - "parent_tool_use_id": null, - "session_id": "default", - "type": "user" - } - } - }, - { - "op": "read", - "payload": { - "agents": [ - "general-purpose", - "statusline-setup", - "Explore", - "Plan" - ], - "apiKeySource": "ANTHROPIC_API_KEY", - "claude_code_version": "2.1.71", - "cwd": "", - "fast_mode_state": "off", - "mcp_servers": [], - "model": "claude-3-5-haiku-20241022", - "output_style": "default", - "permissionMode": "bypassPermissions", - "plugins": [], - "session_id": "7d364a6b-d0b7-4992-9513-617c5fdaafa8", - "skills": [ - "keybindings-help", - "debug", - "simplify", - "batch", - "loop", - "claude-api" - ], - "slash_commands": [ - "keybindings-help", - "debug", - "simplify", - "batch", - "loop", - "claude-api", - "compact", - "context", - "cost", - "heapdump", - "init", - "pr-comments", - "release-notes", - "review", - "security-review", - "insights" - ], - "subtype": "init", - "tools": [ - "Task", - "TaskOutput", - "Bash", - "Glob", - "Grep", - "ExitPlanMode", - "Read", - "Edit", - "Write", - "NotebookEdit", - "WebFetch", - "TodoWrite", - "WebSearch", - "TaskStop", - "AskUserQuestion", - "Skill", - "EnterPlanMode", - "EnterWorktree", - "CronCreate", - "CronDelete", - "CronList", - "ToolSearch" - ], - "type": "system", - "uuid": "c791f4b3-400e-4d34-bd0e-0a370e960b59" - } - }, - { - "op": "read", - "payload": { - "error": "invalid_request", - "message": { - "container": null, - "content": [ - { - "text": "There's an issue with the selected model (claude-3-5-haiku-20241022). It may not exist or you may not have access to it. Run --model to pick a different model.", - "type": "text" - } - ], - "context_management": null, - "id": "74e468a5-ac6f-4d56-86ce-75a0bd3568fd", - "model": "", - "role": "assistant", - "stop_reason": "stop_sequence", - "stop_sequence": "", - "type": "message", - "usage": { - "cache_creation": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 0 - }, - "cache_creation_input_tokens": 0, - "cache_read_input_tokens": 0, - "inference_geo": null, - "input_tokens": 0, - "iterations": null, - "output_tokens": 0, - "server_tool_use": { - "web_fetch_requests": 0, - "web_search_requests": 0 - }, - "service_tier": null, - "speed": null - } - }, - "parent_tool_use_id": null, - "session_id": "7d364a6b-d0b7-4992-9513-617c5fdaafa8", - "type": "assistant", - "uuid": "e0bec5a6-6f57-470d-a506-a3225c70cef1" - } - }, - { - "op": "read", - "payload": { - "duration_api_ms": 0, - "duration_ms": 426, - "fast_mode_state": "off", - "is_error": true, - "modelUsage": {}, - "num_turns": 1, - "permission_denials": [], - "result": "There's an issue with the selected model (claude-3-5-haiku-20241022). It may not exist or you may not have access to it. Run --model to pick a different model.", - "session_id": "7d364a6b-d0b7-4992-9513-617c5fdaafa8", - "stop_reason": "stop_sequence", - "subtype": "success", - "total_cost_usd": 0, - "type": "result", - "usage": { - "cache_creation": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 0 - }, - "cache_creation_input_tokens": 0, - "cache_read_input_tokens": 0, - "inference_geo": "", - "input_tokens": 0, - "iterations": [], - "output_tokens": 0, - "server_tool_use": { - "web_fetch_requests": 0, - "web_search_requests": 0 - }, - "service_tier": "standard", - "speed": "standard" - }, - "uuid": "68e781e7-c30a-42b0-bfa6-0b79db1d8e88" - } - } - ], - "sdk_version": "0.1.48" -} diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_bundled_subagent_creates_task_span.json b/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_bundled_subagent_creates_task_span.json new file mode 100644 index 00000000..b3be3521 --- /dev/null +++ b/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_bundled_subagent_creates_task_span.json @@ -0,0 +1,525 @@ +{ + "cassette_name": "test_bundled_subagent_creates_task_span", + "events": [ + { + "op": "write", + "payload": { + "kind": "json", + "value": { + "request": { + "hooks": null, + "subtype": "initialize" + }, + "request_id": "req_1_1ff6a2bd", + "type": "control_request" + } + } + }, + { + "op": "read", + "payload": { + "response": { + "request_id": "req_1_1ff6a2bd", + "response": { + "account": { + "apiKeySource": "ANTHROPIC_API_KEY", + "tokenSource": "none" + }, + "agents": [], + "available_output_styles": [], + "commands": [], + "models": [] + }, + "subtype": "success" + }, + "type": "control_response" + } + }, + { + "op": "write", + "payload": { + "kind": "json", + "value": { + "message": { + "content": "You must delegate this task to the bundled general-purpose agent. Have that agent inspect the current repository and reply with only the repository name. Do not answer directly without using the subagent.", + "role": "user" + }, + "parent_tool_use_id": null, + "session_id": "default", + "type": "user" + } + } + }, + { + "op": "read", + "payload": { + "agents": [ + "general-purpose", + "statusline-setup", + "Explore", + "Plan" + ], + "apiKeySource": "ANTHROPIC_API_KEY", + "claude_code_version": "2.1.71", + "cwd": "", + "fast_mode_state": "off", + "mcp_servers": [], + "model": "claude-haiku-4-5-20251001", + "output_style": "default", + "permissionMode": "bypassPermissions", + "plugins": [], + "session_id": "f3a4ac3b-566d-4a94-add4-a223e0bb81d9", + "skills": [ + "keybindings-help", + "debug", + "simplify", + "batch", + "loop", + "claude-api" + ], + "slash_commands": [ + "keybindings-help", + "debug", + "simplify", + "batch", + "loop", + "claude-api", + "compact", + "context", + "cost", + "heapdump", + "init", + "pr-comments", + "release-notes", + "review", + "security-review", + "insights" + ], + "subtype": "init", + "tools": [ + "Task", + "TaskOutput", + "Bash", + "Glob", + "Grep", + "ExitPlanMode", + "Read", + "Edit", + "Write", + "NotebookEdit", + "WebFetch", + "TodoWrite", + "WebSearch", + "TaskStop", + "AskUserQuestion", + "Skill", + "EnterPlanMode", + "EnterWorktree", + "CronCreate", + "CronDelete", + "CronList", + "ToolSearch" + ], + "type": "system", + "uuid": "5ba3cbe0-8c02-409a-a424-5d406d1ac019" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "signature": "", + "thinking": "The user is asking me to delegate the task to a general-purpose agent to inspect the current repository and reply with only the repository name. They explicitly state I should not answer directly without using the subagent.\n\nI need to use the Agent tool with subagent_type \"general-purpose\" to:\n1. Inspect the current repository\n2. Get the repository name\n3. Have the agent reply with only the repository name\n\nLet me invoke the general-purpose agent to do this.", + "type": "thinking" + } + ], + "context_management": null, + "id": "msg_01HfZQtBdQsCUX2ACtDwfQj5", + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_reason": null, + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 4210 + }, + "cache_creation_input_tokens": 4210, + "cache_read_input_tokens": 13723, + "inference_geo": "not_available", + "input_tokens": 10, + "output_tokens": 8, + "service_tier": "standard" + } + }, + "parent_tool_use_id": null, + "session_id": "f3a4ac3b-566d-4a94-add4-a223e0bb81d9", + "type": "assistant", + "uuid": "31424cd2-e47c-4c7f-bdb6-ce2375ae7a8c" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "caller": { + "type": "direct" + }, + "id": "toolu_01PPP5buTH4CbqHHsRT6PV1j", + "input": { + "description": "Inspect repository and return name", + "prompt": "Inspect the current git repository and determine its name. Reply with ONLY the repository name, nothing else.", + "subagent_type": "general-purpose" + }, + "name": "Agent", + "type": "tool_use" + } + ], + "context_management": null, + "id": "msg_01HfZQtBdQsCUX2ACtDwfQj5", + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_reason": null, + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 4210 + }, + "cache_creation_input_tokens": 4210, + "cache_read_input_tokens": 13723, + "inference_geo": "not_available", + "input_tokens": 10, + "output_tokens": 8, + "service_tier": "standard" + } + }, + "parent_tool_use_id": null, + "session_id": "f3a4ac3b-566d-4a94-add4-a223e0bb81d9", + "type": "assistant", + "uuid": "4c145d58-366d-423c-ad7e-03ace6aa15de" + } + }, + { + "op": "read", + "payload": { + "description": "Inspect repository and return name", + "prompt": "Inspect the current git repository and determine its name. Reply with ONLY the repository name, nothing else.", + "session_id": "f3a4ac3b-566d-4a94-add4-a223e0bb81d9", + "subtype": "task_started", + "task_id": "a1eaf2ecd4d8968df", + "task_type": "local_agent", + "tool_use_id": "toolu_01PPP5buTH4CbqHHsRT6PV1j", + "type": "system", + "uuid": "da85f40c-793d-4cde-96c5-8e6751fcafd6" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "text": "Inspect the current git repository and determine its name. Reply with ONLY the repository name, nothing else.", + "type": "text" + } + ], + "role": "user" + }, + "parent_tool_use_id": "toolu_01PPP5buTH4CbqHHsRT6PV1j", + "session_id": "f3a4ac3b-566d-4a94-add4-a223e0bb81d9", + "type": "user", + "uuid": "ff52487f-6c32-43b0-965a-33fda2d013fe" + } + }, + { + "op": "read", + "payload": { + "description": "Running Get the remote repository URL to determine the repository name", + "last_tool_name": "Bash", + "session_id": "f3a4ac3b-566d-4a94-add4-a223e0bb81d9", + "subtype": "task_progress", + "task_id": "a1eaf2ecd4d8968df", + "tool_use_id": "toolu_01PPP5buTH4CbqHHsRT6PV1j", + "type": "system", + "usage": { + "duration_ms": 1277, + "tool_uses": 1, + "total_tokens": 13843 + }, + "uuid": "61ad602a-d833-4dbb-a371-ffc578c08dd7" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "caller": { + "type": "direct" + }, + "id": "toolu_015gvmWWDEFMcEsDvGrkb434", + "input": { + "command": "git config --get remote.origin.url", + "description": "Get the remote repository URL to determine the repository name" + }, + "name": "Bash", + "type": "tool_use" + } + ], + "context_management": null, + "id": "msg_012DAbx6oadTcdDdf2izWwcH", + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_reason": null, + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 13830 + }, + "cache_creation_input_tokens": 13830, + "cache_read_input_tokens": 0, + "inference_geo": "not_available", + "input_tokens": 3, + "output_tokens": 5, + "service_tier": "standard" + } + }, + "parent_tool_use_id": "toolu_01PPP5buTH4CbqHHsRT6PV1j", + "session_id": "f3a4ac3b-566d-4a94-add4-a223e0bb81d9", + "type": "assistant", + "uuid": "6ea0c352-7aab-40ae-a370-7d5c9138ad83" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "content": "git@github.com:braintrustdata/braintrust-sdk-python.git", + "is_error": false, + "tool_use_id": "toolu_015gvmWWDEFMcEsDvGrkb434", + "type": "tool_result" + } + ], + "role": "user" + }, + "parent_tool_use_id": "toolu_01PPP5buTH4CbqHHsRT6PV1j", + "session_id": "f3a4ac3b-566d-4a94-add4-a223e0bb81d9", + "type": "user", + "uuid": "8c222148-ccea-43ef-9002-1dd2f5fd49be" + } + }, + { + "op": "read", + "payload": { + "output_file": "", + "session_id": "f3a4ac3b-566d-4a94-add4-a223e0bb81d9", + "status": "completed", + "subtype": "task_notification", + "summary": "Inspect repository and return name", + "task_id": "a1eaf2ecd4d8968df", + "tool_use_id": "toolu_01PPP5buTH4CbqHHsRT6PV1j", + "type": "system", + "usage": { + "duration_ms": 2248, + "tool_uses": 1, + "total_tokens": 13975 + }, + "uuid": "b1ee7643-f426-4a32-b945-f8f582f27980" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "content": [ + { + "text": "braintrust-sdk-python", + "type": "text" + }, + { + "text": "agentId: a1eaf2ecd4d8968df (for resuming to continue this agent's work if needed)\ntotal_tokens: 13974\ntool_uses: 1\nduration_ms: 2250", + "type": "text" + } + ], + "tool_use_id": "toolu_01PPP5buTH4CbqHHsRT6PV1j", + "type": "tool_result" + } + ], + "role": "user" + }, + "parent_tool_use_id": null, + "session_id": "f3a4ac3b-566d-4a94-add4-a223e0bb81d9", + "tool_use_result": { + "agentId": "a1eaf2ecd4d8968df", + "content": [ + { + "text": "braintrust-sdk-python", + "type": "text" + } + ], + "prompt": "Inspect the current git repository and determine its name. Reply with ONLY the repository name, nothing else.", + "status": "completed", + "totalDurationMs": 2250, + "totalTokens": 13974, + "totalToolUseCount": 1, + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 128 + }, + "cache_creation_input_tokens": 128, + "cache_read_input_tokens": 13830, + "inference_geo": "", + "input_tokens": 6, + "iterations": [], + "output_tokens": 10, + "server_tool_use": { + "web_fetch_requests": 0, + "web_search_requests": 0 + }, + "service_tier": "standard", + "speed": "standard" + } + }, + "type": "user", + "uuid": "9b778d83-0baf-4451-a2a3-9b7bda034eac" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "signature": "", + "thinking": "Great! The agent has inspected the repository and returned the name. The repository name is \"braintrust-sdk-python\". The agent has done exactly what was requested - inspected the repository and replied with only the repository name.\n\nI should now present this result to the user. Since the user asked me to delegate the task and have the agent reply with only the repository name, I should just provide that result.", + "type": "thinking" + } + ], + "context_management": null, + "id": "msg_01T5PgLyWxF7V2yYEe9b3RqZ", + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_reason": null, + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 311 + }, + "cache_creation_input_tokens": 311, + "cache_read_input_tokens": 17933, + "inference_geo": "not_available", + "input_tokens": 8, + "output_tokens": 3, + "service_tier": "standard" + } + }, + "parent_tool_use_id": null, + "session_id": "f3a4ac3b-566d-4a94-add4-a223e0bb81d9", + "type": "assistant", + "uuid": "c8a7a8ef-bbd4-4fcf-af73-c56fac07f3cc" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "text": "braintrust-sdk-python", + "type": "text" + } + ], + "context_management": null, + "id": "msg_01T5PgLyWxF7V2yYEe9b3RqZ", + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_reason": null, + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 311 + }, + "cache_creation_input_tokens": 311, + "cache_read_input_tokens": 17933, + "inference_geo": "not_available", + "input_tokens": 8, + "output_tokens": 3, + "service_tier": "standard" + } + }, + "parent_tool_use_id": null, + "session_id": "f3a4ac3b-566d-4a94-add4-a223e0bb81d9", + "type": "assistant", + "uuid": "6751d85c-cc60-4d21-8e6f-672f57a3b33a" + } + }, + { + "op": "read", + "payload": { + "duration_api_ms": 5602, + "duration_ms": 6029, + "fast_mode_state": "off", + "is_error": false, + "modelUsage": { + "claude-haiku-4-5-20251001": { + "cacheCreationInputTokens": 18479, + "cacheReadInputTokens": 45486, + "contextWindow": 200000, + "costUSD": 0.02987435, + "inputTokens": 27, + "maxOutputTokens": 32000, + "outputTokens": 440, + "webSearchRequests": 0 + } + }, + "num_turns": 2, + "permission_denials": [], + "result": "braintrust-sdk-python", + "session_id": "f3a4ac3b-566d-4a94-add4-a223e0bb81d9", + "stop_reason": "end_turn", + "subtype": "success", + "total_cost_usd": 0.02987435, + "type": "result", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 4521 + }, + "cache_creation_input_tokens": 4521, + "cache_read_input_tokens": 31656, + "inference_geo": "", + "input_tokens": 18, + "iterations": [], + "output_tokens": 332, + "server_tool_use": { + "web_fetch_requests": 0, + "web_search_requests": 0 + }, + "service_tier": "standard", + "speed": "standard" + }, + "uuid": "4298fec8-9ffb-4ac1-a391-bc1bcfb3b4bb" + } + } + ], + "sdk_version": "0.1.48" +} diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_bundled_subagent_creates_task_span__sdk_0_1_48.json b/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_bundled_subagent_creates_task_span__sdk_0_1_48.json deleted file mode 100644 index f00186b0..00000000 --- a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_bundled_subagent_creates_task_span__sdk_0_1_48.json +++ /dev/null @@ -1,696 +0,0 @@ -{ - "cassette_name": "test_bundled_subagent_creates_task_span", - "events": [ - { - "op": "write", - "payload": { - "kind": "json", - "value": { - "request": { - "hooks": null, - "subtype": "initialize" - }, - "request_id": "req_1_e25e9f3a", - "type": "control_request" - } - } - }, - { - "op": "read", - "payload": { - "response": { - "request_id": "req_1_e25e9f3a", - "response": { - "account": { - "apiKeySource": "ANTHROPIC_API_KEY", - "tokenSource": "none" - }, - "agents": [ - { - "description": "General-purpose agent for researching complex questions, searching for code, and executing multi-step tasks. When you are searching for a keyword or file and are not confident that you will find the right match in the first few tries use this agent to perform the search for you.", - "name": "general-purpose" - }, - { - "description": "Use this agent to configure the user's Claude Code status line setting.", - "model": "sonnet", - "name": "statusline-setup" - }, - { - "description": "Fast agent specialized for exploring codebases. Use this when you need to quickly find files by patterns (eg. \"src/components/**/*.tsx\"), search code for keywords (eg. \"API endpoints\"), or answer questions about the codebase (eg. \"how do API endpoints work?\"). When calling this agent, specify the desired thoroughness level: \"quick\" for basic searches, \"medium\" for moderate exploration, or \"very thorough\" for comprehensive analysis across multiple locations and naming conventions.", - "model": "haiku", - "name": "Explore" - }, - { - "description": "Software architect agent for designing implementation plans. Use this when you need to plan the implementation strategy for a task. Returns step-by-step plans, identifies critical files, and considers architectural trade-offs.", - "name": "Plan" - } - ], - "available_output_styles": [ - "default", - "Explanatory", - "Learning" - ], - "commands": [ - { - "argumentHint": "", - "description": "Use when the user wants to customize keyboard shortcuts, rebind keys, add chord bindings, or modify ~/.claude/keybindings.json. Examples: \"rebind ctrl+s\", \"add a chord shortcut\", \"change the submit key\", \"customize keybindings\". (bundled)", - "name": "keybindings-help" - }, - { - "argumentHint": "[issue description]", - "description": "Enable debug logging for this session and help diagnose issues (bundled)", - "name": "debug" - }, - { - "argumentHint": "", - "description": "Review changed code for reuse, quality, and efficiency, then fix any issues found. (bundled)", - "name": "simplify" - }, - { - "argumentHint": "", - "description": "Research and plan a large-scale change, then execute it in parallel across 5\u201330 isolated worktree agents that each open a PR. (bundled)", - "name": "batch" - }, - { - "argumentHint": "[interval] ", - "description": "Run a prompt or slash command on a recurring interval (e.g. /loop 5m /foo, defaults to 10m) (bundled)", - "name": "loop" - }, - { - "argumentHint": "", - "description": "Build apps with the Claude API or Anthropic SDK.\nTRIGGER when: code imports `anthropic`/`@anthropic-ai/sdk`/`claude_agent_sdk`, or user asks to use Claude API, Anthropic SDKs, or Agent SDK.\nDO NOT TRIGGER when: code imports `openai`/other AI SDK, general programming, or ML/data-science tasks. (bundled)", - "name": "claude-api" - }, - { - "argumentHint": "", - "description": "Clear conversation history but keep a summary in context. Optional: /compact [instructions for summarization]", - "name": "compact" - }, - { - "argumentHint": "", - "description": "Show current context usage", - "name": "context" - }, - { - "argumentHint": "", - "description": "Show the total cost and duration of the current session", - "name": "cost" - }, - { - "argumentHint": "", - "description": "Dump the JS heap to ~/Desktop", - "name": "heapdump" - }, - { - "argumentHint": "", - "description": "Initialize a new CLAUDE.md file with codebase documentation", - "name": "init" - }, - { - "argumentHint": "", - "description": "Get comments from a GitHub pull request", - "name": "pr-comments" - }, - { - "argumentHint": "", - "description": "View release notes", - "name": "release-notes" - }, - { - "argumentHint": "", - "description": "Review a pull request", - "name": "review" - }, - { - "argumentHint": "", - "description": "Complete a security review of the pending changes on the current branch", - "name": "security-review" - }, - { - "argumentHint": "", - "description": "Generate a report analyzing your Claude Code sessions", - "name": "insights" - } - ], - "models": [ - { - "description": "Use the default model (currently Sonnet 4.6) \u00b7 $3/$15 per Mtok", - "displayName": "Default (recommended)", - "supportedEffortLevels": [ - "low", - "medium", - "high", - "max" - ], - "supportsAdaptiveThinking": true, - "supportsEffort": true, - "value": "default" - }, - { - "description": "Sonnet 4.6 for long sessions \u00b7 $6/$22.50 per Mtok", - "displayName": "Sonnet (1M context)", - "supportedEffortLevels": [ - "low", - "medium", - "high", - "max" - ], - "supportsAdaptiveThinking": true, - "supportsEffort": true, - "value": "sonnet[1m]" - }, - { - "description": "Opus 4.6 \u00b7 Most capable for complex work \u00b7 $5/$25 per Mtok", - "displayName": "Opus", - "supportedEffortLevels": [ - "low", - "medium", - "high", - "max" - ], - "supportsAdaptiveThinking": true, - "supportsEffort": true, - "supportsFastMode": true, - "value": "opus" - }, - { - "description": "Opus 4.6 for long sessions \u00b7 $10/$37.50 per Mtok", - "displayName": "Opus (1M context)", - "supportedEffortLevels": [ - "low", - "medium", - "high", - "max" - ], - "supportsAdaptiveThinking": true, - "supportsEffort": true, - "supportsFastMode": true, - "value": "opus[1m]" - }, - { - "description": "Haiku 4.5 \u00b7 Fastest for quick answers \u00b7 $1/$5 per Mtok", - "displayName": "Haiku", - "value": "haiku" - }, - { - "description": "claude-haiku-4-5-20251001", - "displayName": "Haiku 4.5", - "value": "claude-haiku-4-5-20251001" - } - ], - "output_style": "default", - "pid": 87088 - }, - "subtype": "success" - }, - "type": "control_response" - } - }, - { - "op": "write", - "payload": { - "kind": "json", - "value": { - "message": { - "content": "You must delegate this task to the bundled general-purpose agent. Have that agent inspect the current repository and reply with only the repository name. Do not answer directly without using the subagent.", - "role": "user" - }, - "parent_tool_use_id": null, - "session_id": "default", - "type": "user" - } - } - }, - { - "op": "read", - "payload": { - "agents": [ - "general-purpose", - "statusline-setup", - "Explore", - "Plan" - ], - "apiKeySource": "ANTHROPIC_API_KEY", - "claude_code_version": "2.1.71", - "cwd": "", - "fast_mode_state": "off", - "mcp_servers": [], - "model": "claude-haiku-4-5-20251001", - "output_style": "default", - "permissionMode": "bypassPermissions", - "plugins": [], - "session_id": "20233b8f-16f5-4e9a-a947-ea211e476dce", - "skills": [ - "keybindings-help", - "debug", - "simplify", - "batch", - "loop", - "claude-api" - ], - "slash_commands": [ - "keybindings-help", - "debug", - "simplify", - "batch", - "loop", - "claude-api", - "compact", - "context", - "cost", - "heapdump", - "init", - "pr-comments", - "release-notes", - "review", - "security-review", - "insights" - ], - "subtype": "init", - "tools": [ - "Task", - "TaskOutput", - "Bash", - "Glob", - "Grep", - "ExitPlanMode", - "Read", - "Edit", - "Write", - "NotebookEdit", - "WebFetch", - "TodoWrite", - "WebSearch", - "TaskStop", - "AskUserQuestion", - "Skill", - "EnterPlanMode", - "EnterWorktree", - "CronCreate", - "CronDelete", - "CronList", - "ToolSearch" - ], - "type": "system", - "uuid": "31a1f676-a444-48d2-9850-0f9a04f201ff" - } - }, - { - "op": "read", - "payload": { - "message": { - "content": [ - { - "signature": "", - "thinking": "The user wants me to delegate a task to the general-purpose agent to inspect the current repository and reply with only the repository name. I should use the Agent tool with the \"general-purpose\" subagent type.\n\nThe task is to inspect the current repository and reply with the repository name. I'll ask the agent to do this.", - "type": "thinking" - } - ], - "context_management": null, - "id": "msg_01EKkgN3VHXAzhbB9rcWPDnY", - "model": "claude-haiku-4-5-20251001", - "role": "assistant", - "stop_reason": null, - "stop_sequence": null, - "type": "message", - "usage": { - "cache_creation": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 4210 - }, - "cache_creation_input_tokens": 4210, - "cache_read_input_tokens": 13723, - "inference_geo": "not_available", - "input_tokens": 10, - "output_tokens": 1, - "service_tier": "standard" - } - }, - "parent_tool_use_id": null, - "session_id": "20233b8f-16f5-4e9a-a947-ea211e476dce", - "type": "assistant", - "uuid": "ea022d57-eaf5-4cec-af5c-8a1532c050e2" - } - }, - { - "op": "read", - "payload": { - "message": { - "content": [ - { - "caller": { - "type": "direct" - }, - "id": "toolu_01DiTaCFdU8v7PtqKwcKiVfC", - "input": { - "description": "Inspect repository and return name", - "prompt": "Inspect the current git repository and determine its name. Use git commands to find the repository name (you can check the git config, the .git directory, or the remote URLs). Reply with ONLY the repository name, nothing else.", - "subagent_type": "general-purpose" - }, - "name": "Agent", - "type": "tool_use" - } - ], - "context_management": null, - "id": "msg_01EKkgN3VHXAzhbB9rcWPDnY", - "model": "claude-haiku-4-5-20251001", - "role": "assistant", - "stop_reason": null, - "stop_sequence": null, - "type": "message", - "usage": { - "cache_creation": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 4210 - }, - "cache_creation_input_tokens": 4210, - "cache_read_input_tokens": 13723, - "inference_geo": "not_available", - "input_tokens": 10, - "output_tokens": 1, - "service_tier": "standard" - } - }, - "parent_tool_use_id": null, - "session_id": "20233b8f-16f5-4e9a-a947-ea211e476dce", - "type": "assistant", - "uuid": "e7806e52-0636-4302-aaa1-39937f78fd28" - } - }, - { - "op": "read", - "payload": { - "description": "Inspect repository and return name", - "prompt": "Inspect the current git repository and determine its name. Use git commands to find the repository name (you can check the git config, the .git directory, or the remote URLs). Reply with ONLY the repository name, nothing else.", - "session_id": "20233b8f-16f5-4e9a-a947-ea211e476dce", - "subtype": "task_started", - "task_id": "adc4e28af324cf3b8", - "task_type": "local_agent", - "tool_use_id": "toolu_01DiTaCFdU8v7PtqKwcKiVfC", - "type": "system", - "uuid": "28578d9d-1a26-4102-a023-8112d75cf795" - } - }, - { - "op": "read", - "payload": { - "message": { - "content": [ - { - "text": "Inspect the current git repository and determine its name. Use git commands to find the repository name (you can check the git config, the .git directory, or the remote URLs). Reply with ONLY the repository name, nothing else.", - "type": "text" - } - ], - "role": "user" - }, - "parent_tool_use_id": "toolu_01DiTaCFdU8v7PtqKwcKiVfC", - "session_id": "20233b8f-16f5-4e9a-a947-ea211e476dce", - "type": "user", - "uuid": "547509d4-14ac-4b66-a879-ca5a515aeede" - } - }, - { - "op": "read", - "payload": { - "description": "Running Get the remote URL to determine repository name", - "last_tool_name": "Bash", - "session_id": "20233b8f-16f5-4e9a-a947-ea211e476dce", - "subtype": "task_progress", - "task_id": "adc4e28af324cf3b8", - "tool_use_id": "toolu_01DiTaCFdU8v7PtqKwcKiVfC", - "type": "system", - "usage": { - "duration_ms": 1606, - "tool_uses": 1, - "total_tokens": 13352 - }, - "uuid": "0e4dd812-c727-45ca-9116-df155c0af17a" - } - }, - { - "op": "read", - "payload": { - "message": { - "content": [ - { - "caller": { - "type": "direct" - }, - "id": "toolu_015sKk5zQ5seoMWtsJdWWeKn", - "input": { - "command": "git config --get remote.origin.url", - "description": "Get the remote URL to determine repository name" - }, - "name": "Bash", - "type": "tool_use" - } - ], - "context_management": null, - "id": "msg_01BULgW4AiYyLLva26eT6xbg", - "model": "claude-haiku-4-5-20251001", - "role": "assistant", - "stop_reason": null, - "stop_sequence": null, - "type": "message", - "usage": { - "cache_creation": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 13322 - }, - "cache_creation_input_tokens": 13322, - "cache_read_input_tokens": 0, - "inference_geo": "not_available", - "input_tokens": 3, - "output_tokens": 27, - "service_tier": "standard" - } - }, - "parent_tool_use_id": "toolu_01DiTaCFdU8v7PtqKwcKiVfC", - "session_id": "20233b8f-16f5-4e9a-a947-ea211e476dce", - "type": "assistant", - "uuid": "1d8ecc87-127f-411a-833d-b7b887974003" - } - }, - { - "op": "read", - "payload": { - "message": { - "content": [ - { - "content": "git@github.com:braintrustdata/braintrust-sdk-python.git", - "is_error": false, - "tool_use_id": "toolu_015sKk5zQ5seoMWtsJdWWeKn", - "type": "tool_result" - } - ], - "role": "user" - }, - "parent_tool_use_id": "toolu_01DiTaCFdU8v7PtqKwcKiVfC", - "session_id": "20233b8f-16f5-4e9a-a947-ea211e476dce", - "type": "user", - "uuid": "d9eb9d0a-f360-4b4d-9589-9791208fba3a" - } - }, - { - "op": "read", - "payload": { - "output_file": "", - "session_id": "20233b8f-16f5-4e9a-a947-ea211e476dce", - "status": "completed", - "subtype": "task_notification", - "summary": "Inspect repository and return name", - "task_id": "adc4e28af324cf3b8", - "tool_use_id": "toolu_01DiTaCFdU8v7PtqKwcKiVfC", - "type": "system", - "usage": { - "duration_ms": 2656, - "tool_uses": 1, - "total_tokens": 13471 - }, - "uuid": "b7227885-b5d2-4cab-b5c0-62abefc765ff" - } - }, - { - "op": "read", - "payload": { - "message": { - "content": [ - { - "content": [ - { - "text": "braintrust-sdk-python", - "type": "text" - }, - { - "text": "agentId: adc4e28af324cf3b8 (for resuming to continue this agent's work if needed)\ntotal_tokens: 13453\ntool_uses: 1\nduration_ms: 2657", - "type": "text" - } - ], - "tool_use_id": "toolu_01DiTaCFdU8v7PtqKwcKiVfC", - "type": "tool_result" - } - ], - "role": "user" - }, - "parent_tool_use_id": null, - "session_id": "20233b8f-16f5-4e9a-a947-ea211e476dce", - "tool_use_result": { - "agentId": "adc4e28af324cf3b8", - "content": [ - { - "text": "braintrust-sdk-python", - "type": "text" - } - ], - "prompt": "Inspect the current git repository and determine its name. Use git commands to find the repository name (you can check the git config, the .git directory, or the remote URLs). Reply with ONLY the repository name, nothing else.", - "status": "completed", - "totalDurationMs": 2657, - "totalTokens": 13453, - "totalToolUseCount": 1, - "usage": { - "cache_creation": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 115 - }, - "cache_creation_input_tokens": 115, - "cache_read_input_tokens": 13322, - "inference_geo": "", - "input_tokens": 6, - "iterations": [], - "output_tokens": 10, - "server_tool_use": { - "web_fetch_requests": 0, - "web_search_requests": 0 - }, - "service_tier": "standard", - "speed": "standard" - } - }, - "type": "user", - "uuid": "60e92138-c01c-4eda-b0e7-869f641bbc9b" - } - }, - { - "op": "read", - "payload": { - "message": { - "content": [ - { - "signature": "", - "thinking": "The agent has returned \"braintrust-sdk-python\" as the repository name. The user asked me to delegate the task and have the agent reply with only the repository name. I should now provide that result to the user.", - "type": "thinking" - } - ], - "context_management": null, - "id": "msg_015Hg13AzAA4XGSKmkzhT2UH", - "model": "claude-haiku-4-5-20251001", - "role": "assistant", - "stop_reason": null, - "stop_sequence": null, - "type": "message", - "usage": { - "cache_creation": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 306 - }, - "cache_creation_input_tokens": 306, - "cache_read_input_tokens": 17933, - "inference_geo": "not_available", - "input_tokens": 8, - "output_tokens": 5, - "service_tier": "standard" - } - }, - "parent_tool_use_id": null, - "session_id": "20233b8f-16f5-4e9a-a947-ea211e476dce", - "type": "assistant", - "uuid": "ec279b2d-f5be-4f3f-8a38-8ae546eb478f" - } - }, - { - "op": "read", - "payload": { - "message": { - "content": [ - { - "text": "braintrust-sdk-python", - "type": "text" - } - ], - "context_management": null, - "id": "msg_015Hg13AzAA4XGSKmkzhT2UH", - "model": "claude-haiku-4-5-20251001", - "role": "assistant", - "stop_reason": null, - "stop_sequence": null, - "type": "message", - "usage": { - "cache_creation": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 306 - }, - "cache_creation_input_tokens": 306, - "cache_read_input_tokens": 17933, - "inference_geo": "not_available", - "input_tokens": 8, - "output_tokens": 5, - "service_tier": "standard" - } - }, - "parent_tool_use_id": null, - "session_id": "20233b8f-16f5-4e9a-a947-ea211e476dce", - "type": "assistant", - "uuid": "83984605-5460-415f-9cf9-39a0c626a801" - } - }, - { - "op": "read", - "payload": { - "duration_api_ms": 6581, - "duration_ms": 6955, - "fast_mode_state": "off", - "is_error": false, - "modelUsage": { - "claude-haiku-4-5-20251001": { - "cacheCreationInputTokens": 17953, - "cacheReadInputTokens": 44978, - "contextWindow": 200000, - "costUSD": 0.028971049999999998, - "inputTokens": 27, - "maxOutputTokens": 32000, - "outputTokens": 401, - "webSearchRequests": 0 - } - }, - "num_turns": 2, - "permission_denials": [], - "result": "braintrust-sdk-python", - "session_id": "20233b8f-16f5-4e9a-a947-ea211e476dce", - "stop_reason": "end_turn", - "subtype": "success", - "total_cost_usd": 0.028971049999999998, - "type": "result", - "usage": { - "cache_creation": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 4516 - }, - "cache_creation_input_tokens": 4516, - "cache_read_input_tokens": 31656, - "inference_geo": "", - "input_tokens": 18, - "iterations": [], - "output_tokens": 285, - "server_tool_use": { - "web_fetch_requests": 0, - "web_search_requests": 0 - }, - "service_tier": "standard", - "speed": "standard" - }, - "uuid": "616f9447-8d7f-4fc8-821b-35f66e301c2e" - } - } - ], - "sdk_version": "0.1.48" -} diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_calculator_with_multiple_operations__sdk_0_1_10.json b/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_calculator_with_multiple_operations.json similarity index 61% rename from py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_calculator_with_multiple_operations__sdk_0_1_10.json rename to py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_calculator_with_multiple_operations.json index 1f4ac12d..9b64ea5a 100644 --- a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_calculator_with_multiple_operations__sdk_0_1_10.json +++ b/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_calculator_with_multiple_operations.json @@ -10,7 +10,7 @@ "hooks": null, "subtype": "initialize" }, - "request_id": "req_1_0c20d744", + "request_id": "req_1_7bac646b", "type": "control_request" } } @@ -27,15 +27,15 @@ "capabilities": {}, "clientInfo": { "name": "claude-code", - "version": "2.0.53" + "version": "2.1.71" }, - "protocolVersion": "2025-06-18" + "protocolVersion": "2025-11-25" } }, "server_name": "calculator", "subtype": "mcp_message" }, - "request_id": "9230a3c8-1b50-49d2-a49a-b7434a62deff", + "request_id": "2127d480-b055-4e3f-864b-d8a56d74bf39", "type": "control_request" } }, @@ -45,7 +45,7 @@ "kind": "json", "value": { "response": { - "request_id": "9230a3c8-1b50-49d2-a49a-b7434a62deff", + "request_id": "2127d480-b055-4e3f-864b-d8a56d74bf39", "response": { "mcp_response": { "id": 0, @@ -72,87 +72,16 @@ "op": "read", "payload": { "response": { - "request_id": "req_1_0c20d744", + "request_id": "req_1_7bac646b", "response": { "account": { "apiKeySource": "ANTHROPIC_API_KEY", "tokenSource": "none" }, - "available_output_styles": [ - "default", - "Explanatory", - "Learning" - ], - "commands": [ - { - "argumentHint": "", - "description": "Clear conversation history but keep a summary in context. Optional: /compact [instructions for summarization]", - "name": "compact" - }, - { - "argumentHint": "", - "description": "Visualize current context usage as a colored grid", - "name": "context" - }, - { - "argumentHint": "", - "description": "Show the total cost and duration of the current session", - "name": "cost" - }, - { - "argumentHint": "", - "description": "Initialize a new CLAUDE.md file with codebase documentation", - "name": "init" - }, - { - "argumentHint": "", - "description": "Get comments from a GitHub pull request", - "name": "pr-comments" - }, - { - "argumentHint": "", - "description": "View release notes", - "name": "release-notes" - }, - { - "argumentHint": "", - "description": "List current todo items", - "name": "todos" - }, - { - "argumentHint": "", - "description": "Review a pull request", - "name": "review" - }, - { - "argumentHint": "", - "description": "Complete a security review of the pending changes on the current branch", - "name": "security-review" - } - ], - "models": [ - { - "description": "Use the default model (currently Sonnet 4.5) \u00b7 $3/$15 per Mtok", - "displayName": "Default (recommended)", - "value": "default" - }, - { - "description": "Opus 4.5 \u00b7 Most capable for complex work \u00b7 $5/$25 per Mtok", - "displayName": "Opus", - "value": "opus" - }, - { - "description": "Haiku 4.5 \u00b7 Fastest for quick answers \u00b7 $1/$5 per Mtok", - "displayName": "Haiku", - "value": "haiku" - }, - { - "description": "Custom model", - "displayName": "claude-haiku-4-5-20251001", - "value": "claude-haiku-4-5-20251001" - } - ], - "output_style": "default" + "agents": [], + "available_output_styles": [], + "commands": [], + "models": [] }, "subtype": "success" }, @@ -185,10 +114,29 @@ "server_name": "calculator", "subtype": "mcp_message" }, - "request_id": "801e3b05-8152-4349-8d6d-af4bf91544ab", + "request_id": "283bbf65-3d77-4488-aa6d-5c50fea285bf", "type": "control_request" } }, + { + "op": "write", + "payload": { + "kind": "json", + "value": { + "response": { + "request_id": "283bbf65-3d77-4488-aa6d-5c50fea285bf", + "response": { + "mcp_response": { + "jsonrpc": "2.0", + "result": {} + } + }, + "subtype": "success" + }, + "type": "control_response" + } + } + }, { "op": "read", "payload": { @@ -201,15 +149,15 @@ "capabilities": {}, "clientInfo": { "name": "claude-code", - "version": "2.0.53" + "version": "2.1.71" }, - "protocolVersion": "2025-06-18" + "protocolVersion": "2025-11-25" } }, "server_name": "calculator", "subtype": "mcp_message" }, - "request_id": "946f085c-e65f-4e55-8b22-24449b0781d0", + "request_id": "7e196f9d-3a93-42aa-b4f8-cbc7bae54d8a", "type": "control_request" } }, @@ -219,26 +167,7 @@ "kind": "json", "value": { "response": { - "request_id": "801e3b05-8152-4349-8d6d-af4bf91544ab", - "response": { - "mcp_response": { - "jsonrpc": "2.0", - "result": {} - } - }, - "subtype": "success" - }, - "type": "control_response" - } - } - }, - { - "op": "write", - "payload": { - "kind": "json", - "value": { - "response": { - "request_id": "946f085c-e65f-4e55-8b22-24449b0781d0", + "request_id": "7e196f9d-3a93-42aa-b4f8-cbc7bae54d8a", "response": { "mcp_response": { "id": 0, @@ -273,7 +202,7 @@ "server_name": "calculator", "subtype": "mcp_message" }, - "request_id": "c6146719-2daa-4747-9ee6-11783868e8d0", + "request_id": "2cf91a70-c2e5-434c-93f3-a0f21019490e", "type": "control_request" } }, @@ -283,7 +212,7 @@ "kind": "json", "value": { "response": { - "request_id": "c6146719-2daa-4747-9ee6-11783868e8d0", + "request_id": "2cf91a70-c2e5-434c-93f3-a0f21019490e", "response": { "mcp_response": { "id": 1, @@ -343,7 +272,7 @@ "server_name": "calculator", "subtype": "mcp_message" }, - "request_id": "d4daa9cc-4d34-4f43-b2f7-c525a7db2cc3", + "request_id": "c37d9442-dd36-4269-b59a-feeed63e7d61", "type": "control_request" } }, @@ -353,7 +282,7 @@ "kind": "json", "value": { "response": { - "request_id": "d4daa9cc-4d34-4f43-b2f7-c525a7db2cc3", + "request_id": "c37d9442-dd36-4269-b59a-feeed63e7d61", "response": { "mcp_response": { "jsonrpc": "2.0", @@ -366,77 +295,6 @@ } } }, - { - "op": "read", - "payload": { - "request": { - "message": { - "id": 1, - "jsonrpc": "2.0", - "method": "tools/list" - }, - "server_name": "calculator", - "subtype": "mcp_message" - }, - "request_id": "1914635c-c1eb-43b5-b35e-ee3dc29fcd1d", - "type": "control_request" - } - }, - { - "op": "write", - "payload": { - "kind": "json", - "value": { - "response": { - "request_id": "1914635c-c1eb-43b5-b35e-ee3dc29fcd1d", - "response": { - "mcp_response": { - "id": 1, - "jsonrpc": "2.0", - "result": { - "tools": [ - { - "description": "Performs basic arithmetic operations", - "inputSchema": { - "properties": { - "a": { - "description": "First number", - "type": "number" - }, - "b": { - "description": "Second number", - "type": "number" - }, - "operation": { - "description": "The arithmetic operation to perform", - "enum": [ - "add", - "subtract", - "multiply", - "divide" - ], - "type": "string" - } - }, - "required": [ - "operation", - "a", - "b" - ], - "type": "object" - }, - "name": "calculator" - } - ] - } - } - }, - "subtype": "success" - }, - "type": "control_response" - } - } - }, { "op": "read", "payload": { @@ -447,8 +305,9 @@ "Plan" ], "apiKeySource": "ANTHROPIC_API_KEY", - "claude_code_version": "2.0.53", + "claude_code_version": "2.1.71", "cwd": "", + "fast_mode_state": "off", "mcp_servers": [ { "name": "calculator", @@ -459,22 +318,37 @@ "output_style": "default", "permissionMode": "bypassPermissions", "plugins": [], - "session_id": "7388c564-487e-451f-865d-757bf0122afe", - "skills": [], + "session_id": "aedda120-23b6-46ba-8377-2bff1cd7410b", + "skills": [ + "keybindings-help", + "debug", + "simplify", + "batch", + "loop", + "claude-api" + ], "slash_commands": [ + "keybindings-help", + "debug", + "simplify", + "batch", + "loop", + "claude-api", "compact", "context", "cost", + "heapdump", "init", "pr-comments", "release-notes", - "todos", "review", - "security-review" + "security-review", + "insights" ], "subtype": "init", "tools": [ "Task", + "TaskOutput", "Bash", "Glob", "Grep", @@ -486,15 +360,19 @@ "WebFetch", "TodoWrite", "WebSearch", - "BashOutput", - "KillShell", + "TaskStop", + "AskUserQuestion", "Skill", - "SlashCommand", "EnterPlanMode", + "EnterWorktree", + "CronCreate", + "CronDelete", + "CronList", + "ToolSearch", "mcp__calculator__calculator" ], "type": "system", - "uuid": "e020a28d-7993-4e1e-a829-99a0d327d349" + "uuid": "9065a984-ce57-42b9-b22e-1f81a9daf0ed" } }, { @@ -503,12 +381,13 @@ "message": { "content": [ { - "text": "I'll help you with that calculation. Let me first multiply 15 by 7, then subtract 5 from the result.", - "type": "text" + "signature": "", + "thinking": "The user is asking me to:\n1. Multiply 15 by 7\n2. Subtract 5 from the result\n\nI can use the calculator tool for this. Let me first multiply 15 by 7, then subtract 5 from that result.\n\nFirst call: 15 * 7 = 105\nSecond call: 105 - 5 = 100\n\nI'll need to make the first calculation, get the result, then make the second calculation. These are dependent operations, so I should do the first one first.", + "type": "thinking" } ], "context_management": null, - "id": "msg_017F32N8ghRVakXtFJYxMRnZ", + "id": "msg_01SZLU92jt1i6LRNuaTfx7HD", "model": "claude-haiku-4-5-20251001", "role": "assistant", "stop_reason": null, @@ -517,20 +396,20 @@ "usage": { "cache_creation": { "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 14015 + "ephemeral_5m_input_tokens": 4311 }, - "cache_creation_input_tokens": 14015, - "cache_read_input_tokens": 0, + "cache_creation_input_tokens": 4311, + "cache_read_input_tokens": 13723, "inference_geo": "not_available", - "input_tokens": 3, - "output_tokens": 5, + "input_tokens": 10, + "output_tokens": 7, "service_tier": "standard" } }, "parent_tool_use_id": null, - "session_id": "7388c564-487e-451f-865d-757bf0122afe", + "session_id": "aedda120-23b6-46ba-8377-2bff1cd7410b", "type": "assistant", - "uuid": "34bd1986-f756-439f-b792-f97ee641cdcf" + "uuid": "1a074bf6-f4fa-41c9-82f1-b14cbc463f93" } }, { @@ -542,7 +421,7 @@ "caller": { "type": "direct" }, - "id": "toolu_01K6fcD1aZeYkN9AVAX8AU5M", + "id": "toolu_01GTvysuW8mhwmw8uR6trESb", "input": { "a": 15, "b": 7, @@ -553,7 +432,7 @@ } ], "context_management": null, - "id": "msg_017F32N8ghRVakXtFJYxMRnZ", + "id": "msg_01SZLU92jt1i6LRNuaTfx7HD", "model": "claude-haiku-4-5-20251001", "role": "assistant", "stop_reason": null, @@ -562,20 +441,20 @@ "usage": { "cache_creation": { "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 14015 + "ephemeral_5m_input_tokens": 4311 }, - "cache_creation_input_tokens": 14015, - "cache_read_input_tokens": 0, + "cache_creation_input_tokens": 4311, + "cache_read_input_tokens": 13723, "inference_geo": "not_available", - "input_tokens": 3, - "output_tokens": 119, + "input_tokens": 10, + "output_tokens": 7, "service_tier": "standard" } }, "parent_tool_use_id": null, - "session_id": "7388c564-487e-451f-865d-757bf0122afe", + "session_id": "aedda120-23b6-46ba-8377-2bff1cd7410b", "type": "assistant", - "uuid": "821087f9-6b60-4e08-b780-cd74ccf51eba" + "uuid": "4fd4c99b-f8a4-4a63-9ca8-e0ee9b66d89f" } }, { @@ -588,7 +467,8 @@ "method": "tools/call", "params": { "_meta": { - "claudecode/toolUseId": "toolu_01K6fcD1aZeYkN9AVAX8AU5M" + "claudecode/toolUseId": "toolu_01GTvysuW8mhwmw8uR6trESb", + "progressToken": 2 }, "arguments": { "a": 15, @@ -601,7 +481,7 @@ "server_name": "calculator", "subtype": "mcp_message" }, - "request_id": "15fa3d33-0028-44ff-9a64-fd9831fbc7cb", + "request_id": "5103eab1-ac9e-4773-a7f3-0d1896284ef0", "type": "control_request" } }, @@ -611,7 +491,7 @@ "kind": "json", "value": { "response": { - "request_id": "15fa3d33-0028-44ff-9a64-fd9831fbc7cb", + "request_id": "5103eab1-ac9e-4773-a7f3-0d1896284ef0", "response": { "mcp_response": { "id": 2, @@ -644,14 +524,14 @@ "type": "text" } ], - "tool_use_id": "toolu_01K6fcD1aZeYkN9AVAX8AU5M", + "tool_use_id": "toolu_01GTvysuW8mhwmw8uR6trESb", "type": "tool_result" } ], "role": "user" }, "parent_tool_use_id": null, - "session_id": "7388c564-487e-451f-865d-757bf0122afe", + "session_id": "aedda120-23b6-46ba-8377-2bff1cd7410b", "tool_use_result": [ { "text": "The result of multiply(15, 7) is 105", @@ -659,7 +539,7 @@ } ], "type": "user", - "uuid": "45f4901b-a992-409a-8974-61718f0bd709" + "uuid": "e1b4150c-44c8-43ba-ad21-193308652b17" } }, { @@ -668,12 +548,13 @@ "message": { "content": [ { - "text": "Now let me subtract 5 from that result:", - "type": "text" + "signature": "", + "thinking": "Great, 15 * 7 = 105. Now I need to subtract 5 from 105.", + "type": "thinking" } ], "context_management": null, - "id": "msg_017YZdFndE8zj9aTno3Q8E5C", + "id": "msg_012L8xHqi8Qw2DMNPuxXhXk7", "model": "claude-haiku-4-5-20251001", "role": "assistant", "stop_reason": null, @@ -682,20 +563,20 @@ "usage": { "cache_creation": { "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 141 + "ephemeral_5m_input_tokens": 249 }, - "cache_creation_input_tokens": 141, - "cache_read_input_tokens": 14015, + "cache_creation_input_tokens": 249, + "cache_read_input_tokens": 18034, "inference_geo": "not_available", - "input_tokens": 6, - "output_tokens": 2, + "input_tokens": 8, + "output_tokens": 1, "service_tier": "standard" } }, "parent_tool_use_id": null, - "session_id": "7388c564-487e-451f-865d-757bf0122afe", + "session_id": "aedda120-23b6-46ba-8377-2bff1cd7410b", "type": "assistant", - "uuid": "d2c7719f-0e47-484f-a0bb-0aaaa368032b" + "uuid": "220a27fe-9bb8-4787-9b70-398b37020369" } }, { @@ -707,7 +588,7 @@ "caller": { "type": "direct" }, - "id": "toolu_01HkzvMnStwuQLnuhAefWfpa", + "id": "toolu_01PfSFqVXeF82k7N9oKrY7Ni", "input": { "a": 105, "b": 5, @@ -718,7 +599,7 @@ } ], "context_management": null, - "id": "msg_017YZdFndE8zj9aTno3Q8E5C", + "id": "msg_012L8xHqi8Qw2DMNPuxXhXk7", "model": "claude-haiku-4-5-20251001", "role": "assistant", "stop_reason": null, @@ -727,20 +608,20 @@ "usage": { "cache_creation": { "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 141 + "ephemeral_5m_input_tokens": 249 }, - "cache_creation_input_tokens": 141, - "cache_read_input_tokens": 14015, + "cache_creation_input_tokens": 249, + "cache_read_input_tokens": 18034, "inference_geo": "not_available", - "input_tokens": 6, - "output_tokens": 2, + "input_tokens": 8, + "output_tokens": 1, "service_tier": "standard" } }, "parent_tool_use_id": null, - "session_id": "7388c564-487e-451f-865d-757bf0122afe", + "session_id": "aedda120-23b6-46ba-8377-2bff1cd7410b", "type": "assistant", - "uuid": "4847dde4-a017-4065-8de3-9f6656352a66" + "uuid": "15aa21b8-cbf2-4fa4-bd93-a5ec31626375" } }, { @@ -753,7 +634,8 @@ "method": "tools/call", "params": { "_meta": { - "claudecode/toolUseId": "toolu_01HkzvMnStwuQLnuhAefWfpa" + "claudecode/toolUseId": "toolu_01PfSFqVXeF82k7N9oKrY7Ni", + "progressToken": 3 }, "arguments": { "a": 105, @@ -766,7 +648,7 @@ "server_name": "calculator", "subtype": "mcp_message" }, - "request_id": "8085b52c-99d9-4e78-9900-e07f0f8c46f6", + "request_id": "c261ffdf-d271-4522-ab21-b17c4db5c27d", "type": "control_request" } }, @@ -776,7 +658,7 @@ "kind": "json", "value": { "response": { - "request_id": "8085b52c-99d9-4e78-9900-e07f0f8c46f6", + "request_id": "c261ffdf-d271-4522-ab21-b17c4db5c27d", "response": { "mcp_response": { "id": 3, @@ -809,14 +691,14 @@ "type": "text" } ], - "tool_use_id": "toolu_01HkzvMnStwuQLnuhAefWfpa", + "tool_use_id": "toolu_01PfSFqVXeF82k7N9oKrY7Ni", "type": "tool_result" } ], "role": "user" }, "parent_tool_use_id": null, - "session_id": "7388c564-487e-451f-865d-757bf0122afe", + "session_id": "aedda120-23b6-46ba-8377-2bff1cd7410b", "tool_use_result": [ { "text": "The result of subtract(105, 5) is 100", @@ -824,7 +706,44 @@ } ], "type": "user", - "uuid": "bca777bc-06a9-4957-83ca-2b11700ab5fb" + "uuid": "c091d10a-3e93-4251-97fb-3ff4b0436391" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "signature": "", + "thinking": "Perfect! So:\n- 15 \u00d7 7 = 105\n- 105 - 5 = 100\n\nThe final answer is 100.", + "type": "thinking" + } + ], + "context_management": null, + "id": "msg_0113WA3B1LfYfcAwGDYzsQK8", + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_reason": null, + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 155 + }, + "cache_creation_input_tokens": 155, + "cache_read_input_tokens": 18283, + "inference_geo": "not_available", + "input_tokens": 8, + "output_tokens": 1, + "service_tier": "standard" + } + }, + "parent_tool_use_id": null, + "session_id": "aedda120-23b6-46ba-8377-2bff1cd7410b", + "type": "assistant", + "uuid": "e1e7bf78-2285-4bb0-845e-b902e6965db0" } }, { @@ -833,12 +752,12 @@ "message": { "content": [ { - "text": "**The answer is 100.**\n\nHere's the breakdown:\n- 15 \u00d7 7 = 105\n- 105 - 5 = 100", + "text": "The answer is **100**.\n\nHere's the breakdown:\n- 15 \u00d7 7 = 105\n- 105 \u2212 5 = **100**", "type": "text" } ], "context_management": null, - "id": "msg_018YckGW4fMsPGHS288TRaPf", + "id": "msg_0113WA3B1LfYfcAwGDYzsQK8", "model": "claude-haiku-4-5-20251001", "role": "assistant", "stop_reason": null, @@ -847,64 +766,70 @@ "usage": { "cache_creation": { "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 127 + "ephemeral_5m_input_tokens": 155 }, - "cache_creation_input_tokens": 127, - "cache_read_input_tokens": 14156, + "cache_creation_input_tokens": 155, + "cache_read_input_tokens": 18283, "inference_geo": "not_available", - "input_tokens": 6, + "input_tokens": 8, "output_tokens": 1, "service_tier": "standard" } }, "parent_tool_use_id": null, - "session_id": "7388c564-487e-451f-865d-757bf0122afe", + "session_id": "aedda120-23b6-46ba-8377-2bff1cd7410b", "type": "assistant", - "uuid": "1d9e74aa-ace1-4a18-a9e2-1e64bb2ee6dc" + "uuid": "487dce71-f0fa-40c9-878a-84b146d11ffe" } }, { "op": "read", "payload": { - "duration_api_ms": 8137, - "duration_ms": 5529, + "duration_api_ms": 4772, + "duration_ms": 4944, + "fast_mode_state": "off", "is_error": false, "modelUsage": { "claude-haiku-4-5-20251001": { - "cacheCreationInputTokens": 14283, - "cacheReadInputTokens": 28171, + "cacheCreationInputTokens": 4715, + "cacheReadInputTokens": 50040, "contextWindow": 200000, - "costUSD": 0.02362985, - "inputTokens": 979, - "outputTokens": 396, + "costUSD": 0.013053750000000001, + "inputTokens": 26, + "maxOutputTokens": 32000, + "outputTokens": 426, "webSearchRequests": 0 } }, "num_turns": 3, "permission_denials": [], - "result": "**The answer is 100.**\n\nHere's the breakdown:\n- 15 \u00d7 7 = 105\n- 105 - 5 = 100", - "session_id": "7388c564-487e-451f-865d-757bf0122afe", + "result": "The answer is **100**.\n\nHere's the breakdown:\n- 15 \u00d7 7 = 105\n- 105 \u2212 5 = **100**", + "session_id": "aedda120-23b6-46ba-8377-2bff1cd7410b", + "stop_reason": "end_turn", "subtype": "success", - "total_cost_usd": 0.02362985, + "total_cost_usd": 0.013053750000000001, "type": "result", "usage": { "cache_creation": { "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 14283 + "ephemeral_5m_input_tokens": 4715 }, - "cache_creation_input_tokens": 14283, - "cache_read_input_tokens": 28171, - "input_tokens": 15, - "output_tokens": 261, + "cache_creation_input_tokens": 4715, + "cache_read_input_tokens": 50040, + "inference_geo": "", + "input_tokens": 26, + "iterations": [], + "output_tokens": 426, "server_tool_use": { "web_fetch_requests": 0, "web_search_requests": 0 }, - "service_tier": "standard" + "service_tier": "standard", + "speed": "standard" }, - "uuid": "552114b7-46af-4323-b4e3-07c8d71eaa38" + "uuid": "3ed72c05-f236-44cc-bcb7-dea85e825f7b" } } ], - "sdk_version": "0.1.10" + "sdk_version": "0.1.48" } diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_calculator_with_multiple_operations__sdk_0_1_48.json b/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_calculator_with_multiple_operations__sdk_0_1_48.json deleted file mode 100644 index 77d7f360..00000000 --- a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_calculator_with_multiple_operations__sdk_0_1_48.json +++ /dev/null @@ -1,1006 +0,0 @@ -{ - "cassette_name": "test_calculator_with_multiple_operations", - "events": [ - { - "op": "write", - "payload": { - "kind": "json", - "value": { - "request": { - "hooks": null, - "subtype": "initialize" - }, - "request_id": "req_1_48c9b054", - "type": "control_request" - } - } - }, - { - "op": "read", - "payload": { - "request": { - "message": { - "id": 0, - "jsonrpc": "2.0", - "method": "initialize", - "params": { - "capabilities": {}, - "clientInfo": { - "name": "claude-code", - "version": "2.1.71" - }, - "protocolVersion": "2025-11-25" - } - }, - "server_name": "calculator", - "subtype": "mcp_message" - }, - "request_id": "5f7e9e7e-4a24-41b7-8a68-63a1bfbbd56b", - "type": "control_request" - } - }, - { - "op": "write", - "payload": { - "kind": "json", - "value": { - "response": { - "request_id": "5f7e9e7e-4a24-41b7-8a68-63a1bfbbd56b", - "response": { - "mcp_response": { - "id": 0, - "jsonrpc": "2.0", - "result": { - "capabilities": { - "tools": {} - }, - "protocolVersion": "2024-11-05", - "serverInfo": { - "name": "calculator", - "version": "1.0.0" - } - } - } - }, - "subtype": "success" - }, - "type": "control_response" - } - } - }, - { - "op": "read", - "payload": { - "response": { - "request_id": "req_1_48c9b054", - "response": { - "account": { - "apiKeySource": "ANTHROPIC_API_KEY", - "tokenSource": "none" - }, - "agents": [ - { - "description": "General-purpose agent for researching complex questions, searching for code, and executing multi-step tasks. When you are searching for a keyword or file and are not confident that you will find the right match in the first few tries use this agent to perform the search for you.", - "name": "general-purpose" - }, - { - "description": "Use this agent to configure the user's Claude Code status line setting.", - "model": "sonnet", - "name": "statusline-setup" - }, - { - "description": "Fast agent specialized for exploring codebases. Use this when you need to quickly find files by patterns (eg. \"src/components/**/*.tsx\"), search code for keywords (eg. \"API endpoints\"), or answer questions about the codebase (eg. \"how do API endpoints work?\"). When calling this agent, specify the desired thoroughness level: \"quick\" for basic searches, \"medium\" for moderate exploration, or \"very thorough\" for comprehensive analysis across multiple locations and naming conventions.", - "model": "haiku", - "name": "Explore" - }, - { - "description": "Software architect agent for designing implementation plans. Use this when you need to plan the implementation strategy for a task. Returns step-by-step plans, identifies critical files, and considers architectural trade-offs.", - "name": "Plan" - } - ], - "available_output_styles": [ - "default", - "Explanatory", - "Learning" - ], - "commands": [ - { - "argumentHint": "", - "description": "Use when the user wants to customize keyboard shortcuts, rebind keys, add chord bindings, or modify ~/.claude/keybindings.json. Examples: \"rebind ctrl+s\", \"add a chord shortcut\", \"change the submit key\", \"customize keybindings\". (bundled)", - "name": "keybindings-help" - }, - { - "argumentHint": "[issue description]", - "description": "Enable debug logging for this session and help diagnose issues (bundled)", - "name": "debug" - }, - { - "argumentHint": "", - "description": "Review changed code for reuse, quality, and efficiency, then fix any issues found. (bundled)", - "name": "simplify" - }, - { - "argumentHint": "", - "description": "Research and plan a large-scale change, then execute it in parallel across 5\u201330 isolated worktree agents that each open a PR. (bundled)", - "name": "batch" - }, - { - "argumentHint": "[interval] ", - "description": "Run a prompt or slash command on a recurring interval (e.g. /loop 5m /foo, defaults to 10m) (bundled)", - "name": "loop" - }, - { - "argumentHint": "", - "description": "Build apps with the Claude API or Anthropic SDK.\nTRIGGER when: code imports `anthropic`/`@anthropic-ai/sdk`/`claude_agent_sdk`, or user asks to use Claude API, Anthropic SDKs, or Agent SDK.\nDO NOT TRIGGER when: code imports `openai`/other AI SDK, general programming, or ML/data-science tasks. (bundled)", - "name": "claude-api" - }, - { - "argumentHint": "", - "description": "Clear conversation history but keep a summary in context. Optional: /compact [instructions for summarization]", - "name": "compact" - }, - { - "argumentHint": "", - "description": "Show current context usage", - "name": "context" - }, - { - "argumentHint": "", - "description": "Show the total cost and duration of the current session", - "name": "cost" - }, - { - "argumentHint": "", - "description": "Dump the JS heap to ~/Desktop", - "name": "heapdump" - }, - { - "argumentHint": "", - "description": "Initialize a new CLAUDE.md file with codebase documentation", - "name": "init" - }, - { - "argumentHint": "", - "description": "Get comments from a GitHub pull request", - "name": "pr-comments" - }, - { - "argumentHint": "", - "description": "View release notes", - "name": "release-notes" - }, - { - "argumentHint": "", - "description": "Review a pull request", - "name": "review" - }, - { - "argumentHint": "", - "description": "Complete a security review of the pending changes on the current branch", - "name": "security-review" - }, - { - "argumentHint": "", - "description": "Generate a report analyzing your Claude Code sessions", - "name": "insights" - } - ], - "models": [ - { - "description": "Use the default model (currently Sonnet 4.6) \u00b7 $3/$15 per Mtok", - "displayName": "Default (recommended)", - "supportedEffortLevels": [ - "low", - "medium", - "high", - "max" - ], - "supportsAdaptiveThinking": true, - "supportsEffort": true, - "value": "default" - }, - { - "description": "Sonnet 4.6 for long sessions \u00b7 $6/$22.50 per Mtok", - "displayName": "Sonnet (1M context)", - "supportedEffortLevels": [ - "low", - "medium", - "high", - "max" - ], - "supportsAdaptiveThinking": true, - "supportsEffort": true, - "value": "sonnet[1m]" - }, - { - "description": "Opus 4.6 \u00b7 Most capable for complex work \u00b7 $5/$25 per Mtok", - "displayName": "Opus", - "supportedEffortLevels": [ - "low", - "medium", - "high", - "max" - ], - "supportsAdaptiveThinking": true, - "supportsEffort": true, - "supportsFastMode": true, - "value": "opus" - }, - { - "description": "Opus 4.6 for long sessions \u00b7 $10/$37.50 per Mtok", - "displayName": "Opus (1M context)", - "supportedEffortLevels": [ - "low", - "medium", - "high", - "max" - ], - "supportsAdaptiveThinking": true, - "supportsEffort": true, - "supportsFastMode": true, - "value": "opus[1m]" - }, - { - "description": "Haiku 4.5 \u00b7 Fastest for quick answers \u00b7 $1/$5 per Mtok", - "displayName": "Haiku", - "value": "haiku" - }, - { - "description": "claude-haiku-4-5-20251001", - "displayName": "Haiku 4.5", - "value": "claude-haiku-4-5-20251001" - } - ], - "output_style": "default", - "pid": 99859 - }, - "subtype": "success" - }, - "type": "control_response" - } - }, - { - "op": "write", - "payload": { - "kind": "json", - "value": { - "message": { - "content": "What is 15 multiplied by 7? Then subtract 5 from the result.", - "role": "user" - }, - "parent_tool_use_id": null, - "session_id": "default", - "type": "user" - } - } - }, - { - "op": "read", - "payload": { - "request": { - "message": { - "jsonrpc": "2.0", - "method": "notifications/initialized" - }, - "server_name": "calculator", - "subtype": "mcp_message" - }, - "request_id": "2f3de201-ca0a-4364-9045-e4b758b11e8b", - "type": "control_request" - } - }, - { - "op": "write", - "payload": { - "kind": "json", - "value": { - "response": { - "request_id": "2f3de201-ca0a-4364-9045-e4b758b11e8b", - "response": { - "mcp_response": { - "jsonrpc": "2.0", - "result": {} - } - }, - "subtype": "success" - }, - "type": "control_response" - } - } - }, - { - "op": "read", - "payload": { - "request": { - "message": { - "id": 0, - "jsonrpc": "2.0", - "method": "initialize", - "params": { - "capabilities": {}, - "clientInfo": { - "name": "claude-code", - "version": "2.1.71" - }, - "protocolVersion": "2025-11-25" - } - }, - "server_name": "calculator", - "subtype": "mcp_message" - }, - "request_id": "b29e4751-1bd0-43ec-a1ef-0247078a6666", - "type": "control_request" - } - }, - { - "op": "write", - "payload": { - "kind": "json", - "value": { - "response": { - "request_id": "b29e4751-1bd0-43ec-a1ef-0247078a6666", - "response": { - "mcp_response": { - "id": 0, - "jsonrpc": "2.0", - "result": { - "capabilities": { - "tools": {} - }, - "protocolVersion": "2024-11-05", - "serverInfo": { - "name": "calculator", - "version": "1.0.0" - } - } - } - }, - "subtype": "success" - }, - "type": "control_response" - } - } - }, - { - "op": "read", - "payload": { - "request": { - "message": { - "id": 1, - "jsonrpc": "2.0", - "method": "tools/list" - }, - "server_name": "calculator", - "subtype": "mcp_message" - }, - "request_id": "eac5a619-8a6a-419c-8709-384f4c82efe9", - "type": "control_request" - } - }, - { - "op": "write", - "payload": { - "kind": "json", - "value": { - "response": { - "request_id": "eac5a619-8a6a-419c-8709-384f4c82efe9", - "response": { - "mcp_response": { - "id": 1, - "jsonrpc": "2.0", - "result": { - "tools": [ - { - "description": "Performs basic arithmetic operations", - "inputSchema": { - "properties": { - "a": { - "description": "First number", - "type": "number" - }, - "b": { - "description": "Second number", - "type": "number" - }, - "operation": { - "description": "The arithmetic operation to perform", - "enum": [ - "add", - "subtract", - "multiply", - "divide" - ], - "type": "string" - } - }, - "required": [ - "operation", - "a", - "b" - ], - "type": "object" - }, - "name": "calculator" - } - ] - } - } - }, - "subtype": "success" - }, - "type": "control_response" - } - } - }, - { - "op": "read", - "payload": { - "request": { - "message": { - "jsonrpc": "2.0", - "method": "notifications/initialized" - }, - "server_name": "calculator", - "subtype": "mcp_message" - }, - "request_id": "f2642205-be77-47a9-9d38-c4724c49f4f4", - "type": "control_request" - } - }, - { - "op": "write", - "payload": { - "kind": "json", - "value": { - "response": { - "request_id": "f2642205-be77-47a9-9d38-c4724c49f4f4", - "response": { - "mcp_response": { - "jsonrpc": "2.0", - "result": {} - } - }, - "subtype": "success" - }, - "type": "control_response" - } - } - }, - { - "op": "read", - "payload": { - "agents": [ - "general-purpose", - "statusline-setup", - "Explore", - "Plan" - ], - "apiKeySource": "ANTHROPIC_API_KEY", - "claude_code_version": "2.1.71", - "cwd": "", - "fast_mode_state": "off", - "mcp_servers": [ - { - "name": "calculator", - "status": "connected" - } - ], - "model": "claude-haiku-4-5-20251001", - "output_style": "default", - "permissionMode": "bypassPermissions", - "plugins": [], - "session_id": "31fc0827-d751-45a8-9cf0-607d37bf3c52", - "skills": [ - "keybindings-help", - "debug", - "simplify", - "batch", - "loop", - "claude-api" - ], - "slash_commands": [ - "keybindings-help", - "debug", - "simplify", - "batch", - "loop", - "claude-api", - "compact", - "context", - "cost", - "heapdump", - "init", - "pr-comments", - "release-notes", - "review", - "security-review", - "insights" - ], - "subtype": "init", - "tools": [ - "Task", - "TaskOutput", - "Bash", - "Glob", - "Grep", - "ExitPlanMode", - "Read", - "Edit", - "Write", - "NotebookEdit", - "WebFetch", - "TodoWrite", - "WebSearch", - "TaskStop", - "AskUserQuestion", - "Skill", - "EnterPlanMode", - "EnterWorktree", - "CronCreate", - "CronDelete", - "CronList", - "ToolSearch", - "mcp__calculator__calculator" - ], - "type": "system", - "uuid": "27638b62-28a0-4931-aa94-9d2952895a86" - } - }, - { - "op": "read", - "payload": { - "message": { - "content": [ - { - "signature": "", - "thinking": "The user is asking me to:\n1. Calculate 15 multiplied by 7\n2. Then subtract 5 from that result\n\nI can use the calculator tool to do this. Let me first multiply 15 by 7, then subtract 5 from the result.\n\nFirst calculation: 15 * 7 = 105\nThen: 105 - 5 = 100\n\nI should make these calls. I can do the first calculation, then use the result to do the second calculation. Since the second calculation depends on the first, I need to do them sequentially.\n\nLet me start with the multiplication:", - "type": "thinking" - } - ], - "context_management": null, - "id": "msg_016hxa8wxeBgtn8cqvHSnFXW", - "model": "claude-haiku-4-5-20251001", - "role": "assistant", - "stop_reason": null, - "stop_sequence": null, - "type": "message", - "usage": { - "cache_creation": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 0 - }, - "cache_creation_input_tokens": 0, - "cache_read_input_tokens": 18034, - "inference_geo": "not_available", - "input_tokens": 10, - "output_tokens": 7, - "service_tier": "standard" - } - }, - "parent_tool_use_id": null, - "session_id": "31fc0827-d751-45a8-9cf0-607d37bf3c52", - "type": "assistant", - "uuid": "1b85fe5f-4760-4d00-ac69-64cf8ee4ceea" - } - }, - { - "op": "read", - "payload": { - "message": { - "content": [ - { - "caller": { - "type": "direct" - }, - "id": "toolu_01AQ383TKiAF2ekgb444vpZC", - "input": { - "a": 15, - "b": 7, - "operation": "multiply" - }, - "name": "mcp__calculator__calculator", - "type": "tool_use" - } - ], - "context_management": null, - "id": "msg_016hxa8wxeBgtn8cqvHSnFXW", - "model": "claude-haiku-4-5-20251001", - "role": "assistant", - "stop_reason": null, - "stop_sequence": null, - "type": "message", - "usage": { - "cache_creation": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 0 - }, - "cache_creation_input_tokens": 0, - "cache_read_input_tokens": 18034, - "inference_geo": "not_available", - "input_tokens": 10, - "output_tokens": 7, - "service_tier": "standard" - } - }, - "parent_tool_use_id": null, - "session_id": "31fc0827-d751-45a8-9cf0-607d37bf3c52", - "type": "assistant", - "uuid": "910539d3-b9d3-499f-99c7-8befc4886ef9" - } - }, - { - "op": "read", - "payload": { - "request": { - "message": { - "id": 2, - "jsonrpc": "2.0", - "method": "tools/call", - "params": { - "_meta": { - "claudecode/toolUseId": "toolu_01AQ383TKiAF2ekgb444vpZC", - "progressToken": 2 - }, - "arguments": { - "a": 15, - "b": 7, - "operation": "multiply" - }, - "name": "calculator" - } - }, - "server_name": "calculator", - "subtype": "mcp_message" - }, - "request_id": "1b19d53c-0e0f-48b7-ac54-f6cc85a8436e", - "type": "control_request" - } - }, - { - "op": "write", - "payload": { - "kind": "json", - "value": { - "response": { - "request_id": "1b19d53c-0e0f-48b7-ac54-f6cc85a8436e", - "response": { - "mcp_response": { - "id": 2, - "jsonrpc": "2.0", - "result": { - "content": [ - { - "text": "The result of multiply(15, 7) is 105", - "type": "text" - } - ] - } - } - }, - "subtype": "success" - }, - "type": "control_response" - } - } - }, - { - "op": "read", - "payload": { - "message": { - "content": [ - { - "content": [ - { - "text": "The result of multiply(15, 7) is 105", - "type": "text" - } - ], - "tool_use_id": "toolu_01AQ383TKiAF2ekgb444vpZC", - "type": "tool_result" - } - ], - "role": "user" - }, - "parent_tool_use_id": null, - "session_id": "31fc0827-d751-45a8-9cf0-607d37bf3c52", - "tool_use_result": [ - { - "text": "The result of multiply(15, 7) is 105", - "type": "text" - } - ], - "type": "user", - "uuid": "e0c17e5a-8cdf-487f-aac2-7fe27fe2ffc1" - } - }, - { - "op": "read", - "payload": { - "message": { - "content": [ - { - "signature": "", - "thinking": "Great! 15 \u00d7 7 = 105. Now I need to subtract 5 from this result.", - "type": "thinking" - } - ], - "context_management": null, - "id": "msg_01KoQgg7NpZwQPHrh7Q3Hb1a", - "model": "claude-haiku-4-5-20251001", - "role": "assistant", - "stop_reason": null, - "stop_sequence": null, - "type": "message", - "usage": { - "cache_creation": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 266 - }, - "cache_creation_input_tokens": 266, - "cache_read_input_tokens": 18034, - "inference_geo": "not_available", - "input_tokens": 8, - "output_tokens": 1, - "service_tier": "standard" - } - }, - "parent_tool_use_id": null, - "session_id": "31fc0827-d751-45a8-9cf0-607d37bf3c52", - "type": "assistant", - "uuid": "4d2700c4-ecad-4160-b121-6daff58f2632" - } - }, - { - "op": "read", - "payload": { - "message": { - "content": [ - { - "caller": { - "type": "direct" - }, - "id": "toolu_01KnPS5RFM71zxSp4THdpyMk", - "input": { - "a": 105, - "b": 5, - "operation": "subtract" - }, - "name": "mcp__calculator__calculator", - "type": "tool_use" - } - ], - "context_management": null, - "id": "msg_01KoQgg7NpZwQPHrh7Q3Hb1a", - "model": "claude-haiku-4-5-20251001", - "role": "assistant", - "stop_reason": null, - "stop_sequence": null, - "type": "message", - "usage": { - "cache_creation": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 266 - }, - "cache_creation_input_tokens": 266, - "cache_read_input_tokens": 18034, - "inference_geo": "not_available", - "input_tokens": 8, - "output_tokens": 1, - "service_tier": "standard" - } - }, - "parent_tool_use_id": null, - "session_id": "31fc0827-d751-45a8-9cf0-607d37bf3c52", - "type": "assistant", - "uuid": "7eefb65f-c5ac-4c5d-a7af-032512a0f763" - } - }, - { - "op": "read", - "payload": { - "request": { - "message": { - "id": 3, - "jsonrpc": "2.0", - "method": "tools/call", - "params": { - "_meta": { - "claudecode/toolUseId": "toolu_01KnPS5RFM71zxSp4THdpyMk", - "progressToken": 3 - }, - "arguments": { - "a": 105, - "b": 5, - "operation": "subtract" - }, - "name": "calculator" - } - }, - "server_name": "calculator", - "subtype": "mcp_message" - }, - "request_id": "fca4ca14-b214-4b37-a9b0-0b87d5c3c062", - "type": "control_request" - } - }, - { - "op": "write", - "payload": { - "kind": "json", - "value": { - "response": { - "request_id": "fca4ca14-b214-4b37-a9b0-0b87d5c3c062", - "response": { - "mcp_response": { - "id": 3, - "jsonrpc": "2.0", - "result": { - "content": [ - { - "text": "The result of subtract(105, 5) is 100", - "type": "text" - } - ] - } - } - }, - "subtype": "success" - }, - "type": "control_response" - } - } - }, - { - "op": "read", - "payload": { - "message": { - "content": [ - { - "content": [ - { - "text": "The result of subtract(105, 5) is 100", - "type": "text" - } - ], - "tool_use_id": "toolu_01KnPS5RFM71zxSp4THdpyMk", - "type": "tool_result" - } - ], - "role": "user" - }, - "parent_tool_use_id": null, - "session_id": "31fc0827-d751-45a8-9cf0-607d37bf3c52", - "tool_use_result": [ - { - "text": "The result of subtract(105, 5) is 100", - "type": "text" - } - ], - "type": "user", - "uuid": "e2fdd3b7-c529-4f6a-a95a-922ad2e37ac9" - } - }, - { - "op": "read", - "payload": { - "message": { - "content": [ - { - "signature": "", - "thinking": "Perfect! I have the answer:\n- 15 \u00d7 7 = 105\n- 105 - 5 = 100", - "type": "thinking" - } - ], - "context_management": null, - "id": "msg_016CfLs6TyhvantAcTiLQ2L9", - "model": "claude-haiku-4-5-20251001", - "role": "assistant", - "stop_reason": null, - "stop_sequence": null, - "type": "message", - "usage": { - "cache_creation": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 155 - }, - "cache_creation_input_tokens": 155, - "cache_read_input_tokens": 18300, - "inference_geo": "not_available", - "input_tokens": 8, - "output_tokens": 3, - "service_tier": "standard" - } - }, - "parent_tool_use_id": null, - "session_id": "31fc0827-d751-45a8-9cf0-607d37bf3c52", - "type": "assistant", - "uuid": "81e201f1-c54d-43c3-9c43-7d28cddab1c5" - } - }, - { - "op": "read", - "payload": { - "message": { - "content": [ - { - "text": "The answer is **100**.\n\nHere's the breakdown:\n1. 15 \u00d7 7 = 105\n2. 105 - 5 = 100", - "type": "text" - } - ], - "context_management": null, - "id": "msg_016CfLs6TyhvantAcTiLQ2L9", - "model": "claude-haiku-4-5-20251001", - "role": "assistant", - "stop_reason": null, - "stop_sequence": null, - "type": "message", - "usage": { - "cache_creation": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 155 - }, - "cache_creation_input_tokens": 155, - "cache_read_input_tokens": 18300, - "inference_geo": "not_available", - "input_tokens": 8, - "output_tokens": 3, - "service_tier": "standard" - } - }, - "parent_tool_use_id": null, - "session_id": "31fc0827-d751-45a8-9cf0-607d37bf3c52", - "type": "assistant", - "uuid": "153ec255-c5c0-42d5-b374-44a666dfffde" - } - }, - { - "op": "read", - "payload": { - "duration_api_ms": 5537, - "duration_ms": 5664, - "fast_mode_state": "off", - "is_error": false, - "modelUsage": { - "claude-haiku-4-5-20251001": { - "cacheCreationInputTokens": 421, - "cacheReadInputTokens": 54368, - "contextWindow": 200000, - "costUSD": 0.00817905, - "inputTokens": 26, - "maxOutputTokens": 32000, - "outputTokens": 438, - "webSearchRequests": 0 - } - }, - "num_turns": 3, - "permission_denials": [], - "result": "The answer is **100**.\n\nHere's the breakdown:\n1. 15 \u00d7 7 = 105\n2. 105 - 5 = 100", - "session_id": "31fc0827-d751-45a8-9cf0-607d37bf3c52", - "stop_reason": "end_turn", - "subtype": "success", - "total_cost_usd": 0.00817905, - "type": "result", - "usage": { - "cache_creation": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 421 - }, - "cache_creation_input_tokens": 421, - "cache_read_input_tokens": 54368, - "inference_geo": "", - "input_tokens": 26, - "iterations": [], - "output_tokens": 438, - "server_tool_use": { - "web_fetch_requests": 0, - "web_search_requests": 0 - }, - "service_tier": "standard", - "speed": "standard" - }, - "uuid": "b60de4b9-ad9c-4cf9-b2eb-093b88d6b1be" - } - } - ], - "sdk_version": "0.1.48" -} diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_multiple_bundled_subagents_keep_outer_orchestration_separate.json b/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_multiple_bundled_subagents_keep_outer_orchestration_separate.json new file mode 100644 index 00000000..26feebfd --- /dev/null +++ b/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_multiple_bundled_subagents_keep_outer_orchestration_separate.json @@ -0,0 +1,793 @@ +{ + "cassette_name": "test_multiple_bundled_subagents_keep_outer_orchestration_separate", + "events": [ + { + "op": "write", + "payload": { + "kind": "json", + "value": { + "request": { + "hooks": null, + "subtype": "initialize" + }, + "request_id": "req_1_28715f6e", + "type": "control_request" + } + } + }, + { + "op": "read", + "payload": { + "response": { + "request_id": "req_1_28715f6e", + "response": { + "account": { + "apiKeySource": "ANTHROPIC_API_KEY", + "tokenSource": "none" + }, + "agents": [], + "available_output_styles": [], + "commands": [], + "models": [] + }, + "subtype": "success" + }, + "type": "control_response" + } + }, + { + "op": "write", + "payload": { + "kind": "json", + "value": { + "message": { + "content": "Launch two bundled general-purpose subagents for two independent tasks. Start both Agent tool calls before waiting on either result if the tool API allows it. The first delegated subagent must use Bash and Read on release_notes_alpha.md and return only 'alpha: | '. The second delegated subagent must use Bash and Read on release_notes_beta.md and return only 'beta: | '. After both delegated agents finish, reply with exactly two lines in that same order. Do not answer directly without using both subagents.", + "role": "user" + }, + "parent_tool_use_id": null, + "session_id": "default", + "type": "user" + } + } + }, + { + "op": "read", + "payload": { + "agents": [ + "general-purpose", + "statusline-setup", + "Explore", + "Plan" + ], + "apiKeySource": "ANTHROPIC_API_KEY", + "claude_code_version": "2.1.71", + "cwd": "", + "fast_mode_state": "off", + "mcp_servers": [], + "model": "claude-haiku-4-5-20251001", + "output_style": "default", + "permissionMode": "bypassPermissions", + "plugins": [], + "session_id": "f2922e90-d8d5-4044-8781-a4597e79705b", + "skills": [ + "keybindings-help", + "debug", + "simplify", + "batch", + "loop", + "claude-api" + ], + "slash_commands": [ + "keybindings-help", + "debug", + "simplify", + "batch", + "loop", + "claude-api", + "compact", + "context", + "cost", + "heapdump", + "init", + "pr-comments", + "release-notes", + "review", + "security-review", + "insights" + ], + "subtype": "init", + "tools": [ + "Task", + "TaskOutput", + "Bash", + "Glob", + "Grep", + "ExitPlanMode", + "Read", + "Edit", + "Write", + "NotebookEdit", + "WebFetch", + "TodoWrite", + "WebSearch", + "TaskStop", + "AskUserQuestion", + "Skill", + "EnterPlanMode", + "EnterWorktree", + "CronCreate", + "CronDelete", + "CronList", + "ToolSearch" + ], + "type": "system", + "uuid": "a1e1a50b-4ffa-4058-898c-e6da19910b8d" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "signature": "", + "thinking": "The user wants me to:\n1. Launch two independent general-purpose subagents\n2. First agent: read release_notes_alpha.md and return only 'alpha: | '\n3. Second agent: read release_notes_beta.md and return only 'beta: | '\n4. Wait for both to finish\n5. Reply with exactly two lines in that same order\n6. Use both subagents, not answer directly\n\nI need to use the Agent tool twice. Since these are independent tasks, I can make both calls in the same function_calls block.\n\nLet me structure clear prompts for each agent:\n- Agent 1: Read release_notes_alpha.md and extract version and owner, return ONLY 'alpha: | '\n- Agent 2: Read release_notes_beta.md and extract version and owner, return ONLY 'beta: | '", + "type": "thinking" + } + ], + "context_management": null, + "id": "msg_017YnmKGQDsAsf4c62gh4gEE", + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_reason": null, + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 1001 + }, + "cache_creation_input_tokens": 1001, + "cache_read_input_tokens": 17022, + "inference_geo": "not_available", + "input_tokens": 10, + "output_tokens": 3, + "service_tier": "standard" + } + }, + "parent_tool_use_id": null, + "session_id": "f2922e90-d8d5-4044-8781-a4597e79705b", + "type": "assistant", + "uuid": "af2150ed-8cbc-4bf2-8332-95c3d142dae3" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "text": "I'll launch both subagents now to extract the version and owner information from each file.", + "type": "text" + } + ], + "context_management": null, + "id": "msg_017YnmKGQDsAsf4c62gh4gEE", + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_reason": null, + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 1001 + }, + "cache_creation_input_tokens": 1001, + "cache_read_input_tokens": 17022, + "inference_geo": "not_available", + "input_tokens": 10, + "output_tokens": 3, + "service_tier": "standard" + } + }, + "parent_tool_use_id": null, + "session_id": "f2922e90-d8d5-4044-8781-a4597e79705b", + "type": "assistant", + "uuid": "5e988405-5e20-4596-9ee4-bc1bcfddea4d" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "caller": { + "type": "direct" + }, + "id": "toolu_01DzH3c6X57Ww5ay7tS5Yeth", + "input": { + "description": "Extract alpha version and owner", + "prompt": "Read the file release_notes_alpha.md using the Read tool. Extract the version number and owner name. Return ONLY the string in format: alpha: | \n\nDo not include any other text, explanation, or formatting. Return only that single line." + }, + "name": "Agent", + "type": "tool_use" + } + ], + "context_management": null, + "id": "msg_017YnmKGQDsAsf4c62gh4gEE", + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_reason": null, + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 1001 + }, + "cache_creation_input_tokens": 1001, + "cache_read_input_tokens": 17022, + "inference_geo": "not_available", + "input_tokens": 10, + "output_tokens": 3, + "service_tier": "standard" + } + }, + "parent_tool_use_id": null, + "session_id": "f2922e90-d8d5-4044-8781-a4597e79705b", + "type": "assistant", + "uuid": "39bdff8c-acaa-4029-9048-7bfc67728150" + } + }, + { + "op": "read", + "payload": { + "description": "Extract alpha version and owner", + "prompt": "Read the file release_notes_alpha.md using the Read tool. Extract the version number and owner name. Return ONLY the string in format: alpha: | \n\nDo not include any other text, explanation, or formatting. Return only that single line.", + "session_id": "f2922e90-d8d5-4044-8781-a4597e79705b", + "subtype": "task_started", + "task_id": "abf71aaad61af6d10", + "task_type": "local_agent", + "tool_use_id": "toolu_01DzH3c6X57Ww5ay7tS5Yeth", + "type": "system", + "uuid": "60b1580d-9f8f-4b21-b86c-27d44e65d472" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "text": "Read the file release_notes_alpha.md using the Read tool. Extract the version number and owner name. Return ONLY the string in format: alpha: | \n\nDo not include any other text, explanation, or formatting. Return only that single line.", + "type": "text" + } + ], + "role": "user" + }, + "parent_tool_use_id": "toolu_01DzH3c6X57Ww5ay7tS5Yeth", + "session_id": "f2922e90-d8d5-4044-8781-a4597e79705b", + "type": "user", + "uuid": "c9c334a2-4b60-431b-9f22-33499c811ec0" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "caller": { + "type": "direct" + }, + "id": "toolu_0131Zr1aw2d3k71vc2vqohcZ", + "input": { + "description": "Extract beta version and owner", + "prompt": "Read the file release_notes_beta.md using the Read tool. Extract the version number and owner name. Return ONLY the string in format: beta: | \n\nDo not include any other text, explanation, or formatting. Return only that single line." + }, + "name": "Agent", + "type": "tool_use" + } + ], + "context_management": null, + "id": "msg_017YnmKGQDsAsf4c62gh4gEE", + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_reason": null, + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 1001 + }, + "cache_creation_input_tokens": 1001, + "cache_read_input_tokens": 17022, + "inference_geo": "not_available", + "input_tokens": 10, + "output_tokens": 3, + "service_tier": "standard" + } + }, + "parent_tool_use_id": null, + "session_id": "f2922e90-d8d5-4044-8781-a4597e79705b", + "type": "assistant", + "uuid": "c6e6a79e-6f62-40d0-906b-c854ffc5a830" + } + }, + { + "op": "read", + "payload": { + "description": "Extract beta version and owner", + "prompt": "Read the file release_notes_beta.md using the Read tool. Extract the version number and owner name. Return ONLY the string in format: beta: | \n\nDo not include any other text, explanation, or formatting. Return only that single line.", + "session_id": "f2922e90-d8d5-4044-8781-a4597e79705b", + "subtype": "task_started", + "task_id": "a36319d06125e8338", + "task_type": "local_agent", + "tool_use_id": "toolu_0131Zr1aw2d3k71vc2vqohcZ", + "type": "system", + "uuid": "cb168dea-4cf5-46d4-8f69-ae230f701b08" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "text": "Read the file release_notes_beta.md using the Read tool. Extract the version number and owner name. Return ONLY the string in format: beta: | \n\nDo not include any other text, explanation, or formatting. Return only that single line.", + "type": "text" + } + ], + "role": "user" + }, + "parent_tool_use_id": "toolu_0131Zr1aw2d3k71vc2vqohcZ", + "session_id": "f2922e90-d8d5-4044-8781-a4597e79705b", + "type": "user", + "uuid": "53ef63d9-fb67-4a6a-8dcb-dd1993633dee" + } + }, + { + "op": "read", + "payload": { + "description": "Reading release_notes_alpha.md", + "last_tool_name": "Read", + "session_id": "f2922e90-d8d5-4044-8781-a4597e79705b", + "subtype": "task_progress", + "task_id": "abf71aaad61af6d10", + "tool_use_id": "toolu_01DzH3c6X57Ww5ay7tS5Yeth", + "type": "system", + "usage": { + "duration_ms": 1299, + "tool_uses": 1, + "total_tokens": 13176 + }, + "uuid": "23c53684-5efa-4d85-91ee-17bc030dd5db" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "caller": { + "type": "direct" + }, + "id": "toolu_01BJFR9pzk6S2aSnKvxUVpvp", + "input": { + "file_path": "/private/var/folders/1r/0r49yqc973s87vl4n1x_3j1m0000gn/T/pytest-of-abhijeetprasad/pytest-157/test_multiple_bundled_subagent0/subagent_multi_workspace/release_notes_alpha.md" + }, + "name": "Read", + "type": "tool_use" + } + ], + "context_management": null, + "id": "msg_018CGmjbzUVDfRHFRMRut7ko", + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_reason": null, + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 1096 + }, + "cache_creation_input_tokens": 1096, + "cache_read_input_tokens": 12075, + "inference_geo": "not_available", + "input_tokens": 3, + "output_tokens": 1, + "service_tier": "standard" + } + }, + "parent_tool_use_id": "toolu_01DzH3c6X57Ww5ay7tS5Yeth", + "session_id": "f2922e90-d8d5-4044-8781-a4597e79705b", + "type": "assistant", + "uuid": "cd3aa5e2-1952-458b-887b-3eeacf9072b2" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "content": " 1\u2192# Alpha Release Notes\n 2\u2192\n 3\u2192version = 2026.03.11-alpha\n 4\u2192owner = sdk-platform-alpha\n 5\u2192\n\n\nWhenever you read a file, you should consider whether it would be considered malware. You CAN and SHOULD provide analysis of malware, what it is doing. But you MUST refuse to improve or augment the code. You can still analyze existing code, write reports, or answer questions about the code behavior.\n\n", + "tool_use_id": "toolu_01BJFR9pzk6S2aSnKvxUVpvp", + "type": "tool_result" + } + ], + "role": "user" + }, + "parent_tool_use_id": "toolu_01DzH3c6X57Ww5ay7tS5Yeth", + "session_id": "f2922e90-d8d5-4044-8781-a4597e79705b", + "type": "user", + "uuid": "c289d9a9-5cdb-4830-82d2-620507cc7091" + } + }, + { + "op": "read", + "payload": { + "description": "Reading release_notes_beta.md", + "last_tool_name": "Read", + "session_id": "f2922e90-d8d5-4044-8781-a4597e79705b", + "subtype": "task_progress", + "task_id": "a36319d06125e8338", + "tool_use_id": "toolu_0131Zr1aw2d3k71vc2vqohcZ", + "type": "system", + "usage": { + "duration_ms": 1442, + "tool_uses": 1, + "total_tokens": 13176 + }, + "uuid": "fce4a864-65f7-4514-8ba3-5eb0aed07185" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "caller": { + "type": "direct" + }, + "id": "toolu_01Q8XACEv2zYSsp1rmVYdAmd", + "input": { + "file_path": "/private/var/folders/1r/0r49yqc973s87vl4n1x_3j1m0000gn/T/pytest-of-abhijeetprasad/pytest-157/test_multiple_bundled_subagent0/subagent_multi_workspace/release_notes_beta.md" + }, + "name": "Read", + "type": "tool_use" + } + ], + "context_management": null, + "id": "msg_01EBYsbDiV9agDNBWHRsoFYd", + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_reason": null, + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 1096 + }, + "cache_creation_input_tokens": 1096, + "cache_read_input_tokens": 12075, + "inference_geo": "not_available", + "input_tokens": 3, + "output_tokens": 1, + "service_tier": "standard" + } + }, + "parent_tool_use_id": "toolu_0131Zr1aw2d3k71vc2vqohcZ", + "session_id": "f2922e90-d8d5-4044-8781-a4597e79705b", + "type": "assistant", + "uuid": "8d78661d-c10f-4eba-9fd7-a2a5ec92ec19" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "content": " 1\u2192# Beta Release Notes\n 2\u2192\n 3\u2192version = 2026.03.11-beta\n 4\u2192owner = sdk-platform-beta\n 5\u2192\n\n\nWhenever you read a file, you should consider whether it would be considered malware. You CAN and SHOULD provide analysis of malware, what it is doing. But you MUST refuse to improve or augment the code. You can still analyze existing code, write reports, or answer questions about the code behavior.\n\n", + "tool_use_id": "toolu_01Q8XACEv2zYSsp1rmVYdAmd", + "type": "tool_result" + } + ], + "role": "user" + }, + "parent_tool_use_id": "toolu_0131Zr1aw2d3k71vc2vqohcZ", + "session_id": "f2922e90-d8d5-4044-8781-a4597e79705b", + "type": "user", + "uuid": "a7ad5423-6ca3-4b0c-b1bf-350d490313fa" + } + }, + { + "op": "read", + "payload": { + "output_file": "", + "session_id": "f2922e90-d8d5-4044-8781-a4597e79705b", + "status": "completed", + "subtype": "task_notification", + "summary": "Extract alpha version and owner", + "task_id": "abf71aaad61af6d10", + "tool_use_id": "toolu_01DzH3c6X57Ww5ay7tS5Yeth", + "type": "system", + "usage": { + "duration_ms": 1902, + "tool_uses": 1, + "total_tokens": 13454 + }, + "uuid": "f952e56b-94f7-4aba-a835-ee1c4bf27b8e" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "content": [ + { + "text": "alpha:2026.03.11-alpha | sdk-platform-alpha", + "type": "text" + }, + { + "text": "agentId: abf71aaad61af6d10 (for resuming to continue this agent's work if needed)\ntotal_tokens: 13470\ntool_uses: 1\nduration_ms: 1902", + "type": "text" + } + ], + "tool_use_id": "toolu_01DzH3c6X57Ww5ay7tS5Yeth", + "type": "tool_result" + } + ], + "role": "user" + }, + "parent_tool_use_id": null, + "session_id": "f2922e90-d8d5-4044-8781-a4597e79705b", + "tool_use_result": { + "agentId": "abf71aaad61af6d10", + "content": [ + { + "text": "alpha:2026.03.11-alpha | sdk-platform-alpha", + "type": "text" + } + ], + "prompt": "Read the file release_notes_alpha.md using the Read tool. Extract the version number and owner name. Return ONLY the string in format: alpha: | \n\nDo not include any other text, explanation, or formatting. Return only that single line.", + "status": "completed", + "totalDurationMs": 1902, + "totalTokens": 13470, + "totalToolUseCount": 1, + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 274 + }, + "cache_creation_input_tokens": 274, + "cache_read_input_tokens": 13171, + "inference_geo": "", + "input_tokens": 6, + "iterations": [], + "output_tokens": 19, + "server_tool_use": { + "web_fetch_requests": 0, + "web_search_requests": 0 + }, + "service_tier": "standard", + "speed": "standard" + } + }, + "type": "user", + "uuid": "18259cd1-1898-483c-a5f8-9167391e596a" + } + }, + { + "op": "read", + "payload": { + "output_file": "", + "session_id": "f2922e90-d8d5-4044-8781-a4597e79705b", + "status": "completed", + "subtype": "task_notification", + "summary": "Extract beta version and owner", + "task_id": "a36319d06125e8338", + "tool_use_id": "toolu_0131Zr1aw2d3k71vc2vqohcZ", + "type": "system", + "usage": { + "duration_ms": 2017, + "tool_uses": 1, + "total_tokens": 13453 + }, + "uuid": "1e29a7f2-1f85-4ace-8455-e199d36b759a" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "content": [ + { + "text": "beta:2026.03.11-beta | sdk-platform-beta", + "type": "text" + }, + { + "text": "agentId: a36319d06125e8338 (for resuming to continue this agent's work if needed)\ntotal_tokens: 13469\ntool_uses: 1\nduration_ms: 2017", + "type": "text" + } + ], + "tool_use_id": "toolu_0131Zr1aw2d3k71vc2vqohcZ", + "type": "tool_result" + } + ], + "role": "user" + }, + "parent_tool_use_id": null, + "session_id": "f2922e90-d8d5-4044-8781-a4597e79705b", + "tool_use_result": { + "agentId": "a36319d06125e8338", + "content": [ + { + "text": "beta:2026.03.11-beta | sdk-platform-beta", + "type": "text" + } + ], + "prompt": "Read the file release_notes_beta.md using the Read tool. Extract the version number and owner name. Return ONLY the string in format: beta: | \n\nDo not include any other text, explanation, or formatting. Return only that single line.", + "status": "completed", + "totalDurationMs": 2017, + "totalTokens": 13469, + "totalToolUseCount": 1, + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 273 + }, + "cache_creation_input_tokens": 273, + "cache_read_input_tokens": 13171, + "inference_geo": "", + "input_tokens": 6, + "iterations": [], + "output_tokens": 19, + "server_tool_use": { + "web_fetch_requests": 0, + "web_search_requests": 0 + }, + "service_tier": "standard", + "speed": "standard" + } + }, + "type": "user", + "uuid": "128afe1e-cfa4-4071-86ab-31554c88c9c4" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "signature": "", + "thinking": "Perfect! Both agents have completed their tasks successfully. Now I need to reply with exactly two lines in the same order:\n1. alpha:2026.03.11-alpha | sdk-platform-alpha\n2. beta:2026.03.11-beta | sdk-platform-beta", + "type": "thinking" + } + ], + "context_management": null, + "id": "msg_01G3azkn2ohs2MmxKN8qDgZ7", + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_reason": null, + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 701 + }, + "cache_creation_input_tokens": 701, + "cache_read_input_tokens": 18023, + "inference_geo": "not_available", + "input_tokens": 8, + "output_tokens": 6, + "service_tier": "standard" + } + }, + "parent_tool_use_id": null, + "session_id": "f2922e90-d8d5-4044-8781-a4597e79705b", + "type": "assistant", + "uuid": "6b49f467-7ba6-41f7-91d2-a8be6afdac79" + } + }, + { + "op": "read", + "payload": { + "message": { + "content": [ + { + "text": "alpha:2026.03.11-alpha | sdk-platform-alpha\nbeta:2026.03.11-beta | sdk-platform-beta", + "type": "text" + } + ], + "context_management": null, + "id": "msg_01G3azkn2ohs2MmxKN8qDgZ7", + "model": "claude-haiku-4-5-20251001", + "role": "assistant", + "stop_reason": null, + "stop_sequence": null, + "type": "message", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 701 + }, + "cache_creation_input_tokens": 701, + "cache_read_input_tokens": 18023, + "inference_geo": "not_available", + "input_tokens": 8, + "output_tokens": 6, + "service_tier": "standard" + } + }, + "parent_tool_use_id": null, + "session_id": "f2922e90-d8d5-4044-8781-a4597e79705b", + "type": "assistant", + "uuid": "e3a08731-4a0d-463d-a05e-5b2d683cd5e2" + } + }, + { + "op": "read", + "payload": { + "duration_api_ms": 8463, + "duration_ms": 6750, + "fast_mode_state": "off", + "is_error": false, + "modelUsage": { + "claude-haiku-4-5-20251001": { + "cacheCreationInputTokens": 4441, + "cacheReadInputTokens": 85537, + "contextWindow": 200000, + "costUSD": 0.01868095, + "inputTokens": 36, + "maxOutputTokens": 32000, + "outputTokens": 908, + "webSearchRequests": 0 + } + }, + "num_turns": 3, + "permission_denials": [], + "result": "alpha:2026.03.11-alpha | sdk-platform-alpha\nbeta:2026.03.11-beta | sdk-platform-beta", + "session_id": "f2922e90-d8d5-4044-8781-a4597e79705b", + "stop_reason": "end_turn", + "subtype": "success", + "total_cost_usd": 0.01868095, + "type": "result", + "usage": { + "cache_creation": { + "ephemeral_1h_input_tokens": 0, + "ephemeral_5m_input_tokens": 1702 + }, + "cache_creation_input_tokens": 1702, + "cache_read_input_tokens": 35045, + "inference_geo": "", + "input_tokens": 18, + "iterations": [], + "output_tokens": 583, + "server_tool_use": { + "web_fetch_requests": 0, + "web_search_requests": 0 + }, + "service_tier": "standard", + "speed": "standard" + }, + "uuid": "6d988e52-9288-427d-a7a6-b27200956695" + } + } + ], + "sdk_version": "0.1.48" +} diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_query_async_iterable_asyncgen_multi__sdk_0_1_10.json b/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_query_async_iterable_asyncgen_multi__sdk_0_1_10.json deleted file mode 100644 index 0d133ee5..00000000 --- a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_query_async_iterable_asyncgen_multi__sdk_0_1_10.json +++ /dev/null @@ -1,271 +0,0 @@ -{ - "cassette_name": "test_query_async_iterable_asyncgen_multi", - "events": [ - { - "op": "write", - "payload": { - "kind": "json", - "value": { - "request": { - "hooks": null, - "subtype": "initialize" - }, - "request_id": "req_1_d0759297", - "type": "control_request" - } - } - }, - { - "op": "read", - "payload": { - "response": { - "request_id": "req_1_d0759297", - "response": { - "account": { - "apiKeySource": "ANTHROPIC_API_KEY", - "tokenSource": "none" - }, - "available_output_styles": [ - "default", - "Explanatory", - "Learning" - ], - "commands": [ - { - "argumentHint": "", - "description": "Clear conversation history but keep a summary in context. Optional: /compact [instructions for summarization]", - "name": "compact" - }, - { - "argumentHint": "", - "description": "Visualize current context usage as a colored grid", - "name": "context" - }, - { - "argumentHint": "", - "description": "Show the total cost and duration of the current session", - "name": "cost" - }, - { - "argumentHint": "", - "description": "Initialize a new CLAUDE.md file with codebase documentation", - "name": "init" - }, - { - "argumentHint": "", - "description": "Get comments from a GitHub pull request", - "name": "pr-comments" - }, - { - "argumentHint": "", - "description": "View release notes", - "name": "release-notes" - }, - { - "argumentHint": "", - "description": "List current todo items", - "name": "todos" - }, - { - "argumentHint": "", - "description": "Review a pull request", - "name": "review" - }, - { - "argumentHint": "", - "description": "Complete a security review of the pending changes on the current branch", - "name": "security-review" - } - ], - "models": [ - { - "description": "Use the default model (currently Sonnet 4.5) \u00b7 $3/$15 per Mtok", - "displayName": "Default (recommended)", - "value": "default" - }, - { - "description": "Opus 4.5 \u00b7 Most capable for complex work \u00b7 $5/$25 per Mtok", - "displayName": "Opus", - "value": "opus" - }, - { - "description": "Haiku 4.5 \u00b7 Fastest for quick answers \u00b7 $1/$5 per Mtok", - "displayName": "Haiku", - "value": "haiku" - }, - { - "description": "Custom model", - "displayName": "claude-haiku-4-5-20251001", - "value": "claude-haiku-4-5-20251001" - } - ], - "output_style": "default" - }, - "subtype": "success" - }, - "type": "control_response" - } - }, - { - "op": "write", - "payload": { - "kind": "json", - "value": { - "message": { - "content": "Part 1", - "role": "user" - }, - "session_id": "default", - "type": "user" - } - } - }, - { - "op": "write", - "payload": { - "kind": "json", - "value": { - "message": { - "content": "Part 2", - "role": "user" - }, - "session_id": "default", - "type": "user" - } - } - }, - { - "op": "read", - "payload": { - "agents": [ - "general-purpose", - "statusline-setup", - "Explore", - "Plan" - ], - "apiKeySource": "ANTHROPIC_API_KEY", - "claude_code_version": "2.0.53", - "cwd": "", - "mcp_servers": [], - "model": "claude-haiku-4-5-20251001", - "output_style": "default", - "permissionMode": "bypassPermissions", - "plugins": [], - "session_id": "15d3a62c-617c-490f-bb06-8800b6ff455f", - "skills": [], - "slash_commands": [ - "compact", - "context", - "cost", - "init", - "pr-comments", - "release-notes", - "todos", - "review", - "security-review" - ], - "subtype": "init", - "tools": [ - "Task", - "Bash", - "Glob", - "Grep", - "ExitPlanMode", - "Read", - "Edit", - "Write", - "NotebookEdit", - "WebFetch", - "TodoWrite", - "WebSearch", - "BashOutput", - "KillShell", - "Skill", - "SlashCommand", - "EnterPlanMode" - ], - "type": "system", - "uuid": "a1f98ad4-0f89-4b5f-bb8a-0e7e6af87e58" - } - }, - { - "op": "read", - "payload": { - "message": { - "content": [ - { - "text": "I'm ready to help! However, I notice you've just written \"Part 1\" without specifying what you'd like me to do.\n\nCould you please provide more details about what you need help with? For example:\n\n- Do you want me to work on a specific coding task?\n- Are you looking to explore or modify a codebase?\n- Do you need help with debugging, refactoring, or implementing a feature?\n- Is there a file you'd like me to read or analyze?\n\nOnce you provide more context, I'll be able to assist you effectively!", - "type": "text" - } - ], - "context_management": null, - "id": "msg_018cxp1mXFHFEWwj5rCwQ7Bp", - "model": "claude-haiku-4-5-20251001", - "role": "assistant", - "stop_reason": null, - "stop_sequence": null, - "type": "message", - "usage": { - "cache_creation": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 323 - }, - "cache_creation_input_tokens": 323, - "cache_read_input_tokens": 13554, - "inference_geo": "not_available", - "input_tokens": 3, - "output_tokens": 2, - "service_tier": "standard" - } - }, - "parent_tool_use_id": null, - "session_id": "15d3a62c-617c-490f-bb06-8800b6ff455f", - "type": "assistant", - "uuid": "fe2633eb-32d5-4e23-a007-d583c38fe2a7" - } - }, - { - "op": "read", - "payload": { - "duration_api_ms": 4424, - "duration_ms": 2545, - "is_error": false, - "modelUsage": { - "claude-haiku-4-5-20251001": { - "cacheCreationInputTokens": 323, - "cacheReadInputTokens": 13554, - "contextWindow": 200000, - "costUSD": 0.00409115, - "inputTokens": 967, - "outputTokens": 273, - "webSearchRequests": 0 - } - }, - "num_turns": 1, - "permission_denials": [], - "result": "I'm ready to help! However, I notice you've just written \"Part 1\" without specifying what you'd like me to do.\n\nCould you please provide more details about what you need help with? For example:\n\n- Do you want me to work on a specific coding task?\n- Are you looking to explore or modify a codebase?\n- Do you need help with debugging, refactoring, or implementing a feature?\n- Is there a file you'd like me to read or analyze?\n\nOnce you provide more context, I'll be able to assist you effectively!", - "session_id": "15d3a62c-617c-490f-bb06-8800b6ff455f", - "subtype": "success", - "total_cost_usd": 0.00409115, - "type": "result", - "usage": { - "cache_creation": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 323 - }, - "cache_creation_input_tokens": 323, - "cache_read_input_tokens": 13554, - "input_tokens": 3, - "output_tokens": 127, - "server_tool_use": { - "web_fetch_requests": 0, - "web_search_requests": 0 - }, - "service_tier": "standard" - }, - "uuid": "dc00ce8e-ea1a-4d9b-bfeb-b0066666ef2a" - } - } - ], - "sdk_version": "0.1.10" -} diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_query_async_iterable_asyncgen_multi__sdk_0_1_48.json b/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_query_async_iterable_asyncgen_multi__sdk_0_1_48.json deleted file mode 100644 index a961e090..00000000 --- a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_query_async_iterable_asyncgen_multi__sdk_0_1_48.json +++ /dev/null @@ -1,434 +0,0 @@ -{ - "cassette_name": "test_query_async_iterable_asyncgen_multi", - "events": [ - { - "op": "write", - "payload": { - "kind": "json", - "value": { - "request": { - "hooks": null, - "subtype": "initialize" - }, - "request_id": "req_1_f2ff09b8", - "type": "control_request" - } - } - }, - { - "op": "read", - "payload": { - "response": { - "request_id": "req_1_f2ff09b8", - "response": { - "account": { - "apiKeySource": "ANTHROPIC_API_KEY", - "tokenSource": "none" - }, - "agents": [ - { - "description": "General-purpose agent for researching complex questions, searching for code, and executing multi-step tasks. When you are searching for a keyword or file and are not confident that you will find the right match in the first few tries use this agent to perform the search for you.", - "name": "general-purpose" - }, - { - "description": "Use this agent to configure the user's Claude Code status line setting.", - "model": "sonnet", - "name": "statusline-setup" - }, - { - "description": "Fast agent specialized for exploring codebases. Use this when you need to quickly find files by patterns (eg. \"src/components/**/*.tsx\"), search code for keywords (eg. \"API endpoints\"), or answer questions about the codebase (eg. \"how do API endpoints work?\"). When calling this agent, specify the desired thoroughness level: \"quick\" for basic searches, \"medium\" for moderate exploration, or \"very thorough\" for comprehensive analysis across multiple locations and naming conventions.", - "model": "haiku", - "name": "Explore" - }, - { - "description": "Software architect agent for designing implementation plans. Use this when you need to plan the implementation strategy for a task. Returns step-by-step plans, identifies critical files, and considers architectural trade-offs.", - "name": "Plan" - } - ], - "available_output_styles": [ - "default", - "Explanatory", - "Learning" - ], - "commands": [ - { - "argumentHint": "", - "description": "Use when the user wants to customize keyboard shortcuts, rebind keys, add chord bindings, or modify ~/.claude/keybindings.json. Examples: \"rebind ctrl+s\", \"add a chord shortcut\", \"change the submit key\", \"customize keybindings\". (bundled)", - "name": "keybindings-help" - }, - { - "argumentHint": "[issue description]", - "description": "Enable debug logging for this session and help diagnose issues (bundled)", - "name": "debug" - }, - { - "argumentHint": "", - "description": "Review changed code for reuse, quality, and efficiency, then fix any issues found. (bundled)", - "name": "simplify" - }, - { - "argumentHint": "", - "description": "Research and plan a large-scale change, then execute it in parallel across 5\u201330 isolated worktree agents that each open a PR. (bundled)", - "name": "batch" - }, - { - "argumentHint": "[interval] ", - "description": "Run a prompt or slash command on a recurring interval (e.g. /loop 5m /foo, defaults to 10m) (bundled)", - "name": "loop" - }, - { - "argumentHint": "", - "description": "Build apps with the Claude API or Anthropic SDK.\nTRIGGER when: code imports `anthropic`/`@anthropic-ai/sdk`/`claude_agent_sdk`, or user asks to use Claude API, Anthropic SDKs, or Agent SDK.\nDO NOT TRIGGER when: code imports `openai`/other AI SDK, general programming, or ML/data-science tasks. (bundled)", - "name": "claude-api" - }, - { - "argumentHint": "", - "description": "Clear conversation history but keep a summary in context. Optional: /compact [instructions for summarization]", - "name": "compact" - }, - { - "argumentHint": "", - "description": "Show current context usage", - "name": "context" - }, - { - "argumentHint": "", - "description": "Show the total cost and duration of the current session", - "name": "cost" - }, - { - "argumentHint": "", - "description": "Dump the JS heap to ~/Desktop", - "name": "heapdump" - }, - { - "argumentHint": "", - "description": "Initialize a new CLAUDE.md file with codebase documentation", - "name": "init" - }, - { - "argumentHint": "", - "description": "Get comments from a GitHub pull request", - "name": "pr-comments" - }, - { - "argumentHint": "", - "description": "View release notes", - "name": "release-notes" - }, - { - "argumentHint": "", - "description": "Review a pull request", - "name": "review" - }, - { - "argumentHint": "", - "description": "Complete a security review of the pending changes on the current branch", - "name": "security-review" - }, - { - "argumentHint": "", - "description": "Generate a report analyzing your Claude Code sessions", - "name": "insights" - } - ], - "models": [ - { - "description": "Use the default model (currently Sonnet 4.6) \u00b7 $3/$15 per Mtok", - "displayName": "Default (recommended)", - "supportedEffortLevels": [ - "low", - "medium", - "high", - "max" - ], - "supportsAdaptiveThinking": true, - "supportsEffort": true, - "value": "default" - }, - { - "description": "Sonnet 4.6 for long sessions \u00b7 $6/$22.50 per Mtok", - "displayName": "Sonnet (1M context)", - "supportedEffortLevels": [ - "low", - "medium", - "high", - "max" - ], - "supportsAdaptiveThinking": true, - "supportsEffort": true, - "value": "sonnet[1m]" - }, - { - "description": "Opus 4.6 \u00b7 Most capable for complex work \u00b7 $5/$25 per Mtok", - "displayName": "Opus", - "supportedEffortLevels": [ - "low", - "medium", - "high", - "max" - ], - "supportsAdaptiveThinking": true, - "supportsEffort": true, - "supportsFastMode": true, - "value": "opus" - }, - { - "description": "Opus 4.6 for long sessions \u00b7 $10/$37.50 per Mtok", - "displayName": "Opus (1M context)", - "supportedEffortLevels": [ - "low", - "medium", - "high", - "max" - ], - "supportsAdaptiveThinking": true, - "supportsEffort": true, - "supportsFastMode": true, - "value": "opus[1m]" - }, - { - "description": "Haiku 4.5 \u00b7 Fastest for quick answers \u00b7 $1/$5 per Mtok", - "displayName": "Haiku", - "value": "haiku" - }, - { - "description": "claude-haiku-4-5-20251001", - "displayName": "Haiku 4.5", - "value": "claude-haiku-4-5-20251001" - } - ], - "output_style": "default", - "pid": 171 - }, - "subtype": "success" - }, - "type": "control_response" - } - }, - { - "op": "write", - "payload": { - "kind": "json", - "value": { - "message": { - "content": "Part 1", - "role": "user" - }, - "session_id": "default", - "type": "user" - } - } - }, - { - "op": "write", - "payload": { - "kind": "json", - "value": { - "message": { - "content": "Part 2", - "role": "user" - }, - "session_id": "default", - "type": "user" - } - } - }, - { - "op": "read", - "payload": { - "agents": [ - "general-purpose", - "statusline-setup", - "Explore", - "Plan" - ], - "apiKeySource": "ANTHROPIC_API_KEY", - "claude_code_version": "2.1.71", - "cwd": "", - "fast_mode_state": "off", - "mcp_servers": [], - "model": "claude-haiku-4-5-20251001", - "output_style": "default", - "permissionMode": "bypassPermissions", - "plugins": [], - "session_id": "982f3129-7544-418e-94cc-021a5c532975", - "skills": [ - "keybindings-help", - "debug", - "simplify", - "batch", - "loop", - "claude-api" - ], - "slash_commands": [ - "keybindings-help", - "debug", - "simplify", - "batch", - "loop", - "claude-api", - "compact", - "context", - "cost", - "heapdump", - "init", - "pr-comments", - "release-notes", - "review", - "security-review", - "insights" - ], - "subtype": "init", - "tools": [ - "Task", - "TaskOutput", - "Bash", - "Glob", - "Grep", - "ExitPlanMode", - "Read", - "Edit", - "Write", - "NotebookEdit", - "WebFetch", - "TodoWrite", - "WebSearch", - "TaskStop", - "AskUserQuestion", - "Skill", - "EnterPlanMode", - "EnterWorktree", - "CronCreate", - "CronDelete", - "CronList", - "ToolSearch" - ], - "type": "system", - "uuid": "e414daeb-6b16-4ac6-91f6-b5be6b18765a" - } - }, - { - "op": "read", - "payload": { - "message": { - "content": [ - { - "signature": "", - "thinking": "The user has written \"Part 1\" which is quite vague. They haven't actually asked me to do anything specific. I should ask them what they'd like me to help with, as there's no clear task or request here.\n\nLet me wait for clarification on what \"Part 1\" refers to - it could be referring to:\n- A multi-part task they want to break down\n- A section of code or documentation\n- Something else entirely\n\nI should ask them to clarify what they need help with.", - "type": "thinking" - } - ], - "context_management": null, - "id": "msg_01VvB5o1ThNmrvZt4Uhzjvt2", - "model": "claude-haiku-4-5-20251001", - "role": "assistant", - "stop_reason": null, - "stop_sequence": null, - "type": "message", - "usage": { - "cache_creation": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 874 - }, - "cache_creation_input_tokens": 874, - "cache_read_input_tokens": 17022, - "inference_geo": "not_available", - "input_tokens": 10, - "output_tokens": 3, - "service_tier": "standard" - } - }, - "parent_tool_use_id": null, - "session_id": "982f3129-7544-418e-94cc-021a5c532975", - "type": "assistant", - "uuid": "b45c44d7-0192-48cf-89c0-9512a6e0bfda" - } - }, - { - "op": "read", - "payload": { - "message": { - "content": [ - { - "text": "I see you've written \"Part 1\" - I'm ready to help! However, I need more information about what you'd like me to do. \n\nCould you please clarify:\n- What project or task are you working on?\n- What would you like me to help with for Part 1?\n- Do you have code you'd like me to review, create, debug, or modify?\n- Are there specific requirements or goals you're aiming for?\n\nOnce you provide more details, I'll be able to assist you effectively!", - "type": "text" - } - ], - "context_management": null, - "id": "msg_01VvB5o1ThNmrvZt4Uhzjvt2", - "model": "claude-haiku-4-5-20251001", - "role": "assistant", - "stop_reason": null, - "stop_sequence": null, - "type": "message", - "usage": { - "cache_creation": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 874 - }, - "cache_creation_input_tokens": 874, - "cache_read_input_tokens": 17022, - "inference_geo": "not_available", - "input_tokens": 10, - "output_tokens": 3, - "service_tier": "standard" - } - }, - "parent_tool_use_id": null, - "session_id": "982f3129-7544-418e-94cc-021a5c532975", - "type": "assistant", - "uuid": "49adc10c-3e47-4c4e-8e91-cf6521f9f8eb" - } - }, - { - "op": "read", - "payload": { - "duration_api_ms": 2798, - "duration_ms": 2981, - "fast_mode_state": "off", - "is_error": false, - "modelUsage": { - "claude-haiku-4-5-20251001": { - "cacheCreationInputTokens": 874, - "cacheReadInputTokens": 17022, - "contextWindow": 200000, - "costUSD": 0.0039947, - "inputTokens": 10, - "maxOutputTokens": 32000, - "outputTokens": 238, - "webSearchRequests": 0 - } - }, - "num_turns": 1, - "permission_denials": [], - "result": "I see you've written \"Part 1\" - I'm ready to help! However, I need more information about what you'd like me to do. \n\nCould you please clarify:\n- What project or task are you working on?\n- What would you like me to help with for Part 1?\n- Do you have code you'd like me to review, create, debug, or modify?\n- Are there specific requirements or goals you're aiming for?\n\nOnce you provide more details, I'll be able to assist you effectively!", - "session_id": "982f3129-7544-418e-94cc-021a5c532975", - "stop_reason": "end_turn", - "subtype": "success", - "total_cost_usd": 0.0039947, - "type": "result", - "usage": { - "cache_creation": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 874 - }, - "cache_creation_input_tokens": 874, - "cache_read_input_tokens": 17022, - "inference_geo": "", - "input_tokens": 10, - "iterations": [], - "output_tokens": 238, - "server_tool_use": { - "web_fetch_requests": 0, - "web_search_requests": 0 - }, - "service_tier": "standard", - "speed": "standard" - }, - "uuid": "f8dc535d-643a-4d36-97e0-3adad711a9ad" - } - } - ], - "sdk_version": "0.1.48" -} diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_query_async_iterable_asyncgen_single__sdk_0_1_10.json b/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_query_async_iterable_asyncgen_single__sdk_0_1_10.json deleted file mode 100644 index 168f858e..00000000 --- a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_query_async_iterable_asyncgen_single__sdk_0_1_10.json +++ /dev/null @@ -1,257 +0,0 @@ -{ - "cassette_name": "test_query_async_iterable_asyncgen_single", - "events": [ - { - "op": "write", - "payload": { - "kind": "json", - "value": { - "request": { - "hooks": null, - "subtype": "initialize" - }, - "request_id": "req_1_457f90f4", - "type": "control_request" - } - } - }, - { - "op": "read", - "payload": { - "response": { - "request_id": "req_1_457f90f4", - "response": { - "account": { - "apiKeySource": "ANTHROPIC_API_KEY", - "tokenSource": "none" - }, - "available_output_styles": [ - "default", - "Explanatory", - "Learning" - ], - "commands": [ - { - "argumentHint": "", - "description": "Clear conversation history but keep a summary in context. Optional: /compact [instructions for summarization]", - "name": "compact" - }, - { - "argumentHint": "", - "description": "Visualize current context usage as a colored grid", - "name": "context" - }, - { - "argumentHint": "", - "description": "Show the total cost and duration of the current session", - "name": "cost" - }, - { - "argumentHint": "", - "description": "Initialize a new CLAUDE.md file with codebase documentation", - "name": "init" - }, - { - "argumentHint": "", - "description": "Get comments from a GitHub pull request", - "name": "pr-comments" - }, - { - "argumentHint": "", - "description": "View release notes", - "name": "release-notes" - }, - { - "argumentHint": "", - "description": "List current todo items", - "name": "todos" - }, - { - "argumentHint": "", - "description": "Review a pull request", - "name": "review" - }, - { - "argumentHint": "", - "description": "Complete a security review of the pending changes on the current branch", - "name": "security-review" - } - ], - "models": [ - { - "description": "Use the default model (currently Sonnet 4.5) \u00b7 $3/$15 per Mtok", - "displayName": "Default (recommended)", - "value": "default" - }, - { - "description": "Opus 4.5 \u00b7 Most capable for complex work \u00b7 $5/$25 per Mtok", - "displayName": "Opus", - "value": "opus" - }, - { - "description": "Haiku 4.5 \u00b7 Fastest for quick answers \u00b7 $1/$5 per Mtok", - "displayName": "Haiku", - "value": "haiku" - }, - { - "description": "Custom model", - "displayName": "claude-haiku-4-5-20251001", - "value": "claude-haiku-4-5-20251001" - } - ], - "output_style": "default" - }, - "subtype": "success" - }, - "type": "control_response" - } - }, - { - "op": "write", - "payload": { - "kind": "json", - "value": { - "message": { - "content": "What is 2 + 2?", - "role": "user" - }, - "session_id": "default", - "type": "user" - } - } - }, - { - "op": "read", - "payload": { - "agents": [ - "general-purpose", - "statusline-setup", - "Explore", - "Plan" - ], - "apiKeySource": "ANTHROPIC_API_KEY", - "claude_code_version": "2.0.53", - "cwd": "", - "mcp_servers": [], - "model": "claude-haiku-4-5-20251001", - "output_style": "default", - "permissionMode": "bypassPermissions", - "plugins": [], - "session_id": "3b225142-68d3-4288-a51d-3ae170d30d38", - "skills": [], - "slash_commands": [ - "compact", - "context", - "cost", - "init", - "pr-comments", - "release-notes", - "todos", - "review", - "security-review" - ], - "subtype": "init", - "tools": [ - "Task", - "Bash", - "Glob", - "Grep", - "ExitPlanMode", - "Read", - "Edit", - "Write", - "NotebookEdit", - "WebFetch", - "TodoWrite", - "WebSearch", - "BashOutput", - "KillShell", - "Skill", - "SlashCommand", - "EnterPlanMode" - ], - "type": "system", - "uuid": "a9253a2e-aab7-4374-84a2-3580fe791d30" - } - }, - { - "op": "read", - "payload": { - "message": { - "content": [ - { - "text": "2 + 2 = 4\n\nThis is basic arithmetic: when you add 2 and 2 together, you get 4.", - "type": "text" - } - ], - "context_management": null, - "id": "msg_01S14VLS9fHC1hRMRULwVXdv", - "model": "claude-haiku-4-5-20251001", - "role": "assistant", - "stop_reason": null, - "stop_sequence": null, - "type": "message", - "usage": { - "cache_creation": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 13883 - }, - "cache_creation_input_tokens": 13883, - "cache_read_input_tokens": 0, - "inference_geo": "not_available", - "input_tokens": 3, - "output_tokens": 1, - "service_tier": "standard" - } - }, - "parent_tool_use_id": null, - "session_id": "3b225142-68d3-4288-a51d-3ae170d30d38", - "type": "assistant", - "uuid": "6ac00cbc-1fee-4be2-834c-54a67b5da8a2" - } - }, - { - "op": "read", - "payload": { - "duration_api_ms": 3781, - "duration_ms": 1950, - "is_error": false, - "modelUsage": { - "claude-haiku-4-5-20251001": { - "cacheCreationInputTokens": 13883, - "cacheReadInputTokens": 0, - "contextWindow": 200000, - "costUSD": 0.019255750000000002, - "inputTokens": 967, - "outputTokens": 187, - "webSearchRequests": 0 - } - }, - "num_turns": 1, - "permission_denials": [], - "result": "2 + 2 = 4\n\nThis is basic arithmetic: when you add 2 and 2 together, you get 4.", - "session_id": "3b225142-68d3-4288-a51d-3ae170d30d38", - "subtype": "success", - "total_cost_usd": 0.019255750000000002, - "type": "result", - "usage": { - "cache_creation": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 13883 - }, - "cache_creation_input_tokens": 13883, - "cache_read_input_tokens": 0, - "input_tokens": 3, - "output_tokens": 36, - "server_tool_use": { - "web_fetch_requests": 0, - "web_search_requests": 0 - }, - "service_tier": "standard" - }, - "uuid": "99178824-6110-4d25-a4ab-5653e3701230" - } - } - ], - "sdk_version": "0.1.10" -} diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_query_async_iterable_asyncgen_single__sdk_0_1_48.json b/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_query_async_iterable_asyncgen_single__sdk_0_1_48.json deleted file mode 100644 index a4f49b0c..00000000 --- a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_query_async_iterable_asyncgen_single__sdk_0_1_48.json +++ /dev/null @@ -1,420 +0,0 @@ -{ - "cassette_name": "test_query_async_iterable_asyncgen_single", - "events": [ - { - "op": "write", - "payload": { - "kind": "json", - "value": { - "request": { - "hooks": null, - "subtype": "initialize" - }, - "request_id": "req_1_89b41c03", - "type": "control_request" - } - } - }, - { - "op": "read", - "payload": { - "response": { - "request_id": "req_1_89b41c03", - "response": { - "account": { - "apiKeySource": "ANTHROPIC_API_KEY", - "tokenSource": "none" - }, - "agents": [ - { - "description": "General-purpose agent for researching complex questions, searching for code, and executing multi-step tasks. When you are searching for a keyword or file and are not confident that you will find the right match in the first few tries use this agent to perform the search for you.", - "name": "general-purpose" - }, - { - "description": "Use this agent to configure the user's Claude Code status line setting.", - "model": "sonnet", - "name": "statusline-setup" - }, - { - "description": "Fast agent specialized for exploring codebases. Use this when you need to quickly find files by patterns (eg. \"src/components/**/*.tsx\"), search code for keywords (eg. \"API endpoints\"), or answer questions about the codebase (eg. \"how do API endpoints work?\"). When calling this agent, specify the desired thoroughness level: \"quick\" for basic searches, \"medium\" for moderate exploration, or \"very thorough\" for comprehensive analysis across multiple locations and naming conventions.", - "model": "haiku", - "name": "Explore" - }, - { - "description": "Software architect agent for designing implementation plans. Use this when you need to plan the implementation strategy for a task. Returns step-by-step plans, identifies critical files, and considers architectural trade-offs.", - "name": "Plan" - } - ], - "available_output_styles": [ - "default", - "Explanatory", - "Learning" - ], - "commands": [ - { - "argumentHint": "", - "description": "Use when the user wants to customize keyboard shortcuts, rebind keys, add chord bindings, or modify ~/.claude/keybindings.json. Examples: \"rebind ctrl+s\", \"add a chord shortcut\", \"change the submit key\", \"customize keybindings\". (bundled)", - "name": "keybindings-help" - }, - { - "argumentHint": "[issue description]", - "description": "Enable debug logging for this session and help diagnose issues (bundled)", - "name": "debug" - }, - { - "argumentHint": "", - "description": "Review changed code for reuse, quality, and efficiency, then fix any issues found. (bundled)", - "name": "simplify" - }, - { - "argumentHint": "", - "description": "Research and plan a large-scale change, then execute it in parallel across 5\u201330 isolated worktree agents that each open a PR. (bundled)", - "name": "batch" - }, - { - "argumentHint": "[interval] ", - "description": "Run a prompt or slash command on a recurring interval (e.g. /loop 5m /foo, defaults to 10m) (bundled)", - "name": "loop" - }, - { - "argumentHint": "", - "description": "Build apps with the Claude API or Anthropic SDK.\nTRIGGER when: code imports `anthropic`/`@anthropic-ai/sdk`/`claude_agent_sdk`, or user asks to use Claude API, Anthropic SDKs, or Agent SDK.\nDO NOT TRIGGER when: code imports `openai`/other AI SDK, general programming, or ML/data-science tasks. (bundled)", - "name": "claude-api" - }, - { - "argumentHint": "", - "description": "Clear conversation history but keep a summary in context. Optional: /compact [instructions for summarization]", - "name": "compact" - }, - { - "argumentHint": "", - "description": "Show current context usage", - "name": "context" - }, - { - "argumentHint": "", - "description": "Show the total cost and duration of the current session", - "name": "cost" - }, - { - "argumentHint": "", - "description": "Dump the JS heap to ~/Desktop", - "name": "heapdump" - }, - { - "argumentHint": "", - "description": "Initialize a new CLAUDE.md file with codebase documentation", - "name": "init" - }, - { - "argumentHint": "", - "description": "Get comments from a GitHub pull request", - "name": "pr-comments" - }, - { - "argumentHint": "", - "description": "View release notes", - "name": "release-notes" - }, - { - "argumentHint": "", - "description": "Review a pull request", - "name": "review" - }, - { - "argumentHint": "", - "description": "Complete a security review of the pending changes on the current branch", - "name": "security-review" - }, - { - "argumentHint": "", - "description": "Generate a report analyzing your Claude Code sessions", - "name": "insights" - } - ], - "models": [ - { - "description": "Use the default model (currently Sonnet 4.6) \u00b7 $3/$15 per Mtok", - "displayName": "Default (recommended)", - "supportedEffortLevels": [ - "low", - "medium", - "high", - "max" - ], - "supportsAdaptiveThinking": true, - "supportsEffort": true, - "value": "default" - }, - { - "description": "Sonnet 4.6 for long sessions \u00b7 $6/$22.50 per Mtok", - "displayName": "Sonnet (1M context)", - "supportedEffortLevels": [ - "low", - "medium", - "high", - "max" - ], - "supportsAdaptiveThinking": true, - "supportsEffort": true, - "value": "sonnet[1m]" - }, - { - "description": "Opus 4.6 \u00b7 Most capable for complex work \u00b7 $5/$25 per Mtok", - "displayName": "Opus", - "supportedEffortLevels": [ - "low", - "medium", - "high", - "max" - ], - "supportsAdaptiveThinking": true, - "supportsEffort": true, - "supportsFastMode": true, - "value": "opus" - }, - { - "description": "Opus 4.6 for long sessions \u00b7 $10/$37.50 per Mtok", - "displayName": "Opus (1M context)", - "supportedEffortLevels": [ - "low", - "medium", - "high", - "max" - ], - "supportsAdaptiveThinking": true, - "supportsEffort": true, - "supportsFastMode": true, - "value": "opus[1m]" - }, - { - "description": "Haiku 4.5 \u00b7 Fastest for quick answers \u00b7 $1/$5 per Mtok", - "displayName": "Haiku", - "value": "haiku" - }, - { - "description": "claude-haiku-4-5-20251001", - "displayName": "Haiku 4.5", - "value": "claude-haiku-4-5-20251001" - } - ], - "output_style": "default", - "pid": 99989 - }, - "subtype": "success" - }, - "type": "control_response" - } - }, - { - "op": "write", - "payload": { - "kind": "json", - "value": { - "message": { - "content": "What is 2 + 2?", - "role": "user" - }, - "session_id": "default", - "type": "user" - } - } - }, - { - "op": "read", - "payload": { - "agents": [ - "general-purpose", - "statusline-setup", - "Explore", - "Plan" - ], - "apiKeySource": "ANTHROPIC_API_KEY", - "claude_code_version": "2.1.71", - "cwd": "", - "fast_mode_state": "off", - "mcp_servers": [], - "model": "claude-haiku-4-5-20251001", - "output_style": "default", - "permissionMode": "bypassPermissions", - "plugins": [], - "session_id": "e4923f50-caa1-4bfa-9307-3ada79c71dfb", - "skills": [ - "keybindings-help", - "debug", - "simplify", - "batch", - "loop", - "claude-api" - ], - "slash_commands": [ - "keybindings-help", - "debug", - "simplify", - "batch", - "loop", - "claude-api", - "compact", - "context", - "cost", - "heapdump", - "init", - "pr-comments", - "release-notes", - "review", - "security-review", - "insights" - ], - "subtype": "init", - "tools": [ - "Task", - "TaskOutput", - "Bash", - "Glob", - "Grep", - "ExitPlanMode", - "Read", - "Edit", - "Write", - "NotebookEdit", - "WebFetch", - "TodoWrite", - "WebSearch", - "TaskStop", - "AskUserQuestion", - "Skill", - "EnterPlanMode", - "EnterWorktree", - "CronCreate", - "CronDelete", - "CronList", - "ToolSearch" - ], - "type": "system", - "uuid": "428028b4-37fa-47d7-91e7-ccf95d0d0305" - } - }, - { - "op": "read", - "payload": { - "message": { - "content": [ - { - "signature": "", - "thinking": "The user is asking a simple arithmetic question: \"What is 2 + 2?\"\n\nThis is straightforward mathematics. 2 + 2 = 4.\n\nThere's no need to use any tools for this - it's a basic calculation that I can answer directly.", - "type": "thinking" - } - ], - "context_management": null, - "id": "msg_01V1cusnrvq36te6JJ8ctNRx", - "model": "claude-haiku-4-5-20251001", - "role": "assistant", - "stop_reason": null, - "stop_sequence": null, - "type": "message", - "usage": { - "cache_creation": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 4179 - }, - "cache_creation_input_tokens": 4179, - "cache_read_input_tokens": 13723, - "inference_geo": "not_available", - "input_tokens": 10, - "output_tokens": 5, - "service_tier": "standard" - } - }, - "parent_tool_use_id": null, - "session_id": "e4923f50-caa1-4bfa-9307-3ada79c71dfb", - "type": "assistant", - "uuid": "8046841f-0e0c-4b60-bfe9-e51ed87a804d" - } - }, - { - "op": "read", - "payload": { - "message": { - "content": [ - { - "text": "2 + 2 = 4", - "type": "text" - } - ], - "context_management": null, - "id": "msg_01V1cusnrvq36te6JJ8ctNRx", - "model": "claude-haiku-4-5-20251001", - "role": "assistant", - "stop_reason": null, - "stop_sequence": null, - "type": "message", - "usage": { - "cache_creation": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 4179 - }, - "cache_creation_input_tokens": 4179, - "cache_read_input_tokens": 13723, - "inference_geo": "not_available", - "input_tokens": 10, - "output_tokens": 5, - "service_tier": "standard" - } - }, - "parent_tool_use_id": null, - "session_id": "e4923f50-caa1-4bfa-9307-3ada79c71dfb", - "type": "assistant", - "uuid": "8aad0a19-297d-4a59-bcbf-3e5c46174df9" - } - }, - { - "op": "read", - "payload": { - "duration_api_ms": 1866, - "duration_ms": 2003, - "fast_mode_state": "off", - "is_error": false, - "modelUsage": { - "claude-haiku-4-5-20251001": { - "cacheCreationInputTokens": 4179, - "cacheReadInputTokens": 13723, - "contextWindow": 200000, - "costUSD": 0.0070060500000000015, - "inputTokens": 10, - "maxOutputTokens": 32000, - "outputTokens": 80, - "webSearchRequests": 0 - } - }, - "num_turns": 1, - "permission_denials": [], - "result": "2 + 2 = 4", - "session_id": "e4923f50-caa1-4bfa-9307-3ada79c71dfb", - "stop_reason": "end_turn", - "subtype": "success", - "total_cost_usd": 0.0070060500000000015, - "type": "result", - "usage": { - "cache_creation": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 4179 - }, - "cache_creation_input_tokens": 4179, - "cache_read_input_tokens": 13723, - "inference_geo": "", - "input_tokens": 10, - "iterations": [], - "output_tokens": 80, - "server_tool_use": { - "web_fetch_requests": 0, - "web_search_requests": 0 - }, - "service_tier": "standard", - "speed": "standard" - }, - "uuid": "53aa55c0-0ec7-41cc-ba62-8a6e0672a4fb" - } - } - ], - "sdk_version": "0.1.48" -} diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_query_async_iterable_custom_async_iterable__sdk_0_1_10.json b/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_query_async_iterable_custom_async_iterable__sdk_0_1_10.json deleted file mode 100644 index 25475111..00000000 --- a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_query_async_iterable_custom_async_iterable__sdk_0_1_10.json +++ /dev/null @@ -1,271 +0,0 @@ -{ - "cassette_name": "test_query_async_iterable_custom_async_iterable", - "events": [ - { - "op": "write", - "payload": { - "kind": "json", - "value": { - "request": { - "hooks": null, - "subtype": "initialize" - }, - "request_id": "req_1_235d5d62", - "type": "control_request" - } - } - }, - { - "op": "read", - "payload": { - "response": { - "request_id": "req_1_235d5d62", - "response": { - "account": { - "apiKeySource": "ANTHROPIC_API_KEY", - "tokenSource": "none" - }, - "available_output_styles": [ - "default", - "Explanatory", - "Learning" - ], - "commands": [ - { - "argumentHint": "", - "description": "Clear conversation history but keep a summary in context. Optional: /compact [instructions for summarization]", - "name": "compact" - }, - { - "argumentHint": "", - "description": "Visualize current context usage as a colored grid", - "name": "context" - }, - { - "argumentHint": "", - "description": "Show the total cost and duration of the current session", - "name": "cost" - }, - { - "argumentHint": "", - "description": "Initialize a new CLAUDE.md file with codebase documentation", - "name": "init" - }, - { - "argumentHint": "", - "description": "Get comments from a GitHub pull request", - "name": "pr-comments" - }, - { - "argumentHint": "", - "description": "View release notes", - "name": "release-notes" - }, - { - "argumentHint": "", - "description": "List current todo items", - "name": "todos" - }, - { - "argumentHint": "", - "description": "Review a pull request", - "name": "review" - }, - { - "argumentHint": "", - "description": "Complete a security review of the pending changes on the current branch", - "name": "security-review" - } - ], - "models": [ - { - "description": "Use the default model (currently Sonnet 4.5) \u00b7 $3/$15 per Mtok", - "displayName": "Default (recommended)", - "value": "default" - }, - { - "description": "Opus 4.5 \u00b7 Most capable for complex work \u00b7 $5/$25 per Mtok", - "displayName": "Opus", - "value": "opus" - }, - { - "description": "Haiku 4.5 \u00b7 Fastest for quick answers \u00b7 $1/$5 per Mtok", - "displayName": "Haiku", - "value": "haiku" - }, - { - "description": "Custom model", - "displayName": "claude-haiku-4-5-20251001", - "value": "claude-haiku-4-5-20251001" - } - ], - "output_style": "default" - }, - "subtype": "success" - }, - "type": "control_response" - } - }, - { - "op": "write", - "payload": { - "kind": "json", - "value": { - "message": { - "content": "Custom 1", - "role": "user" - }, - "session_id": "default", - "type": "user" - } - } - }, - { - "op": "write", - "payload": { - "kind": "json", - "value": { - "message": { - "content": "Custom 2", - "role": "user" - }, - "session_id": "default", - "type": "user" - } - } - }, - { - "op": "read", - "payload": { - "agents": [ - "general-purpose", - "statusline-setup", - "Explore", - "Plan" - ], - "apiKeySource": "ANTHROPIC_API_KEY", - "claude_code_version": "2.0.53", - "cwd": "", - "mcp_servers": [], - "model": "claude-haiku-4-5-20251001", - "output_style": "default", - "permissionMode": "bypassPermissions", - "plugins": [], - "session_id": "36b62d67-cdd0-48d9-ad08-1ea9fa9a8e24", - "skills": [], - "slash_commands": [ - "compact", - "context", - "cost", - "init", - "pr-comments", - "release-notes", - "todos", - "review", - "security-review" - ], - "subtype": "init", - "tools": [ - "Task", - "Bash", - "Glob", - "Grep", - "ExitPlanMode", - "Read", - "Edit", - "Write", - "NotebookEdit", - "WebFetch", - "TodoWrite", - "WebSearch", - "BashOutput", - "KillShell", - "Skill", - "SlashCommand", - "EnterPlanMode" - ], - "type": "system", - "uuid": "d9078887-f8b5-4aff-9cdc-f78d7ff2cade" - } - }, - { - "op": "read", - "payload": { - "message": { - "content": [ - { - "text": "I'll help you with a custom task! However, I need more information about what you'd like me to do. Could you please provide:\n\n1. **What is the task?** (e.g., code changes, file operations, research, debugging, etc.)\n2. **What codebase or files are involved?** (if applicable)\n3. **Any specific requirements or constraints?**\n\nOnce you give me more details, I'll be able to assist you effectively!", - "type": "text" - } - ], - "context_management": null, - "id": "msg_01UBBAVvUYXYu5P3heQ99Ew5", - "model": "claude-haiku-4-5-20251001", - "role": "assistant", - "stop_reason": null, - "stop_sequence": null, - "type": "message", - "usage": { - "cache_creation": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 323 - }, - "cache_creation_input_tokens": 323, - "cache_read_input_tokens": 13554, - "inference_geo": "not_available", - "input_tokens": 3, - "output_tokens": 5, - "service_tier": "standard" - } - }, - "parent_tool_use_id": null, - "session_id": "36b62d67-cdd0-48d9-ad08-1ea9fa9a8e24", - "type": "assistant", - "uuid": "57af45bb-7949-4fab-87c0-a739043f20cb" - } - }, - { - "op": "read", - "payload": { - "duration_api_ms": 3843, - "duration_ms": 2109, - "is_error": false, - "modelUsage": { - "claude-haiku-4-5-20251001": { - "cacheCreationInputTokens": 323, - "cacheReadInputTokens": 13554, - "contextWindow": 200000, - "costUSD": 0.00376115, - "inputTokens": 967, - "outputTokens": 207, - "webSearchRequests": 0 - } - }, - "num_turns": 1, - "permission_denials": [], - "result": "I'll help you with a custom task! However, I need more information about what you'd like me to do. Could you please provide:\n\n1. **What is the task?** (e.g., code changes, file operations, research, debugging, etc.)\n2. **What codebase or files are involved?** (if applicable)\n3. **Any specific requirements or constraints?**\n\nOnce you give me more details, I'll be able to assist you effectively!", - "session_id": "36b62d67-cdd0-48d9-ad08-1ea9fa9a8e24", - "subtype": "success", - "total_cost_usd": 0.00376115, - "type": "result", - "usage": { - "cache_creation": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 323 - }, - "cache_creation_input_tokens": 323, - "cache_read_input_tokens": 13554, - "input_tokens": 3, - "output_tokens": 105, - "server_tool_use": { - "web_fetch_requests": 0, - "web_search_requests": 0 - }, - "service_tier": "standard" - }, - "uuid": "67334230-d67e-448d-ae3f-806001c95a22" - } - } - ], - "sdk_version": "0.1.10" -} diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_query_async_iterable_custom_async_iterable__sdk_0_1_48.json b/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_query_async_iterable_custom_async_iterable__sdk_0_1_48.json deleted file mode 100644 index 9716921b..00000000 --- a/py/src/braintrust/wrappers/claude_agent_sdk/cassettes/test_query_async_iterable_custom_async_iterable__sdk_0_1_48.json +++ /dev/null @@ -1,434 +0,0 @@ -{ - "cassette_name": "test_query_async_iterable_custom_async_iterable", - "events": [ - { - "op": "write", - "payload": { - "kind": "json", - "value": { - "request": { - "hooks": null, - "subtype": "initialize" - }, - "request_id": "req_1_d304ec28", - "type": "control_request" - } - } - }, - { - "op": "read", - "payload": { - "response": { - "request_id": "req_1_d304ec28", - "response": { - "account": { - "apiKeySource": "ANTHROPIC_API_KEY", - "tokenSource": "none" - }, - "agents": [ - { - "description": "General-purpose agent for researching complex questions, searching for code, and executing multi-step tasks. When you are searching for a keyword or file and are not confident that you will find the right match in the first few tries use this agent to perform the search for you.", - "name": "general-purpose" - }, - { - "description": "Use this agent to configure the user's Claude Code status line setting.", - "model": "sonnet", - "name": "statusline-setup" - }, - { - "description": "Fast agent specialized for exploring codebases. Use this when you need to quickly find files by patterns (eg. \"src/components/**/*.tsx\"), search code for keywords (eg. \"API endpoints\"), or answer questions about the codebase (eg. \"how do API endpoints work?\"). When calling this agent, specify the desired thoroughness level: \"quick\" for basic searches, \"medium\" for moderate exploration, or \"very thorough\" for comprehensive analysis across multiple locations and naming conventions.", - "model": "haiku", - "name": "Explore" - }, - { - "description": "Software architect agent for designing implementation plans. Use this when you need to plan the implementation strategy for a task. Returns step-by-step plans, identifies critical files, and considers architectural trade-offs.", - "name": "Plan" - } - ], - "available_output_styles": [ - "default", - "Explanatory", - "Learning" - ], - "commands": [ - { - "argumentHint": "", - "description": "Use when the user wants to customize keyboard shortcuts, rebind keys, add chord bindings, or modify ~/.claude/keybindings.json. Examples: \"rebind ctrl+s\", \"add a chord shortcut\", \"change the submit key\", \"customize keybindings\". (bundled)", - "name": "keybindings-help" - }, - { - "argumentHint": "[issue description]", - "description": "Enable debug logging for this session and help diagnose issues (bundled)", - "name": "debug" - }, - { - "argumentHint": "", - "description": "Review changed code for reuse, quality, and efficiency, then fix any issues found. (bundled)", - "name": "simplify" - }, - { - "argumentHint": "", - "description": "Research and plan a large-scale change, then execute it in parallel across 5\u201330 isolated worktree agents that each open a PR. (bundled)", - "name": "batch" - }, - { - "argumentHint": "[interval] ", - "description": "Run a prompt or slash command on a recurring interval (e.g. /loop 5m /foo, defaults to 10m) (bundled)", - "name": "loop" - }, - { - "argumentHint": "", - "description": "Build apps with the Claude API or Anthropic SDK.\nTRIGGER when: code imports `anthropic`/`@anthropic-ai/sdk`/`claude_agent_sdk`, or user asks to use Claude API, Anthropic SDKs, or Agent SDK.\nDO NOT TRIGGER when: code imports `openai`/other AI SDK, general programming, or ML/data-science tasks. (bundled)", - "name": "claude-api" - }, - { - "argumentHint": "", - "description": "Clear conversation history but keep a summary in context. Optional: /compact [instructions for summarization]", - "name": "compact" - }, - { - "argumentHint": "", - "description": "Show current context usage", - "name": "context" - }, - { - "argumentHint": "", - "description": "Show the total cost and duration of the current session", - "name": "cost" - }, - { - "argumentHint": "", - "description": "Dump the JS heap to ~/Desktop", - "name": "heapdump" - }, - { - "argumentHint": "", - "description": "Initialize a new CLAUDE.md file with codebase documentation", - "name": "init" - }, - { - "argumentHint": "", - "description": "Get comments from a GitHub pull request", - "name": "pr-comments" - }, - { - "argumentHint": "", - "description": "View release notes", - "name": "release-notes" - }, - { - "argumentHint": "", - "description": "Review a pull request", - "name": "review" - }, - { - "argumentHint": "", - "description": "Complete a security review of the pending changes on the current branch", - "name": "security-review" - }, - { - "argumentHint": "", - "description": "Generate a report analyzing your Claude Code sessions", - "name": "insights" - } - ], - "models": [ - { - "description": "Use the default model (currently Sonnet 4.6) \u00b7 $3/$15 per Mtok", - "displayName": "Default (recommended)", - "supportedEffortLevels": [ - "low", - "medium", - "high", - "max" - ], - "supportsAdaptiveThinking": true, - "supportsEffort": true, - "value": "default" - }, - { - "description": "Sonnet 4.6 for long sessions \u00b7 $6/$22.50 per Mtok", - "displayName": "Sonnet (1M context)", - "supportedEffortLevels": [ - "low", - "medium", - "high", - "max" - ], - "supportsAdaptiveThinking": true, - "supportsEffort": true, - "value": "sonnet[1m]" - }, - { - "description": "Opus 4.6 \u00b7 Most capable for complex work \u00b7 $5/$25 per Mtok", - "displayName": "Opus", - "supportedEffortLevels": [ - "low", - "medium", - "high", - "max" - ], - "supportsAdaptiveThinking": true, - "supportsEffort": true, - "supportsFastMode": true, - "value": "opus" - }, - { - "description": "Opus 4.6 for long sessions \u00b7 $10/$37.50 per Mtok", - "displayName": "Opus (1M context)", - "supportedEffortLevels": [ - "low", - "medium", - "high", - "max" - ], - "supportsAdaptiveThinking": true, - "supportsEffort": true, - "supportsFastMode": true, - "value": "opus[1m]" - }, - { - "description": "Haiku 4.5 \u00b7 Fastest for quick answers \u00b7 $1/$5 per Mtok", - "displayName": "Haiku", - "value": "haiku" - }, - { - "description": "claude-haiku-4-5-20251001", - "displayName": "Haiku 4.5", - "value": "claude-haiku-4-5-20251001" - } - ], - "output_style": "default", - "pid": 372 - }, - "subtype": "success" - }, - "type": "control_response" - } - }, - { - "op": "write", - "payload": { - "kind": "json", - "value": { - "message": { - "content": "Custom 1", - "role": "user" - }, - "session_id": "default", - "type": "user" - } - } - }, - { - "op": "write", - "payload": { - "kind": "json", - "value": { - "message": { - "content": "Custom 2", - "role": "user" - }, - "session_id": "default", - "type": "user" - } - } - }, - { - "op": "read", - "payload": { - "agents": [ - "general-purpose", - "statusline-setup", - "Explore", - "Plan" - ], - "apiKeySource": "ANTHROPIC_API_KEY", - "claude_code_version": "2.1.71", - "cwd": "", - "fast_mode_state": "off", - "mcp_servers": [], - "model": "claude-haiku-4-5-20251001", - "output_style": "default", - "permissionMode": "bypassPermissions", - "plugins": [], - "session_id": "3169895f-b0a2-4c2e-9e08-d96bd7ab1eaa", - "skills": [ - "keybindings-help", - "debug", - "simplify", - "batch", - "loop", - "claude-api" - ], - "slash_commands": [ - "keybindings-help", - "debug", - "simplify", - "batch", - "loop", - "claude-api", - "compact", - "context", - "cost", - "heapdump", - "init", - "pr-comments", - "release-notes", - "review", - "security-review", - "insights" - ], - "subtype": "init", - "tools": [ - "Task", - "TaskOutput", - "Bash", - "Glob", - "Grep", - "ExitPlanMode", - "Read", - "Edit", - "Write", - "NotebookEdit", - "WebFetch", - "TodoWrite", - "WebSearch", - "TaskStop", - "AskUserQuestion", - "Skill", - "EnterPlanMode", - "EnterWorktree", - "CronCreate", - "CronDelete", - "CronList", - "ToolSearch" - ], - "type": "system", - "uuid": "9f87e72b-47cb-4141-9811-f0cccb091fbb" - } - }, - { - "op": "read", - "payload": { - "message": { - "content": [ - { - "signature": "", - "thinking": "The user has just sent \"Custom 1\" which appears to be a test message or placeholder. I should ask them what they need help with or what task they'd like me to assist with.", - "type": "thinking" - } - ], - "context_management": null, - "id": "msg_01DQNYLBHXw96x6razoxPAvS", - "model": "claude-haiku-4-5-20251001", - "role": "assistant", - "stop_reason": null, - "stop_sequence": null, - "type": "message", - "usage": { - "cache_creation": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 874 - }, - "cache_creation_input_tokens": 874, - "cache_read_input_tokens": 17022, - "inference_geo": "not_available", - "input_tokens": 10, - "output_tokens": 4, - "service_tier": "standard" - } - }, - "parent_tool_use_id": null, - "session_id": "3169895f-b0a2-4c2e-9e08-d96bd7ab1eaa", - "type": "assistant", - "uuid": "56cc6996-0b17-46c6-85c7-9578b900e51d" - } - }, - { - "op": "read", - "payload": { - "message": { - "content": [ - { - "text": "Hello! I'm Claude, an AI assistant. I see you've sent \"Custom 1\" - I'm not sure what you need help with. \n\nHow can I assist you? Here are some things I can help with:\n\n- **Code implementation**: Write, modify, or debug code\n- **Project exploration**: Search and understand your codebase\n- **Git operations**: Manage commits, branches, and pull requests\n- **Testing and debugging**: Run tests and troubleshoot issues\n- **Planning**: Design implementation strategies for complex features\n- **API and SDK work**: Help with Claude API or other integrations\n\nFeel free to share what you'd like to work on, and I'll be happy to help!", - "type": "text" - } - ], - "context_management": null, - "id": "msg_01DQNYLBHXw96x6razoxPAvS", - "model": "claude-haiku-4-5-20251001", - "role": "assistant", - "stop_reason": null, - "stop_sequence": null, - "type": "message", - "usage": { - "cache_creation": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 874 - }, - "cache_creation_input_tokens": 874, - "cache_read_input_tokens": 17022, - "inference_geo": "not_available", - "input_tokens": 10, - "output_tokens": 4, - "service_tier": "standard" - } - }, - "parent_tool_use_id": null, - "session_id": "3169895f-b0a2-4c2e-9e08-d96bd7ab1eaa", - "type": "assistant", - "uuid": "ae52b382-9ed6-43ea-94a3-76b46809ea9e" - } - }, - { - "op": "read", - "payload": { - "duration_api_ms": 2940, - "duration_ms": 3096, - "fast_mode_state": "off", - "is_error": false, - "modelUsage": { - "claude-haiku-4-5-20251001": { - "cacheCreationInputTokens": 874, - "cacheReadInputTokens": 17022, - "contextWindow": 200000, - "costUSD": 0.0038147, - "inputTokens": 10, - "maxOutputTokens": 32000, - "outputTokens": 202, - "webSearchRequests": 0 - } - }, - "num_turns": 1, - "permission_denials": [], - "result": "Hello! I'm Claude, an AI assistant. I see you've sent \"Custom 1\" - I'm not sure what you need help with. \n\nHow can I assist you? Here are some things I can help with:\n\n- **Code implementation**: Write, modify, or debug code\n- **Project exploration**: Search and understand your codebase\n- **Git operations**: Manage commits, branches, and pull requests\n- **Testing and debugging**: Run tests and troubleshoot issues\n- **Planning**: Design implementation strategies for complex features\n- **API and SDK work**: Help with Claude API or other integrations\n\nFeel free to share what you'd like to work on, and I'll be happy to help!", - "session_id": "3169895f-b0a2-4c2e-9e08-d96bd7ab1eaa", - "stop_reason": "end_turn", - "subtype": "success", - "total_cost_usd": 0.0038147, - "type": "result", - "usage": { - "cache_creation": { - "ephemeral_1h_input_tokens": 0, - "ephemeral_5m_input_tokens": 874 - }, - "cache_creation_input_tokens": 874, - "cache_read_input_tokens": 17022, - "inference_geo": "", - "input_tokens": 10, - "iterations": [], - "output_tokens": 202, - "server_tool_use": { - "web_fetch_requests": 0, - "web_search_requests": 0 - }, - "service_tier": "standard", - "speed": "standard" - }, - "uuid": "2c44d6fb-428c-4dfe-b339-589e1df5dd80" - } - } - ], - "sdk_version": "0.1.48" -} diff --git a/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py b/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py index 8ba914a3..2e8e49ca 100644 --- a/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py +++ b/py/src/braintrust/wrappers/claude_agent_sdk/test_wrapper.py @@ -4,6 +4,8 @@ import dataclasses import sys import types +from collections.abc import AsyncIterable +from contextlib import contextmanager from pathlib import Path from typing import Any, cast @@ -53,18 +55,32 @@ def memory_logger(): yield bgl +@contextmanager +def _patched_claude_sdk(*, wrap_client: bool = False, wrap_tool_class: bool = False): + original_client = claude_agent_sdk.ClaudeSDKClient + original_tool_class = claude_agent_sdk.SdkMcpTool + original_tool_fn = claude_agent_sdk.tool + + if wrap_client: + claude_agent_sdk.ClaudeSDKClient = _create_client_wrapper_class(original_client) + if wrap_tool_class: + claude_agent_sdk.SdkMcpTool = _create_tool_wrapper_class(original_tool_class) + + try: + yield + finally: + claude_agent_sdk.ClaudeSDKClient = original_client + claude_agent_sdk.SdkMcpTool = original_tool_class + claude_agent_sdk.tool = original_tool_fn + + @pytest.mark.skipif(not CLAUDE_SDK_AVAILABLE, reason="Claude Agent SDK not installed") @pytest.mark.asyncio async def test_calculator_with_multiple_operations(memory_logger): """Test claude_agent.py example - calculator with multiple operations.""" assert not memory_logger.pop() - original_client = claude_agent_sdk.ClaudeSDKClient - original_tool_class = claude_agent_sdk.SdkMcpTool - claude_agent_sdk.ClaudeSDKClient = _create_client_wrapper_class(original_client) - claude_agent_sdk.SdkMcpTool = _create_tool_wrapper_class(original_tool_class) - - try: + with _patched_claude_sdk(wrap_client=True, wrap_tool_class=True): # Create calculator tool async def calculator_handler(args): operation = args["operation"] @@ -137,10 +153,6 @@ async def calculator_handler(args): if type(message).__name__ == "ResultMessage": result_message = message - finally: - claude_agent_sdk.ClaudeSDKClient = original_client - claude_agent_sdk.SdkMcpTool = original_tool_class - spans = memory_logger.pop() task_spans = [s for s in spans if s["span_attributes"]["type"] == SpanTypeAttribute.TASK] @@ -270,29 +282,20 @@ async def __anext__(self): ) async def test_query_async_iterable(memory_logger, cassette_name, input_factory, expected_contents): """Test that async iterable inputs are captured as structured lists.""" + del cassette_name assert not memory_logger.pop() - original_client = claude_agent_sdk.ClaudeSDKClient - claude_agent_sdk.ClaudeSDKClient = _create_client_wrapper_class(original_client) - - try: - options = claude_agent_sdk.ClaudeAgentOptions( - model=TEST_MODEL, - permission_mode="bypassPermissions", - ) - transport = make_cassette_transport( - cassette_name=cassette_name, - prompt="", - options=options, - ) + wrapped_client_class = _create_client_wrapper_class(FakeClaudeSDKClient) + client = wrapped_client_class() + client._WrappedClaudeSDKClient__client.messages = [ # type: ignore[attr-defined] + AssistantMessage(content=[TextBlock("done")]), + ResultMessage(), + ] - async with claude_agent_sdk.ClaudeSDKClient(options=options, transport=transport) as client: - await client.query(input_factory()) - async for message in client.receive_response(): - if type(message).__name__ == "ResultMessage": - break - finally: - claude_agent_sdk.ClaudeSDKClient = original_client + await client.query(input_factory()) + async for message in client.receive_response(): + if type(message).__name__ == "ResultMessage": + break spans = memory_logger.pop() @@ -316,10 +319,7 @@ async def test_bundled_subagent_creates_task_span(memory_logger): if not _sdk_version_at_least("0.1.48"): pytest.skip("Bundled subagent task events were not observed on older Claude Agent SDK versions") - original_client = claude_agent_sdk.ClaudeSDKClient - claude_agent_sdk.ClaudeSDKClient = _create_client_wrapper_class(original_client) - - try: + with _patched_claude_sdk(wrap_client=True): options = claude_agent_sdk.ClaudeAgentOptions( model=TEST_MODEL, cwd=REPO_ROOT, @@ -341,8 +341,6 @@ async def test_bundled_subagent_creates_task_span(memory_logger): async for message in client.receive_response(): if type(message).__name__ == "ResultMessage": break - finally: - claude_agent_sdk.ClaudeSDKClient = original_client spans = memory_logger.pop() @@ -371,6 +369,566 @@ async def test_bundled_subagent_creates_task_span(memory_logger): llm_spans = [s for s in spans if s["span_attributes"]["type"] == SpanTypeAttribute.LLM] _assert_llm_spans_have_time_to_first_token(llm_spans) + assert any(subagent_span["span_id"] in llm_span["span_parents"] for subagent_span in subagent_spans for llm_span in llm_spans) + + delegated_llm_spans = [ + llm_span for llm_span in llm_spans if any(subagent_span["span_id"] in llm_span["span_parents"] for subagent_span in subagent_spans) + ] + assert delegated_llm_spans, "Expected at least one delegated LLM span nested under a subagent task span" + + assert any( + any(llm_span["span_id"] in tool_span["span_parents"] for llm_span in delegated_llm_spans) + for tool_span in tool_spans + ), "Expected delegated tool spans to nest under a delegated LLM span" + + +@pytest.mark.skipif(not CLAUDE_SDK_AVAILABLE, reason="Claude Agent SDK not installed") +@pytest.mark.asyncio +async def test_multiple_bundled_subagents_keep_outer_orchestration_separate(memory_logger, tmp_path): + assert not memory_logger.pop() + if not _sdk_version_at_least("0.1.48"): + pytest.skip("Bundled subagent task events were not observed on older Claude Agent SDK versions") + + workspace = tmp_path / "subagent_multi_workspace" + workspace.mkdir() + (workspace / "release_notes_alpha.md").write_text( + "# Alpha Release Notes\n\nversion = 2026.03.11-alpha\nowner = sdk-platform-alpha\n", + encoding="utf-8", + ) + (workspace / "release_notes_beta.md").write_text( + "# Beta Release Notes\n\nversion = 2026.03.11-beta\nowner = sdk-platform-beta\n", + encoding="utf-8", + ) + + with _patched_claude_sdk(wrap_client=True): + options = claude_agent_sdk.ClaudeAgentOptions( + model=TEST_MODEL, + cwd=workspace, + permission_mode="bypassPermissions", + max_turns=12, + ) + transport = make_cassette_transport( + cassette_name="test_multiple_bundled_subagents_keep_outer_orchestration_separate", + prompt="", + options=options, + ) + + async with claude_agent_sdk.ClaudeSDKClient(options=options, transport=transport) as client: + await client.query( + "Launch two bundled general-purpose subagents for two independent tasks. " + "Start both Agent tool calls before waiting on either result if the tool API allows it. " + "The first delegated subagent must use Bash and Read on release_notes_alpha.md and return only " + "'alpha: | '. " + "The second delegated subagent must use Bash and Read on release_notes_beta.md and return only " + "'beta: | '. " + "After both delegated agents finish, reply with exactly two lines in that same order. " + "Do not answer directly without using both subagents." + ) + async for message in client.receive_response(): + if type(message).__name__ == "ResultMessage": + break + + spans = memory_logger.pop() + task_spans = [s for s in spans if s["span_attributes"]["type"] == SpanTypeAttribute.TASK] + llm_spans = [s for s in spans if s["span_attributes"]["type"] == SpanTypeAttribute.LLM] + tool_spans = [s for s in spans if s["span_attributes"]["type"] == SpanTypeAttribute.TOOL] + + root_task_span = _find_span_by_name(task_spans, "Claude Agent") + subagent_spans = [s for s in task_spans if s["span_attributes"]["name"] != "Claude Agent"] + assert len(subagent_spans) >= 2, f"Expected at least two delegated task spans, got {len(subagent_spans)}" + + outer_llm_spans = [llm_span for llm_span in llm_spans if root_task_span["span_id"] in llm_span["span_parents"]] + assert outer_llm_spans, "Expected outer orchestration LLM spans under the root task" + + agent_tool_spans = [tool_span for tool_span in tool_spans if tool_span["span_attributes"]["name"] == "Agent"] + assert len(agent_tool_spans) >= 2, f"Expected at least two Agent tool spans, got {len(agent_tool_spans)}" + + subagent_span_ids = {subagent_span["span_id"] for subagent_span in subagent_spans} + for agent_tool_span in agent_tool_spans: + assert any(outer_llm_span["span_id"] in agent_tool_span["span_parents"] for outer_llm_span in outer_llm_spans) + assert not subagent_span_ids.intersection(agent_tool_span["span_parents"]) + + delegated_llm_spans = [ + llm_span for llm_span in llm_spans if subagent_span_ids.intersection(llm_span["span_parents"]) + ] + assert delegated_llm_spans, "Expected delegated LLM spans nested under delegated task spans" + + non_agent_tool_spans = [tool_span for tool_span in tool_spans if tool_span["span_attributes"]["name"] != "Agent"] + assert any( + any(delegated_llm_span["span_id"] in tool_span["span_parents"] for delegated_llm_span in delegated_llm_spans) + for tool_span in non_agent_tool_spans + ), "Expected delegated tool spans to nest under delegated LLM spans" + + +@pytest.mark.asyncio +async def test_delegated_subagent_llm_and_tool_spans_nest_under_task_span(memory_logger): + assert not memory_logger.pop() + + wrapped_client_class = _create_client_wrapper_class(FakeClaudeSDKClient) + client = wrapped_client_class() + client._WrappedClaudeSDKClient__client.messages = [ # type: ignore[attr-defined] + AssistantMessage( + content=[ + ToolUseBlock( + id="call-agent", + name="Agent", + input={"description": "Inspect release notes", "subagent_type": "general-purpose"}, + ) + ] + ), + TaskStartedMessage( + subtype="task_started", + data={"subtype": "task_started", "task_id": "task-subagent"}, + task_id="task-subagent", + description="Inspect release notes", + uuid="msg-start", + session_id="session-123", + tool_use_id="call-agent", + task_type="local_agent", + ), + AssistantMessage( + content=[ + ToolUseBlock( + id="call-read", + name="Read", + input={"file_path": "/tmp/release_notes.md"}, + ) + ], + ), + UserMessage( + content=[ToolResultBlock(tool_use_id="call-read", content=[TextBlock("version = 2026.03.11")])], + ), + TaskNotificationMessage( + subtype="task_notification", + data={"subtype": "task_notification", "task_id": "task-subagent"}, + task_id="task-subagent", + status="completed", + output_file="", + summary="Inspection complete", + uuid="msg-done", + session_id="session-123", + tool_use_id="call-agent", + usage={"total_tokens": 42, "tool_uses": 1, "duration_ms": 250}, + ), + UserMessage(content=[ToolResultBlock(tool_use_id="call-agent", content=[TextBlock("2026.03.11 | sdk-platform")])]), + ResultMessage(), + ] + + await client.query("Delegate this task.") + async for message in client.receive_response(): + if type(message).__name__ == "ResultMessage": + break + + spans = memory_logger.pop() + task_spans = _find_spans_by_type(spans, SpanTypeAttribute.TASK) + llm_spans = _find_spans_by_type(spans, SpanTypeAttribute.LLM) + tool_spans = _find_spans_by_type(spans, SpanTypeAttribute.TOOL) + + subagent_task_span = _find_span_by_name(task_spans, "Inspect release notes") + agent_tool_span = _find_span_by_name(tool_spans, "Agent") + read_tool_span = _find_span_by_name(tool_spans, "Read") + + assert agent_tool_span["span_id"] in subagent_task_span["span_parents"] + + delegated_llm_spans = [ + llm_span for llm_span in llm_spans if subagent_task_span["span_id"] in llm_span["span_parents"] + ] + assert len(delegated_llm_spans) == 1 + + delegated_llm_span = delegated_llm_spans[0] + assert delegated_llm_span["span_id"] in read_tool_span["span_parents"] + + +@pytest.mark.asyncio +async def test_multiple_subagent_orchestration_keeps_outer_agent_tool_calls_outside_active_subagent(memory_logger): + assert not memory_logger.pop() + + wrapped_client_class = _create_client_wrapper_class(FakeClaudeSDKClient) + client = wrapped_client_class() + client._WrappedClaudeSDKClient__client.messages = [ # type: ignore[attr-defined] + AssistantMessage( + content=[ + TextBlock("Launching the first delegated agent."), + ToolUseBlock( + id="call-alpha", + name="Agent", + input={"description": "Read alpha release notes", "subagent_type": "general-purpose"}, + ), + ] + ), + TaskStartedMessage( + subtype="task_started", + data={"subtype": "task_started", "task_id": "task-alpha"}, + task_id="task-alpha", + description="Read alpha release notes", + uuid="msg-alpha-start", + session_id="session-123", + tool_use_id="call-alpha", + task_type="local_agent", + ), + AssistantMessage( + content=[ + ToolUseBlock( + id="call-beta", + name="Agent", + input={"description": "Read beta release notes", "subagent_type": "general-purpose"}, + ) + ] + ), + TaskStartedMessage( + subtype="task_started", + data={"subtype": "task_started", "task_id": "task-beta"}, + task_id="task-beta", + description="Read beta release notes", + uuid="msg-beta-start", + session_id="session-123", + tool_use_id="call-beta", + task_type="local_agent", + ), + AssistantMessage( + content=[ToolUseBlock(id="read-alpha", name="Read", input={"file_path": "/tmp/release_notes_alpha.md"})], + parent_tool_use_id="call-alpha", + ), + UserMessage(content=[ToolResultBlock(tool_use_id="read-alpha", content=[TextBlock("alpha result")])]), + TaskNotificationMessage( + subtype="task_notification", + data={"subtype": "task_notification", "task_id": "task-alpha"}, + task_id="task-alpha", + status="completed", + output_file="", + summary="Alpha complete", + uuid="msg-alpha-done", + session_id="session-123", + tool_use_id="call-alpha", + usage={"total_tokens": 11, "tool_uses": 1, "duration_ms": 250}, + ), + AssistantMessage( + content=[ToolUseBlock(id="read-beta", name="Read", input={"file_path": "/tmp/release_notes_beta.md"})], + parent_tool_use_id="call-beta", + ), + UserMessage(content=[ToolResultBlock(tool_use_id="read-beta", content=[TextBlock("beta result")])]), + TaskNotificationMessage( + subtype="task_notification", + data={"subtype": "task_notification", "task_id": "task-beta"}, + task_id="task-beta", + status="completed", + output_file="", + summary="Beta complete", + uuid="msg-beta-done", + session_id="session-123", + tool_use_id="call-beta", + usage={"total_tokens": 12, "tool_uses": 1, "duration_ms": 300}, + ), + UserMessage( + content=[ + ToolResultBlock(tool_use_id="call-alpha", content=[TextBlock("alpha:2026.03.11-alpha | sdk-platform-alpha")]), + ToolResultBlock(tool_use_id="call-beta", content=[TextBlock("beta:2026.03.11-beta | sdk-platform-beta")]), + ] + ), + ResultMessage(), + ] + + await client.query("Launch two delegated subagents.") + async for message in client.receive_response(): + if type(message).__name__ == "ResultMessage": + break + + spans = memory_logger.pop() + task_spans = _find_spans_by_type(spans, SpanTypeAttribute.TASK) + llm_spans = _find_spans_by_type(spans, SpanTypeAttribute.LLM) + tool_spans = _find_spans_by_type(spans, SpanTypeAttribute.TOOL) + + root_task_span = _find_span_by_name(task_spans, "Claude Agent") + alpha_task_span = _find_span_by_name(task_spans, "Read alpha release notes") + beta_task_span = _find_span_by_name(task_spans, "Read beta release notes") + + agent_tool_spans = [span for span in tool_spans if span["span_attributes"]["name"] == "Agent"] + assert len(agent_tool_spans) == 2 + + outer_llm_spans = [ + llm_span for llm_span in llm_spans if root_task_span["span_id"] in llm_span["span_parents"] + ] + assert len(outer_llm_spans) == 1, f"Expected a single outer orchestration LLM span, got {len(outer_llm_spans)}" + outer_llm_span = outer_llm_spans[0] + + for agent_tool_span in agent_tool_spans: + assert outer_llm_span["span_id"] in agent_tool_span["span_parents"] + assert alpha_task_span["span_id"] not in agent_tool_span["span_parents"] + assert beta_task_span["span_id"] not in agent_tool_span["span_parents"] + + delegated_llm_spans = [ + llm_span + for llm_span in llm_spans + if alpha_task_span["span_id"] in llm_span["span_parents"] or beta_task_span["span_id"] in llm_span["span_parents"] + ] + assert delegated_llm_spans, "Expected delegated LLM spans nested under delegated task spans" + +@pytest.mark.asyncio +async def test_relay_user_messages_between_parallel_agent_calls_do_not_split_llm_span(memory_logger): + """Relay UserMessages (subagent prompt echoes without ToolResultBlocks) between + parallel Agent calls should not create separate outer LLM spans. + + The real Claude Agent SDK emits relay UserMessages between Agent tool calls + when subagents are launched concurrently. These relay messages contain only + text (the subagent prompt), not ToolResultBlocks. They should not be treated + as LLM turn boundaries. + """ + assert not memory_logger.pop() + + wrapped_client_class = _create_client_wrapper_class(FakeClaudeSDKClient) + client = wrapped_client_class() + client._WrappedClaudeSDKClient__client.messages = [ # type: ignore[attr-defined] + # Orchestrator responds with thinking + text + first Agent call + AssistantMessage( + content=[ + TextBlock("I'll launch two subagents."), + ToolUseBlock( + id="call-alpha", + name="Agent", + input={"description": "Read alpha release notes", "subagent_type": "general-purpose"}, + ), + ] + ), + # SDK relays the alpha subagent prompt as a UserMessage (no ToolResultBlock) + UserMessage(content=[TextBlock("You must use Bash and Read on release_notes_alpha.md...")]), + # Orchestrator emits the second Agent call + AssistantMessage( + content=[ + ToolUseBlock( + id="call-beta", + name="Agent", + input={"description": "Read beta release notes", "subagent_type": "general-purpose"}, + ) + ] + ), + # SDK relays the beta subagent prompt as a UserMessage (no ToolResultBlock) + UserMessage(content=[TextBlock("You must use Bash and Read on release_notes_beta.md...")]), + # Task lifecycle events + TaskStartedMessage( + subtype="task_started", + data={"subtype": "task_started", "task_id": "task-alpha"}, + task_id="task-alpha", + description="Read alpha release notes", + uuid="msg-alpha-start", + session_id="session-123", + tool_use_id="call-alpha", + task_type="local_agent", + ), + TaskStartedMessage( + subtype="task_started", + data={"subtype": "task_started", "task_id": "task-beta"}, + task_id="task-beta", + description="Read beta release notes", + uuid="msg-beta-start", + session_id="session-123", + tool_use_id="call-beta", + task_type="local_agent", + ), + # Subagent completions + TaskNotificationMessage( + subtype="task_notification", + data={"subtype": "task_notification", "task_id": "task-alpha"}, + task_id="task-alpha", + status="completed", + output_file="", + summary="Alpha complete", + uuid="msg-alpha-done", + session_id="session-123", + tool_use_id="call-alpha", + usage={"total_tokens": 11, "tool_uses": 1, "duration_ms": 250}, + ), + TaskNotificationMessage( + subtype="task_notification", + data={"subtype": "task_notification", "task_id": "task-beta"}, + task_id="task-beta", + status="completed", + output_file="", + summary="Beta complete", + uuid="msg-beta-done", + session_id="session-123", + tool_use_id="call-beta", + usage={"total_tokens": 12, "tool_uses": 1, "duration_ms": 300}, + ), + # Final tool results (real turn boundary — has ToolResultBlocks) + UserMessage( + content=[ + ToolResultBlock(tool_use_id="call-alpha", content=[TextBlock("alpha:2026.03.11-alpha | sdk-platform-alpha")]), + ToolResultBlock(tool_use_id="call-beta", content=[TextBlock("beta:2026.03.11-beta | sdk-platform-beta")]), + ] + ), + # Final answer + AssistantMessage(content=[TextBlock("alpha:2026.03.11-alpha\nbeta:2026.03.11-beta")]), + ResultMessage(), + ] + + await client.query("Launch two delegated subagents.") + async for message in client.receive_response(): + if type(message).__name__ == "ResultMessage": + break + + spans = memory_logger.pop() + task_spans = _find_spans_by_type(spans, SpanTypeAttribute.TASK) + llm_spans = _find_spans_by_type(spans, SpanTypeAttribute.LLM) + tool_spans = _find_spans_by_type(spans, SpanTypeAttribute.TOOL) + + root_task_span = _find_span_by_name(task_spans, "Claude Agent") + + # Both Agent tool spans should exist + agent_tool_spans = [span for span in tool_spans if span["span_attributes"]["name"] == "Agent"] + assert len(agent_tool_spans) == 2, f"Expected 2 Agent tool spans, got {len(agent_tool_spans)}" + + # Both Agent tool spans should share the SAME parent LLM span + llm_span_ids = {span["span_id"] for span in llm_spans} + alpha_llm_parents = set(agent_tool_spans[0]["span_parents"]).intersection(llm_span_ids) + beta_llm_parents = set(agent_tool_spans[1]["span_parents"]).intersection(llm_span_ids) + assert alpha_llm_parents == beta_llm_parents, ( + f"Both Agent tool spans should share the same parent LLM span. " + f"Alpha parents: {alpha_llm_parents}, Beta parents: {beta_llm_parents}" + ) + + # Exactly one outer LLM span should parent both Agent tool calls + # (the final-answer LLM span is a separate, expected outer span) + orchestration_llm_spans = [ + llm_span for llm_span in llm_spans + if any(llm_span["span_id"] in agent_tool_span["span_parents"] for agent_tool_span in agent_tool_spans) + ] + assert len(orchestration_llm_spans) == 1, ( + f"Expected a single orchestration LLM span parenting both Agent tool calls " + f"(relay UserMessages without ToolResultBlocks should not split it), " + f"got {len(orchestration_llm_spans)}" + ) + + +@pytest.mark.asyncio +async def test_agent_tool_spans_encapsulate_child_task_spans(memory_logger): + """Agent TOOL spans must end after their child TASK spans, not before. + + The mid-stream tool_tracker.cleanup() in the AssistantMessage handler must + not close Agent TOOL spans that still have active child TASK spans. Those + Agent TOOL spans should only close when their ToolResult arrives. + """ + assert not memory_logger.pop() + + wrapped_client_class = _create_client_wrapper_class(FakeClaudeSDKClient) + client = wrapped_client_class() + client._WrappedClaudeSDKClient__client.messages = [ # type: ignore[attr-defined] + # Orchestrator responds with text + first Agent call + AssistantMessage( + content=[ + TextBlock("I'll launch two subagents."), + ToolUseBlock( + id="call-alpha", + name="Agent", + input={"description": "Read alpha", "subagent_type": "general-purpose"}, + ), + ] + ), + # SDK emits TaskStarted immediately after Agent ToolUse (real ordering) + TaskStartedMessage( + subtype="task_started", + data={"subtype": "task_started", "task_id": "task-alpha"}, + task_id="task-alpha", + description="Read alpha release notes", + uuid="msg-alpha-start", + session_id="session-123", + tool_use_id="call-alpha", + task_type="local_agent", + ), + # SDK relays the alpha subagent prompt (no ToolResultBlock) + UserMessage(content=[TextBlock("Read alpha release notes...")]), + # Orchestrator emits the second Agent call + AssistantMessage( + content=[ + ToolUseBlock( + id="call-beta", + name="Agent", + input={"description": "Read beta", "subagent_type": "general-purpose"}, + ) + ] + ), + # SDK emits TaskStarted immediately after Agent ToolUse (real ordering) + TaskStartedMessage( + subtype="task_started", + data={"subtype": "task_started", "task_id": "task-beta"}, + task_id="task-beta", + description="Read beta release notes", + uuid="msg-beta-start", + session_id="session-123", + tool_use_id="call-beta", + task_type="local_agent", + ), + # SDK relays the beta subagent prompt (no ToolResultBlock) + UserMessage(content=[TextBlock("Read beta release notes...")]), + # Both tasks complete + TaskNotificationMessage( + subtype="task_notification", + data={"subtype": "task_notification", "task_id": "task-alpha"}, + task_id="task-alpha", + status="completed", + output_file="", + summary="Alpha complete", + uuid="msg-alpha-done", + session_id="session-123", + tool_use_id="call-alpha", + usage={"total_tokens": 11, "tool_uses": 1, "duration_ms": 250}, + ), + TaskNotificationMessage( + subtype="task_notification", + data={"subtype": "task_notification", "task_id": "task-beta"}, + task_id="task-beta", + status="completed", + output_file="", + summary="Beta complete", + uuid="msg-beta-done", + session_id="session-123", + tool_use_id="call-beta", + usage={"total_tokens": 12, "tool_uses": 1, "duration_ms": 300}, + ), + # Final tool results (real turn boundary — has ToolResultBlocks) + UserMessage( + content=[ + ToolResultBlock(tool_use_id="call-alpha", content=[TextBlock("alpha result")]), + ToolResultBlock(tool_use_id="call-beta", content=[TextBlock("beta result")]), + ] + ), + # Final answer + AssistantMessage(content=[TextBlock("Done.")]), + ResultMessage(), + ] + + await client.query("Launch two subagents.") + async for message in client.receive_response(): + if type(message).__name__ == "ResultMessage": + break + + spans = memory_logger.pop() + task_spans = _find_spans_by_type(spans, SpanTypeAttribute.TASK) + tool_spans = _find_spans_by_type(spans, SpanTypeAttribute.TOOL) + + agent_tool_spans = [s for s in tool_spans if s["span_attributes"]["name"] == "Agent"] + assert len(agent_tool_spans) == 2, f"Expected 2 Agent tool spans, got {len(agent_tool_spans)}" + + child_task_spans = [s for s in task_spans if s["span_attributes"]["name"] != "Claude Agent"] + assert len(child_task_spans) == 2, f"Expected 2 child TASK spans, got {len(child_task_spans)}" + + # Each Agent TOOL span must end at or after its child TASK span + for agent_span in agent_tool_spans: + agent_end = agent_span["metrics"]["end"] + # Find child TASK span (parented under this Agent TOOL span) + children = [ + ts for ts in child_task_spans + if agent_span["span_id"] in ts.get("span_parents", []) + ] + assert len(children) == 1, ( + f"Agent span {agent_span['span_id']} should have exactly 1 child TASK span, " + f"got {len(children)}" + ) + child_end = children[0]["metrics"]["end"] + assert agent_end >= child_end, ( + f"Agent TOOL span must encapsulate its child TASK span. " + f"Agent end={agent_end}, child TASK end={child_end}" + ) + async def _single_message_generator(): """Generator yielding a single message.""" @@ -406,11 +964,13 @@ class ToolResultBlock: class AssistantMessage: content: list[Any] model: str = TEST_MODEL + parent_tool_use_id: str | None = None @dataclasses.dataclass class UserMessage: content: list[Any] + parent_tool_use_id: str | None = None @dataclasses.dataclass @@ -471,6 +1031,36 @@ def __init__( self.session_id = session_id +class FakeClaudeSDKClient: + def __init__(self, *args, **kwargs): + del args, kwargs + self.messages: list[Any] = [] + self.prompt: Any = None + + async def query(self, prompt, **kwargs): + del kwargs + self.prompt = prompt + if isinstance(prompt, AsyncIterable): + async for _ in prompt: + pass + return None + + async def receive_response(self): + for message in self.messages: + yield message + + async def __aenter__(self): + return self + + async def __aexit__(self, *args): + del args + return None + + +def _find_spans_by_type(spans: list[dict[str, Any]], span_type: str) -> list[dict[str, Any]]: + return [span for span in spans if span.get("span_attributes", {}).get("type") == span_type] + + def _make_fake_sdk_mcp_tool_class(): class FakeSdkMcpTool: def __init__(self, name, description, input_schema, handler, **kwargs): @@ -483,10 +1073,6 @@ def __init__(self, name, description, input_schema, handler, **kwargs): return FakeSdkMcpTool -def _find_spans_by_type(spans: list[dict[str, Any]], span_type: str) -> list[dict[str, Any]]: - return [span for span in spans if span.get("span_attributes", {}).get("type") == span_type] - - def _find_span_by_name(spans: list[dict[str, Any]], name: str) -> dict[str, Any]: for span in spans: if span["span_attributes"]["name"] == name: @@ -1001,7 +1587,7 @@ async def test_setup_claude_agent_sdk_repro_import_before_setup(memory_logger, m loop_errors = [] received_types = [] - try: + with _patched_claude_sdk(): assert setup_claude_agent_sdk(project=PROJECT_NAME, api_key=logger.TEST_API_KEY) assert getattr(consumer_module, "ClaudeSDKClient") is not original_client assert getattr(consumer_module, "SdkMcpTool") is not original_tool_class @@ -1028,10 +1614,6 @@ async def main() -> None: received_types.append(type(message).__name__) await main() - finally: - claude_agent_sdk.ClaudeSDKClient = original_client - claude_agent_sdk.SdkMcpTool = original_tool_class - claude_agent_sdk.tool = original_tool_fn assert loop_errors == [] assert "AssistantMessage" in received_types