From 3f6d01c6ccf3dd158107a0325abfad5b82db2d9b Mon Sep 17 00:00:00 2001 From: Abhijeet Prasad Date: Tue, 31 Mar 2026 10:59:50 -0400 Subject: [PATCH] ref(pydantic_ai): migrate wrapper to integration patchers Move the Pydantic AI wrapper implementation into the integrations package and keep the legacy wrapper module as a compatibility re-export. This aligns the provider with the newer integration architecture and consolidates its tests, auto-instrument wiring, and public exports in one place. Add generic class-scan support in the shared integration base so runtime-loaded subclasses can be rescanned and patched without leaking patch-marker details into tracing code. Use that helper for Pydantic AI model-class wrapping and move wrap_model_classes into patchers so patcher-specific behavior stays in the patching layer. --- py/noxfile.py | 6 +- py/src/braintrust/__init__.py | 6 +- py/src/braintrust/auto.py | 11 +- py/src/braintrust/integrations/__init__.py | 2 + py/src/braintrust/integrations/base.py | 148 +- .../integrations/pydantic_ai/__init__.py | 52 + .../cassettes/test_agent_run_async.yaml | 0 .../cassettes/test_agent_run_stream.yaml | 0 .../test_agent_run_stream_events.yaml | 0 ...st_agent_run_stream_structured_output.yaml | 0 .../cassettes/test_agent_run_stream_sync.yaml | 0 .../cassettes/test_agent_run_sync.yaml | 0 ...nt_stream_buffer_pattern_early_return.yaml | 0 .../test_agent_stream_early_break.yaml | 0 .../test_agent_structured_output.yaml | 0 .../test_agent_with_binary_content.yaml | 0 .../test_agent_with_custom_settings.yaml | 0 .../test_agent_with_document_input.yaml | 0 .../test_agent_with_message_history.yaml | 0 ...agent_with_model_settings_in_metadata.yaml | 110 ++ ...with_model_settings_override_in_input.yaml | 120 ++ ..._agent_with_system_prompt_in_metadata.yaml | 0 .../test_agent_with_tool_execution.yaml | 0 .../cassettes/test_agent_with_tools.yaml | 0 .../cassettes/test_auto_pydantic_ai.yaml | 112 ++ .../cassettes/test_direct_model_request.yaml | 0 
...s_nested_chat_span_without_class_scan.yaml | 109 ++ .../test_direct_model_request_stream.yaml | 0 ..._model_request_stream_complete_output.yaml | 0 ...test_direct_model_request_stream_sync.yaml | 0 .../test_direct_model_request_sync.yaml | 0 ...st_direct_model_request_with_settings.yaml | 0 .../test_model_class_span_names.yaml | 0 ...tream_sync_thread_context_propagation.yaml | 436 +++++ ...multiple_identical_sequential_streams.yaml | 0 .../test_multiple_sequential_streams.yaml | 0 .../cassettes/test_no_model_agent_run.yaml | 0 .../test_no_model_agent_run_with_logfire.yaml | 0 .../test_pydantic_wrapped_completion.yaml | 0 .../test_pydantic_wrapped_stream.yaml | 0 ...st_stream_buffer_pattern_early_return.yaml | 0 ...st_stream_early_break_async_generator.yaml | 314 ++++ .../test_tool_execution_creates_spans.yaml | 0 ..._not_depend_on_message_reconstruction.yaml | 217 +++ .../test_wrapper_agent_run_is_traced.yaml | 111 ++ .../integrations/pydantic_ai/integration.py | 32 + .../integrations/pydantic_ai/patchers.py | 201 +++ .../test_pydantic_ai_integration.py | 458 ++--- .../pydantic_ai}/test_pydantic_ai_logfire.py | 6 + .../test_pydantic_ai_wrap_openai.py | 6 + .../integrations/pydantic_ai/tracing.py | 1478 ++++++++++++++++ py/src/braintrust/wrappers/pydantic_ai.py | 1500 +---------------- 52 files changed, 3662 insertions(+), 1773 deletions(-) create mode 100644 py/src/braintrust/integrations/pydantic_ai/__init__.py rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_agent_run_async.yaml (100%) rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_agent_run_stream.yaml (100%) rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_agent_run_stream_events.yaml (100%) rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_agent_run_stream_structured_output.yaml (100%) rename py/src/braintrust/{wrappers => 
integrations/pydantic_ai}/cassettes/test_agent_run_stream_sync.yaml (100%) rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_agent_run_sync.yaml (100%) rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_agent_stream_buffer_pattern_early_return.yaml (100%) rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_agent_stream_early_break.yaml (100%) rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_agent_structured_output.yaml (100%) rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_agent_with_binary_content.yaml (100%) rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_agent_with_custom_settings.yaml (100%) rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_agent_with_document_input.yaml (100%) rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_agent_with_message_history.yaml (100%) create mode 100644 py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_model_settings_in_metadata.yaml create mode 100644 py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_model_settings_override_in_input.yaml rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_agent_with_system_prompt_in_metadata.yaml (100%) rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_agent_with_tool_execution.yaml (100%) rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_agent_with_tools.yaml (100%) create mode 100644 py/src/braintrust/integrations/pydantic_ai/cassettes/test_auto_pydantic_ai.yaml rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_direct_model_request.yaml (100%) create mode 100644 py/src/braintrust/integrations/pydantic_ai/cassettes/test_direct_model_request_creates_nested_chat_span_without_class_scan.yaml rename 
py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_direct_model_request_stream.yaml (100%) rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_direct_model_request_stream_complete_output.yaml (100%) rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_direct_model_request_stream_sync.yaml (100%) rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_direct_model_request_sync.yaml (100%) rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_direct_model_request_with_settings.yaml (100%) rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_model_class_span_names.yaml (100%) create mode 100644 py/src/braintrust/integrations/pydantic_ai/cassettes/test_model_request_stream_sync_thread_context_propagation.yaml rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_multiple_identical_sequential_streams.yaml (100%) rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_multiple_sequential_streams.yaml (100%) rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_no_model_agent_run.yaml (100%) rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_no_model_agent_run_with_logfire.yaml (100%) rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_pydantic_wrapped_completion.yaml (100%) rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_pydantic_wrapped_stream.yaml (100%) rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_stream_buffer_pattern_early_return.yaml (100%) create mode 100644 py/src/braintrust/integrations/pydantic_ai/cassettes/test_stream_early_break_async_generator.yaml rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_tool_execution_creates_spans.yaml (100%) create mode 100644 
py/src/braintrust/integrations/pydantic_ai/cassettes/test_tool_execution_tracing_does_not_depend_on_message_reconstruction.yaml create mode 100644 py/src/braintrust/integrations/pydantic_ai/cassettes/test_wrapper_agent_run_is_traced.yaml create mode 100644 py/src/braintrust/integrations/pydantic_ai/integration.py create mode 100644 py/src/braintrust/integrations/pydantic_ai/patchers.py rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/test_pydantic_ai_integration.py (89%) rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/test_pydantic_ai_logfire.py (94%) rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/test_pydantic_ai_wrap_openai.py (97%) create mode 100644 py/src/braintrust/integrations/pydantic_ai/tracing.py diff --git a/py/noxfile.py b/py/noxfile.py index 4294032e..fc5ab01c 100644 --- a/py/noxfile.py +++ b/py/noxfile.py @@ -124,7 +124,7 @@ def test_pydantic_ai_wrap_openai(session, version): """Test pydantic_ai with wrap_openai() approach - supports older versions.""" _install_test_deps(session) _install(session, "pydantic_ai", version) - _run_tests(session, f"{WRAPPER_DIR}/test_pydantic_ai_wrap_openai.py") + _run_tests(session, f"{INTEGRATION_DIR}/pydantic_ai/test_pydantic_ai_wrap_openai.py") _run_core_tests(session) @@ -137,7 +137,7 @@ def test_pydantic_ai_integration(session, version): session.skip("pydantic_ai integration tests require Python >= 3.10 (pydantic_ai 1.10.0+)") _install_test_deps(session) _install(session, "pydantic_ai", version) - _run_tests(session, f"{WRAPPER_DIR}/test_pydantic_ai_integration.py") + _run_tests(session, f"{INTEGRATION_DIR}/pydantic_ai/test_pydantic_ai_integration.py") _run_core_tests(session) @@ -149,7 +149,7 @@ def test_pydantic_ai_logfire(session): _install_test_deps(session) _install(session, "pydantic_ai") _install(session, "logfire") - _run_tests(session, f"{WRAPPER_DIR}/test_pydantic_ai_logfire.py") + _run_tests(session, 
f"{INTEGRATION_DIR}/pydantic_ai/test_pydantic_ai_logfire.py") @nox.session() diff --git a/py/src/braintrust/__init__.py b/py/src/braintrust/__init__.py index f40d80a3..02f7e252 100644 --- a/py/src/braintrust/__init__.py +++ b/py/src/braintrust/__init__.py @@ -76,6 +76,9 @@ def is_equal(expected, output): from .integrations.openrouter import ( wrap_openrouter, # noqa: F401 # type: ignore[reportUnusedImport] ) +from .integrations.pydantic_ai import ( + setup_pydantic_ai, # noqa: F401 # type: ignore[reportUnusedImport] +) from .logger import * from .logger import ( _internal_get_global_state, # noqa: F401 # type: ignore[reportUnusedImport] @@ -98,6 +101,3 @@ def is_equal(expected, output): from .wrappers.litellm import ( wrap_litellm, # noqa: F401 # type: ignore[reportUnusedImport] ) -from .wrappers.pydantic_ai import ( - setup_pydantic_ai, # noqa: F401 # type: ignore[reportUnusedImport] -) diff --git a/py/src/braintrust/auto.py b/py/src/braintrust/auto.py index 4ede9f96..189ea2f9 100644 --- a/py/src/braintrust/auto.py +++ b/py/src/braintrust/auto.py @@ -16,6 +16,7 @@ DSPyIntegration, GoogleGenAIIntegration, OpenRouterIntegration, + PydanticAIIntegration, ) @@ -124,7 +125,7 @@ def auto_instrument( if litellm: results["litellm"] = _instrument_litellm() if pydantic_ai: - results["pydantic_ai"] = _instrument_pydantic_ai() + results["pydantic_ai"] = _instrument_integration(PydanticAIIntegration) if google_genai: results["google_genai"] = _instrument_integration(GoogleGenAIIntegration) if openrouter: @@ -163,11 +164,3 @@ def _instrument_litellm() -> bool: return patch_litellm() return False - - -def _instrument_pydantic_ai() -> bool: - with _try_patch(): - from braintrust.wrappers.pydantic_ai import setup_pydantic_ai - - return setup_pydantic_ai() - return False diff --git a/py/src/braintrust/integrations/__init__.py b/py/src/braintrust/integrations/__init__.py index e4e6e208..3331f2a5 100644 --- a/py/src/braintrust/integrations/__init__.py +++ 
b/py/src/braintrust/integrations/__init__.py @@ -6,6 +6,7 @@ from .dspy import DSPyIntegration from .google_genai import GoogleGenAIIntegration from .openrouter import OpenRouterIntegration +from .pydantic_ai import PydanticAIIntegration __all__ = [ @@ -17,4 +18,5 @@ "DSPyIntegration", "GoogleGenAIIntegration", "OpenRouterIntegration", + "PydanticAIIntegration", ] diff --git a/py/src/braintrust/integrations/base.py b/py/src/braintrust/integrations/base.py index 8abd3320..62e0c8ce 100644 --- a/py/src/braintrust/integrations/base.py +++ b/py/src/braintrust/integrations/base.py @@ -20,6 +20,31 @@ class BasePatcher(ABC): patch_id: ClassVar[str | None] = None version_spec: ClassVar[str | None] = None priority: ClassVar[int] = 100 + rescan_on_setup: ClassVar[bool] = False + + @classmethod + def patch_marker_attr(cls) -> str: + """Return the sentinel attribute used to mark this patcher as applied.""" + suffix = re.sub(r"\W+", "_", cls.identifier()).strip("_") + return f"__braintrust_patched_{suffix}__" + + @classmethod + def has_patch_marker(cls, obj: Any) -> bool: + """Return whether *obj* is marked as patched by this patcher. + + For classes, read ``__dict__`` directly so markers inherited via the + MRO do not make subclasses appear locally patched. 
+ """ + if obj is None: + return False + if isinstance(obj, type): + return bool(obj.__dict__.get(cls.patch_marker_attr(), False)) + return bool(getattr(obj, cls.patch_marker_attr(), False)) + + @classmethod + def mark_patched(cls, obj: Any) -> None: + """Mark an object as patched by this patcher.""" + setattr(obj, cls.patch_marker_attr(), True) @classmethod def identifier(cls) -> str: @@ -44,6 +69,115 @@ def patch(cls, module: Any | None, version: str | None, *, target: Any | None = raise NotImplementedError +class ClassScanPatcher(BasePatcher): + """Base patcher for rescanning and patching discovered class hierarchies.""" + + rescan_on_setup: ClassVar[bool] = True + include_abstract_classes: ClassVar[bool] = False + target_module: ClassVar[str | None] = None + root_class_path: ClassVar[str | None] = None + + @classmethod + def resolve_scan_root(cls, module: Any | None, version: str | None, *, target: Any | None = None) -> Any | None: + """Return the object from which this patcher resolves its root class.""" + if target is not None: + return target + if cls.target_module is not None: + try: + return importlib.import_module(cls.target_module) + except ImportError: + return None + return module + + @classmethod + def iter_root_classes( + cls, + module: Any | None, + version: str | None, + *, + target: Any | None = None, + ) -> Iterable[type[Any]]: + """Yield root classes whose subclass trees should be scanned.""" + if cls.root_class_path is None: + return () + root = cls.resolve_scan_root(module, version, target=target) + if root is None: + return () + root_class = _resolve_attr_path(root, cls.root_class_path) + if root_class is None: + return () + return (root_class,) + + @classmethod + def resolve_root_classes( + cls, + module: Any | None, + version: str | None, + *, + target: Any | None = None, + ) -> tuple[type[Any], ...]: + """Return the currently discoverable root classes for this patcher.""" + try: + return tuple(cls.iter_root_classes(module, version, 
target=target)) + except ImportError: + return () + + @classmethod + def applies(cls, module: Any | None, version: str | None, *, target: Any | None = None) -> bool: + """Return whether any root classes are currently discoverable.""" + return super().applies(module, version, target=target) and bool( + cls.resolve_root_classes(module, version, target=target) + ) + + @classmethod + @abstractmethod + def patch_class(cls, target_class: type[Any]) -> bool | None: + """Patch one discovered class. + + Return ``False`` to skip marking the class as patched. Any other return + value is treated as a successful patch. + """ + raise NotImplementedError + + @classmethod + def iter_classes( + cls, + module: Any | None, + version: str | None, + *, + target: Any | None = None, + ) -> Iterable[type[Any]]: + """Yield discovered subclasses under the configured root classes.""" + + def walk(base_class: type[Any]) -> Iterable[type[Any]]: + for subclass in base_class.__subclasses__(): + if cls.include_abstract_classes or not getattr(subclass, "__abstractmethods__", None): + yield subclass + yield from walk(subclass) + + for root_class in cls.resolve_root_classes(module, version, target=target): + yield from walk(root_class) + + @classmethod + def is_patched(cls, module: Any | None, version: str | None, *, target: Any | None = None) -> bool: + """Return ``True`` when every currently discovered class is patched.""" + classes = tuple(cls.iter_classes(module, version, target=target)) + return bool(classes) and all(cls.has_patch_marker(class_) for class_ in classes) + + @classmethod + def patch(cls, module: Any | None, version: str | None, *, target: Any | None = None) -> bool: + """Patch all newly discovered classes under the configured roots.""" + success = False + for class_ in cls.iter_classes(module, version, target=target): + if cls.has_patch_marker(class_): + continue + if cls.patch_class(class_) is False: + continue + cls.mark_patched(class_) + success = True + return success + + 
class FunctionWrapperPatcher(BasePatcher): """Base patcher for single-target `wrap_function_wrapper` instrumentation. @@ -125,14 +259,13 @@ def mark_patched(cls, obj: Any) -> None: @classmethod def is_patched(cls, module: Any | None, version: str | None, *, target: Any | None = None) -> bool: """Return whether this patcher's target has already been instrumented.""" - marker = cls.patch_marker_attr() resolved_target = cls.resolve_target(module, version, target=target) - if resolved_target is not None and getattr(resolved_target, marker, False): + if cls.has_patch_marker(resolved_target): return True # Fall back to checking the root — the marker may live there when the # resolved target does not support setattr (e.g. bound methods). root = cls.resolve_root(module, version, target=target) - if root is not None and root is not resolved_target and getattr(root, marker, False): + if root is not None and root is not resolved_target and cls.has_patch_marker(root): return True return False @@ -152,7 +285,7 @@ def patch(cls, module: Any | None, version: str | None, *, target: Any | None = cls.mark_patched(resolved_target) # If mark_patched could not store the marker on the target (e.g. bound # methods), store it on the root so is_patched() can still find it. - if not getattr(resolved_target, marker, False): + if not cls.has_patch_marker(resolved_target): setattr(root, marker, True) return True @@ -174,8 +307,7 @@ def wrap_target(cls, target: Any) -> Any: ``superseded_by`` has a target that exists on *target*. Returns *target* for convenient chaining. 
""" - marker = cls.patch_marker_attr() - if getattr(target, marker, False): + if cls.has_patch_marker(target): return target attr = cls.target_path.rsplit(".", 1)[-1] if _resolve_attr_path(target, attr) is None: @@ -241,7 +373,7 @@ def mark_patched(cls, obj: Any) -> None: def is_patched(cls, module: Any | None, version: str | None, *, target: Any | None = None) -> bool: """Return whether this patcher's replacement class is already installed.""" resolved_target = cls.resolve_target(module, version, target=target) - return bool(resolved_target is not None and getattr(resolved_target, cls.patch_marker_attr(), False)) + return bool(resolved_target is not None and cls.has_patch_marker(resolved_target)) @classmethod def patch(cls, module: Any | None, version: str | None, *, target: Any | None = None) -> bool: @@ -370,7 +502,7 @@ def setup( for patcher in sorted(selected_patchers, key=lambda patcher: patcher.priority): if not patcher.applies(module, version, target=target): continue - if patcher.is_patched(module, version, target=target): + if not patcher.rescan_on_setup and patcher.is_patched(module, version, target=target): success = True continue success = patcher.patch(module, version, target=target) or success diff --git a/py/src/braintrust/integrations/pydantic_ai/__init__.py b/py/src/braintrust/integrations/pydantic_ai/__init__.py new file mode 100644 index 00000000..141fa667 --- /dev/null +++ b/py/src/braintrust/integrations/pydantic_ai/__init__.py @@ -0,0 +1,52 @@ +"""Braintrust integration for Pydantic AI.""" + +import logging + +from braintrust.logger import NOOP_SPAN, current_span, init_logger + +from .integration import PydanticAIIntegration +from .patchers import wrap_agent, wrap_model_classes +from .tracing import ( + wrap_model_request, + wrap_model_request_stream, + wrap_model_request_stream_sync, + wrap_model_request_sync, +) + + +logger = logging.getLogger(__name__) + +__all__ = [ + "PydanticAIIntegration", + "setup_pydantic_ai", + "wrap_agent", + 
"wrap_model_classes", + "wrap_model_request", + "wrap_model_request_sync", + "wrap_model_request_stream", + "wrap_model_request_stream_sync", +] + + +def setup_pydantic_ai( + api_key: str | None = None, + project_id: str | None = None, + project_name: str | None = None, +) -> bool: + """ + Setup Braintrust integration with Pydantic AI. Will automatically patch Pydantic AI + agents and direct API functions for automatic tracing. + + Args: + api_key: Braintrust API key. + project_id: Braintrust project ID. + project_name: Braintrust project name. + + Returns: + True if setup was successful, False otherwise. + """ + span = current_span() + if span == NOOP_SPAN: + init_logger(project=project_name, api_key=api_key, project_id=project_id) + + return PydanticAIIntegration.setup() diff --git a/py/src/braintrust/wrappers/cassettes/test_agent_run_async.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_run_async.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_agent_run_async.yaml rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_run_async.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_agent_run_stream.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_run_stream.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_agent_run_stream.yaml rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_run_stream.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_agent_run_stream_events.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_run_stream_events.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_agent_run_stream_events.yaml rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_run_stream_events.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_agent_run_stream_structured_output.yaml 
b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_run_stream_structured_output.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_agent_run_stream_structured_output.yaml rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_run_stream_structured_output.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_agent_run_stream_sync.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_run_stream_sync.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_agent_run_stream_sync.yaml rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_run_stream_sync.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_agent_run_sync.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_run_sync.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_agent_run_sync.yaml rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_run_sync.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_agent_stream_buffer_pattern_early_return.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_stream_buffer_pattern_early_return.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_agent_stream_buffer_pattern_early_return.yaml rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_stream_buffer_pattern_early_return.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_agent_stream_early_break.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_stream_early_break.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_agent_stream_early_break.yaml rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_stream_early_break.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_agent_structured_output.yaml 
b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_structured_output.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_agent_structured_output.yaml rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_structured_output.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_agent_with_binary_content.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_binary_content.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_agent_with_binary_content.yaml rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_binary_content.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_agent_with_custom_settings.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_custom_settings.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_agent_with_custom_settings.yaml rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_custom_settings.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_agent_with_document_input.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_document_input.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_agent_with_document_input.yaml rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_document_input.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_agent_with_message_history.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_message_history.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_agent_with_message_history.yaml rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_message_history.yaml diff --git a/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_model_settings_in_metadata.yaml 
b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_model_settings_in_metadata.yaml new file mode 100644 index 00000000..f8c05e41 --- /dev/null +++ b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_model_settings_in_metadata.yaml @@ -0,0 +1,110 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":"Say hello"}],"model":"gpt-4o-mini","max_completion_tokens":100,"stream":false,"temperature":0.5}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '135' + Content-Type: + - application/json + Cookie: + - _cfuvid=VQg4i_utDK73HtVZX9MnimdbFMrcTwHiGTkj8zvaxBM-1766265730198-0.0.1.1-604800000 + Host: + - api.openai.com + User-Agent: + - pydantic-ai/1.73.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - async:asyncio + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.30.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-DPB5LNj3De8GmyTYjsSCXBus7wf7B\",\n \"object\": + \"chat.completion\",\n \"created\": 1774893183,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"Hello! 
How can I assist you today?\",\n + \ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\": + null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": + 9,\n \"completion_tokens\": 9,\n \"total_tokens\": 18,\n \"prompt_tokens_details\": + {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_ca3e7d71bf\"\n}\n" + headers: + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9e48f8ba4ee792c6-YYZ + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Mon, 30 Mar 2026 17:53:04 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + content-length: + - '839' + openai-organization: + - braintrust-data + openai-processing-ms: + - '637' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + set-cookie: + - __cf_bm=K5ScKgWxKV8qrun72h6zWqwqzuox1P7HfwixJPaisaU-1774893183.0866485-1.0.1.1-aFlnpoUkbkAngI0favlhLoCOJtcaN7dUO6bYg0g4jfC.HRhU3s_NrZt7oH01lSi39dR_xL9hFmrQs2o5en0gk0jRe0MRJTasLHnGP6o4.yXI0SZeUn56WYYaGKfOKVTx; + HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Mon, 30 Mar 2026 + 18:23:04 GMT + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999995' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_1e2852cd7fd34d339ac251eeef0c0487 + status: + code: 200 + message: OK +version: 1 diff --git 
a/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_model_settings_override_in_input.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_model_settings_override_in_input.yaml new file mode 100644 index 00000000..1e5de914 --- /dev/null +++ b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_model_settings_override_in_input.yaml @@ -0,0 +1,120 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":"Tell me a story"}],"model":"gpt-4o-mini","max_completion_tokens":200,"stream":false,"temperature":0.9}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '141' + Content-Type: + - application/json + Cookie: + - _cfuvid=VQg4i_utDK73HtVZX9MnimdbFMrcTwHiGTkj8zvaxBM-1766265730198-0.0.1.1-604800000; + __cf_bm=K5ScKgWxKV8qrun72h6zWqwqzuox1P7HfwixJPaisaU-1774893183.0866485-1.0.1.1-aFlnpoUkbkAngI0favlhLoCOJtcaN7dUO6bYg0g4jfC.HRhU3s_NrZt7oH01lSi39dR_xL9hFmrQs2o5en0gk0jRe0MRJTasLHnGP6o4.yXI0SZeUn56WYYaGKfOKVTx + Host: + - api.openai.com + User-Agent: + - pydantic-ai/1.73.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - async:asyncio + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.30.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-DPB5N7VhlEbWXNCjjIvlcDN3PJnb6\",\n \"object\": + \"chat.completion\",\n \"created\": 1774893185,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"Once upon a time in a quaint little + village nestled between rolling hills and lush green forests, there lived + a young girl named Elara. 
She was known far and wide for her insatiable curiosity + and adventurous spirit. Every morning, Elara would wander through the village, + her bright blue eyes sparkling with wonder at the world around her.\\n\\nOne + sunny afternoon, while exploring the outskirts of the forest, Elara stumbled + upon a hidden path that she had never seen before. It was overgrown with vines + and flowers, but something about it beckoned her to follow. With each step, + the sounds of the village faded away, replaced by the gentle rustle of leaves + and the distant chirping of birds.\\n\\nAs she ventured deeper into the woods, + the sunlight danced through the trees, creating a magical tapestry of light + and shadow. After walking for what felt like hours, Elara arrived at a clearing + that took her breath away. In the center stood a magnificent tree, its bark + shimmering like silver, with branches\",\n \"refusal\": null,\n \"annotations\": + []\n },\n \"logprobs\": null,\n \"finish_reason\": \"length\"\n + \ }\n ],\n \"usage\": {\n \"prompt_tokens\": 11,\n \"completion_tokens\": + 200,\n \"total_tokens\": 211,\n \"prompt_tokens_details\": {\n \"cached_tokens\": + 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_ca3e7d71bf\"\n}\n" + headers: + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9e48f8c39839aa98-YYZ + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Mon, 30 Mar 2026 17:53:09 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + content-length: + - '1813' + openai-organization: + - braintrust-data + openai-processing-ms: + - 
'4441' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999995' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_0e199628e6ab42a8bde2da2054693dac + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/wrappers/cassettes/test_agent_with_system_prompt_in_metadata.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_system_prompt_in_metadata.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_agent_with_system_prompt_in_metadata.yaml rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_system_prompt_in_metadata.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_agent_with_tool_execution.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_tool_execution.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_agent_with_tool_execution.yaml rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_tool_execution.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_agent_with_tools.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_tools.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_agent_with_tools.yaml rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_tools.yaml diff --git a/py/src/braintrust/integrations/pydantic_ai/cassettes/test_auto_pydantic_ai.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_auto_pydantic_ai.yaml new file mode 100644 index 00000000..32c50c9d --- /dev/null +++ b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_auto_pydantic_ai.yaml @@ -0,0 +1,112 @@ +interactions: +- 
request: + body: '{"messages":[{"role":"user","content":"Say hi"}],"model":"gpt-4o-mini","max_completion_tokens":100,"stream":false}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '114' + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - pydantic-ai/1.44.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - async:asyncio + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.15.0 + X-Stainless-Raw-Response: + - 'true' + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: !!binary | + H4sIAAAAAAAAAwAAAP//jFLLbtswELzrK7Y8W4Vsq4ntSw8tmvTUBuihDwQCQ64kJhSXIFdtjcD/ + XlCyLbkPoBcddnZGM8N9zgCE0WIHQrWSVedt/rbQhW8/lF/iHX69a8q9evOx+fzp6fGdu7kRi8Sg + h0dUfGK9VNR5i2zIjbAKKBmT6vL6aluUq/X61QB0pNEmWuM5LynvjDP5qliVeXGdLzdHdktGYRQ7 + +JYBADwP3+TTafwpdlAsTpMOY5QNit15CUAEsmkiZIwmsnQsFhOoyDG6wfqtAW4x4Au4pR+gpIP3 + MHJgTz0wabl/PecGrPsok3/XWzsDpHPEMuUfXN8fkcPZp6XGB3qIv1FFbZyJbRVQRnLJU2TyYkAP + GcD90Ed/EVH4QJ3niukJh99tRzUxPcKELY9VCSaWdjY/kS7EKo0sjY2zOoWSqkU9MafuZa8NzYBs + FvlPM3/THmMb1/yP/AQohZ5RVz6gNuoy8LQWMJ3ov9bOFQ+GRcTw3Sis2GBIz6Cxlr0dD0fEfWTs + qtq4BoMPZrye2ler7XpdyO3VZiOyQ/YLAAD//wMAbBhxq0sDAAA= + headers: + CF-RAY: + - 9c1afdbedfc4cf0a-SJC + Connection: + - keep-alive + Content-Encoding: + - gzip + Content-Type: + - application/json + Date: + - Thu, 22 Jan 2026 00:38:55 GMT + Server: + - cloudflare + Set-Cookie: + - __cf_bm=.v4HHKHusX6vziKsYW5cyVzZRrAsCxp4XT463GaX0yQ-1769042335-1.0.1.1-InjFtjx7UOJ8ivwZeShYpDg8mc4QGt.4kpoe9GlkrPwH7LBqBZxH.e.oLUSXSkyh_t0ETNUXh6C5G5zGSAXLYT6oNyc6cef0jwB2ADi_S.w; + path=/; expires=Thu, 22-Jan-26 01:08:55 GMT; domain=.api.openai.com; HttpOnly; + Secure; SameSite=None + - 
_cfuvid=bHlkcNsEuGGe.AQuXN6zbPWK8MJ2dKBjLFcSS263aVQ-1769042335390-0.0.1.1-604800000; + path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + cf-cache-status: + - DYNAMIC + openai-organization: + - braintrust-data + openai-processing-ms: + - '395' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + x-envoy-upstream-service-time: + - '412' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999997' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_0cc555f0b9354a85a3b0f965716d99de + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/wrappers/cassettes/test_direct_model_request.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_direct_model_request.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_direct_model_request.yaml rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_direct_model_request.yaml diff --git a/py/src/braintrust/integrations/pydantic_ai/cassettes/test_direct_model_request_creates_nested_chat_span_without_class_scan.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_direct_model_request_creates_nested_chat_span_without_class_scan.yaml new file mode 100644 index 00000000..d4ff5c2e --- /dev/null +++ b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_direct_model_request_creates_nested_chat_span_without_class_scan.yaml @@ -0,0 +1,109 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":"What is 2+2? 
Answer with just the + number."}],"model":"gpt-4o-mini","stream":false}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '121' + Content-Type: + - application/json + Host: + - api.openai.com + User-Agent: + - pydantic-ai/1.66.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - async:asyncio + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.24.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-DPVzHBFp5i6BHHGWuTsYqBGubpikG\",\n \"object\": + \"chat.completion\",\n \"created\": 1774973531,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"4\",\n \"refusal\": null,\n + \ \"annotations\": []\n },\n \"logprobs\": null,\n \"finish_reason\": + \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": 20,\n \"completion_tokens\": + 1,\n \"total_tokens\": 21,\n \"prompt_tokens_details\": {\n \"cached_tokens\": + 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_ca3e7d71bf\"\n}\n" + headers: + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9e50a256ab1f178c-YYZ + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Tue, 31 Mar 2026 16:12:11 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + 
content-length: + - '807' + openai-organization: + - braintrust-data + openai-processing-ms: + - '392' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + set-cookie: + - __cf_bm=zLt6TNwkPTB2O.Bp3ceD4Ijrj3biQuLYigEMbvthfOs-1774973530.6689873-1.0.1.1-PL.I8XK.cSEDkAeAeA.SAH05Z2qjjTzvfUfqd4fC3.j0tktm6p0Tqb1.Zwy0776VHR2qgoUpogSKSon8qg2QYY179MO.33I7RVvoQMPznp3dxHHWwIBuPqEqxwJZqbZ2; + HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Tue, 31 Mar 2026 + 16:42:11 GMT + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999987' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_c9480ee8e49349b7a68c3a2767779fd3 + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/wrappers/cassettes/test_direct_model_request_stream.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_direct_model_request_stream.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_direct_model_request_stream.yaml rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_direct_model_request_stream.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_direct_model_request_stream_complete_output.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_direct_model_request_stream_complete_output.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_direct_model_request_stream_complete_output.yaml rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_direct_model_request_stream_complete_output.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_direct_model_request_stream_sync.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_direct_model_request_stream_sync.yaml similarity index 100% rename from 
py/src/braintrust/wrappers/cassettes/test_direct_model_request_stream_sync.yaml rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_direct_model_request_stream_sync.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_direct_model_request_sync.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_direct_model_request_sync.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_direct_model_request_sync.yaml rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_direct_model_request_sync.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_direct_model_request_with_settings.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_direct_model_request_with_settings.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_direct_model_request_with_settings.yaml rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_direct_model_request_with_settings.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_model_class_span_names.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_model_class_span_names.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_model_class_span_names.yaml rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_model_class_span_names.yaml diff --git a/py/src/braintrust/integrations/pydantic_ai/cassettes/test_model_request_stream_sync_thread_context_propagation.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_model_request_stream_sync_thread_context_propagation.yaml new file mode 100644 index 00000000..69ecec3d --- /dev/null +++ b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_model_request_stream_sync_thread_context_propagation.yaml @@ -0,0 +1,436 @@ +interactions: +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '0' + User-Agent: + - 
python-requests/2.33.1 + method: POST + uri: https://www.braintrust.dev/api/apikey/login + response: + body: + string: '{"org_info":[{"id":"5d7c97d7-fef1-4cb7-bda6-7e3756a0ca8e","name":"braintrustdata.com","api_url":"https://staging-api.braintrust.dev","git_metadata":{"fields":["commit","branch","tag","author_name","author_email","commit_message","commit_time","dirty"],"collect":"some"},"is_universal_api":true,"proxy_url":"https://staging-api.braintrust.dev","realtime_url":"wss://realtime.braintrustapi.com"}]}' + headers: + Access-Control-Allow-Credentials: + - 'true' + Access-Control-Allow-Headers: + - X-CSRF-Token, X-Requested-With, Accept, Accept-Version, Content-Length, Content-MD5, + Content-Type, Date, X-Api-Version + Access-Control-Allow-Methods: + - GET,OPTIONS,PATCH,DELETE,POST,PUT + Access-Control-Allow-Origin: + - '*' + Cache-Control: + - public, max-age=0, must-revalidate + Content-Length: + - '395' + Content-Security-Policy: + - 'script-src ''self'' ''unsafe-eval'' ''wasm-unsafe-eval'' ''strict-dynamic'' + ''nonce-Y2NiZDlhM2MtNGFlNy00OTE1LWE1YjQtZTI3MzcwOTEzZTgx'' *.js.stripe.com + js.stripe.com maps.googleapis.com ; style-src ''self'' ''unsafe-inline'' *.braintrust.dev + btcm6qilbbhv4yi1.public.blob.vercel-storage.com fonts.googleapis.com www.gstatic.com + d4tuoctqmanu0.cloudfront.net; font-src ''self'' data: fonts.gstatic.com btcm6qilbbhv4yi1.public.blob.vercel-storage.com + cdn.jsdelivr.net d4tuoctqmanu0.cloudfront.net fonts.googleapis.com mintlify-assets.b-cdn.net + fonts.cdnfonts.com; object-src ''none''; base-uri ''self''; form-action ''self''; + frame-ancestors ''self''; worker-src ''self'' blob:; report-uri https://o4507221741076480.ingest.us.sentry.io/api/4507221754380288/security/?sentry_key=27fa5ac907cf7c6ce4a1ab2a03f805b4&sentry_environment=production&sentry_release=16; + report-to csp-endpoint-0' + Content-Type: + - application/json; charset=utf-8 + Date: + - Mon, 30 Mar 2026 17:53:13 GMT + Etag: + - '"12n7ok4b5phaz"' + 
Reporting-Endpoints: + - csp-endpoint-0="https://o4507221741076480.ingest.us.sentry.io/api/4507221754380288/security/?sentry_key=27fa5ac907cf7c6ce4a1ab2a03f805b4&sentry_environment=production&sentry_release=16" + Server: + - Vercel + Strict-Transport-Security: + - max-age=63072000 + X-Bt-Was-Udf-Cached: + - 'true' + X-Clerk-Auth-Message: + - Invalid JWT form. A JWT consists of three parts separated by dots. (reason=token-invalid, + token-carrier=header) + X-Clerk-Auth-Reason: + - token-invalid + X-Clerk-Auth-Status: + - signed-out + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-Matched-Path: + - /api/apikey/login + X-Nonce: + - Y2NiZDlhM2MtNGFlNy00OTE1LWE1YjQtZTI3MzcwOTEzZTgx + X-Vercel-Cache: + - MISS + X-Vercel-Id: + - yul1::iad1::l88hm-1774893193654-1b03d5ef0879 + status: + code: 200 + message: OK +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.33.1 + method: GET + uri: https://staging-api.braintrust.dev/version + response: + body: + string: '{"version":"1.1.31","date_version":"20260330","ff_version":21,"commit":"7e00d36a24f3ee49ce2f75f19d20386636e5519b","deployment_mode":"lambda","deployment_type":"custom","brainstore_default":"force","brainstore_can_contain_row_refs":true,"skip_pg_config":"all","has_realtime_wal_bucket":true,"brainstore_wal_footer_version":"v3","brainstore_wal_use_efficient_format":true,"has_logs2":true,"js":true,"universal":true,"code_execution":true,"logs3_payload_max_bytes":5242880,"control_plane_telemetry":["status","metrics","logs","traces","memprof","usage"]}' + headers: + Connection: + - keep-alive + Content-Length: + - '551' + Content-Type: + - application/json; charset=utf-8 + Date: + - Mon, 30 Mar 2026 17:53:13 GMT + Via: + - 1.1 90aae5d559fbbbe252f6d8de0a8d7ca8.cloudfront.net (CloudFront), 1.1 f4f653453255d3978688a6c5c61be2d6.cloudfront.net + (CloudFront) + X-Amz-Cf-Id: + - 
VbidRp8Ziuk63w3KD3joP-ZO7fBoIV_oHTYVlsHGQjWuURLgUMZ-Ow== + X-Amz-Cf-Pop: + - IAD55-P9 + - IAD61-P11 + X-Amzn-Trace-Id: + - Root=1-69cab889-1aaac8301c7833580844d4e1;Parent=025690bcec6beb43;Sampled=0;Lineage=1:fc3b4ff1:0 + X-Cache: + - Miss from cloudfront + access-control-allow-credentials: + - 'true' + access-control-expose-headers: + - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms + etag: + - W/"227-KBfWb2OrI4L6tO+9HRx1pkjhfrY" + vary: + - Origin + x-amz-apigw-id: + - bDHFmGKEIAMEaqg= + x-amzn-Remapped-content-length: + - '551' + x-amzn-RequestId: + - 031b2887-f4fc-4737-ac70-77fa3d42cc96 + x-bt-internal-trace-id: + - 69cab88900000000055a53298b8a9e38 + status: + code: 200 + message: OK +- request: + body: '{"rows": [{"_is_merge": false, "context": {"caller_filename": "/Users/abhijeetprasad/workspace/braintrust-sdk-python/py/.nox/test_pydantic_ai_integration-latest/lib/python3.13/site-packages/pydantic_ai/direct.py", + "caller_functionname": "_consume_async_stream", "caller_lineno": 360}, "created": + "2026-03-30T17:53:13.604954+00:00", "id": "021ecd8c-ef0b-4705-8491-84b7fd7e44a3", + "input": {"instrument": null, "messages": [{"instructions": null, "kind": "request", + "metadata": null, "parts": [{"content": "Hello", "part_kind": "user-prompt", + "timestamp": "2026-03-30 17:53:13.603675+00:00"}], "run_id": null, "timestamp": + null}], "model": "openai:gpt-4o-mini", "model_request_parameters": null, "model_settings": + null}, "log_id": "g", "metadata": {"model": "gpt-4o-mini", "provider": "openai"}, + "metrics": {"start": 1774893193.604952}, "project_id": "test-pydantic-ai-integration", + "root_span_id": "6c05f860-41f9-4d0b-92d2-53a72ea73045", "span_attributes": {"exec_counter": + 91, "name": "model_request_stream", "type": "llm"}, "span_id": "33c192f1-78db-4d9f-89f7-d6f36bc817aa", + "span_parents": ["c568d22b-66c3-4f43-a610-2f3807e92d60"]}], "api_version": 2}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - 
gzip, deflate + Connection: + - keep-alive + Content-Length: + - '1152' + User-Agent: + - python-requests/2.33.1 + method: POST + uri: https://staging-api.braintrust.dev/logs3 + response: + body: + string: '{"Code":"ForbiddenError","Message":"Missing read access to project_log + id test-pydantic-ai-integration, or the project_log does not exist [user_email=abhijeet@braintrustdata.com] + [user_org=braintrustdata.com] [timestamp=1774893194.106]","InternalTraceId":"69cab88a000000001216f8994c57459e","Path":"/logs3","Service":"api"}' + headers: + Connection: + - keep-alive + Content-Type: + - application/json; charset=utf-8 + Date: + - Mon, 30 Mar 2026 17:53:14 GMT + Via: + - 1.1 e8ac579de7fc88986153d8653adf92fc.cloudfront.net (CloudFront), 1.1 da473159f6f131ea8035a6279b0f60aa.cloudfront.net + (CloudFront) + X-Amz-Cf-Id: + - MsuUmsZgF5KmuHR5gJnlVeocN-0ut6X8N3L4lrOMghuMxf_EFvJerg== + X-Amz-Cf-Pop: + - IAD55-P9 + - IAD61-P11 + X-Amzn-Trace-Id: + - Root=1-69cab889-2d61f8540b519af25e9769e5;Parent=5b0c1b4a8c31a605;Sampled=0;Lineage=1:fc3b4ff1:0 + X-Cache: + - Error from cloudfront + access-control-allow-credentials: + - 'true' + access-control-expose-headers: + - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms + content-length: + - '322' + etag: + - W/"142-xcxriWqbDXxHzOZANosR4ILTx3M" + vary: + - Origin, Accept-Encoding + x-amz-apigw-id: + - bDHFnEuqIAMEtgg= + x-amzn-RequestId: + - 3aaa6d41-535f-49d1-b1bd-4da058a30870 + x-bt-internal-trace-id: + - 69cab88a000000001216f8994c57459e + status: + code: 403 + message: Forbidden +- request: + body: '{"rows": [{"_is_merge": true, "id": "cb2a81f1-d1ce-46eb-b71f-48f94cc77709", + "log_id": "g", "metrics": {"completion_tokens": 8.0, "duration": 0.002259969711303711, + "end": 1774893189.5945878, "prompt_cache_creation_tokens": 0.0, "prompt_cached_tokens": + 0.0, "prompt_tokens": 14.0, "start": 1774893189.5923278, "time_to_first_token": + 0.0016760826110839844, "tokens": 22.0}, "output": 
{"finish_reason": "stop", + "kind": "response", "metadata": null, "model_name": "gpt-4o-mini-2024-07-18", + "parts": [{"content": "1, 2, 3.", "id": null, "part_kind": "text", "provider_details": + null, "provider_name": null}], "provider_details": {"finish_reason": "stop", + "timestamp": "2025-12-20 21:13:35+00:00"}, "provider_name": "openai", "provider_response_id": + "chatcmpl-CoyYZrJy9JYFY664IA2WgzVOsQSmj", "provider_url": "https://api.openai.com/v1/", + "run_id": null, "timestamp": "2026-03-30 17:53:09.593906+00:00", "usage": {"cache_audio_read_tokens": + 0, "cache_read_tokens": 0, "cache_write_tokens": 0, "details": {"accepted_prediction_tokens": + 0, "audio_tokens": 0, "reasoning_tokens": 0, "rejected_prediction_tokens": 0}, + "input_audio_tokens": 0, "input_tokens": 14, "output_audio_tokens": 0, "output_tokens": + 8}}, "project_id": "test-pydantic-ai-integration", "root_span_id": "7b1235d0-d31d-49ed-8df1-6ae33d4dedf7", + "span_id": "d47230a5-49a5-4cba-b863-1e479ceac7aa", "span_parents": ["f35d8e02-4e31-4cb8-b8ad-2bb47c382e1e"]},{"_is_merge": + true, "id": "bdcee637-b6ac-4d83-bd0b-d6d5f559b2e0", "log_id": "g", "metrics": + {"completion_tokens": 8.0, "duration": 0.0023889541625976562, "end": 1774893189.59468, + "prompt_cache_creation_tokens": 0.0, "prompt_cached_tokens": 0.0, "prompt_tokens": + 14.0, "start": 1774893189.592291, "time_to_first_token": 0.001712799072265625, + "tokens": 22.0}, "output": {"finish_reason": "stop", "kind": "response", "metadata": + null, "model_name": "gpt-4o-mini-2024-07-18", "parts": [{"content": "1, 2, 3.", + "id": null, "part_kind": "text", "provider_details": null, "provider_name": + null}], "provider_details": {"finish_reason": "stop", "timestamp": "2025-12-20 + 21:13:35+00:00"}, "provider_name": "openai", "provider_response_id": "chatcmpl-CoyYZrJy9JYFY664IA2WgzVOsQSmj", + "provider_url": "https://api.openai.com/v1/", "run_id": null, "timestamp": "2026-03-30 + 17:53:09.593906+00:00", "usage": {"cache_audio_read_tokens": 0, 
"cache_read_tokens": + 0, "cache_write_tokens": 0, "details": {"accepted_prediction_tokens": 0, "audio_tokens": + 0, "reasoning_tokens": 0, "rejected_prediction_tokens": 0}, "input_audio_tokens": + 0, "input_tokens": 14, "output_audio_tokens": 0, "output_tokens": 8}}, "project_id": + "test-pydantic-ai-integration", "root_span_id": "7b1235d0-d31d-49ed-8df1-6ae33d4dedf7", + "span_id": "f35d8e02-4e31-4cb8-b8ad-2bb47c382e1e", "span_parents": ["7b1235d0-d31d-49ed-8df1-6ae33d4dedf7"]}], + "api_version": 2}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '2826' + User-Agent: + - python-requests/2.33.1 + method: POST + uri: https://staging-api.braintrust.dev/logs3 + response: + body: + string: '{"Code":"ForbiddenError","Message":"Missing read access to project_log + id test-pydantic-ai-integration, or the project_log does not exist [user_email=abhijeet@braintrustdata.com] + [user_org=braintrustdata.com] [timestamp=1774893194.229]","InternalTraceId":"69cab88a000000006f8e7e21edeefe09","Path":"/logs3","Service":"api"}' + headers: + Connection: + - keep-alive + Content-Type: + - application/json; charset=utf-8 + Date: + - Mon, 30 Mar 2026 17:53:14 GMT + Via: + - 1.1 90aae5d559fbbbe252f6d8de0a8d7ca8.cloudfront.net (CloudFront), 1.1 bd7551a5260a8bac087bad2ac8ebffec.cloudfront.net + (CloudFront) + X-Amz-Cf-Id: + - U3UXEimOcKVsdV6Lo0Yk8XezNhwfTiQcy__NSvaZ_5xaT8odied-Tw== + X-Amz-Cf-Pop: + - IAD55-P9 + - IAD61-P11 + X-Amzn-Trace-Id: + - Root=1-69cab88a-72db82d1703ec6b9651329df;Parent=40e411d20fc278df;Sampled=0;Lineage=1:fc3b4ff1:0 + X-Cache: + - Error from cloudfront + access-control-allow-credentials: + - 'true' + access-control-expose-headers: + - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms + content-length: + - '322' + etag: + - W/"142-wDmrkVP8fVfti8Te3LX+0pveJRw" + vary: + - Origin, Accept-Encoding + x-amz-apigw-id: + - bDHFoFWDoAMEbjQ= + 
x-amzn-RequestId: + - e0b0db62-51f5-451f-8aa4-6e86a0936ec7 + x-bt-internal-trace-id: + - 69cab88a000000006f8e7e21edeefe09 + status: + code: 403 + message: Forbidden +- request: + body: '{"messages":[{"role":"user","content":"Hello"}],"model":"gpt-4o-mini","stream":true,"stream_options":{"include_usage":true}}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '124' + Content-Type: + - application/json + Cookie: + - _cfuvid=tXOZ7vGE2DBF6L6fDg_veKtSaUVC4UPotJDezWYoYXI-1766265191281-0.0.1.1-604800000 + Host: + - api.openai.com + User-Agent: + - pydantic-ai/1.73.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - async:asyncio + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.30.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-DPB5VAbDb1LwuCfC12JgojqMCDbvL","object":"chat.completion.chunk","created":1774893193,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"lMoGU7R2O"} + + + data: {"id":"chatcmpl-DPB5VAbDb1LwuCfC12JgojqMCDbvL","object":"chat.completion.chunk","created":1774893193,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"O1qW8o"} + + + data: 
{"id":"chatcmpl-DPB5VAbDb1LwuCfC12JgojqMCDbvL","object":"chat.completion.chunk","created":1774893193,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"VVrBHX6n3h"} + + + data: {"id":"chatcmpl-DPB5VAbDb1LwuCfC12JgojqMCDbvL","object":"chat.completion.chunk","created":1774893193,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":" + How"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"5hWyEXX"} + + + data: {"id":"chatcmpl-DPB5VAbDb1LwuCfC12JgojqMCDbvL","object":"chat.completion.chunk","created":1774893193,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":" + can"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"nVyWCh4"} + + + data: {"id":"chatcmpl-DPB5VAbDb1LwuCfC12JgojqMCDbvL","object":"chat.completion.chunk","created":1774893193,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":" + I"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"d6to3IExF"} + + + data: {"id":"chatcmpl-DPB5VAbDb1LwuCfC12JgojqMCDbvL","object":"chat.completion.chunk","created":1774893193,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":" + assist"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"JxzI"} + + + data: {"id":"chatcmpl-DPB5VAbDb1LwuCfC12JgojqMCDbvL","object":"chat.completion.chunk","created":1774893193,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":" + 
you"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"i6J33cg"} + + + data: {"id":"chatcmpl-DPB5VAbDb1LwuCfC12JgojqMCDbvL","object":"chat.completion.chunk","created":1774893193,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":" + today"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Irdup"} + + + data: {"id":"chatcmpl-DPB5VAbDb1LwuCfC12JgojqMCDbvL","object":"chat.completion.chunk","created":1774893193,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"R65rXtBVaV"} + + + data: {"id":"chatcmpl-DPB5VAbDb1LwuCfC12JgojqMCDbvL","object":"chat.completion.chunk","created":1774893193,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null,"obfuscation":"Bf2lm"} + + + data: {"id":"chatcmpl-DPB5VAbDb1LwuCfC12JgojqMCDbvL","object":"chat.completion.chunk","created":1774893193,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[],"usage":{"prompt_tokens":8,"completion_tokens":9,"total_tokens":17,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"OmfK72y6EWIX"} + + + data: [DONE] + + + ' + headers: + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9e48f8fc3b177116-YYZ + Connection: + - keep-alive + Content-Type: + - text/event-stream; charset=utf-8 + Date: + - Mon, 30 Mar 2026 17:53:14 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + 
access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - braintrust-data + openai-processing-ms: + - '403' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + set-cookie: + - __cf_bm=IHGiGf.Mtx_0IgVPmpOyr33nU6wJiuBbWv2RKuFyGQ8-1774893193.6401794-1.0.1.1-_WtR5M3BuFYZr5v6J1YF_vcnLZ95nF_kyCjT5FUTaqbdodV8NBNQct8MkpsPKDu12ElhK3j1RUo3KbOi1PcdyyIXNn0Gv6R_R73LtuYWQFstRviblljvfUx8ryTnWuz0; + HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Mon, 30 Mar 2026 + 18:23:14 GMT + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999995' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_50755602447c427682158e8e30b526a7 + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/wrappers/cassettes/test_multiple_identical_sequential_streams.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_multiple_identical_sequential_streams.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_multiple_identical_sequential_streams.yaml rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_multiple_identical_sequential_streams.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_multiple_sequential_streams.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_multiple_sequential_streams.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_multiple_sequential_streams.yaml rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_multiple_sequential_streams.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_no_model_agent_run.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_no_model_agent_run.yaml similarity index 100% rename from 
py/src/braintrust/wrappers/cassettes/test_no_model_agent_run.yaml rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_no_model_agent_run.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_no_model_agent_run_with_logfire.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_no_model_agent_run_with_logfire.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_no_model_agent_run_with_logfire.yaml rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_no_model_agent_run_with_logfire.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_pydantic_wrapped_completion.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_pydantic_wrapped_completion.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_pydantic_wrapped_completion.yaml rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_pydantic_wrapped_completion.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_pydantic_wrapped_stream.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_pydantic_wrapped_stream.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_pydantic_wrapped_stream.yaml rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_pydantic_wrapped_stream.yaml diff --git a/py/src/braintrust/wrappers/cassettes/test_stream_buffer_pattern_early_return.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_stream_buffer_pattern_early_return.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_stream_buffer_pattern_early_return.yaml rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_stream_buffer_pattern_early_return.yaml diff --git a/py/src/braintrust/integrations/pydantic_ai/cassettes/test_stream_early_break_async_generator.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_stream_early_break_async_generator.yaml new file mode 100644 index 
00000000..d171dce1 --- /dev/null +++ b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_stream_early_break_async_generator.yaml @@ -0,0 +1,314 @@ +interactions: +- request: + body: null + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + User-Agent: + - python-requests/2.33.1 + method: GET + uri: https://staging-api.braintrust.dev/version + response: + body: + string: '{"version":"1.1.31","date_version":"20260330","ff_version":21,"commit":"7e00d36a24f3ee49ce2f75f19d20386636e5519b","deployment_mode":"lambda","deployment_type":"custom","brainstore_default":"force","brainstore_can_contain_row_refs":true,"skip_pg_config":"all","has_realtime_wal_bucket":true,"brainstore_wal_footer_version":"v3","brainstore_wal_use_efficient_format":true,"has_logs2":true,"js":true,"universal":true,"code_execution":true,"logs3_payload_max_bytes":5242880,"control_plane_telemetry":["status","metrics","logs","traces","memprof","usage"]}' + headers: + Connection: + - keep-alive + Content-Length: + - '551' + Content-Type: + - application/json; charset=utf-8 + Date: + - Mon, 30 Mar 2026 17:53:10 GMT + Via: + - 1.1 90aae5d559fbbbe252f6d8de0a8d7ca8.cloudfront.net (CloudFront), 1.1 0260b26200cba81bc8e0dc18d51916d8.cloudfront.net + (CloudFront) + X-Amz-Cf-Id: + - Rutphtid1FBgIKTDJbyYk8gNAU97BOpNPtX1xDZMaleONeKJ3s38Iw== + X-Amz-Cf-Pop: + - IAD55-P9 + - IAD61-P11 + X-Amzn-Trace-Id: + - Root=1-69cab886-77d423473139f3c74b00cea3;Parent=06a3bc64aa61c425;Sampled=0;Lineage=1:fc3b4ff1:0 + X-Cache: + - Miss from cloudfront + access-control-allow-credentials: + - 'true' + access-control-expose-headers: + - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms + etag: + - W/"227-KBfWb2OrI4L6tO+9HRx1pkjhfrY" + vary: + - Origin + x-amz-apigw-id: + - bDHFBHTxIAMErhA= + x-amzn-Remapped-content-length: + - '551' + x-amzn-RequestId: + - 51a738b6-ac50-428b-a776-be56711c5922 + x-bt-internal-trace-id: + - 
69cab886000000002c579ac6ee72eb06 + status: + code: 200 + message: OK +- request: + body: '{"rows": [{"_is_merge": false, "context": {"caller_filename": "/Users/abhijeetprasad/workspace/braintrust-sdk-python/py/.nox/test_pydantic_ai_integration-latest/lib/python3.13/site-packages/pydantic_ai/direct.py", + "caller_functionname": "_consume_async_stream", "caller_lineno": 360}, "created": + "2026-03-30T17:53:09.592213+00:00", "id": "bdcee637-b6ac-4d83-bd0b-d6d5f559b2e0", + "input": {"instrument": null, "messages": [{"instructions": null, "kind": "request", + "metadata": null, "parts": [{"content": "Count from 1 to 3", "part_kind": "user-prompt", + "timestamp": "2026-03-30 17:53:09.591693+00:00"}], "run_id": null, "timestamp": + null}], "model": "openai:gpt-4o-mini", "model_request_parameters": null, "model_settings": + null}, "log_id": "g", "metadata": {"model": "gpt-4o-mini", "provider": "openai"}, + "metrics": {"start": 1774893189.592212}, "project_id": "test-pydantic-ai-integration", + "root_span_id": "7b1235d0-d31d-49ed-8df1-6ae33d4dedf7", "span_attributes": {"exec_counter": + 58, "name": "model_request_stream", "type": "llm"}, "span_id": "f35d8e02-4e31-4cb8-b8ad-2bb47c382e1e", + "span_parents": ["7b1235d0-d31d-49ed-8df1-6ae33d4dedf7"]},{"_is_merge": false, + "context": {"caller_filename": "/Users/abhijeetprasad/workspace/braintrust-sdk-python/py/.nox/test_pydantic_ai_integration-latest/lib/python3.13/site-packages/pydantic_ai/direct.py", + "caller_functionname": "_consume_async_stream", "caller_lineno": 360}, "created": + "2026-03-30T17:53:09.592300+00:00", "id": "cb2a81f1-d1ce-46eb-b71f-48f94cc77709", + "input": {"messages": [{"instructions": null, "kind": "request", "metadata": + null, "parts": [{"content": "Count from 1 to 3", "part_kind": "user-prompt", + "timestamp": "2026-03-30 17:53:09.591693+00:00"}], "run_id": null, "timestamp": + null}]}, "log_id": "g", "metadata": {"model": "gpt-4o-mini", "provider": "openai"}, + "metrics": {"start": 1774893189.592299}, 
"project_id": "test-pydantic-ai-integration", + "root_span_id": "7b1235d0-d31d-49ed-8df1-6ae33d4dedf7", "span_attributes": {"exec_counter": + 59, "name": "chat gpt-4o-mini", "type": "llm"}, "span_id": "d47230a5-49a5-4cba-b863-1e479ceac7aa", + "span_parents": ["f35d8e02-4e31-4cb8-b8ad-2bb47c382e1e"]}], "api_version": 2}' + headers: + Accept: + - '*/*' + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '2186' + User-Agent: + - python-requests/2.33.1 + method: POST + uri: https://staging-api.braintrust.dev/logs3 + response: + body: + string: '{"Code":"ForbiddenError","Message":"Missing read access to project_log + id test-pydantic-ai-integration, or the project_log does not exist [user_email=abhijeet@braintrustdata.com] + [user_org=braintrustdata.com] [timestamp=1774893190.508]","InternalTraceId":"69cab886000000002541a41a062b3821","Path":"/logs3","Service":"api"}' + headers: + Connection: + - keep-alive + Content-Type: + - application/json; charset=utf-8 + Date: + - Mon, 30 Mar 2026 17:53:10 GMT + Via: + - 1.1 90aae5d559fbbbe252f6d8de0a8d7ca8.cloudfront.net (CloudFront), 1.1 c2397f8122d12a766778848b1e46618c.cloudfront.net + (CloudFront) + X-Amz-Cf-Id: + - d73rTtpa6LLDwu7hVQKfmSYL_SSkYfPKbkMviRoaI0PhG9JcQdcc0A== + X-Amz-Cf-Pop: + - IAD55-P9 + - IAD61-P11 + X-Amzn-Trace-Id: + - Root=1-69cab886-4aa46ad25fe765fa582e040e;Parent=796a754107031849;Sampled=0;Lineage=1:fc3b4ff1:0 + X-Cache: + - Error from cloudfront + access-control-allow-credentials: + - 'true' + access-control-expose-headers: + - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms + content-length: + - '322' + etag: + - W/"142-o3U2Y50CsoSuWcGoGfpU85I9aag" + vary: + - Origin, Accept-Encoding + x-amz-apigw-id: + - bDHFCF_-IAMEsQQ= + x-amzn-RequestId: + - 071836c2-6481-4fe4-82c8-63a2f60f3811 + x-bt-internal-trace-id: + - 69cab886000000002541a41a062b3821 + status: + code: 403 + message: Forbidden +- request: + body: 
'{"messages":[{"role":"user","content":"Count from 1 to 5"}],"model":"gpt-4o-mini","stream":true,"stream_options":{"include_usage":true}}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate + Connection: + - keep-alive + Content-Length: + - '136' + Content-Type: + - application/json + Cookie: + - _cfuvid=VQg4i_utDK73HtVZX9MnimdbFMrcTwHiGTkj8zvaxBM-1766265730198-0.0.1.1-604800000; + __cf_bm=K5ScKgWxKV8qrun72h6zWqwqzuox1P7HfwixJPaisaU-1774893183.0866485-1.0.1.1-aFlnpoUkbkAngI0favlhLoCOJtcaN7dUO6bYg0g4jfC.HRhU3s_NrZt7oH01lSi39dR_xL9hFmrQs2o5en0gk0jRe0MRJTasLHnGP6o4.yXI0SZeUn56WYYaGKfOKVTx + Host: + - api.openai.com + User-Agent: + - pydantic-ai/1.73.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - async:asyncio + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.30.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: 'data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"GJC9P5OdK"} + + + data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"Sure"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"1P1UjOI"} + + + data: 
{"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"ZlSk142ecs"} + + + data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":" + Here"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"4yabYK"} + + + data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":" + you"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"grCdnO7"} + + + data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":" + go"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Nhi1EPE9"} + + + data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":":"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"M7DzNi5HGu"} + + + data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":" + "},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"2IAA3kRV0b"} 
+ + + data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"1"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"RhdCx21T32"} + + + data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"N2k2EsOSJW"} + + + data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":" + "},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"KcDwfb76mt"} + + + data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"2"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"7lR6LaTkCm"} + + + data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"wdhkGWENkw"} + + + data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":" + 
"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Kv82utB0v1"} + + + data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"3"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"mr5lS7vtdA"} + + + data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"s9xOzGrc1a"} + + + data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":" + "},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"u42PFlFCKS"} + + + data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"4"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"6FjYL9XiZG"} + + + data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"jJ789GMJk1"} + + + data: 
{"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":" + "},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"xaM26X2ccz"} + + + data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"5"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"VEKM6MTL9y"} + + + data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"DWYxA5bszQ"} + + + data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null,"obfuscation":"ky8MR"} + + + data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[],"usage":{"prompt_tokens":14,"completion_tokens":21,"total_tokens":35,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"jlxaAs828P"} + + + data: [DONE] + + + ' + headers: + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9e48f8e32964076d-YYZ + Connection: + - keep-alive + Content-Type: + - text/event-stream; 
charset=utf-8 + Date: + - Mon, 30 Mar 2026 17:53:10 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + openai-organization: + - braintrust-data + openai-processing-ms: + - '372' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999992' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_9b03c171263a4211b5037eff9b1a4723 + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/wrappers/cassettes/test_tool_execution_creates_spans.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_tool_execution_creates_spans.yaml similarity index 100% rename from py/src/braintrust/wrappers/cassettes/test_tool_execution_creates_spans.yaml rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_tool_execution_creates_spans.yaml diff --git a/py/src/braintrust/integrations/pydantic_ai/cassettes/test_tool_execution_tracing_does_not_depend_on_message_reconstruction.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_tool_execution_tracing_does_not_depend_on_message_reconstruction.yaml new file mode 100644 index 00000000..5ecd07ac --- /dev/null +++ b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_tool_execution_tracing_does_not_depend_on_message_reconstruction.yaml @@ -0,0 +1,217 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":"What''s the weather in 
Paris?"}],"model":"gpt-4o-mini","max_completion_tokens":200,"stream":false,"tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_weather","description":"","parameters":{"additionalProperties":false,"properties":{"city":{"type":"string"}},"required":["city"],"type":"object"},"strict":true}}]}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '372' + Content-Type: + - application/json + Cookie: + - _cfuvid=tXOZ7vGE2DBF6L6fDg_veKtSaUVC4UPotJDezWYoYXI-1766265191281-0.0.1.1-604800000; + __cf_bm=epAi6KrcpiRht5_zKqAbs_ZkpcP6bWzSCDoNAhVSQcg-1774973531.7748783-1.0.1.1-eexwuoEVNhxJeFCArlDcDavolPepARox5VsLLlCOfl17u1yyKZAxen8yKFPrew9xF3zVGlK3_FSx59t5p8RKNEk1f83tfxWC6HF_lbbsUvSvr3Wt1mbPvGIdnDbSfyDL + Host: + - api.openai.com + User-Agent: + - pydantic-ai/1.66.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - async:asyncio + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.24.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-DPVzIVqFFl74aixNWy61pqlKSLtm7\",\n \"object\": + \"chat.completion\",\n \"created\": 1774973532,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": null,\n \"tool_calls\": [\n {\n + \ \"id\": \"call_wcgZ1wAdiVEmHWxtAmhLUoz2\",\n \"type\": + \"function\",\n \"function\": {\n \"name\": \"get_weather\",\n + \ \"arguments\": \"{\\\"city\\\":\\\"Paris\\\"}\"\n }\n + \ }\n ],\n \"refusal\": null,\n \"annotations\": + []\n },\n \"logprobs\": null,\n \"finish_reason\": \"tool_calls\"\n + \ }\n ],\n \"usage\": {\n \"prompt_tokens\": 43,\n \"completion_tokens\": + 14,\n \"total_tokens\": 57,\n 
\"prompt_tokens_details\": {\n \"cached_tokens\": + 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_e738e3044b\"\n}\n" + headers: + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9e50a260ae9436d0-YYZ + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Tue, 31 Mar 2026 16:12:12 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + content-length: + - '1084' + openai-organization: + - braintrust-data + openai-processing-ms: + - '527' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999990' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_f52a7793909d4febb7d8282622d2a13c + status: + code: 200 + message: OK +- request: + body: '{"messages":[{"role":"user","content":"What''s the weather in Paris?"},{"role":"assistant","content":null,"tool_calls":[{"id":"call_wcgZ1wAdiVEmHWxtAmhLUoz2","type":"function","function":{"name":"get_weather","arguments":"{\"city\":\"Paris\"}"}}]},{"role":"tool","tool_call_id":"call_wcgZ1wAdiVEmHWxtAmhLUoz2","content":"It''s + sunny in 
Paris"}],"model":"gpt-4o-mini","max_completion_tokens":200,"stream":false,"tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_weather","description":"","parameters":{"additionalProperties":false,"properties":{"city":{"type":"string"}},"required":["city"],"type":"object"},"strict":true}}]}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '644' + Content-Type: + - application/json + Cookie: + - _cfuvid=tXOZ7vGE2DBF6L6fDg_veKtSaUVC4UPotJDezWYoYXI-1766265191281-0.0.1.1-604800000; + __cf_bm=epAi6KrcpiRht5_zKqAbs_ZkpcP6bWzSCDoNAhVSQcg-1774973531.7748783-1.0.1.1-eexwuoEVNhxJeFCArlDcDavolPepARox5VsLLlCOfl17u1yyKZAxen8yKFPrew9xF3zVGlK3_FSx59t5p8RKNEk1f83tfxWC6HF_lbbsUvSvr3Wt1mbPvGIdnDbSfyDL + Host: + - api.openai.com + User-Agent: + - pydantic-ai/1.66.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - async:asyncio + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.24.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-DPVzJr38yaFUi6ANuIRoeJAjXIBrO\",\n \"object\": + \"chat.completion\",\n \"created\": 1774973533,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"The weather in Paris is currently sunny.\",\n + \ \"refusal\": null,\n \"annotations\": []\n },\n \"logprobs\": + null,\n \"finish_reason\": \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": + 69,\n \"completion_tokens\": 9,\n \"total_tokens\": 78,\n \"prompt_tokens_details\": + {\n \"cached_tokens\": 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n 
\"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_78db9bf1f6\"\n}\n" + headers: + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9e50a264add7c8b2-YYZ + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Tue, 31 Mar 2026 16:12:13 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + content-length: + - '846' + openai-organization: + - braintrust-data + openai-processing-ms: + - '429' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999985' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_e05560b30a4e4fabaf1166c1fe903df1 + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/integrations/pydantic_ai/cassettes/test_wrapper_agent_run_is_traced.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_wrapper_agent_run_is_traced.yaml new file mode 100644 index 00000000..aa0b0d07 --- /dev/null +++ b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_wrapper_agent_run_is_traced.yaml @@ -0,0 +1,111 @@ +interactions: +- request: + body: '{"messages":[{"role":"user","content":"What is 2+2? 
Answer with just the + number."}],"model":"gpt-4o-mini","max_completion_tokens":50,"stream":false}' + headers: + Accept: + - application/json + Accept-Encoding: + - gzip, deflate, zstd + Connection: + - keep-alive + Content-Length: + - '148' + Content-Type: + - application/json + Cookie: + - _cfuvid=tXOZ7vGE2DBF6L6fDg_veKtSaUVC4UPotJDezWYoYXI-1766265191281-0.0.1.1-604800000 + Host: + - api.openai.com + User-Agent: + - pydantic-ai/1.66.0 + X-Stainless-Arch: + - arm64 + X-Stainless-Async: + - async:asyncio + X-Stainless-Lang: + - python + X-Stainless-OS: + - MacOS + X-Stainless-Package-Version: + - 2.24.0 + X-Stainless-Runtime: + - CPython + X-Stainless-Runtime-Version: + - 3.13.3 + x-stainless-read-timeout: + - '600' + x-stainless-retry-count: + - '0' + method: POST + uri: https://api.openai.com/v1/chat/completions + response: + body: + string: "{\n \"id\": \"chatcmpl-DPVzHBw20ZdgFxQ8hDmn74NllhQob\",\n \"object\": + \"chat.completion\",\n \"created\": 1774973531,\n \"model\": \"gpt-4o-mini-2024-07-18\",\n + \ \"choices\": [\n {\n \"index\": 0,\n \"message\": {\n \"role\": + \"assistant\",\n \"content\": \"4\",\n \"refusal\": null,\n + \ \"annotations\": []\n },\n \"logprobs\": null,\n \"finish_reason\": + \"stop\"\n }\n ],\n \"usage\": {\n \"prompt_tokens\": 20,\n \"completion_tokens\": + 1,\n \"total_tokens\": 21,\n \"prompt_tokens_details\": {\n \"cached_tokens\": + 0,\n \"audio_tokens\": 0\n },\n \"completion_tokens_details\": + {\n \"reasoning_tokens\": 0,\n \"audio_tokens\": 0,\n \"accepted_prediction_tokens\": + 0,\n \"rejected_prediction_tokens\": 0\n }\n },\n \"service_tier\": + \"default\",\n \"system_fingerprint\": \"fp_ca3e7d71bf\"\n}\n" + headers: + CF-Cache-Status: + - DYNAMIC + CF-Ray: + - 9e50a25d9cd94cc4-YYZ + Connection: + - keep-alive + Content-Type: + - application/json + Date: + - Tue, 31 Mar 2026 16:12:12 GMT + Server: + - cloudflare + Strict-Transport-Security: + - max-age=31536000; includeSubDomains; preload + Transfer-Encoding: + - chunked + 
X-Content-Type-Options: + - nosniff + access-control-expose-headers: + - X-Request-ID + alt-svc: + - h3=":443"; ma=86400 + content-length: + - '807' + openai-organization: + - braintrust-data + openai-processing-ms: + - '377' + openai-project: + - proj_vsCSXafhhByzWOThMrJcZiw9 + openai-version: + - '2020-10-01' + set-cookie: + - __cf_bm=epAi6KrcpiRht5_zKqAbs_ZkpcP6bWzSCDoNAhVSQcg-1774973531.7748783-1.0.1.1-eexwuoEVNhxJeFCArlDcDavolPepARox5VsLLlCOfl17u1yyKZAxen8yKFPrew9xF3zVGlK3_FSx59t5p8RKNEk1f83tfxWC6HF_lbbsUvSvr3Wt1mbPvGIdnDbSfyDL; + HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Tue, 31 Mar 2026 + 16:42:12 GMT + x-openai-proxy-wasm: + - v0.1 + x-ratelimit-limit-requests: + - '30000' + x-ratelimit-limit-tokens: + - '150000000' + x-ratelimit-remaining-requests: + - '29999' + x-ratelimit-remaining-tokens: + - '149999987' + x-ratelimit-reset-requests: + - 2ms + x-ratelimit-reset-tokens: + - 0s + x-request-id: + - req_8d78179fe6524c99a93e2eb714045e9c + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/integrations/pydantic_ai/integration.py b/py/src/braintrust/integrations/pydantic_ai/integration.py new file mode 100644 index 00000000..64c689ff --- /dev/null +++ b/py/src/braintrust/integrations/pydantic_ai/integration.py @@ -0,0 +1,32 @@ +"""Pydantic AI integration orchestration.""" + +from braintrust.integrations.base import BaseIntegration + +from .patchers import ( + AgentPatcher, + DirectModelRequestPatcher, + DirectModelRequestStreamPatcher, + DirectModelRequestStreamSyncPatcher, + DirectModelRequestSyncPatcher, + DirectPrepareModelPatcher, + StreamedResponseSyncStartProducerPatcher, + ToolManagerFunctionToolPatcher, +) + + +class PydanticAIIntegration(BaseIntegration): + """Braintrust instrumentation for Pydantic AI.""" + + name = "pydantic_ai" + import_names = ("pydantic_ai",) + min_version = "1.10.0" + patchers = ( + StreamedResponseSyncStartProducerPatcher, + AgentPatcher, + DirectPrepareModelPatcher, + 
DirectModelRequestPatcher, + DirectModelRequestSyncPatcher, + DirectModelRequestStreamPatcher, + DirectModelRequestStreamSyncPatcher, + ToolManagerFunctionToolPatcher, + ) diff --git a/py/src/braintrust/integrations/pydantic_ai/patchers.py b/py/src/braintrust/integrations/pydantic_ai/patchers.py new file mode 100644 index 00000000..0335fcd7 --- /dev/null +++ b/py/src/braintrust/integrations/pydantic_ai/patchers.py @@ -0,0 +1,201 @@ +"""Pydantic AI patchers.""" + +import warnings +from typing import Any, ClassVar + +from braintrust.integrations.base import ClassScanPatcher, CompositeFunctionWrapperPatcher, FunctionWrapperPatcher + +from .tracing import ( + _agent_get_model_wrapper, + _agent_run_stream_events_wrapper, + _agent_run_stream_sync_wrapper, + _agent_run_stream_wrapper, + _agent_run_sync_wrapper, + _agent_run_wrapper, + _agent_to_cli_sync_wrapper, + _create_direct_model_request_stream_sync_wrapper, + _create_direct_model_request_stream_wrapper, + _create_direct_model_request_sync_wrapper, + _create_direct_model_request_wrapper, + _create_start_producer_wrapper, + _direct_prepare_model_wrapper, + _tool_manager_call_function_tool_wrapper, + _tool_manager_execute_function_tool_wrapper, + _wrap_concrete_model_class, +) + + +class _AgentRunPatcher(FunctionWrapperPatcher): + name = "pydantic_ai.agent.run" + target_module = "pydantic_ai.agent.abstract" + target_path = "AbstractAgent.run" + wrapper = _agent_run_wrapper + + +class _AgentRunSyncPatcher(FunctionWrapperPatcher): + name = "pydantic_ai.agent.run_sync" + target_module = "pydantic_ai.agent.abstract" + target_path = "AbstractAgent.run_sync" + wrapper = _agent_run_sync_wrapper + + +class _AgentToCliSyncPatcher(FunctionWrapperPatcher): + name = "pydantic_ai.agent.to_cli_sync" + target_module = "pydantic_ai.agent.abstract" + target_path = "AbstractAgent.to_cli_sync" + wrapper = _agent_to_cli_sync_wrapper + + +class _AgentRunStreamPatcher(FunctionWrapperPatcher): + name = "pydantic_ai.agent.run_stream" + 
target_module = "pydantic_ai.agent.abstract" + target_path = "AbstractAgent.run_stream" + wrapper = _agent_run_stream_wrapper + + +class _AgentRunStreamSyncPatcher(FunctionWrapperPatcher): + name = "pydantic_ai.agent.run_stream_sync" + target_module = "pydantic_ai.agent.abstract" + target_path = "AbstractAgent.run_stream_sync" + wrapper = _agent_run_stream_sync_wrapper + + +class _AgentRunStreamEventsPatcher(FunctionWrapperPatcher): + name = "pydantic_ai.agent.run_stream_events" + target_module = "pydantic_ai.agent.abstract" + target_path = "AbstractAgent.run_stream_events" + wrapper = _agent_run_stream_events_wrapper + + +class _AgentGetModelPatcher(FunctionWrapperPatcher): + name = "pydantic_ai.agent.get_model" + target_module = "pydantic_ai" + target_path = "Agent._get_model" + wrapper = _agent_get_model_wrapper + + +class AgentPatcher(CompositeFunctionWrapperPatcher): + """Patch Pydantic AI agent entrypoints for tracing.""" + + name = "pydantic_ai.agent" + sub_patchers = ( + _AgentRunPatcher, + _AgentRunSyncPatcher, + _AgentToCliSyncPatcher, + _AgentRunStreamPatcher, + _AgentRunStreamSyncPatcher, + _AgentRunStreamEventsPatcher, + _AgentGetModelPatcher, + ) + + +class DirectPrepareModelPatcher(FunctionWrapperPatcher): + name = "pydantic_ai.direct.prepare_model" + target_module = "pydantic_ai.direct" + target_path = "_prepare_model" + wrapper = _direct_prepare_model_wrapper + + +class DirectModelRequestPatcher(FunctionWrapperPatcher): + name = "pydantic_ai.direct.model_request" + target_module = "pydantic_ai.direct" + target_path = "model_request" + wrapper = _create_direct_model_request_wrapper() + + +class DirectModelRequestSyncPatcher(FunctionWrapperPatcher): + name = "pydantic_ai.direct.model_request_sync" + target_module = "pydantic_ai.direct" + target_path = "model_request_sync" + wrapper = _create_direct_model_request_sync_wrapper() + + +class DirectModelRequestStreamPatcher(FunctionWrapperPatcher): + name = "pydantic_ai.direct.model_request_stream" + 
target_module = "pydantic_ai.direct" + target_path = "model_request_stream" + wrapper = _create_direct_model_request_stream_wrapper() + + +class DirectModelRequestStreamSyncPatcher(FunctionWrapperPatcher): + name = "pydantic_ai.direct.model_request_stream_sync" + target_module = "pydantic_ai.direct" + target_path = "model_request_stream_sync" + wrapper = _create_direct_model_request_stream_sync_wrapper() + + +class StreamedResponseSyncStartProducerPatcher(FunctionWrapperPatcher): + name = "pydantic_ai.direct.streamed_response_sync.start_producer" + target_module = "pydantic_ai.direct" + target_path = "StreamedResponseSync._start_producer" + wrapper = _create_start_producer_wrapper() + priority: ClassVar[int] = 50 + + +class _ToolManagerExecuteFunctionToolPatcher(FunctionWrapperPatcher): + name = "pydantic_ai.tool_manager.execute_function_tool" + target_module = "pydantic_ai._tool_manager" + target_path = "ToolManager._execute_function_tool_call" + wrapper = _tool_manager_execute_function_tool_wrapper + + +class _ToolManagerCallFunctionToolPatcher(FunctionWrapperPatcher): + name = "pydantic_ai.tool_manager.call_function_tool" + target_module = "pydantic_ai._tool_manager" + target_path = "ToolManager._call_function_tool" + wrapper = _tool_manager_call_function_tool_wrapper + superseded_by = (_ToolManagerExecuteFunctionToolPatcher,) + + +class ToolManagerFunctionToolPatcher(CompositeFunctionWrapperPatcher): + name = "pydantic_ai.tool_manager" + sub_patchers = ( + _ToolManagerExecuteFunctionToolPatcher, + _ToolManagerCallFunctionToolPatcher, + ) + + +def wrap_agent(Agent: Any) -> Any: + return AgentPatcher.wrap_target(Agent) + + +class ModelClassesPatcher(ClassScanPatcher): + """Deprecated compatibility fallback for model subclass scanning. + + Normal setup now wraps resolved models via ``Agent._get_model`` and + ``pydantic_ai.direct._prepare_model`` instead of relying on subclass scans. 
+ """ + + name = "pydantic_ai.models" + priority: ClassVar[int] = 200 + target_module = "pydantic_ai.models" + root_class_path = "Model" + + patch_class = staticmethod(_wrap_concrete_model_class) + + +def wrap_model_class(model_class: Any) -> Any: + warnings.warn( + "wrap_model_class() is deprecated and no longer needed for normal setup. " + "setup_pydantic_ai() now wraps models at runtime via model resolution seams.", + DeprecationWarning, + stacklevel=2, + ) + if ModelClassesPatcher.has_patch_marker(model_class): + return model_class + _wrap_concrete_model_class(model_class) + ModelClassesPatcher.mark_patched(model_class) + return model_class + + +def wrap_model_classes() -> bool: + """Deprecated compatibility shim for scanning currently loaded model subclasses.""" + warnings.warn( + "wrap_model_classes() is deprecated and no longer needed. " + "setup_pydantic_ai() now wraps models at runtime via model resolution seams.", + DeprecationWarning, + stacklevel=2, + ) + if not ModelClassesPatcher.applies(None, None): + return False + return ClassScanPatcher.patch.__func__(ModelClassesPatcher, None, None) diff --git a/py/src/braintrust/wrappers/test_pydantic_ai_integration.py b/py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_integration.py similarity index 89% rename from py/src/braintrust/wrappers/test_pydantic_ai_integration.py rename to py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_integration.py index 5dc1007c..c6142505 100644 --- a/py/src/braintrust/wrappers/test_pydantic_ai_integration.py +++ b/py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_integration.py @@ -5,6 +5,7 @@ import asyncio import inspect import time +from pathlib import Path import pytest from braintrust import logger, setup_pydantic_ai, traced @@ -22,6 +23,11 @@ TEST_PROMPT = "What is 2+2? Answer with just the number." 
+@pytest.fixture(scope="module") +def vcr_cassette_dir(): + return str(Path(__file__).resolve().parent / "cassettes") + + @pytest.fixture(scope="module", autouse=True) def setup_wrapper(): """Setup pydantic_ai wrapper before any tests run.""" @@ -67,6 +73,36 @@ def _assert_metrics_are_valid(metrics, start, end): assert metrics["completion_tokens"] > 0 +@pytest.mark.vcr +@pytest.mark.asyncio +async def test_direct_model_request_creates_nested_chat_span_without_class_scan(memory_logger, direct): + """Direct calls should resolve and wrap models via _prepare_model, not class scanning.""" + assert not memory_logger.pop() + + messages = [ModelRequest(parts=[UserPromptPart(content=TEST_PROMPT)])] + + start = time.time() + response = await direct.model_request(model=MODEL, messages=messages) + end = time.time() + + assert response.parts + assert "4" in str(response.parts[0].content) + + spans = memory_logger.pop() + assert len(spans) >= 2, f"Expected at least 2 spans (model_request + chat), got {len(spans)}" + + direct_span = next((s for s in spans if s["span_attributes"]["name"] == "model_request"), None) + chat_span = next((s for s in spans if "chat" in s["span_attributes"]["name"]), None) + + assert direct_span is not None, "model_request span not found" + assert chat_span is not None, "chat span not found" + assert chat_span["span_parents"] == [direct_span["span_id"]] + assert chat_span["metadata"]["model"] == "gpt-4o-mini" + assert chat_span["metadata"]["provider"] == "openai" + _assert_metrics_are_valid(direct_span["metrics"], start, end) + _assert_metrics_are_valid(chat_span["metrics"], start, end) + + @pytest.mark.vcr @pytest.mark.asyncio async def test_agent_run_async(memory_logger): @@ -117,6 +153,37 @@ async def test_agent_run_async(memory_logger): assert agent_span["metrics"]["completion_tokens"] > 0 +@pytest.mark.vcr +@pytest.mark.asyncio +async def test_wrapper_agent_run_is_traced(memory_logger): + """WrapperAgent inherits AbstractAgent methods and should be 
traced by setup().""" + from pydantic_ai.agent.wrapper import WrapperAgent + + assert not memory_logger.pop() + + wrapped = WrapperAgent(Agent(MODEL, name="wrapped-agent", model_settings=ModelSettings(max_tokens=50))) + + start = time.time() + result = await wrapped.run(TEST_PROMPT) + end = time.time() + + assert result.output + assert "4" in str(result.output) + + spans = memory_logger.pop() + assert len(spans) >= 2, f"Expected at least 2 spans (agent_run + chat), got {len(spans)}" + + agent_span = next((s for s in spans if "agent_run" in s["span_attributes"]["name"]), None) + chat_span = next((s for s in spans if "chat" in s["span_attributes"]["name"]), None) + + assert agent_span is not None, "agent_run span not found" + assert chat_span is not None, "chat span not found" + assert agent_span["span_attributes"]["name"] == "agent_run [wrapped-agent]" + assert chat_span["span_parents"] == [agent_span["span_id"]] + _assert_metrics_are_valid(agent_span["metrics"], start, end) + _assert_metrics_are_valid(chat_span["metrics"], start, end) + + @pytest.mark.vcr def test_agent_run_sync(memory_logger): """Test Agent.run_sync() synchronous method.""" @@ -703,270 +770,6 @@ async def test_direct_model_request_stream_complete_output(memory_logger, direct assert len(spans) >= 1 -@pytest.mark.vcr -@pytest.mark.asyncio -async def test_direct_api_streaming_call_3(memory_logger, direct): - """Test direct API streaming (call 3) - should output complete '1, 2, 3, 4, 5'.""" - assert not memory_logger.pop() - - IDENTICAL_PROMPT = "Count from 1 to 5." 
- messages = [ModelRequest(parts=[UserPromptPart(content=IDENTICAL_PROMPT)])] - - collected_text = "" - async with direct.model_request_stream( - model="openai:gpt-4o", messages=messages, model_settings=ModelSettings(max_tokens=100) - ) as stream: - async for chunk in stream: - # FIX: Handle PartStartEvent which contains initial text - if hasattr(chunk, "part") and hasattr(chunk.part, "content"): - collected_text += str(chunk.part.content) - # Handle PartDeltaEvent with delta content - elif hasattr(chunk, "delta") and chunk.delta: - if hasattr(chunk.delta, "content_delta") and chunk.delta.content_delta: - collected_text += chunk.delta.content_delta - - # Now this should pass! - assert "1" in collected_text, f"Expected '1' in output but got: {collected_text}" - assert "2" in collected_text - assert "3" in collected_text - assert "4" in collected_text - assert "5" in collected_text - - -@pytest.mark.vcr -@pytest.mark.asyncio -async def test_direct_api_streaming_call_4(memory_logger, direct): - """Test direct API streaming (call 4) - identical to call 3.""" - assert not memory_logger.pop() - - IDENTICAL_PROMPT = "Count from 1 to 5." - messages = [ModelRequest(parts=[UserPromptPart(content=IDENTICAL_PROMPT)])] - - collected_text = "" - async with direct.model_request_stream( - model="openai:gpt-4o", messages=messages, model_settings=ModelSettings(max_tokens=100) - ) as stream: - async for chunk in stream: - # FIX: Handle PartStartEvent which contains initial text - if hasattr(chunk, "part") and hasattr(chunk.part, "content"): - collected_text += str(chunk.part.content) - # Handle PartDeltaEvent with delta content - elif hasattr(chunk, "delta") and chunk.delta: - if hasattr(chunk.delta, "content_delta") and chunk.delta.content_delta: - collected_text += chunk.delta.content_delta - - # Now this should pass! 
- assert "1" in collected_text, f"Expected '1' in output but got: {collected_text}" - - -@pytest.mark.vcr -@pytest.mark.asyncio -async def test_direct_api_streaming_early_break_call_5(memory_logger, direct): - """Test direct API streaming with early break (call 5) - should still get first few chars including '1'.""" - assert not memory_logger.pop() - - IDENTICAL_PROMPT = "Count from 1 to 5." - messages = [ModelRequest(parts=[UserPromptPart(content=IDENTICAL_PROMPT)])] - - collected_text = "" - i = 0 - async with direct.model_request_stream( - model="openai:gpt-4o", messages=messages, model_settings=ModelSettings(max_tokens=100) - ) as stream: - async for chunk in stream: - # FIX: Handle PartStartEvent which contains initial text - if hasattr(chunk, "part") and hasattr(chunk.part, "content"): - collected_text += str(chunk.part.content) - # Handle PartDeltaEvent with delta content - elif hasattr(chunk, "delta") and chunk.delta: - if hasattr(chunk.delta, "content_delta") and chunk.delta.content_delta: - collected_text += chunk.delta.content_delta - - i += 1 - if i >= 3: - break - - # Even with early break after 3 chunks, we should capture text from PartStartEvent (chunk 1) - print(f"Collected text: '{collected_text}'") - assert len(collected_text) > 0, f"Expected some text even with early break but got empty string" - # Verify we're capturing PartStartEvent by checking we got text before breaking at chunk 3 - assert collected_text, f"Should have captured text from PartStartEvent or first delta" - - -@pytest.mark.vcr -@pytest.mark.asyncio -async def test_direct_api_streaming_no_duplication(memory_logger, direct): - """Test that direct API streaming doesn't duplicate output and captures all text in span.""" - assert not memory_logger.pop() - - collected_text = "" - chunk_count = 0 - - # Use direct API streaming - messages = [ModelRequest(parts=[UserPromptPart(content="Count from 1 to 5, separated by commas.")])] - async with direct.model_request_stream( - 
messages=messages, - model_settings=ModelSettings(max_tokens=100), - model="openai:gpt-4o", - ) as response: - async for chunk in response: - chunk_count += 1 - # Extract text from chunk - text = None - if hasattr(chunk, "part") and hasattr(chunk.part, "content"): - text = str(chunk.part.content) - elif hasattr(chunk, "delta") and chunk.delta: - if hasattr(chunk.delta, "content_delta") and chunk.delta.content_delta: - text = chunk.delta.content_delta - - if text: - collected_text += text - - print(f"Collected text from stream: '{collected_text}'") - print(f"Total chunks: {chunk_count}") - - # Verify we collected complete text - assert len(collected_text) > 0, "Should have collected text from stream" - assert "1" in collected_text, "Should have '1' in output" - - # Check span captured the full output - spans = memory_logger.pop() - assert len(spans) >= 1, f"Expected at least 1 span, got {len(spans)}" - - # Find the model_request_stream span - stream_span = next((s for s in spans if "model_request_stream" in s["span_attributes"]["name"]), None) - assert stream_span is not None, "model_request_stream span not found" - - # Check that span output contains the full text, not just "1," - span_output = stream_span.get("output", {}) - print(f"Span output: {span_output}") - - # The span should capture the full response - if "response" in span_output and "parts" in span_output["response"]: - parts = span_output["response"]["parts"] - span_text = "".join(str(p.get("content", "")) for p in parts if isinstance(p, dict)) - print(f"Span captured text: '{span_text}'") - # Should have more than just "1," - assert len(span_text) > 2, f"Span should capture more than just '1,', got: '{span_text}'" - assert "1" in span_text, "Span should contain '1'" - - -@pytest.mark.vcr -@pytest.mark.asyncio -async def test_direct_api_streaming_no_duplication_comprehensive(memory_logger, direct): - """Comprehensive test matching golden test setup to verify no duplication and full output capture.""" - 
assert not memory_logger.pop() - - # Match golden test exactly - IDENTICAL_PROMPT = "Count from 1 to 5." - IDENTICAL_SETTINGS = ModelSettings(max_tokens=100) - - messages = [ModelRequest(parts=[UserPromptPart(content=IDENTICAL_PROMPT)])] - - collected_text = "" - chunk_types = [] - seen_delta = False - - async with direct.model_request_stream( - messages=messages, model_settings=IDENTICAL_SETTINGS, model="openai:gpt-4o" - ) as stream: - async for chunk in stream: - # Track chunk types - if hasattr(chunk, "part") and hasattr(chunk.part, "content") and not seen_delta: - chunk_types.append(("PartStartEvent", str(chunk.part.content))) - text = str(chunk.part.content) - collected_text += text - elif hasattr(chunk, "delta") and chunk.delta: - seen_delta = True - if hasattr(chunk.delta, "content_delta") and chunk.delta.content_delta: - chunk_types.append(("PartDeltaEvent", chunk.delta.content_delta)) - text = chunk.delta.content_delta - collected_text += text - - print(f"\nCollected text: '{collected_text}'") - print(f"Total chunks received: {len(chunk_types)}") - print(f"All chunk types:") - for i, (chunk_type, content) in enumerate(chunk_types): - print(f" {i}: {chunk_type} = {content!r}") - - # Verify no duplication in collected text - # Expected: "Sure! Here you go:\n\n1, 2, 3, 4, 5." 
or similar (length ~30) - # Should NOT be duplicated - assert len(collected_text) < 60, ( - f"Text seems duplicated (too long): '{collected_text}' (len={len(collected_text)})" - ) - assert collected_text.count("1, 2, 3") == 1, f"Text should appear once, not duplicated: '{collected_text}'" - - # Check span - spans = memory_logger.pop() - print(f"Number of spans: {len(spans)}") - for i, s in enumerate(spans): - print(f"Span {i}: {s['span_attributes']['name']} (type: {s['span_attributes'].get('type', 'N/A')})") - if "span_parents" in s and s["span_parents"]: - print(f" Parents: {s['span_parents']}") - - # Should have 1 or 2 spans (direct API wrapper + potentially model wrapper) - assert len(spans) >= 1, f"Expected at least 1 span, got {len(spans)}" - - # Find the model_request_stream span - stream_span = next((s for s in spans if "model_request_stream" in s["span_attributes"]["name"]), None) - assert stream_span is not None, "model_request_stream span not found" - - # Check that span output is not empty and captures reasonable amount of text - span_output = stream_span.get("output", {}) - print(f"Span output keys: {span_output.keys() if span_output else 'None'}") - - if "parts" in span_output: - parts = span_output.get("parts", []) - print(f"Span parts: {parts}") - if parts and len(parts) > 0: - first_part = parts[0] - print(f"First part type: {type(first_part)}") - print(f"First part: {first_part}") - if isinstance(first_part, dict): - part_content = first_part.get("content", "") - print(f"Part content: '{part_content}'") - print(f"Part content length: {len(part_content)}") - # The span should capture the FULL text, not just "1," - assert len(part_content) > 5, f"Span should capture full text, got: '{part_content}'" - - -@pytest.mark.vcr -@pytest.mark.asyncio -async def test_async_generator_pattern_call_6(memory_logger): - """Test async generator pattern (call 6) - wrapping stream in async generator.""" - assert not memory_logger.pop() - - IDENTICAL_PROMPT = "Count 
from 1 to 5." - - async def stream_with_async_generator(prompt: str): - """Wrap the stream in an async generator (customer pattern).""" - agent = Agent("openai:gpt-4o", model_settings=ModelSettings(max_tokens=100)) - async for event in agent.run_stream_events(prompt): - yield event - - collected_text = "" - i = 0 - async for event in stream_with_async_generator(IDENTICAL_PROMPT): - # run_stream_events returns ResultEvent objects with different structure - # Try to extract text from whatever event type we get - if hasattr(event, "content") and event.content: - collected_text += str(event.content) - elif hasattr(event, "part") and hasattr(event.part, "content"): - collected_text += str(event.part.content) - elif hasattr(event, "delta") and event.delta: - if hasattr(event.delta, "content_delta") and event.delta.content_delta: - collected_text += event.delta.content_delta - - i += 1 - if i >= 3: - break - - # This should capture something - print(f"Collected text from generator: '{collected_text}'") - assert len(collected_text) > 0, f"Expected some text from async generator but got empty string" - - @pytest.mark.vcr @pytest.mark.asyncio async def test_agent_structured_output(memory_logger): @@ -1959,6 +1762,42 @@ def calculate(operation: str, a: float, b: float) -> str: assert calc_tool_span["span_parents"] == [agent_span["span_id"]], "tool span should be nested under agent_run" +@pytest.mark.vcr +@pytest.mark.asyncio +async def test_tool_execution_tracing_does_not_depend_on_message_reconstruction(memory_logger, monkeypatch): + """Real tool execution spans should be emitted even if message reconstruction is unavailable.""" + from braintrust.integrations.pydantic_ai import tracing as pydantic_ai_tracing + + assert not memory_logger.pop() + + def fail_if_called(result): + raise AssertionError("message-based tool span reconstruction should not run") + + monkeypatch.setattr(pydantic_ai_tracing, "_create_tool_spans_from_messages_impl", fail_if_called) + + agent = 
Agent(MODEL, model_settings=ModelSettings(max_tokens=200)) + + @agent.tool_plain + def get_weather(city: str) -> str: + return f"It's sunny in {city}" + + result = await agent.run("What's the weather in Paris?") + + assert result.output + assert "Paris" in str(result.output) or "sunny" in str(result.output) + + spans = memory_logger.pop() + agent_span = next((s for s in spans if "agent_run" in s["span_attributes"]["name"]), None) + tool_span = next((s for s in spans if s["span_attributes"].get("name") == "get_weather"), None) + + assert agent_span is not None, "agent_run span not found" + assert tool_span is not None, "runtime tool span not found" + assert tool_span["span_attributes"]["type"] == SpanTypeAttribute.TOOL + assert tool_span["span_parents"] == [agent_span["span_id"]] + assert tool_span["metadata"].get("tool_call_id") + assert tool_span["metrics"]["duration"] >= 0 + + @pytest.mark.vcr def test_tool_execution_creates_spans(memory_logger): """Test that executing tools with agents works and creates traced spans.""" @@ -2029,7 +1868,7 @@ def test_agent_tool_metadata_extraction(memory_logger): Principle: If agent.run() accepts it, it goes in input only. 
""" - from braintrust.wrappers.pydantic_ai import _build_agent_input_and_metadata + from braintrust.integrations.pydantic_ai.tracing import _build_agent_input_and_metadata agent = Agent(MODEL, model_settings=ModelSettings(max_tokens=100)) @@ -2110,7 +1949,7 @@ def search_database(query: str, limit: int = 10) -> str: def test_agent_without_tools_metadata(): """Test metadata extraction for agent without tools.""" - from braintrust.wrappers.pydantic_ai import _build_agent_input_and_metadata + from braintrust.integrations.pydantic_ai.tracing import _build_agent_input_and_metadata # Agent with no tools agent = Agent(MODEL, model_settings=ModelSettings(max_tokens=50)) @@ -2127,7 +1966,7 @@ def test_agent_without_tools_metadata(): def test_agent_tool_with_custom_name(): """Test that tools with custom names are properly extracted with schemas in input.""" - from braintrust.wrappers.pydantic_ai import _build_agent_input_and_metadata + from braintrust.integrations.pydantic_ai.tracing import _build_agent_input_and_metadata agent = Agent(MODEL) @@ -2162,7 +2001,7 @@ def calc(a: int, b: int) -> int: def test_explicit_toolsets_kwarg_in_input(): """Test that explicitly passed toolsets kwarg goes to input (not just metadata).""" - from braintrust.wrappers.pydantic_ai import _build_agent_input_and_metadata + from braintrust.integrations.pydantic_ai.tracing import _build_agent_input_and_metadata agent = Agent(MODEL) @@ -2216,7 +2055,7 @@ def test_reasoning_tokens_extraction(memory_logger): mock_response.usage.details.reasoning_tokens = 128 # Test the metric extraction function directly - from braintrust.wrappers.pydantic_ai import _extract_response_metrics + from braintrust.integrations.pydantic_ai.tracing import _extract_response_metrics start_time = time.time() end_time = start_time + 5.0 @@ -2359,14 +2198,85 @@ class name (e.g., 'OpenAIChatModel') rather than str(instance) which _assert_metrics_are_valid(chat_span["metrics"], start, end) +def 
test_model_classes_patcher_marker_check_is_mro_safe(): + from braintrust.integrations.pydantic_ai.patchers import ModelClassesPatcher + + class WrapperModel: + pass + + class InstrumentedModel(WrapperModel): + pass + + ModelClassesPatcher.mark_patched(WrapperModel) + + assert ModelClassesPatcher.has_patch_marker(WrapperModel) is True + assert ModelClassesPatcher.has_patch_marker(InstrumentedModel) is False + + +def test_wrap_model_class_is_idempotent(): + from braintrust.integrations.pydantic_ai.patchers import ModelClassesPatcher, wrap_model_class + + class DummyModel: + async def request(self, *args, **kwargs): + return None + + def request_stream(self, *args, **kwargs): + return iter(()) + + with pytest.deprecated_call(match=r"wrap_model_class\(\) is deprecated"): + wrap_model_class(DummyModel) + first_request = DummyModel.__dict__["request"] + first_request_stream = DummyModel.__dict__["request_stream"] + + assert ModelClassesPatcher.has_patch_marker(DummyModel) is True + + with pytest.deprecated_call(match=r"wrap_model_class\(\) is deprecated"): + wrap_model_class(DummyModel) + + assert DummyModel.__dict__["request"] is first_request + assert DummyModel.__dict__["request_stream"] is first_request_stream + + +def test_wrap_model_classes_is_deprecated(monkeypatch): + from braintrust.integrations.pydantic_ai.patchers import wrap_model_classes + + monkeypatch.setattr( + "braintrust.integrations.pydantic_ai.patchers.ModelClassesPatcher.applies", lambda *_args, **_kwargs: False + ) + + with pytest.deprecated_call(match=r"wrap_model_classes\(\) is deprecated"): + assert wrap_model_classes() is False + + +def test_setup_pydantic_ai_is_idempotent_across_new_patch_points(): + import pydantic_ai._tool_manager as tool_manager_module + import pydantic_ai.direct as direct_module + from braintrust.integrations.pydantic_ai.integration import PydanticAIIntegration + from pydantic_ai.agent.abstract import AbstractAgent + + run = AbstractAgent.__dict__["run"] + prepare_model = 
direct_module.__dict__["_prepare_model"] + tool_method_name = ( + "_execute_function_tool_call" + if "_execute_function_tool_call" in tool_manager_module.ToolManager.__dict__ + else "_call_function_tool" + ) + tool_method = tool_manager_module.ToolManager.__dict__[tool_method_name] + + assert PydanticAIIntegration.setup() is True + assert AbstractAgent.__dict__["run"] is run + assert direct_module.__dict__["_prepare_model"] is prepare_model + assert tool_manager_module.ToolManager.__dict__[tool_method_name] is tool_method + + def test_serialize_content_part_with_binary_content(): """Unit test to verify _serialize_content_part handles BinaryContent correctly. This tests the direct serialization of BinaryContent objects and verifies they are converted to Braintrust Attachment objects. """ + from braintrust.integrations.pydantic_ai.tracing import _serialize_content_part from braintrust.logger import Attachment - from braintrust.wrappers.pydantic_ai import _serialize_content_part from pydantic_ai.models.function import BinaryContent # Test 1: Direct BinaryContent serialization @@ -2389,8 +2299,8 @@ def test_serialize_content_part_with_user_prompt_part(): containing BinaryContent, we need to recursively serialize the content items so that BinaryContent is converted to Braintrust Attachment. """ + from braintrust.integrations.pydantic_ai.tracing import _serialize_content_part from braintrust.logger import Attachment - from braintrust.wrappers.pydantic_ai import _serialize_content_part from pydantic_ai.messages import UserPromptPart from pydantic_ai.models.function import BinaryContent @@ -2430,8 +2340,8 @@ def test_serialize_messages_with_binary_content(): This tests the full message serialization path that's used for the chat span, ensuring that nested BinaryContent in UserPromptPart is properly converted. 
""" + from braintrust.integrations.pydantic_ai.tracing import _serialize_messages from braintrust.logger import Attachment - from braintrust.wrappers.pydantic_ai import _serialize_messages from pydantic_ai.messages import ModelRequest, UserPromptPart from pydantic_ai.models.function import BinaryContent @@ -2488,7 +2398,7 @@ async def test_streaming_wrappers_capture_time_to_first_token(): """ from unittest.mock import AsyncMock, MagicMock, Mock - from braintrust.wrappers.pydantic_ai import ( + from braintrust.integrations.pydantic_ai.tracing import ( _AgentStreamResultSyncProxy, _AgentStreamWrapper, _DirectStreamIteratorProxy, @@ -2846,7 +2756,7 @@ def _async_producer(self): def test_start_producer_wrapper_exception_does_not_double_invoke_producer(): """Regression test: producer exceptions must not trigger a second producer call.""" - from braintrust.wrappers.pydantic_ai import _create_start_producer_wrapper + from braintrust.integrations.pydantic_ai.tracing import _create_start_producer_wrapper class StreamLike: def __init__(self): diff --git a/py/src/braintrust/wrappers/test_pydantic_ai_logfire.py b/py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_logfire.py similarity index 94% rename from py/src/braintrust/wrappers/test_pydantic_ai_logfire.py rename to py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_logfire.py index 661b7bf7..6a0caa98 100644 --- a/py/src/braintrust/wrappers/test_pydantic_ai_logfire.py +++ b/py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_logfire.py @@ -6,6 +6,7 @@ """ import time +from pathlib import Path import pytest from braintrust import logger, setup_pydantic_ai @@ -19,6 +20,11 @@ TEST_PROMPT = "What is 2+2? Answer with just the number." 
+@pytest.fixture(scope="module") +def vcr_cassette_dir(): + return str(Path(__file__).resolve().parent / "cassettes") + + @pytest.fixture(scope="module", autouse=True) def setup_wrapper(): """Setup pydantic_ai wrapper and logfire before any tests run.""" diff --git a/py/src/braintrust/wrappers/test_pydantic_ai_wrap_openai.py b/py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_wrap_openai.py similarity index 97% rename from py/src/braintrust/wrappers/test_pydantic_ai_wrap_openai.py rename to py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_wrap_openai.py index c1dfceb3..3c089f01 100644 --- a/py/src/braintrust/wrappers/test_pydantic_ai_wrap_openai.py +++ b/py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_wrap_openai.py @@ -1,4 +1,5 @@ import time +from pathlib import Path from typing import Any, Dict import pytest @@ -27,6 +28,11 @@ TEST_PROMPT = "What is the capital of Italy?" +@pytest.fixture(scope="module") +def vcr_cassette_dir(): + return str(Path(__file__).resolve().parent / "cassettes") + + def get_pydantic_agents_client(model_name: str, client: AsyncOpenAI): _provider = OpenAIProvider(openai_client=client) return OpenAIModelClass(model_name, provider=_provider) diff --git a/py/src/braintrust/integrations/pydantic_ai/tracing.py b/py/src/braintrust/integrations/pydantic_ai/tracing.py new file mode 100644 index 00000000..c4eaf991 --- /dev/null +++ b/py/src/braintrust/integrations/pydantic_ai/tracing.py @@ -0,0 +1,1478 @@ +import asyncio +import contextvars +import logging +import sys +import time +from contextlib import AbstractAsyncContextManager +from typing import Any + +from braintrust.bt_json import bt_safe_deep_copy +from braintrust.logger import Attachment, start_span +from braintrust.span_types import SpanTypeAttribute +from wrapt import wrap_function_wrapper + + +logger = logging.getLogger(__name__) +_tool_trace_state: contextvars.ContextVar[list[int] | None] = contextvars.ContextVar( + 
"braintrust_pydantic_ai_tool_trace_state", default=None +) + + +def wrap_agent(Agent: Any) -> Any: + from .patchers import AgentPatcher # pylint: disable=import-outside-toplevel + + return AgentPatcher.wrap_target(Agent) + + +def _wrap_model_instance(model: Any) -> Any: + """Ensure a resolved model class is wrapped exactly once.""" + if model is None: + return model + + from .patchers import wrap_model_class # pylint: disable=import-outside-toplevel + + wrap_model_class(type(model)) + return model + + +def _agent_get_model_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any): + return _wrap_model_instance(wrapped(*args, **kwargs)) + + +def _direct_prepare_model_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any): + return _wrap_model_instance(wrapped(*args, **kwargs)) + + +def _start_tool_trace_capture() -> Any: + return _tool_trace_state.set([0]) + + +def _reset_tool_trace_capture(token: Any) -> None: + _tool_trace_state.reset(token) + + +def _mark_tool_span_emitted() -> None: + state = _tool_trace_state.get() + if state is not None: + state[0] += 1 + + +def _maybe_create_tool_spans_from_messages(result: Any) -> None: + state = _tool_trace_state.get() + if state is not None and state[0] > 0: + return + _create_tool_spans_from_messages(result) + + +async def _agent_run_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any): + input_data, metadata = _build_agent_input_and_metadata(args, kwargs, instance) + + with start_span( + name=f"agent_run [{instance.name}]" if hasattr(instance, "name") and instance.name else "agent_run", + type=SpanTypeAttribute.LLM, + input=input_data if input_data else None, + metadata=metadata, + ) as agent_span: + tool_trace_token = _start_tool_trace_capture() + try: + start_time = time.time() + result = await wrapped(*args, **kwargs) + end_time = time.time() + + _maybe_create_tool_spans_from_messages(result) + + output = _serialize_result_output(result) + metrics = _extract_usage_metrics(result, start_time, end_time) 
+ + agent_span.log(output=output, metrics=metrics) + return result + finally: + _reset_tool_trace_capture(tool_trace_token) + + +def _agent_run_sync_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any): + input_data, metadata = _build_agent_input_and_metadata(args, kwargs, instance) + + with start_span( + name=f"agent_run_sync [{instance.name}]" if hasattr(instance, "name") and instance.name else "agent_run_sync", + type=SpanTypeAttribute.LLM, + input=input_data if input_data else None, + metadata=metadata, + ) as agent_span: + tool_trace_token = _start_tool_trace_capture() + try: + start_time = time.time() + result = wrapped(*args, **kwargs) + end_time = time.time() + + _maybe_create_tool_spans_from_messages(result) + + output = _serialize_result_output(result) + metrics = _extract_usage_metrics(result, start_time, end_time) + + agent_span.log(output=output, metrics=metrics) + return result + finally: + _reset_tool_trace_capture(tool_trace_token) + + +def _agent_to_cli_sync_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any): + input_data, metadata = _build_agent_input_and_metadata(args, kwargs, instance) + + with start_span( + name=f"agent_to_cli_sync [{instance.name}]" + if hasattr(instance, "name") and instance.name + else "agent_to_cli_sync", + type=SpanTypeAttribute.LLM, + input=input_data if input_data else None, + metadata=metadata, + ) as agent_span: + start_time = time.time() + result = wrapped(*args, **kwargs) + end_time = time.time() + agent_span.log(metrics={"start": start_time, "end": end_time, "duration": end_time - start_time}) + return result + + +def _agent_run_stream_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any): + input_data, metadata = _build_agent_input_and_metadata(args, kwargs, instance) + agent_name = instance.name if hasattr(instance, "name") else None + span_name = f"agent_run_stream [{agent_name}]" if agent_name else "agent_run_stream" + + return _AgentStreamWrapper( + wrapped(*args, **kwargs), + 
span_name, + input_data, + metadata, + ) + + +def _agent_run_stream_sync_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any): + input_data, metadata = _build_agent_input_and_metadata(args, kwargs, instance) + agent_name = instance.name if hasattr(instance, "name") else None + span_name = f"agent_run_stream_sync [{agent_name}]" if agent_name else "agent_run_stream_sync" + + # Create span context BEFORE calling wrapped function so internal spans nest under it + span_cm = start_span( + name=span_name, + type=SpanTypeAttribute.LLM, + input=input_data if input_data else None, + metadata=metadata, + ) + span = span_cm.__enter__() + tool_trace_token = _start_tool_trace_capture() + start_time = time.time() + + try: + # Call the original function within the span context + stream_result = wrapped(*args, **kwargs) + return _AgentStreamResultSyncProxy( + stream_result, + span, + span_cm, + start_time, + tool_trace_token, + ) + except Exception: + # Clean up span on error + _reset_tool_trace_capture(tool_trace_token) + span_cm.__exit__(*sys.exc_info()) + raise + + +async def _agent_run_stream_events_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any): + input_data, metadata = _build_agent_input_and_metadata(args, kwargs, instance) + + agent_name = instance.name if hasattr(instance, "name") else None + span_name = f"agent_run_stream_events [{agent_name}]" if agent_name else "agent_run_stream_events" + + with start_span( + name=span_name, + type=SpanTypeAttribute.LLM, + input=input_data if input_data else None, + metadata=metadata, + ) as agent_span: + tool_trace_token = _start_tool_trace_capture() + try: + start_time = time.time() + event_count = 0 + final_result = None + + async for event in wrapped(*args, **kwargs): + event_count += 1 + if hasattr(event, "output"): + final_result = event + yield event + + end_time = time.time() + + if final_result: + _maybe_create_tool_spans_from_messages(final_result) + + output = None + metrics = { + "start": start_time, + 
"end": end_time, + "duration": end_time - start_time, + "event_count": event_count, + } + + if final_result: + output = _serialize_result_output(final_result) + usage_metrics = _extract_usage_metrics(final_result, start_time, end_time) + metrics.update(usage_metrics) + + agent_span.log(output=output, metrics=metrics) + finally: + _reset_tool_trace_capture(tool_trace_token) + + +def _create_direct_model_request_wrapper(): + """Create wrapper for direct.model_request().""" + + async def wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any): + input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs) + + with start_span( + name="model_request", + type=SpanTypeAttribute.LLM, + input=input_data, + metadata=metadata, + ) as span: + start_time = time.time() + result = await wrapped(*args, **kwargs) + end_time = time.time() + + output = _serialize_model_response(result) + metrics = _extract_response_metrics(result, start_time, end_time) + + span.log(output=output, metrics=metrics) + return result + + return wrapper + + +def _create_direct_model_request_sync_wrapper(): + """Create wrapper for direct.model_request_sync().""" + + def wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any): + input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs) + + with start_span( + name="model_request_sync", + type=SpanTypeAttribute.LLM, + input=input_data, + metadata=metadata, + ) as span: + start_time = time.time() + result = wrapped(*args, **kwargs) + end_time = time.time() + + output = _serialize_model_response(result) + metrics = _extract_response_metrics(result, start_time, end_time) + + span.log(output=output, metrics=metrics) + return result + + return wrapper + + +def _create_direct_model_request_stream_wrapper(): + """Create wrapper for direct.model_request_stream().""" + + def wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any): + input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs) + + return 
_DirectStreamWrapper( + wrapped(*args, **kwargs), + "model_request_stream", + input_data, + metadata, + ) + + return wrapper + + +def _create_direct_model_request_stream_sync_wrapper(): + """Create wrapper for direct.model_request_stream_sync().""" + + def wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any): + input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs) + + return _DirectStreamWrapperSync( + wrapped(*args, **kwargs), + "model_request_stream_sync", + input_data, + metadata, + ) + + return wrapper + + +def wrap_model_request(original_func: Any) -> Any: + async def wrapper(*args, **kwargs): + input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs) + + with start_span( + name="model_request", + type=SpanTypeAttribute.LLM, + input=input_data, + metadata=metadata, + ) as span: + start_time = time.time() + result = await original_func(*args, **kwargs) + end_time = time.time() + + output = _serialize_model_response(result) + metrics = _extract_response_metrics(result, start_time, end_time) + + span.log(output=output, metrics=metrics) + return result + + return wrapper + + +def wrap_model_request_sync(original_func: Any) -> Any: + def wrapper(*args, **kwargs): + input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs) + + with start_span( + name="model_request_sync", + type=SpanTypeAttribute.LLM, + input=input_data, + metadata=metadata, + ) as span: + start_time = time.time() + result = original_func(*args, **kwargs) + end_time = time.time() + + output = _serialize_model_response(result) + metrics = _extract_response_metrics(result, start_time, end_time) + + span.log(output=output, metrics=metrics) + return result + + return wrapper + + +def wrap_model_request_stream(original_func: Any) -> Any: + def wrapper(*args, **kwargs): + input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs) + + return _DirectStreamWrapper( + original_func(*args, **kwargs), + "model_request_stream", + 
input_data, + metadata, + ) + + return wrapper + + +def wrap_model_request_stream_sync(original_func: Any) -> Any: + def wrapper(*args, **kwargs): + input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs) + + return _DirectStreamWrapperSync( + original_func(*args, **kwargs), + "model_request_stream_sync", + input_data, + metadata, + ) + + return wrapper + + +def _build_model_class_input_and_metadata(instance: Any, args: Any, kwargs: Any): + """Build input data and metadata for model class request wrappers. + + Returns: + Tuple of (model_name, display_name, input_data, metadata) + """ + model_name, provider = _extract_model_info_from_model_instance(instance) + display_name = model_name or type(instance).__name__ + + messages = args[0] if len(args) > 0 else kwargs.get("messages") + model_settings = args[1] if len(args) > 1 else kwargs.get("model_settings") + + serialized_messages = _serialize_messages(messages) + + input_data = {"messages": serialized_messages} + if model_settings is not None: + input_data["model_settings"] = bt_safe_deep_copy(model_settings) + + metadata = _build_model_metadata(model_name, provider, model_settings=None) + + return model_name, display_name, input_data, metadata + + +def _wrap_concrete_model_class(model_class: Any): + """Wrap a concrete model class to trace its request methods.""" + + async def model_request_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any): + model_name, display_name, input_data, metadata = _build_model_class_input_and_metadata(instance, args, kwargs) + + with start_span( + name=f"chat {display_name}", + type=SpanTypeAttribute.LLM, + input=input_data, + metadata=metadata, + ) as span: + start_time = time.time() + result = await wrapped(*args, **kwargs) + end_time = time.time() + + output = _serialize_model_response(result) + metrics = _extract_response_metrics(result, start_time, end_time) + + span.log(output=output, metrics=metrics) + return result + + def 
model_request_stream_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any): + model_name, display_name, input_data, metadata = _build_model_class_input_and_metadata(instance, args, kwargs) + + return _DirectStreamWrapper( + wrapped(*args, **kwargs), + f"chat {display_name}", + input_data, + metadata, + ) + + wrap_function_wrapper(model_class, "request", model_request_wrapper) + wrap_function_wrapper(model_class, "request_stream", model_request_stream_wrapper) + return model_class + + +class _AgentStreamWrapper(AbstractAsyncContextManager): + """Wrapper for agent.run_stream() that adds tracing while passing through the stream result.""" + + def __init__(self, stream_cm: Any, span_name: str, input_data: Any, metadata: Any): + self.stream_cm = stream_cm + self.span_name = span_name + self.input_data = input_data + self.metadata = metadata + self.span_cm = None + self.start_time = None + self.stream_result = None + self._enter_task = None + self._first_token_time = None + self._tool_trace_token = None + + async def __aenter__(self): + self._enter_task = asyncio.current_task() + + # Use context manager properly so span stays current + # DON'T pass start_time here - we'll set it via metrics in __aexit__ + self.span_cm = start_span( + name=self.span_name, + type=SpanTypeAttribute.LLM, + input=self.input_data if self.input_data else None, + metadata=self.metadata, + ) + self.span_cm.__enter__() + + # Capture start time right before entering the stream (API call initiation) + self._tool_trace_token = _start_tool_trace_capture() + self.start_time = time.time() + self.stream_result = await self.stream_cm.__aenter__() + + # Wrap the stream result to capture first token time + return _StreamResultProxy(self.stream_result, self) + + async def __aexit__(self, exc_type, exc_val, exc_tb): + try: + await self.stream_cm.__aexit__(exc_type, exc_val, exc_tb) + finally: + if self.span_cm and self.start_time and self.stream_result: + end_time = time.time() + + 
_maybe_create_tool_spans_from_messages(self.stream_result) + + output = _serialize_stream_output(self.stream_result) + metrics = _extract_stream_usage_metrics( + self.stream_result, self.start_time, end_time, self._first_token_time + ) + self.span_cm.log(output=output, metrics=metrics) + + # Clean up span context + if self.span_cm: + if asyncio.current_task() is self._enter_task: + self.span_cm.__exit__(None, None, None) + else: + self.span_cm.end() + if self._tool_trace_token is not None: + _reset_tool_trace_capture(self._tool_trace_token) + self._tool_trace_token = None + + return False + + +class _StreamResultProxy: + """Proxy for stream result that captures first token time.""" + + def __init__(self, stream_result: Any, wrapper: _AgentStreamWrapper): + self._stream_result = stream_result + self._wrapper = wrapper + + def __getattr__(self, name: str): + """Delegate all attribute access to the wrapped stream result.""" + attr = getattr(self._stream_result, name) + + # Wrap streaming methods to capture first token time + if callable(attr) and name in ("stream_text", "stream_output"): + + async def wrapped_method(*args, **kwargs): + result = attr(*args, **kwargs) + async for item in result: + if self._wrapper._first_token_time is None: + self._wrapper._first_token_time = time.time() + yield item + + return wrapped_method + + return attr + + +class _DirectStreamWrapper(AbstractAsyncContextManager): + """Wrapper for model_request_stream() that adds tracing while passing through the stream.""" + + def __init__(self, stream_cm: Any, span_name: str, input_data: Any, metadata: Any): + self.stream_cm = stream_cm + self.span_name = span_name + self.input_data = input_data + self.metadata = metadata + self.span_cm = None + self.start_time = None + self.stream = None + self._enter_task = None + self._first_token_time = None + + async def __aenter__(self): + self._enter_task = asyncio.current_task() + + # Use context manager properly so span stays current + # DON'T pass 
start_time here - we'll set it via metrics in __aexit__ + self.span_cm = start_span( + name=self.span_name, + type=SpanTypeAttribute.LLM, + input=self.input_data if self.input_data else None, + metadata=self.metadata, + ) + self.span_cm.__enter__() + + # Capture start time right before entering the stream (API call initiation) + self.start_time = time.time() + self.stream = await self.stream_cm.__aenter__() + + # Wrap the stream to capture first token time + return _DirectStreamIteratorProxy(self.stream, self) + + async def __aexit__(self, exc_type, exc_val, exc_tb): + try: + await self.stream_cm.__aexit__(exc_type, exc_val, exc_tb) + finally: + if self.span_cm and self.start_time and self.stream: + end_time = time.time() + + try: + final_response = self.stream.get() + output = _serialize_model_response(final_response) + metrics = _extract_response_metrics( + final_response, self.start_time, end_time, self._first_token_time + ) + self.span_cm.log(output=output, metrics=metrics) + except Exception as e: + logger.debug(f"Failed to extract stream output/metrics: {e}") + + # Clean up span context + if self.span_cm: + if asyncio.current_task() is self._enter_task: + self.span_cm.__exit__(None, None, None) + else: + self.span_cm.end() + + return False + + +class _DirectStreamIteratorProxy: + """Proxy for direct stream that captures first token time.""" + + def __init__(self, stream: Any, wrapper: _DirectStreamWrapper): + self._stream = stream + self._wrapper = wrapper + self._iterator = None + + def __getattr__(self, name: str): + """Delegate all attribute access to the wrapped stream.""" + return getattr(self._stream, name) + + def __aiter__(self): + """Return async iterator that captures first token time.""" + # Get the actual async iterator from the stream + self._iterator = self._stream.__aiter__() if hasattr(self._stream, "__aiter__") else self._stream + return self + + async def __anext__(self): + """Capture first token time on first iteration.""" + if 
self._iterator is None: + # In case __aiter__ wasn't called, initialize it + self._iterator = self._stream.__aiter__() if hasattr(self._stream, "__aiter__") else self._stream + + item = await self._iterator.__anext__() + if self._wrapper._first_token_time is None: + self._wrapper._first_token_time = time.time() + return item + + +class _AgentStreamResultSyncProxy: + """Proxy for agent.run_stream_sync() result that adds tracing while delegating to actual stream result.""" + + def __init__( + self, + stream_result: Any, + span: Any, + span_cm: Any, + start_time: float, + tool_trace_token: Any = None, + ): + self._stream_result = stream_result + self._span = span + self._span_cm = span_cm + self._start_time = start_time + self._logged = False + self._finalize_on_del = True + self._first_token_time = None + self._tool_trace_token = tool_trace_token + + def __getattr__(self, name: str): + """Delegate all attribute access to the wrapped stream result.""" + attr = getattr(self._stream_result, name) + + # Wrap any method that returns an iterator to auto-finalize when exhausted + if callable(attr) and name in ("stream_text", "stream_output", "__iter__"): + + def wrapped_method(*args, **kwargs): + try: + iterator = attr(*args, **kwargs) + # If it's an iterator, wrap it + if hasattr(iterator, "__iter__") or hasattr(iterator, "__next__"): + try: + for item in iterator: + if self._first_token_time is None: + self._first_token_time = time.time() + yield item + finally: + self._finalize() + self._finalize_on_del = False # Don't finalize again in __del__ + else: + return iterator + except Exception: + self._finalize() + self._finalize_on_del = False + raise + + return wrapped_method + + return attr + + def _finalize(self): + """Log metrics and close span.""" + if self._span and not self._logged and self._stream_result: + try: + end_time = time.time() + + _maybe_create_tool_spans_from_messages(self._stream_result) + + output = _serialize_stream_output(self._stream_result) + metrics 
= _extract_stream_usage_metrics( + self._stream_result, self._start_time, end_time, self._first_token_time + ) + self._span.log(output=output, metrics=metrics) + self._logged = True + finally: + try: + self._span_cm.__exit__(None, None, None) + except Exception: + pass + if self._tool_trace_token is not None: + _reset_tool_trace_capture(self._tool_trace_token) + self._tool_trace_token = None + + def __del__(self): + """Ensure span is closed when proxy is destroyed.""" + if getattr(self, "_finalize_on_del", False): + self._finalize() + + +class _DirectStreamWrapperSync: + """Wrapper for model_request_stream_sync() that adds tracing while passing through the stream.""" + + def __init__(self, stream_cm: Any, span_name: str, input_data: Any, metadata: Any): + self.stream_cm = stream_cm + self.span_name = span_name + self.input_data = input_data + self.metadata = metadata + self.span_cm = None + self.start_time = None + self.stream = None + self._first_token_time = None + + def __enter__(self): + # Use context manager properly so span stays current + # DON'T pass start_time here - we'll set it via metrics in __exit__ + self.span_cm = start_span( + name=self.span_name, + type=SpanTypeAttribute.LLM, + input=self.input_data if self.input_data else None, + metadata=self.metadata, + ) + span = self.span_cm.__enter__() + + # Capture start time right before entering the stream (API call initiation) + self.start_time = time.time() + self.stream = self.stream_cm.__enter__() + + # Wrap the stream to capture first token time + return _DirectStreamIteratorSyncProxy(self.stream, self) + + def __exit__(self, exc_type, exc_val, exc_tb): + try: + self.stream_cm.__exit__(exc_type, exc_val, exc_tb) + finally: + if self.span_cm and self.start_time and self.stream: + end_time = time.time() + + try: + final_response = self.stream.get() + output = _serialize_model_response(final_response) + metrics = _extract_response_metrics( + final_response, self.start_time, end_time, 
self._first_token_time + ) + self.span_cm.log(output=output, metrics=metrics) + except Exception as e: + logger.debug(f"Failed to extract stream output/metrics: {e}") + + # Always clean up span context + if self.span_cm: + self.span_cm.__exit__(None, None, None) + + return False + + +class _DirectStreamIteratorSyncProxy: + """Proxy for direct stream (sync) that captures first token time.""" + + def __init__(self, stream: Any, wrapper: _DirectStreamWrapperSync): + self._stream = stream + self._wrapper = wrapper + self._iterator = None + + def __getattr__(self, name: str): + """Delegate all attribute access to the wrapped stream.""" + return getattr(self._stream, name) + + def __iter__(self): + """Return iterator that captures first token time.""" + # Get the actual iterator from the stream + self._iterator = self._stream.__iter__() if hasattr(self._stream, "__iter__") else self._stream + return self + + def __next__(self): + """Capture first token time on first iteration.""" + if self._iterator is None: + # In case __iter__ wasn't called, initialize it + self._iterator = self._stream.__iter__() if hasattr(self._stream, "__iter__") else self._stream + + item = self._iterator.__next__() + if self._wrapper._first_token_time is None: + self._wrapper._first_token_time = time.time() + return item + + +def _extract_tool_call(call_or_validated: Any) -> Any: + if hasattr(call_or_validated, "call"): + return call_or_validated.call + return call_or_validated + + +async def _trace_tool_execution(wrapped: Any, args: Any, kwargs: Any): + call = _extract_tool_call(args[0] if args else kwargs.get("validated") or kwargs.get("call")) + if call is None: + return await wrapped(*args, **kwargs) + + tool_name = getattr(call, "tool_name", None) or "unknown_tool" + tool_call_id = getattr(call, "tool_call_id", None) + + try: + input_data = call.args_as_dict() + except Exception: + input_data = bt_safe_deep_copy(getattr(call, "args", None)) + + metadata = {"tool_call_id": tool_call_id} if 
tool_call_id else None + + _mark_tool_span_emitted() + with start_span(name=tool_name, type=SpanTypeAttribute.TOOL, input=input_data, metadata=metadata) as tool_span: + start_time = time.time() + result = await wrapped(*args, **kwargs) + end_time = time.time() + tool_span.log( + output=bt_safe_deep_copy(result), + metrics={"start": start_time, "end": end_time, "duration": end_time - start_time}, + ) + return result + + +async def _tool_manager_call_function_tool_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any): + return await _trace_tool_execution(wrapped, args, kwargs) + + +async def _tool_manager_execute_function_tool_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any): + return await _trace_tool_execution(wrapped, args, kwargs) + + +def _create_tool_spans_from_messages(result: Any) -> None: + """ + Create TOOL-type spans from tool call/return message parts in a completed agent result. + + Uses message timestamps from PydanticAI to position spans correctly in the trace: + - start_time = ModelResponse.timestamp (when the model requested the tool call) + - end_time = ModelRequest.timestamp (when the tool result was sent back) + """ + try: + _create_tool_spans_from_messages_impl(result) + except Exception: + pass + + +def _create_tool_spans_from_messages_impl(result: Any) -> None: + from pydantic_ai.messages import ToolCallPart, ToolReturnPart + + messages = result.new_messages() + + returns_by_id: dict[str, tuple[Any, float | None]] = {} + for msg in messages: + if not hasattr(msg, "parts"): + continue + msg_ts = _msg_timestamp(msg) + for part in msg.parts: + if isinstance(part, ToolReturnPart) and hasattr(part, "tool_call_id"): + returns_by_id[part.tool_call_id] = (part, msg_ts) + + for msg in messages: + if not hasattr(msg, "parts"): + continue + call_ts = _msg_timestamp(msg) + for part in msg.parts: + if not isinstance(part, ToolCallPart): + continue + + tool_name = getattr(part, "tool_name", None) or "unknown_tool" + tool_call_id = 
getattr(part, "tool_call_id", None) + + try: + input_data = part.args_as_dict() + except Exception: + input_data = bt_safe_deep_copy(getattr(part, "args", None)) + + output_data = None + return_ts: float | None = None + if tool_call_id and tool_call_id in returns_by_id: + return_part, return_ts = returns_by_id[tool_call_id] + output_data = bt_safe_deep_copy(getattr(return_part, "content", None)) + + metadata = {} + if tool_call_id: + metadata["tool_call_id"] = tool_call_id + + with start_span( + name=tool_name, + type=SpanTypeAttribute.TOOL, + input=input_data, + start_time=call_ts, + metadata=metadata if metadata else None, + ) as tool_span: + metrics = {} + if call_ts is not None: + metrics["start"] = call_ts + if return_ts is not None: + metrics["end"] = return_ts + if call_ts is not None and return_ts is not None: + metrics["duration"] = return_ts - call_ts + tool_span.log(output=output_data, metrics=metrics if metrics else None) + tool_span.end(end_time=return_ts) + + +def _msg_timestamp(msg: Any) -> float | None: + """Extract epoch-seconds timestamp from a PydanticAI message, or None.""" + ts = getattr(msg, "timestamp", None) + if ts is None: + return None + try: + return ts.timestamp() # datetime → float + except Exception: + return None + + +def _serialize_user_prompt(user_prompt: Any) -> Any: + """Serialize user prompt, handling BinaryContent and other types.""" + if user_prompt is None: + return None + + if isinstance(user_prompt, str): + return user_prompt + + if isinstance(user_prompt, list): + return [_serialize_content_part(part) for part in user_prompt] + + return _serialize_content_part(user_prompt) + + +def _serialize_content_part(part: Any) -> Any: + """Serialize a content part, handling BinaryContent specially. 
+ + This function handles: + - BinaryContent: converts to Braintrust Attachment + - Parts with nested content (UserPromptPart): recursively serializes content items + - Strings: passes through unchanged + - Other objects: converts to dict via model_dump + """ + if part is None: + return None + + if hasattr(part, "data") and hasattr(part, "media_type") and hasattr(part, "kind"): + if part.kind == "binary": + data = part.data + media_type = part.media_type + + extension = media_type.split("/")[1] if "/" in media_type else "bin" + filename = f"file.{extension}" + + attachment = Attachment(data=data, filename=filename, content_type=media_type) + return {"type": "binary", "attachment": attachment, "media_type": media_type} + + if hasattr(part, "content"): + content = part.content + if isinstance(content, list): + serialized_content = [_serialize_content_part(item) for item in content] + result = bt_safe_deep_copy(part) + if isinstance(result, dict): + result["content"] = serialized_content + return result + elif content is not None: + serialized_content = _serialize_content_part(content) + result = bt_safe_deep_copy(part) + if isinstance(result, dict): + result["content"] = serialized_content + return result + + if isinstance(part, str): + return part + + return bt_safe_deep_copy(part) + + +def _serialize_messages(messages: Any) -> Any: + """Serialize messages list.""" + if not messages: + return [] + + result = [] + for msg in messages: + if hasattr(msg, "parts") and msg.parts: + original_parts = msg.parts + serialized_parts = [_serialize_content_part(p) for p in original_parts] + + # Use model_dump with exclude to avoid serializing parts field prematurely + if hasattr(msg, "model_dump"): + try: + serialized_msg = msg.model_dump(exclude={"parts"}, exclude_none=True) + except (TypeError, ValueError): + # If exclude parameter not supported, fall back to bt_safe_deep_copy + serialized_msg = bt_safe_deep_copy(msg) + else: + serialized_msg = bt_safe_deep_copy(msg) + + if 
isinstance(serialized_msg, dict): + serialized_msg["parts"] = serialized_parts + else: + serialized_msg = bt_safe_deep_copy(msg) + + result.append(serialized_msg) + + return result + + +def _serialize_result_output(result: Any) -> Any: + """Serialize agent run result output.""" + if not result: + return None + + output_dict = {} + + if hasattr(result, "output"): + output_dict["output"] = bt_safe_deep_copy(result.output) + + if hasattr(result, "response"): + output_dict["response"] = _serialize_model_response(result.response) + + return output_dict if output_dict else bt_safe_deep_copy(result) + + +def _serialize_stream_output(stream_result: Any) -> Any: + """Serialize stream result output.""" + if not stream_result: + return None + + output_dict = {} + + if hasattr(stream_result, "response"): + output_dict["response"] = _serialize_model_response(stream_result.response) + + return output_dict if output_dict else None + + +def _serialize_model_response(response: Any) -> Any: + """Serialize a model response.""" + if not response: + return None + + response_dict = bt_safe_deep_copy(response) + + if hasattr(response, "parts") and isinstance(response_dict, dict): + response_dict["parts"] = [_serialize_content_part(p) for p in response.parts] + + return response_dict + + +def _extract_model_info_from_model_instance(model: Any) -> tuple[str | None, str | None]: + """Extract model name and provider from a model instance. + + Args: + model: A Pydantic AI model instance (OpenAIChatModel, AnthropicModel, etc.) 
+ + Returns: + Tuple of (model_name, provider) + """ + if not model: + return None, None + + if isinstance(model, str): + return _parse_model_string(model) + + if hasattr(model, "model_name"): + model_name = model.model_name + class_name = type(model).__name__ + provider = None + if "OpenAI" in class_name: + provider = "openai" + elif "Anthropic" in class_name: + provider = "anthropic" + elif "Gemini" in class_name: + provider = "gemini" + elif "Groq" in class_name: + provider = "groq" + elif "Mistral" in class_name: + provider = "mistral" + elif "VertexAI" in class_name: + provider = "vertexai" + + return model_name, provider + + if hasattr(model, "name"): + return _parse_model_string(model.name) + + return None, None + + +def _extract_model_info(agent: Any) -> tuple[str | None, str | None]: + """Extract model name and provider from agent. + + Args: + agent: A Pydantic AI Agent instance + + Returns: + Tuple of (model_name, provider) + """ + if not hasattr(agent, "model"): + return None, None + + return _extract_model_info_from_model_instance(agent.model) + + +def _build_model_metadata(model_name: str | None, provider: str | None, model_settings: Any = None) -> dict[str, Any]: + """Build metadata dictionary with model info. + + Args: + model_name: The model name (e.g., "gpt-4o") + provider: The provider (e.g., "openai") + model_settings: Optional model settings to include + + Returns: + Dictionary of metadata + """ + metadata = {} + if model_name: + metadata["model"] = model_name + if provider: + metadata["provider"] = provider + if model_settings: + metadata["model_settings"] = bt_safe_deep_copy(model_settings) + return metadata + + +def _parse_model_string(model: Any) -> tuple[str | None, str | None]: + """Parse model string to extract provider and model name. 
+ + Pydantic AI uses format: "provider:model-name" (e.g., "openai:gpt-4o") + """ + if not model: + return None, None + + model_str = str(model) + + if ":" in model_str: + parts = model_str.split(":", 1) + return parts[1], parts[0] # (model_name, provider) + + return model_str, None + + +def _extract_usage_metrics(result: Any, start_time: float, end_time: float) -> dict[str, float] | None: + """Extract usage metrics from agent run result.""" + metrics: dict[str, float] = {} + + metrics["start"] = start_time + metrics["end"] = end_time + metrics["duration"] = end_time - start_time + + usage = None + if hasattr(result, "response"): + try: + response = result.response + if hasattr(response, "usage"): + usage = response.usage + except (AttributeError, ValueError): + pass + + if usage is None and hasattr(result, "usage"): + usage = result.usage + + if usage is None: + return metrics + + if hasattr(usage, "input_tokens"): + input_tokens = usage.input_tokens + if input_tokens is not None: + metrics["prompt_tokens"] = float(input_tokens) + + if hasattr(usage, "output_tokens"): + output_tokens = usage.output_tokens + if output_tokens is not None: + metrics["completion_tokens"] = float(output_tokens) + + if hasattr(usage, "total_tokens"): + total_tokens = usage.total_tokens + if total_tokens is not None: + metrics["tokens"] = float(total_tokens) + + if hasattr(usage, "cache_read_tokens") and usage.cache_read_tokens is not None: + metrics["prompt_cached_tokens"] = float(usage.cache_read_tokens) + + if hasattr(usage, "cache_write_tokens") and usage.cache_write_tokens is not None: + metrics["prompt_cache_creation_tokens"] = float(usage.cache_write_tokens) + + if hasattr(usage, "input_audio_tokens") and usage.input_audio_tokens is not None: + metrics["prompt_audio_tokens"] = float(usage.input_audio_tokens) + + if hasattr(usage, "output_audio_tokens") and usage.output_audio_tokens is not None: + metrics["completion_audio_tokens"] = float(usage.output_audio_tokens) + + if 
hasattr(usage, "details") and isinstance(usage.details, dict): + details = usage.details + + if "reasoning_tokens" in details: + metrics["completion_reasoning_tokens"] = float(details["reasoning_tokens"]) + + if "cached_tokens" in details: + metrics["prompt_cached_tokens"] = float(details["cached_tokens"]) + + return metrics if metrics else None + + +def _extract_stream_usage_metrics( + stream_result: Any, start_time: float, end_time: float, first_token_time: float | None +) -> dict[str, float] | None: + """Extract usage metrics from stream result.""" + metrics: dict[str, float] = {} + + metrics["start"] = start_time + metrics["end"] = end_time + metrics["duration"] = end_time - start_time + + if first_token_time: + metrics["time_to_first_token"] = first_token_time - start_time + + if hasattr(stream_result, "usage"): + usage_func = stream_result.usage + if callable(usage_func): + usage = usage_func() + else: + usage = usage_func + + if usage: + if hasattr(usage, "input_tokens") and usage.input_tokens is not None: + metrics["prompt_tokens"] = float(usage.input_tokens) + + if hasattr(usage, "output_tokens") and usage.output_tokens is not None: + metrics["completion_tokens"] = float(usage.output_tokens) + + if hasattr(usage, "total_tokens") and usage.total_tokens is not None: + metrics["tokens"] = float(usage.total_tokens) + + if hasattr(usage, "cache_read_tokens") and usage.cache_read_tokens is not None: + metrics["prompt_cached_tokens"] = float(usage.cache_read_tokens) + + if hasattr(usage, "cache_write_tokens") and usage.cache_write_tokens is not None: + metrics["prompt_cache_creation_tokens"] = float(usage.cache_write_tokens) + + return metrics if metrics else None + + +def _extract_response_metrics( + response: Any, start_time: float, end_time: float, first_token_time: float | None = None +) -> dict[str, float] | None: + """Extract metrics from model response.""" + metrics: dict[str, float] = {} + + metrics["start"] = start_time + metrics["end"] = end_time + 
metrics["duration"] = end_time - start_time + + if first_token_time: + metrics["time_to_first_token"] = first_token_time - start_time + + if hasattr(response, "usage") and response.usage: + usage = response.usage + + if hasattr(usage, "input_tokens") and usage.input_tokens is not None: + metrics["prompt_tokens"] = float(usage.input_tokens) + + if hasattr(usage, "output_tokens") and usage.output_tokens is not None: + metrics["completion_tokens"] = float(usage.output_tokens) + + if hasattr(usage, "total_tokens") and usage.total_tokens is not None: + metrics["tokens"] = float(usage.total_tokens) + + if hasattr(usage, "cache_read_tokens") and usage.cache_read_tokens is not None: + metrics["prompt_cached_tokens"] = float(usage.cache_read_tokens) + + if hasattr(usage, "cache_write_tokens") and usage.cache_write_tokens is not None: + metrics["prompt_cache_creation_tokens"] = float(usage.cache_write_tokens) + + # Extract reasoning tokens for reasoning models (o1/o3) + if hasattr(usage, "details") and usage.details is not None: + if hasattr(usage.details, "reasoning_tokens") and usage.details.reasoning_tokens is not None: + metrics["completion_reasoning_tokens"] = float(usage.details.reasoning_tokens) + + return metrics if metrics else None + + +def _create_start_producer_wrapper(): + """Create wrapper for StreamedResponseSync._start_producer to propagate context. + + StreamedResponseSync._start_producer creates a background thread that doesn't + inherit contextvars. This wrapper ensures Braintrust context flows to that thread + so nested instrumentation (like wrap_openai) creates properly parented spans. 
+ """ + + def wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any) -> None: + ctx = contextvars.copy_context() + original_async_producer = instance._async_producer + + def _context_wrapped_async_producer() -> None: + ctx.run(original_async_producer) + + instance._async_producer = _context_wrapped_async_producer + try: + return wrapped(*args, **kwargs) + finally: + instance._async_producer = original_async_producer + + return wrapper + + +def _serialize_type(obj: Any) -> Any: + """Serialize a type/class for logging, handling Pydantic models and other types. + + This is useful for output_type, toolsets, and similar type parameters. + Returns full JSON schema for Pydantic models so engineers can see exactly + what structured output schema was used. + """ + import inspect + + # For sequences of types (like Union types or list of models) + if isinstance(obj, (list, tuple)): + return [_serialize_type(item) for item in obj] + + # Handle Pydantic AI's output wrappers (ToolOutput, NativeOutput, PromptedOutput, TextOutput) + if hasattr(obj, "output"): + # These are wrapper classes with an 'output' field containing the actual type + wrapper_info = {"wrapper": type(obj).__name__} + if hasattr(obj, "name") and obj.name: + wrapper_info["name"] = obj.name + if hasattr(obj, "description") and obj.description: + wrapper_info["description"] = obj.description + wrapper_info["output"] = _serialize_type(obj.output) + return wrapper_info + + # If it's a Pydantic model class, return its full JSON schema + if inspect.isclass(obj): + try: + from pydantic import BaseModel + + if issubclass(obj, BaseModel): + # Return the full JSON schema - includes all field info, descriptions, constraints, etc. 
def _build_agent_input_and_metadata(args: Any, kwargs: Any, instance: Any) -> tuple[dict[str, Any], dict[str, Any]]:
    """Build input data and metadata for agent wrappers.

    Values passed to the wrapped call (the user prompt and keyword arguments)
    are recorded as input. Configuration read from the agent ``instance`` is
    recorded as metadata, except for toolsets and system prompts, which are
    recorded as input.

    Args:
        args: Positional arguments of the wrapped agent call; ``args[0]`` is
            treated as the user prompt when present.
        kwargs: Keyword arguments of the wrapped agent call.
        instance: The agent the call was made on.

    Returns:
        Tuple of (input_data, metadata)
    """
    input_data: dict[str, Any] = {}

    user_prompt = args[0] if len(args) > 0 else kwargs.get("user_prompt")
    if user_prompt is not None:
        input_data["user_prompt"] = _serialize_user_prompt(user_prompt)

    for key, value in kwargs.items():
        if key in ("deps", "user_prompt"):
            # "deps" is not logged. "user_prompt" was serialized above; letting
            # it fall through would overwrite that with a raw deep copy.
            continue
        if value is None:
            input_data[key] = None
        elif key == "message_history":
            input_data[key] = _serialize_messages(value)
        elif key in ("output_type", "toolsets"):
            # These often contain types/classes, use special serialization
            input_data[key] = _serialize_type(value)
        else:
            # Includes model_settings: when passed to run() it is a call
            # parameter, so it belongs in INPUT rather than metadata.
            input_data[key] = bt_safe_deep_copy(value)

    if "model" in kwargs:
        model_name, provider = _parse_model_string(kwargs["model"])
    else:
        model_name, provider = _extract_model_info(instance)

    # Agent-level model settings go to metadata only when the call did not
    # pass its own model_settings (those are already in input).
    agent_model_settings = None
    if "model_settings" not in kwargs and hasattr(instance, "model_settings") and instance.model_settings is not None:
        agent_model_settings = instance.model_settings

    metadata = _build_model_metadata(model_name, provider, agent_model_settings)

    # Additional agent configuration (only if not passed as kwargs)
    if "name" not in kwargs and hasattr(instance, "name") and instance.name is not None:
        metadata["agent_name"] = instance.name

    if "end_strategy" not in kwargs and hasattr(instance, "end_strategy") and instance.end_strategy is not None:
        metadata["end_strategy"] = str(instance.end_strategy)

    # output_type set on the agent (and not overridden by the call)
    if "output_type" not in kwargs and hasattr(instance, "output_type") and instance.output_type is not None:
        try:
            metadata["output_type"] = _serialize_type(instance.output_type)
        except Exception as e:
            logger.debug(f"Failed to extract output_type from agent: {e}")

    # Toolsets set on the agent go in INPUT (not metadata) because
    # agent.run() accepts a toolsets parameter.
    if "toolsets" not in kwargs and hasattr(instance, "toolsets"):
        try:
            toolsets = instance.toolsets
            if toolsets:
                serialized_toolsets = []
                for ts in toolsets:
                    ts_info = {
                        "id": getattr(ts, "id", str(type(ts).__name__)),
                        "label": getattr(ts, "label", None),
                    }
                    # Record full tool schemas (not just names)
                    if hasattr(ts, "tools") and ts.tools:
                        tools_list = []
                        # tools is a dict mapping tool name -> Tool object
                        for tool_name, tool_obj in ts.tools.items():
                            tool_dict = {"name": tool_name}
                            if hasattr(tool_obj, "description") and tool_obj.description:
                                tool_dict["description"] = tool_obj.description
                            if hasattr(tool_obj, "function_schema") and hasattr(
                                tool_obj.function_schema, "json_schema"
                            ):
                                tool_dict["parameters"] = tool_obj.function_schema.json_schema
                            tools_list.append(tool_dict)
                        ts_info["tools"] = tools_list
                    serialized_toolsets.append(ts_info)
                input_data["toolsets"] = serialized_toolsets
        except Exception as e:
            logger.debug(f"Failed to extract toolsets from agent: {e}")

    # system_prompt goes in input (not metadata) because it is semantically
    # part of the LLM input. It is read from the private _system_prompts
    # attribute, so the access is guarded in case that structure changes.
    if "system_prompt" not in kwargs:
        try:
            if hasattr(instance, "_system_prompts") and instance._system_prompts:
                input_data["system_prompt"] = "\n\n".join(instance._system_prompts)
        except Exception as e:
            logger.debug(f"Failed to extract system_prompt from agent: {e}")

    return input_data, metadata
typing import Any - -from braintrust.bt_json import bt_safe_deep_copy -from braintrust.logger import NOOP_SPAN, Attachment, current_span, init_logger, start_span -from braintrust.span_types import SpanTypeAttribute -from wrapt import wrap_function_wrapper - - -logger = logging.getLogger(__name__) - -__all__ = ["setup_pydantic_ai"] - - -def setup_pydantic_ai( - api_key: str | None = None, - project_id: str | None = None, - project_name: str | None = None, -) -> bool: - """ - Setup Braintrust integration with Pydantic AI. Will automatically patch Pydantic AI Agents and direct API functions for automatic tracing. - - Args: - api_key (Optional[str]): Braintrust API key. - project_id (Optional[str]): Braintrust project ID. - project_name (Optional[str]): Braintrust project name. - - Returns: - bool: True if setup was successful, False otherwise. - """ - span = current_span() - if span == NOOP_SPAN: - init_logger(project=project_name, api_key=api_key, project_id=project_id) - - try: - import pydantic_ai.direct as direct_module - from pydantic_ai import Agent - - Agent = wrap_agent(Agent) - - wrap_function_wrapper(direct_module, "model_request", _create_direct_model_request_wrapper()) - wrap_function_wrapper(direct_module, "model_request_sync", _create_direct_model_request_sync_wrapper()) - wrap_function_wrapper(direct_module, "model_request_stream", _create_direct_model_request_stream_wrapper()) - wrap_function_wrapper( - direct_module, "model_request_stream_sync", _create_direct_model_request_stream_sync_wrapper() - ) - - wrap_model_classes() - - # Patch StreamedResponseSync to propagate context to background threads - try: - if hasattr(direct_module, "StreamedResponseSync"): - wrap_function_wrapper( - direct_module.StreamedResponseSync, "_start_producer", _create_start_producer_wrapper() - ) - logger.debug("Pydantic AI StreamedResponseSync context propagation patching successful") - except Exception as e: - logger.warning(f"Failed to patch StreamedResponseSync context 
propagation: {e}") - - return True - except ImportError: - # Not installed - this is expected when using auto_instrument() - return False - - -def wrap_agent(Agent: Any) -> Any: - if _is_patched(Agent): - return Agent - - def _ensure_model_wrapped(instance: Any): - """Ensure the agent's model class is wrapped (lazy wrapping).""" - if hasattr(instance, "_model") and instance._model is not None: - model_class = type(instance._model) - _wrap_concrete_model_class(model_class) - - async def agent_run_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any): - _ensure_model_wrapped(instance) - input_data, metadata = _build_agent_input_and_metadata(args, kwargs, instance) - - with start_span( - name=f"agent_run [{instance.name}]" if hasattr(instance, "name") and instance.name else "agent_run", - type=SpanTypeAttribute.LLM, - input=input_data if input_data else None, - metadata=metadata, - ) as agent_span: - start_time = time.time() - result = await wrapped(*args, **kwargs) - end_time = time.time() - - _create_tool_spans_from_messages(result) - - output = _serialize_result_output(result) - metrics = _extract_usage_metrics(result, start_time, end_time) - - agent_span.log(output=output, metrics=metrics) - return result - - wrap_function_wrapper(Agent, "run", agent_run_wrapper) - - def agent_run_sync_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any): - _ensure_model_wrapped(instance) - input_data, metadata = _build_agent_input_and_metadata(args, kwargs, instance) - - with start_span( - name=f"agent_run_sync [{instance.name}]" - if hasattr(instance, "name") and instance.name - else "agent_run_sync", - type=SpanTypeAttribute.LLM, - input=input_data if input_data else None, - metadata=metadata, - ) as agent_span: - start_time = time.time() - result = wrapped(*args, **kwargs) - end_time = time.time() - - _create_tool_spans_from_messages(result) - - output = _serialize_result_output(result) - metrics = _extract_usage_metrics(result, start_time, end_time) - - 
agent_span.log(output=output, metrics=metrics) - return result - - wrap_function_wrapper(Agent, "run_sync", agent_run_sync_wrapper) - - def agent_to_cli_sync_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any): - _ensure_model_wrapped(instance) - input_data, metadata = _build_agent_input_and_metadata(args, kwargs, instance) - - with start_span( - name=f"agent_to_cli_sync [{instance.name}]" - if hasattr(instance, "name") and instance.name - else "agent_to_cli_sync", - type=SpanTypeAttribute.LLM, - input=input_data if input_data else None, - metadata=metadata, - ) as agent_span: - start_time = time.time() - result = wrapped(*args, **kwargs) - end_time = time.time() - agent_span.log(metrics={"start": start_time, "end": end_time, "duration": end_time - start_time}) - return result - - wrap_function_wrapper(Agent, "to_cli_sync", agent_to_cli_sync_wrapper) - - def agent_run_stream_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any): - _ensure_model_wrapped(instance) - input_data, metadata = _build_agent_input_and_metadata(args, kwargs, instance) - agent_name = instance.name if hasattr(instance, "name") else None - span_name = f"agent_run_stream [{agent_name}]" if agent_name else "agent_run_stream" - - return _AgentStreamWrapper( - wrapped(*args, **kwargs), - span_name, - input_data, - metadata, - ) - - wrap_function_wrapper(Agent, "run_stream", agent_run_stream_wrapper) - - def agent_run_stream_sync_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any): - _ensure_model_wrapped(instance) - input_data, metadata = _build_agent_input_and_metadata(args, kwargs, instance) - agent_name = instance.name if hasattr(instance, "name") else None - span_name = f"agent_run_stream_sync [{agent_name}]" if agent_name else "agent_run_stream_sync" - - # Create span context BEFORE calling wrapped function so internal spans nest under it - span_cm = start_span( - name=span_name, - type=SpanTypeAttribute.LLM, - input=input_data if input_data else None, - 
metadata=metadata, - ) - span = span_cm.__enter__() - start_time = time.time() - - try: - # Call the original function within the span context - stream_result = wrapped(*args, **kwargs) - return _AgentStreamResultSyncProxy( - stream_result, - span, - span_cm, - start_time, - ) - except Exception: - # Clean up span on error - span_cm.__exit__(*sys.exc_info()) - raise - - wrap_function_wrapper(Agent, "run_stream_sync", agent_run_stream_sync_wrapper) - - async def agent_run_stream_events_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any): - _ensure_model_wrapped(instance) - input_data, metadata = _build_agent_input_and_metadata(args, kwargs, instance) - - agent_name = instance.name if hasattr(instance, "name") else None - span_name = f"agent_run_stream_events [{agent_name}]" if agent_name else "agent_run_stream_events" - - with start_span( - name=span_name, - type=SpanTypeAttribute.LLM, - input=input_data if input_data else None, - metadata=metadata, - ) as agent_span: - start_time = time.time() - event_count = 0 - final_result = None - - async for event in wrapped(*args, **kwargs): - event_count += 1 - if hasattr(event, "output"): - final_result = event - yield event - - end_time = time.time() - - if final_result: - _create_tool_spans_from_messages(final_result) - - output = None - metrics = { - "start": start_time, - "end": end_time, - "duration": end_time - start_time, - "event_count": event_count, - } - - if final_result: - output = _serialize_result_output(final_result) - usage_metrics = _extract_usage_metrics(final_result, start_time, end_time) - metrics.update(usage_metrics) - - agent_span.log(output=output, metrics=metrics) - - wrap_function_wrapper(Agent, "run_stream_events", agent_run_stream_events_wrapper) - - Agent._braintrust_patched = True - - return Agent - - -def _create_direct_model_request_wrapper(): - """Create wrapper for direct.model_request().""" - - async def wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any): - input_data, 
metadata = _build_direct_model_input_and_metadata(args, kwargs) - - with start_span( - name="model_request", - type=SpanTypeAttribute.LLM, - input=input_data, - metadata=metadata, - ) as span: - start_time = time.time() - result = await wrapped(*args, **kwargs) - end_time = time.time() - - output = _serialize_model_response(result) - metrics = _extract_response_metrics(result, start_time, end_time) - - span.log(output=output, metrics=metrics) - return result - - return wrapper - - -def _create_direct_model_request_sync_wrapper(): - """Create wrapper for direct.model_request_sync().""" - - def wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any): - input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs) - - with start_span( - name="model_request_sync", - type=SpanTypeAttribute.LLM, - input=input_data, - metadata=metadata, - ) as span: - start_time = time.time() - result = wrapped(*args, **kwargs) - end_time = time.time() - - output = _serialize_model_response(result) - metrics = _extract_response_metrics(result, start_time, end_time) - - span.log(output=output, metrics=metrics) - return result - - return wrapper - - -def _create_direct_model_request_stream_wrapper(): - """Create wrapper for direct.model_request_stream().""" - - def wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any): - input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs) - - return _DirectStreamWrapper( - wrapped(*args, **kwargs), - "model_request_stream", - input_data, - metadata, - ) - - return wrapper - - -def _create_direct_model_request_stream_sync_wrapper(): - """Create wrapper for direct.model_request_stream_sync().""" - - def wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any): - input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs) - - return _DirectStreamWrapperSync( - wrapped(*args, **kwargs), - "model_request_stream_sync", - input_data, - metadata, - ) - - return wrapper - - -def 
wrap_model_request(original_func: Any) -> Any: - async def wrapper(*args, **kwargs): - input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs) - - with start_span( - name="model_request", - type=SpanTypeAttribute.LLM, - input=input_data, - metadata=metadata, - ) as span: - start_time = time.time() - result = await original_func(*args, **kwargs) - end_time = time.time() - - output = _serialize_model_response(result) - metrics = _extract_response_metrics(result, start_time, end_time) - - span.log(output=output, metrics=metrics) - return result - - return wrapper - - -def wrap_model_request_sync(original_func: Any) -> Any: - def wrapper(*args, **kwargs): - input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs) - - with start_span( - name="model_request_sync", - type=SpanTypeAttribute.LLM, - input=input_data, - metadata=metadata, - ) as span: - start_time = time.time() - result = original_func(*args, **kwargs) - end_time = time.time() - - output = _serialize_model_response(result) - metrics = _extract_response_metrics(result, start_time, end_time) - - span.log(output=output, metrics=metrics) - return result - - return wrapper - - -def wrap_model_request_stream(original_func: Any) -> Any: - def wrapper(*args, **kwargs): - input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs) - - return _DirectStreamWrapper( - original_func(*args, **kwargs), - "model_request_stream", - input_data, - metadata, - ) - - return wrapper - - -def wrap_model_request_stream_sync(original_func: Any) -> Any: - def wrapper(*args, **kwargs): - input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs) - - return _DirectStreamWrapperSync( - original_func(*args, **kwargs), - "model_request_stream_sync", - input_data, - metadata, - ) - - return wrapper - - -def wrap_model_classes(): - """Wrap Model classes to capture internal model requests made by agents.""" - try: - from pydantic_ai.models import Model - - def 
wrap_all_subclasses(base_class): - """Recursively wrap all subclasses of a base class.""" - for subclass in base_class.__subclasses__(): - if not getattr(subclass, "__abstractmethods__", None): - try: - _wrap_concrete_model_class(subclass) - except Exception as e: - logger.debug(f"Could not wrap {subclass.__name__}: {e}") - - wrap_all_subclasses(subclass) - - wrap_all_subclasses(Model) - - except Exception as e: - logger.warning(f"Failed to wrap Model classes: {e}") - - -def _build_model_class_input_and_metadata(instance: Any, args: Any, kwargs: Any): - """Build input data and metadata for model class request wrappers. - - Returns: - Tuple of (model_name, display_name, input_data, metadata) - """ - model_name, provider = _extract_model_info_from_model_instance(instance) - display_name = model_name or type(instance).__name__ - - messages = args[0] if len(args) > 0 else kwargs.get("messages") - model_settings = args[1] if len(args) > 1 else kwargs.get("model_settings") - - serialized_messages = _serialize_messages(messages) - - input_data = {"messages": serialized_messages} - if model_settings is not None: - input_data["model_settings"] = bt_safe_deep_copy(model_settings) - - metadata = _build_model_metadata(model_name, provider, model_settings=None) - - return model_name, display_name, input_data, metadata - - -def _wrap_concrete_model_class(model_class: Any): - """Wrap a concrete model class to trace its request methods.""" - if _is_patched(model_class): - return - - async def model_request_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any): - model_name, display_name, input_data, metadata = _build_model_class_input_and_metadata(instance, args, kwargs) - - with start_span( - name=f"chat {display_name}", - type=SpanTypeAttribute.LLM, - input=input_data, - metadata=metadata, - ) as span: - start_time = time.time() - result = await wrapped(*args, **kwargs) - end_time = time.time() - - output = _serialize_model_response(result) - metrics = 
_extract_response_metrics(result, start_time, end_time) - - span.log(output=output, metrics=metrics) - return result - - def model_request_stream_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any): - model_name, display_name, input_data, metadata = _build_model_class_input_and_metadata(instance, args, kwargs) - - return _DirectStreamWrapper( - wrapped(*args, **kwargs), - f"chat {display_name}", - input_data, - metadata, - ) - - wrap_function_wrapper(model_class, "request", model_request_wrapper) - wrap_function_wrapper(model_class, "request_stream", model_request_stream_wrapper) - model_class._braintrust_patched = True - - -class _AgentStreamWrapper(AbstractAsyncContextManager): - """Wrapper for agent.run_stream() that adds tracing while passing through the stream result.""" - - def __init__(self, stream_cm: Any, span_name: str, input_data: Any, metadata: Any): - self.stream_cm = stream_cm - self.span_name = span_name - self.input_data = input_data - self.metadata = metadata - self.span_cm = None - self.start_time = None - self.stream_result = None - self._enter_task = None - self._first_token_time = None - - async def __aenter__(self): - self._enter_task = asyncio.current_task() - - # Use context manager properly so span stays current - # DON'T pass start_time here - we'll set it via metrics in __aexit__ - self.span_cm = start_span( - name=self.span_name, - type=SpanTypeAttribute.LLM, - input=self.input_data if self.input_data else None, - metadata=self.metadata, - ) - self.span_cm.__enter__() - - # Capture start time right before entering the stream (API call initiation) - self.start_time = time.time() - self.stream_result = await self.stream_cm.__aenter__() - - # Wrap the stream result to capture first token time - return _StreamResultProxy(self.stream_result, self) - - async def __aexit__(self, exc_type, exc_val, exc_tb): - try: - await self.stream_cm.__aexit__(exc_type, exc_val, exc_tb) - finally: - if self.span_cm and self.start_time and 
self.stream_result: - end_time = time.time() - - _create_tool_spans_from_messages(self.stream_result) - - output = _serialize_stream_output(self.stream_result) - metrics = _extract_stream_usage_metrics( - self.stream_result, self.start_time, end_time, self._first_token_time - ) - self.span_cm.log(output=output, metrics=metrics) - - # Clean up span context - if self.span_cm: - if asyncio.current_task() is self._enter_task: - self.span_cm.__exit__(None, None, None) - else: - self.span_cm.end() - - return False - - -class _StreamResultProxy: - """Proxy for stream result that captures first token time.""" - - def __init__(self, stream_result: Any, wrapper: _AgentStreamWrapper): - self._stream_result = stream_result - self._wrapper = wrapper - - def __getattr__(self, name: str): - """Delegate all attribute access to the wrapped stream result.""" - attr = getattr(self._stream_result, name) - - # Wrap streaming methods to capture first token time - if callable(attr) and name in ("stream_text", "stream_output"): - - async def wrapped_method(*args, **kwargs): - result = attr(*args, **kwargs) - async for item in result: - if self._wrapper._first_token_time is None: - self._wrapper._first_token_time = time.time() - yield item - - return wrapped_method - - return attr - - -class _DirectStreamWrapper(AbstractAsyncContextManager): - """Wrapper for model_request_stream() that adds tracing while passing through the stream.""" - - def __init__(self, stream_cm: Any, span_name: str, input_data: Any, metadata: Any): - self.stream_cm = stream_cm - self.span_name = span_name - self.input_data = input_data - self.metadata = metadata - self.span_cm = None - self.start_time = None - self.stream = None - self._enter_task = None - self._first_token_time = None - - async def __aenter__(self): - self._enter_task = asyncio.current_task() - - # Use context manager properly so span stays current - # DON'T pass start_time here - we'll set it via metrics in __aexit__ - self.span_cm = start_span( - 
name=self.span_name, - type=SpanTypeAttribute.LLM, - input=self.input_data if self.input_data else None, - metadata=self.metadata, - ) - self.span_cm.__enter__() - - # Capture start time right before entering the stream (API call initiation) - self.start_time = time.time() - self.stream = await self.stream_cm.__aenter__() - - # Wrap the stream to capture first token time - return _DirectStreamIteratorProxy(self.stream, self) - - async def __aexit__(self, exc_type, exc_val, exc_tb): - try: - await self.stream_cm.__aexit__(exc_type, exc_val, exc_tb) - finally: - if self.span_cm and self.start_time and self.stream: - end_time = time.time() - - try: - final_response = self.stream.get() - output = _serialize_model_response(final_response) - metrics = _extract_response_metrics( - final_response, self.start_time, end_time, self._first_token_time - ) - self.span_cm.log(output=output, metrics=metrics) - except Exception as e: - logger.debug(f"Failed to extract stream output/metrics: {e}") - - # Clean up span context - if self.span_cm: - if asyncio.current_task() is self._enter_task: - self.span_cm.__exit__(None, None, None) - else: - self.span_cm.end() - - return False - - -class _DirectStreamIteratorProxy: - """Proxy for direct stream that captures first token time.""" - - def __init__(self, stream: Any, wrapper: _DirectStreamWrapper): - self._stream = stream - self._wrapper = wrapper - self._iterator = None - - def __getattr__(self, name: str): - """Delegate all attribute access to the wrapped stream.""" - return getattr(self._stream, name) - - def __aiter__(self): - """Return async iterator that captures first token time.""" - # Get the actual async iterator from the stream - self._iterator = self._stream.__aiter__() if hasattr(self._stream, "__aiter__") else self._stream - return self - - async def __anext__(self): - """Capture first token time on first iteration.""" - if self._iterator is None: - # In case __aiter__ wasn't called, initialize it - self._iterator = 
self._stream.__aiter__() if hasattr(self._stream, "__aiter__") else self._stream - - item = await self._iterator.__anext__() - if self._wrapper._first_token_time is None: - self._wrapper._first_token_time = time.time() - return item - - -class _AgentStreamResultSyncProxy: - """Proxy for agent.run_stream_sync() result that adds tracing while delegating to actual stream result.""" - - def __init__(self, stream_result: Any, span: Any, span_cm: Any, start_time: float): - self._stream_result = stream_result - self._span = span - self._span_cm = span_cm - self._start_time = start_time - self._logged = False - self._finalize_on_del = True - self._first_token_time = None - - def __getattr__(self, name: str): - """Delegate all attribute access to the wrapped stream result.""" - attr = getattr(self._stream_result, name) - - # Wrap any method that returns an iterator to auto-finalize when exhausted - if callable(attr) and name in ("stream_text", "stream_output", "__iter__"): - - def wrapped_method(*args, **kwargs): - try: - iterator = attr(*args, **kwargs) - # If it's an iterator, wrap it - if hasattr(iterator, "__iter__") or hasattr(iterator, "__next__"): - try: - for item in iterator: - if self._first_token_time is None: - self._first_token_time = time.time() - yield item - finally: - self._finalize() - self._finalize_on_del = False # Don't finalize again in __del__ - else: - return iterator - except Exception: - self._finalize() - self._finalize_on_del = False - raise - - return wrapped_method - - return attr - - def _finalize(self): - """Log metrics and close span.""" - if self._span and not self._logged and self._stream_result: - try: - end_time = time.time() - - _create_tool_spans_from_messages(self._stream_result) - - output = _serialize_stream_output(self._stream_result) - metrics = _extract_stream_usage_metrics( - self._stream_result, self._start_time, end_time, self._first_token_time - ) - self._span.log(output=output, metrics=metrics) - self._logged = True - 
finally: - try: - self._span_cm.__exit__(None, None, None) - except Exception: - pass - - def __del__(self): - """Ensure span is closed when proxy is destroyed.""" - if self._finalize_on_del: - self._finalize() - - -class _DirectStreamWrapperSync: - """Wrapper for model_request_stream_sync() that adds tracing while passing through the stream.""" - - def __init__(self, stream_cm: Any, span_name: str, input_data: Any, metadata: Any): - self.stream_cm = stream_cm - self.span_name = span_name - self.input_data = input_data - self.metadata = metadata - self.span_cm = None - self.start_time = None - self.stream = None - self._first_token_time = None - - def __enter__(self): - # Use context manager properly so span stays current - # DON'T pass start_time here - we'll set it via metrics in __exit__ - self.span_cm = start_span( - name=self.span_name, - type=SpanTypeAttribute.LLM, - input=self.input_data if self.input_data else None, - metadata=self.metadata, - ) - span = self.span_cm.__enter__() - - # Capture start time right before entering the stream (API call initiation) - self.start_time = time.time() - self.stream = self.stream_cm.__enter__() - - # Wrap the stream to capture first token time - return _DirectStreamIteratorSyncProxy(self.stream, self) - - def __exit__(self, exc_type, exc_val, exc_tb): - try: - self.stream_cm.__exit__(exc_type, exc_val, exc_tb) - finally: - if self.span_cm and self.start_time and self.stream: - end_time = time.time() - - try: - final_response = self.stream.get() - output = _serialize_model_response(final_response) - metrics = _extract_response_metrics( - final_response, self.start_time, end_time, self._first_token_time - ) - self.span_cm.log(output=output, metrics=metrics) - except Exception as e: - logger.debug(f"Failed to extract stream output/metrics: {e}") - - # Always clean up span context - if self.span_cm: - self.span_cm.__exit__(None, None, None) - - return False - - -class _DirectStreamIteratorSyncProxy: - """Proxy for direct 
stream (sync) that captures first token time.""" - - def __init__(self, stream: Any, wrapper: _DirectStreamWrapperSync): - self._stream = stream - self._wrapper = wrapper - self._iterator = None - - def __getattr__(self, name: str): - """Delegate all attribute access to the wrapped stream.""" - return getattr(self._stream, name) - - def __iter__(self): - """Return iterator that captures first token time.""" - # Get the actual iterator from the stream - self._iterator = self._stream.__iter__() if hasattr(self._stream, "__iter__") else self._stream - return self - - def __next__(self): - """Capture first token time on first iteration.""" - if self._iterator is None: - # In case __iter__ wasn't called, initialize it - self._iterator = self._stream.__iter__() if hasattr(self._stream, "__iter__") else self._stream - - item = self._iterator.__next__() - if self._wrapper._first_token_time is None: - self._wrapper._first_token_time = time.time() - return item - - -def _create_tool_spans_from_messages(result: Any) -> None: - """ - Create TOOL-type spans from tool call/return message parts in a completed agent result. 
- - Uses message timestamps from PydanticAI to position spans correctly in the trace: - - start_time = ModelResponse.timestamp (when the model requested the tool call) - - end_time = ModelRequest.timestamp (when the tool result was sent back) - """ - try: - _create_tool_spans_from_messages_impl(result) - except Exception: - pass - - -def _create_tool_spans_from_messages_impl(result: Any) -> None: - from pydantic_ai.messages import ToolCallPart, ToolReturnPart - - messages = result.new_messages() - - returns_by_id: dict[str, tuple[Any, float | None]] = {} - for msg in messages: - if not hasattr(msg, "parts"): - continue - msg_ts = _msg_timestamp(msg) - for part in msg.parts: - if isinstance(part, ToolReturnPart) and hasattr(part, "tool_call_id"): - returns_by_id[part.tool_call_id] = (part, msg_ts) - - for msg in messages: - if not hasattr(msg, "parts"): - continue - call_ts = _msg_timestamp(msg) - for part in msg.parts: - if not isinstance(part, ToolCallPart): - continue - - tool_name = getattr(part, "tool_name", None) or "unknown_tool" - tool_call_id = getattr(part, "tool_call_id", None) - - try: - input_data = part.args_as_dict() - except Exception: - input_data = bt_safe_deep_copy(getattr(part, "args", None)) - - output_data = None - return_ts: float | None = None - if tool_call_id and tool_call_id in returns_by_id: - return_part, return_ts = returns_by_id[tool_call_id] - output_data = bt_safe_deep_copy(getattr(return_part, "content", None)) - - metadata = {} - if tool_call_id: - metadata["tool_call_id"] = tool_call_id - - with start_span( - name=tool_name, - type=SpanTypeAttribute.TOOL, - input=input_data, - start_time=call_ts, - metadata=metadata if metadata else None, - ) as tool_span: - metrics = {} - if call_ts is not None: - metrics["start"] = call_ts - if return_ts is not None: - metrics["end"] = return_ts - if call_ts is not None and return_ts is not None: - metrics["duration"] = return_ts - call_ts - tool_span.log(output=output_data, metrics=metrics if 
metrics else None) - tool_span.end(end_time=return_ts) - - -def _msg_timestamp(msg: Any) -> float | None: - """Extract epoch-seconds timestamp from a PydanticAI message, or None.""" - ts = getattr(msg, "timestamp", None) - if ts is None: - return None - try: - return ts.timestamp() # datetime → float - except Exception: - return None - - -def _serialize_user_prompt(user_prompt: Any) -> Any: - """Serialize user prompt, handling BinaryContent and other types.""" - if user_prompt is None: - return None - - if isinstance(user_prompt, str): - return user_prompt - - if isinstance(user_prompt, list): - return [_serialize_content_part(part) for part in user_prompt] - - return _serialize_content_part(user_prompt) - - -def _serialize_content_part(part: Any) -> Any: - """Serialize a content part, handling BinaryContent specially. - - This function handles: - - BinaryContent: converts to Braintrust Attachment - - Parts with nested content (UserPromptPart): recursively serializes content items - - Strings: passes through unchanged - - Other objects: converts to dict via model_dump - """ - if part is None: - return None - - if hasattr(part, "data") and hasattr(part, "media_type") and hasattr(part, "kind"): - if part.kind == "binary": - data = part.data - media_type = part.media_type - - extension = media_type.split("/")[1] if "/" in media_type else "bin" - filename = f"file.{extension}" - - attachment = Attachment(data=data, filename=filename, content_type=media_type) - return {"type": "binary", "attachment": attachment, "media_type": media_type} - - if hasattr(part, "content"): - content = part.content - if isinstance(content, list): - serialized_content = [_serialize_content_part(item) for item in content] - result = bt_safe_deep_copy(part) - if isinstance(result, dict): - result["content"] = serialized_content - return result - elif content is not None: - serialized_content = _serialize_content_part(content) - result = bt_safe_deep_copy(part) - if isinstance(result, dict): - 
result["content"] = serialized_content - return result - - if isinstance(part, str): - return part - - return bt_safe_deep_copy(part) - - -def _serialize_messages(messages: Any) -> Any: - """Serialize messages list.""" - if not messages: - return [] - - result = [] - for msg in messages: - if hasattr(msg, "parts") and msg.parts: - original_parts = msg.parts - serialized_parts = [_serialize_content_part(p) for p in original_parts] - - # Use model_dump with exclude to avoid serializing parts field prematurely - if hasattr(msg, "model_dump"): - try: - serialized_msg = msg.model_dump(exclude={"parts"}, exclude_none=True) - except (TypeError, ValueError): - # If exclude parameter not supported, fall back to bt_safe_deep_copy - serialized_msg = bt_safe_deep_copy(msg) - else: - serialized_msg = bt_safe_deep_copy(msg) - - if isinstance(serialized_msg, dict): - serialized_msg["parts"] = serialized_parts - else: - serialized_msg = bt_safe_deep_copy(msg) - - result.append(serialized_msg) - - return result - - -def _serialize_result_output(result: Any) -> Any: - """Serialize agent run result output.""" - if not result: - return None - - output_dict = {} - - if hasattr(result, "output"): - output_dict["output"] = bt_safe_deep_copy(result.output) - - if hasattr(result, "response"): - output_dict["response"] = _serialize_model_response(result.response) - - return output_dict if output_dict else bt_safe_deep_copy(result) - - -def _serialize_stream_output(stream_result: Any) -> Any: - """Serialize stream result output.""" - if not stream_result: - return None - - output_dict = {} - - if hasattr(stream_result, "response"): - output_dict["response"] = _serialize_model_response(stream_result.response) - - return output_dict if output_dict else None - - -def _serialize_model_response(response: Any) -> Any: - """Serialize a model response.""" - if not response: - return None - - response_dict = bt_safe_deep_copy(response) - - if hasattr(response, "parts") and isinstance(response_dict, 
dict): - response_dict["parts"] = [_serialize_content_part(p) for p in response.parts] - - return response_dict - - -def _extract_model_info_from_model_instance(model: Any) -> tuple[str | None, str | None]: - """Extract model name and provider from a model instance. - - Args: - model: A Pydantic AI model instance (OpenAIChatModel, AnthropicModel, etc.) - - Returns: - Tuple of (model_name, provider) - """ - if not model: - return None, None - - if isinstance(model, str): - return _parse_model_string(model) - - if hasattr(model, "model_name"): - model_name = model.model_name - class_name = type(model).__name__ - provider = None - if "OpenAI" in class_name: - provider = "openai" - elif "Anthropic" in class_name: - provider = "anthropic" - elif "Gemini" in class_name: - provider = "gemini" - elif "Groq" in class_name: - provider = "groq" - elif "Mistral" in class_name: - provider = "mistral" - elif "VertexAI" in class_name: - provider = "vertexai" - - return model_name, provider - - if hasattr(model, "name"): - return _parse_model_string(model.name) - - return None, None - - -def _extract_model_info(agent: Any) -> tuple[str | None, str | None]: - """Extract model name and provider from agent. - - Args: - agent: A Pydantic AI Agent instance - - Returns: - Tuple of (model_name, provider) - """ - if not hasattr(agent, "model"): - return None, None - - return _extract_model_info_from_model_instance(agent.model) - - -def _build_model_metadata(model_name: str | None, provider: str | None, model_settings: Any = None) -> dict[str, Any]: - """Build metadata dictionary with model info. 
- - Args: - model_name: The model name (e.g., "gpt-4o") - provider: The provider (e.g., "openai") - model_settings: Optional model settings to include - - Returns: - Dictionary of metadata - """ - metadata = {} - if model_name: - metadata["model"] = model_name - if provider: - metadata["provider"] = provider - if model_settings: - metadata["model_settings"] = bt_safe_deep_copy(model_settings) - return metadata - - -def _parse_model_string(model: Any) -> tuple[str | None, str | None]: - """Parse model string to extract provider and model name. - - Pydantic AI uses format: "provider:model-name" (e.g., "openai:gpt-4o") - """ - if not model: - return None, None - - model_str = str(model) - - if ":" in model_str: - parts = model_str.split(":", 1) - return parts[1], parts[0] # (model_name, provider) - - return model_str, None - - -def _extract_usage_metrics(result: Any, start_time: float, end_time: float) -> dict[str, float] | None: - """Extract usage metrics from agent run result.""" - metrics: dict[str, float] = {} - - metrics["start"] = start_time - metrics["end"] = end_time - metrics["duration"] = end_time - start_time - - usage = None - if hasattr(result, "response"): - try: - response = result.response - if hasattr(response, "usage"): - usage = response.usage - except (AttributeError, ValueError): - pass - - if usage is None and hasattr(result, "usage"): - usage = result.usage - - if usage is None: - return metrics - - if hasattr(usage, "input_tokens"): - input_tokens = usage.input_tokens - if input_tokens is not None: - metrics["prompt_tokens"] = float(input_tokens) - - if hasattr(usage, "output_tokens"): - output_tokens = usage.output_tokens - if output_tokens is not None: - metrics["completion_tokens"] = float(output_tokens) - - if hasattr(usage, "total_tokens"): - total_tokens = usage.total_tokens - if total_tokens is not None: - metrics["tokens"] = float(total_tokens) - - if hasattr(usage, "cache_read_tokens") and usage.cache_read_tokens is not None: - 
metrics["prompt_cached_tokens"] = float(usage.cache_read_tokens) - - if hasattr(usage, "cache_write_tokens") and usage.cache_write_tokens is not None: - metrics["prompt_cache_creation_tokens"] = float(usage.cache_write_tokens) - - if hasattr(usage, "input_audio_tokens") and usage.input_audio_tokens is not None: - metrics["prompt_audio_tokens"] = float(usage.input_audio_tokens) - - if hasattr(usage, "output_audio_tokens") and usage.output_audio_tokens is not None: - metrics["completion_audio_tokens"] = float(usage.output_audio_tokens) - - if hasattr(usage, "details") and isinstance(usage.details, dict): - details = usage.details - - if "reasoning_tokens" in details: - metrics["completion_reasoning_tokens"] = float(details["reasoning_tokens"]) - - if "cached_tokens" in details: - metrics["prompt_cached_tokens"] = float(details["cached_tokens"]) - - return metrics if metrics else None - - -def _extract_stream_usage_metrics( - stream_result: Any, start_time: float, end_time: float, first_token_time: float | None -) -> dict[str, float] | None: - """Extract usage metrics from stream result.""" - metrics: dict[str, float] = {} - - metrics["start"] = start_time - metrics["end"] = end_time - metrics["duration"] = end_time - start_time - - if first_token_time: - metrics["time_to_first_token"] = first_token_time - start_time - - if hasattr(stream_result, "usage"): - usage_func = stream_result.usage - if callable(usage_func): - usage = usage_func() - else: - usage = usage_func - - if usage: - if hasattr(usage, "input_tokens") and usage.input_tokens is not None: - metrics["prompt_tokens"] = float(usage.input_tokens) - - if hasattr(usage, "output_tokens") and usage.output_tokens is not None: - metrics["completion_tokens"] = float(usage.output_tokens) - - if hasattr(usage, "total_tokens") and usage.total_tokens is not None: - metrics["tokens"] = float(usage.total_tokens) - - if hasattr(usage, "cache_read_tokens") and usage.cache_read_tokens is not None: - 
metrics["prompt_cached_tokens"] = float(usage.cache_read_tokens) - - if hasattr(usage, "cache_write_tokens") and usage.cache_write_tokens is not None: - metrics["prompt_cache_creation_tokens"] = float(usage.cache_write_tokens) - - return metrics if metrics else None - - -def _extract_response_metrics( - response: Any, start_time: float, end_time: float, first_token_time: float | None = None -) -> dict[str, float] | None: - """Extract metrics from model response.""" - metrics: dict[str, float] = {} - - metrics["start"] = start_time - metrics["end"] = end_time - metrics["duration"] = end_time - start_time - - if first_token_time: - metrics["time_to_first_token"] = first_token_time - start_time - - if hasattr(response, "usage") and response.usage: - usage = response.usage - - if hasattr(usage, "input_tokens") and usage.input_tokens is not None: - metrics["prompt_tokens"] = float(usage.input_tokens) - - if hasattr(usage, "output_tokens") and usage.output_tokens is not None: - metrics["completion_tokens"] = float(usage.output_tokens) - - if hasattr(usage, "total_tokens") and usage.total_tokens is not None: - metrics["tokens"] = float(usage.total_tokens) - - if hasattr(usage, "cache_read_tokens") and usage.cache_read_tokens is not None: - metrics["prompt_cached_tokens"] = float(usage.cache_read_tokens) - - if hasattr(usage, "cache_write_tokens") and usage.cache_write_tokens is not None: - metrics["prompt_cache_creation_tokens"] = float(usage.cache_write_tokens) - - # Extract reasoning tokens for reasoning models (o1/o3) - if hasattr(usage, "details") and usage.details is not None: - if hasattr(usage.details, "reasoning_tokens") and usage.details.reasoning_tokens is not None: - metrics["completion_reasoning_tokens"] = float(usage.details.reasoning_tokens) - - return metrics if metrics else None - - -def _create_start_producer_wrapper(): - """Create wrapper for StreamedResponseSync._start_producer to propagate context. 
- - StreamedResponseSync._start_producer creates a background thread that doesn't - inherit contextvars. This wrapper ensures Braintrust context flows to that thread - so nested instrumentation (like wrap_openai) creates properly parented spans. - """ - - def wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any) -> None: - ctx = contextvars.copy_context() - original_async_producer = instance._async_producer - - def _context_wrapped_async_producer() -> None: - ctx.run(original_async_producer) - - instance._async_producer = _context_wrapped_async_producer - try: - return wrapped(*args, **kwargs) - finally: - instance._async_producer = original_async_producer - - return wrapper - - -def _is_patched(obj: Any) -> bool: - """Check if object is already patched. - - For classes we check __dict__ directly because getattr walks the MRO. - Without this, wrapping WrapperModel first causes InstrumentedModel to - appear already-patched (it inherits the flag), so its request() method - is never wrapped and the inner "chat" span is lost. - """ - if isinstance(obj, type): - return obj.__dict__.get("_braintrust_patched", False) - return getattr(obj, "_braintrust_patched", False) - - -def _serialize_type(obj: Any) -> Any: - """Serialize a type/class for logging, handling Pydantic models and other types. - - This is useful for output_type, toolsets, and similar type parameters. - Returns full JSON schema for Pydantic models so engineers can see exactly - what structured output schema was used. 
- """ - import inspect - - # For sequences of types (like Union types or list of models) - if isinstance(obj, (list, tuple)): - return [_serialize_type(item) for item in obj] - - # Handle Pydantic AI's output wrappers (ToolOutput, NativeOutput, PromptedOutput, TextOutput) - if hasattr(obj, "output"): - # These are wrapper classes with an 'output' field containing the actual type - wrapper_info = {"wrapper": type(obj).__name__} - if hasattr(obj, "name") and obj.name: - wrapper_info["name"] = obj.name - if hasattr(obj, "description") and obj.description: - wrapper_info["description"] = obj.description - wrapper_info["output"] = _serialize_type(obj.output) - return wrapper_info - - # If it's a Pydantic model class, return its full JSON schema - if inspect.isclass(obj): - try: - from pydantic import BaseModel - - if issubclass(obj, BaseModel): - # Return the full JSON schema - includes all field info, descriptions, constraints, etc. - return obj.model_json_schema() - except (ImportError, AttributeError, TypeError): - pass - - # Not a Pydantic model, return class name - return obj.__name__ - - # If it has a __name__ attribute (like functions), use that - if hasattr(obj, "__name__"): - return obj.__name__ - - # Try standard serialization - return bt_safe_deep_copy(obj) - - -def _build_agent_input_and_metadata(args: Any, kwargs: Any, instance: Any) -> tuple[dict[str, Any], dict[str, Any]]: - """Build input data and metadata for agent wrappers. 
- - Returns: - Tuple of (input_data, metadata) - """ - input_data = {} - - user_prompt = args[0] if len(args) > 0 else kwargs.get("user_prompt") - if user_prompt is not None: - input_data["user_prompt"] = _serialize_user_prompt(user_prompt) - - for key, value in kwargs.items(): - if key == "deps": - continue - elif key == "message_history": - input_data[key] = _serialize_messages(value) if value is not None else None - elif key in ("output_type", "toolsets"): - # These often contain types/classes, use special serialization - input_data[key] = _serialize_type(value) if value is not None else None - elif key == "model_settings": - # model_settings passed to run() goes in INPUT (it's a run() parameter) - input_data[key] = bt_safe_deep_copy(value) if value is not None else None - else: - input_data[key] = bt_safe_deep_copy(value) if value is not None else None - - if "model" in kwargs: - model_name, provider = _parse_model_string(kwargs["model"]) - else: - model_name, provider = _extract_model_info(instance) - - # Extract agent-level configuration for metadata - # Only add to metadata if NOT explicitly passed in kwargs (those go in input) - agent_model_settings = None - if "model_settings" not in kwargs and hasattr(instance, "model_settings") and instance.model_settings is not None: - agent_model_settings = instance.model_settings - - metadata = _build_model_metadata(model_name, provider, agent_model_settings) - - # Extract additional agent configuration (only if not passed as kwargs) - if "name" not in kwargs and hasattr(instance, "name") and instance.name is not None: - metadata["agent_name"] = instance.name - - if "end_strategy" not in kwargs and hasattr(instance, "end_strategy") and instance.end_strategy is not None: - metadata["end_strategy"] = str(instance.end_strategy) - - # Extract output_type if set on agent and not passed as kwarg - # output_type can be a Pydantic model, str, or other types that get converted to JSON schema - if "output_type" not in kwargs 
and hasattr(instance, "output_type") and instance.output_type is not None: - try: - metadata["output_type"] = _serialize_type(instance.output_type) - except Exception as e: - logger.debug(f"Failed to extract output_type from agent: {e}") - - # Extract toolsets if set on agent and not passed as kwarg - # Toolsets go in INPUT (not metadata) because agent.run() accepts toolsets parameter - if "toolsets" not in kwargs and hasattr(instance, "toolsets"): - try: - toolsets = instance.toolsets - if toolsets: - # Convert toolsets to a list with FULL tool schemas for input - serialized_toolsets = [] - for ts in toolsets: - ts_info = { - "id": getattr(ts, "id", str(type(ts).__name__)), - "label": getattr(ts, "label", None), - } - # Add full tool schemas (not just names) since toolsets can be passed to agent.run() - if hasattr(ts, "tools") and ts.tools: - tools_list = [] - tools_dict = ts.tools - # tools is a dict mapping tool name -> Tool object - for tool_name, tool_obj in tools_dict.items(): - tool_dict = { - "name": tool_name, - } - # Extract description - if hasattr(tool_obj, "description") and tool_obj.description: - tool_dict["description"] = tool_obj.description - # Extract JSON schema for parameters - if hasattr(tool_obj, "function_schema") and hasattr( - tool_obj.function_schema, "json_schema" - ): - tool_dict["parameters"] = tool_obj.function_schema.json_schema - tools_list.append(tool_dict) - ts_info["tools"] = tools_list - serialized_toolsets.append(ts_info) - input_data["toolsets"] = serialized_toolsets - except Exception as e: - logger.debug(f"Failed to extract toolsets from agent: {e}") - - # Extract system_prompt from agent if not passed as kwarg - # Note: system_prompt goes in input (not metadata) because it's semantically part of the LLM input - # Pydantic AI doesn't expose a public API for this, so we access the private _system_prompts - # attribute. This is wrapped in try/except to gracefully handle if the internal structure changes. 
- if "system_prompt" not in kwargs: - try: - if hasattr(instance, "_system_prompts") and instance._system_prompts: - input_data["system_prompt"] = "\n\n".join(instance._system_prompts) - except Exception as e: - logger.debug(f"Failed to extract system_prompt from agent: {e}") - - return input_data, metadata - - -def _build_direct_model_input_and_metadata(args: Any, kwargs: Any) -> tuple[dict[str, Any], dict[str, Any]]: - """Build input data and metadata for direct model request wrappers. - - Returns: - Tuple of (input_data, metadata) - """ - input_data = {} - - model = args[0] if len(args) > 0 else kwargs.get("model") - if model is not None: - input_data["model"] = str(model) - - messages = args[1] if len(args) > 1 else kwargs.get("messages", []) - if messages: - input_data["messages"] = _serialize_messages(messages) - - for key, value in kwargs.items(): - if key not in ["model", "messages"]: - input_data[key] = bt_safe_deep_copy(value) if value is not None else None - - model_name, provider = _parse_model_string(model) - metadata = _build_model_metadata(model_name, provider) - - return input_data, metadata +"""Compatibility re-exports for the migrated Pydantic AI integration.""" + +from braintrust.integrations.pydantic_ai import ( + PydanticAIIntegration, + setup_pydantic_ai, + wrap_agent, + wrap_model_classes, + wrap_model_request, + wrap_model_request_stream, + wrap_model_request_stream_sync, + wrap_model_request_sync, +) + + +__all__ = [ + "PydanticAIIntegration", + "setup_pydantic_ai", + "wrap_agent", + "wrap_model_classes", + "wrap_model_request", + "wrap_model_request_sync", + "wrap_model_request_stream", + "wrap_model_request_stream_sync", +]