From 3f6d01c6ccf3dd158107a0325abfad5b82db2d9b Mon Sep 17 00:00:00 2001
From: Abhijeet Prasad <abhijeet@braintrustdata.com>
Date: Tue, 31 Mar 2026 10:59:50 -0400
Subject: [PATCH] ref(pydantic_ai): migrate wrapper to integration patchers

Move the Pydantic AI wrapper implementation into the integrations
package and keep the legacy wrapper module as a compatibility re-export.
This aligns the provider with the newer integration architecture and
consolidates its tests, auto-instrument wiring, and public exports in
one place.

Add generic class-scan support in the shared integration base so runtime-loaded
subclasses can be rescanned and patched without leaking patch-marker details
into tracing code. Use that helper for Pydantic AI model-class wrapping and
move wrap_model_classes into patchers so patcher-specific behavior stays in the
patching layer.
---
 py/noxfile.py                                 |    6 +-
 py/src/braintrust/__init__.py                 |    6 +-
 py/src/braintrust/auto.py                     |   11 +-
 py/src/braintrust/integrations/__init__.py    |    2 +
 py/src/braintrust/integrations/base.py        |  148 +-
 .../integrations/pydantic_ai/__init__.py      |   52 +
 .../cassettes/test_agent_run_async.yaml       |    0
 .../cassettes/test_agent_run_stream.yaml      |    0
 .../test_agent_run_stream_events.yaml         |    0
 ...st_agent_run_stream_structured_output.yaml |    0
 .../cassettes/test_agent_run_stream_sync.yaml |    0
 .../cassettes/test_agent_run_sync.yaml        |    0
 ...nt_stream_buffer_pattern_early_return.yaml |    0
 .../test_agent_stream_early_break.yaml        |    0
 .../test_agent_structured_output.yaml         |    0
 .../test_agent_with_binary_content.yaml       |    0
 .../test_agent_with_custom_settings.yaml      |    0
 .../test_agent_with_document_input.yaml       |    0
 .../test_agent_with_message_history.yaml      |    0
 ...agent_with_model_settings_in_metadata.yaml |  110 ++
 ...with_model_settings_override_in_input.yaml |  120 ++
 ..._agent_with_system_prompt_in_metadata.yaml |    0
 .../test_agent_with_tool_execution.yaml       |    0
 .../cassettes/test_agent_with_tools.yaml      |    0
 .../cassettes/test_auto_pydantic_ai.yaml      |  112 ++
 .../cassettes/test_direct_model_request.yaml  |    0
 ...s_nested_chat_span_without_class_scan.yaml |  109 ++
 .../test_direct_model_request_stream.yaml     |    0
 ..._model_request_stream_complete_output.yaml |    0
 ...test_direct_model_request_stream_sync.yaml |    0
 .../test_direct_model_request_sync.yaml       |    0
 ...st_direct_model_request_with_settings.yaml |    0
 .../test_model_class_span_names.yaml          |    0
 ...tream_sync_thread_context_propagation.yaml |  436 +++++
 ...multiple_identical_sequential_streams.yaml |    0
 .../test_multiple_sequential_streams.yaml     |    0
 .../cassettes/test_no_model_agent_run.yaml    |    0
 .../test_no_model_agent_run_with_logfire.yaml |    0
 .../test_pydantic_wrapped_completion.yaml     |    0
 .../test_pydantic_wrapped_stream.yaml         |    0
 ...st_stream_buffer_pattern_early_return.yaml |    0
 ...st_stream_early_break_async_generator.yaml |  314 ++++
 .../test_tool_execution_creates_spans.yaml    |    0
 ..._not_depend_on_message_reconstruction.yaml |  217 +++
 .../test_wrapper_agent_run_is_traced.yaml     |  111 ++
 .../integrations/pydantic_ai/integration.py   |   32 +
 .../integrations/pydantic_ai/patchers.py      |  201 +++
 .../test_pydantic_ai_integration.py           |  458 ++---
 .../pydantic_ai}/test_pydantic_ai_logfire.py  |    6 +
 .../test_pydantic_ai_wrap_openai.py           |    6 +
 .../integrations/pydantic_ai/tracing.py       | 1478 ++++++++++++++++
 py/src/braintrust/wrappers/pydantic_ai.py     | 1500 +----------------
 52 files changed, 3662 insertions(+), 1773 deletions(-)
 create mode 100644 py/src/braintrust/integrations/pydantic_ai/__init__.py
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_agent_run_async.yaml (100%)
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_agent_run_stream.yaml (100%)
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_agent_run_stream_events.yaml (100%)
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_agent_run_stream_structured_output.yaml (100%)
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_agent_run_stream_sync.yaml (100%)
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_agent_run_sync.yaml (100%)
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_agent_stream_buffer_pattern_early_return.yaml (100%)
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_agent_stream_early_break.yaml (100%)
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_agent_structured_output.yaml (100%)
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_agent_with_binary_content.yaml (100%)
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_agent_with_custom_settings.yaml (100%)
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_agent_with_document_input.yaml (100%)
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_agent_with_message_history.yaml (100%)
 create mode 100644 py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_model_settings_in_metadata.yaml
 create mode 100644 py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_model_settings_override_in_input.yaml
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_agent_with_system_prompt_in_metadata.yaml (100%)
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_agent_with_tool_execution.yaml (100%)
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_agent_with_tools.yaml (100%)
 create mode 100644 py/src/braintrust/integrations/pydantic_ai/cassettes/test_auto_pydantic_ai.yaml
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_direct_model_request.yaml (100%)
 create mode 100644 py/src/braintrust/integrations/pydantic_ai/cassettes/test_direct_model_request_creates_nested_chat_span_without_class_scan.yaml
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_direct_model_request_stream.yaml (100%)
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_direct_model_request_stream_complete_output.yaml (100%)
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_direct_model_request_stream_sync.yaml (100%)
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_direct_model_request_sync.yaml (100%)
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_direct_model_request_with_settings.yaml (100%)
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_model_class_span_names.yaml (100%)
 create mode 100644 py/src/braintrust/integrations/pydantic_ai/cassettes/test_model_request_stream_sync_thread_context_propagation.yaml
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_multiple_identical_sequential_streams.yaml (100%)
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_multiple_sequential_streams.yaml (100%)
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_no_model_agent_run.yaml (100%)
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_no_model_agent_run_with_logfire.yaml (100%)
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_pydantic_wrapped_completion.yaml (100%)
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_pydantic_wrapped_stream.yaml (100%)
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_stream_buffer_pattern_early_return.yaml (100%)
 create mode 100644 py/src/braintrust/integrations/pydantic_ai/cassettes/test_stream_early_break_async_generator.yaml
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/cassettes/test_tool_execution_creates_spans.yaml (100%)
 create mode 100644 py/src/braintrust/integrations/pydantic_ai/cassettes/test_tool_execution_tracing_does_not_depend_on_message_reconstruction.yaml
 create mode 100644 py/src/braintrust/integrations/pydantic_ai/cassettes/test_wrapper_agent_run_is_traced.yaml
 create mode 100644 py/src/braintrust/integrations/pydantic_ai/integration.py
 create mode 100644 py/src/braintrust/integrations/pydantic_ai/patchers.py
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/test_pydantic_ai_integration.py (89%)
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/test_pydantic_ai_logfire.py (94%)
 rename py/src/braintrust/{wrappers => integrations/pydantic_ai}/test_pydantic_ai_wrap_openai.py (97%)
 create mode 100644 py/src/braintrust/integrations/pydantic_ai/tracing.py

diff --git a/py/noxfile.py b/py/noxfile.py
index 4294032e..fc5ab01c 100644
--- a/py/noxfile.py
+++ b/py/noxfile.py
@@ -124,7 +124,7 @@ def test_pydantic_ai_wrap_openai(session, version):
     """Test pydantic_ai with wrap_openai() approach - supports older versions."""
     _install_test_deps(session)
     _install(session, "pydantic_ai", version)
-    _run_tests(session, f"{WRAPPER_DIR}/test_pydantic_ai_wrap_openai.py")
+    _run_tests(session, f"{INTEGRATION_DIR}/pydantic_ai/test_pydantic_ai_wrap_openai.py")
     _run_core_tests(session)
 
 
@@ -137,7 +137,7 @@ def test_pydantic_ai_integration(session, version):
         session.skip("pydantic_ai integration tests require Python >= 3.10 (pydantic_ai 1.10.0+)")
     _install_test_deps(session)
     _install(session, "pydantic_ai", version)
-    _run_tests(session, f"{WRAPPER_DIR}/test_pydantic_ai_integration.py")
+    _run_tests(session, f"{INTEGRATION_DIR}/pydantic_ai/test_pydantic_ai_integration.py")
     _run_core_tests(session)
 
 
@@ -149,7 +149,7 @@ def test_pydantic_ai_logfire(session):
     _install_test_deps(session)
     _install(session, "pydantic_ai")
     _install(session, "logfire")
-    _run_tests(session, f"{WRAPPER_DIR}/test_pydantic_ai_logfire.py")
+    _run_tests(session, f"{INTEGRATION_DIR}/pydantic_ai/test_pydantic_ai_logfire.py")
 
 
 @nox.session()
diff --git a/py/src/braintrust/__init__.py b/py/src/braintrust/__init__.py
index f40d80a3..02f7e252 100644
--- a/py/src/braintrust/__init__.py
+++ b/py/src/braintrust/__init__.py
@@ -76,6 +76,9 @@ def is_equal(expected, output):
 from .integrations.openrouter import (
     wrap_openrouter,  # noqa: F401 # type: ignore[reportUnusedImport]
 )
+from .integrations.pydantic_ai import (
+    setup_pydantic_ai,  # noqa: F401 # type: ignore[reportUnusedImport]
+)
 from .logger import *
 from .logger import (
     _internal_get_global_state,  # noqa: F401 # type: ignore[reportUnusedImport]
@@ -98,6 +101,3 @@ def is_equal(expected, output):
 from .wrappers.litellm import (
     wrap_litellm,  # noqa: F401 # type: ignore[reportUnusedImport]
 )
-from .wrappers.pydantic_ai import (
-    setup_pydantic_ai,  # noqa: F401 # type: ignore[reportUnusedImport]
-)
diff --git a/py/src/braintrust/auto.py b/py/src/braintrust/auto.py
index 4ede9f96..189ea2f9 100644
--- a/py/src/braintrust/auto.py
+++ b/py/src/braintrust/auto.py
@@ -16,6 +16,7 @@
     DSPyIntegration,
     GoogleGenAIIntegration,
     OpenRouterIntegration,
+    PydanticAIIntegration,
 )
 
 
@@ -124,7 +125,7 @@ def auto_instrument(
     if litellm:
         results["litellm"] = _instrument_litellm()
     if pydantic_ai:
-        results["pydantic_ai"] = _instrument_pydantic_ai()
+        results["pydantic_ai"] = _instrument_integration(PydanticAIIntegration)
     if google_genai:
         results["google_genai"] = _instrument_integration(GoogleGenAIIntegration)
     if openrouter:
@@ -163,11 +164,3 @@ def _instrument_litellm() -> bool:
 
         return patch_litellm()
     return False
-
-
-def _instrument_pydantic_ai() -> bool:
-    with _try_patch():
-        from braintrust.wrappers.pydantic_ai import setup_pydantic_ai
-
-        return setup_pydantic_ai()
-    return False
diff --git a/py/src/braintrust/integrations/__init__.py b/py/src/braintrust/integrations/__init__.py
index e4e6e208..3331f2a5 100644
--- a/py/src/braintrust/integrations/__init__.py
+++ b/py/src/braintrust/integrations/__init__.py
@@ -6,6 +6,7 @@
 from .dspy import DSPyIntegration
 from .google_genai import GoogleGenAIIntegration
 from .openrouter import OpenRouterIntegration
+from .pydantic_ai import PydanticAIIntegration
 
 
 __all__ = [
@@ -17,4 +18,5 @@
     "DSPyIntegration",
     "GoogleGenAIIntegration",
     "OpenRouterIntegration",
+    "PydanticAIIntegration",
 ]
diff --git a/py/src/braintrust/integrations/base.py b/py/src/braintrust/integrations/base.py
index 8abd3320..62e0c8ce 100644
--- a/py/src/braintrust/integrations/base.py
+++ b/py/src/braintrust/integrations/base.py
@@ -20,6 +20,31 @@ class BasePatcher(ABC):
     patch_id: ClassVar[str | None] = None
     version_spec: ClassVar[str | None] = None
     priority: ClassVar[int] = 100
+    rescan_on_setup: ClassVar[bool] = False
+
+    @classmethod
+    def patch_marker_attr(cls) -> str:
+        """Return the sentinel attribute used to mark this patcher as applied."""
+        suffix = re.sub(r"\W+", "_", cls.identifier()).strip("_")
+        return f"__braintrust_patched_{suffix}__"
+
+    @classmethod
+    def has_patch_marker(cls, obj: Any) -> bool:
+        """Return whether *obj* is marked as patched by this patcher.
+
+        For classes, read ``__dict__`` directly so markers inherited via the
+        MRO do not make subclasses appear locally patched.
+        """
+        if obj is None:
+            return False
+        if isinstance(obj, type):
+            return bool(obj.__dict__.get(cls.patch_marker_attr(), False))
+        return bool(getattr(obj, cls.patch_marker_attr(), False))
+
+    @classmethod
+    def mark_patched(cls, obj: Any) -> None:
+        """Mark an object as patched by this patcher."""
+        setattr(obj, cls.patch_marker_attr(), True)
 
     @classmethod
     def identifier(cls) -> str:
@@ -44,6 +69,115 @@ def patch(cls, module: Any | None, version: str | None, *, target: Any | None =
         raise NotImplementedError
 
 
+class ClassScanPatcher(BasePatcher):
+    """Base patcher for rescanning and patching discovered class hierarchies."""
+
+    rescan_on_setup: ClassVar[bool] = True
+    include_abstract_classes: ClassVar[bool] = False
+    target_module: ClassVar[str | None] = None
+    root_class_path: ClassVar[str | None] = None
+
+    @classmethod
+    def resolve_scan_root(cls, module: Any | None, version: str | None, *, target: Any | None = None) -> Any | None:
+        """Return the object from which this patcher resolves its root class."""
+        if target is not None:
+            return target
+        if cls.target_module is not None:
+            try:
+                return importlib.import_module(cls.target_module)
+            except ImportError:
+                return None
+        return module
+
+    @classmethod
+    def iter_root_classes(
+        cls,
+        module: Any | None,
+        version: str | None,
+        *,
+        target: Any | None = None,
+    ) -> Iterable[type[Any]]:
+        """Yield root classes whose subclass trees should be scanned."""
+        if cls.root_class_path is None:
+            return ()
+        root = cls.resolve_scan_root(module, version, target=target)
+        if root is None:
+            return ()
+        root_class = _resolve_attr_path(root, cls.root_class_path)
+        if root_class is None:
+            return ()
+        return (root_class,)
+
+    @classmethod
+    def resolve_root_classes(
+        cls,
+        module: Any | None,
+        version: str | None,
+        *,
+        target: Any | None = None,
+    ) -> tuple[type[Any], ...]:
+        """Return the currently discoverable root classes for this patcher."""
+        try:
+            return tuple(cls.iter_root_classes(module, version, target=target))
+        except ImportError:
+            return ()
+
+    @classmethod
+    def applies(cls, module: Any | None, version: str | None, *, target: Any | None = None) -> bool:
+        """Return whether any root classes are currently discoverable."""
+        return super().applies(module, version, target=target) and bool(
+            cls.resolve_root_classes(module, version, target=target)
+        )
+
+    @classmethod
+    @abstractmethod
+    def patch_class(cls, target_class: type[Any]) -> bool | None:
+        """Patch one discovered class.
+
+        Return ``False`` to skip marking the class as patched. Any other return
+        value is treated as a successful patch.
+        """
+        raise NotImplementedError
+
+    @classmethod
+    def iter_classes(
+        cls,
+        module: Any | None,
+        version: str | None,
+        *,
+        target: Any | None = None,
+    ) -> Iterable[type[Any]]:
+        """Yield discovered subclasses under the configured root classes."""
+
+        def walk(base_class: type[Any]) -> Iterable[type[Any]]:
+            for subclass in base_class.__subclasses__():
+                if cls.include_abstract_classes or not getattr(subclass, "__abstractmethods__", None):
+                    yield subclass
+                yield from walk(subclass)
+
+        for root_class in cls.resolve_root_classes(module, version, target=target):
+            yield from walk(root_class)
+
+    @classmethod
+    def is_patched(cls, module: Any | None, version: str | None, *, target: Any | None = None) -> bool:
+        """Return ``True`` when every currently discovered class is patched."""
+        classes = tuple(cls.iter_classes(module, version, target=target))
+        return bool(classes) and all(cls.has_patch_marker(class_) for class_ in classes)
+
+    @classmethod
+    def patch(cls, module: Any | None, version: str | None, *, target: Any | None = None) -> bool:
+        """Patch all newly discovered classes under the configured roots."""
+        success = False
+        for class_ in cls.iter_classes(module, version, target=target):
+            if cls.has_patch_marker(class_):
+                continue
+            if cls.patch_class(class_) is False:
+                continue
+            cls.mark_patched(class_)
+            success = True
+        return success
+
+
 class FunctionWrapperPatcher(BasePatcher):
     """Base patcher for single-target `wrap_function_wrapper` instrumentation.
 
@@ -125,14 +259,13 @@ def mark_patched(cls, obj: Any) -> None:
     @classmethod
     def is_patched(cls, module: Any | None, version: str | None, *, target: Any | None = None) -> bool:
         """Return whether this patcher's target has already been instrumented."""
-        marker = cls.patch_marker_attr()
         resolved_target = cls.resolve_target(module, version, target=target)
-        if resolved_target is not None and getattr(resolved_target, marker, False):
+        if cls.has_patch_marker(resolved_target):
             return True
         # Fall back to checking the root — the marker may live there when the
         # resolved target does not support setattr (e.g. bound methods).
         root = cls.resolve_root(module, version, target=target)
-        if root is not None and root is not resolved_target and getattr(root, marker, False):
+        if root is not None and root is not resolved_target and cls.has_patch_marker(root):
             return True
         return False
 
@@ -152,7 +285,7 @@ def patch(cls, module: Any | None, version: str | None, *, target: Any | None =
         cls.mark_patched(resolved_target)
         # If mark_patched could not store the marker on the target (e.g. bound
         # methods), store it on the root so is_patched() can still find it.
-        if not getattr(resolved_target, marker, False):
+        if not cls.has_patch_marker(resolved_target):
             setattr(root, marker, True)
         return True
 
@@ -174,8 +307,7 @@ def wrap_target(cls, target: Any) -> Any:
         ``superseded_by`` has a target that exists on *target*.  Returns
         *target* for convenient chaining.
         """
-        marker = cls.patch_marker_attr()
-        if getattr(target, marker, False):
+        if cls.has_patch_marker(target):
             return target
         attr = cls.target_path.rsplit(".", 1)[-1]
         if _resolve_attr_path(target, attr) is None:
@@ -241,7 +373,7 @@ def mark_patched(cls, obj: Any) -> None:
     def is_patched(cls, module: Any | None, version: str | None, *, target: Any | None = None) -> bool:
         """Return whether this patcher's replacement class is already installed."""
         resolved_target = cls.resolve_target(module, version, target=target)
-        return bool(resolved_target is not None and getattr(resolved_target, cls.patch_marker_attr(), False))
+        return bool(resolved_target is not None and cls.has_patch_marker(resolved_target))
 
     @classmethod
     def patch(cls, module: Any | None, version: str | None, *, target: Any | None = None) -> bool:
@@ -370,7 +502,7 @@ def setup(
         for patcher in sorted(selected_patchers, key=lambda patcher: patcher.priority):
             if not patcher.applies(module, version, target=target):
                 continue
-            if patcher.is_patched(module, version, target=target):
+            if not patcher.rescan_on_setup and patcher.is_patched(module, version, target=target):
                 success = True
                 continue
             success = patcher.patch(module, version, target=target) or success
diff --git a/py/src/braintrust/integrations/pydantic_ai/__init__.py b/py/src/braintrust/integrations/pydantic_ai/__init__.py
new file mode 100644
index 00000000..141fa667
--- /dev/null
+++ b/py/src/braintrust/integrations/pydantic_ai/__init__.py
@@ -0,0 +1,52 @@
+"""Braintrust integration for Pydantic AI."""
+
+import logging
+
+from braintrust.logger import NOOP_SPAN, current_span, init_logger
+
+from .integration import PydanticAIIntegration
+from .patchers import wrap_agent, wrap_model_classes
+from .tracing import (
+    wrap_model_request,
+    wrap_model_request_stream,
+    wrap_model_request_stream_sync,
+    wrap_model_request_sync,
+)
+
+
+logger = logging.getLogger(__name__)
+
+__all__ = [
+    "PydanticAIIntegration",
+    "setup_pydantic_ai",
+    "wrap_agent",
+    "wrap_model_classes",
+    "wrap_model_request",
+    "wrap_model_request_sync",
+    "wrap_model_request_stream",
+    "wrap_model_request_stream_sync",
+]
+
+
+def setup_pydantic_ai(
+    api_key: str | None = None,
+    project_id: str | None = None,
+    project_name: str | None = None,
+) -> bool:
+    """
+    Set up the Braintrust integration with Pydantic AI. Automatically patches Pydantic AI
+    agents and direct API functions for tracing.
+
+    Args:
+        api_key: Braintrust API key.
+        project_id: Braintrust project ID.
+        project_name: Braintrust project name.
+
+    Returns:
+        True if setup was successful, False otherwise.
+    """
+    span = current_span()
+    if span == NOOP_SPAN:
+        init_logger(project=project_name, api_key=api_key, project_id=project_id)
+
+    return PydanticAIIntegration.setup()
diff --git a/py/src/braintrust/wrappers/cassettes/test_agent_run_async.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_run_async.yaml
similarity index 100%
rename from py/src/braintrust/wrappers/cassettes/test_agent_run_async.yaml
rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_run_async.yaml
diff --git a/py/src/braintrust/wrappers/cassettes/test_agent_run_stream.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_run_stream.yaml
similarity index 100%
rename from py/src/braintrust/wrappers/cassettes/test_agent_run_stream.yaml
rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_run_stream.yaml
diff --git a/py/src/braintrust/wrappers/cassettes/test_agent_run_stream_events.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_run_stream_events.yaml
similarity index 100%
rename from py/src/braintrust/wrappers/cassettes/test_agent_run_stream_events.yaml
rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_run_stream_events.yaml
diff --git a/py/src/braintrust/wrappers/cassettes/test_agent_run_stream_structured_output.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_run_stream_structured_output.yaml
similarity index 100%
rename from py/src/braintrust/wrappers/cassettes/test_agent_run_stream_structured_output.yaml
rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_run_stream_structured_output.yaml
diff --git a/py/src/braintrust/wrappers/cassettes/test_agent_run_stream_sync.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_run_stream_sync.yaml
similarity index 100%
rename from py/src/braintrust/wrappers/cassettes/test_agent_run_stream_sync.yaml
rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_run_stream_sync.yaml
diff --git a/py/src/braintrust/wrappers/cassettes/test_agent_run_sync.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_run_sync.yaml
similarity index 100%
rename from py/src/braintrust/wrappers/cassettes/test_agent_run_sync.yaml
rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_run_sync.yaml
diff --git a/py/src/braintrust/wrappers/cassettes/test_agent_stream_buffer_pattern_early_return.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_stream_buffer_pattern_early_return.yaml
similarity index 100%
rename from py/src/braintrust/wrappers/cassettes/test_agent_stream_buffer_pattern_early_return.yaml
rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_stream_buffer_pattern_early_return.yaml
diff --git a/py/src/braintrust/wrappers/cassettes/test_agent_stream_early_break.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_stream_early_break.yaml
similarity index 100%
rename from py/src/braintrust/wrappers/cassettes/test_agent_stream_early_break.yaml
rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_stream_early_break.yaml
diff --git a/py/src/braintrust/wrappers/cassettes/test_agent_structured_output.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_structured_output.yaml
similarity index 100%
rename from py/src/braintrust/wrappers/cassettes/test_agent_structured_output.yaml
rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_structured_output.yaml
diff --git a/py/src/braintrust/wrappers/cassettes/test_agent_with_binary_content.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_binary_content.yaml
similarity index 100%
rename from py/src/braintrust/wrappers/cassettes/test_agent_with_binary_content.yaml
rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_binary_content.yaml
diff --git a/py/src/braintrust/wrappers/cassettes/test_agent_with_custom_settings.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_custom_settings.yaml
similarity index 100%
rename from py/src/braintrust/wrappers/cassettes/test_agent_with_custom_settings.yaml
rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_custom_settings.yaml
diff --git a/py/src/braintrust/wrappers/cassettes/test_agent_with_document_input.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_document_input.yaml
similarity index 100%
rename from py/src/braintrust/wrappers/cassettes/test_agent_with_document_input.yaml
rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_document_input.yaml
diff --git a/py/src/braintrust/wrappers/cassettes/test_agent_with_message_history.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_message_history.yaml
similarity index 100%
rename from py/src/braintrust/wrappers/cassettes/test_agent_with_message_history.yaml
rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_message_history.yaml
diff --git a/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_model_settings_in_metadata.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_model_settings_in_metadata.yaml
new file mode 100644
index 00000000..f8c05e41
--- /dev/null
+++ b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_model_settings_in_metadata.yaml
@@ -0,0 +1,110 @@
+interactions:
+- request:
+    body: '{"messages":[{"role":"user","content":"Say hello"}],"model":"gpt-4o-mini","max_completion_tokens":100,"stream":false,"temperature":0.5}'
+    headers:
+      Accept:
+      - application/json
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '135'
+      Content-Type:
+      - application/json
+      Cookie:
+      - _cfuvid=VQg4i_utDK73HtVZX9MnimdbFMrcTwHiGTkj8zvaxBM-1766265730198-0.0.1.1-604800000
+      Host:
+      - api.openai.com
+      User-Agent:
+      - pydantic-ai/1.73.0
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Async:
+      - async:asyncio
+      X-Stainless-Lang:
+      - python
+      X-Stainless-OS:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 2.30.0
+      X-Stainless-Runtime:
+      - CPython
+      X-Stainless-Runtime-Version:
+      - 3.13.3
+      x-stainless-read-timeout:
+      - '600'
+      x-stainless-retry-count:
+      - '0'
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-DPB5LNj3De8GmyTYjsSCXBus7wf7B\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1774893183,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"Hello! How can I assist you today?\",\n
+        \       \"refusal\": null,\n        \"annotations\": []\n      },\n      \"logprobs\":
+        null,\n      \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
+        9,\n    \"completion_tokens\": 9,\n    \"total_tokens\": 18,\n    \"prompt_tokens_details\":
+        {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_ca3e7d71bf\"\n}\n"
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-Ray:
+      - 9e48f8ba4ee792c6-YYZ
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Mon, 30 Mar 2026 17:53:04 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      content-length:
+      - '839'
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '637'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      set-cookie:
+      - __cf_bm=K5ScKgWxKV8qrun72h6zWqwqzuox1P7HfwixJPaisaU-1774893183.0866485-1.0.1.1-aFlnpoUkbkAngI0favlhLoCOJtcaN7dUO6bYg0g4jfC.HRhU3s_NrZt7oH01lSi39dR_xL9hFmrQs2o5en0gk0jRe0MRJTasLHnGP6o4.yXI0SZeUn56WYYaGKfOKVTx;
+        HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Mon, 30 Mar 2026
+        18:23:04 GMT
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999995'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_1e2852cd7fd34d339ac251eeef0c0487
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_model_settings_override_in_input.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_model_settings_override_in_input.yaml
new file mode 100644
index 00000000..1e5de914
--- /dev/null
+++ b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_model_settings_override_in_input.yaml
@@ -0,0 +1,120 @@
+interactions:
+- request:
+    body: '{"messages":[{"role":"user","content":"Tell me a story"}],"model":"gpt-4o-mini","max_completion_tokens":200,"stream":false,"temperature":0.9}'
+    headers:
+      Accept:
+      - application/json
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '141'
+      Content-Type:
+      - application/json
+      Cookie:
+      - _cfuvid=VQg4i_utDK73HtVZX9MnimdbFMrcTwHiGTkj8zvaxBM-1766265730198-0.0.1.1-604800000;
+        __cf_bm=K5ScKgWxKV8qrun72h6zWqwqzuox1P7HfwixJPaisaU-1774893183.0866485-1.0.1.1-aFlnpoUkbkAngI0favlhLoCOJtcaN7dUO6bYg0g4jfC.HRhU3s_NrZt7oH01lSi39dR_xL9hFmrQs2o5en0gk0jRe0MRJTasLHnGP6o4.yXI0SZeUn56WYYaGKfOKVTx
+      Host:
+      - api.openai.com
+      User-Agent:
+      - pydantic-ai/1.73.0
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Async:
+      - async:asyncio
+      X-Stainless-Lang:
+      - python
+      X-Stainless-OS:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 2.30.0
+      X-Stainless-Runtime:
+      - CPython
+      X-Stainless-Runtime-Version:
+      - 3.13.3
+      x-stainless-read-timeout:
+      - '600'
+      x-stainless-retry-count:
+      - '0'
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-DPB5N7VhlEbWXNCjjIvlcDN3PJnb6\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1774893185,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"Once upon a time in a quaint little
+        village nestled between rolling hills and lush green forests, there lived
+        a young girl named Elara. She was known far and wide for her insatiable curiosity
+        and adventurous spirit. Every morning, Elara would wander through the village,
+        her bright blue eyes sparkling with wonder at the world around her.\\n\\nOne
+        sunny afternoon, while exploring the outskirts of the forest, Elara stumbled
+        upon a hidden path that she had never seen before. It was overgrown with vines
+        and flowers, but something about it beckoned her to follow. With each step,
+        the sounds of the village faded away, replaced by the gentle rustle of leaves
+        and the distant chirping of birds.\\n\\nAs she ventured deeper into the woods,
+        the sunlight danced through the trees, creating a magical tapestry of light
+        and shadow. After walking for what felt like hours, Elara arrived at a clearing
+        that took her breath away. In the center stood a magnificent tree, its bark
+        shimmering like silver, with branches\",\n        \"refusal\": null,\n        \"annotations\":
+        []\n      },\n      \"logprobs\": null,\n      \"finish_reason\": \"length\"\n
+        \   }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 11,\n    \"completion_tokens\":
+        200,\n    \"total_tokens\": 211,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
+        0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_ca3e7d71bf\"\n}\n"
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-Ray:
+      - 9e48f8c39839aa98-YYZ
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Mon, 30 Mar 2026 17:53:09 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      content-length:
+      - '1813'
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '4441'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999995'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_0e199628e6ab42a8bde2da2054693dac
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/py/src/braintrust/wrappers/cassettes/test_agent_with_system_prompt_in_metadata.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_system_prompt_in_metadata.yaml
similarity index 100%
rename from py/src/braintrust/wrappers/cassettes/test_agent_with_system_prompt_in_metadata.yaml
rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_system_prompt_in_metadata.yaml
diff --git a/py/src/braintrust/wrappers/cassettes/test_agent_with_tool_execution.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_tool_execution.yaml
similarity index 100%
rename from py/src/braintrust/wrappers/cassettes/test_agent_with_tool_execution.yaml
rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_tool_execution.yaml
diff --git a/py/src/braintrust/wrappers/cassettes/test_agent_with_tools.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_tools.yaml
similarity index 100%
rename from py/src/braintrust/wrappers/cassettes/test_agent_with_tools.yaml
rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_agent_with_tools.yaml
diff --git a/py/src/braintrust/integrations/pydantic_ai/cassettes/test_auto_pydantic_ai.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_auto_pydantic_ai.yaml
new file mode 100644
index 00000000..32c50c9d
--- /dev/null
+++ b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_auto_pydantic_ai.yaml
@@ -0,0 +1,112 @@
+interactions:
+- request:
+    body: '{"messages":[{"role":"user","content":"Say hi"}],"model":"gpt-4o-mini","max_completion_tokens":100,"stream":false}'
+    headers:
+      Accept:
+      - application/json
+      Accept-Encoding:
+      - gzip, deflate, zstd
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '114'
+      Content-Type:
+      - application/json
+      Host:
+      - api.openai.com
+      User-Agent:
+      - pydantic-ai/1.44.0
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Async:
+      - async:asyncio
+      X-Stainless-Lang:
+      - python
+      X-Stainless-OS:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 2.15.0
+      X-Stainless-Raw-Response:
+      - 'true'
+      X-Stainless-Runtime:
+      - CPython
+      X-Stainless-Runtime-Version:
+      - 3.13.3
+      x-stainless-read-timeout:
+      - '600'
+      x-stainless-retry-count:
+      - '0'
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAAAwAAAP//jFLLbtswELzrK7Y8W4Vsq4ntSw8tmvTUBuihDwQCQ64kJhSXIFdtjcD/
+        XlCyLbkPoBcddnZGM8N9zgCE0WIHQrWSVedt/rbQhW8/lF/iHX69a8q9evOx+fzp6fGdu7kRi8Sg
+        h0dUfGK9VNR5i2zIjbAKKBmT6vL6aluUq/X61QB0pNEmWuM5LynvjDP5qliVeXGdLzdHdktGYRQ7
+        +JYBADwP3+TTafwpdlAsTpMOY5QNit15CUAEsmkiZIwmsnQsFhOoyDG6wfqtAW4x4Au4pR+gpIP3
+        MHJgTz0wabl/PecGrPsok3/XWzsDpHPEMuUfXN8fkcPZp6XGB3qIv1FFbZyJbRVQRnLJU2TyYkAP
+        GcD90Ed/EVH4QJ3niukJh99tRzUxPcKELY9VCSaWdjY/kS7EKo0sjY2zOoWSqkU9MafuZa8NzYBs
+        FvlPM3/THmMb1/yP/AQohZ5RVz6gNuoy8LQWMJ3ov9bOFQ+GRcTw3Sis2GBIz6Cxlr0dD0fEfWTs
+        qtq4BoMPZrye2ler7XpdyO3VZiOyQ/YLAAD//wMAbBhxq0sDAAA=
+    headers:
+      CF-RAY:
+      - 9c1afdbedfc4cf0a-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Thu, 22 Jan 2026 00:38:55 GMT
+      Server:
+      - cloudflare
+      Set-Cookie:
+      - __cf_bm=.v4HHKHusX6vziKsYW5cyVzZRrAsCxp4XT463GaX0yQ-1769042335-1.0.1.1-InjFtjx7UOJ8ivwZeShYpDg8mc4QGt.4kpoe9GlkrPwH7LBqBZxH.e.oLUSXSkyh_t0ETNUXh6C5G5zGSAXLYT6oNyc6cef0jwB2ADi_S.w;
+        path=/; expires=Thu, 22-Jan-26 01:08:55 GMT; domain=.api.openai.com; HttpOnly;
+        Secure; SameSite=None
+      - _cfuvid=bHlkcNsEuGGe.AQuXN6zbPWK8MJ2dKBjLFcSS263aVQ-1769042335390-0.0.1.1-604800000;
+        path=/; domain=.api.openai.com; HttpOnly; Secure; SameSite=None
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      cf-cache-status:
+      - DYNAMIC
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '395'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      x-envoy-upstream-service-time:
+      - '412'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999997'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_0cc555f0b9354a85a3b0f965716d99de
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/py/src/braintrust/wrappers/cassettes/test_direct_model_request.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_direct_model_request.yaml
similarity index 100%
rename from py/src/braintrust/wrappers/cassettes/test_direct_model_request.yaml
rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_direct_model_request.yaml
diff --git a/py/src/braintrust/integrations/pydantic_ai/cassettes/test_direct_model_request_creates_nested_chat_span_without_class_scan.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_direct_model_request_creates_nested_chat_span_without_class_scan.yaml
new file mode 100644
index 00000000..d4ff5c2e
--- /dev/null
+++ b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_direct_model_request_creates_nested_chat_span_without_class_scan.yaml
@@ -0,0 +1,109 @@
+interactions:
+- request:
+    body: '{"messages":[{"role":"user","content":"What is 2+2? Answer with just the
+      number."}],"model":"gpt-4o-mini","stream":false}'
+    headers:
+      Accept:
+      - application/json
+      Accept-Encoding:
+      - gzip, deflate, zstd
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '121'
+      Content-Type:
+      - application/json
+      Host:
+      - api.openai.com
+      User-Agent:
+      - pydantic-ai/1.66.0
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Async:
+      - async:asyncio
+      X-Stainless-Lang:
+      - python
+      X-Stainless-OS:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 2.24.0
+      X-Stainless-Runtime:
+      - CPython
+      X-Stainless-Runtime-Version:
+      - 3.13.3
+      x-stainless-read-timeout:
+      - '600'
+      x-stainless-retry-count:
+      - '0'
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-DPVzHBFp5i6BHHGWuTsYqBGubpikG\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1774973531,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"4\",\n        \"refusal\": null,\n
+        \       \"annotations\": []\n      },\n      \"logprobs\": null,\n      \"finish_reason\":
+        \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 20,\n    \"completion_tokens\":
+        1,\n    \"total_tokens\": 21,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
+        0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_ca3e7d71bf\"\n}\n"
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-Ray:
+      - 9e50a256ab1f178c-YYZ
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Tue, 31 Mar 2026 16:12:11 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      content-length:
+      - '807'
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '392'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      set-cookie:
+      - __cf_bm=zLt6TNwkPTB2O.Bp3ceD4Ijrj3biQuLYigEMbvthfOs-1774973530.6689873-1.0.1.1-PL.I8XK.cSEDkAeAeA.SAH05Z2qjjTzvfUfqd4fC3.j0tktm6p0Tqb1.Zwy0776VHR2qgoUpogSKSon8qg2QYY179MO.33I7RVvoQMPznp3dxHHWwIBuPqEqxwJZqbZ2;
+        HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Tue, 31 Mar 2026
+        16:42:11 GMT
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999987'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_c9480ee8e49349b7a68c3a2767779fd3
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/py/src/braintrust/wrappers/cassettes/test_direct_model_request_stream.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_direct_model_request_stream.yaml
similarity index 100%
rename from py/src/braintrust/wrappers/cassettes/test_direct_model_request_stream.yaml
rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_direct_model_request_stream.yaml
diff --git a/py/src/braintrust/wrappers/cassettes/test_direct_model_request_stream_complete_output.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_direct_model_request_stream_complete_output.yaml
similarity index 100%
rename from py/src/braintrust/wrappers/cassettes/test_direct_model_request_stream_complete_output.yaml
rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_direct_model_request_stream_complete_output.yaml
diff --git a/py/src/braintrust/wrappers/cassettes/test_direct_model_request_stream_sync.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_direct_model_request_stream_sync.yaml
similarity index 100%
rename from py/src/braintrust/wrappers/cassettes/test_direct_model_request_stream_sync.yaml
rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_direct_model_request_stream_sync.yaml
diff --git a/py/src/braintrust/wrappers/cassettes/test_direct_model_request_sync.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_direct_model_request_sync.yaml
similarity index 100%
rename from py/src/braintrust/wrappers/cassettes/test_direct_model_request_sync.yaml
rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_direct_model_request_sync.yaml
diff --git a/py/src/braintrust/wrappers/cassettes/test_direct_model_request_with_settings.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_direct_model_request_with_settings.yaml
similarity index 100%
rename from py/src/braintrust/wrappers/cassettes/test_direct_model_request_with_settings.yaml
rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_direct_model_request_with_settings.yaml
diff --git a/py/src/braintrust/wrappers/cassettes/test_model_class_span_names.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_model_class_span_names.yaml
similarity index 100%
rename from py/src/braintrust/wrappers/cassettes/test_model_class_span_names.yaml
rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_model_class_span_names.yaml
diff --git a/py/src/braintrust/integrations/pydantic_ai/cassettes/test_model_request_stream_sync_thread_context_propagation.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_model_request_stream_sync_thread_context_propagation.yaml
new file mode 100644
index 00000000..69ecec3d
--- /dev/null
+++ b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_model_request_stream_sync_thread_context_propagation.yaml
@@ -0,0 +1,436 @@
+interactions:
+- request:
+    body: null
+    headers:
+      Accept:
+      - '*/*'
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '0'
+      User-Agent:
+      - python-requests/2.33.1
+    method: POST
+    uri: https://www.braintrust.dev/api/apikey/login
+  response:
+    body:
+      string: '{"org_info":[{"id":"5d7c97d7-fef1-4cb7-bda6-7e3756a0ca8e","name":"braintrustdata.com","api_url":"https://staging-api.braintrust.dev","git_metadata":{"fields":["commit","branch","tag","author_name","author_email","commit_message","commit_time","dirty"],"collect":"some"},"is_universal_api":true,"proxy_url":"https://staging-api.braintrust.dev","realtime_url":"wss://realtime.braintrustapi.com"}]}'
+    headers:
+      Access-Control-Allow-Credentials:
+      - 'true'
+      Access-Control-Allow-Headers:
+      - X-CSRF-Token, X-Requested-With, Accept, Accept-Version, Content-Length, Content-MD5,
+        Content-Type, Date, X-Api-Version
+      Access-Control-Allow-Methods:
+      - GET,OPTIONS,PATCH,DELETE,POST,PUT
+      Access-Control-Allow-Origin:
+      - '*'
+      Cache-Control:
+      - public, max-age=0, must-revalidate
+      Content-Length:
+      - '395'
+      Content-Security-Policy:
+      - 'script-src ''self'' ''unsafe-eval'' ''wasm-unsafe-eval'' ''strict-dynamic''
+        ''nonce-Y2NiZDlhM2MtNGFlNy00OTE1LWE1YjQtZTI3MzcwOTEzZTgx''  *.js.stripe.com
+        js.stripe.com maps.googleapis.com ; style-src ''self'' ''unsafe-inline'' *.braintrust.dev
+        btcm6qilbbhv4yi1.public.blob.vercel-storage.com fonts.googleapis.com www.gstatic.com
+        d4tuoctqmanu0.cloudfront.net; font-src ''self'' data: fonts.gstatic.com btcm6qilbbhv4yi1.public.blob.vercel-storage.com
+        cdn.jsdelivr.net d4tuoctqmanu0.cloudfront.net fonts.googleapis.com mintlify-assets.b-cdn.net
+        fonts.cdnfonts.com; object-src ''none''; base-uri ''self''; form-action ''self'';
+        frame-ancestors ''self''; worker-src ''self'' blob:; report-uri https://o4507221741076480.ingest.us.sentry.io/api/4507221754380288/security/?sentry_key=27fa5ac907cf7c6ce4a1ab2a03f805b4&sentry_environment=production&sentry_release=16;
+        report-to csp-endpoint-0'
+      Content-Type:
+      - application/json; charset=utf-8
+      Date:
+      - Mon, 30 Mar 2026 17:53:13 GMT
+      Etag:
+      - '"12n7ok4b5phaz"'
+      Reporting-Endpoints:
+      - csp-endpoint-0="https://o4507221741076480.ingest.us.sentry.io/api/4507221754380288/security/?sentry_key=27fa5ac907cf7c6ce4a1ab2a03f805b4&sentry_environment=production&sentry_release=16"
+      Server:
+      - Vercel
+      Strict-Transport-Security:
+      - max-age=63072000
+      X-Bt-Was-Udf-Cached:
+      - 'true'
+      X-Clerk-Auth-Message:
+      - Invalid JWT form. A JWT consists of three parts separated by dots. (reason=token-invalid,
+        token-carrier=header)
+      X-Clerk-Auth-Reason:
+      - token-invalid
+      X-Clerk-Auth-Status:
+      - signed-out
+      X-Content-Type-Options:
+      - nosniff
+      X-Frame-Options:
+      - SAMEORIGIN
+      X-Matched-Path:
+      - /api/apikey/login
+      X-Nonce:
+      - Y2NiZDlhM2MtNGFlNy00OTE1LWE1YjQtZTI3MzcwOTEzZTgx
+      X-Vercel-Cache:
+      - MISS
+      X-Vercel-Id:
+      - yul1::iad1::l88hm-1774893193654-1b03d5ef0879
+    status:
+      code: 200
+      message: OK
+- request:
+    body: null
+    headers:
+      Accept:
+      - '*/*'
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      User-Agent:
+      - python-requests/2.33.1
+    method: GET
+    uri: https://staging-api.braintrust.dev/version
+  response:
+    body:
+      string: '{"version":"1.1.31","date_version":"20260330","ff_version":21,"commit":"7e00d36a24f3ee49ce2f75f19d20386636e5519b","deployment_mode":"lambda","deployment_type":"custom","brainstore_default":"force","brainstore_can_contain_row_refs":true,"skip_pg_config":"all","has_realtime_wal_bucket":true,"brainstore_wal_footer_version":"v3","brainstore_wal_use_efficient_format":true,"has_logs2":true,"js":true,"universal":true,"code_execution":true,"logs3_payload_max_bytes":5242880,"control_plane_telemetry":["status","metrics","logs","traces","memprof","usage"]}'
+    headers:
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '551'
+      Content-Type:
+      - application/json; charset=utf-8
+      Date:
+      - Mon, 30 Mar 2026 17:53:13 GMT
+      Via:
+      - 1.1 90aae5d559fbbbe252f6d8de0a8d7ca8.cloudfront.net (CloudFront), 1.1 f4f653453255d3978688a6c5c61be2d6.cloudfront.net
+        (CloudFront)
+      X-Amz-Cf-Id:
+      - VbidRp8Ziuk63w3KD3joP-ZO7fBoIV_oHTYVlsHGQjWuURLgUMZ-Ow==
+      X-Amz-Cf-Pop:
+      - IAD55-P9
+      - IAD61-P11
+      X-Amzn-Trace-Id:
+      - Root=1-69cab889-1aaac8301c7833580844d4e1;Parent=025690bcec6beb43;Sampled=0;Lineage=1:fc3b4ff1:0
+      X-Cache:
+      - Miss from cloudfront
+      access-control-allow-credentials:
+      - 'true'
+      access-control-expose-headers:
+      - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms
+      etag:
+      - W/"227-KBfWb2OrI4L6tO+9HRx1pkjhfrY"
+      vary:
+      - Origin
+      x-amz-apigw-id:
+      - bDHFmGKEIAMEaqg=
+      x-amzn-Remapped-content-length:
+      - '551'
+      x-amzn-RequestId:
+      - 031b2887-f4fc-4737-ac70-77fa3d42cc96
+      x-bt-internal-trace-id:
+      - 69cab88900000000055a53298b8a9e38
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"rows": [{"_is_merge": false, "context": {"caller_filename": "/Users/abhijeetprasad/workspace/braintrust-sdk-python/py/.nox/test_pydantic_ai_integration-latest/lib/python3.13/site-packages/pydantic_ai/direct.py",
+      "caller_functionname": "_consume_async_stream", "caller_lineno": 360}, "created":
+      "2026-03-30T17:53:13.604954+00:00", "id": "021ecd8c-ef0b-4705-8491-84b7fd7e44a3",
+      "input": {"instrument": null, "messages": [{"instructions": null, "kind": "request",
+      "metadata": null, "parts": [{"content": "Hello", "part_kind": "user-prompt",
+      "timestamp": "2026-03-30 17:53:13.603675+00:00"}], "run_id": null, "timestamp":
+      null}], "model": "openai:gpt-4o-mini", "model_request_parameters": null, "model_settings":
+      null}, "log_id": "g", "metadata": {"model": "gpt-4o-mini", "provider": "openai"},
+      "metrics": {"start": 1774893193.604952}, "project_id": "test-pydantic-ai-integration",
+      "root_span_id": "6c05f860-41f9-4d0b-92d2-53a72ea73045", "span_attributes": {"exec_counter":
+      91, "name": "model_request_stream", "type": "llm"}, "span_id": "33c192f1-78db-4d9f-89f7-d6f36bc817aa",
+      "span_parents": ["c568d22b-66c3-4f43-a610-2f3807e92d60"]}], "api_version": 2}'
+    headers:
+      Accept:
+      - '*/*'
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '1152'
+      User-Agent:
+      - python-requests/2.33.1
+    method: POST
+    uri: https://staging-api.braintrust.dev/logs3
+  response:
+    body:
+      string: '{"Code":"ForbiddenError","Message":"Missing read access to project_log
+        id test-pydantic-ai-integration, or the project_log does not exist [user_email=abhijeet@braintrustdata.com]
+        [user_org=braintrustdata.com] [timestamp=1774893194.106]","InternalTraceId":"69cab88a000000001216f8994c57459e","Path":"/logs3","Service":"api"}'
+    headers:
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json; charset=utf-8
+      Date:
+      - Mon, 30 Mar 2026 17:53:14 GMT
+      Via:
+      - 1.1 e8ac579de7fc88986153d8653adf92fc.cloudfront.net (CloudFront), 1.1 da473159f6f131ea8035a6279b0f60aa.cloudfront.net
+        (CloudFront)
+      X-Amz-Cf-Id:
+      - MsuUmsZgF5KmuHR5gJnlVeocN-0ut6X8N3L4lrOMghuMxf_EFvJerg==
+      X-Amz-Cf-Pop:
+      - IAD55-P9
+      - IAD61-P11
+      X-Amzn-Trace-Id:
+      - Root=1-69cab889-2d61f8540b519af25e9769e5;Parent=5b0c1b4a8c31a605;Sampled=0;Lineage=1:fc3b4ff1:0
+      X-Cache:
+      - Error from cloudfront
+      access-control-allow-credentials:
+      - 'true'
+      access-control-expose-headers:
+      - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms
+      content-length:
+      - '322'
+      etag:
+      - W/"142-xcxriWqbDXxHzOZANosR4ILTx3M"
+      vary:
+      - Origin, Accept-Encoding
+      x-amz-apigw-id:
+      - bDHFnEuqIAMEtgg=
+      x-amzn-RequestId:
+      - 3aaa6d41-535f-49d1-b1bd-4da058a30870
+      x-bt-internal-trace-id:
+      - 69cab88a000000001216f8994c57459e
+    status:
+      code: 403
+      message: Forbidden
+- request:
+    body: '{"rows": [{"_is_merge": true, "id": "cb2a81f1-d1ce-46eb-b71f-48f94cc77709",
+      "log_id": "g", "metrics": {"completion_tokens": 8.0, "duration": 0.002259969711303711,
+      "end": 1774893189.5945878, "prompt_cache_creation_tokens": 0.0, "prompt_cached_tokens":
+      0.0, "prompt_tokens": 14.0, "start": 1774893189.5923278, "time_to_first_token":
+      0.0016760826110839844, "tokens": 22.0}, "output": {"finish_reason": "stop",
+      "kind": "response", "metadata": null, "model_name": "gpt-4o-mini-2024-07-18",
+      "parts": [{"content": "1, 2, 3.", "id": null, "part_kind": "text", "provider_details":
+      null, "provider_name": null}], "provider_details": {"finish_reason": "stop",
+      "timestamp": "2025-12-20 21:13:35+00:00"}, "provider_name": "openai", "provider_response_id":
+      "chatcmpl-CoyYZrJy9JYFY664IA2WgzVOsQSmj", "provider_url": "https://api.openai.com/v1/",
+      "run_id": null, "timestamp": "2026-03-30 17:53:09.593906+00:00", "usage": {"cache_audio_read_tokens":
+      0, "cache_read_tokens": 0, "cache_write_tokens": 0, "details": {"accepted_prediction_tokens":
+      0, "audio_tokens": 0, "reasoning_tokens": 0, "rejected_prediction_tokens": 0},
+      "input_audio_tokens": 0, "input_tokens": 14, "output_audio_tokens": 0, "output_tokens":
+      8}}, "project_id": "test-pydantic-ai-integration", "root_span_id": "7b1235d0-d31d-49ed-8df1-6ae33d4dedf7",
+      "span_id": "d47230a5-49a5-4cba-b863-1e479ceac7aa", "span_parents": ["f35d8e02-4e31-4cb8-b8ad-2bb47c382e1e"]},{"_is_merge":
+      true, "id": "bdcee637-b6ac-4d83-bd0b-d6d5f559b2e0", "log_id": "g", "metrics":
+      {"completion_tokens": 8.0, "duration": 0.0023889541625976562, "end": 1774893189.59468,
+      "prompt_cache_creation_tokens": 0.0, "prompt_cached_tokens": 0.0, "prompt_tokens":
+      14.0, "start": 1774893189.592291, "time_to_first_token": 0.001712799072265625,
+      "tokens": 22.0}, "output": {"finish_reason": "stop", "kind": "response", "metadata":
+      null, "model_name": "gpt-4o-mini-2024-07-18", "parts": [{"content": "1, 2, 3.",
+      "id": null, "part_kind": "text", "provider_details": null, "provider_name":
+      null}], "provider_details": {"finish_reason": "stop", "timestamp": "2025-12-20
+      21:13:35+00:00"}, "provider_name": "openai", "provider_response_id": "chatcmpl-CoyYZrJy9JYFY664IA2WgzVOsQSmj",
+      "provider_url": "https://api.openai.com/v1/", "run_id": null, "timestamp": "2026-03-30
+      17:53:09.593906+00:00", "usage": {"cache_audio_read_tokens": 0, "cache_read_tokens":
+      0, "cache_write_tokens": 0, "details": {"accepted_prediction_tokens": 0, "audio_tokens":
+      0, "reasoning_tokens": 0, "rejected_prediction_tokens": 0}, "input_audio_tokens":
+      0, "input_tokens": 14, "output_audio_tokens": 0, "output_tokens": 8}}, "project_id":
+      "test-pydantic-ai-integration", "root_span_id": "7b1235d0-d31d-49ed-8df1-6ae33d4dedf7",
+      "span_id": "f35d8e02-4e31-4cb8-b8ad-2bb47c382e1e", "span_parents": ["7b1235d0-d31d-49ed-8df1-6ae33d4dedf7"]}],
+      "api_version": 2}'
+    headers:
+      Accept:
+      - '*/*'
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '2826'
+      User-Agent:
+      - python-requests/2.33.1
+    method: POST
+    uri: https://staging-api.braintrust.dev/logs3
+  response:
+    body:
+      string: '{"Code":"ForbiddenError","Message":"Missing read access to project_log
+        id test-pydantic-ai-integration, or the project_log does not exist [user_email=abhijeet@braintrustdata.com]
+        [user_org=braintrustdata.com] [timestamp=1774893194.229]","InternalTraceId":"69cab88a000000006f8e7e21edeefe09","Path":"/logs3","Service":"api"}'
+    headers:
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json; charset=utf-8
+      Date:
+      - Mon, 30 Mar 2026 17:53:14 GMT
+      Via:
+      - 1.1 90aae5d559fbbbe252f6d8de0a8d7ca8.cloudfront.net (CloudFront), 1.1 bd7551a5260a8bac087bad2ac8ebffec.cloudfront.net
+        (CloudFront)
+      X-Amz-Cf-Id:
+      - U3UXEimOcKVsdV6Lo0Yk8XezNhwfTiQcy__NSvaZ_5xaT8odied-Tw==
+      X-Amz-Cf-Pop:
+      - IAD55-P9
+      - IAD61-P11
+      X-Amzn-Trace-Id:
+      - Root=1-69cab88a-72db82d1703ec6b9651329df;Parent=40e411d20fc278df;Sampled=0;Lineage=1:fc3b4ff1:0
+      X-Cache:
+      - Error from cloudfront
+      access-control-allow-credentials:
+      - 'true'
+      access-control-expose-headers:
+      - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms
+      content-length:
+      - '322'
+      etag:
+      - W/"142-wDmrkVP8fVfti8Te3LX+0pveJRw"
+      vary:
+      - Origin, Accept-Encoding
+      x-amz-apigw-id:
+      - bDHFoFWDoAMEbjQ=
+      x-amzn-RequestId:
+      - e0b0db62-51f5-451f-8aa4-6e86a0936ec7
+      x-bt-internal-trace-id:
+      - 69cab88a000000006f8e7e21edeefe09
+    status:
+      code: 403
+      message: Forbidden
+- request:
+    body: '{"messages":[{"role":"user","content":"Hello"}],"model":"gpt-4o-mini","stream":true,"stream_options":{"include_usage":true}}'
+    headers:
+      Accept:
+      - application/json
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '124'
+      Content-Type:
+      - application/json
+      Cookie:
+      - _cfuvid=tXOZ7vGE2DBF6L6fDg_veKtSaUVC4UPotJDezWYoYXI-1766265191281-0.0.1.1-604800000
+      Host:
+      - api.openai.com
+      User-Agent:
+      - pydantic-ai/1.73.0
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Async:
+      - async:asyncio
+      X-Stainless-Lang:
+      - python
+      X-Stainless-OS:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 2.30.0
+      X-Stainless-Runtime:
+      - CPython
+      X-Stainless-Runtime-Version:
+      - 3.13.3
+      x-stainless-read-timeout:
+      - '600'
+      x-stainless-retry-count:
+      - '0'
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: 'data: {"id":"chatcmpl-DPB5VAbDb1LwuCfC12JgojqMCDbvL","object":"chat.completion.chunk","created":1774893193,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"lMoGU7R2O"}
+
+
+        data: {"id":"chatcmpl-DPB5VAbDb1LwuCfC12JgojqMCDbvL","object":"chat.completion.chunk","created":1774893193,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"O1qW8o"}
+
+
+        data: {"id":"chatcmpl-DPB5VAbDb1LwuCfC12JgojqMCDbvL","object":"chat.completion.chunk","created":1774893193,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"VVrBHX6n3h"}
+
+
+        data: {"id":"chatcmpl-DPB5VAbDb1LwuCfC12JgojqMCDbvL","object":"chat.completion.chunk","created":1774893193,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"
+        How"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"5hWyEXX"}
+
+
+        data: {"id":"chatcmpl-DPB5VAbDb1LwuCfC12JgojqMCDbvL","object":"chat.completion.chunk","created":1774893193,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"
+        can"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"nVyWCh4"}
+
+
+        data: {"id":"chatcmpl-DPB5VAbDb1LwuCfC12JgojqMCDbvL","object":"chat.completion.chunk","created":1774893193,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"
+        I"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"d6to3IExF"}
+
+
+        data: {"id":"chatcmpl-DPB5VAbDb1LwuCfC12JgojqMCDbvL","object":"chat.completion.chunk","created":1774893193,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"
+        assist"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"JxzI"}
+
+
+        data: {"id":"chatcmpl-DPB5VAbDb1LwuCfC12JgojqMCDbvL","object":"chat.completion.chunk","created":1774893193,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"
+        you"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"i6J33cg"}
+
+
+        data: {"id":"chatcmpl-DPB5VAbDb1LwuCfC12JgojqMCDbvL","object":"chat.completion.chunk","created":1774893193,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"
+        today"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Irdup"}
+
+
+        data: {"id":"chatcmpl-DPB5VAbDb1LwuCfC12JgojqMCDbvL","object":"chat.completion.chunk","created":1774893193,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"?"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"R65rXtBVaV"}
+
+
+        data: {"id":"chatcmpl-DPB5VAbDb1LwuCfC12JgojqMCDbvL","object":"chat.completion.chunk","created":1774893193,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null,"obfuscation":"Bf2lm"}
+
+
+        data: {"id":"chatcmpl-DPB5VAbDb1LwuCfC12JgojqMCDbvL","object":"chat.completion.chunk","created":1774893193,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[],"usage":{"prompt_tokens":8,"completion_tokens":9,"total_tokens":17,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"OmfK72y6EWIX"}
+
+
+        data: [DONE]
+
+
+        '
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-Ray:
+      - 9e48f8fc3b177116-YYZ
+      Connection:
+      - keep-alive
+      Content-Type:
+      - text/event-stream; charset=utf-8
+      Date:
+      - Mon, 30 Mar 2026 17:53:14 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '403'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      set-cookie:
+      - __cf_bm=IHGiGf.Mtx_0IgVPmpOyr33nU6wJiuBbWv2RKuFyGQ8-1774893193.6401794-1.0.1.1-_WtR5M3BuFYZr5v6J1YF_vcnLZ95nF_kyCjT5FUTaqbdodV8NBNQct8MkpsPKDu12ElhK3j1RUo3KbOi1PcdyyIXNn0Gv6R_R73LtuYWQFstRviblljvfUx8ryTnWuz0;
+        HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Mon, 30 Mar 2026
+        18:23:14 GMT
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999995'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_50755602447c427682158e8e30b526a7
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/py/src/braintrust/wrappers/cassettes/test_multiple_identical_sequential_streams.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_multiple_identical_sequential_streams.yaml
similarity index 100%
rename from py/src/braintrust/wrappers/cassettes/test_multiple_identical_sequential_streams.yaml
rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_multiple_identical_sequential_streams.yaml
diff --git a/py/src/braintrust/wrappers/cassettes/test_multiple_sequential_streams.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_multiple_sequential_streams.yaml
similarity index 100%
rename from py/src/braintrust/wrappers/cassettes/test_multiple_sequential_streams.yaml
rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_multiple_sequential_streams.yaml
diff --git a/py/src/braintrust/wrappers/cassettes/test_no_model_agent_run.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_no_model_agent_run.yaml
similarity index 100%
rename from py/src/braintrust/wrappers/cassettes/test_no_model_agent_run.yaml
rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_no_model_agent_run.yaml
diff --git a/py/src/braintrust/wrappers/cassettes/test_no_model_agent_run_with_logfire.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_no_model_agent_run_with_logfire.yaml
similarity index 100%
rename from py/src/braintrust/wrappers/cassettes/test_no_model_agent_run_with_logfire.yaml
rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_no_model_agent_run_with_logfire.yaml
diff --git a/py/src/braintrust/wrappers/cassettes/test_pydantic_wrapped_completion.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_pydantic_wrapped_completion.yaml
similarity index 100%
rename from py/src/braintrust/wrappers/cassettes/test_pydantic_wrapped_completion.yaml
rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_pydantic_wrapped_completion.yaml
diff --git a/py/src/braintrust/wrappers/cassettes/test_pydantic_wrapped_stream.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_pydantic_wrapped_stream.yaml
similarity index 100%
rename from py/src/braintrust/wrappers/cassettes/test_pydantic_wrapped_stream.yaml
rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_pydantic_wrapped_stream.yaml
diff --git a/py/src/braintrust/wrappers/cassettes/test_stream_buffer_pattern_early_return.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_stream_buffer_pattern_early_return.yaml
similarity index 100%
rename from py/src/braintrust/wrappers/cassettes/test_stream_buffer_pattern_early_return.yaml
rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_stream_buffer_pattern_early_return.yaml
diff --git a/py/src/braintrust/integrations/pydantic_ai/cassettes/test_stream_early_break_async_generator.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_stream_early_break_async_generator.yaml
new file mode 100644
index 00000000..d171dce1
--- /dev/null
+++ b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_stream_early_break_async_generator.yaml
@@ -0,0 +1,314 @@
+interactions:
+- request:
+    body: null
+    headers:
+      Accept:
+      - '*/*'
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      User-Agent:
+      - python-requests/2.33.1
+    method: GET
+    uri: https://staging-api.braintrust.dev/version
+  response:
+    body:
+      string: '{"version":"1.1.31","date_version":"20260330","ff_version":21,"commit":"7e00d36a24f3ee49ce2f75f19d20386636e5519b","deployment_mode":"lambda","deployment_type":"custom","brainstore_default":"force","brainstore_can_contain_row_refs":true,"skip_pg_config":"all","has_realtime_wal_bucket":true,"brainstore_wal_footer_version":"v3","brainstore_wal_use_efficient_format":true,"has_logs2":true,"js":true,"universal":true,"code_execution":true,"logs3_payload_max_bytes":5242880,"control_plane_telemetry":["status","metrics","logs","traces","memprof","usage"]}'
+    headers:
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '551'
+      Content-Type:
+      - application/json; charset=utf-8
+      Date:
+      - Mon, 30 Mar 2026 17:53:10 GMT
+      Via:
+      - 1.1 90aae5d559fbbbe252f6d8de0a8d7ca8.cloudfront.net (CloudFront), 1.1 0260b26200cba81bc8e0dc18d51916d8.cloudfront.net
+        (CloudFront)
+      X-Amz-Cf-Id:
+      - Rutphtid1FBgIKTDJbyYk8gNAU97BOpNPtX1xDZMaleONeKJ3s38Iw==
+      X-Amz-Cf-Pop:
+      - IAD55-P9
+      - IAD61-P11
+      X-Amzn-Trace-Id:
+      - Root=1-69cab886-77d423473139f3c74b00cea3;Parent=06a3bc64aa61c425;Sampled=0;Lineage=1:fc3b4ff1:0
+      X-Cache:
+      - Miss from cloudfront
+      access-control-allow-credentials:
+      - 'true'
+      access-control-expose-headers:
+      - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms
+      etag:
+      - W/"227-KBfWb2OrI4L6tO+9HRx1pkjhfrY"
+      vary:
+      - Origin
+      x-amz-apigw-id:
+      - bDHFBHTxIAMErhA=
+      x-amzn-Remapped-content-length:
+      - '551'
+      x-amzn-RequestId:
+      - 51a738b6-ac50-428b-a776-be56711c5922
+      x-bt-internal-trace-id:
+      - 69cab886000000002c579ac6ee72eb06
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"rows": [{"_is_merge": false, "context": {"caller_filename": "/Users/abhijeetprasad/workspace/braintrust-sdk-python/py/.nox/test_pydantic_ai_integration-latest/lib/python3.13/site-packages/pydantic_ai/direct.py",
+      "caller_functionname": "_consume_async_stream", "caller_lineno": 360}, "created":
+      "2026-03-30T17:53:09.592213+00:00", "id": "bdcee637-b6ac-4d83-bd0b-d6d5f559b2e0",
+      "input": {"instrument": null, "messages": [{"instructions": null, "kind": "request",
+      "metadata": null, "parts": [{"content": "Count from 1 to 3", "part_kind": "user-prompt",
+      "timestamp": "2026-03-30 17:53:09.591693+00:00"}], "run_id": null, "timestamp":
+      null}], "model": "openai:gpt-4o-mini", "model_request_parameters": null, "model_settings":
+      null}, "log_id": "g", "metadata": {"model": "gpt-4o-mini", "provider": "openai"},
+      "metrics": {"start": 1774893189.592212}, "project_id": "test-pydantic-ai-integration",
+      "root_span_id": "7b1235d0-d31d-49ed-8df1-6ae33d4dedf7", "span_attributes": {"exec_counter":
+      58, "name": "model_request_stream", "type": "llm"}, "span_id": "f35d8e02-4e31-4cb8-b8ad-2bb47c382e1e",
+      "span_parents": ["7b1235d0-d31d-49ed-8df1-6ae33d4dedf7"]},{"_is_merge": false,
+      "context": {"caller_filename": "/Users/abhijeetprasad/workspace/braintrust-sdk-python/py/.nox/test_pydantic_ai_integration-latest/lib/python3.13/site-packages/pydantic_ai/direct.py",
+      "caller_functionname": "_consume_async_stream", "caller_lineno": 360}, "created":
+      "2026-03-30T17:53:09.592300+00:00", "id": "cb2a81f1-d1ce-46eb-b71f-48f94cc77709",
+      "input": {"messages": [{"instructions": null, "kind": "request", "metadata":
+      null, "parts": [{"content": "Count from 1 to 3", "part_kind": "user-prompt",
+      "timestamp": "2026-03-30 17:53:09.591693+00:00"}], "run_id": null, "timestamp":
+      null}]}, "log_id": "g", "metadata": {"model": "gpt-4o-mini", "provider": "openai"},
+      "metrics": {"start": 1774893189.592299}, "project_id": "test-pydantic-ai-integration",
+      "root_span_id": "7b1235d0-d31d-49ed-8df1-6ae33d4dedf7", "span_attributes": {"exec_counter":
+      59, "name": "chat gpt-4o-mini", "type": "llm"}, "span_id": "d47230a5-49a5-4cba-b863-1e479ceac7aa",
+      "span_parents": ["f35d8e02-4e31-4cb8-b8ad-2bb47c382e1e"]}], "api_version": 2}'
+    headers:
+      Accept:
+      - '*/*'
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '2186'
+      User-Agent:
+      - python-requests/2.33.1
+    method: POST
+    uri: https://staging-api.braintrust.dev/logs3
+  response:
+    body:
+      string: '{"Code":"ForbiddenError","Message":"Missing read access to project_log
+        id test-pydantic-ai-integration, or the project_log does not exist [user_email=abhijeet@braintrustdata.com]
+        [user_org=braintrustdata.com] [timestamp=1774893190.508]","InternalTraceId":"69cab886000000002541a41a062b3821","Path":"/logs3","Service":"api"}'
+    headers:
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json; charset=utf-8
+      Date:
+      - Mon, 30 Mar 2026 17:53:10 GMT
+      Via:
+      - 1.1 90aae5d559fbbbe252f6d8de0a8d7ca8.cloudfront.net (CloudFront), 1.1 c2397f8122d12a766778848b1e46618c.cloudfront.net
+        (CloudFront)
+      X-Amz-Cf-Id:
+      - d73rTtpa6LLDwu7hVQKfmSYL_SSkYfPKbkMviRoaI0PhG9JcQdcc0A==
+      X-Amz-Cf-Pop:
+      - IAD55-P9
+      - IAD61-P11
+      X-Amzn-Trace-Id:
+      - Root=1-69cab886-4aa46ad25fe765fa582e040e;Parent=796a754107031849;Sampled=0;Lineage=1:fc3b4ff1:0
+      X-Cache:
+      - Error from cloudfront
+      access-control-allow-credentials:
+      - 'true'
+      access-control-expose-headers:
+      - x-bt-cursor,x-bt-found-existing,x-bt-query-plan,x-bt-api-duration-ms,x-bt-brainstore-duration-ms
+      content-length:
+      - '322'
+      etag:
+      - W/"142-o3U2Y50CsoSuWcGoGfpU85I9aag"
+      vary:
+      - Origin, Accept-Encoding
+      x-amz-apigw-id:
+      - bDHFCF_-IAMEsQQ=
+      x-amzn-RequestId:
+      - 071836c2-6481-4fe4-82c8-63a2f60f3811
+      x-bt-internal-trace-id:
+      - 69cab886000000002541a41a062b3821
+    status:
+      code: 403
+      message: Forbidden
+- request:
+    body: '{"messages":[{"role":"user","content":"Count from 1 to 5"}],"model":"gpt-4o-mini","stream":true,"stream_options":{"include_usage":true}}'
+    headers:
+      Accept:
+      - application/json
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '136'
+      Content-Type:
+      - application/json
+      Cookie:
+      - _cfuvid=VQg4i_utDK73HtVZX9MnimdbFMrcTwHiGTkj8zvaxBM-1766265730198-0.0.1.1-604800000;
+        __cf_bm=K5ScKgWxKV8qrun72h6zWqwqzuox1P7HfwixJPaisaU-1774893183.0866485-1.0.1.1-aFlnpoUkbkAngI0favlhLoCOJtcaN7dUO6bYg0g4jfC.HRhU3s_NrZt7oH01lSi39dR_xL9hFmrQs2o5en0gk0jRe0MRJTasLHnGP6o4.yXI0SZeUn56WYYaGKfOKVTx
+      Host:
+      - api.openai.com
+      User-Agent:
+      - pydantic-ai/1.73.0
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Async:
+      - async:asyncio
+      X-Stainless-Lang:
+      - python
+      X-Stainless-OS:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 2.30.0
+      X-Stainless-Runtime:
+      - CPython
+      X-Stainless-Runtime-Version:
+      - 3.13.3
+      x-stainless-read-timeout:
+      - '600'
+      x-stainless-retry-count:
+      - '0'
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: 'data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"GJC9P5OdK"}
+
+
+        data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"Sure"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"1P1UjOI"}
+
+
+        data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"ZlSk142ecs"}
+
+
+        data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"
+        Here"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"4yabYK"}
+
+
+        data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"
+        you"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"grCdnO7"}
+
+
+        data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"
+        go"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Nhi1EPE9"}
+
+
+        data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":":"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"M7DzNi5HGu"}
+
+
+        data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"
+        "},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"2IAA3kRV0b"}
+
+
+        data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"1"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"RhdCx21T32"}
+
+
+        data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"N2k2EsOSJW"}
+
+
+        data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"
+        "},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"KcDwfb76mt"}
+
+
+        data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"2"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"7lR6LaTkCm"}
+
+
+        data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"wdhkGWENkw"}
+
+
+        data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"
+        "},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Kv82utB0v1"}
+
+
+        data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"3"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"mr5lS7vtdA"}
+
+
+        data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"s9xOzGrc1a"}
+
+
+        data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"
+        "},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"u42PFlFCKS"}
+
+
+        data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"4"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"6FjYL9XiZG"}
+
+
+        data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":","},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"jJ789GMJk1"}
+
+
+        data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"
+        "},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"xaM26X2ccz"}
+
+
+        data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"5"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"VEKM6MTL9y"}
+
+
+        data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"."},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"DWYxA5bszQ"}
+
+
+        data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null,"obfuscation":"ky8MR"}
+
+
+        data: {"id":"chatcmpl-DPB5SpfA9eLCxFYN0uY4KsauBgrVN","object":"chat.completion.chunk","created":1774893190,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[],"usage":{"prompt_tokens":14,"completion_tokens":21,"total_tokens":35,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"jlxaAs828P"}
+
+
+        data: [DONE]
+
+
+        '
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-Ray:
+      - 9e48f8e32964076d-YYZ
+      Connection:
+      - keep-alive
+      Content-Type:
+      - text/event-stream; charset=utf-8
+      Date:
+      - Mon, 30 Mar 2026 17:53:10 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '372'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999992'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_9b03c171263a4211b5037eff9b1a4723
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/py/src/braintrust/wrappers/cassettes/test_tool_execution_creates_spans.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_tool_execution_creates_spans.yaml
similarity index 100%
rename from py/src/braintrust/wrappers/cassettes/test_tool_execution_creates_spans.yaml
rename to py/src/braintrust/integrations/pydantic_ai/cassettes/test_tool_execution_creates_spans.yaml
diff --git a/py/src/braintrust/integrations/pydantic_ai/cassettes/test_tool_execution_tracing_does_not_depend_on_message_reconstruction.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_tool_execution_tracing_does_not_depend_on_message_reconstruction.yaml
new file mode 100644
index 00000000..5ecd07ac
--- /dev/null
+++ b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_tool_execution_tracing_does_not_depend_on_message_reconstruction.yaml
@@ -0,0 +1,217 @@
+interactions:
+- request:
+    body: '{"messages":[{"role":"user","content":"What''s the weather in Paris?"}],"model":"gpt-4o-mini","max_completion_tokens":200,"stream":false,"tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_weather","description":"","parameters":{"additionalProperties":false,"properties":{"city":{"type":"string"}},"required":["city"],"type":"object"},"strict":true}}]}'
+    headers:
+      Accept:
+      - application/json
+      Accept-Encoding:
+      - gzip, deflate, zstd
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '372'
+      Content-Type:
+      - application/json
+      Cookie:
+      - _cfuvid=tXOZ7vGE2DBF6L6fDg_veKtSaUVC4UPotJDezWYoYXI-1766265191281-0.0.1.1-604800000;
+        __cf_bm=epAi6KrcpiRht5_zKqAbs_ZkpcP6bWzSCDoNAhVSQcg-1774973531.7748783-1.0.1.1-eexwuoEVNhxJeFCArlDcDavolPepARox5VsLLlCOfl17u1yyKZAxen8yKFPrew9xF3zVGlK3_FSx59t5p8RKNEk1f83tfxWC6HF_lbbsUvSvr3Wt1mbPvGIdnDbSfyDL
+      Host:
+      - api.openai.com
+      User-Agent:
+      - pydantic-ai/1.66.0
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Async:
+      - async:asyncio
+      X-Stainless-Lang:
+      - python
+      X-Stainless-OS:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 2.24.0
+      X-Stainless-Runtime:
+      - CPython
+      X-Stainless-Runtime-Version:
+      - 3.13.3
+      x-stainless-read-timeout:
+      - '600'
+      x-stainless-retry-count:
+      - '0'
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-DPVzIVqFFl74aixNWy61pqlKSLtm7\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1774973532,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": null,\n        \"tool_calls\": [\n          {\n
+        \           \"id\": \"call_wcgZ1wAdiVEmHWxtAmhLUoz2\",\n            \"type\":
+        \"function\",\n            \"function\": {\n              \"name\": \"get_weather\",\n
+        \             \"arguments\": \"{\\\"city\\\":\\\"Paris\\\"}\"\n            }\n
+        \         }\n        ],\n        \"refusal\": null,\n        \"annotations\":
+        []\n      },\n      \"logprobs\": null,\n      \"finish_reason\": \"tool_calls\"\n
+        \   }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 43,\n    \"completion_tokens\":
+        14,\n    \"total_tokens\": 57,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
+        0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_e738e3044b\"\n}\n"
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-Ray:
+      - 9e50a260ae9436d0-YYZ
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Tue, 31 Mar 2026 16:12:12 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      content-length:
+      - '1084'
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '527'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999990'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_f52a7793909d4febb7d8282622d2a13c
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages":[{"role":"user","content":"What''s the weather in Paris?"},{"role":"assistant","content":null,"tool_calls":[{"id":"call_wcgZ1wAdiVEmHWxtAmhLUoz2","type":"function","function":{"name":"get_weather","arguments":"{\"city\":\"Paris\"}"}}]},{"role":"tool","tool_call_id":"call_wcgZ1wAdiVEmHWxtAmhLUoz2","content":"It''s
+      sunny in Paris"}],"model":"gpt-4o-mini","max_completion_tokens":200,"stream":false,"tool_choice":"auto","tools":[{"type":"function","function":{"name":"get_weather","description":"","parameters":{"additionalProperties":false,"properties":{"city":{"type":"string"}},"required":["city"],"type":"object"},"strict":true}}]}'
+    headers:
+      Accept:
+      - application/json
+      Accept-Encoding:
+      - gzip, deflate, zstd
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '644'
+      Content-Type:
+      - application/json
+      Cookie:
+      - _cfuvid=tXOZ7vGE2DBF6L6fDg_veKtSaUVC4UPotJDezWYoYXI-1766265191281-0.0.1.1-604800000;
+        __cf_bm=epAi6KrcpiRht5_zKqAbs_ZkpcP6bWzSCDoNAhVSQcg-1774973531.7748783-1.0.1.1-eexwuoEVNhxJeFCArlDcDavolPepARox5VsLLlCOfl17u1yyKZAxen8yKFPrew9xF3zVGlK3_FSx59t5p8RKNEk1f83tfxWC6HF_lbbsUvSvr3Wt1mbPvGIdnDbSfyDL
+      Host:
+      - api.openai.com
+      User-Agent:
+      - pydantic-ai/1.66.0
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Async:
+      - async:asyncio
+      X-Stainless-Lang:
+      - python
+      X-Stainless-OS:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 2.24.0
+      X-Stainless-Runtime:
+      - CPython
+      X-Stainless-Runtime-Version:
+      - 3.13.3
+      x-stainless-read-timeout:
+      - '600'
+      x-stainless-retry-count:
+      - '0'
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-DPVzJr38yaFUi6ANuIRoeJAjXIBrO\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1774973533,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"The weather in Paris is currently sunny.\",\n
+        \       \"refusal\": null,\n        \"annotations\": []\n      },\n      \"logprobs\":
+        null,\n      \"finish_reason\": \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\":
+        69,\n    \"completion_tokens\": 9,\n    \"total_tokens\": 78,\n    \"prompt_tokens_details\":
+        {\n      \"cached_tokens\": 0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_78db9bf1f6\"\n}\n"
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-Ray:
+      - 9e50a264add7c8b2-YYZ
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Tue, 31 Mar 2026 16:12:13 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      content-length:
+      - '846'
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '429'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999985'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_e05560b30a4e4fabaf1166c1fe903df1
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/py/src/braintrust/integrations/pydantic_ai/cassettes/test_wrapper_agent_run_is_traced.yaml b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_wrapper_agent_run_is_traced.yaml
new file mode 100644
index 00000000..aa0b0d07
--- /dev/null
+++ b/py/src/braintrust/integrations/pydantic_ai/cassettes/test_wrapper_agent_run_is_traced.yaml
@@ -0,0 +1,111 @@
+interactions:
+- request:
+    body: '{"messages":[{"role":"user","content":"What is 2+2? Answer with just the
+      number."}],"model":"gpt-4o-mini","max_completion_tokens":50,"stream":false}'
+    headers:
+      Accept:
+      - application/json
+      Accept-Encoding:
+      - gzip, deflate, zstd
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '148'
+      Content-Type:
+      - application/json
+      Cookie:
+      - _cfuvid=tXOZ7vGE2DBF6L6fDg_veKtSaUVC4UPotJDezWYoYXI-1766265191281-0.0.1.1-604800000
+      Host:
+      - api.openai.com
+      User-Agent:
+      - pydantic-ai/1.66.0
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Async:
+      - async:asyncio
+      X-Stainless-Lang:
+      - python
+      X-Stainless-OS:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 2.24.0
+      X-Stainless-Runtime:
+      - CPython
+      X-Stainless-Runtime-Version:
+      - 3.13.3
+      x-stainless-read-timeout:
+      - '600'
+      x-stainless-retry-count:
+      - '0'
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: "{\n  \"id\": \"chatcmpl-DPVzHBw20ZdgFxQ8hDmn74NllhQob\",\n  \"object\":
+        \"chat.completion\",\n  \"created\": 1774973531,\n  \"model\": \"gpt-4o-mini-2024-07-18\",\n
+        \ \"choices\": [\n    {\n      \"index\": 0,\n      \"message\": {\n        \"role\":
+        \"assistant\",\n        \"content\": \"4\",\n        \"refusal\": null,\n
+        \       \"annotations\": []\n      },\n      \"logprobs\": null,\n      \"finish_reason\":
+        \"stop\"\n    }\n  ],\n  \"usage\": {\n    \"prompt_tokens\": 20,\n    \"completion_tokens\":
+        1,\n    \"total_tokens\": 21,\n    \"prompt_tokens_details\": {\n      \"cached_tokens\":
+        0,\n      \"audio_tokens\": 0\n    },\n    \"completion_tokens_details\":
+        {\n      \"reasoning_tokens\": 0,\n      \"audio_tokens\": 0,\n      \"accepted_prediction_tokens\":
+        0,\n      \"rejected_prediction_tokens\": 0\n    }\n  },\n  \"service_tier\":
+        \"default\",\n  \"system_fingerprint\": \"fp_ca3e7d71bf\"\n}\n"
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-Ray:
+      - 9e50a25d9cd94cc4-YYZ
+      Connection:
+      - keep-alive
+      Content-Type:
+      - application/json
+      Date:
+      - Tue, 31 Mar 2026 16:12:12 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      content-length:
+      - '807'
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '377'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      set-cookie:
+      - __cf_bm=epAi6KrcpiRht5_zKqAbs_ZkpcP6bWzSCDoNAhVSQcg-1774973531.7748783-1.0.1.1-eexwuoEVNhxJeFCArlDcDavolPepARox5VsLLlCOfl17u1yyKZAxen8yKFPrew9xF3zVGlK3_FSx59t5p8RKNEk1f83tfxWC6HF_lbbsUvSvr3Wt1mbPvGIdnDbSfyDL;
+        HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Tue, 31 Mar 2026
+        16:42:12 GMT
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999987'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_8d78179fe6524c99a93e2eb714045e9c
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/py/src/braintrust/integrations/pydantic_ai/integration.py b/py/src/braintrust/integrations/pydantic_ai/integration.py
new file mode 100644
index 00000000..64c689ff
--- /dev/null
+++ b/py/src/braintrust/integrations/pydantic_ai/integration.py
@@ -0,0 +1,32 @@
+"""Pydantic AI integration orchestration."""
+
+from braintrust.integrations.base import BaseIntegration
+
+from .patchers import (
+    AgentPatcher,
+    DirectModelRequestPatcher,
+    DirectModelRequestStreamPatcher,
+    DirectModelRequestStreamSyncPatcher,
+    DirectModelRequestSyncPatcher,
+    DirectPrepareModelPatcher,
+    StreamedResponseSyncStartProducerPatcher,
+    ToolManagerFunctionToolPatcher,
+)
+
+
+class PydanticAIIntegration(BaseIntegration):
+    """Braintrust instrumentation for Pydantic AI."""
+
+    name = "pydantic_ai"
+    import_names = ("pydantic_ai",)  # NOTE(review): presumably the import(s) probed for availability — confirm
+    min_version = "1.10.0"  # NOTE(review): presumably the oldest supported pydantic_ai release — confirm
+    patchers = (  # every entry is a patcher class imported from .patchers
+        StreamedResponseSyncStartProducerPatcher,  # declares priority = 50 in patchers.py
+        AgentPatcher,
+        DirectPrepareModelPatcher,
+        DirectModelRequestPatcher,
+        DirectModelRequestSyncPatcher,
+        DirectModelRequestStreamPatcher,
+        DirectModelRequestStreamSyncPatcher,
+        ToolManagerFunctionToolPatcher,
+    )
diff --git a/py/src/braintrust/integrations/pydantic_ai/patchers.py b/py/src/braintrust/integrations/pydantic_ai/patchers.py
new file mode 100644
index 00000000..0335fcd7
--- /dev/null
+++ b/py/src/braintrust/integrations/pydantic_ai/patchers.py
@@ -0,0 +1,201 @@
+"""Pydantic AI patchers."""
+
+import warnings
+from typing import Any, ClassVar
+
+from braintrust.integrations.base import ClassScanPatcher, CompositeFunctionWrapperPatcher, FunctionWrapperPatcher
+
+from .tracing import (
+    _agent_get_model_wrapper,
+    _agent_run_stream_events_wrapper,
+    _agent_run_stream_sync_wrapper,
+    _agent_run_stream_wrapper,
+    _agent_run_sync_wrapper,
+    _agent_run_wrapper,
+    _agent_to_cli_sync_wrapper,
+    _create_direct_model_request_stream_sync_wrapper,
+    _create_direct_model_request_stream_wrapper,
+    _create_direct_model_request_sync_wrapper,
+    _create_direct_model_request_wrapper,
+    _create_start_producer_wrapper,
+    _direct_prepare_model_wrapper,
+    _tool_manager_call_function_tool_wrapper,
+    _tool_manager_execute_function_tool_wrapper,
+    _wrap_concrete_model_class,
+)
+
+
+class _AgentRunPatcher(FunctionWrapperPatcher):  # AgentPatcher sub-patcher for AbstractAgent.run
+    name = "pydantic_ai.agent.run"
+    target_module = "pydantic_ai.agent.abstract"
+    target_path = "AbstractAgent.run"
+    wrapper = _agent_run_wrapper  # imported from .tracing
+
+
+class _AgentRunSyncPatcher(FunctionWrapperPatcher):  # AgentPatcher sub-patcher for AbstractAgent.run_sync
+    name = "pydantic_ai.agent.run_sync"
+    target_module = "pydantic_ai.agent.abstract"
+    target_path = "AbstractAgent.run_sync"
+    wrapper = _agent_run_sync_wrapper  # imported from .tracing
+
+
+class _AgentToCliSyncPatcher(FunctionWrapperPatcher):  # AgentPatcher sub-patcher for AbstractAgent.to_cli_sync
+    name = "pydantic_ai.agent.to_cli_sync"
+    target_module = "pydantic_ai.agent.abstract"
+    target_path = "AbstractAgent.to_cli_sync"
+    wrapper = _agent_to_cli_sync_wrapper  # imported from .tracing
+
+
+class _AgentRunStreamPatcher(FunctionWrapperPatcher):  # AgentPatcher sub-patcher for AbstractAgent.run_stream
+    name = "pydantic_ai.agent.run_stream"
+    target_module = "pydantic_ai.agent.abstract"
+    target_path = "AbstractAgent.run_stream"
+    wrapper = _agent_run_stream_wrapper  # imported from .tracing
+
+
+class _AgentRunStreamSyncPatcher(FunctionWrapperPatcher):  # AgentPatcher sub-patcher for run_stream_sync
+    name = "pydantic_ai.agent.run_stream_sync"
+    target_module = "pydantic_ai.agent.abstract"
+    target_path = "AbstractAgent.run_stream_sync"
+    wrapper = _agent_run_stream_sync_wrapper  # imported from .tracing
+
+
+class _AgentRunStreamEventsPatcher(FunctionWrapperPatcher):  # AgentPatcher sub-patcher for run_stream_events
+    name = "pydantic_ai.agent.run_stream_events"
+    target_module = "pydantic_ai.agent.abstract"
+    target_path = "AbstractAgent.run_stream_events"
+    wrapper = _agent_run_stream_events_wrapper  # imported from .tracing
+
+
+class _AgentGetModelPatcher(FunctionWrapperPatcher):  # AgentPatcher sub-patcher for the private Agent._get_model
+    name = "pydantic_ai.agent.get_model"
+    target_module = "pydantic_ai"  # unlike the run* sub-patchers, this targets the concrete Agent class
+    target_path = "Agent._get_model"
+    wrapper = _agent_get_model_wrapper  # imported from .tracing
+
+
+class AgentPatcher(CompositeFunctionWrapperPatcher):
+    """Patch Pydantic AI agent entrypoints for tracing."""
+
+    name = "pydantic_ai.agent"
+    sub_patchers = (  # all defined earlier in this module
+        _AgentRunPatcher,
+        _AgentRunSyncPatcher,
+        _AgentToCliSyncPatcher,
+        _AgentRunStreamPatcher,
+        _AgentRunStreamSyncPatcher,
+        _AgentRunStreamEventsPatcher,
+        _AgentGetModelPatcher,  # targets pydantic_ai.Agent; the six above target AbstractAgent
+    )
+
+
+class DirectPrepareModelPatcher(FunctionWrapperPatcher):  # targets the private pydantic_ai.direct._prepare_model
+    name = "pydantic_ai.direct.prepare_model"
+    target_module = "pydantic_ai.direct"
+    target_path = "_prepare_model"
+    wrapper = _direct_prepare_model_wrapper  # imported from .tracing; used as-is, not built by a factory
+
+
+class DirectModelRequestPatcher(FunctionWrapperPatcher):  # targets pydantic_ai.direct.model_request
+    name = "pydantic_ai.direct.model_request"
+    target_module = "pydantic_ai.direct"
+    target_path = "model_request"
+    wrapper = _create_direct_model_request_wrapper()  # factory from .tracing, called when this class body runs
+
+
+class DirectModelRequestSyncPatcher(FunctionWrapperPatcher):  # targets pydantic_ai.direct.model_request_sync
+    name = "pydantic_ai.direct.model_request_sync"
+    target_module = "pydantic_ai.direct"
+    target_path = "model_request_sync"
+    wrapper = _create_direct_model_request_sync_wrapper()  # factory called when this class body runs
+
+
+class DirectModelRequestStreamPatcher(FunctionWrapperPatcher):  # targets pydantic_ai.direct.model_request_stream
+    name = "pydantic_ai.direct.model_request_stream"
+    target_module = "pydantic_ai.direct"
+    target_path = "model_request_stream"
+    wrapper = _create_direct_model_request_stream_wrapper()  # factory called when this class body runs
+
+
+class DirectModelRequestStreamSyncPatcher(FunctionWrapperPatcher):  # targets direct.model_request_stream_sync
+    name = "pydantic_ai.direct.model_request_stream_sync"
+    target_module = "pydantic_ai.direct"
+    target_path = "model_request_stream_sync"
+    wrapper = _create_direct_model_request_stream_sync_wrapper()  # factory called when this class body runs
+
+
+class StreamedResponseSyncStartProducerPatcher(FunctionWrapperPatcher):  # targets a private method
+    name = "pydantic_ai.direct.streamed_response_sync.start_producer"
+    target_module = "pydantic_ai.direct"
+    target_path = "StreamedResponseSync._start_producer"
+    wrapper = _create_start_producer_wrapper()  # factory from .tracing, called when this class body runs
+    priority: ClassVar[int] = 50  # NOTE(review): only explicit priority besides ModelClassesPatcher — confirm order
+
+
+class _ToolManagerExecuteFunctionToolPatcher(FunctionWrapperPatcher):  # sub-patcher of ToolManagerFunctionToolPatcher
+    name = "pydantic_ai.tool_manager.execute_function_tool"
+    target_module = "pydantic_ai._tool_manager"  # private pydantic_ai module
+    target_path = "ToolManager._execute_function_tool_call"
+    wrapper = _tool_manager_execute_function_tool_wrapper  # imported from .tracing
+
+
+class _ToolManagerCallFunctionToolPatcher(FunctionWrapperPatcher):  # sub-patcher of ToolManagerFunctionToolPatcher
+    name = "pydantic_ai.tool_manager.call_function_tool"
+    target_module = "pydantic_ai._tool_manager"  # private pydantic_ai module
+    target_path = "ToolManager._call_function_tool"
+    wrapper = _tool_manager_call_function_tool_wrapper  # imported from .tracing
+    superseded_by = (_ToolManagerExecuteFunctionToolPatcher,)  # NOTE(review): presumably skipped if that one applies
+
+
+class ToolManagerFunctionToolPatcher(CompositeFunctionWrapperPatcher):  # groups both ToolManager sub-patchers
+    name = "pydantic_ai.tool_manager"
+    sub_patchers = (
+        _ToolManagerExecuteFunctionToolPatcher,
+        _ToolManagerCallFunctionToolPatcher,  # declares superseded_by = the execute patcher above
+    )
+
+
+def wrap_agent(Agent: Any) -> Any:  # thin public helper: delegates to AgentPatcher.wrap_target
+    return AgentPatcher.wrap_target(Agent)  # returns whatever wrap_target returns for the given class
+
+
+class ModelClassesPatcher(ClassScanPatcher):
+    """Deprecated compatibility fallback for model subclass scanning.
+
+    Normal setup now wraps resolved models via ``Agent._get_model`` and
+    ``pydantic_ai.direct._prepare_model`` instead of relying on subclass scans.
+    """
+
+    name = "pydantic_ai.models"
+    priority: ClassVar[int] = 200  # NOTE(review): ordering semantics of priority live in base.py — confirm
+    target_module = "pydantic_ai.models"
+    root_class_path = "Model"  # NOTE(review): presumably looked up inside target_module as the scan root — confirm
+
+    patch_class = staticmethod(_wrap_concrete_model_class)  # staticmethod() keeps it from binding as a method
+
+
+def wrap_model_class(model_class: Any) -> Any:
+    """Deprecated: wrap ``model_class`` in place unless it already carries the patch marker; return it."""
+    warnings.warn(
+        "wrap_model_class() is deprecated and no longer needed for normal setup. "
+        "setup_pydantic_ai() now wraps models at runtime via model resolution seams.",
+        DeprecationWarning,
+        stacklevel=2,
+    )
+    if not ModelClassesPatcher.has_patch_marker(model_class):  # an already-marked class is left untouched
+        _wrap_concrete_model_class(model_class)
+        ModelClassesPatcher.mark_patched(model_class)
+    return model_class
+
+
+def wrap_model_classes() -> bool:
+    """Deprecated shim: scan the model subclasses loaded so far; return False when the patcher does not apply."""
+    warnings.warn(
+        "wrap_model_classes() is deprecated and no longer needed. "
+        "setup_pydantic_ai() now wraps models at runtime via model resolution seams.",
+        DeprecationWarning,
+        stacklevel=2,
+    )
+    if ModelClassesPatcher.applies(None, None):  # .__func__ below: base patch, ModelClassesPatcher passed as cls
+        return ClassScanPatcher.patch.__func__(ModelClassesPatcher, None, None)
+    return False
diff --git a/py/src/braintrust/wrappers/test_pydantic_ai_integration.py b/py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_integration.py
similarity index 89%
rename from py/src/braintrust/wrappers/test_pydantic_ai_integration.py
rename to py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_integration.py
index 5dc1007c..c6142505 100644
--- a/py/src/braintrust/wrappers/test_pydantic_ai_integration.py
+++ b/py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_integration.py
@@ -5,6 +5,7 @@
 import asyncio
 import inspect
 import time
+from pathlib import Path
 
 import pytest
 from braintrust import logger, setup_pydantic_ai, traced
@@ -22,6 +23,11 @@
 TEST_PROMPT = "What is 2+2? Answer with just the number."
 
 
+@pytest.fixture(scope="module")
+def vcr_cassette_dir():  # cassettes/ directory that sits next to this test module
+    return str(Path(__file__).resolve().with_name("cassettes"))
+
+
 @pytest.fixture(scope="module", autouse=True)
 def setup_wrapper():
     """Setup pydantic_ai wrapper before any tests run."""
@@ -67,6 +73,36 @@ def _assert_metrics_are_valid(metrics, start, end):
         assert metrics["completion_tokens"] > 0
 
 
+@pytest.mark.vcr
+@pytest.mark.asyncio
+async def test_direct_model_request_creates_nested_chat_span_without_class_scan(memory_logger, direct):
+    """A direct model_request must yield a chat span nested under it via _prepare_model, not class scanning."""
+    assert not memory_logger.pop()
+
+    prompt_part = UserPromptPart(content=TEST_PROMPT)
+    messages = [ModelRequest(parts=[prompt_part])]
+    started = time.time()
+    response = await direct.model_request(messages=messages, model=MODEL)
+    finished = time.time()
+
+    assert response.parts
+    assert "4" in str(response.parts[0].content)
+
+    spans = memory_logger.pop()
+    assert len(spans) >= 2, f"Expected at least 2 spans (model_request + chat), got {len(spans)}"
+    # next(..., None) picks the first span with a matching name, or None when there is none.
+    request_span = next((s for s in spans if s["span_attributes"]["name"] == "model_request"), None)
+    llm_span = next((s for s in spans if "chat" in s["span_attributes"]["name"]), None)
+    assert request_span is not None, "model_request span not found"
+    assert llm_span is not None, "chat span not found"
+
+    assert llm_span["span_parents"] == [request_span["span_id"]]
+    assert llm_span["metadata"]["model"] == "gpt-4o-mini"
+    assert llm_span["metadata"]["provider"] == "openai"
+    _assert_metrics_are_valid(request_span["metrics"], started, finished)
+    _assert_metrics_are_valid(llm_span["metrics"], started, finished)
+
+
 @pytest.mark.vcr
 @pytest.mark.asyncio
 async def test_agent_run_async(memory_logger):
@@ -117,6 +153,37 @@ async def test_agent_run_async(memory_logger):
     assert agent_span["metrics"]["completion_tokens"] > 0
 
 
+@pytest.mark.vcr
+@pytest.mark.asyncio
+async def test_wrapper_agent_run_is_traced(memory_logger):
+    """Running a WrapperAgent (which inherits AbstractAgent methods) must be traced after setup."""
+    from pydantic_ai.agent.wrapper import WrapperAgent
+
+    assert not memory_logger.pop()
+
+    inner_agent = Agent(MODEL, name="wrapped-agent", model_settings=ModelSettings(max_tokens=50))
+    wrapped = WrapperAgent(inner_agent)
+    started = time.time()
+    result = await wrapped.run(TEST_PROMPT)
+    finished = time.time()
+
+    assert result.output
+    assert "4" in str(result.output)
+
+    spans = memory_logger.pop()
+    assert len(spans) >= 2, f"Expected at least 2 spans (agent_run + chat), got {len(spans)}"
+
+    run_span = next((s for s in spans if "agent_run" in s["span_attributes"]["name"]), None)
+    llm_span = next((s for s in spans if "chat" in s["span_attributes"]["name"]), None)
+    assert run_span is not None, "agent_run span not found"
+    assert llm_span is not None, "chat span not found"
+
+    assert run_span["span_attributes"]["name"] == "agent_run [wrapped-agent]"
+    assert llm_span["span_parents"] == [run_span["span_id"]]
+    _assert_metrics_are_valid(run_span["metrics"], started, finished)
+    _assert_metrics_are_valid(llm_span["metrics"], started, finished)
+
+
 @pytest.mark.vcr
 def test_agent_run_sync(memory_logger):
     """Test Agent.run_sync() synchronous method."""
@@ -703,270 +770,6 @@ async def test_direct_model_request_stream_complete_output(memory_logger, direct
     assert len(spans) >= 1
 
 
-@pytest.mark.vcr
-@pytest.mark.asyncio
-async def test_direct_api_streaming_call_3(memory_logger, direct):
-    """Test direct API streaming (call 3) - should output complete '1, 2, 3, 4, 5'."""
-    assert not memory_logger.pop()
-
-    IDENTICAL_PROMPT = "Count from 1 to 5."
-    messages = [ModelRequest(parts=[UserPromptPart(content=IDENTICAL_PROMPT)])]
-
-    collected_text = ""
-    async with direct.model_request_stream(
-        model="openai:gpt-4o", messages=messages, model_settings=ModelSettings(max_tokens=100)
-    ) as stream:
-        async for chunk in stream:
-            # FIX: Handle PartStartEvent which contains initial text
-            if hasattr(chunk, "part") and hasattr(chunk.part, "content"):
-                collected_text += str(chunk.part.content)
-            # Handle PartDeltaEvent with delta content
-            elif hasattr(chunk, "delta") and chunk.delta:
-                if hasattr(chunk.delta, "content_delta") and chunk.delta.content_delta:
-                    collected_text += chunk.delta.content_delta
-
-    # Now this should pass!
-    assert "1" in collected_text, f"Expected '1' in output but got: {collected_text}"
-    assert "2" in collected_text
-    assert "3" in collected_text
-    assert "4" in collected_text
-    assert "5" in collected_text
-
-
-@pytest.mark.vcr
-@pytest.mark.asyncio
-async def test_direct_api_streaming_call_4(memory_logger, direct):
-    """Test direct API streaming (call 4) - identical to call 3."""
-    assert not memory_logger.pop()
-
-    IDENTICAL_PROMPT = "Count from 1 to 5."
-    messages = [ModelRequest(parts=[UserPromptPart(content=IDENTICAL_PROMPT)])]
-
-    collected_text = ""
-    async with direct.model_request_stream(
-        model="openai:gpt-4o", messages=messages, model_settings=ModelSettings(max_tokens=100)
-    ) as stream:
-        async for chunk in stream:
-            # FIX: Handle PartStartEvent which contains initial text
-            if hasattr(chunk, "part") and hasattr(chunk.part, "content"):
-                collected_text += str(chunk.part.content)
-            # Handle PartDeltaEvent with delta content
-            elif hasattr(chunk, "delta") and chunk.delta:
-                if hasattr(chunk.delta, "content_delta") and chunk.delta.content_delta:
-                    collected_text += chunk.delta.content_delta
-
-    # Now this should pass!
-    assert "1" in collected_text, f"Expected '1' in output but got: {collected_text}"
-
-
-@pytest.mark.vcr
-@pytest.mark.asyncio
-async def test_direct_api_streaming_early_break_call_5(memory_logger, direct):
-    """Test direct API streaming with early break (call 5) - should still get first few chars including '1'."""
-    assert not memory_logger.pop()
-
-    IDENTICAL_PROMPT = "Count from 1 to 5."
-    messages = [ModelRequest(parts=[UserPromptPart(content=IDENTICAL_PROMPT)])]
-
-    collected_text = ""
-    i = 0
-    async with direct.model_request_stream(
-        model="openai:gpt-4o", messages=messages, model_settings=ModelSettings(max_tokens=100)
-    ) as stream:
-        async for chunk in stream:
-            # FIX: Handle PartStartEvent which contains initial text
-            if hasattr(chunk, "part") and hasattr(chunk.part, "content"):
-                collected_text += str(chunk.part.content)
-            # Handle PartDeltaEvent with delta content
-            elif hasattr(chunk, "delta") and chunk.delta:
-                if hasattr(chunk.delta, "content_delta") and chunk.delta.content_delta:
-                    collected_text += chunk.delta.content_delta
-
-            i += 1
-            if i >= 3:
-                break
-
-    # Even with early break after 3 chunks, we should capture text from PartStartEvent (chunk 1)
-    print(f"Collected text: '{collected_text}'")
-    assert len(collected_text) > 0, f"Expected some text even with early break but got empty string"
-    # Verify we're capturing PartStartEvent by checking we got text before breaking at chunk 3
-    assert collected_text, f"Should have captured text from PartStartEvent or first delta"
-
-
-@pytest.mark.vcr
-@pytest.mark.asyncio
-async def test_direct_api_streaming_no_duplication(memory_logger, direct):
-    """Test that direct API streaming doesn't duplicate output and captures all text in span."""
-    assert not memory_logger.pop()
-
-    collected_text = ""
-    chunk_count = 0
-
-    # Use direct API streaming
-    messages = [ModelRequest(parts=[UserPromptPart(content="Count from 1 to 5, separated by commas.")])]
-    async with direct.model_request_stream(
-        messages=messages,
-        model_settings=ModelSettings(max_tokens=100),
-        model="openai:gpt-4o",
-    ) as response:
-        async for chunk in response:
-            chunk_count += 1
-            # Extract text from chunk
-            text = None
-            if hasattr(chunk, "part") and hasattr(chunk.part, "content"):
-                text = str(chunk.part.content)
-            elif hasattr(chunk, "delta") and chunk.delta:
-                if hasattr(chunk.delta, "content_delta") and chunk.delta.content_delta:
-                    text = chunk.delta.content_delta
-
-            if text:
-                collected_text += text
-
-    print(f"Collected text from stream: '{collected_text}'")
-    print(f"Total chunks: {chunk_count}")
-
-    # Verify we collected complete text
-    assert len(collected_text) > 0, "Should have collected text from stream"
-    assert "1" in collected_text, "Should have '1' in output"
-
-    # Check span captured the full output
-    spans = memory_logger.pop()
-    assert len(spans) >= 1, f"Expected at least 1 span, got {len(spans)}"
-
-    # Find the model_request_stream span
-    stream_span = next((s for s in spans if "model_request_stream" in s["span_attributes"]["name"]), None)
-    assert stream_span is not None, "model_request_stream span not found"
-
-    # Check that span output contains the full text, not just "1,"
-    span_output = stream_span.get("output", {})
-    print(f"Span output: {span_output}")
-
-    # The span should capture the full response
-    if "response" in span_output and "parts" in span_output["response"]:
-        parts = span_output["response"]["parts"]
-        span_text = "".join(str(p.get("content", "")) for p in parts if isinstance(p, dict))
-        print(f"Span captured text: '{span_text}'")
-        # Should have more than just "1,"
-        assert len(span_text) > 2, f"Span should capture more than just '1,', got: '{span_text}'"
-        assert "1" in span_text, "Span should contain '1'"
-
-
-@pytest.mark.vcr
-@pytest.mark.asyncio
-async def test_direct_api_streaming_no_duplication_comprehensive(memory_logger, direct):
-    """Comprehensive test matching golden test setup to verify no duplication and full output capture."""
-    assert not memory_logger.pop()
-
-    # Match golden test exactly
-    IDENTICAL_PROMPT = "Count from 1 to 5."
-    IDENTICAL_SETTINGS = ModelSettings(max_tokens=100)
-
-    messages = [ModelRequest(parts=[UserPromptPart(content=IDENTICAL_PROMPT)])]
-
-    collected_text = ""
-    chunk_types = []
-    seen_delta = False
-
-    async with direct.model_request_stream(
-        messages=messages, model_settings=IDENTICAL_SETTINGS, model="openai:gpt-4o"
-    ) as stream:
-        async for chunk in stream:
-            # Track chunk types
-            if hasattr(chunk, "part") and hasattr(chunk.part, "content") and not seen_delta:
-                chunk_types.append(("PartStartEvent", str(chunk.part.content)))
-                text = str(chunk.part.content)
-                collected_text += text
-            elif hasattr(chunk, "delta") and chunk.delta:
-                seen_delta = True
-                if hasattr(chunk.delta, "content_delta") and chunk.delta.content_delta:
-                    chunk_types.append(("PartDeltaEvent", chunk.delta.content_delta))
-                    text = chunk.delta.content_delta
-                    collected_text += text
-
-    print(f"\nCollected text: '{collected_text}'")
-    print(f"Total chunks received: {len(chunk_types)}")
-    print(f"All chunk types:")
-    for i, (chunk_type, content) in enumerate(chunk_types):
-        print(f"  {i}: {chunk_type} = {content!r}")
-
-    # Verify no duplication in collected text
-    # Expected: "Sure! Here you go:\n\n1, 2, 3, 4, 5." or similar (length ~30)
-    # Should NOT be duplicated
-    assert len(collected_text) < 60, (
-        f"Text seems duplicated (too long): '{collected_text}' (len={len(collected_text)})"
-    )
-    assert collected_text.count("1, 2, 3") == 1, f"Text should appear once, not duplicated: '{collected_text}'"
-
-    # Check span
-    spans = memory_logger.pop()
-    print(f"Number of spans: {len(spans)}")
-    for i, s in enumerate(spans):
-        print(f"Span {i}: {s['span_attributes']['name']} (type: {s['span_attributes'].get('type', 'N/A')})")
-        if "span_parents" in s and s["span_parents"]:
-            print(f"  Parents: {s['span_parents']}")
-
-    # Should have 1 or 2 spans (direct API wrapper + potentially model wrapper)
-    assert len(spans) >= 1, f"Expected at least 1 span, got {len(spans)}"
-
-    # Find the model_request_stream span
-    stream_span = next((s for s in spans if "model_request_stream" in s["span_attributes"]["name"]), None)
-    assert stream_span is not None, "model_request_stream span not found"
-
-    # Check that span output is not empty and captures reasonable amount of text
-    span_output = stream_span.get("output", {})
-    print(f"Span output keys: {span_output.keys() if span_output else 'None'}")
-
-    if "parts" in span_output:
-        parts = span_output.get("parts", [])
-        print(f"Span parts: {parts}")
-        if parts and len(parts) > 0:
-            first_part = parts[0]
-            print(f"First part type: {type(first_part)}")
-            print(f"First part: {first_part}")
-            if isinstance(first_part, dict):
-                part_content = first_part.get("content", "")
-                print(f"Part content: '{part_content}'")
-                print(f"Part content length: {len(part_content)}")
-                # The span should capture the FULL text, not just "1,"
-                assert len(part_content) > 5, f"Span should capture full text, got: '{part_content}'"
-
-
-@pytest.mark.vcr
-@pytest.mark.asyncio
-async def test_async_generator_pattern_call_6(memory_logger):
-    """Test async generator pattern (call 6) - wrapping stream in async generator."""
-    assert not memory_logger.pop()
-
-    IDENTICAL_PROMPT = "Count from 1 to 5."
-
-    async def stream_with_async_generator(prompt: str):
-        """Wrap the stream in an async generator (customer pattern)."""
-        agent = Agent("openai:gpt-4o", model_settings=ModelSettings(max_tokens=100))
-        async for event in agent.run_stream_events(prompt):
-            yield event
-
-    collected_text = ""
-    i = 0
-    async for event in stream_with_async_generator(IDENTICAL_PROMPT):
-        # run_stream_events returns ResultEvent objects with different structure
-        # Try to extract text from whatever event type we get
-        if hasattr(event, "content") and event.content:
-            collected_text += str(event.content)
-        elif hasattr(event, "part") and hasattr(event.part, "content"):
-            collected_text += str(event.part.content)
-        elif hasattr(event, "delta") and event.delta:
-            if hasattr(event.delta, "content_delta") and event.delta.content_delta:
-                collected_text += event.delta.content_delta
-
-        i += 1
-        if i >= 3:
-            break
-
-    # This should capture something
-    print(f"Collected text from generator: '{collected_text}'")
-    assert len(collected_text) > 0, f"Expected some text from async generator but got empty string"
-
-
 @pytest.mark.vcr
 @pytest.mark.asyncio
 async def test_agent_structured_output(memory_logger):
@@ -1959,6 +1762,42 @@ def calculate(operation: str, a: float, b: float) -> str:
     assert calc_tool_span["span_parents"] == [agent_span["span_id"]], "tool span should be nested under agent_run"
 
 
+@pytest.mark.vcr
+@pytest.mark.asyncio
+async def test_tool_execution_tracing_does_not_depend_on_message_reconstruction(memory_logger, monkeypatch):
+    """A tool span must be emitted by the real tool call even when message reconstruction is unavailable."""
+    from braintrust.integrations.pydantic_ai import tracing as pydantic_ai_tracing
+
+    assert not memory_logger.pop()
+
+    def fail_if_called(result):
+        raise AssertionError("message-based tool span reconstruction should not run")
+
+    # Swap in a stand-in that raises AssertionError if the reconstruction helper is ever invoked.
+    monkeypatch.setattr(pydantic_ai_tracing, "_create_tool_spans_from_messages_impl", fail_if_called)
+    agent = Agent(MODEL, model_settings=ModelSettings(max_tokens=200))
+
+    @agent.tool_plain
+    def get_weather(city: str) -> str:
+        return f"It's sunny in {city}"
+
+    result = await agent.run("What's the weather in Paris?")
+
+    assert result.output
+    answer = str(result.output)
+    assert "Paris" in answer or "sunny" in answer
+    spans = memory_logger.pop()
+    run_span = next((s for s in spans if "agent_run" in s["span_attributes"]["name"]), None)
+    weather_span = next((s for s in spans if s["span_attributes"].get("name") == "get_weather"), None)
+
+    assert run_span is not None, "agent_run span not found"
+    assert weather_span is not None, "runtime tool span not found"
+    assert weather_span["span_attributes"]["type"] == SpanTypeAttribute.TOOL
+    assert weather_span["span_parents"] == [run_span["span_id"]]
+    assert weather_span["metadata"].get("tool_call_id")
+    assert weather_span["metrics"]["duration"] >= 0
+
+
 @pytest.mark.vcr
 def test_tool_execution_creates_spans(memory_logger):
     """Test that executing tools with agents works and creates traced spans."""
@@ -2029,7 +1868,7 @@ def test_agent_tool_metadata_extraction(memory_logger):
 
     Principle: If agent.run() accepts it, it goes in input only.
     """
-    from braintrust.wrappers.pydantic_ai import _build_agent_input_and_metadata
+    from braintrust.integrations.pydantic_ai.tracing import _build_agent_input_and_metadata
 
     agent = Agent(MODEL, model_settings=ModelSettings(max_tokens=100))
 
@@ -2110,7 +1949,7 @@ def search_database(query: str, limit: int = 10) -> str:
 
 def test_agent_without_tools_metadata():
     """Test metadata extraction for agent without tools."""
-    from braintrust.wrappers.pydantic_ai import _build_agent_input_and_metadata
+    from braintrust.integrations.pydantic_ai.tracing import _build_agent_input_and_metadata
 
     # Agent with no tools
     agent = Agent(MODEL, model_settings=ModelSettings(max_tokens=50))
@@ -2127,7 +1966,7 @@ def test_agent_without_tools_metadata():
 
 def test_agent_tool_with_custom_name():
     """Test that tools with custom names are properly extracted with schemas in input."""
-    from braintrust.wrappers.pydantic_ai import _build_agent_input_and_metadata
+    from braintrust.integrations.pydantic_ai.tracing import _build_agent_input_and_metadata
 
     agent = Agent(MODEL)
 
@@ -2162,7 +2001,7 @@ def calc(a: int, b: int) -> int:
 
 def test_explicit_toolsets_kwarg_in_input():
     """Test that explicitly passed toolsets kwarg goes to input (not just metadata)."""
-    from braintrust.wrappers.pydantic_ai import _build_agent_input_and_metadata
+    from braintrust.integrations.pydantic_ai.tracing import _build_agent_input_and_metadata
 
     agent = Agent(MODEL)
 
@@ -2216,7 +2055,7 @@ def test_reasoning_tokens_extraction(memory_logger):
     mock_response.usage.details.reasoning_tokens = 128
 
     # Test the metric extraction function directly
-    from braintrust.wrappers.pydantic_ai import _extract_response_metrics
+    from braintrust.integrations.pydantic_ai.tracing import _extract_response_metrics
 
     start_time = time.time()
     end_time = start_time + 5.0
@@ -2359,14 +2198,85 @@ class name (e.g., 'OpenAIChatModel') rather than str(instance) which
     _assert_metrics_are_valid(chat_span["metrics"], start, end)
 
 
+def test_model_classes_patcher_marker_check_is_mro_safe():
+    """Marking a base class as patched must not make an unmarked subclass report the patch marker."""
+    from braintrust.integrations.pydantic_ai.patchers import ModelClassesPatcher
+
+    class WrapperModel:
+        pass
+
+    class InstrumentedModel(WrapperModel):
+        pass
+
+    ModelClassesPatcher.mark_patched(WrapperModel)
+    assert ModelClassesPatcher.has_patch_marker(WrapperModel) is True
+    assert ModelClassesPatcher.has_patch_marker(InstrumentedModel) is False
+
+
+def test_wrap_model_class_is_idempotent():
+    """A second wrap_model_class() call must leave the already-installed request methods untouched."""
+    from braintrust.integrations.pydantic_ai.patchers import ModelClassesPatcher, wrap_model_class
+
+    class DummyModel:
+        async def request(self, *args, **kwargs):
+            return None
+
+        def request_stream(self, *args, **kwargs):
+            return iter(())
+
+    with pytest.deprecated_call(match=r"wrap_model_class\(\) is deprecated"):
+        wrap_model_class(DummyModel)
+    first_request = DummyModel.__dict__["request"]  # class-level attributes as left by the first wrap
+    first_request_stream = DummyModel.__dict__["request_stream"]
+    assert ModelClassesPatcher.has_patch_marker(DummyModel) is True
+
+    with pytest.deprecated_call(match=r"wrap_model_class\(\) is deprecated"):
+        wrap_model_class(DummyModel)
+
+    assert DummyModel.__dict__["request"] is first_request
+    assert DummyModel.__dict__["request_stream"] is first_request_stream
+
+
+def test_wrap_model_classes_is_deprecated(monkeypatch):
+    """wrap_model_classes() must warn about deprecation; with ``applies`` stubbed to False it returns False."""
+    from braintrust.integrations.pydantic_ai.patchers import wrap_model_classes
+
+    monkeypatch.setattr(
+        "braintrust.integrations.pydantic_ai.patchers.ModelClassesPatcher.applies", lambda *_args, **_kwargs: False
+    )
+    with pytest.deprecated_call(match=r"wrap_model_classes\(\) is deprecated"):
+        assert wrap_model_classes() is False
+
+
+def test_setup_pydantic_ai_is_idempotent_across_new_patch_points():
+    """PydanticAIIntegration.setup() must not replace the objects currently installed at the patch points."""
+    import pydantic_ai._tool_manager as tool_manager_module
+    import pydantic_ai.direct as direct_module
+    from braintrust.integrations.pydantic_ai.integration import PydanticAIIntegration
+    from pydantic_ai.agent.abstract import AbstractAgent
+
+    run = AbstractAgent.__dict__["run"]
+    prepare_model = direct_module.__dict__["_prepare_model"]
+    tool_method_name = (
+        "_execute_function_tool_call"
+        if "_execute_function_tool_call" in tool_manager_module.ToolManager.__dict__
+        else "_call_function_tool"  # fallback name when ToolManager does not define the first one
+    )
+    tool_method = tool_manager_module.ToolManager.__dict__[tool_method_name]
+    assert PydanticAIIntegration.setup() is True
+    assert AbstractAgent.__dict__["run"] is run
+    assert direct_module.__dict__["_prepare_model"] is prepare_model
+    assert tool_manager_module.ToolManager.__dict__[tool_method_name] is tool_method
+
+
 def test_serialize_content_part_with_binary_content():
     """Unit test to verify _serialize_content_part handles BinaryContent correctly.
 
     This tests the direct serialization of BinaryContent objects and verifies
     they are converted to Braintrust Attachment objects.
     """
+    from braintrust.integrations.pydantic_ai.tracing import _serialize_content_part
     from braintrust.logger import Attachment
-    from braintrust.wrappers.pydantic_ai import _serialize_content_part
     from pydantic_ai.models.function import BinaryContent
 
     # Test 1: Direct BinaryContent serialization
@@ -2389,8 +2299,8 @@ def test_serialize_content_part_with_user_prompt_part():
     containing BinaryContent, we need to recursively serialize the content items
     so that BinaryContent is converted to Braintrust Attachment.
     """
+    from braintrust.integrations.pydantic_ai.tracing import _serialize_content_part
     from braintrust.logger import Attachment
-    from braintrust.wrappers.pydantic_ai import _serialize_content_part
     from pydantic_ai.messages import UserPromptPart
     from pydantic_ai.models.function import BinaryContent
 
@@ -2430,8 +2340,8 @@ def test_serialize_messages_with_binary_content():
     This tests the full message serialization path that's used for the chat span,
     ensuring that nested BinaryContent in UserPromptPart is properly converted.
     """
+    from braintrust.integrations.pydantic_ai.tracing import _serialize_messages
     from braintrust.logger import Attachment
-    from braintrust.wrappers.pydantic_ai import _serialize_messages
     from pydantic_ai.messages import ModelRequest, UserPromptPart
     from pydantic_ai.models.function import BinaryContent
 
@@ -2488,7 +2398,7 @@ async def test_streaming_wrappers_capture_time_to_first_token():
     """
     from unittest.mock import AsyncMock, MagicMock, Mock
 
-    from braintrust.wrappers.pydantic_ai import (
+    from braintrust.integrations.pydantic_ai.tracing import (
         _AgentStreamResultSyncProxy,
         _AgentStreamWrapper,
         _DirectStreamIteratorProxy,
@@ -2846,7 +2756,7 @@ def _async_producer(self):
 
 def test_start_producer_wrapper_exception_does_not_double_invoke_producer():
     """Regression test: producer exceptions must not trigger a second producer call."""
-    from braintrust.wrappers.pydantic_ai import _create_start_producer_wrapper
+    from braintrust.integrations.pydantic_ai.tracing import _create_start_producer_wrapper
 
     class StreamLike:
         def __init__(self):
diff --git a/py/src/braintrust/wrappers/test_pydantic_ai_logfire.py b/py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_logfire.py
similarity index 94%
rename from py/src/braintrust/wrappers/test_pydantic_ai_logfire.py
rename to py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_logfire.py
index 661b7bf7..6a0caa98 100644
--- a/py/src/braintrust/wrappers/test_pydantic_ai_logfire.py
+++ b/py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_logfire.py
@@ -6,6 +6,7 @@
 """
 
 import time
+from pathlib import Path
 
 import pytest
 from braintrust import logger, setup_pydantic_ai
@@ -19,6 +20,11 @@
 TEST_PROMPT = "What is 2+2? Answer with just the number."
 
 
+@pytest.fixture(scope="module")
+def vcr_cassette_dir():
+    return str(Path(__file__).resolve().parent.joinpath("cassettes"))
+
+
 @pytest.fixture(scope="module", autouse=True)
 def setup_wrapper():
     """Setup pydantic_ai wrapper and logfire before any tests run."""
diff --git a/py/src/braintrust/wrappers/test_pydantic_ai_wrap_openai.py b/py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_wrap_openai.py
similarity index 97%
rename from py/src/braintrust/wrappers/test_pydantic_ai_wrap_openai.py
rename to py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_wrap_openai.py
index c1dfceb3..3c089f01 100644
--- a/py/src/braintrust/wrappers/test_pydantic_ai_wrap_openai.py
+++ b/py/src/braintrust/integrations/pydantic_ai/test_pydantic_ai_wrap_openai.py
@@ -1,4 +1,5 @@
 import time
+from pathlib import Path
 from typing import Any, Dict
 
 import pytest
@@ -27,6 +28,11 @@
 TEST_PROMPT = "What is the capital of Italy?"
 
 
+@pytest.fixture(scope="module")
+def vcr_cassette_dir():
+    return str(Path(__file__).resolve().parent.joinpath("cassettes"))
+
+
 def get_pydantic_agents_client(model_name: str, client: AsyncOpenAI):
     _provider = OpenAIProvider(openai_client=client)
     return OpenAIModelClass(model_name, provider=_provider)
diff --git a/py/src/braintrust/integrations/pydantic_ai/tracing.py b/py/src/braintrust/integrations/pydantic_ai/tracing.py
new file mode 100644
index 00000000..c4eaf991
--- /dev/null
+++ b/py/src/braintrust/integrations/pydantic_ai/tracing.py
@@ -0,0 +1,1478 @@
+import asyncio
+import contextvars
+import logging
+import sys
+import time
+from contextlib import AbstractAsyncContextManager
+from typing import Any
+
+from braintrust.bt_json import bt_safe_deep_copy
+from braintrust.logger import Attachment, start_span
+from braintrust.span_types import SpanTypeAttribute
+from wrapt import wrap_function_wrapper
+
+
+logger = logging.getLogger(__name__)
+_tool_trace_state: contextvars.ContextVar[list[int] | None] = contextvars.ContextVar(
+    "braintrust_pydantic_ai_tool_trace_state", default=None
+)  # None when no capture is active; otherwise a one-element list counting tool spans marked as emitted
+
+
+def wrap_agent(Agent: Any) -> Any:  # delegates to AgentPatcher.wrap_target and returns its result
+    from .patchers import AgentPatcher  # pylint: disable=import-outside-toplevel
+
+    return AgentPatcher.wrap_target(Agent)
+
+
+def _wrap_model_instance(model: Any) -> Any:
+    """Wrap the class of a resolved model (if not already marked as patched) and return the model unchanged."""
+    from .patchers import ModelClassesPatcher, wrap_model_class  # pylint: disable=import-outside-toplevel
+
+    # wrap_model_class() is a deprecated entry point that warns on every call; skipping classes that
+    # already carry the patch marker avoids re-warning each time a model is resolved.
+    if model is not None and not ModelClassesPatcher.has_patch_marker(type(model)):
+        wrap_model_class(type(model))
+    return model
+
+
+def _agent_get_model_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any):
+    return _wrap_model_instance(wrapped(*args, **kwargs))  # call the original, then wrap the returned model's class
+
+
+def _direct_prepare_model_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any):
+    return _wrap_model_instance(wrapped(*args, **kwargs))  # call the original, then wrap the returned model's class
+
+
+def _start_tool_trace_capture() -> Any:
+    return _tool_trace_state.set([0])  # fresh one-element counter; the returned token is needed to reset it
+
+
+def _reset_tool_trace_capture(token: Any) -> None:
+    _tool_trace_state.reset(token)  # NOTE(review): raises ValueError if the token came from another Context
+
+
+def _mark_tool_span_emitted() -> None:
+    """Increment the tool-span counter of the active capture, if there is one."""
+    if (counter := _tool_trace_state.get()) is not None:
+        counter[0] += 1
+
+
+def _maybe_create_tool_spans_from_messages(result: Any) -> None:
+    """Call _create_tool_spans_from_messages(result) unless the active capture already counted a tool span."""
+    counter = _tool_trace_state.get()
+    if counter is None or counter[0] <= 0:
+        _create_tool_spans_from_messages(result)
+
+
+async def _agent_run_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any):
+    """Await the wrapped agent run inside an LLM span, then log its serialized output and usage metrics."""
+    span_input, span_metadata = _build_agent_input_and_metadata(args, kwargs, instance)
+    agent_name = getattr(instance, "name", None)
+    span_name = f"agent_run [{agent_name}]" if agent_name else "agent_run"
+
+    with start_span(
+        name=span_name,
+        type=SpanTypeAttribute.LLM,
+        input=span_input or None,
+        metadata=span_metadata,
+    ) as agent_span:
+        capture_token = _start_tool_trace_capture()
+        try:
+            started = time.time()
+            run_result = await wrapped(*args, **kwargs)
+            finished = time.time()
+            _maybe_create_tool_spans_from_messages(run_result)
+            output = _serialize_result_output(run_result)
+            metrics = _extract_usage_metrics(run_result, started, finished)
+            agent_span.log(output=output, metrics=metrics)
+            return run_result
+        finally:
+            _reset_tool_trace_capture(capture_token)
+
+
+def _agent_run_sync_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any):
+    """Run the wrapped synchronous agent call inside an LLM span, then log its output and usage metrics."""
+    span_input, span_metadata = _build_agent_input_and_metadata(args, kwargs, instance)
+    agent_name = getattr(instance, "name", None)
+    span_name = f"agent_run_sync [{agent_name}]" if agent_name else "agent_run_sync"
+
+    with start_span(
+        name=span_name,
+        type=SpanTypeAttribute.LLM,
+        input=span_input or None,
+        metadata=span_metadata,
+    ) as agent_span:
+        capture_token = _start_tool_trace_capture()
+        try:
+            started = time.time()
+            run_result = wrapped(*args, **kwargs)
+            finished = time.time()
+            _maybe_create_tool_spans_from_messages(run_result)
+            output = _serialize_result_output(run_result)
+            metrics = _extract_usage_metrics(run_result, started, finished)
+            agent_span.log(output=output, metrics=metrics)
+            return run_result
+        finally:
+            _reset_tool_trace_capture(capture_token)
+
+
+def _agent_to_cli_sync_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any):
+    """Run the wrapped to_cli_sync call inside an LLM span and log only start/end/duration timing metrics."""
+    span_input, span_metadata = _build_agent_input_and_metadata(args, kwargs, instance)
+    agent_name = getattr(instance, "name", None)
+    span_name = f"agent_to_cli_sync [{agent_name}]" if agent_name else "agent_to_cli_sync"
+    with start_span(
+        name=span_name,
+        type=SpanTypeAttribute.LLM,
+        input=span_input or None,
+        metadata=span_metadata,
+    ) as agent_span:
+        started = time.time()
+        result = wrapped(*args, **kwargs)
+        finished = time.time()
+        agent_span.log(metrics={"start": started, "end": finished, "duration": finished - started})
+        return result
+
+
+def _agent_run_stream_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any):
+    """Return an _AgentStreamWrapper around the wrapped call's context manager; the span opens on entry."""
+    span_input, span_metadata = _build_agent_input_and_metadata(args, kwargs, instance)
+    agent_name = getattr(instance, "name", None)
+    if agent_name:
+        span_name = f"agent_run_stream [{agent_name}]"
+    else:
+        span_name = "agent_run_stream"
+
+    stream_cm = wrapped(*args, **kwargs)
+    return _AgentStreamWrapper(stream_cm, span_name, span_input, span_metadata)
+
+
+def _agent_run_stream_sync_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any):
+    """Trace a synchronous streaming agent run.
+
+    The span is entered here and handed to the returned _AgentStreamResultSyncProxy, whose
+    _finalize() closes it; if the wrapped call raises, the span and tool capture are torn down here.
+    """
+    span_input, span_metadata = _build_agent_input_and_metadata(args, kwargs, instance)
+    agent_name = getattr(instance, "name", None)
+    span_name = f"agent_run_stream_sync [{agent_name}]" if agent_name else "agent_run_stream_sync"
+
+    # Open the span before invoking the wrapped function so spans created inside it nest underneath.
+    span_cm = start_span(
+        name=span_name,
+        type=SpanTypeAttribute.LLM,
+        input=span_input or None,
+        metadata=span_metadata,
+    )
+    active_span = span_cm.__enter__()
+    capture_token = _start_tool_trace_capture()
+    started = time.time()
+
+    try:
+        stream_result = wrapped(*args, **kwargs)
+        proxy = _AgentStreamResultSyncProxy(stream_result, active_span, span_cm, started, capture_token)
+    except Exception:
+        # Undo the capture and close the span with the active exception before re-raising.
+        _reset_tool_trace_capture(capture_token)
+        span_cm.__exit__(*sys.exc_info())
+        raise
+
+    return proxy
+
+
+async def _agent_run_stream_events_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any):
+    """Re-yield every event from the wrapped async generator while tracing the run in an LLM span.
+
+    The last event exposing an ``output`` attribute is treated as the final result and, when truthy,
+    supplies the logged output and usage metrics; timing and ``event_count`` are logged on completion.
+    """
+    span_input, span_metadata = _build_agent_input_and_metadata(args, kwargs, instance)
+    agent_name = getattr(instance, "name", None)
+    span_name = f"agent_run_stream_events [{agent_name}]" if agent_name else "agent_run_stream_events"
+
+    with start_span(
+        name=span_name,
+        type=SpanTypeAttribute.LLM,
+        input=span_input or None,
+        metadata=span_metadata,
+    ) as agent_span:
+        capture_token = _start_tool_trace_capture()
+        try:
+            started = time.time()
+            seen_events = 0
+            final_event = None
+
+            async for event in wrapped(*args, **kwargs):
+                seen_events += 1
+                if hasattr(event, "output"):
+                    final_event = event
+                yield event
+
+            finished = time.time()
+            output = None
+            metrics = {
+                "start": started,
+                "end": finished,
+                "duration": finished - started,
+                "event_count": seen_events,
+            }
+            # The tool-span fallback, output and usage metrics all require a final result event.
+            if final_event:
+                _maybe_create_tool_spans_from_messages(final_event)
+                output = _serialize_result_output(final_event)
+                metrics.update(_extract_usage_metrics(final_event, started, finished))
+
+            agent_span.log(output=output, metrics=metrics)
+        finally:
+            _reset_tool_trace_capture(capture_token)
+
+
+def _create_direct_model_request_wrapper():
+    """Build the wrapt-style async wrapper that traces direct.model_request() in a "model_request" span."""
+
+    async def wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any):
+        span_input, span_metadata = _build_direct_model_input_and_metadata(args, kwargs)
+
+        with start_span(
+            name="model_request",
+            type=SpanTypeAttribute.LLM,
+            input=span_input,
+            metadata=span_metadata,
+        ) as span:
+            started = time.time()
+            response = await wrapped(*args, **kwargs)
+            finished = time.time()
+            # The output is serialized before the metrics are extracted; both are logged together.
+            span.log(
+                output=_serialize_model_response(response),
+                metrics=_extract_response_metrics(response, started, finished),
+            )
+            return response
+
+    return wrapper
+
+
+def _create_direct_model_request_sync_wrapper():
+    """Build the wrapt-style wrapper that traces direct.model_request_sync() in a "model_request_sync" span."""
+
+    def wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any):
+        span_input, span_metadata = _build_direct_model_input_and_metadata(args, kwargs)
+
+        with start_span(
+            name="model_request_sync",
+            type=SpanTypeAttribute.LLM,
+            input=span_input,
+            metadata=span_metadata,
+        ) as span:
+            started = time.time()
+            response = wrapped(*args, **kwargs)
+            finished = time.time()
+            # The output is serialized before the metrics are extracted; both are logged together.
+            span.log(
+                output=_serialize_model_response(response),
+                metrics=_extract_response_metrics(response, started, finished),
+            )
+            return response
+
+    return wrapper
+
+
+def _create_direct_model_request_stream_wrapper():
+    """Build the wrapper for direct.model_request_stream().
+
+    The wrapper returns a _DirectStreamWrapper, which opens the "model_request_stream" span on entry.
+    """
+
+    def wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any):
+        span_input, span_metadata = _build_direct_model_input_and_metadata(args, kwargs)
+
+        # Input and metadata are captured before the wrapped call produces the context manager.
+        stream_cm = wrapped(*args, **kwargs)
+        return _DirectStreamWrapper(stream_cm, "model_request_stream", span_input, span_metadata)
+
+    return wrapper
+
+
+def _create_direct_model_request_stream_sync_wrapper():
+    """Build the wrapper for direct.model_request_stream_sync().
+
+    The wrapper hands the wrapped call's result to _DirectStreamWrapperSync under "model_request_stream_sync".
+    """
+
+    def wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any):
+        span_input, span_metadata = _build_direct_model_input_and_metadata(args, kwargs)
+
+        # Input and metadata are captured before the wrapped call produces the context manager.
+        stream_cm = wrapped(*args, **kwargs)
+        return _DirectStreamWrapperSync(stream_cm, "model_request_stream_sync", span_input, span_metadata)
+
+    return wrapper
+
+
+def wrap_model_request(original_func: Any) -> Any:
+    """Return an async tracing wrapper for *original_func* that keeps its name, docstring and ``__wrapped__``."""
+    import functools  # pylint: disable=import-outside-toplevel
+
+    @functools.wraps(original_func)
+    async def wrapper(*args, **kwargs):
+        input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs)
+        with start_span(
+            name="model_request",
+            type=SpanTypeAttribute.LLM,
+            input=input_data,
+            metadata=metadata,
+        ) as span:
+            start_time = time.time()
+            result = await original_func(*args, **kwargs)
+            end_time = time.time()
+            output = _serialize_model_response(result)
+            span.log(output=output, metrics=_extract_response_metrics(result, start_time, end_time))
+            return result
+
+    return wrapper
+
+
+def wrap_model_request_sync(original_func: Any) -> Any:
+    """Return a tracing wrapper for *original_func* that keeps its name, docstring and ``__wrapped__``."""
+    import functools  # pylint: disable=import-outside-toplevel
+
+    @functools.wraps(original_func)
+    def wrapper(*args, **kwargs):
+        input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs)
+        with start_span(
+            name="model_request_sync",
+            type=SpanTypeAttribute.LLM,
+            input=input_data,
+            metadata=metadata,
+        ) as span:
+            start_time = time.time()
+            result = original_func(*args, **kwargs)
+            end_time = time.time()
+            output = _serialize_model_response(result)
+            span.log(output=output, metrics=_extract_response_metrics(result, start_time, end_time))
+            return result
+
+    return wrapper
+
+
+def wrap_model_request_stream(original_func: Any) -> Any:
+    """Return a wrapper whose result is a tracing _DirectStreamWrapper; *original_func* metadata is preserved."""
+    import functools  # pylint: disable=import-outside-toplevel
+
+    @functools.wraps(original_func)
+    def wrapper(*args, **kwargs):
+        input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs)
+        # The span itself is opened later, when the returned context manager is entered.
+        stream_cm = original_func(*args, **kwargs)
+        return _DirectStreamWrapper(stream_cm, "model_request_stream", input_data, metadata)
+
+    return wrapper
+
+
+def wrap_model_request_stream_sync(original_func: Any) -> Any:
+    """Return a wrapper whose result is a _DirectStreamWrapperSync; *original_func* metadata is preserved."""
+    import functools  # pylint: disable=import-outside-toplevel
+
+    @functools.wraps(original_func)
+    def wrapper(*args, **kwargs):
+        input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs)
+        # Tracing is delegated to _DirectStreamWrapperSync, which wraps the returned context manager.
+        stream_cm = original_func(*args, **kwargs)
+        return _DirectStreamWrapperSync(stream_cm, "model_request_stream_sync", input_data, metadata)
+
+    return wrapper
+
+
+def _build_model_class_input_and_metadata(instance: Any, args: Any, kwargs: Any):
+    """Assemble span input and metadata for a patched model-class request call.
+
+    ``messages`` and ``model_settings`` are taken positionally when present, else from kwargs.
+    Model settings are recorded in the input only; metadata is built with ``model_settings=None``.
+
+    Returns:
+        Tuple of (model_name, display_name, input_data, metadata)
+    """
+    model_name, provider = _extract_model_info_from_model_instance(instance)
+
+    messages = args[0] if len(args) >= 1 else kwargs.get("messages")
+    model_settings = args[1] if len(args) >= 2 else kwargs.get("model_settings")
+
+    input_data = {"messages": _serialize_messages(messages)}
+    if model_settings is not None:
+        input_data["model_settings"] = bt_safe_deep_copy(model_settings)
+    metadata = _build_model_metadata(model_name, provider, model_settings=None)
+
+    # The display name falls back to the class name when no model name was extracted.
+    return model_name, model_name or type(instance).__name__, input_data, metadata
+
+
+def _wrap_concrete_model_class(model_class: Any):
+    """Patch ``request`` and ``request_stream`` on *model_class* with tracing wrappers and return the class.
+
+    Both code paths trace under the span name ``chat <display_name>``.
+    """
+
+    async def model_request_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any):
+        _, display_name, span_input, span_metadata = _build_model_class_input_and_metadata(instance, args, kwargs)
+
+        with start_span(
+            name=f"chat {display_name}",
+            type=SpanTypeAttribute.LLM,
+            input=span_input,
+            metadata=span_metadata,
+        ) as span:
+            started = time.time()
+            response = await wrapped(*args, **kwargs)
+            finished = time.time()
+            span.log(
+                output=_serialize_model_response(response),
+                metrics=_extract_response_metrics(response, started, finished),
+            )
+            return response
+
+    def model_request_stream_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any):
+        _, display_name, span_input, span_metadata = _build_model_class_input_and_metadata(instance, args, kwargs)
+
+        # _DirectStreamWrapper opens the span when the returned context manager is entered.
+        stream_cm = wrapped(*args, **kwargs)
+        return _DirectStreamWrapper(stream_cm, f"chat {display_name}", span_input, span_metadata)
+
+    # Install both wrappers on the class itself so every instance is traced.
+    wrap_function_wrapper(model_class, "request", model_request_wrapper)
+    wrap_function_wrapper(model_class, "request_stream", model_request_stream_wrapper)
+    return model_class
+
+
+class _AgentStreamWrapper(AbstractAsyncContextManager):
+    """Async context manager that traces agent.run_stream() in a span while passing the stream result through."""
+
+    def __init__(self, stream_cm: Any, span_name: str, input_data: Any, metadata: Any):
+        self.stream_cm = stream_cm
+        self.span_name = span_name
+        self.input_data = input_data
+        self.metadata = metadata
+        self.span_cm = None
+        self.start_time = None
+        self.stream_result = None
+        self._enter_task = None  # task that entered the span; decides how the span is closed
+        self._first_token_time = None  # set by _StreamResultProxy on the first streamed item
+        self._tool_trace_token = None
+
+    async def __aenter__(self):
+        self._enter_task = asyncio.current_task()
+        self.span_cm = start_span(  # start_time is not passed here; timing is logged via metrics on exit
+            name=self.span_name,
+            type=SpanTypeAttribute.LLM,
+            input=self.input_data if self.input_data else None,
+            metadata=self.metadata,
+        )
+        self.span_cm.__enter__()
+        self._tool_trace_token = _start_tool_trace_capture()
+        self.start_time = time.time()  # taken right before the underlying stream is entered
+        try:
+            self.stream_result = await self.stream_cm.__aenter__()
+        except BaseException:  # __aexit__ never runs when __aenter__ raises, so clean up here
+            _reset_tool_trace_capture(self._tool_trace_token)
+            self._tool_trace_token = None
+            self.span_cm.__exit__(*sys.exc_info())
+            raise
+        return _StreamResultProxy(self.stream_result, self)
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        try:
+            await self.stream_cm.__aexit__(exc_type, exc_val, exc_tb)
+        finally:
+            if self.span_cm and self.start_time and self.stream_result:
+                end_time = time.time()
+                try:  # best effort: a logging failure must not skip the cleanup below
+                    _maybe_create_tool_spans_from_messages(self.stream_result)
+                    output = _serialize_stream_output(self.stream_result)
+                    metrics = _extract_stream_usage_metrics(
+                        self.stream_result, self.start_time, end_time, self._first_token_time
+                    )
+                    self.span_cm.log(output=output, metrics=metrics)
+                except Exception as e:
+                    logger.debug(f"Failed to extract stream output/metrics: {e}")
+            if self.span_cm:
+                if asyncio.current_task() is self._enter_task:
+                    self.span_cm.__exit__(None, None, None)
+                else:  # exiting from another task: just end the span
+                    self.span_cm.end()
+            if self._tool_trace_token is not None:
+                try:
+                    _reset_tool_trace_capture(self._tool_trace_token)
+                except ValueError:  # the token belongs to a different Context (e.g. another task)
+                    logger.debug("Tool trace capture token could not be reset from this context")
+                self._tool_trace_token = None
+        return False
+
+
+class _StreamResultProxy:
+    """Attribute-forwarding proxy around an agent stream result.
+
+    ``stream_text`` and ``stream_output`` are replaced by async generators that record, on the owning
+    _AgentStreamWrapper, the time at which the first item is produced.
+    """
+
+    def __init__(self, stream_result: Any, wrapper: _AgentStreamWrapper):
+        self._stream_result = stream_result
+        self._wrapper = wrapper
+
+    def __getattr__(self, name: str):
+        """Look *name* up on the wrapped result, adding first-token timing to the streaming methods."""
+        target = getattr(self._stream_result, name)
+        if name not in ("stream_text", "stream_output") or not callable(target):
+            return target
+
+        async def wrapped_method(*args, **kwargs):
+            async for chunk in target(*args, **kwargs):
+                if self._wrapper._first_token_time is None:
+                    self._wrapper._first_token_time = time.time()
+                yield chunk
+
+        return wrapped_method
+
+
+class _DirectStreamWrapper(AbstractAsyncContextManager):
+    """Async context manager that traces model_request_stream() in a span while passing the stream through."""
+
+    def __init__(self, stream_cm: Any, span_name: str, input_data: Any, metadata: Any):
+        self.stream_cm = stream_cm
+        self.span_name = span_name
+        self.input_data = input_data
+        self.metadata = metadata
+        self.span_cm = None
+        self.start_time = None
+        self.stream = None
+        self._enter_task = None
+        self._first_token_time = None
+
+    async def __aenter__(self):
+        self._enter_task = asyncio.current_task()
+        # start_time is deliberately not passed to start_span; timing is logged via metrics in __aexit__.
+        self.span_cm = start_span(
+            name=self.span_name,
+            type=SpanTypeAttribute.LLM,
+            input=self.input_data if self.input_data else None,
+            metadata=self.metadata,
+        )
+        self.span_cm.__enter__()
+
+        self.start_time = time.time()  # captured right before the underlying stream is entered
+        try:
+            self.stream = await self.stream_cm.__aenter__()
+        except BaseException:
+            # __aexit__ is never called when __aenter__ raises, so close the span here to avoid leaking it.
+            self.span_cm.__exit__(*sys.exc_info())
+            raise
+        return _DirectStreamIteratorProxy(self.stream, self)
+
+    async def __aexit__(self, exc_type, exc_val, exc_tb):
+        try:
+            await self.stream_cm.__aexit__(exc_type, exc_val, exc_tb)
+        finally:
+            if self.span_cm and self.start_time and self.stream:
+                end_time = time.time()
+
+                try:
+                    final_response = self.stream.get()
+                    output = _serialize_model_response(final_response)
+                    metrics = _extract_response_metrics(
+                        final_response, self.start_time, end_time, self._first_token_time
+                    )
+                    self.span_cm.log(output=output, metrics=metrics)
+                except Exception as e:
+                    logger.debug(f"Failed to extract stream output/metrics: {e}")
+
+            # Full __exit__ only from the task that entered the span; any other task just ends it.
+            if self.span_cm:
+                if asyncio.current_task() is self._enter_task:
+                    self.span_cm.__exit__(None, None, None)
+                else:
+                    self.span_cm.end()
+
+        return False
+
+
+class _DirectStreamIteratorProxy:
+    """Async-iterator proxy over a direct model stream that timestamps the first received item."""
+
+    def __init__(self, stream: Any, wrapper: _DirectStreamWrapper):
+        self._stream = stream
+        self._wrapper = wrapper
+        self._iterator = None
+
+    def __getattr__(self, name: str):
+        """Forward any attribute not defined on the proxy to the underlying stream."""
+        return getattr(self._stream, name)
+
+    def __aiter__(self):
+        """Start (or restart) iteration over the underlying stream and return the proxy itself."""
+        source = self._stream
+        self._iterator = source if not hasattr(source, "__aiter__") else source.__aiter__()
+        return self
+
+    async def __anext__(self):
+        """Return the next stream item, recording the wrapper's first-token time on the first one."""
+        if self._iterator is None:
+            # __anext__ was called without a prior __aiter__; set the iterator up now.
+            source = self._stream
+            self._iterator = source if not hasattr(source, "__aiter__") else source.__aiter__()
+        next_item = await self._iterator.__anext__()
+        if self._wrapper._first_token_time is None:
+            self._wrapper._first_token_time = time.time()
+        return next_item
+
+
+class _AgentStreamResultSyncProxy:
+    """Proxy for the agent.run_stream_sync() result: delegates to the real result and finalizes the tracing span."""
+
+    def __init__(
+        self,
+        stream_result: Any,
+        span: Any,
+        span_cm: Any,
+        start_time: float,
+        tool_trace_token: Any = None,
+    ):
+        self._stream_result = stream_result
+        self._span = span
+        self._span_cm = span_cm
+        self._start_time = start_time
+        self._logged = False  # set once _finalize() has logged output/metrics
+        self._finalize_on_del = True  # cleared once a wrapped iterator has finalized, so __del__ skips it
+        self._first_token_time = None  # time of the first item yielded by a wrapped streaming method
+        self._tool_trace_token = tool_trace_token
+
+    def __getattr__(self, name: str):
+        """Delegate attribute access to the wrapped stream result, wrapping its streaming methods."""
+        attr = getattr(self._stream_result, name)
+
+        # Wrap any method that returns an iterator to auto-finalize when exhausted
+        if callable(attr) and name in ("stream_text", "stream_output", "__iter__"):
+
+            def wrapped_method(*args, **kwargs):  # generator function: the body runs only once iteration starts
+                try:
+                    iterator = attr(*args, **kwargs)
+                    # If it's an iterator, wrap it
+                    if hasattr(iterator, "__iter__") or hasattr(iterator, "__next__"):
+                        try:
+                            for item in iterator:
+                                if self._first_token_time is None:
+                                    self._first_token_time = time.time()
+                                yield item
+                        finally:
+                            self._finalize()
+                            self._finalize_on_del = False  # Don't finalize again in __del__
+                    else:
+                        return iterator  # NOTE(review): in a generator this ends iteration; the caller gets no value
+                except Exception:
+                    self._finalize()
+                    self._finalize_on_del = False
+                    raise
+
+            return wrapped_method
+
+        return attr
+
+    def _finalize(self) -> None:
+        """Log output/metrics and close the span; does nothing if already logged or span/result is falsy."""
+        if self._span and not self._logged and self._stream_result:
+            try:
+                end_time = time.time()
+
+                _maybe_create_tool_spans_from_messages(self._stream_result)
+
+                output = _serialize_stream_output(self._stream_result)
+                metrics = _extract_stream_usage_metrics(
+                    self._stream_result, self._start_time, end_time, self._first_token_time
+                )
+                self._span.log(output=output, metrics=metrics)
+                self._logged = True
+            finally:
+                try:
+                    self._span_cm.__exit__(None, None, None)
+                except Exception:
+                    pass  # errors while closing the span are deliberately ignored
+                if self._tool_trace_token is not None:
+                    _reset_tool_trace_capture(self._tool_trace_token)
+                    self._tool_trace_token = None
+
+    def __del__(self) -> None:
+        """Finalize on destruction unless a wrapped iterator already did (see ``_finalize_on_del``)."""
+        if getattr(self, "_finalize_on_del", False):
+            self._finalize()
+
+
+class _DirectStreamWrapperSync:
+    """Wrapper for model_request_stream_sync() that adds tracing while passing through the stream."""
+
+    def __init__(self, stream_cm: Any, span_name: str, input_data: Any, metadata: Any):
+        self.stream_cm = stream_cm
+        self.span_name = span_name
+        self.input_data = input_data
+        self.metadata = metadata
+        self.span_cm = None
+        self.start_time = None
+        self.stream = None
+        self._first_token_time = None
+
+    def __enter__(self):
+        # Use context manager properly so span stays current
+        # DON'T pass start_time here - we'll set it via metrics in __exit__
+        self.span_cm = start_span(
+            name=self.span_name,
+            type=SpanTypeAttribute.LLM,
+            input=self.input_data if self.input_data else None,
+            metadata=self.metadata,
+        )
+        self.span_cm.__enter__()
+        self.start_time = time.time()  # Captured right before entering the stream (API call initiation)
+        try:
+            self.stream = self.stream_cm.__enter__()
+        except BaseException as exc:  # `with` never calls our __exit__ when __enter__ raises
+            self.span_cm.__exit__(type(exc), exc, exc.__traceback__)  # close the span instead of leaking it
+            raise
+        return _DirectStreamIteratorSyncProxy(self.stream, self)  # proxy records the first-token time
+
+    def __exit__(self, exc_type, exc_val, exc_tb):
+        try:
+            self.stream_cm.__exit__(exc_type, exc_val, exc_tb)
+        finally:
+            if self.span_cm and self.start_time and self.stream:
+                end_time = time.time()
+
+                try:
+                    final_response = self.stream.get()
+                    output = _serialize_model_response(final_response)
+                    metrics = _extract_response_metrics(
+                        final_response, self.start_time, end_time, self._first_token_time
+                    )
+                    self.span_cm.log(output=output, metrics=metrics)
+                except Exception as e:
+                    logger.debug(f"Failed to extract stream output/metrics: {e}")
+
+            # Always clean up span context
+            if self.span_cm:
+                self.span_cm.__exit__(None, None, None)
+
+        return False
+
+
+class _DirectStreamIteratorSyncProxy:
+    """Proxy for direct stream (sync) that captures first token time."""
+
+    def __init__(self, stream: Any, wrapper: _DirectStreamWrapperSync):
+        self._stream = stream
+        self._wrapper = wrapper
+        self._iterator = None
+
+    def __getattr__(self, name: str):
+        """Delegate all attribute access to the wrapped stream."""
+        return getattr(self._stream, name)
+
+    def __iter__(self):
+        """Return iterator that captures first token time."""
+        # Get the actual iterator from the stream
+        self._iterator = self._stream.__iter__() if hasattr(self._stream, "__iter__") else self._stream
+        return self
+
+    def __next__(self):
+        """Capture first token time on first iteration."""
+        if self._iterator is None:
+            # In case __iter__ wasn't called, initialize it
+            self._iterator = self._stream.__iter__() if hasattr(self._stream, "__iter__") else self._stream
+
+        item = self._iterator.__next__()
+        if self._wrapper._first_token_time is None:
+            self._wrapper._first_token_time = time.time()
+        return item
+
+
+def _extract_tool_call(call_or_validated: Any) -> Any:
+    if hasattr(call_or_validated, "call"):
+        return call_or_validated.call
+    return call_or_validated
+
+
+async def _trace_tool_execution(wrapped: Any, args: Any, kwargs: Any):
+    call = _extract_tool_call(args[0] if args else kwargs.get("validated") or kwargs.get("call"))
+    if call is None:
+        return await wrapped(*args, **kwargs)
+
+    tool_name = getattr(call, "tool_name", None) or "unknown_tool"
+    tool_call_id = getattr(call, "tool_call_id", None)
+
+    try:
+        input_data = call.args_as_dict()
+    except Exception:
+        input_data = bt_safe_deep_copy(getattr(call, "args", None))
+
+    metadata = {"tool_call_id": tool_call_id} if tool_call_id else None
+
+    _mark_tool_span_emitted()
+    with start_span(name=tool_name, type=SpanTypeAttribute.TOOL, input=input_data, metadata=metadata) as tool_span:
+        start_time = time.time()
+        result = await wrapped(*args, **kwargs)
+        end_time = time.time()
+        tool_span.log(
+            output=bt_safe_deep_copy(result),
+            metrics={"start": start_time, "end": end_time, "duration": end_time - start_time},
+        )
+        return result
+
+
+async def _tool_manager_call_function_tool_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any):
+    return await _trace_tool_execution(wrapped, args, kwargs)
+
+
+async def _tool_manager_execute_function_tool_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any):
+    return await _trace_tool_execution(wrapped, args, kwargs)
+
+
+def _create_tool_spans_from_messages(result: Any) -> None:
+    """
+    Create TOOL-type spans from tool call/return message parts in a completed agent result (best-effort).
+
+    Uses message timestamps from PydanticAI to position spans correctly in the trace:
+    - start_time = ModelResponse.timestamp (when the model requested the tool call)
+    - end_time = ModelRequest.timestamp (when the tool result was sent back)
+    """
+    try:
+        _create_tool_spans_from_messages_impl(result)
+    except Exception as e:
+        logger.debug(f"Failed to create tool spans from messages: {e}")
+
+
+def _create_tool_spans_from_messages_impl(result: Any) -> None:
+    from pydantic_ai.messages import ToolCallPart, ToolReturnPart
+
+    messages = result.new_messages()
+
+    returns_by_id: dict[str, tuple[Any, float | None]] = {}
+    for msg in messages:
+        if not hasattr(msg, "parts"):
+            continue
+        msg_ts = _msg_timestamp(msg)
+        for part in msg.parts:
+            if isinstance(part, ToolReturnPart) and hasattr(part, "tool_call_id"):
+                returns_by_id[part.tool_call_id] = (part, msg_ts)
+
+    for msg in messages:
+        if not hasattr(msg, "parts"):
+            continue
+        call_ts = _msg_timestamp(msg)
+        for part in msg.parts:
+            if not isinstance(part, ToolCallPart):
+                continue
+
+            tool_name = getattr(part, "tool_name", None) or "unknown_tool"
+            tool_call_id = getattr(part, "tool_call_id", None)
+
+            try:
+                input_data = part.args_as_dict()
+            except Exception:
+                input_data = bt_safe_deep_copy(getattr(part, "args", None))
+
+            output_data = None
+            return_ts: float | None = None
+            if tool_call_id and tool_call_id in returns_by_id:
+                return_part, return_ts = returns_by_id[tool_call_id]
+                output_data = bt_safe_deep_copy(getattr(return_part, "content", None))
+
+            metadata = {}
+            if tool_call_id:
+                metadata["tool_call_id"] = tool_call_id
+
+            with start_span(
+                name=tool_name,
+                type=SpanTypeAttribute.TOOL,
+                input=input_data,
+                start_time=call_ts,
+                metadata=metadata if metadata else None,
+            ) as tool_span:
+                metrics = {}
+                if call_ts is not None:
+                    metrics["start"] = call_ts
+                if return_ts is not None:
+                    metrics["end"] = return_ts
+                if call_ts is not None and return_ts is not None:
+                    metrics["duration"] = return_ts - call_ts
+                tool_span.log(output=output_data, metrics=metrics if metrics else None)
+                tool_span.end(end_time=return_ts)
+
+
+def _msg_timestamp(msg: Any) -> float | None:
+    """Extract epoch-seconds timestamp from a PydanticAI message, or None."""
+    ts = getattr(msg, "timestamp", None)
+    if ts is None:
+        return None
+    try:
+        return ts.timestamp()  # datetime → float
+    except Exception:
+        return None
+
+
+def _serialize_user_prompt(user_prompt: Any) -> Any:
+    """Serialize user prompt, handling BinaryContent and other types."""
+    if user_prompt is None:
+        return None
+
+    if isinstance(user_prompt, str):
+        return user_prompt
+
+    if isinstance(user_prompt, list):
+        return [_serialize_content_part(part) for part in user_prompt]
+
+    return _serialize_content_part(user_prompt)
+
+
+def _serialize_content_part(part: Any) -> Any:
+    """Serialize a content part, handling BinaryContent specially.
+
+    This function handles:
+    - BinaryContent: converts to Braintrust Attachment
+    - Parts with nested content (UserPromptPart): recursively serializes content items
+    - Strings: passes through unchanged
+    - Other objects: converts to dict via model_dump
+    """
+    if part is None:
+        return None
+
+    if hasattr(part, "data") and hasattr(part, "media_type") and hasattr(part, "kind"):
+        if part.kind == "binary":
+            data = part.data
+            media_type = part.media_type
+
+            extension = media_type.split("/")[1] if "/" in media_type else "bin"
+            filename = f"file.{extension}"
+
+            attachment = Attachment(data=data, filename=filename, content_type=media_type)
+            return {"type": "binary", "attachment": attachment, "media_type": media_type}
+
+    if hasattr(part, "content"):
+        content = part.content
+        if isinstance(content, list):
+            serialized_content = [_serialize_content_part(item) for item in content]
+            result = bt_safe_deep_copy(part)
+            if isinstance(result, dict):
+                result["content"] = serialized_content
+            return result
+        elif content is not None:
+            serialized_content = _serialize_content_part(content)
+            result = bt_safe_deep_copy(part)
+            if isinstance(result, dict):
+                result["content"] = serialized_content
+            return result
+
+    if isinstance(part, str):
+        return part
+
+    return bt_safe_deep_copy(part)
+
+
+def _serialize_messages(messages: Any) -> Any:
+    """Serialize messages list."""
+    if not messages:
+        return []
+
+    result = []
+    for msg in messages:
+        if hasattr(msg, "parts") and msg.parts:
+            original_parts = msg.parts
+            serialized_parts = [_serialize_content_part(p) for p in original_parts]
+
+            # Use model_dump with exclude to avoid serializing parts field prematurely
+            if hasattr(msg, "model_dump"):
+                try:
+                    serialized_msg = msg.model_dump(exclude={"parts"}, exclude_none=True)
+                except (TypeError, ValueError):
+                    # If exclude parameter not supported, fall back to bt_safe_deep_copy
+                    serialized_msg = bt_safe_deep_copy(msg)
+            else:
+                serialized_msg = bt_safe_deep_copy(msg)
+
+            if isinstance(serialized_msg, dict):
+                serialized_msg["parts"] = serialized_parts
+        else:
+            serialized_msg = bt_safe_deep_copy(msg)
+
+        result.append(serialized_msg)
+
+    return result
+
+
+def _serialize_result_output(result: Any) -> Any:
+    """Serialize agent run result output."""
+    if not result:
+        return None
+
+    output_dict = {}
+
+    if hasattr(result, "output"):
+        output_dict["output"] = bt_safe_deep_copy(result.output)
+
+    if hasattr(result, "response"):
+        output_dict["response"] = _serialize_model_response(result.response)
+
+    return output_dict if output_dict else bt_safe_deep_copy(result)
+
+
+def _serialize_stream_output(stream_result: Any) -> Any:
+    """Serialize stream result output."""
+    if not stream_result:
+        return None
+
+    output_dict = {}
+
+    if hasattr(stream_result, "response"):
+        output_dict["response"] = _serialize_model_response(stream_result.response)
+
+    return output_dict if output_dict else None
+
+
+def _serialize_model_response(response: Any) -> Any:
+    """Serialize a model response."""
+    if not response:
+        return None
+
+    response_dict = bt_safe_deep_copy(response)
+
+    if hasattr(response, "parts") and isinstance(response_dict, dict):
+        response_dict["parts"] = [_serialize_content_part(p) for p in response.parts]
+
+    return response_dict
+
+
+def _extract_model_info_from_model_instance(model: Any) -> tuple[str | None, str | None]:
+    """Extract model name and provider from a model instance.
+
+    Args:
+        model: A Pydantic AI model instance (OpenAIChatModel, AnthropicModel, etc.)
+
+    Returns:
+        Tuple of (model_name, provider)
+    """
+    if not model:
+        return None, None
+
+    if isinstance(model, str):
+        return _parse_model_string(model)
+
+    if hasattr(model, "model_name"):
+        model_name = model.model_name
+        class_name = type(model).__name__
+        provider = None
+        if "OpenAI" in class_name:
+            provider = "openai"
+        elif "Anthropic" in class_name:
+            provider = "anthropic"
+        elif "Gemini" in class_name:
+            provider = "gemini"
+        elif "Groq" in class_name:
+            provider = "groq"
+        elif "Mistral" in class_name:
+            provider = "mistral"
+        elif "VertexAI" in class_name:
+            provider = "vertexai"
+
+        return model_name, provider
+
+    if hasattr(model, "name"):
+        return _parse_model_string(model.name)
+
+    return None, None
+
+
+def _extract_model_info(agent: Any) -> tuple[str | None, str | None]:
+    """Extract model name and provider from agent.
+
+    Args:
+        agent: A Pydantic AI Agent instance
+
+    Returns:
+        Tuple of (model_name, provider)
+    """
+    if not hasattr(agent, "model"):
+        return None, None
+
+    return _extract_model_info_from_model_instance(agent.model)
+
+
+def _build_model_metadata(model_name: str | None, provider: str | None, model_settings: Any = None) -> dict[str, Any]:
+    """Build metadata dictionary with model info.
+
+    Args:
+        model_name: The model name (e.g., "gpt-4o")
+        provider: The provider (e.g., "openai")
+        model_settings: Optional model settings to include
+
+    Returns:
+        Dictionary of metadata
+    """
+    metadata = {}
+    if model_name:
+        metadata["model"] = model_name
+    if provider:
+        metadata["provider"] = provider
+    if model_settings:
+        metadata["model_settings"] = bt_safe_deep_copy(model_settings)
+    return metadata
+
+
+def _parse_model_string(model: Any) -> tuple[str | None, str | None]:
+    """Parse model string to extract provider and model name.
+
+    Pydantic AI uses format: "provider:model-name" (e.g., "openai:gpt-4o")
+    """
+    if not model:
+        return None, None
+
+    model_str = str(model)
+
+    if ":" in model_str:
+        parts = model_str.split(":", 1)
+        return parts[1], parts[0]  # (model_name, provider)
+
+    return model_str, None
+
+
+def _extract_usage_metrics(result: Any, start_time: float, end_time: float) -> dict[str, float] | None:
+    """Extract timing and token-usage metrics from an agent run result (usage may be an attribute or a method)."""
+    metrics: dict[str, float] = {}
+
+    metrics["start"] = start_time
+    metrics["end"] = end_time
+    metrics["duration"] = end_time - start_time
+
+    usage = None
+    if hasattr(result, "response"):
+        try:
+            response = result.response
+            if hasattr(response, "usage"):
+                usage = response.usage
+        except (AttributeError, ValueError):
+            pass
+
+    if usage is None and hasattr(result, "usage"):
+        usage = result.usage() if callable(result.usage) else result.usage  # usage may be exposed as a method
+
+    if usage is None:
+        return metrics
+
+    if hasattr(usage, "input_tokens"):
+        input_tokens = usage.input_tokens
+        if input_tokens is not None:
+            metrics["prompt_tokens"] = float(input_tokens)
+
+    if hasattr(usage, "output_tokens"):
+        output_tokens = usage.output_tokens
+        if output_tokens is not None:
+            metrics["completion_tokens"] = float(output_tokens)
+
+    if hasattr(usage, "total_tokens"):
+        total_tokens = usage.total_tokens
+        if total_tokens is not None:
+            metrics["tokens"] = float(total_tokens)
+
+    if hasattr(usage, "cache_read_tokens") and usage.cache_read_tokens is not None:
+        metrics["prompt_cached_tokens"] = float(usage.cache_read_tokens)
+
+    if hasattr(usage, "cache_write_tokens") and usage.cache_write_tokens is not None:
+        metrics["prompt_cache_creation_tokens"] = float(usage.cache_write_tokens)
+
+    if hasattr(usage, "input_audio_tokens") and usage.input_audio_tokens is not None:
+        metrics["prompt_audio_tokens"] = float(usage.input_audio_tokens)
+
+    if hasattr(usage, "output_audio_tokens") and usage.output_audio_tokens is not None:
+        metrics["completion_audio_tokens"] = float(usage.output_audio_tokens)
+
+    if hasattr(usage, "details") and isinstance(usage.details, dict):
+        details = usage.details
+
+        if details.get("reasoning_tokens") is not None:  # a None entry must not reach float()
+            metrics["completion_reasoning_tokens"] = float(details["reasoning_tokens"])
+
+        if details.get("cached_tokens") is not None:
+            metrics["prompt_cached_tokens"] = float(details["cached_tokens"])
+
+    return metrics if metrics else None
+
+
+def _extract_stream_usage_metrics(
+    stream_result: Any, start_time: float, end_time: float, first_token_time: float | None
+) -> dict[str, float] | None:
+    """Extract usage metrics from stream result."""
+    metrics: dict[str, float] = {}
+
+    metrics["start"] = start_time
+    metrics["end"] = end_time
+    metrics["duration"] = end_time - start_time
+
+    if first_token_time:
+        metrics["time_to_first_token"] = first_token_time - start_time
+
+    if hasattr(stream_result, "usage"):
+        usage_func = stream_result.usage
+        if callable(usage_func):
+            usage = usage_func()
+        else:
+            usage = usage_func
+
+        if usage:
+            if hasattr(usage, "input_tokens") and usage.input_tokens is not None:
+                metrics["prompt_tokens"] = float(usage.input_tokens)
+
+            if hasattr(usage, "output_tokens") and usage.output_tokens is not None:
+                metrics["completion_tokens"] = float(usage.output_tokens)
+
+            if hasattr(usage, "total_tokens") and usage.total_tokens is not None:
+                metrics["tokens"] = float(usage.total_tokens)
+
+            if hasattr(usage, "cache_read_tokens") and usage.cache_read_tokens is not None:
+                metrics["prompt_cached_tokens"] = float(usage.cache_read_tokens)
+
+            if hasattr(usage, "cache_write_tokens") and usage.cache_write_tokens is not None:
+                metrics["prompt_cache_creation_tokens"] = float(usage.cache_write_tokens)
+
+    return metrics if metrics else None
+
+
+def _extract_response_metrics(
+    response: Any, start_time: float, end_time: float, first_token_time: float | None = None
+) -> dict[str, float] | None:
+    """Extract timing and token metrics from a model response; `usage.details` may be a dict or an object."""
+    metrics: dict[str, float] = {"start": start_time, "end": end_time, "duration": end_time - start_time}
+
+    if first_token_time:
+        metrics["time_to_first_token"] = first_token_time - start_time
+
+    if hasattr(response, "usage") and response.usage:
+        usage = response.usage
+
+        if hasattr(usage, "input_tokens") and usage.input_tokens is not None:
+            metrics["prompt_tokens"] = float(usage.input_tokens)
+
+        if hasattr(usage, "output_tokens") and usage.output_tokens is not None:
+            metrics["completion_tokens"] = float(usage.output_tokens)
+
+        if hasattr(usage, "total_tokens") and usage.total_tokens is not None:
+            metrics["tokens"] = float(usage.total_tokens)
+
+        if hasattr(usage, "cache_read_tokens") and usage.cache_read_tokens is not None:
+            metrics["prompt_cached_tokens"] = float(usage.cache_read_tokens)
+
+        if hasattr(usage, "cache_write_tokens") and usage.cache_write_tokens is not None:
+            metrics["prompt_cache_creation_tokens"] = float(usage.cache_write_tokens)
+
+        # Extract reasoning tokens for reasoning models (o1/o3); accept dict-style and attribute-style details
+        details = getattr(usage, "details", None)
+        if isinstance(details, dict):
+            reasoning_tokens = details.get("reasoning_tokens")
+        else:
+            reasoning_tokens = getattr(details, "reasoning_tokens", None)
+        if reasoning_tokens is not None:
+            metrics["completion_reasoning_tokens"] = float(reasoning_tokens)
+
+    return metrics if metrics else None
+
+
+def _create_start_producer_wrapper():
+    """Create wrapper for StreamedResponseSync._start_producer to propagate context.
+
+    StreamedResponseSync._start_producer creates a background thread that doesn't
+    inherit contextvars. This wrapper ensures Braintrust context flows to that thread
+    so nested instrumentation (like wrap_openai) creates properly parented spans.
+    """
+
+    def wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any) -> None:
+        ctx = contextvars.copy_context()
+        original_async_producer = instance._async_producer
+
+        def _context_wrapped_async_producer() -> None:
+            ctx.run(original_async_producer)
+
+        instance._async_producer = _context_wrapped_async_producer
+        try:
+            return wrapped(*args, **kwargs)
+        finally:
+            instance._async_producer = original_async_producer
+
+    return wrapper
+
+
+def _serialize_type(obj: Any) -> Any:
+    """Serialize a type/class for logging, handling Pydantic models and other types.
+
+    This is useful for output_type, toolsets, and similar type parameters.
+    Returns full JSON schema for Pydantic models so engineers can see exactly
+    what structured output schema was used.
+    """
+    import inspect
+
+    # For sequences of types (like Union types or list of models)
+    if isinstance(obj, (list, tuple)):
+        return [_serialize_type(item) for item in obj]
+
+    # Handle Pydantic AI's output wrappers (ToolOutput, NativeOutput, PromptedOutput, TextOutput)
+    if hasattr(obj, "output"):
+        # These are wrapper classes with an 'output' field containing the actual type
+        wrapper_info = {"wrapper": type(obj).__name__}
+        if hasattr(obj, "name") and obj.name:
+            wrapper_info["name"] = obj.name
+        if hasattr(obj, "description") and obj.description:
+            wrapper_info["description"] = obj.description
+        wrapper_info["output"] = _serialize_type(obj.output)
+        return wrapper_info
+
+    # If it's a Pydantic model class, return its full JSON schema
+    if inspect.isclass(obj):
+        try:
+            from pydantic import BaseModel
+
+            if issubclass(obj, BaseModel):
+                # Return the full JSON schema - includes all field info, descriptions, constraints, etc.
+                return obj.model_json_schema()
+        except (ImportError, AttributeError, TypeError):
+            pass
+
+        # Not a Pydantic model, return class name
+        return obj.__name__
+
+    # If it has a __name__ attribute (like functions), use that
+    if hasattr(obj, "__name__"):
+        return obj.__name__
+
+    # Try standard serialization
+    return bt_safe_deep_copy(obj)
+
+
+def _build_agent_input_and_metadata(args: Any, kwargs: Any, instance: Any) -> tuple[dict[str, Any], dict[str, Any]]:
+    """Build input data and metadata for agent wrappers.
+
+    Returns:
+        Tuple of (input_data, metadata)
+    """
+    input_data = {}
+
+    user_prompt = args[0] if len(args) > 0 else kwargs.get("user_prompt")
+    if user_prompt is not None:
+        input_data["user_prompt"] = _serialize_user_prompt(user_prompt)
+
+    for key, value in kwargs.items():
+        if key in ("deps", "user_prompt"):  # user_prompt was serialized above; don't overwrite it with a raw copy
+            continue
+        elif key == "message_history":
+            input_data[key] = _serialize_messages(value) if value is not None else None
+        elif key in ("output_type", "toolsets"):
+            # These often contain types/classes, use special serialization
+            input_data[key] = _serialize_type(value) if value is not None else None
+        elif key == "model_settings":
+            # model_settings passed to run() goes in INPUT (it's a run() parameter)
+            input_data[key] = bt_safe_deep_copy(value) if value is not None else None
+        else:
+            input_data[key] = bt_safe_deep_copy(value) if value is not None else None
+
+    if kwargs.get("model") is not None:  # "provider:name" string or model instance; None falls back to the agent
+        model_name, provider = _extract_model_info_from_model_instance(kwargs["model"])
+    else:
+        model_name, provider = _extract_model_info(instance)
+
+    # Extract agent-level configuration for metadata
+    # Only add to metadata if NOT explicitly passed in kwargs (those go in input)
+    agent_model_settings = None
+    if "model_settings" not in kwargs and hasattr(instance, "model_settings") and instance.model_settings is not None:
+        agent_model_settings = instance.model_settings
+
+    metadata = _build_model_metadata(model_name, provider, agent_model_settings)
+
+    # Extract additional agent configuration (only if not passed as kwargs)
+    if "name" not in kwargs and hasattr(instance, "name") and instance.name is not None:
+        metadata["agent_name"] = instance.name
+
+    if "end_strategy" not in kwargs and hasattr(instance, "end_strategy") and instance.end_strategy is not None:
+        metadata["end_strategy"] = str(instance.end_strategy)
+
+    # Extract output_type if set on agent and not passed as kwarg
+    # output_type can be a Pydantic model, str, or other types that get converted to JSON schema
+    if "output_type" not in kwargs and hasattr(instance, "output_type") and instance.output_type is not None:
+        try:
+            metadata["output_type"] = _serialize_type(instance.output_type)
+        except Exception as e:
+            logger.debug(f"Failed to extract output_type from agent: {e}")
+
+    # Extract toolsets if set on agent and not passed as kwarg
+    # Toolsets go in INPUT (not metadata) because agent.run() accepts toolsets parameter
+    if "toolsets" not in kwargs and hasattr(instance, "toolsets"):
+        try:
+            toolsets = instance.toolsets
+            if toolsets:
+                # Convert toolsets to a list with FULL tool schemas for input
+                serialized_toolsets = []
+                for ts in toolsets:
+                    ts_info = {
+                        "id": getattr(ts, "id", str(type(ts).__name__)),
+                        "label": getattr(ts, "label", None),
+                    }
+                    # Add full tool schemas (not just names) since toolsets can be passed to agent.run()
+                    if hasattr(ts, "tools") and ts.tools:
+                        tools_list = []
+                        tools_dict = ts.tools
+                        # tools is a dict mapping tool name -> Tool object
+                        for tool_name, tool_obj in tools_dict.items():
+                            tool_dict = {
+                                "name": tool_name,
+                            }
+                            # Extract description
+                            if hasattr(tool_obj, "description") and tool_obj.description:
+                                tool_dict["description"] = tool_obj.description
+                            # Extract JSON schema for parameters
+                            if hasattr(tool_obj, "function_schema") and hasattr(
+                                tool_obj.function_schema, "json_schema"
+                            ):
+                                tool_dict["parameters"] = tool_obj.function_schema.json_schema
+                            tools_list.append(tool_dict)
+                        ts_info["tools"] = tools_list
+                    serialized_toolsets.append(ts_info)
+                input_data["toolsets"] = serialized_toolsets
+        except Exception as e:
+            logger.debug(f"Failed to extract toolsets from agent: {e}")
+
+    # Extract system_prompt from agent if not passed as kwarg
+    # Note: system_prompt goes in input (not metadata) because it's semantically part of the LLM input
+    # Pydantic AI doesn't expose a public API for this, so we access the private _system_prompts
+    # attribute. This is wrapped in try/except to gracefully handle if the internal structure changes.
+    if "system_prompt" not in kwargs:
+        try:
+            if hasattr(instance, "_system_prompts") and instance._system_prompts:
+                input_data["system_prompt"] = "\n\n".join(instance._system_prompts)
+        except Exception as e:
+            logger.debug(f"Failed to extract system_prompt from agent: {e}")
+
+    return input_data, metadata
+
+
+def _build_direct_model_input_and_metadata(args: Any, kwargs: Any) -> tuple[dict[str, Any], dict[str, Any]]:
+    """Build input data and metadata for direct model request wrappers.
+
+    Returns:
+        Tuple of (input_data, metadata)
+    """
+    input_data = {}
+
+    model = args[0] if len(args) > 0 else kwargs.get("model")
+    if model is not None:
+        input_data["model"] = str(model)
+
+    messages = args[1] if len(args) > 1 else kwargs.get("messages", [])
+    if messages:
+        input_data["messages"] = _serialize_messages(messages)
+
+    for key, value in kwargs.items():
+        if key not in ["model", "messages"]:
+            input_data[key] = bt_safe_deep_copy(value) if value is not None else None
+
+    model_name, provider = _extract_model_info_from_model_instance(model)  # handles strings and model instances
+    metadata = _build_model_metadata(model_name, provider)
+
+    return input_data, metadata
diff --git a/py/src/braintrust/wrappers/pydantic_ai.py b/py/src/braintrust/wrappers/pydantic_ai.py
index e3442b85..86ddd717 100644
--- a/py/src/braintrust/wrappers/pydantic_ai.py
+++ b/py/src/braintrust/wrappers/pydantic_ai.py
@@ -1,1476 +1,24 @@
-import asyncio
-import contextvars
-import logging
-import sys
-import time
-from contextlib import AbstractAsyncContextManager
-from typing import Any
-
-from braintrust.bt_json import bt_safe_deep_copy
-from braintrust.logger import NOOP_SPAN, Attachment, current_span, init_logger, start_span
-from braintrust.span_types import SpanTypeAttribute
-from wrapt import wrap_function_wrapper
-
-
-logger = logging.getLogger(__name__)
-
-__all__ = ["setup_pydantic_ai"]
-
-
-def setup_pydantic_ai(
-    api_key: str | None = None,
-    project_id: str | None = None,
-    project_name: str | None = None,
-) -> bool:
-    """
-    Setup Braintrust integration with Pydantic AI. Will automatically patch Pydantic AI Agents and direct API functions for automatic tracing.
-
-    Args:
-        api_key (Optional[str]): Braintrust API key.
-        project_id (Optional[str]): Braintrust project ID.
-        project_name (Optional[str]): Braintrust project name.
-
-    Returns:
-        bool: True if setup was successful, False otherwise.
-    """
-    span = current_span()
-    if span == NOOP_SPAN:
-        init_logger(project=project_name, api_key=api_key, project_id=project_id)
-
-    try:
-        import pydantic_ai.direct as direct_module
-        from pydantic_ai import Agent
-
-        Agent = wrap_agent(Agent)
-
-        wrap_function_wrapper(direct_module, "model_request", _create_direct_model_request_wrapper())
-        wrap_function_wrapper(direct_module, "model_request_sync", _create_direct_model_request_sync_wrapper())
-        wrap_function_wrapper(direct_module, "model_request_stream", _create_direct_model_request_stream_wrapper())
-        wrap_function_wrapper(
-            direct_module, "model_request_stream_sync", _create_direct_model_request_stream_sync_wrapper()
-        )
-
-        wrap_model_classes()
-
-        # Patch StreamedResponseSync to propagate context to background threads
-        try:
-            if hasattr(direct_module, "StreamedResponseSync"):
-                wrap_function_wrapper(
-                    direct_module.StreamedResponseSync, "_start_producer", _create_start_producer_wrapper()
-                )
-                logger.debug("Pydantic AI StreamedResponseSync context propagation patching successful")
-        except Exception as e:
-            logger.warning(f"Failed to patch StreamedResponseSync context propagation: {e}")
-
-        return True
-    except ImportError:
-        # Not installed - this is expected when using auto_instrument()
-        return False
-
-
-def wrap_agent(Agent: Any) -> Any:
-    if _is_patched(Agent):
-        return Agent
-
-    def _ensure_model_wrapped(instance: Any):
-        """Ensure the agent's model class is wrapped (lazy wrapping)."""
-        if hasattr(instance, "_model") and instance._model is not None:
-            model_class = type(instance._model)
-            _wrap_concrete_model_class(model_class)
-
-    async def agent_run_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any):
-        _ensure_model_wrapped(instance)
-        input_data, metadata = _build_agent_input_and_metadata(args, kwargs, instance)
-
-        with start_span(
-            name=f"agent_run [{instance.name}]" if hasattr(instance, "name") and instance.name else "agent_run",
-            type=SpanTypeAttribute.LLM,
-            input=input_data if input_data else None,
-            metadata=metadata,
-        ) as agent_span:
-            start_time = time.time()
-            result = await wrapped(*args, **kwargs)
-            end_time = time.time()
-
-            _create_tool_spans_from_messages(result)
-
-            output = _serialize_result_output(result)
-            metrics = _extract_usage_metrics(result, start_time, end_time)
-
-            agent_span.log(output=output, metrics=metrics)
-            return result
-
-    wrap_function_wrapper(Agent, "run", agent_run_wrapper)
-
-    def agent_run_sync_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any):
-        _ensure_model_wrapped(instance)
-        input_data, metadata = _build_agent_input_and_metadata(args, kwargs, instance)
-
-        with start_span(
-            name=f"agent_run_sync [{instance.name}]"
-            if hasattr(instance, "name") and instance.name
-            else "agent_run_sync",
-            type=SpanTypeAttribute.LLM,
-            input=input_data if input_data else None,
-            metadata=metadata,
-        ) as agent_span:
-            start_time = time.time()
-            result = wrapped(*args, **kwargs)
-            end_time = time.time()
-
-            _create_tool_spans_from_messages(result)
-
-            output = _serialize_result_output(result)
-            metrics = _extract_usage_metrics(result, start_time, end_time)
-
-            agent_span.log(output=output, metrics=metrics)
-            return result
-
-    wrap_function_wrapper(Agent, "run_sync", agent_run_sync_wrapper)
-
-    def agent_to_cli_sync_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any):
-        _ensure_model_wrapped(instance)
-        input_data, metadata = _build_agent_input_and_metadata(args, kwargs, instance)
-
-        with start_span(
-            name=f"agent_to_cli_sync [{instance.name}]"
-            if hasattr(instance, "name") and instance.name
-            else "agent_to_cli_sync",
-            type=SpanTypeAttribute.LLM,
-            input=input_data if input_data else None,
-            metadata=metadata,
-        ) as agent_span:
-            start_time = time.time()
-            result = wrapped(*args, **kwargs)
-            end_time = time.time()
-            agent_span.log(metrics={"start": start_time, "end": end_time, "duration": end_time - start_time})
-            return result
-
-    wrap_function_wrapper(Agent, "to_cli_sync", agent_to_cli_sync_wrapper)
-
-    def agent_run_stream_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any):
-        _ensure_model_wrapped(instance)
-        input_data, metadata = _build_agent_input_and_metadata(args, kwargs, instance)
-        agent_name = instance.name if hasattr(instance, "name") else None
-        span_name = f"agent_run_stream [{agent_name}]" if agent_name else "agent_run_stream"
-
-        return _AgentStreamWrapper(
-            wrapped(*args, **kwargs),
-            span_name,
-            input_data,
-            metadata,
-        )
-
-    wrap_function_wrapper(Agent, "run_stream", agent_run_stream_wrapper)
-
-    def agent_run_stream_sync_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any):
-        _ensure_model_wrapped(instance)
-        input_data, metadata = _build_agent_input_and_metadata(args, kwargs, instance)
-        agent_name = instance.name if hasattr(instance, "name") else None
-        span_name = f"agent_run_stream_sync [{agent_name}]" if agent_name else "agent_run_stream_sync"
-
-        # Create span context BEFORE calling wrapped function so internal spans nest under it
-        span_cm = start_span(
-            name=span_name,
-            type=SpanTypeAttribute.LLM,
-            input=input_data if input_data else None,
-            metadata=metadata,
-        )
-        span = span_cm.__enter__()
-        start_time = time.time()
-
-        try:
-            # Call the original function within the span context
-            stream_result = wrapped(*args, **kwargs)
-            return _AgentStreamResultSyncProxy(
-                stream_result,
-                span,
-                span_cm,
-                start_time,
-            )
-        except Exception:
-            # Clean up span on error
-            span_cm.__exit__(*sys.exc_info())
-            raise
-
-    wrap_function_wrapper(Agent, "run_stream_sync", agent_run_stream_sync_wrapper)
-
-    async def agent_run_stream_events_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any):
-        _ensure_model_wrapped(instance)
-        input_data, metadata = _build_agent_input_and_metadata(args, kwargs, instance)
-
-        agent_name = instance.name if hasattr(instance, "name") else None
-        span_name = f"agent_run_stream_events [{agent_name}]" if agent_name else "agent_run_stream_events"
-
-        with start_span(
-            name=span_name,
-            type=SpanTypeAttribute.LLM,
-            input=input_data if input_data else None,
-            metadata=metadata,
-        ) as agent_span:
-            start_time = time.time()
-            event_count = 0
-            final_result = None
-
-            async for event in wrapped(*args, **kwargs):
-                event_count += 1
-                if hasattr(event, "output"):
-                    final_result = event
-                yield event
-
-            end_time = time.time()
-
-            if final_result:
-                _create_tool_spans_from_messages(final_result)
-
-            output = None
-            metrics = {
-                "start": start_time,
-                "end": end_time,
-                "duration": end_time - start_time,
-                "event_count": event_count,
-            }
-
-            if final_result:
-                output = _serialize_result_output(final_result)
-                usage_metrics = _extract_usage_metrics(final_result, start_time, end_time)
-                metrics.update(usage_metrics)
-
-            agent_span.log(output=output, metrics=metrics)
-
-    wrap_function_wrapper(Agent, "run_stream_events", agent_run_stream_events_wrapper)
-
-    Agent._braintrust_patched = True
-
-    return Agent
-
-
-def _create_direct_model_request_wrapper():
-    """Create wrapper for direct.model_request()."""
-
-    async def wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any):
-        input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs)
-
-        with start_span(
-            name="model_request",
-            type=SpanTypeAttribute.LLM,
-            input=input_data,
-            metadata=metadata,
-        ) as span:
-            start_time = time.time()
-            result = await wrapped(*args, **kwargs)
-            end_time = time.time()
-
-            output = _serialize_model_response(result)
-            metrics = _extract_response_metrics(result, start_time, end_time)
-
-            span.log(output=output, metrics=metrics)
-            return result
-
-    return wrapper
-
-
-def _create_direct_model_request_sync_wrapper():
-    """Create wrapper for direct.model_request_sync()."""
-
-    def wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any):
-        input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs)
-
-        with start_span(
-            name="model_request_sync",
-            type=SpanTypeAttribute.LLM,
-            input=input_data,
-            metadata=metadata,
-        ) as span:
-            start_time = time.time()
-            result = wrapped(*args, **kwargs)
-            end_time = time.time()
-
-            output = _serialize_model_response(result)
-            metrics = _extract_response_metrics(result, start_time, end_time)
-
-            span.log(output=output, metrics=metrics)
-            return result
-
-    return wrapper
-
-
-def _create_direct_model_request_stream_wrapper():
-    """Create wrapper for direct.model_request_stream()."""
-
-    def wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any):
-        input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs)
-
-        return _DirectStreamWrapper(
-            wrapped(*args, **kwargs),
-            "model_request_stream",
-            input_data,
-            metadata,
-        )
-
-    return wrapper
-
-
-def _create_direct_model_request_stream_sync_wrapper():
-    """Create wrapper for direct.model_request_stream_sync()."""
-
-    def wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any):
-        input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs)
-
-        return _DirectStreamWrapperSync(
-            wrapped(*args, **kwargs),
-            "model_request_stream_sync",
-            input_data,
-            metadata,
-        )
-
-    return wrapper
-
-
-def wrap_model_request(original_func: Any) -> Any:
-    async def wrapper(*args, **kwargs):
-        input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs)
-
-        with start_span(
-            name="model_request",
-            type=SpanTypeAttribute.LLM,
-            input=input_data,
-            metadata=metadata,
-        ) as span:
-            start_time = time.time()
-            result = await original_func(*args, **kwargs)
-            end_time = time.time()
-
-            output = _serialize_model_response(result)
-            metrics = _extract_response_metrics(result, start_time, end_time)
-
-            span.log(output=output, metrics=metrics)
-            return result
-
-    return wrapper
-
-
-def wrap_model_request_sync(original_func: Any) -> Any:
-    def wrapper(*args, **kwargs):
-        input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs)
-
-        with start_span(
-            name="model_request_sync",
-            type=SpanTypeAttribute.LLM,
-            input=input_data,
-            metadata=metadata,
-        ) as span:
-            start_time = time.time()
-            result = original_func(*args, **kwargs)
-            end_time = time.time()
-
-            output = _serialize_model_response(result)
-            metrics = _extract_response_metrics(result, start_time, end_time)
-
-            span.log(output=output, metrics=metrics)
-            return result
-
-    return wrapper
-
-
-def wrap_model_request_stream(original_func: Any) -> Any:
-    def wrapper(*args, **kwargs):
-        input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs)
-
-        return _DirectStreamWrapper(
-            original_func(*args, **kwargs),
-            "model_request_stream",
-            input_data,
-            metadata,
-        )
-
-    return wrapper
-
-
-def wrap_model_request_stream_sync(original_func: Any) -> Any:
-    def wrapper(*args, **kwargs):
-        input_data, metadata = _build_direct_model_input_and_metadata(args, kwargs)
-
-        return _DirectStreamWrapperSync(
-            original_func(*args, **kwargs),
-            "model_request_stream_sync",
-            input_data,
-            metadata,
-        )
-
-    return wrapper
-
-
-def wrap_model_classes():
-    """Wrap Model classes to capture internal model requests made by agents."""
-    try:
-        from pydantic_ai.models import Model
-
-        def wrap_all_subclasses(base_class):
-            """Recursively wrap all subclasses of a base class."""
-            for subclass in base_class.__subclasses__():
-                if not getattr(subclass, "__abstractmethods__", None):
-                    try:
-                        _wrap_concrete_model_class(subclass)
-                    except Exception as e:
-                        logger.debug(f"Could not wrap {subclass.__name__}: {e}")
-
-                wrap_all_subclasses(subclass)
-
-        wrap_all_subclasses(Model)
-
-    except Exception as e:
-        logger.warning(f"Failed to wrap Model classes: {e}")
-
-
-def _build_model_class_input_and_metadata(instance: Any, args: Any, kwargs: Any):
-    """Build input data and metadata for model class request wrappers.
-
-    Returns:
-        Tuple of (model_name, display_name, input_data, metadata)
-    """
-    model_name, provider = _extract_model_info_from_model_instance(instance)
-    display_name = model_name or type(instance).__name__
-
-    messages = args[0] if len(args) > 0 else kwargs.get("messages")
-    model_settings = args[1] if len(args) > 1 else kwargs.get("model_settings")
-
-    serialized_messages = _serialize_messages(messages)
-
-    input_data = {"messages": serialized_messages}
-    if model_settings is not None:
-        input_data["model_settings"] = bt_safe_deep_copy(model_settings)
-
-    metadata = _build_model_metadata(model_name, provider, model_settings=None)
-
-    return model_name, display_name, input_data, metadata
-
-
-def _wrap_concrete_model_class(model_class: Any):
-    """Wrap a concrete model class to trace its request methods."""
-    if _is_patched(model_class):
-        return
-
-    async def model_request_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any):
-        model_name, display_name, input_data, metadata = _build_model_class_input_and_metadata(instance, args, kwargs)
-
-        with start_span(
-            name=f"chat {display_name}",
-            type=SpanTypeAttribute.LLM,
-            input=input_data,
-            metadata=metadata,
-        ) as span:
-            start_time = time.time()
-            result = await wrapped(*args, **kwargs)
-            end_time = time.time()
-
-            output = _serialize_model_response(result)
-            metrics = _extract_response_metrics(result, start_time, end_time)
-
-            span.log(output=output, metrics=metrics)
-            return result
-
-    def model_request_stream_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any):
-        model_name, display_name, input_data, metadata = _build_model_class_input_and_metadata(instance, args, kwargs)
-
-        return _DirectStreamWrapper(
-            wrapped(*args, **kwargs),
-            f"chat {display_name}",
-            input_data,
-            metadata,
-        )
-
-    wrap_function_wrapper(model_class, "request", model_request_wrapper)
-    wrap_function_wrapper(model_class, "request_stream", model_request_stream_wrapper)
-    model_class._braintrust_patched = True
-
-
-class _AgentStreamWrapper(AbstractAsyncContextManager):
-    """Wrapper for agent.run_stream() that adds tracing while passing through the stream result."""
-
-    def __init__(self, stream_cm: Any, span_name: str, input_data: Any, metadata: Any):
-        self.stream_cm = stream_cm
-        self.span_name = span_name
-        self.input_data = input_data
-        self.metadata = metadata
-        self.span_cm = None
-        self.start_time = None
-        self.stream_result = None
-        self._enter_task = None
-        self._first_token_time = None
-
-    async def __aenter__(self):
-        self._enter_task = asyncio.current_task()
-
-        # Use context manager properly so span stays current
-        # DON'T pass start_time here - we'll set it via metrics in __aexit__
-        self.span_cm = start_span(
-            name=self.span_name,
-            type=SpanTypeAttribute.LLM,
-            input=self.input_data if self.input_data else None,
-            metadata=self.metadata,
-        )
-        self.span_cm.__enter__()
-
-        # Capture start time right before entering the stream (API call initiation)
-        self.start_time = time.time()
-        self.stream_result = await self.stream_cm.__aenter__()
-
-        # Wrap the stream result to capture first token time
-        return _StreamResultProxy(self.stream_result, self)
-
-    async def __aexit__(self, exc_type, exc_val, exc_tb):
-        try:
-            await self.stream_cm.__aexit__(exc_type, exc_val, exc_tb)
-        finally:
-            if self.span_cm and self.start_time and self.stream_result:
-                end_time = time.time()
-
-                _create_tool_spans_from_messages(self.stream_result)
-
-                output = _serialize_stream_output(self.stream_result)
-                metrics = _extract_stream_usage_metrics(
-                    self.stream_result, self.start_time, end_time, self._first_token_time
-                )
-                self.span_cm.log(output=output, metrics=metrics)
-
-            # Clean up span context
-            if self.span_cm:
-                if asyncio.current_task() is self._enter_task:
-                    self.span_cm.__exit__(None, None, None)
-                else:
-                    self.span_cm.end()
-
-        return False
-
-
-class _StreamResultProxy:
-    """Proxy for stream result that captures first token time."""
-
-    def __init__(self, stream_result: Any, wrapper: _AgentStreamWrapper):
-        self._stream_result = stream_result
-        self._wrapper = wrapper
-
-    def __getattr__(self, name: str):
-        """Delegate all attribute access to the wrapped stream result."""
-        attr = getattr(self._stream_result, name)
-
-        # Wrap streaming methods to capture first token time
-        if callable(attr) and name in ("stream_text", "stream_output"):
-
-            async def wrapped_method(*args, **kwargs):
-                result = attr(*args, **kwargs)
-                async for item in result:
-                    if self._wrapper._first_token_time is None:
-                        self._wrapper._first_token_time = time.time()
-                    yield item
-
-            return wrapped_method
-
-        return attr
-
-
-class _DirectStreamWrapper(AbstractAsyncContextManager):
-    """Wrapper for model_request_stream() that adds tracing while passing through the stream."""
-
-    def __init__(self, stream_cm: Any, span_name: str, input_data: Any, metadata: Any):
-        self.stream_cm = stream_cm
-        self.span_name = span_name
-        self.input_data = input_data
-        self.metadata = metadata
-        self.span_cm = None
-        self.start_time = None
-        self.stream = None
-        self._enter_task = None
-        self._first_token_time = None
-
-    async def __aenter__(self):
-        self._enter_task = asyncio.current_task()
-
-        # Use context manager properly so span stays current
-        # DON'T pass start_time here - we'll set it via metrics in __aexit__
-        self.span_cm = start_span(
-            name=self.span_name,
-            type=SpanTypeAttribute.LLM,
-            input=self.input_data if self.input_data else None,
-            metadata=self.metadata,
-        )
-        self.span_cm.__enter__()
-
-        # Capture start time right before entering the stream (API call initiation)
-        self.start_time = time.time()
-        self.stream = await self.stream_cm.__aenter__()
-
-        # Wrap the stream to capture first token time
-        return _DirectStreamIteratorProxy(self.stream, self)
-
-    async def __aexit__(self, exc_type, exc_val, exc_tb):
-        try:
-            await self.stream_cm.__aexit__(exc_type, exc_val, exc_tb)
-        finally:
-            if self.span_cm and self.start_time and self.stream:
-                end_time = time.time()
-
-                try:
-                    final_response = self.stream.get()
-                    output = _serialize_model_response(final_response)
-                    metrics = _extract_response_metrics(
-                        final_response, self.start_time, end_time, self._first_token_time
-                    )
-                    self.span_cm.log(output=output, metrics=metrics)
-                except Exception as e:
-                    logger.debug(f"Failed to extract stream output/metrics: {e}")
-
-            # Clean up span context
-            if self.span_cm:
-                if asyncio.current_task() is self._enter_task:
-                    self.span_cm.__exit__(None, None, None)
-                else:
-                    self.span_cm.end()
-
-        return False
-
-
-class _DirectStreamIteratorProxy:
-    """Proxy for direct stream that captures first token time."""
-
-    def __init__(self, stream: Any, wrapper: _DirectStreamWrapper):
-        self._stream = stream
-        self._wrapper = wrapper
-        self._iterator = None
-
-    def __getattr__(self, name: str):
-        """Delegate all attribute access to the wrapped stream."""
-        return getattr(self._stream, name)
-
-    def __aiter__(self):
-        """Return async iterator that captures first token time."""
-        # Get the actual async iterator from the stream
-        self._iterator = self._stream.__aiter__() if hasattr(self._stream, "__aiter__") else self._stream
-        return self
-
-    async def __anext__(self):
-        """Capture first token time on first iteration."""
-        if self._iterator is None:
-            # In case __aiter__ wasn't called, initialize it
-            self._iterator = self._stream.__aiter__() if hasattr(self._stream, "__aiter__") else self._stream
-
-        item = await self._iterator.__anext__()
-        if self._wrapper._first_token_time is None:
-            self._wrapper._first_token_time = time.time()
-        return item
-
-
-class _AgentStreamResultSyncProxy:
-    """Proxy for agent.run_stream_sync() result that adds tracing while delegating to actual stream result."""
-
-    def __init__(self, stream_result: Any, span: Any, span_cm: Any, start_time: float):
-        self._stream_result = stream_result
-        self._span = span
-        self._span_cm = span_cm
-        self._start_time = start_time
-        self._logged = False
-        self._finalize_on_del = True
-        self._first_token_time = None
-
-    def __getattr__(self, name: str):
-        """Delegate all attribute access to the wrapped stream result."""
-        attr = getattr(self._stream_result, name)
-
-        # Wrap any method that returns an iterator to auto-finalize when exhausted
-        if callable(attr) and name in ("stream_text", "stream_output", "__iter__"):
-
-            def wrapped_method(*args, **kwargs):
-                try:
-                    iterator = attr(*args, **kwargs)
-                    # If it's an iterator, wrap it
-                    if hasattr(iterator, "__iter__") or hasattr(iterator, "__next__"):
-                        try:
-                            for item in iterator:
-                                if self._first_token_time is None:
-                                    self._first_token_time = time.time()
-                                yield item
-                        finally:
-                            self._finalize()
-                            self._finalize_on_del = False  # Don't finalize again in __del__
-                    else:
-                        return iterator
-                except Exception:
-                    self._finalize()
-                    self._finalize_on_del = False
-                    raise
-
-            return wrapped_method
-
-        return attr
-
-    def _finalize(self):
-        """Log metrics and close span."""
-        if self._span and not self._logged and self._stream_result:
-            try:
-                end_time = time.time()
-
-                _create_tool_spans_from_messages(self._stream_result)
-
-                output = _serialize_stream_output(self._stream_result)
-                metrics = _extract_stream_usage_metrics(
-                    self._stream_result, self._start_time, end_time, self._first_token_time
-                )
-                self._span.log(output=output, metrics=metrics)
-                self._logged = True
-            finally:
-                try:
-                    self._span_cm.__exit__(None, None, None)
-                except Exception:
-                    pass
-
-    def __del__(self):
-        """Ensure span is closed when proxy is destroyed."""
-        if self._finalize_on_del:
-            self._finalize()
-
-
-class _DirectStreamWrapperSync:
-    """Wrapper for model_request_stream_sync() that adds tracing while passing through the stream."""
-
-    def __init__(self, stream_cm: Any, span_name: str, input_data: Any, metadata: Any):
-        self.stream_cm = stream_cm
-        self.span_name = span_name
-        self.input_data = input_data
-        self.metadata = metadata
-        self.span_cm = None
-        self.start_time = None
-        self.stream = None
-        self._first_token_time = None
-
-    def __enter__(self):
-        # Use context manager properly so span stays current
-        # DON'T pass start_time here - we'll set it via metrics in __exit__
-        self.span_cm = start_span(
-            name=self.span_name,
-            type=SpanTypeAttribute.LLM,
-            input=self.input_data if self.input_data else None,
-            metadata=self.metadata,
-        )
-        span = self.span_cm.__enter__()
-
-        # Capture start time right before entering the stream (API call initiation)
-        self.start_time = time.time()
-        self.stream = self.stream_cm.__enter__()
-
-        # Wrap the stream to capture first token time
-        return _DirectStreamIteratorSyncProxy(self.stream, self)
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        try:
-            self.stream_cm.__exit__(exc_type, exc_val, exc_tb)
-        finally:
-            if self.span_cm and self.start_time and self.stream:
-                end_time = time.time()
-
-                try:
-                    final_response = self.stream.get()
-                    output = _serialize_model_response(final_response)
-                    metrics = _extract_response_metrics(
-                        final_response, self.start_time, end_time, self._first_token_time
-                    )
-                    self.span_cm.log(output=output, metrics=metrics)
-                except Exception as e:
-                    logger.debug(f"Failed to extract stream output/metrics: {e}")
-
-            # Always clean up span context
-            if self.span_cm:
-                self.span_cm.__exit__(None, None, None)
-
-        return False
-
-
-class _DirectStreamIteratorSyncProxy:
-    """Proxy for direct stream (sync) that captures first token time."""
-
-    def __init__(self, stream: Any, wrapper: _DirectStreamWrapperSync):
-        self._stream = stream
-        self._wrapper = wrapper
-        self._iterator = None
-
-    def __getattr__(self, name: str):
-        """Delegate all attribute access to the wrapped stream."""
-        return getattr(self._stream, name)
-
-    def __iter__(self):
-        """Return iterator that captures first token time."""
-        # Get the actual iterator from the stream
-        self._iterator = self._stream.__iter__() if hasattr(self._stream, "__iter__") else self._stream
-        return self
-
-    def __next__(self):
-        """Capture first token time on first iteration."""
-        if self._iterator is None:
-            # In case __iter__ wasn't called, initialize it
-            self._iterator = self._stream.__iter__() if hasattr(self._stream, "__iter__") else self._stream
-
-        item = self._iterator.__next__()
-        if self._wrapper._first_token_time is None:
-            self._wrapper._first_token_time = time.time()
-        return item
-
-
-def _create_tool_spans_from_messages(result: Any) -> None:
-    """
-    Create TOOL-type spans from tool call/return message parts in a completed agent result.
-
-    Uses message timestamps from PydanticAI to position spans correctly in the trace:
-    - start_time = ModelResponse.timestamp (when the model requested the tool call)
-    - end_time = ModelRequest.timestamp (when the tool result was sent back)
-    """
-    try:
-        _create_tool_spans_from_messages_impl(result)
-    except Exception:
-        pass
-
-
-def _create_tool_spans_from_messages_impl(result: Any) -> None:
-    from pydantic_ai.messages import ToolCallPart, ToolReturnPart
-
-    messages = result.new_messages()
-
-    returns_by_id: dict[str, tuple[Any, float | None]] = {}
-    for msg in messages:
-        if not hasattr(msg, "parts"):
-            continue
-        msg_ts = _msg_timestamp(msg)
-        for part in msg.parts:
-            if isinstance(part, ToolReturnPart) and hasattr(part, "tool_call_id"):
-                returns_by_id[part.tool_call_id] = (part, msg_ts)
-
-    for msg in messages:
-        if not hasattr(msg, "parts"):
-            continue
-        call_ts = _msg_timestamp(msg)
-        for part in msg.parts:
-            if not isinstance(part, ToolCallPart):
-                continue
-
-            tool_name = getattr(part, "tool_name", None) or "unknown_tool"
-            tool_call_id = getattr(part, "tool_call_id", None)
-
-            try:
-                input_data = part.args_as_dict()
-            except Exception:
-                input_data = bt_safe_deep_copy(getattr(part, "args", None))
-
-            output_data = None
-            return_ts: float | None = None
-            if tool_call_id and tool_call_id in returns_by_id:
-                return_part, return_ts = returns_by_id[tool_call_id]
-                output_data = bt_safe_deep_copy(getattr(return_part, "content", None))
-
-            metadata = {}
-            if tool_call_id:
-                metadata["tool_call_id"] = tool_call_id
-
-            with start_span(
-                name=tool_name,
-                type=SpanTypeAttribute.TOOL,
-                input=input_data,
-                start_time=call_ts,
-                metadata=metadata if metadata else None,
-            ) as tool_span:
-                metrics = {}
-                if call_ts is not None:
-                    metrics["start"] = call_ts
-                if return_ts is not None:
-                    metrics["end"] = return_ts
-                if call_ts is not None and return_ts is not None:
-                    metrics["duration"] = return_ts - call_ts
-                tool_span.log(output=output_data, metrics=metrics if metrics else None)
-                tool_span.end(end_time=return_ts)
-
-
-def _msg_timestamp(msg: Any) -> float | None:
-    """Extract epoch-seconds timestamp from a PydanticAI message, or None."""
-    ts = getattr(msg, "timestamp", None)
-    if ts is None:
-        return None
-    try:
-        return ts.timestamp()  # datetime → float
-    except Exception:
-        return None
-
-
-def _serialize_user_prompt(user_prompt: Any) -> Any:
-    """Serialize user prompt, handling BinaryContent and other types."""
-    if user_prompt is None:
-        return None
-
-    if isinstance(user_prompt, str):
-        return user_prompt
-
-    if isinstance(user_prompt, list):
-        return [_serialize_content_part(part) for part in user_prompt]
-
-    return _serialize_content_part(user_prompt)
-
-
-def _serialize_content_part(part: Any) -> Any:
-    """Serialize a content part, handling BinaryContent specially.
-
-    This function handles:
-    - BinaryContent: converts to Braintrust Attachment
-    - Parts with nested content (UserPromptPart): recursively serializes content items
-    - Strings: passes through unchanged
-    - Other objects: converts to dict via model_dump
-    """
-    if part is None:
-        return None
-
-    if hasattr(part, "data") and hasattr(part, "media_type") and hasattr(part, "kind"):
-        if part.kind == "binary":
-            data = part.data
-            media_type = part.media_type
-
-            extension = media_type.split("/")[1] if "/" in media_type else "bin"
-            filename = f"file.{extension}"
-
-            attachment = Attachment(data=data, filename=filename, content_type=media_type)
-            return {"type": "binary", "attachment": attachment, "media_type": media_type}
-
-    if hasattr(part, "content"):
-        content = part.content
-        if isinstance(content, list):
-            serialized_content = [_serialize_content_part(item) for item in content]
-            result = bt_safe_deep_copy(part)
-            if isinstance(result, dict):
-                result["content"] = serialized_content
-            return result
-        elif content is not None:
-            serialized_content = _serialize_content_part(content)
-            result = bt_safe_deep_copy(part)
-            if isinstance(result, dict):
-                result["content"] = serialized_content
-            return result
-
-    if isinstance(part, str):
-        return part
-
-    return bt_safe_deep_copy(part)
-
-
-def _serialize_messages(messages: Any) -> Any:
-    """Serialize messages list."""
-    if not messages:
-        return []
-
-    result = []
-    for msg in messages:
-        if hasattr(msg, "parts") and msg.parts:
-            original_parts = msg.parts
-            serialized_parts = [_serialize_content_part(p) for p in original_parts]
-
-            # Use model_dump with exclude to avoid serializing parts field prematurely
-            if hasattr(msg, "model_dump"):
-                try:
-                    serialized_msg = msg.model_dump(exclude={"parts"}, exclude_none=True)
-                except (TypeError, ValueError):
-                    # If exclude parameter not supported, fall back to bt_safe_deep_copy
-                    serialized_msg = bt_safe_deep_copy(msg)
-            else:
-                serialized_msg = bt_safe_deep_copy(msg)
-
-            if isinstance(serialized_msg, dict):
-                serialized_msg["parts"] = serialized_parts
-        else:
-            serialized_msg = bt_safe_deep_copy(msg)
-
-        result.append(serialized_msg)
-
-    return result
-
-
-def _serialize_result_output(result: Any) -> Any:
-    """Serialize agent run result output."""
-    if not result:
-        return None
-
-    output_dict = {}
-
-    if hasattr(result, "output"):
-        output_dict["output"] = bt_safe_deep_copy(result.output)
-
-    if hasattr(result, "response"):
-        output_dict["response"] = _serialize_model_response(result.response)
-
-    return output_dict if output_dict else bt_safe_deep_copy(result)
-
-
-def _serialize_stream_output(stream_result: Any) -> Any:
-    """Serialize stream result output."""
-    if not stream_result:
-        return None
-
-    output_dict = {}
-
-    if hasattr(stream_result, "response"):
-        output_dict["response"] = _serialize_model_response(stream_result.response)
-
-    return output_dict if output_dict else None
-
-
-def _serialize_model_response(response: Any) -> Any:
-    """Serialize a model response."""
-    if not response:
-        return None
-
-    response_dict = bt_safe_deep_copy(response)
-
-    if hasattr(response, "parts") and isinstance(response_dict, dict):
-        response_dict["parts"] = [_serialize_content_part(p) for p in response.parts]
-
-    return response_dict
-
-
-def _extract_model_info_from_model_instance(model: Any) -> tuple[str | None, str | None]:
-    """Extract model name and provider from a model instance.
-
-    Args:
-        model: A Pydantic AI model instance (OpenAIChatModel, AnthropicModel, etc.)
-
-    Returns:
-        Tuple of (model_name, provider)
-    """
-    if not model:
-        return None, None
-
-    if isinstance(model, str):
-        return _parse_model_string(model)
-
-    if hasattr(model, "model_name"):
-        model_name = model.model_name
-        class_name = type(model).__name__
-        provider = None
-        if "OpenAI" in class_name:
-            provider = "openai"
-        elif "Anthropic" in class_name:
-            provider = "anthropic"
-        elif "Gemini" in class_name:
-            provider = "gemini"
-        elif "Groq" in class_name:
-            provider = "groq"
-        elif "Mistral" in class_name:
-            provider = "mistral"
-        elif "VertexAI" in class_name:
-            provider = "vertexai"
-
-        return model_name, provider
-
-    if hasattr(model, "name"):
-        return _parse_model_string(model.name)
-
-    return None, None
-
-
-def _extract_model_info(agent: Any) -> tuple[str | None, str | None]:
-    """Extract model name and provider from agent.
-
-    Args:
-        agent: A Pydantic AI Agent instance
-
-    Returns:
-        Tuple of (model_name, provider)
-    """
-    if not hasattr(agent, "model"):
-        return None, None
-
-    return _extract_model_info_from_model_instance(agent.model)
-
-
-def _build_model_metadata(model_name: str | None, provider: str | None, model_settings: Any = None) -> dict[str, Any]:
-    """Build metadata dictionary with model info.
-
-    Args:
-        model_name: The model name (e.g., "gpt-4o")
-        provider: The provider (e.g., "openai")
-        model_settings: Optional model settings to include
-
-    Returns:
-        Dictionary of metadata
-    """
-    metadata = {}
-    if model_name:
-        metadata["model"] = model_name
-    if provider:
-        metadata["provider"] = provider
-    if model_settings:
-        metadata["model_settings"] = bt_safe_deep_copy(model_settings)
-    return metadata
-
-
-def _parse_model_string(model: Any) -> tuple[str | None, str | None]:
-    """Parse model string to extract provider and model name.
-
-    Pydantic AI uses format: "provider:model-name" (e.g., "openai:gpt-4o")
-    """
-    if not model:
-        return None, None
-
-    model_str = str(model)
-
-    if ":" in model_str:
-        parts = model_str.split(":", 1)
-        return parts[1], parts[0]  # (model_name, provider)
-
-    return model_str, None
-
-
-def _extract_usage_metrics(result: Any, start_time: float, end_time: float) -> dict[str, float] | None:
-    """Extract usage metrics from agent run result."""
-    metrics: dict[str, float] = {}
-
-    metrics["start"] = start_time
-    metrics["end"] = end_time
-    metrics["duration"] = end_time - start_time
-
-    usage = None
-    if hasattr(result, "response"):
-        try:
-            response = result.response
-            if hasattr(response, "usage"):
-                usage = response.usage
-        except (AttributeError, ValueError):
-            pass
-
-    if usage is None and hasattr(result, "usage"):
-        usage = result.usage
-
-    if usage is None:
-        return metrics
-
-    if hasattr(usage, "input_tokens"):
-        input_tokens = usage.input_tokens
-        if input_tokens is not None:
-            metrics["prompt_tokens"] = float(input_tokens)
-
-    if hasattr(usage, "output_tokens"):
-        output_tokens = usage.output_tokens
-        if output_tokens is not None:
-            metrics["completion_tokens"] = float(output_tokens)
-
-    if hasattr(usage, "total_tokens"):
-        total_tokens = usage.total_tokens
-        if total_tokens is not None:
-            metrics["tokens"] = float(total_tokens)
-
-    if hasattr(usage, "cache_read_tokens") and usage.cache_read_tokens is not None:
-        metrics["prompt_cached_tokens"] = float(usage.cache_read_tokens)
-
-    if hasattr(usage, "cache_write_tokens") and usage.cache_write_tokens is not None:
-        metrics["prompt_cache_creation_tokens"] = float(usage.cache_write_tokens)
-
-    if hasattr(usage, "input_audio_tokens") and usage.input_audio_tokens is not None:
-        metrics["prompt_audio_tokens"] = float(usage.input_audio_tokens)
-
-    if hasattr(usage, "output_audio_tokens") and usage.output_audio_tokens is not None:
-        metrics["completion_audio_tokens"] = float(usage.output_audio_tokens)
-
-    if hasattr(usage, "details") and isinstance(usage.details, dict):
-        details = usage.details
-
-        if "reasoning_tokens" in details:
-            metrics["completion_reasoning_tokens"] = float(details["reasoning_tokens"])
-
-        if "cached_tokens" in details:
-            metrics["prompt_cached_tokens"] = float(details["cached_tokens"])
-
-    return metrics if metrics else None
-
-
-def _extract_stream_usage_metrics(
-    stream_result: Any, start_time: float, end_time: float, first_token_time: float | None
-) -> dict[str, float] | None:
-    """Extract usage metrics from stream result."""
-    metrics: dict[str, float] = {}
-
-    metrics["start"] = start_time
-    metrics["end"] = end_time
-    metrics["duration"] = end_time - start_time
-
-    if first_token_time:
-        metrics["time_to_first_token"] = first_token_time - start_time
-
-    if hasattr(stream_result, "usage"):
-        usage_func = stream_result.usage
-        if callable(usage_func):
-            usage = usage_func()
-        else:
-            usage = usage_func
-
-        if usage:
-            if hasattr(usage, "input_tokens") and usage.input_tokens is not None:
-                metrics["prompt_tokens"] = float(usage.input_tokens)
-
-            if hasattr(usage, "output_tokens") and usage.output_tokens is not None:
-                metrics["completion_tokens"] = float(usage.output_tokens)
-
-            if hasattr(usage, "total_tokens") and usage.total_tokens is not None:
-                metrics["tokens"] = float(usage.total_tokens)
-
-            if hasattr(usage, "cache_read_tokens") and usage.cache_read_tokens is not None:
-                metrics["prompt_cached_tokens"] = float(usage.cache_read_tokens)
-
-            if hasattr(usage, "cache_write_tokens") and usage.cache_write_tokens is not None:
-                metrics["prompt_cache_creation_tokens"] = float(usage.cache_write_tokens)
-
-    return metrics if metrics else None
-
-
-def _extract_response_metrics(
-    response: Any, start_time: float, end_time: float, first_token_time: float | None = None
-) -> dict[str, float] | None:
-    """Extract metrics from model response."""
-    metrics: dict[str, float] = {}
-
-    metrics["start"] = start_time
-    metrics["end"] = end_time
-    metrics["duration"] = end_time - start_time
-
-    if first_token_time:
-        metrics["time_to_first_token"] = first_token_time - start_time
-
-    if hasattr(response, "usage") and response.usage:
-        usage = response.usage
-
-        if hasattr(usage, "input_tokens") and usage.input_tokens is not None:
-            metrics["prompt_tokens"] = float(usage.input_tokens)
-
-        if hasattr(usage, "output_tokens") and usage.output_tokens is not None:
-            metrics["completion_tokens"] = float(usage.output_tokens)
-
-        if hasattr(usage, "total_tokens") and usage.total_tokens is not None:
-            metrics["tokens"] = float(usage.total_tokens)
-
-        if hasattr(usage, "cache_read_tokens") and usage.cache_read_tokens is not None:
-            metrics["prompt_cached_tokens"] = float(usage.cache_read_tokens)
-
-        if hasattr(usage, "cache_write_tokens") and usage.cache_write_tokens is not None:
-            metrics["prompt_cache_creation_tokens"] = float(usage.cache_write_tokens)
-
-        # Extract reasoning tokens for reasoning models (o1/o3)
-        if hasattr(usage, "details") and usage.details is not None:
-            if hasattr(usage.details, "reasoning_tokens") and usage.details.reasoning_tokens is not None:
-                metrics["completion_reasoning_tokens"] = float(usage.details.reasoning_tokens)
-
-    return metrics if metrics else None
-
-
-def _create_start_producer_wrapper():
-    """Create wrapper for StreamedResponseSync._start_producer to propagate context.
-
-    StreamedResponseSync._start_producer creates a background thread that doesn't
-    inherit contextvars. This wrapper ensures Braintrust context flows to that thread
-    so nested instrumentation (like wrap_openai) creates properly parented spans.
-    """
-
-    def wrapper(wrapped: Any, instance: Any, args: Any, kwargs: Any) -> None:
-        ctx = contextvars.copy_context()
-        original_async_producer = instance._async_producer
-
-        def _context_wrapped_async_producer() -> None:
-            ctx.run(original_async_producer)
-
-        instance._async_producer = _context_wrapped_async_producer
-        try:
-            return wrapped(*args, **kwargs)
-        finally:
-            instance._async_producer = original_async_producer
-
-    return wrapper
-
-
-def _is_patched(obj: Any) -> bool:
-    """Check if object is already patched.
-
-    For classes we check __dict__ directly because getattr walks the MRO.
-    Without this, wrapping WrapperModel first causes InstrumentedModel to
-    appear already-patched (it inherits the flag), so its request() method
-    is never wrapped and the inner "chat" span is lost.
-    """
-    if isinstance(obj, type):
-        return obj.__dict__.get("_braintrust_patched", False)
-    return getattr(obj, "_braintrust_patched", False)
-
-
-def _serialize_type(obj: Any) -> Any:
-    """Serialize a type/class for logging, handling Pydantic models and other types.
-
-    This is useful for output_type, toolsets, and similar type parameters.
-    Returns full JSON schema for Pydantic models so engineers can see exactly
-    what structured output schema was used.
-    """
-    import inspect
-
-    # For sequences of types (like Union types or list of models)
-    if isinstance(obj, (list, tuple)):
-        return [_serialize_type(item) for item in obj]
-
-    # Handle Pydantic AI's output wrappers (ToolOutput, NativeOutput, PromptedOutput, TextOutput)
-    if hasattr(obj, "output"):
-        # These are wrapper classes with an 'output' field containing the actual type
-        wrapper_info = {"wrapper": type(obj).__name__}
-        if hasattr(obj, "name") and obj.name:
-            wrapper_info["name"] = obj.name
-        if hasattr(obj, "description") and obj.description:
-            wrapper_info["description"] = obj.description
-        wrapper_info["output"] = _serialize_type(obj.output)
-        return wrapper_info
-
-    # If it's a Pydantic model class, return its full JSON schema
-    if inspect.isclass(obj):
-        try:
-            from pydantic import BaseModel
-
-            if issubclass(obj, BaseModel):
-                # Return the full JSON schema - includes all field info, descriptions, constraints, etc.
-                return obj.model_json_schema()
-        except (ImportError, AttributeError, TypeError):
-            pass
-
-        # Not a Pydantic model, return class name
-        return obj.__name__
-
-    # If it has a __name__ attribute (like functions), use that
-    if hasattr(obj, "__name__"):
-        return obj.__name__
-
-    # Try standard serialization
-    return bt_safe_deep_copy(obj)
-
-
-def _build_agent_input_and_metadata(args: Any, kwargs: Any, instance: Any) -> tuple[dict[str, Any], dict[str, Any]]:
-    """Build input data and metadata for agent wrappers.
-
-    Returns:
-        Tuple of (input_data, metadata)
-    """
-    input_data = {}
-
-    user_prompt = args[0] if len(args) > 0 else kwargs.get("user_prompt")
-    if user_prompt is not None:
-        input_data["user_prompt"] = _serialize_user_prompt(user_prompt)
-
-    for key, value in kwargs.items():
-        if key == "deps":
-            continue
-        elif key == "message_history":
-            input_data[key] = _serialize_messages(value) if value is not None else None
-        elif key in ("output_type", "toolsets"):
-            # These often contain types/classes, use special serialization
-            input_data[key] = _serialize_type(value) if value is not None else None
-        elif key == "model_settings":
-            # model_settings passed to run() goes in INPUT (it's a run() parameter)
-            input_data[key] = bt_safe_deep_copy(value) if value is not None else None
-        else:
-            input_data[key] = bt_safe_deep_copy(value) if value is not None else None
-
-    if "model" in kwargs:
-        model_name, provider = _parse_model_string(kwargs["model"])
-    else:
-        model_name, provider = _extract_model_info(instance)
-
-    # Extract agent-level configuration for metadata
-    # Only add to metadata if NOT explicitly passed in kwargs (those go in input)
-    agent_model_settings = None
-    if "model_settings" not in kwargs and hasattr(instance, "model_settings") and instance.model_settings is not None:
-        agent_model_settings = instance.model_settings
-
-    metadata = _build_model_metadata(model_name, provider, agent_model_settings)
-
-    # Extract additional agent configuration (only if not passed as kwargs)
-    if "name" not in kwargs and hasattr(instance, "name") and instance.name is not None:
-        metadata["agent_name"] = instance.name
-
-    if "end_strategy" not in kwargs and hasattr(instance, "end_strategy") and instance.end_strategy is not None:
-        metadata["end_strategy"] = str(instance.end_strategy)
-
-    # Extract output_type if set on agent and not passed as kwarg
-    # output_type can be a Pydantic model, str, or other types that get converted to JSON schema
-    if "output_type" not in kwargs and hasattr(instance, "output_type") and instance.output_type is not None:
-        try:
-            metadata["output_type"] = _serialize_type(instance.output_type)
-        except Exception as e:
-            logger.debug(f"Failed to extract output_type from agent: {e}")
-
-    # Extract toolsets if set on agent and not passed as kwarg
-    # Toolsets go in INPUT (not metadata) because agent.run() accepts toolsets parameter
-    if "toolsets" not in kwargs and hasattr(instance, "toolsets"):
-        try:
-            toolsets = instance.toolsets
-            if toolsets:
-                # Convert toolsets to a list with FULL tool schemas for input
-                serialized_toolsets = []
-                for ts in toolsets:
-                    ts_info = {
-                        "id": getattr(ts, "id", str(type(ts).__name__)),
-                        "label": getattr(ts, "label", None),
-                    }
-                    # Add full tool schemas (not just names) since toolsets can be passed to agent.run()
-                    if hasattr(ts, "tools") and ts.tools:
-                        tools_list = []
-                        tools_dict = ts.tools
-                        # tools is a dict mapping tool name -> Tool object
-                        for tool_name, tool_obj in tools_dict.items():
-                            tool_dict = {
-                                "name": tool_name,
-                            }
-                            # Extract description
-                            if hasattr(tool_obj, "description") and tool_obj.description:
-                                tool_dict["description"] = tool_obj.description
-                            # Extract JSON schema for parameters
-                            if hasattr(tool_obj, "function_schema") and hasattr(
-                                tool_obj.function_schema, "json_schema"
-                            ):
-                                tool_dict["parameters"] = tool_obj.function_schema.json_schema
-                            tools_list.append(tool_dict)
-                        ts_info["tools"] = tools_list
-                    serialized_toolsets.append(ts_info)
-                input_data["toolsets"] = serialized_toolsets
-        except Exception as e:
-            logger.debug(f"Failed to extract toolsets from agent: {e}")
-
-    # Extract system_prompt from agent if not passed as kwarg
-    # Note: system_prompt goes in input (not metadata) because it's semantically part of the LLM input
-    # Pydantic AI doesn't expose a public API for this, so we access the private _system_prompts
-    # attribute. This is wrapped in try/except to gracefully handle if the internal structure changes.
-    if "system_prompt" not in kwargs:
-        try:
-            if hasattr(instance, "_system_prompts") and instance._system_prompts:
-                input_data["system_prompt"] = "\n\n".join(instance._system_prompts)
-        except Exception as e:
-            logger.debug(f"Failed to extract system_prompt from agent: {e}")
-
-    return input_data, metadata
-
-
-def _build_direct_model_input_and_metadata(args: Any, kwargs: Any) -> tuple[dict[str, Any], dict[str, Any]]:
-    """Build input data and metadata for direct model request wrappers.
-
-    Returns:
-        Tuple of (input_data, metadata)
-    """
-    input_data = {}
-
-    model = args[0] if len(args) > 0 else kwargs.get("model")
-    if model is not None:
-        input_data["model"] = str(model)
-
-    messages = args[1] if len(args) > 1 else kwargs.get("messages", [])
-    if messages:
-        input_data["messages"] = _serialize_messages(messages)
-
-    for key, value in kwargs.items():
-        if key not in ["model", "messages"]:
-            input_data[key] = bt_safe_deep_copy(value) if value is not None else None
-
-    model_name, provider = _parse_model_string(model)
-    metadata = _build_model_metadata(model_name, provider)
-
-    return input_data, metadata
+"""Compatibility re-exports for the migrated Pydantic AI integration."""
+
+from braintrust.integrations.pydantic_ai import (
+    PydanticAIIntegration,
+    setup_pydantic_ai,
+    wrap_agent,
+    wrap_model_classes,
+    wrap_model_request,
+    wrap_model_request_stream,
+    wrap_model_request_stream_sync,
+    wrap_model_request_sync,
+)
+
+
+__all__ = [
+    "PydanticAIIntegration",
+    "setup_pydantic_ai",
+    "wrap_agent",
+    "wrap_model_classes",
+    "wrap_model_request",
+    "wrap_model_request_sync",
+    "wrap_model_request_stream",
+    "wrap_model_request_stream_sync",
+]