From 27dc963fee70edb0db3cc35368ef5c6043e7d59e Mon Sep 17 00:00:00 2001
From: Abhijeet Prasad <abhijeet@braintrustdata.com>
Date: Wed, 25 Mar 2026 14:07:37 -0700
Subject: [PATCH 1/3] feat: Add agentscope integration

---
 py/noxfile.py                                 |  12 +
 py/src/braintrust/auto.py                     |  12 +-
 py/src/braintrust/conftest.py                 |   1 +
 py/src/braintrust/integrations/__init__.py    |   9 +-
 .../braintrust/integrations/adk/test_adk.py   |   1 +
 .../integrations/agentscope/__init__.py       |  20 +
 ...quential_pipeline_creates_parent_span.yaml | 545 ++++++++++++++++++
 .../test_agentscope_simple_agent_run.yaml     | 320 ++++++++++
 ...agentscope_tool_use_creates_tool_span.yaml | 451 +++++++++++++++
 .../cassettes/test_auto_agentscope.yaml       | 122 ++++
 .../integrations/agentscope/integration.py    |  26 +
 .../integrations/agentscope/patchers.py       | 103 ++++
 .../agentscope/test_agentscope.py             | 221 +++++++
 .../integrations/agentscope/tracing.py        | 269 +++++++++
 .../auto_test_scripts/test_auto_agentscope.py |  73 +++
 .../braintrust/wrappers/test_google_genai.py  |   1 +
 16 files changed, 2184 insertions(+), 2 deletions(-)
 create mode 100644 py/src/braintrust/integrations/agentscope/__init__.py
 create mode 100644 py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_sequential_pipeline_creates_parent_span.yaml
 create mode 100644 py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_simple_agent_run.yaml
 create mode 100644 py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_tool_use_creates_tool_span.yaml
 create mode 100644 py/src/braintrust/integrations/agentscope/cassettes/test_auto_agentscope.yaml
 create mode 100644 py/src/braintrust/integrations/agentscope/integration.py
 create mode 100644 py/src/braintrust/integrations/agentscope/patchers.py
 create mode 100644 py/src/braintrust/integrations/agentscope/test_agentscope.py
 create mode 100644 py/src/braintrust/integrations/agentscope/tracing.py
 create mode 100644 py/src/braintrust/integrations/auto_test_scripts/test_auto_agentscope.py

diff --git a/py/noxfile.py b/py/noxfile.py
index 61ea0aec..0f50d440 100644
--- a/py/noxfile.py
+++ b/py/noxfile.py
@@ -62,6 +62,7 @@ def _pinned_python_version():
 # validate things work with or without them.
 VENDOR_PACKAGES = (
     "agno",
+    "agentscope",
     "anthropic",
     "dspy",
     "openai",
@@ -89,6 +90,7 @@ def _pinned_python_version():
 # Keep LATEST for newest API coverage, and pin 2.4.0 to cover the 2.4 -> 2.5 breaking change
 # to internals we leverage for instrumentation.
 AGNO_VERSIONS = (LATEST, "2.4.0", "2.1.0")
+AGENTSCOPE_VERSIONS = (LATEST, "1.0.0")
 # pydantic_ai 1.x requires Python >= 3.10
 # Two test suites with different version requirements:
 # 1. wrap_openai approach: works with older versions (0.1.9+)
@@ -172,6 +174,16 @@ def test_agno(session, version):
     _run_core_tests(session)
 
 
+@nox.session()
+@nox.parametrize("version", AGENTSCOPE_VERSIONS, ids=AGENTSCOPE_VERSIONS)
+def test_agentscope(session, version):  # nox session, run once per entry in AGENTSCOPE_VERSIONS
+    _install_test_deps(session)
+    _install(session, "agentscope", version)  # the AgentScope version under test
+    _install(session, "openai")  # the recorded cassettes hold AsyncOpenAI requests to api.openai.com
+    _run_tests(session, f"{INTEGRATION_DIR}/agentscope/test_agentscope.py")
+    _run_core_tests(session)  # same closing step as the test_agno session
+
+
 @nox.session()
 @nox.parametrize("version", ANTHROPIC_VERSIONS, ids=ANTHROPIC_VERSIONS)
 def test_anthropic(session, version):
diff --git a/py/src/braintrust/auto.py b/py/src/braintrust/auto.py
index 25dd436a..7ac407aa 100644
--- a/py/src/braintrust/auto.py
+++ b/py/src/braintrust/auto.py
@@ -7,7 +7,13 @@
 import logging
 from contextlib import contextmanager
 
-from braintrust.integrations import ADKIntegration, AgnoIntegration, AnthropicIntegration, ClaudeAgentSDKIntegration
+from braintrust.integrations import (
+    ADKIntegration,
+    AgentScopeIntegration,
+    AgnoIntegration,
+    AnthropicIntegration,
+    ClaudeAgentSDKIntegration,
+)
 
 
 __all__ = ["auto_instrument"]
@@ -34,6 +40,7 @@ def auto_instrument(
     pydantic_ai: bool = True,
     google_genai: bool = True,
     agno: bool = True,
+    agentscope: bool = True,
     claude_agent_sdk: bool = True,
     dspy: bool = True,
     adk: bool = True,
@@ -54,6 +61,7 @@ def auto_instrument(
         pydantic_ai: Enable Pydantic AI instrumentation (default: True)
         google_genai: Enable Google GenAI instrumentation (default: True)
         agno: Enable Agno instrumentation (default: True)
+        agentscope: Enable AgentScope instrumentation (default: True)
         claude_agent_sdk: Enable Claude Agent SDK instrumentation (default: True)
         dspy: Enable DSPy instrumentation (default: True)
         adk: Enable Google ADK instrumentation (default: True)
@@ -116,6 +124,8 @@ def auto_instrument(
         results["google_genai"] = _instrument_google_genai()
     if agno:
         results["agno"] = _instrument_integration(AgnoIntegration)
+    if agentscope:
+        results["agentscope"] = _instrument_integration(AgentScopeIntegration)
     if claude_agent_sdk:
         results["claude_agent_sdk"] = _instrument_integration(ClaudeAgentSDKIntegration)
     if dspy:
diff --git a/py/src/braintrust/conftest.py b/py/src/braintrust/conftest.py
index 0fbdf40b..2345b227 100644
--- a/py/src/braintrust/conftest.py
+++ b/py/src/braintrust/conftest.py
@@ -191,6 +191,7 @@ def get_vcr_config():
         "decode_compressed_response": True,
         "filter_headers": [
             "authorization",
+            "Authorization",
             "openai-organization",
             "x-api-key",
             "api-key",
diff --git a/py/src/braintrust/integrations/__init__.py b/py/src/braintrust/integrations/__init__.py
index 35324c1c..095f7f35 100644
--- a/py/src/braintrust/integrations/__init__.py
+++ b/py/src/braintrust/integrations/__init__.py
@@ -1,7 +1,14 @@
 from .adk import ADKIntegration
+from .agentscope import AgentScopeIntegration
 from .agno import AgnoIntegration
 from .anthropic import AnthropicIntegration
 from .claude_agent_sdk import ClaudeAgentSDKIntegration
 
 
-__all__ = ["ADKIntegration", "AgnoIntegration", "AnthropicIntegration", "ClaudeAgentSDKIntegration"]
+__all__ = [
+    "ADKIntegration",
+    "AgentScopeIntegration",
+    "AgnoIntegration",
+    "AnthropicIntegration",
+    "ClaudeAgentSDKIntegration",
+]
diff --git a/py/src/braintrust/integrations/adk/test_adk.py b/py/src/braintrust/integrations/adk/test_adk.py
index bed6f3e6..9d9be979 100644
--- a/py/src/braintrust/integrations/adk/test_adk.py
+++ b/py/src/braintrust/integrations/adk/test_adk.py
@@ -41,6 +41,7 @@ def before_record_request(request):
         "cassette_library_dir": str(Path(__file__).parent / "cassettes"),
         "filter_headers": [
             "authorization",
+            "Authorization",
             "x-goog-api-key",
         ],
         "before_record_request": before_record_request,
diff --git a/py/src/braintrust/integrations/agentscope/__init__.py b/py/src/braintrust/integrations/agentscope/__init__.py
new file mode 100644
index 00000000..534f1db8
--- /dev/null
+++ b/py/src/braintrust/integrations/agentscope/__init__.py
@@ -0,0 +1,20 @@
+"""Braintrust integration for AgentScope."""
+
+from braintrust.logger import NOOP_SPAN, current_span, init_logger
+
+from .integration import AgentScopeIntegration
+
+
+__all__ = ["AgentScopeIntegration", "setup_agentscope"]
+
+
+def setup_agentscope(
+    api_key: str | None = None,
+    project_id: str | None = None,
+    project_name: str | None = None,
+) -> bool:
+    """Set up the Braintrust AgentScope integration; returns the result of AgentScopeIntegration.setup()."""
+    if current_span() == NOOP_SPAN:  # current span is NOOP_SPAN: init a logger with the given project/credentials
+        init_logger(project=project_name, api_key=api_key, project_id=project_id)
+
+    return AgentScopeIntegration.setup()
diff --git a/py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_sequential_pipeline_creates_parent_span.yaml b/py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_sequential_pipeline_creates_parent_span.yaml
new file mode 100644
index 00000000..1ceea647
--- /dev/null
+++ b/py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_sequential_pipeline_creates_parent_span.yaml
@@ -0,0 +1,545 @@
+interactions:
+- request:
+    body: '{"messages":[{"role":"system","content":[{"type":"text","text":"You rewrite
+      the input as a short title."}]},{"role":"user","content":[{"type":"text","text":"#
+      Conversation History\nThe content between <history></history> tags contains
+      your conversation history\n<history>\nuser: Summarize why tests should use real
+      recorded traffic.\n</history>"}]}],"model":"gpt-4o-mini","stream":false,"temperature":0}'
+    headers:
+      Accept:
+      - application/json
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '403'
+      Content-Type:
+      - application/json
+      Host:
+      - api.openai.com
+      User-Agent:
+      - AsyncOpenAI/Python 2.29.0
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Async:
+      - async:asyncio
+      X-Stainless-Lang:
+      - python
+      X-Stainless-OS:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 2.29.0
+      X-Stainless-Runtime:
+      - CPython
+      X-Stainless-Runtime-Version:
+      - 3.13.3
+      x-stainless-read-timeout:
+      - '600'
+      x-stainless-retry-count:
+      - '0'
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyCzzdwMAAAD//41SwW7bMAy95ysMneMi
+        yZwou/a2a5EehqIwFImytcmiIcpbhyH/PspOarfrgF0EmI/v+fGR9+0ed1V1+Hl4HI5f2oevzekU
+        BrHODDx/A51urDuNzIPkMEywjqASZNWtlFUld/KzHIEODfhMa/pUVlh2Lrhyt9lV5UaW2+OV3aLT
+        QNz2xJ9F8Xt8s89g4IXLm/Wt0gGRaoBrtyYuRvS5IhSRo6RCEusZ1BgShNH6PQSwLlGBtngkF5ri
+        AZTnR2M0YIpTVNY6XbhQnIASNyyFItiBVB4mDN4vABUCJpXDGEd4viKXV9Memz7imd5RheUwqK05
+        O+Ig2SAl7MWIXvh9HsMZ3swrWKjrU53wO4y/28tJTswrmcHjFUvsz8/lw379gVhtICnnaZGt0Eq3
+        YGbmvAg1GIcLYLUY+W8vH2lPY3PI/yM/A1pDz6dW9xGM02/nndsi5Hv9V9trxKNhQRB/8AHWyUHM
+        azBg1eCnKxL0ixJ0Ne+qgdhHN52S7WutPoE0cnu2YnVZ/QE7MxO8WAMAAA==
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-Ray:
+      - 9e20e166fbae67b2-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 25 Mar 2026 21:06:38 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '664'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      set-cookie:
+      - __cf_bm=0vN_GTd.Taalah9PDsZg_Ru.1_PcZ_NBP9qkR2MCsFE-1774472797.273874-1.0.1.1-_HsrwKoaPPyMDTMtccbmEvGb.WDYukiKlNhKyLTp32aZR8vwDwqATyzmrwTg82HAg9bVn2GQnmrENihz.LTaMxGxvJCORGScpnet2yitftoFB0LwZa12LFWkWMzlprHK;
+        HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026
+        21:36:38 GMT
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999940'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_bcc5d231b4f24f049b2e0a0ba5d880a9
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages":[{"role":"system","content":[{"type":"text","text":"You rewrite
+      the input as a short title."}]},{"role":"user","content":[{"type":"text","text":"#
+      Conversation History\nThe content between <history></history> tags contains
+      your conversation history\n<history>\nuser: Summarize why tests should use real
+      recorded traffic.\n</history>"}]}],"model":"gpt-4o-mini","stream":false,"temperature":0}'
+    headers:
+      Accept:
+      - application/json
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '403'
+      Content-Type:
+      - application/json
+      Host:
+      - api.openai.com
+      User-Agent:
+      - AsyncOpenAI/Python 2.29.0
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Async:
+      - async:asyncio
+      X-Stainless-Lang:
+      - python
+      X-Stainless-OS:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 2.29.0
+      X-Stainless-Runtime:
+      - CPython
+      X-Stainless-Runtime-Version:
+      - 3.13.3
+      x-stainless-read-timeout:
+      - '600'
+      x-stainless-retry-count:
+      - '0'
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAA/4xSwW7bMAy95ysEneMhcdI56G3FTus2DFt3GgpDkShHjSwJIj2sKPLvo+2kdrcO
+        2EWA+fieHx/5tBBCOiOvhdQHRbpNvnj/+cvDR328uXUfNv7WfTuGlSrfVZ9Sh4+tXPaMuH8ATRfW
+        Gx2ZB+RiGGGdQRH0quuq2m6rclduBqCNBnxPaxIV21i0LriiXJXbYlUV692ZfYhOA3LbD/4U4ml4
+        e5/BwC8ur5aXSguIqgGuXZq4mKPvK1IhOiQVSC4nUMdAEAbrNxDAOkIRrfiOLjTiKyjPj47ZgBF3
+        WVnrtHBB3AESN8yFMtgOVT9M6LyfASqESKoPYxjh/oycnk372KQc9/gHVVoOAw81Z4ccJBtEikkO
+        6Inf+yGc7sW8koXaRDXFIwy/u6pGOTmtZAJ3Z4zYn5/Kb6+Wr4jVBkg5j7NspVb6AGZiTotQnXFx
+        BixmI//t5TXtcWwO+X/kJ0BrSHxqdcpgnH4579SWob/Xf7U9RzwYlgj5Jx9gTQ5yvwYDVnV+vCKJ
+        j0jQ1ryrBnLKbjwlm2qtNlCZar23cnFa/AYAAP//AwB2CCbyWAMAAA==
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-Ray:
+      - 9e20e208d9766142-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 25 Mar 2026 21:07:03 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '652'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      set-cookie:
+      - __cf_bm=nPA03jdSYpPIt1MCtq5c3dH__SdRu5fP0HUZaKsaPsk-1774472823.1741066-1.0.1.1-QeI7TdvczvDfZcpacbJMilgyA_s79AH1EgKxSLO_1Z_BUh6jqZue4vjSpv9Sr.ihxNrRzZkdyq7EzAFHuIxCU9THwR_hF6iuXTWOT0BzCGX_rHDteSnXa7BlEfiRqaBH;
+        HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026
+        21:37:03 GMT
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999940'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_09e84f9d413d4a8d9495fef9e4133024
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages":[{"role":"system","content":[{"type":"text","text":"You answer
+      the previous message in one sentence."}]},{"role":"user","content":[{"type":"text","text":"#
+      Conversation History\nThe content between <history></history> tags contains
+      your conversation history\n<history>\nAlice: Benefits of Using Real Recorded
+      Traffic in Testing\n</history>"}]}],"model":"gpt-4o-mini","stream":false,"temperature":0}'
+    headers:
+      Accept:
+      - application/json
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '410'
+      Content-Type:
+      - application/json
+      Host:
+      - api.openai.com
+      User-Agent:
+      - AsyncOpenAI/Python 2.29.0
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Async:
+      - async:asyncio
+      X-Stainless-Lang:
+      - python
+      X-Stainless-OS:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 2.29.0
+      X-Stainless-Runtime:
+      - CPython
+      X-Stainless-Runtime-Version:
+      - 3.13.3
+      x-stainless-read-timeout:
+      - '600'
+      x-stainless-retry-count:
+      - '0'
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyPKtcAYAAAD//4xTy27bMBC8+ysInqXA
+        dpQqPRfIMUBb9FQEAk2upG35ApcyYhT+9y6lOHLaFOiFgHZ2docz1OeTg+bT1y/mMH54GB7H5+ZW
+        pV5WhREOP0DnC+tGB+ZBxuAXWCdQGcrUXds2Tbu/3zcz4IIBW2hDzHUTaoce6/1239Tbtt7dv7DH
+        gBqI277zpxC/5rPo9AaeubytLhUHRGoArl2auJiCLRWpiJCy8llWK6iDz+Bn6d8I/SBYquVDh2TA
+        iJxU36MW6EUGyqUB/Kg86xFK6ykpfaoEupjCkUsRUh+SKzhTCIcxUyWUN2IEG0mg4V3Yn0QMZSvy
+        KiSamMkLlHAhwSyAdfJS8EdMwTvuvLnWnKCfSBXf/GTtFaC8D1kV32e3nl6Q86s/Ngys9EB/UGXP
+        vtPY8WrizNgLyiHKGT3z+TTnML2xVvIgF3OXw0+Y193dLePkmv4KLnkzmFmgXevtx+qdaZ2BrNDS
+        VY5SKz2CWZlr6GoyGK6AzdWd/xbz3uzl3hzu/4xfAa0h8rPuYgKD+u2F17YE5d/4V9urx7NgSZCO
+        /Ni7jJBKDgZ6NdnlxUo6UQbXcVgDpJhwebZ97LS6hda0u0MvN+fNbwAAAP//AwDs2pZ8xAMAAA==
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-Ray:
+      - 9e20e20db9eddfce-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 25 Mar 2026 21:07:04 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '669'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      set-cookie:
+      - __cf_bm=HayDHNMCRsqHCSCg5YnFHqR8EGszTInfOwZSaeO5eyg-1774472823.957494-1.0.1.1-T1qVHFP.Enq2BHk4byqWQwdinEXG5pjJnQ6C55kaXNumTZcQVGLUbKoXNUrcL8OlUtndtdfReMfEtfZ5GDvmyZ8zIPnM28rC8KnQNIAYwe3oTr24xzHY4xyLaaAQfXw_;
+        HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026
+        21:37:04 GMT
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999940'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_074bde6fdf2a4c4caf77d081e3a5af31
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages":[{"role":"system","content":[{"type":"text","text":"You rewrite
+      the input as a short title."}]},{"role":"user","content":[{"type":"text","text":"#
+      Conversation History\nThe content between <history></history> tags contains
+      your conversation history\n<history>\nuser: Summarize why tests should use real
+      recorded traffic.\n</history>"}]}],"model":"gpt-4o-mini","stream":false,"temperature":0}'
+    headers:
+      Accept:
+      - application/json
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '403'
+      Content-Type:
+      - application/json
+      Host:
+      - api.openai.com
+      User-Agent:
+      - AsyncOpenAI/Python 2.29.0
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Async:
+      - async:asyncio
+      X-Stainless-Lang:
+      - python
+      X-Stainless-OS:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 2.29.0
+      X-Stainless-Runtime:
+      - CPython
+      X-Stainless-Runtime-Version:
+      - 3.13.3
+      x-stainless-read-timeout:
+      - '600'
+      x-stainless-retry-count:
+      - '0'
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyDb29QAAAAD//4xSTW/bMAy951cYOsdD
+        vlrl3AI99DJsaIGiQ2EoEuWolSVPpLsMQ/77KDup3a0DdhFgPr7nx0c+SL99PHxZP94crg/XPz6v
+        v6/tw62YZ0bcPYOmM+uTjswDcjEMsE6gCLLqUsrNRq62l7IHmmjAZ1rdUrmJZeOCK1eL1aZcyHK5
+        PbH30WlAbvvGn0Xxq3+zz2DgwOXF/FxpAFHVwLVzExdT9LkiFKJDUoHEfAR1DASht34FAawjLKIt
+        7tGFuvgKyvOjYzJgirukrHW6cKG4AyRumAolsB2qPEzovJ8AKoRIKofRj/B0Qo5vpn2s2xR3+AdV
+        WA4D9xVnhxwkG0SKrejRI79PfTjdu3kFCzUtVRRfoP/dhRzkxLiSEdyeMGJ/fixfXsw/EKsMkHIe
+        J9kKrfQezMgcF6E64+IEmE1G/tvLR9rD2Bzy/8iPgNbQ8qlVbQLj9Pt5x7YE+V7/1fYWcW9YIKRX
+        PsCKHKS8BgNWdX64IoE/kaCpeFc1pDa54ZRsW2m1BmnkcmfF7Dj7DQAA//8DANl/ZwFYAwAA
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-Ray:
+      - 9e20e31aaae4cf8f-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 25 Mar 2026 21:07:47 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '724'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      set-cookie:
+      - __cf_bm=Mzctd7Vf4Pj8hLREk5uV3xfhGAJqHTBdS7Z68xdqJ3E-1774472866.9835687-1.0.1.1-9060awKZeUAS__HpRhOh3ZDNpHvhVrftE8gP2f5h5qVQ7SUhdLjp1QfnUBiOHckFloYERDcS4nSTfy3q7RPbo.rO9ak4DB2RBS7.iNAo4m7Yek2xS4nBLaBUTUBc7VfU;
+        HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026
+        21:37:47 GMT
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999940'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_923040bc8f624770996dc8d06bd77fef
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages":[{"role":"system","content":[{"type":"text","text":"You answer
+      the previous message in one sentence."}]},{"role":"user","content":[{"type":"text","text":"#
+      Conversation History\nThe content between <history></history> tags contains
+      your conversation history\n<history>\nAlice: Benefits of Using Real Recorded
+      Traffic in Testing\n</history>"}]}],"model":"gpt-4o-mini","stream":false,"temperature":0}'
+    headers:
+      Accept:
+      - application/json
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '410'
+      Content-Type:
+      - application/json
+      Host:
+      - api.openai.com
+      User-Agent:
+      - AsyncOpenAI/Python 2.29.0
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Async:
+      - async:asyncio
+      X-Stainless-Lang:
+      - python
+      X-Stainless-OS:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 2.29.0
+      X-Stainless-Runtime:
+      - CPython
+      X-Stainless-Runtime-Version:
+      - 3.13.3
+      x-stainless-read-timeout:
+      - '600'
+      x-stainless-retry-count:
+      - '0'
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyDbOSAcAAAD//4xTwW7bMAy95ysEnZ0i
+        yZy6uw/tYehp6y5DYSgSbbOVJUGUswZD/n2UndTp2gG7CDAfH/n0nvz19un2W7i//rK6t9u777vD
+        rx/q4UUWmeF3T6DTmXWlPfMgoXcTrCOoBHnquqrKstrcXFcj0HsDNtPakJalX/bocLlZbcrlqlqu
+        b07szqMG4raf/CnE7/HMOp2BFy6vinOlByLVAtfOTVyM3uaKVERISbkkixnU3iVwo/QHQtcKlmr5
+        0D4aMCJF1TSoBTqRgFJuANcpx3qE0nqISh8KgX2Ifs+lALHxsc84UwjbLlEhlDOiAxtIoOFd2BxE
+        8Hkr8iokGpjJC5TofYRRAOvkpeD2GL3rufPqUnOEZiCVfXODtReAcs4nlX0f3Xo8IcdXf6xvWemO
+        /qLKhn2nrubVxJmxF5R8kCN65PNxzGF4Y63kQX1IdfLPMK7bbqdxck5/BjflCUws0M716nPxwbTa
+        QFJo6SJHqZXuwMzMOXQ1GPQXwOLizu/FfDR7ujeH+z/jZ0BrCPys6xDBoH574bktQv43/tX26vEo
+        WBLEPT/2OiHEnIOBRg12erGSDpSgrzmsFmKIOD3bJtRafYLKVOtdIxfHxR8AAAD//wMAQ8dY3cQD
+        AAA=
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-Ray:
+      - 9e20e31fec3dc132-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 25 Mar 2026 21:07:48 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '768'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      set-cookie:
+      - __cf_bm=JYxrn1QLzqghAM1PJW6V89UMbmiLatd1atmmQw6NPr0-1774472867.8239858-1.0.1.1-hwGgJY4XYCiJtbeBfm6fdTUACpklHSNTd64qYVFPn23d73s8.NBRfBlLx6nUV4d3.tfIInsBJq50FlZh7Wv9iTFtj7HY1hVkAYRHdnNzeq_4_piEn49lFlc_GkLVdmE0;
+        HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026
+        21:37:48 GMT
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999940'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_13f99272933545f8bac789ebd64bb0f1
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_simple_agent_run.yaml b/py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_simple_agent_run.yaml
new file mode 100644
index 00000000..87f6358e
--- /dev/null
+++ b/py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_simple_agent_run.yaml
@@ -0,0 +1,320 @@
+interactions:
+- request:
+    body: '{"messages":[{"role":"system","name":"system","content":[{"type":"text","text":"You
+      are a concise assistant. Answer in one sentence."}]},{"role":"user","name":"user","content":[{"type":"text","text":"Say
+      hello in exactly two words."}]}],"model":"gpt-4o-mini","stream":false,"temperature":0}'
+    headers:
+      Accept:
+      - application/json
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '290'
+      Content-Type:
+      - application/json
+      Host:
+      - api.openai.com
+      User-Agent:
+      - AsyncOpenAI/Python 2.29.0
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Async:
+      - async:asyncio
+      X-Stainless-Lang:
+      - python
+      X-Stainless-OS:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 2.29.0
+      X-Stainless-Runtime:
+      - CPython
+      X-Stainless-Runtime-Version:
+      - 3.13.3
+      x-stainless-read-timeout:
+      - '600'
+      x-stainless-retry-count:
+      - '0'
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyCzNtgAAAAD//4xSsU7DMBDd+xWV56Rq
+        05TQGYbODCyoilz7khgc29hOFYT675ydtglQJBZLvnfv/N7zPRzNkb33u02/qXTPN48AT89bkgSG
+        PrwC8xfWgmnkgRdaDTCzQD2EqauiyPMiK7Z3EWg1BxlotfFprtNWKJFmyyxPl0W6uj+zGy0YOGx7
+        wet8/hnPoFNx6LG8TC6VFpyjNWDt0oRFq2WoEOqccJ4qT5IRZFp5UFH6DqTUc9+AhcW0xULVORpk
+        qk7KCUCV0p4Gm1Hc/oycrnKkro3VB/eDSiq06ZoSU3EYET7tvDYkoic899F2980JwUGt8aXXbxCf
+        W2fDODKGPQHPmEd9clLeJDeGlRw8FdJNUiOMsgb4yBwjph0XegLMJpZ/a7k1e7AtVP2f8SPAGBhc
+        otJY4IJ99zu2WQib+FfbNeIomDiwR1yt0guw4Rs4VLSTw34Q9+E8tCX+VQ3WWDEsSWVKRtdQ8GJ1
+        qMjsNPsCAAD//wMAQQCebTIDAAA=
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-Ray:
+      - 9e20e1608aaf6142-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 25 Mar 2026 21:06:37 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '373'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      set-cookie:
+      - __cf_bm=0r2bm90d_zmUe.y8EYZpgoGyTVS4QQSDoJVx.yDpJng-1774472796.2480545-1.0.1.1-onYisUL_Bju9EhfGXQnZBZkwk3gdjG7tHXVdr34BVePUh3JL0OqfVWApVIaF_KDBKfw4HIiGBvzONzv_AS91kbK7eL.FzFDwILNg8_F1h3hsPpZO.pIoeUN1dp_.acW6;
+        HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026
+        21:36:37 GMT
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999975'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_0b44866bb8cd459db8712e04e4248889
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages":[{"role":"system","name":"system","content":[{"type":"text","text":"You
+      are a concise assistant. Answer in one sentence."}]},{"role":"user","name":"user","content":[{"type":"text","text":"Say
+      hello in exactly two words."}]}],"model":"gpt-4o-mini","stream":false,"temperature":0}'
+    headers:
+      Accept:
+      - application/json
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '290'
+      Content-Type:
+      - application/json
+      Host:
+      - api.openai.com
+      User-Agent:
+      - AsyncOpenAI/Python 2.29.0
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Async:
+      - async:asyncio
+      X-Stainless-Lang:
+      - python
+      X-Stainless-OS:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 2.29.0
+      X-Stainless-Runtime:
+      - CPython
+      X-Stainless-Runtime-Version:
+      - 3.13.3
+      x-stainless-read-timeout:
+      - '600'
+      x-stainless-retry-count:
+      - '0'
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyPJOswAAAAD//4xS0WrCMBR99yskz61o
+        7aiv7mX6MJgwtsGQEpPbNjNNQpKOjeG/76ZVWzcHewnknntuzjm5t/dsebeB9Wq/qZ7WD4/L55eU
+        piQKDL17A+ZPrAnTyAMvtOpgZoF6CFNnWZamWbJIkhaoNQcZaKXxcarjWigRJ9MkjadZPFsc2ZUW
+        DBy2veJ1PP5qz6BTcfjA8jQ6VWpwjpaAtVMTFq2WoUKoc8J5qjyJepBp5UG10lcgpR77CixMhi0W
+        isbRIFM1Ug4AqpT2NNhsxW2PyOEsR+rSWL1zP6ikQJuuyjEVhxHh085rQ1r0gOe2td1cOCE4qDY+
+        93oP7XPzpBtH+rAH4BHzqE8OyjfRlWE5B0+FdIPUCKOsAt4z+4hpw4UeAKOB5d9ars3ubAtV/md8
+        DzAGBpcoNxa4YJd++zYLYRP/ajtH3AomDuw7rlbuBdjwDRwK2shuP4j7dB7qHP+qBGus6JakMDmj
+        c8h4NtsVZHQYfQMAAP//AwDajPLhMgMAAA==
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-Ray:
+      - 9e20e202698a251d-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 25 Mar 2026 21:07:03 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '334'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      set-cookie:
+      - __cf_bm=GJt8W0lyxqfgtKXEJ4M8twcG5pNqc4RmQiPqQ_IukiU-1774472822.1494222-1.0.1.1-Jv3zKpnCjFAAQQaXBEt3RElEP.QjtEFbqrvr8BASrk5X7XSiOj1UBc4tUR3t9QbKmOM0VrcVW6R3HYLaYxbTHQqz4Dpjl7Z.Sz9BslefycjBprbfLjQ1aoOYxrSO7lkO;
+        HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026
+        21:37:03 GMT
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999977'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_8c118a37a5da44069de28fba911081e0
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages":[{"role":"system","name":"system","content":[{"type":"text","text":"You
+      are a concise assistant. Answer in one sentence."}]},{"role":"user","name":"user","content":[{"type":"text","text":"Say
+      hello in exactly two words."}]}],"model":"gpt-4o-mini","stream":false,"temperature":0}'
+    headers:
+      Accept:
+      - application/json
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '290'
+      Content-Type:
+      - application/json
+      Host:
+      - api.openai.com
+      User-Agent:
+      - AsyncOpenAI/Python 2.29.0
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Async:
+      - async:asyncio
+      X-Stainless-Lang:
+      - python
+      X-Stainless-OS:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 2.29.0
+      X-Stainless-Runtime:
+      - CPython
+      X-Stainless-Runtime-Version:
+      - 3.13.3
+      x-stainless-read-timeout:
+      - '600'
+      x-stainless-retry-count:
+      - '0'
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyDbKMgIAAAD//4xSQU7DMBC89xWVz0mV
+        pKHhDALBDSFxQlXk2pvE4NiW7dAi1L+zTtomhSJxseSdnfXMeO+zuxuWbp9vt4/pU/WyonrHk45E
+        gaE3b8D8kbVgGnnghVYDzCxQD2FqWhR5XmTXq1UPtJqDDLTa+DjXcSuUiLMky+OkiNPrA7vRgoHD
+        tle8zudf/Rl0Kg47LCfRsdKCc7QGrB2bsGi1DBVCnRPOU+VJNIJMKw+ql/4AUuq5b8DCYtpioeoc
+        DTJVJ+UEoEppT4PNXtz6gOxPcqSujdUb94NKKrTpmhJTcRgRPu28NqRH93iue9vdmROCg1rjS6/f
+        oX9umQ3jyBj2BDxgHvXJSfkqujCs5OCpkG6SGmGUNcBH5hgx7bjQE2A2sfxby6XZg22h6v+MHwHG
+        wOASlcYCF+zc79hmIWziX22niHvBxIH9wNUqvQAbvoFDRTs57Adxn85DW+Jf1WCNFcOSVKZkdAkF
+        L9JNRWb72TcAAAD//wMAHqai9DIDAAA=
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-Ray:
+      - 9e20e3168f481d99-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 25 Mar 2026 21:07:46 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '370'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      set-cookie:
+      - __cf_bm=QmJ_mn8jkQXEiES_f8MtNfFrG.TdUhMi9F3CsYesW.Y-1774472866.3273165-1.0.1.1-9jM9NCSYxoeVuM4GWc_rXbAFSxA2USepp4rz5niKNzmDK.TtF1V7PSKWLir8akfbpuyXvva5QQRFgDCcMT3P0xa_1Tzm9mW9uidzQIoQBbq5O6t0XUQzY8bQC4ddKj1i;
+        HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026
+        21:37:46 GMT
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999977'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_2ac33c8b77a343ea881d0fe0fbeccc14
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_tool_use_creates_tool_span.yaml b/py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_tool_use_creates_tool_span.yaml
new file mode 100644
index 00000000..8b272df9
--- /dev/null
+++ b/py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_tool_use_creates_tool_span.yaml
@@ -0,0 +1,451 @@
+interactions:
+- request:
+    body: '{"messages":[{"role":"system","name":"system","content":[{"type":"text","text":"You
+      are a helpful assistant. Use tools when required and keep answers brief."}]},{"role":"user","name":"user","content":[{"type":"text","text":"Use
+      Python to compute 6 * 7 and return just the result."}]}],"model":"gpt-4o-mini","stream":false,"temperature":0,"tools":[{"type":"function","function":{"name":"execute_python_code","parameters":{"properties":{"code":{"description":"The
+      Python code to be executed.","type":"string"},"timeout":{"default":300,"description":"The
+      maximum time (in seconds) allowed for the code to run.","type":"number"}},"required":["code"],"type":"object"},"description":"Execute
+      the given python code in a temp file and capture the return\ncode, standard
+      output and error. Note you must `print` the output to get\nthe result, and the
+      tmp file will be removed right after the execution."}}]}'
+    headers:
+      Accept:
+      - application/json
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '897'
+      Content-Type:
+      - application/json
+      Host:
+      - api.openai.com
+      User-Agent:
+      - AsyncOpenAI/Python 2.29.0
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Async:
+      - async:asyncio
+      X-Stainless-Lang:
+      - python
+      X-Stainless-OS:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 2.29.0
+      X-Stainless-Runtime:
+      - CPython
+      X-Stainless-Runtime-Version:
+      - 3.13.3
+      x-stainless-read-timeout:
+      - '600'
+      x-stainless-retry-count:
+      - '0'
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyKxINAYAAAD//41TXW+bMBR951cgP21T
+        mICSklTawzZtUvuwTZ32sI7Kcs0NeDM2tU3TKMp/n20SIGkmjQcE99xzfD+ObxT9unj88PPmS5as
+        85q+v9usSYVmjiEffgM1B9ZbKi0PDJOih6kCYsCpJnmeZXmaL5ceaGQJ3NGq1kSZjBomWJTGaRbF
+        eZQs9uxaMgrapv2yv2G49W9Xpyjh2Ybj2SHSgNakAhs7JNmgktxFENGaaUOEQbMRpFIYEK500XE+
+        AYyUHFPC+Xhw/2wn3+OwbCI2Fx8vmutvn0hbPz3yz3fXvLldf/+xmJzXS29aX9CqE3QY0gQf4lcn
+        h1lMkMZz4RloZwC3G1NLgakd5ImMTSaq6hrbnWsBbQvksgp0VSAFuuMmfBdehm/CvChEq5gwr/rw
+        6wLt0JHULjj3fT+ZloJVpwl/OUYihDTEdePneL9HdsPKuKxaJR/0CRWtrBV0ja1ztJ/EdCHBoRBf
+        AuqOdo6sXNMabOQf8Icm87RXRaMvRzSd70Fj6+QTVp7PzujhEgxh3hSDDymhNZQjdfQj6UomJ0Aw
+        6f1lNee0+/6ZqP5HfgQohdbeONwqKBk97nhMU+Cu7b/Shin7gpEG9WTvITYMlNtHCSti3dLfUb3R
+        Bhpsl1aB8mbyBm9xHM/LyyRe5gsU7IK/crsEFF8EAAA=
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-Ray:
+      - 9e20e16f0b9aaf0d-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 25 Mar 2026 21:06:40 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '1239'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      set-cookie:
+      - __cf_bm=ncH1zypNLZZs6ohCgRXU6EAQ0iJm0PqJ2cW8rY7HVI8-1774472798.5665667-1.0.1.1-k0DS.3SU2tAkF.r3ZjBzLnG4fwZB2Fu5meB0y3aUWpZtAKeGqa66nGagt.hDaY2vSKIEbmMrwf7bozk5KFrU29xqoOc0X32xTuik1N0lOr3jOPFP1_u5ceUvxEjzvGTM;
+        HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026
+        21:36:40 GMT
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999962'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_02a17794efcb4607a4ef7c7281e6d187
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages":[{"role":"system","name":"system","content":[{"type":"text","text":"You
+      are a helpful assistant. Use tools when required and keep answers brief."}]},{"role":"user","name":"user","content":[{"type":"text","text":"Use
+      Python to compute 6 * 7 and return just the result."}]}],"model":"gpt-4o-mini","stream":false,"temperature":0,"tools":[{"type":"function","function":{"name":"execute_python_code","parameters":{"properties":{"code":{"description":"The
+      Python code to be executed.","type":"string"},"timeout":{"default":300,"description":"The
+      maximum time (in seconds) allowed for the code to run.","type":"number"}},"required":["code"],"type":"object"},"description":"Execute
+      the given python code in a temp file and capture the return\ncode, standard
+      output and error. Note you must `print` the output to get\nthe result, and the
+      tmp file will be removed right after the execution."}}]}'
+    headers:
+      Accept:
+      - application/json
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '897'
+      Content-Type:
+      - application/json
+      Host:
+      - api.openai.com
+      User-Agent:
+      - AsyncOpenAI/Python 2.29.0
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Async:
+      - async:asyncio
+      X-Stainless-Lang:
+      - python
+      X-Stainless-OS:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 2.29.0
+      X-Stainless-Runtime:
+      - CPython
+      X-Stainless-Runtime-Version:
+      - 3.13.3
+      x-stainless-read-timeout:
+      - '600'
+      x-stainless-retry-count:
+      - '0'
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAA/4xTXWvbMBR9968wetpGPBzj1ElgD2NlHSsrhY21tC5ClW9sdbJkJDkkDfnvk+Qk
+        dtIM5gdj33PP0f042gRhiFiB5iGiFTG0bnh0eXP78uP2Cyw+x79urq713e9LU94v5fr+q6jRyDHk
+        8wtQs2d9pNLywDApOpgqIAac6jjL0jRLpknqgVoWwB2tbEyUyqhmgkVJnKRRnEXj6Y5dSUZB27RH
+        +xuGG/92dYoCVjYcj/aRGrQmJdjYPskGleQugojWTBsiDBr1IJXCgHCli5bzAWCk5JgSzvuDu2cz
+        +O6HZRPx3Wv6IK+vxvKbjOPlagbVKjM/Z98H53XS68YXtGgFPQxpgB/i85PDLCZI7bmwAtoawM3a
+        VFJgagd5ImOTiSrb2nbnWkCbHLmsHM1zpEC33ISfwovwQ5jluWgUE+ZdF36foy06ktoG576fBtNS
+        sGg14W/HSISQhrhu/Byfdsj2sDIuy0bJZ31CRQtrBV1h6xztJzFcSLAvxJeA2qOdIytXNwYb+Qf8
+        oeNJ0qmi3pc9mkx2oLF18gEry0Zn9HABhjBvioMPKaEVFD219yNpCyYHQDDo/W0157S7/pko/0e+
+        ByiFxt443CgoGD3uuE9T4K7tv9IOU/YFIw1qae8hNgyU20cBC2Ld0t1RvdYGamyXVoLyZvIGb3Ac
+        T4qLcTzLpijYBn8BAAD//wMAHPLnAF8EAAA=
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-Ray:
+      - 9e20e212f84f7e56-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 25 Mar 2026 21:07:05 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '760'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      set-cookie:
+      - __cf_bm=cEkxxhx5PI02Ywh8nf.Xs6jrFBlPOsqdtqnmu2dQoUs-1774472824.7966123-1.0.1.1-l8rKWW5YXcEa4qiqAmWX7gYkTwxcPBKKDdUiau.bKkThNBJdZMRBk8E6aelB55XL8mGvLIuOpvyEL1u_F6R5238q7PZg1iu7hVZPFcKKIeqC1cnWzWuetLrt0ahyQJcC;
+        HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026
+        21:37:05 GMT
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999965'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_5e3d6b5e29b545dc85b5ddedeb732996
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages":[{"role":"system","name":"system","content":[{"type":"text","text":"You
+      are a helpful assistant. Use tools when required and keep answers brief."}]},{"role":"user","name":"user","content":[{"type":"text","text":"Use
+      Python to compute 6 * 7 and return just the result."}]}],"model":"gpt-4o-mini","stream":false,"temperature":0,"tools":[{"type":"function","function":{"name":"execute_python_code","parameters":{"properties":{"code":{"description":"The
+      Python code to be executed.","type":"string"},"timeout":{"default":300,"description":"The
+      maximum time (in seconds) allowed for the code to run.","type":"number"}},"required":["code"],"type":"object"},"description":"Execute
+      the given python code in a temp file and capture the return\ncode, standard
+      output and error. Note you must `print` the output to get\nthe result, and the
+      tmp file will be removed right after the execution."}}]}'
+    headers:
+      Accept:
+      - application/json
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '897'
+      Content-Type:
+      - application/json
+      Host:
+      - api.openai.com
+      User-Agent:
+      - AsyncOpenAI/Python 2.29.0
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Async:
+      - async:asyncio
+      X-Stainless-Lang:
+      - python
+      X-Stainless-OS:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 2.29.0
+      X-Stainless-Runtime:
+      - CPython
+      X-Stainless-Runtime-Version:
+      - 3.13.3
+      x-stainless-read-timeout:
+      - '600'
+      x-stainless-retry-count:
+      - '0'
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyDYtDwEAAAD//4xTXW+bMBR951cgP21T
+        mADR0FTaw6ZuL6m2h+WlHZXlmBvi1tjMNmmzKP99tkmApJk0HhDcc8/x/Tguk+dGP8hNJedPt2Jz
+        //B1/rKYo4ljyOUTUHNkfaTS8sAwKTqYKiAGnGqS51mWp9fTmQdqWQJ3tKoxUSajmgkWpXGaRXEe
+        JdcH9loyCtqm/bK/Ybjzb1enKOHVhuPJMVKD1qQCGzsm2aCS3EUQ0ZppQ4RBkwGkUhgQrnTRcj4C
+        jJQcU8L5cHD37Ebfw7BsIs7n2Uv14/Zukc9+t1/ufork23fx+c9idF4nvW18QatW0H5II7yP35wd
+        ZjFBas+FV6CtAdxszVoKTO0gz2RsMlFVW9vuXAtoVyCXVaCbAinQLTfhp3AafgjzohCNYsK868Lv
+        C7RHJ1L74NL342haClatJvztGIkQ0hDXjZ/j4wHZ9yvjsmqUXOozKlpZK+g1ts7RfhLjhQTHQnwJ
+        qD3ZObJydWOwkc/gD02u0k4VDb4c0PTqABpbJx+x8nxyQQ+XYAjzpuh9SAldQzlQBz+StmRyBASj
+        3t9Wc0m765+J6n/kB4BSaOyNw42CktHTjoc0Be7a/iutn7IvGGlQG3sPsWGg3D5KWBHrlu6O6q02
+        UGO7tAqUN5M3eIOzlKbTeJnHMxTsg78AAAD//wMAxy9FZF8EAAA=
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-Ray:
+      - 9e20e3264eb7f005-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 25 Mar 2026 21:07:49 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '724'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      set-cookie:
+      - __cf_bm=0XpUc1fNMZh7X5an8o2lKE5Me2U0kr5yZMBg0xANHr0-1774472868.8462937-1.0.1.1-dprcxP4hyQ0IPDL_vk1NK7FsJ11DyabA3P8I942JNvTn7zKho4A0pRDev9WYlxrW1LwDshBeG3MLMUsQs5Y9hxWVr.Wu3JoHqUF6i2Ho7_hr0NaKJDRn8f.qyG5mv6hc;
+        HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026
+        21:37:49 GMT
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999962'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_4413081d046145c59f7df6e62731b407
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages":[{"role":"system","name":"system","content":[{"type":"text","text":"You
+      are a helpful assistant. Use tools when required and keep answers brief."}]},{"role":"user","name":"user","content":[{"type":"text","text":"Use
+      Python to compute 6 * 7 and return just the result."}]},{"role":"assistant","name":"Jarvis","content":null,"tool_calls":[{"id":"call_7K4wgODLT79quBLSn1FNnAzT","type":"function","function":{"name":"execute_python_code","arguments":"{\"code\":
+      \"result = 6 * 7\\nprint(result)\"}"}}]},{"role":"tool","tool_call_id":"call_7K4wgODLT79quBLSn1FNnAzT","content":"<returncode>0</returncode><stdout>42\n</stdout><stderr></stderr>","name":"execute_python_code"}],"model":"gpt-4o-mini","stream":false,"temperature":0,"tools":[{"type":"function","function":{"name":"execute_python_code","parameters":{"properties":{"code":{"description":"The
+      Python code to be executed.","type":"string"},"timeout":{"default":300,"description":"The
+      maximum time (in seconds) allowed for the code to run.","type":"number"}},"required":["code"],"type":"object"},"description":"Execute
+      the given python code in a temp file and capture the return\ncode, standard
+      output and error. Note you must `print` the output to get\nthe result, and the
+      tmp file will be removed right after the execution."}}]}'
+    headers:
+      Accept:
+      - application/json
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '1293'
+      Content-Type:
+      - application/json
+      Cookie:
+      - __cf_bm=0XpUc1fNMZh7X5an8o2lKE5Me2U0kr5yZMBg0xANHr0-1774472868.8462937-1.0.1.1-dprcxP4hyQ0IPDL_vk1NK7FsJ11DyabA3P8I942JNvTn7zKho4A0pRDev9WYlxrW1LwDshBeG3MLMUsQs5Y9hxWVr.Wu3JoHqUF6i2Ho7_hr0NaKJDRn8f.qyG5mv6hc
+      Host:
+      - api.openai.com
+      User-Agent:
+      - AsyncOpenAI/Python 2.29.0
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Async:
+      - async:asyncio
+      X-Stainless-Lang:
+      - python
+      X-Stainless-OS:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 2.29.0
+      X-Stainless-Runtime:
+      - CPython
+      X-Stainless-Runtime-Version:
+      - 3.13.3
+      x-stainless-read-timeout:
+      - '600'
+      x-stainless-retry-count:
+      - '0'
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyDZzdwUAAAD//41SwW6cMBC971dYPqXS
+        EgGiC7lGinLLKbcmQo4ZwAnYyDO0Sav9947NJpB2K/Vi7c6b93jzZp4r3/rsZ3t9c3Orf3RV22d3
+        RSH3geGenkHTO+tSO+YBGWcXWHtQBEE1K8uiKPOqTCMwugaGQOsmSgqXjMaaJE/zIknLJKtO7N4Z
+        Dcht3/ivEL/iG3zaBl65HLViZQRE1QHX3pu46N0QKlIhGiRlSe5XUDtLYKP1+x6EB5wHEq4VDw8X
+        4sAvGRYVJf/6IgyKIr/c0j20M6owgp2HYQMoax2pEEE0/nhCjh9WB9dN3j3hH1TZcgTY15wYcnxs
+        C8lNMqJHfh9jJPOnKSULjRPV5F4gfi5PrxY9uW5iRbPyBBI7HDas/LA/o1c3QMoMuAlVaqV7aFbq
+        ugE1N8ZtgN1m6r/dnNNeJje2+x/5FdAaJr6xevLQGP154rXNQzjUf7V9pBwNSwT/nS+vJgM+bKKB
+        VvF1LFeJb0gw1ryuDvzkzXJD7VSn6dfmkKVXZSV3x91vPJeasFEDAAA=
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-Ray:
+      - 9e20e32d6800f95b-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 25 Mar 2026 21:07:51 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '642'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999947'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_962b361c03444494ac60f59571e1d91c
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/py/src/braintrust/integrations/agentscope/cassettes/test_auto_agentscope.yaml b/py/src/braintrust/integrations/agentscope/cassettes/test_auto_agentscope.yaml
new file mode 100644
index 00000000..221dcabb
--- /dev/null
+++ b/py/src/braintrust/integrations/agentscope/cassettes/test_auto_agentscope.yaml
@@ -0,0 +1,122 @@
+interactions:
+- request:
+    body: '{"messages":[{"role":"system","name":"system","content":[{"type":"text","text":"You
+      are a helpful assistant. Be brief."}]},{"role":"user","name":"user","content":[{"type":"text","text":"Say
+      hi in two words."}]}],"model":"gpt-4o-mini","stream":true,"stream_options":{"include_usage":true},"temperature":0}'
+    headers:
+      Accept:
+      - application/json
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '304'
+      Content-Type:
+      - application/json
+      Host:
+      - api.openai.com
+      User-Agent:
+      - AsyncOpenAI/Python 2.29.0
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Async:
+      - async:asyncio
+      X-Stainless-Lang:
+      - python
+      X-Stainless-OS:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 2.29.0
+      X-Stainless-Raw-Response:
+      - 'true'
+      X-Stainless-Runtime:
+      - CPython
+      X-Stainless-Runtime-Version:
+      - 3.13.3
+      x-stainless-read-timeout:
+      - '600'
+      x-stainless-retry-count:
+      - '0'
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: 'data: {"id":"chatcmpl-DNPizxiSOmMQFp69qki8RpeoxlQWv","object":"chat.completion.chunk","created":1774472801,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"bSxoCuDli"}
+
+
+        data: {"id":"chatcmpl-DNPizxiSOmMQFp69qki8RpeoxlQWv","object":"chat.completion.chunk","created":1774472801,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"RR3fPq"}
+
+
+        data: {"id":"chatcmpl-DNPizxiSOmMQFp69qki8RpeoxlQWv","object":"chat.completion.chunk","created":1774472801,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"
+        there"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Ks8Wg"}
+
+
+        data: {"id":"chatcmpl-DNPizxiSOmMQFp69qki8RpeoxlQWv","object":"chat.completion.chunk","created":1774472801,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"OTuTCk6Yhl"}
+
+
+        data: {"id":"chatcmpl-DNPizxiSOmMQFp69qki8RpeoxlQWv","object":"chat.completion.chunk","created":1774472801,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null,"obfuscation":"Ky1jT"}
+
+
+        data: {"id":"chatcmpl-DNPizxiSOmMQFp69qki8RpeoxlQWv","object":"chat.completion.chunk","created":1774472801,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[],"usage":{"prompt_tokens":29,"completion_tokens":3,"total_tokens":32,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"tz0KSnmcPPt"}
+
+
+        data: [DONE]
+
+
+        '
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-Ray:
+      - 9e20e1806b8d67a6-SJC
+      Connection:
+      - keep-alive
+      Content-Type:
+      - text/event-stream; charset=utf-8
+      Date:
+      - Wed, 25 Mar 2026 21:06:43 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '1276'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      set-cookie:
+      - __cf_bm=26gJQ7Ja3taZFTBb7A3G23kqPcIKnacz5qIbjguQkYs-1774472801.3417854-1.0.1.1-9JqXxOO8Hh_qhFlPUB0VZAqRq_.bnwDdhOr_sD9UVAZbZxHIG013WyWO1wxnxzpoHB2eF6tlQndU7CKalttwp.wptRHYq2G6erRwpDHPPQiZU_8r.6r_TsmfH2ya11Un;
+        HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026
+        21:36:43 GMT
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999982'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_369a94628d4547e69137ec894aa584f3
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/py/src/braintrust/integrations/agentscope/integration.py b/py/src/braintrust/integrations/agentscope/integration.py
new file mode 100644
index 00000000..fac8fe8d
--- /dev/null
+++ b/py/src/braintrust/integrations/agentscope/integration.py
@@ -0,0 +1,26 @@
+"""AgentScope integration orchestration."""
+
+from braintrust.integrations.base import BaseIntegration
+
+from .patchers import (
+    AgentCallPatcher,
+    ChatModelPatcher,
+    FanoutPipelinePatcher,
+    SequentialPipelinePatcher,
+    ToolkitCallToolFunctionPatcher,
+)
+
+
+class AgentScopeIntegration(BaseIntegration):
+    """Braintrust instrumentation for AgentScope. Requires AgentScope v1.0.0 or higher."""
+
+    name = "agentscope"
+    import_names = ("agentscope",)
+    min_version = "1.0.0"
+    patchers = (
+        AgentCallPatcher,
+        SequentialPipelinePatcher,
+        FanoutPipelinePatcher,
+        ToolkitCallToolFunctionPatcher,
+        ChatModelPatcher,
+    )
diff --git a/py/src/braintrust/integrations/agentscope/patchers.py b/py/src/braintrust/integrations/agentscope/patchers.py
new file mode 100644
index 00000000..a0a9ba21
--- /dev/null
+++ b/py/src/braintrust/integrations/agentscope/patchers.py
@@ -0,0 +1,103 @@
+"""AgentScope patchers."""
+
+from braintrust.integrations.base import CompositeFunctionWrapperPatcher, FunctionWrapperPatcher
+
+from .tracing import (
+    _agent_call_wrapper,
+    _fanout_pipeline_wrapper,
+    _model_call_wrapper,
+    _sequential_pipeline_wrapper,
+    _toolkit_call_tool_function_wrapper,
+)
+
+
+class AgentCallPatcher(FunctionWrapperPatcher):
+    """Patch AgentScope agent execution."""
+
+    name = "agentscope.agent.call"
+    target_module = "agentscope.agent"
+    target_path = "AgentBase.__call__"
+    wrapper = _agent_call_wrapper
+
+
+class SequentialPipelinePatcher(FunctionWrapperPatcher):
+    """Patch AgentScope sequential pipeline execution."""
+
+    name = "agentscope.pipeline.sequential"
+    target_module = "agentscope.pipeline"
+    target_path = "sequential_pipeline"
+    wrapper = _sequential_pipeline_wrapper
+
+
+class FanoutPipelinePatcher(FunctionWrapperPatcher):
+    """Patch AgentScope fanout pipeline execution."""
+
+    name = "agentscope.pipeline.fanout"
+    target_module = "agentscope.pipeline"
+    target_path = "fanout_pipeline"
+    wrapper = _fanout_pipeline_wrapper
+
+
+class ToolkitCallToolFunctionPatcher(FunctionWrapperPatcher):
+    """Patch AgentScope toolkit execution."""
+
+    name = "agentscope.tool.call_tool_function"
+    target_module = "agentscope.tool"
+    target_path = "Toolkit.call_tool_function"
+    wrapper = _toolkit_call_tool_function_wrapper
+
+
+class _OpenAIChatModelPatcher(FunctionWrapperPatcher):
+    name = "agentscope.model.openai"
+    target_module = "agentscope.model"
+    target_path = "OpenAIChatModel.__call__"
+    wrapper = _model_call_wrapper
+
+
+class _DashScopeChatModelPatcher(FunctionWrapperPatcher):
+    name = "agentscope.model.dashscope"
+    target_module = "agentscope.model"
+    target_path = "DashScopeChatModel.__call__"
+    wrapper = _model_call_wrapper
+
+
+class _AnthropicChatModelPatcher(FunctionWrapperPatcher):
+    name = "agentscope.model.anthropic"
+    target_module = "agentscope.model"
+    target_path = "AnthropicChatModel.__call__"
+    wrapper = _model_call_wrapper
+
+
+class _OllamaChatModelPatcher(FunctionWrapperPatcher):
+    name = "agentscope.model.ollama"
+    target_module = "agentscope.model"
+    target_path = "OllamaChatModel.__call__"
+    wrapper = _model_call_wrapper
+
+
+class _GeminiChatModelPatcher(FunctionWrapperPatcher):
+    name = "agentscope.model.gemini"
+    target_module = "agentscope.model"
+    target_path = "GeminiChatModel.__call__"
+    wrapper = _model_call_wrapper
+
+
+class _TrinityChatModelPatcher(FunctionWrapperPatcher):
+    name = "agentscope.model.trinity"
+    target_module = "agentscope.model"
+    target_path = "TrinityChatModel.__call__"
+    wrapper = _model_call_wrapper
+
+
+class ChatModelPatcher(CompositeFunctionWrapperPatcher):
+    """Patch the built-in AgentScope chat model implementations."""
+
+    name = "agentscope.model"
+    sub_patchers = (
+        _OpenAIChatModelPatcher,
+        _DashScopeChatModelPatcher,
+        _AnthropicChatModelPatcher,
+        _OllamaChatModelPatcher,
+        _GeminiChatModelPatcher,
+        _TrinityChatModelPatcher,
+    )
diff --git a/py/src/braintrust/integrations/agentscope/test_agentscope.py b/py/src/braintrust/integrations/agentscope/test_agentscope.py
new file mode 100644
index 00000000..ee09064a
--- /dev/null
+++ b/py/src/braintrust/integrations/agentscope/test_agentscope.py
@@ -0,0 +1,221 @@
+from pathlib import Path
+
+import pytest
+from braintrust import logger
+from braintrust.integrations.agentscope import setup_agentscope
+from braintrust.span_types import SpanTypeAttribute
+from braintrust.test_helpers import init_test_logger
+from braintrust.wrappers.test_utils import verify_autoinstrument_script
+
+
+PROJECT_NAME = "test_agentscope"
+
+setup_agentscope(project_name=PROJECT_NAME)
+
+
+@pytest.fixture(scope="module")
+def vcr_config():
+    return {
+        "cassette_library_dir": str(Path(__file__).parent / "cassettes"),
+    }
+
+
+@pytest.fixture
+def memory_logger():
+    init_test_logger(PROJECT_NAME)
+    with logger._internal_with_memory_background_logger() as bgl:
+        yield bgl
+
+
+def _span_type(span):
+    raw_type = span["span_attributes"]["type"]
+    return getattr(raw_type, "value", raw_type)  # unwrap objects exposing ``.value``; others pass through
+
+
+def _make_model(*, stream: bool = False):
+    from agentscope.model import OpenAIChatModel
+
+    return OpenAIChatModel(
+        model_name="gpt-4o-mini",
+        stream=stream,
+        generate_kwargs={"temperature": 0},
+    )
+
+
+def _make_agent(name: str, sys_prompt: str, *, toolkit=None, multi_agent: bool = False):
+    from agentscope.agent import ReActAgent
+    from agentscope.formatter import OpenAIChatFormatter, OpenAIMultiAgentFormatter
+    from agentscope.memory import InMemoryMemory
+    from agentscope.tool import Toolkit
+
+    agent = ReActAgent(
+        name=name,
+        sys_prompt=sys_prompt,
+        model=_make_model(),
+        formatter=OpenAIMultiAgentFormatter() if multi_agent else OpenAIChatFormatter(),
+        toolkit=toolkit or Toolkit(),
+        memory=InMemoryMemory(),
+    )
+    agent.set_console_output_enabled(False)
+    return agent
+
+
+@pytest.mark.vcr
+@pytest.mark.asyncio
+async def test_agentscope_simple_agent_run(memory_logger):
+    from agentscope.message import Msg
+
+    assert not memory_logger.pop()
+
+    agent = _make_agent(
+        "Friday",
+        "You are a concise assistant. Answer in one sentence.",
+    )
+
+    response = await agent(
+        Msg(
+            name="user",
+            content="Say hello in exactly two words.",
+            role="user",
+        )
+    )
+
+    assert response is not None
+
+    spans = memory_logger.pop()
+    agent_span = next(span for span in spans if span["span_attributes"]["name"] == "Friday.reply")
+    llm_spans = [span for span in spans if _span_type(span) == SpanTypeAttribute.LLM]
+
+    assert _span_type(agent_span) == "task"
+    assert llm_spans
+    assert llm_spans[0]["metadata"]["model"] == "gpt-4o-mini"
+    assert "args" not in llm_spans[0]["input"]
+    assert llm_spans[0]["input"]["messages"][0]["role"] == "system"
+    assert llm_spans[0]["input"]["messages"][1]["role"] == "user"
+    assert llm_spans[0]["input"]["messages"][1]["content"][0]["text"] == "Say hello in exactly two words."
+    assert llm_spans[0]["output"]["role"] == "assistant"
+    assert llm_spans[0]["output"]["content"][0]["text"] == "Hello there."
+    assert "usage" not in llm_spans[0]["output"]
+    assert agent_span["span_id"] in llm_spans[0]["span_parents"]
+
+
+@pytest.mark.vcr
+@pytest.mark.asyncio
+async def test_agentscope_sequential_pipeline_creates_parent_span(memory_logger):
+    from agentscope.message import Msg
+    from agentscope.pipeline import sequential_pipeline
+
+    assert not memory_logger.pop()
+
+    agents = [
+        _make_agent("Alice", "You rewrite the input as a short title.", multi_agent=True),
+        _make_agent("Bob", "You answer the previous message in one sentence.", multi_agent=True),
+    ]
+
+    result = await sequential_pipeline(
+        agents=agents,
+        msg=Msg(
+            name="user",
+            content="Summarize why tests should use real recorded traffic.",
+            role="user",
+        ),
+    )
+
+    assert result is not None
+
+    spans = memory_logger.pop()
+    pipeline_span = next(span for span in spans if span["span_attributes"]["name"] == "sequential_pipeline.run")
+    alice_span = next(span for span in spans if span["span_attributes"]["name"] == "Alice.reply")
+    bob_span = next(span for span in spans if span["span_attributes"]["name"] == "Bob.reply")
+
+    assert _span_type(pipeline_span) == "task"
+    assert pipeline_span["span_id"] in alice_span["span_parents"]
+    assert pipeline_span["span_id"] in bob_span["span_parents"]
+
+
+@pytest.mark.vcr
+@pytest.mark.asyncio
+async def test_agentscope_tool_use_creates_tool_span(memory_logger):
+    from agentscope.message import Msg
+    from agentscope.tool import Toolkit, execute_python_code
+
+    assert not memory_logger.pop()
+
+    toolkit = Toolkit()
+    toolkit.register_tool_function(execute_python_code)
+    agent = _make_agent(
+        "Jarvis",
+        "You are a helpful assistant. Use tools when required and keep answers brief.",
+        toolkit=toolkit,
+    )
+
+    response = await agent(
+        Msg(
+            name="user",
+            content="Use Python to compute 6 * 7 and return just the result.",
+            role="user",
+        )
+    )
+
+    assert response is not None
+
+    spans = memory_logger.pop()
+    tool_spans = [span for span in spans if _span_type(span) == "tool"]
+
+    assert tool_spans
+    assert tool_spans[0]["span_attributes"]["name"] == "execute_python_code.execute"
+    assert tool_spans[0]["input"]["tool_name"] == "execute_python_code"
+    assert tool_spans[0]["output"]["content"]
+
+    llm_spans = [span for span in spans if _span_type(span) == SpanTypeAttribute.LLM]
+    assert llm_spans
+    assert llm_spans[0]["output"]["role"] == "assistant"
+    assert llm_spans[0]["output"]["content"][0]["type"] == "tool_use"
+    assert "usage" not in llm_spans[0]["output"]
+
+
+@pytest.mark.asyncio
+async def test_model_call_wrapper_stream_logs_final_output_and_metrics(memory_logger):
+    from braintrust.integrations.agentscope.tracing import _model_call_wrapper
+
+    assert not memory_logger.pop()
+
+    class FakeOpenAIChatModel:
+        model_name = "gpt-4o-mini"
+
+    async def wrapped(*_args, **_kwargs):
+        async def _stream():
+            yield {"content": [{"type": "text", "text": "Hello"}]}
+            yield {
+                "content": [{"type": "text", "text": "Hello there!"}],
+                "usage": {"prompt_tokens": 29, "completion_tokens": 3, "total_tokens": 32},
+            }
+
+        return _stream()
+
+    stream = await _model_call_wrapper(
+        wrapped,
+        FakeOpenAIChatModel(),
+        args=([{"role": "user", "content": "Say hi in two words."}],),
+        kwargs={},
+    )
+
+    chunks = [chunk async for chunk in stream]
+
+    assert chunks[-1]["content"][0]["text"] == "Hello there!"
+
+    spans = memory_logger.pop()
+    assert len(spans) == 1
+    llm_span = spans[0]
+
+    assert _span_type(llm_span) == SpanTypeAttribute.LLM
+    assert llm_span["output"]["role"] == "assistant"
+    assert llm_span["output"]["content"][0]["text"] == "Hello there!"
+    assert llm_span["metrics"]["prompt_tokens"] == 29
+    assert llm_span["metrics"]["completion_tokens"] == 3
+    assert llm_span["metrics"]["tokens"] == 32
+
+
+class TestAutoInstrumentAgentScope:
+    def test_auto_instrument_agentscope(self):
+        verify_autoinstrument_script("test_auto_agentscope.py")
diff --git a/py/src/braintrust/integrations/agentscope/tracing.py b/py/src/braintrust/integrations/agentscope/tracing.py
new file mode 100644
index 00000000..efb4891d
--- /dev/null
+++ b/py/src/braintrust/integrations/agentscope/tracing.py
@@ -0,0 +1,269 @@
+"""AgentScope-specific span creation and stream aggregation."""
+
+from contextlib import aclosing
+from typing import Any
+
+from braintrust.logger import start_span
+from braintrust.span_types import SpanTypeAttribute
+
+
+def _clean(mapping: dict[str, Any]) -> dict[str, Any]:
+    return {name: item for name, item in mapping.items() if item is not None}  # drop None-valued entries only
+
+
+def _args_kwargs_input(args: Any, kwargs: dict[str, Any]) -> dict[str, Any]:
+    """Build a span input from the call arguments, leaving out whichever of the two is empty."""
+    # Positional arguments are copied into a list; keyword arguments are passed through as-is.
+    positional = {"args": list(args)} if args else {}
+    keyword = {"kwargs": kwargs} if kwargs else {}
+    # Unpacking keeps "args" ahead of "kwargs" in the result.
+    return {**positional, **keyword}
+
+
+def _agent_name(instance: Any) -> str:
+    return getattr(instance, "name", None) or instance.__class__.__name__  # class name if ``name`` is unset/falsy
+
+
+def _pipeline_metadata(args: Any, kwargs: dict[str, Any]) -> dict[str, Any]:
+    """Return ``{"agent_names": [...]}`` for the pipeline's agents, or ``{}`` when none are given."""
+    agents = kwargs.get("agents")
+    if agents is None and args:
+        agents = args[0]  # not passed by keyword: take the first positional argument
+    if not agents:
+        return {}
+    # Resolve names with ``_agent_name`` so an agent whose ``name`` is missing, None or empty
+    # is reported by its class name instead of leaving a None/"" entry in the list.
+    return {"agent_names": [_agent_name(agent) for agent in agents]}
+
+
+def _extract_metrics(*candidates: Any) -> dict[str, float] | None:
+    key_map = {
+        "prompt_tokens": "prompt_tokens",
+        "input_tokens": "prompt_tokens",
+        "completion_tokens": "completion_tokens",
+        "output_tokens": "completion_tokens",
+        "total_tokens": "tokens",
+        "tokens": "tokens",
+    }
+    # key_map is source usage field -> metric name; when two sources map to one metric the later entry wins.
+    for candidate in candidates:
+        data = _field_value(candidate, "usage") or candidate
+        # ``data`` is the candidate's ``usage`` field when that is truthy, otherwise the candidate itself.
+        metrics = {}
+        for source_key, target_key in key_map.items():
+            value = _field_value(data, source_key)
+            if isinstance(value, (int, float)):
+                metrics[target_key] = float(value)
+        if metrics:
+            return metrics
+    # No candidate exposed any numeric usage field.
+    return None
+
+
+def _model_provider_name(instance: Any) -> str:
+    """Return the instance's class name with a single trailing ``"Model"`` stripped."""
+    # e.g. a class named ``OpenAIChatModel`` yields ``"OpenAIChat"``
+    class_name: str = instance.__class__.__name__
+    return class_name.removesuffix("Model")
+
+
+def _model_metadata(instance: Any) -> dict[str, Any]:
+    """Describe a model instance: ``model`` (when ``model_name`` is set), ``provider`` and ``model_class``."""
+    described = {
+        "model": getattr(instance, "model_name", None),
+        "provider": _model_provider_name(instance),
+        "model_class": instance.__class__.__name__,
+    }
+    return _clean(described)
+
+
+def _model_call_input(args: Any, kwargs: dict[str, Any]) -> dict[str, Any]:
+    """Collect the model call's known arguments into a span input.
+
+    Each of ``messages``, ``tools``, ``tool_choice`` and ``structured_model`` is
+    read from ``kwargs`` when present and not None, otherwise from its positional
+    slot in ``args`` (in that order, starting at index 0).
+
+    Args:
+        args: Positional arguments of the wrapped call.
+        kwargs: Keyword arguments of the wrapped call.
+
+    Returns:
+        A dict holding only the arguments that resolved to a non-None value.
+    """
+    positional_order = ("messages", "tools", "tool_choice", "structured_model")
+
+    span_input: dict[str, Any] = {}
+    for index, name in enumerate(positional_order):
+        value = kwargs.get(name)
+        if value is None and len(args) > index:
+            # Not given by keyword: fall back to the positional slot.
+            value = args[index]
+        if value is not None:
+            span_input[name] = value
+    return span_input
+
+
+def _model_call_metadata(instance: Any, kwargs: dict[str, Any]) -> dict[str, Any]:
+    """Merge model metadata with the call's extra, non-None kwargs; kwargs win on key clashes."""
+    metadata = _model_metadata(instance)
+    for key, value in kwargs.items():
+        if value is not None and key not in {"messages", "tools", "tool_choice", "structured_model"}:
+            metadata[key] = value
+    return metadata
+
+
+def _model_call_output(result: Any) -> Any:
+    """Reduce a model response to ``role``/``content``/``metadata`` for the span output."""
+    if isinstance(result, dict):
+        content, metadata = result.get("content"), result.get("metadata")
+        fallback: Any = result
+    else:
+        content, metadata = _field_value(result, "content"), _field_value(result, "metadata")
+        if content is None and metadata is None:
+            # Nothing recognisable to extract: hand the response back unchanged.
+            return result
+        fallback = {"content": content, "metadata": metadata}
+
+    normalized: dict[str, Any] = {}
+    if content is not None:
+        normalized["role"] = "assistant"
+        normalized["content"] = content
+    if metadata is not None:
+        normalized["metadata"] = metadata
+    return normalized or fallback
+
+
+def _field_value(data: Any, key: str) -> Any:
+    if isinstance(data, dict):
+        return data.get(key)  # mapping access; None when the key is absent
+    try:
+        return getattr(data, key, None)  # attribute access; None when the attribute is absent
+    except Exception:
+        return None  # the attribute lookup itself raised; treat the field as absent
+
+
+def _tool_name(tool_call: Any) -> str:
+    """Return the tool call's ``name`` (dict key or attribute), or ``"unknown_tool"`` if missing or falsy."""
+    name = tool_call.get("name") if isinstance(tool_call, dict) else getattr(tool_call, "name", None)
+    return str(name or "unknown_tool")  # one fallback for both shapes, so ``name=None`` never becomes "None"
+
+
+async def _agent_call_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: dict[str, Any]) -> Any:
+    """Run the wrapped agent call inside a TASK span named ``<agent name>.reply``."""
+    span_name = f"{_agent_name(instance)}.reply"
+    call_input = _args_kwargs_input(args, kwargs)
+    call_metadata = _clean({"agent_class": instance.__class__.__name__})
+    with start_span(name=span_name, type=SpanTypeAttribute.TASK, input=call_input, metadata=call_metadata) as span:
+        try:
+            reply = await wrapped(*args, **kwargs)
+            span.log(output=reply)
+            return reply
+        except Exception as exc:
+            # Record the failure on the span, then re-raise it unchanged for the caller.
+            span.log(error=str(exc))
+            raise
+
+
+async def _sequential_pipeline_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: dict[str, Any]) -> Any:
+    """Run the wrapped pipeline call inside a TASK span named ``sequential_pipeline.run``."""
+    span_name = "sequential_pipeline.run"
+    call_input = _args_kwargs_input(args, kwargs)
+    call_metadata = _pipeline_metadata(args, kwargs)
+    with start_span(name=span_name, type=SpanTypeAttribute.TASK, input=call_input, metadata=call_metadata) as span:
+        try:
+            outcome = await wrapped(*args, **kwargs)
+            span.log(output=outcome)
+            return outcome
+        except Exception as exc:
+            # Record the failure on the span, then re-raise it unchanged for the caller.
+            span.log(error=str(exc))
+            raise
+
+
+async def _fanout_pipeline_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: dict[str, Any]) -> Any:
+    """Run the wrapped pipeline call inside a TASK span named ``fanout_pipeline.run``."""
+    span_name = "fanout_pipeline.run"
+    call_input = _args_kwargs_input(args, kwargs)
+    call_metadata = _pipeline_metadata(args, kwargs)
+    with start_span(name=span_name, type=SpanTypeAttribute.TASK, input=call_input, metadata=call_metadata) as span:
+        try:
+            outcome = await wrapped(*args, **kwargs)
+            span.log(output=outcome)
+            return outcome
+        except Exception as exc:
+            # Record the failure on the span, then re-raise it unchanged for the caller.
+            span.log(error=str(exc))
+            raise
+
+
+async def _toolkit_call_tool_function_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: dict[str, Any]) -> Any:
+    tool_call = args[0] if args else kwargs.get("tool_call")  # first positional arg, else the ``tool_call`` kwarg
+    tool_name = _tool_name(tool_call)
+    with start_span(
+        name=f"{tool_name}.execute",
+        type=SpanTypeAttribute.TOOL,
+        input=_clean(
+            {
+                "tool_name": tool_name,
+                "tool_call": tool_call,
+            }
+        ),
+        metadata=_clean({"toolkit_class": instance.__class__.__name__}),
+    ) as span:
+        try:
+            result = await wrapped(*args, **kwargs)
+            if _is_async_iterator(result):
+                # Streamed result: re-yield chunks, log the last one; errors raised mid-stream are not logged here.
+                async def _trace():
+                    last_chunk = None
+                    async with aclosing(result) as agen:
+                        async for chunk in agen:
+                            last_chunk = chunk
+                            yield chunk
+                    if last_chunk is not None:
+                        span.log(output=last_chunk)
+                # NOTE(review): this return exits ``with`` before the stream is consumed; span presumably ends early.
+                return _trace()
+            # Non-streaming result: log it as the span output right away.
+            span.log(output=result)
+            return result
+        except Exception as exc:
+            span.log(error=str(exc))
+            raise
+
+
+def _is_async_iterator(value: Any) -> bool:
+    try:
+        return all(getattr(value, dunder, None) is not None for dunder in ("__aiter__", "__anext__"))
+    except Exception:  # the attribute lookup itself raised; treat the value as not an async iterator
+        return False
+
+
+async def _model_call_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: dict[str, Any]) -> Any:
+    with start_span(
+        name=f"{_model_provider_name(instance)}.call",
+        type=SpanTypeAttribute.LLM,
+        input=_model_call_input(args, kwargs),
+        metadata=_model_call_metadata(instance, kwargs),
+    ) as span:
+        try:
+            result = await wrapped(*args, **kwargs)
+            if _is_async_iterator(result):
+                # Streamed result: re-yield chunks, log the last one; errors raised mid-stream are not logged here.
+                async def _trace():
+                    last_chunk = None
+                    async with aclosing(result) as agen:
+                        async for chunk in agen:
+                            last_chunk = chunk
+                            yield chunk
+                    if last_chunk is not None:
+                        span.log(output=_model_call_output(last_chunk), metrics=_extract_metrics(last_chunk))
+                # NOTE(review): this return exits ``with`` before the stream is consumed; span presumably ends early.
+                return _trace()
+            # Non-streaming result: log the normalized output and any usage metrics right away.
+            span.log(output=_model_call_output(result), metrics=_extract_metrics(result))
+            return result
+        except Exception as exc:
+            span.log(error=str(exc))
+            raise
diff --git a/py/src/braintrust/integrations/auto_test_scripts/test_auto_agentscope.py b/py/src/braintrust/integrations/auto_test_scripts/test_auto_agentscope.py
new file mode 100644
index 00000000..08efd409
--- /dev/null
+++ b/py/src/braintrust/integrations/auto_test_scripts/test_auto_agentscope.py
@@ -0,0 +1,73 @@
+"""Test auto_instrument for AgentScope."""
+
+import os
+from pathlib import Path
+
+
+os.environ["BRAINTRUST_CASSETTES_DIR"] = str(Path(__file__).resolve().parent.parent / "agentscope" / "cassettes")
+
+from braintrust.auto import auto_instrument
+from braintrust.wrappers.test_utils import autoinstrument_test_context
+
+
+results = auto_instrument()
+assert results.get("agentscope") == True, "auto_instrument should return True for agentscope"
+
+results2 = auto_instrument()
+assert results2.get("agentscope") == True, "auto_instrument should still return True on second call"
+
+from agentscope.agent import AgentBase, ReActAgent
+from agentscope.formatter import OpenAIChatFormatter
+from agentscope.memory import InMemoryMemory
+from agentscope.message import Msg
+from agentscope.model import OpenAIChatModel
+from agentscope.pipeline import fanout_pipeline, sequential_pipeline
+from agentscope.tool import Toolkit
+
+
+assert hasattr(AgentBase.__call__, "__wrapped__"), "AgentBase.__call__ should be wrapped"
+assert hasattr(sequential_pipeline, "__wrapped__"), "sequential_pipeline should be wrapped"
+assert hasattr(fanout_pipeline, "__wrapped__"), "fanout_pipeline should be wrapped"
+assert hasattr(Toolkit.call_tool_function, "__wrapped__"), "Toolkit.call_tool_function should be wrapped"
+assert hasattr(OpenAIChatModel.__call__, "__wrapped__"), "OpenAIChatModel.__call__ should be wrapped"
+
+
+with autoinstrument_test_context("test_auto_agentscope") as memory_logger:
+    agent = ReActAgent(
+        name="Test Agent",
+        sys_prompt="You are a helpful assistant. Be brief.",
+        model=OpenAIChatModel(
+            model_name="gpt-4o-mini",
+            generate_kwargs={"temperature": 0},
+        ),
+        formatter=OpenAIChatFormatter(),
+        toolkit=Toolkit(),
+        memory=InMemoryMemory(),
+    )
+    agent.set_console_output_enabled(False)
+
+    response = agent(
+        Msg(
+            name="user",
+            content="Say hi in two words.",
+            role="user",
+        )
+    )
+
+    import asyncio
+
+    result = asyncio.run(response)
+    assert result is not None
+
+    spans = memory_logger.pop()
+    assert len(spans) >= 2, f"Expected at least 2 spans (agent + model), got {len(spans)}"
+
+    agent_span = next(span for span in spans if span["span_attributes"]["name"] == "Test Agent.reply")
+    llm_spans = [span for span in spans if span["span_attributes"]["type"].value == "llm"]
+
+    assert agent_span["span_attributes"]["type"].value == "task"
+    assert llm_spans, "Should have at least one LLM span"
+    assert llm_spans[0]["metadata"]["model"] == "gpt-4o-mini"
+    assert agent_span["span_id"] in llm_spans[0]["span_parents"]
+
+print("SUCCESS")
diff --git a/py/src/braintrust/wrappers/test_google_genai.py b/py/src/braintrust/wrappers/test_google_genai.py
index 73a31e71..7839ed0c 100644
--- a/py/src/braintrust/wrappers/test_google_genai.py
+++ b/py/src/braintrust/wrappers/test_google_genai.py
@@ -31,6 +31,7 @@ def before_record_request(request):
         "record_mode": record_mode,
         "filter_headers": [
             "authorization",
+            "Authorization",
             "x-api-key",
             "x-goog-api-key",
         ],

From fd6b14b9595ad85de0eff4df18cc2d5f381c0609 Mon Sep 17 00:00:00 2001
From: Abhijeet Prasad <abhijeet@braintrustdata.com>
Date: Wed, 25 Mar 2026 15:39:14 -0700
Subject: [PATCH 2/3] test: fix AgentScope 1.0.0 compatibility

---
 .../integrations/agentscope/test_agentscope.py   |  5 ++++-
 .../auto_test_scripts/test_auto_agentscope.py    | 16 +++++++++++++---
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/py/src/braintrust/integrations/agentscope/test_agentscope.py b/py/src/braintrust/integrations/agentscope/test_agentscope.py
index ee09064a..249a1b7e 100644
--- a/py/src/braintrust/integrations/agentscope/test_agentscope.py
+++ b/py/src/braintrust/integrations/agentscope/test_agentscope.py
@@ -56,7 +56,10 @@ def _make_agent(name: str, sys_prompt: str, *, toolkit=None, multi_agent: bool =
         toolkit=toolkit or Toolkit(),
         memory=InMemoryMemory(),
     )
-    agent.set_console_output_enabled(False)
+    if hasattr(agent, "set_console_output_enabled"):
+        agent.set_console_output_enabled(False)
+    elif hasattr(agent, "disable_console_output"):
+        agent.disable_console_output()
     return agent
 
 
diff --git a/py/src/braintrust/integrations/auto_test_scripts/test_auto_agentscope.py b/py/src/braintrust/integrations/auto_test_scripts/test_auto_agentscope.py
index 08efd409..2cc545a9 100644
--- a/py/src/braintrust/integrations/auto_test_scripts/test_auto_agentscope.py
+++ b/py/src/braintrust/integrations/auto_test_scripts/test_auto_agentscope.py
@@ -21,13 +21,20 @@
 from agentscope.memory import InMemoryMemory
 from agentscope.message import Msg
 from agentscope.model import OpenAIChatModel
-from agentscope.pipeline import fanout_pipeline, sequential_pipeline
+from agentscope.pipeline import sequential_pipeline
 from agentscope.tool import Toolkit
 
 
+try:
+    from agentscope.pipeline import fanout_pipeline
+except ImportError:
+    fanout_pipeline = None
+
+
 assert hasattr(AgentBase.__call__, "__wrapped__"), "AgentBase.__call__ should be wrapped"
 assert hasattr(sequential_pipeline, "__wrapped__"), "sequential_pipeline should be wrapped"
-assert hasattr(fanout_pipeline, "__wrapped__"), "fanout_pipeline should be wrapped"
+if fanout_pipeline is not None:
+    assert hasattr(fanout_pipeline, "__wrapped__"), "fanout_pipeline should be wrapped"
 assert hasattr(Toolkit.call_tool_function, "__wrapped__"), "Toolkit.call_tool_function should be wrapped"
 assert hasattr(OpenAIChatModel.__call__, "__wrapped__"), "OpenAIChatModel.__call__ should be wrapped"
 
@@ -44,7 +51,10 @@
         toolkit=Toolkit(),
         memory=InMemoryMemory(),
     )
-    agent.set_console_output_enabled(False)
+    if hasattr(agent, "set_console_output_enabled"):
+        agent.set_console_output_enabled(False)
+    elif hasattr(agent, "disable_console_output"):
+        agent.disable_console_output()
 
     response = agent(
         Msg(

From dba55b3df64dfe6ae78a8b267248691d909b6eec Mon Sep 17 00:00:00 2001
From: Abhijeet Prasad <abhijeet@braintrustdata.com>
Date: Wed, 25 Mar 2026 17:30:10 -0700
Subject: [PATCH 3/3] feat: Add agentscope eval instrumentation

---
 .../integrations/agentscope/__init__.py       |  43 ++-
 ..._general_evaluator_creates_eval_spans.yaml | 320 ++++++++++++++++
 .../integrations/agentscope/integration.py    |  25 ++
 .../integrations/agentscope/patchers.py       |  71 ++++
 .../agentscope/test_agentscope.py             | 252 ++++++++++++-
 .../integrations/agentscope/tracing.py        | 344 +++++++++++++++++-
 py/src/braintrust/integrations/base.py        |   3 +-
 7 files changed, 1053 insertions(+), 5 deletions(-)
 create mode 100644 py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_general_evaluator_creates_eval_spans.yaml

diff --git a/py/src/braintrust/integrations/agentscope/__init__.py b/py/src/braintrust/integrations/agentscope/__init__.py
index 534f1db8..b78c158e 100644
--- a/py/src/braintrust/integrations/agentscope/__init__.py
+++ b/py/src/braintrust/integrations/agentscope/__init__.py
@@ -1,20 +1,59 @@
 """Braintrust integration for AgentScope."""
 
+from typing import Any
+
 from braintrust.logger import NOOP_SPAN, current_span, init_logger
 
 from .integration import AgentScopeIntegration
+from .patchers import (
+    GeneralEvaluatorPatcher,
+    MetricCallPatcher,
+    RayEvaluatorRunPatcher,
+    TaskEvaluatePatcher,
+)
 
 
-__all__ = ["AgentScopeIntegration", "setup_agentscope"]
+__all__ = ["AgentScopeIntegration", "setup_agentscope", "wrap_evaluator"]
 
 
 def setup_agentscope(
     api_key: str | None = None,
     project_id: str | None = None,
     project_name: str | None = None,
+    instrument_evals: bool = True,
 ) -> bool:
     """Setup Braintrust integration with AgentScope."""
     if current_span() == NOOP_SPAN:
         init_logger(project=project_name, api_key=api_key, project_id=project_id)
 
-    return AgentScopeIntegration.setup()
+    return AgentScopeIntegration.setup(instrument_evals=instrument_evals)
+
+
+def wrap_evaluator(Evaluator: Any) -> Any:
+    """Patch an AgentScope evaluator class for tracing and return it.
+
+    The class is wrapped with ``RayEvaluatorRunPatcher`` when its ``__name__``
+    is ``"RayEvaluator"`` and with ``GeneralEvaluatorPatcher`` otherwise. If
+    ``agentscope.evaluate`` is importable, the ``Task`` and ``MetricBase``
+    classes it exports are wrapped as well, so nested task and metric spans
+    do not depend on global setup having been run.
+
+    Returns:
+        The ``Evaluator`` object that was passed in.
+    """
+    is_ray = getattr(Evaluator, "__name__", "") == "RayEvaluator"
+    evaluator_patcher = RayEvaluatorRunPatcher if is_ray else GeneralEvaluatorPatcher
+    evaluator_patcher.wrap_target(Evaluator)
+
+    try:
+        import agentscope.evaluate as agentscope_evaluate
+    except ImportError:
+        return Evaluator
+
+    # (exported class name, patcher) pairs; names the module does not export are skipped.
+    for export_name, patcher in (("Task", TaskEvaluatePatcher), ("MetricBase", MetricCallPatcher)):
+        target = getattr(agentscope_evaluate, export_name, None)
+        if target is not None:
+            patcher.wrap_target(target)
+
+    return Evaluator
diff --git a/py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_general_evaluator_creates_eval_spans.yaml b/py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_general_evaluator_creates_eval_spans.yaml
new file mode 100644
index 00000000..87f6358e
--- /dev/null
+++ b/py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_general_evaluator_creates_eval_spans.yaml
@@ -0,0 +1,320 @@
+interactions:
+- request:
+    body: '{"messages":[{"role":"system","name":"system","content":[{"type":"text","text":"You
+      are a concise assistant. Answer in one sentence."}]},{"role":"user","name":"user","content":[{"type":"text","text":"Say
+      hello in exactly two words."}]}],"model":"gpt-4o-mini","stream":false,"temperature":0}'
+    headers:
+      Accept:
+      - application/json
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '290'
+      Content-Type:
+      - application/json
+      Host:
+      - api.openai.com
+      User-Agent:
+      - AsyncOpenAI/Python 2.29.0
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Async:
+      - async:asyncio
+      X-Stainless-Lang:
+      - python
+      X-Stainless-OS:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 2.29.0
+      X-Stainless-Runtime:
+      - CPython
+      X-Stainless-Runtime-Version:
+      - 3.13.3
+      x-stainless-read-timeout:
+      - '600'
+      x-stainless-retry-count:
+      - '0'
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyCzNtgAAAAD//4xSsU7DMBDd+xWV56Rq
+        05TQGYbODCyoilz7khgc29hOFYT675ydtglQJBZLvnfv/N7zPRzNkb33u02/qXTPN48AT89bkgSG
+        PrwC8xfWgmnkgRdaDTCzQD2EqauiyPMiK7Z3EWg1BxlotfFprtNWKJFmyyxPl0W6uj+zGy0YOGx7
+        wet8/hnPoFNx6LG8TC6VFpyjNWDt0oRFq2WoEOqccJ4qT5IRZFp5UFH6DqTUc9+AhcW0xULVORpk
+        qk7KCUCV0p4Gm1Hc/oycrnKkro3VB/eDSiq06ZoSU3EYET7tvDYkoic899F2980JwUGt8aXXbxCf
+        W2fDODKGPQHPmEd9clLeJDeGlRw8FdJNUiOMsgb4yBwjph0XegLMJpZ/a7k1e7AtVP2f8SPAGBhc
+        otJY4IJ99zu2WQib+FfbNeIomDiwR1yt0guw4Rs4VLSTw34Q9+E8tCX+VQ3WWDEsSWVKRtdQ8GJ1
+        qMjsNPsCAAD//wMAQQCebTIDAAA=
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-Ray:
+      - 9e20e1608aaf6142-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 25 Mar 2026 21:06:37 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '373'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      set-cookie:
+      - __cf_bm=0r2bm90d_zmUe.y8EYZpgoGyTVS4QQSDoJVx.yDpJng-1774472796.2480545-1.0.1.1-onYisUL_Bju9EhfGXQnZBZkwk3gdjG7tHXVdr34BVePUh3JL0OqfVWApVIaF_KDBKfw4HIiGBvzONzv_AS91kbK7eL.FzFDwILNg8_F1h3hsPpZO.pIoeUN1dp_.acW6;
+        HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026
+        21:36:37 GMT
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999975'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_0b44866bb8cd459db8712e04e4248889
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages":[{"role":"system","name":"system","content":[{"type":"text","text":"You
+      are a concise assistant. Answer in one sentence."}]},{"role":"user","name":"user","content":[{"type":"text","text":"Say
+      hello in exactly two words."}]}],"model":"gpt-4o-mini","stream":false,"temperature":0}'
+    headers:
+      Accept:
+      - application/json
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '290'
+      Content-Type:
+      - application/json
+      Host:
+      - api.openai.com
+      User-Agent:
+      - AsyncOpenAI/Python 2.29.0
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Async:
+      - async:asyncio
+      X-Stainless-Lang:
+      - python
+      X-Stainless-OS:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 2.29.0
+      X-Stainless-Runtime:
+      - CPython
+      X-Stainless-Runtime-Version:
+      - 3.13.3
+      x-stainless-read-timeout:
+      - '600'
+      x-stainless-retry-count:
+      - '0'
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyPJOswAAAAD//4xS0WrCMBR99yskz61o
+        7aiv7mX6MJgwtsGQEpPbNjNNQpKOjeG/76ZVWzcHewnknntuzjm5t/dsebeB9Wq/qZ7WD4/L55eU
+        piQKDL17A+ZPrAnTyAMvtOpgZoF6CFNnWZamWbJIkhaoNQcZaKXxcarjWigRJ9MkjadZPFsc2ZUW
+        DBy2veJ1PP5qz6BTcfjA8jQ6VWpwjpaAtVMTFq2WoUKoc8J5qjyJepBp5UG10lcgpR77CixMhi0W
+        isbRIFM1Ug4AqpT2NNhsxW2PyOEsR+rSWL1zP6ikQJuuyjEVhxHh085rQ1r0gOe2td1cOCE4qDY+
+        93oP7XPzpBtH+rAH4BHzqE8OyjfRlWE5B0+FdIPUCKOsAt4z+4hpw4UeAKOB5d9ars3ubAtV/md8
+        DzAGBpcoNxa4YJd++zYLYRP/ajtH3AomDuw7rlbuBdjwDRwK2shuP4j7dB7qHP+qBGus6JakMDmj
+        c8h4NtsVZHQYfQMAAP//AwDajPLhMgMAAA==
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-Ray:
+      - 9e20e202698a251d-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 25 Mar 2026 21:07:03 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '334'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      set-cookie:
+      - __cf_bm=GJt8W0lyxqfgtKXEJ4M8twcG5pNqc4RmQiPqQ_IukiU-1774472822.1494222-1.0.1.1-Jv3zKpnCjFAAQQaXBEt3RElEP.QjtEFbqrvr8BASrk5X7XSiOj1UBc4tUR3t9QbKmOM0VrcVW6R3HYLaYxbTHQqz4Dpjl7Z.Sz9BslefycjBprbfLjQ1aoOYxrSO7lkO;
+        HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026
+        21:37:03 GMT
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999977'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_8c118a37a5da44069de28fba911081e0
+    status:
+      code: 200
+      message: OK
+- request:
+    body: '{"messages":[{"role":"system","name":"system","content":[{"type":"text","text":"You
+      are a concise assistant. Answer in one sentence."}]},{"role":"user","name":"user","content":[{"type":"text","text":"Say
+      hello in exactly two words."}]}],"model":"gpt-4o-mini","stream":false,"temperature":0}'
+    headers:
+      Accept:
+      - application/json
+      Accept-Encoding:
+      - gzip, deflate
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '290'
+      Content-Type:
+      - application/json
+      Host:
+      - api.openai.com
+      User-Agent:
+      - AsyncOpenAI/Python 2.29.0
+      X-Stainless-Arch:
+      - arm64
+      X-Stainless-Async:
+      - async:asyncio
+      X-Stainless-Lang:
+      - python
+      X-Stainless-OS:
+      - MacOS
+      X-Stainless-Package-Version:
+      - 2.29.0
+      X-Stainless-Runtime:
+      - CPython
+      X-Stainless-Runtime-Version:
+      - 3.13.3
+      x-stainless-read-timeout:
+      - '600'
+      x-stainless-retry-count:
+      - '0'
+    method: POST
+    uri: https://api.openai.com/v1/chat/completions
+  response:
+    body:
+      string: !!binary |
+        H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyDbKMgIAAAD//4xSQU7DMBC89xWVz0mV
+        pKHhDALBDSFxQlXk2pvE4NiW7dAi1L+zTtomhSJxseSdnfXMeO+zuxuWbp9vt4/pU/WyonrHk45E
+        gaE3b8D8kbVgGnnghVYDzCxQD2FqWhR5XmTXq1UPtJqDDLTa+DjXcSuUiLMky+OkiNPrA7vRgoHD
+        tle8zudf/Rl0Kg47LCfRsdKCc7QGrB2bsGi1DBVCnRPOU+VJNIJMKw+ql/4AUuq5b8DCYtpioeoc
+        DTJVJ+UEoEppT4PNXtz6gOxPcqSujdUb94NKKrTpmhJTcRgRPu28NqRH93iue9vdmROCg1rjS6/f
+        oX9umQ3jyBj2BDxgHvXJSfkqujCs5OCpkG6SGmGUNcBH5hgx7bjQE2A2sfxby6XZg22h6v+MHwHG
+        wOASlcYCF+zc79hmIWziX22niHvBxIH9wNUqvQAbvoFDRTs57Adxn85DW+Jf1WCNFcOSVKZkdAkF
+        L9JNRWb72TcAAAD//wMAHqai9DIDAAA=
+    headers:
+      CF-Cache-Status:
+      - DYNAMIC
+      CF-Ray:
+      - 9e20e3168f481d99-SJC
+      Connection:
+      - keep-alive
+      Content-Encoding:
+      - gzip
+      Content-Type:
+      - application/json
+      Date:
+      - Wed, 25 Mar 2026 21:07:46 GMT
+      Server:
+      - cloudflare
+      Strict-Transport-Security:
+      - max-age=31536000; includeSubDomains; preload
+      Transfer-Encoding:
+      - chunked
+      X-Content-Type-Options:
+      - nosniff
+      access-control-expose-headers:
+      - X-Request-ID
+      alt-svc:
+      - h3=":443"; ma=86400
+      openai-organization:
+      - braintrust-data
+      openai-processing-ms:
+      - '370'
+      openai-project:
+      - proj_vsCSXafhhByzWOThMrJcZiw9
+      openai-version:
+      - '2020-10-01'
+      set-cookie:
+      - __cf_bm=QmJ_mn8jkQXEiES_f8MtNfFrG.TdUhMi9F3CsYesW.Y-1774472866.3273165-1.0.1.1-9jM9NCSYxoeVuM4GWc_rXbAFSxA2USepp4rz5niKNzmDK.TtF1V7PSKWLir8akfbpuyXvva5QQRFgDCcMT3P0xa_1Tzm9mW9uidzQIoQBbq5O6t0XUQzY8bQC4ddKj1i;
+        HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026
+        21:37:46 GMT
+      x-openai-proxy-wasm:
+      - v0.1
+      x-ratelimit-limit-requests:
+      - '30000'
+      x-ratelimit-limit-tokens:
+      - '150000000'
+      x-ratelimit-remaining-requests:
+      - '29999'
+      x-ratelimit-remaining-tokens:
+      - '149999977'
+      x-ratelimit-reset-requests:
+      - 2ms
+      x-ratelimit-reset-tokens:
+      - 0s
+      x-request-id:
+      - req_2ac33c8b77a343ea881d0fe0fbeccc14
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/py/src/braintrust/integrations/agentscope/integration.py b/py/src/braintrust/integrations/agentscope/integration.py
index fac8fe8d..20b5f203 100644
--- a/py/src/braintrust/integrations/agentscope/integration.py
+++ b/py/src/braintrust/integrations/agentscope/integration.py
@@ -6,7 +6,11 @@
     AgentCallPatcher,
     ChatModelPatcher,
     FanoutPipelinePatcher,
+    GeneralEvaluatorPatcher,
+    MetricCallPatcher,
+    RayEvaluatorRunPatcher,
     SequentialPipelinePatcher,
+    TaskEvaluatePatcher,
     ToolkitCallToolFunctionPatcher,
 )
 
@@ -23,4 +27,25 @@ class AgentScopeIntegration(BaseIntegration):
         FanoutPipelinePatcher,
         ToolkitCallToolFunctionPatcher,
         ChatModelPatcher,
+        GeneralEvaluatorPatcher,
+        RayEvaluatorRunPatcher,
+        TaskEvaluatePatcher,
+        MetricCallPatcher,
     )
+
+    eval_patchers = (  # members of ``patchers`` that ``setup(instrument_evals=False)`` leaves out
+        GeneralEvaluatorPatcher,
+        RayEvaluatorRunPatcher,
+        TaskEvaluatePatcher,
+        MetricCallPatcher,
+    )
+
+    @classmethod
+    def setup(cls, *, target=None, instrument_evals: bool = True) -> bool:
+        """Run setup with all patchers, or without ``eval_patchers`` if ``instrument_evals`` is False."""
+        if instrument_evals:
+            selected = cls.patchers
+        else:
+            # Keep declaration order while dropping the evaluation-only patchers.
+            selected = tuple(p for p in cls.patchers if p not in cls.eval_patchers)
+        return super().setup(target=target, patchers=selected)
diff --git a/py/src/braintrust/integrations/agentscope/patchers.py b/py/src/braintrust/integrations/agentscope/patchers.py
index a0a9ba21..8d4eb314 100644
--- a/py/src/braintrust/integrations/agentscope/patchers.py
+++ b/py/src/braintrust/integrations/agentscope/patchers.py
@@ -5,8 +5,14 @@
 from .tracing import (
     _agent_call_wrapper,
     _fanout_pipeline_wrapper,
+    _general_evaluator_run_evaluation_wrapper,
+    _general_evaluator_run_solution_wrapper,
+    _general_evaluator_run_wrapper,
+    _metric_call_wrapper,
     _model_call_wrapper,
+    _ray_evaluator_run_wrapper,
     _sequential_pipeline_wrapper,
+    _task_evaluate_wrapper,
     _toolkit_call_tool_function_wrapper,
 )
 
@@ -101,3 +107,68 @@ class ChatModelPatcher(CompositeFunctionWrapperPatcher):
         _GeminiChatModelPatcher,
         _TrinityChatModelPatcher,
     )
+
+
+class _GeneralEvaluatorRunPatcher(FunctionWrapperPatcher):
+    """Patch AgentScope ``GeneralEvaluator.run`` (root execution) with the run wrapper."""
+
+    name = "agentscope.evaluate.general.run"
+    target_module = "agentscope.evaluate"
+    target_path = "GeneralEvaluator.run"
+    wrapper = _general_evaluator_run_wrapper
+
+
+class _GeneralEvaluatorRunSolutionPatcher(FunctionWrapperPatcher):
+    """Patch AgentScope ``GeneralEvaluator.run_solution`` (solution execution)."""
+
+    name = "agentscope.evaluate.general.run_solution"
+    target_module = "agentscope.evaluate"
+    target_path = "GeneralEvaluator.run_solution"
+    wrapper = _general_evaluator_run_solution_wrapper
+
+
+class _GeneralEvaluatorRunEvaluationPatcher(FunctionWrapperPatcher):
+    """Patch AgentScope ``GeneralEvaluator.run_evaluation`` (evaluation execution)."""
+
+    name = "agentscope.evaluate.general.run_evaluation"
+    target_module = "agentscope.evaluate"
+    target_path = "GeneralEvaluator.run_evaluation"
+    wrapper = _general_evaluator_run_evaluation_wrapper
+
+
+class GeneralEvaluatorPatcher(CompositeFunctionWrapperPatcher):
+    """Patch AgentScope GeneralEvaluator for Braintrust eval tracing (run, run_solution, run_evaluation)."""
+
+    name = "agentscope.evaluate.general"
+    sub_patchers = (
+        _GeneralEvaluatorRunPatcher,
+        _GeneralEvaluatorRunSolutionPatcher,
+        _GeneralEvaluatorRunEvaluationPatcher,
+    )
+
+
+class RayEvaluatorRunPatcher(FunctionWrapperPatcher):
+    """Patch AgentScope ``RayEvaluator.run`` (root execution) with the Ray run wrapper."""
+
+    name = "agentscope.evaluate.ray"
+    target_module = "agentscope.evaluate"
+    target_path = "RayEvaluator.run"
+    wrapper = _ray_evaluator_run_wrapper
+
+
+class TaskEvaluatePatcher(FunctionWrapperPatcher):
+    """Patch AgentScope task evaluation (``Task.evaluate``)."""
+
+    name = "agentscope.evaluate.task"
+    target_module = "agentscope.evaluate"
+    target_path = "Task.evaluate"
+    wrapper = _task_evaluate_wrapper
+
+
+class MetricCallPatcher(FunctionWrapperPatcher):
+    """Patch AgentScope metric execution (``MetricBase.__call__``)."""
+
+    name = "agentscope.evaluate.metric"
+    target_module = "agentscope.evaluate"
+    target_path = "MetricBase.__call__"
+    wrapper = _metric_call_wrapper
diff --git a/py/src/braintrust/integrations/agentscope/test_agentscope.py b/py/src/braintrust/integrations/agentscope/test_agentscope.py
index 249a1b7e..3688fcd7 100644
--- a/py/src/braintrust/integrations/agentscope/test_agentscope.py
+++ b/py/src/braintrust/integrations/agentscope/test_agentscope.py
@@ -1,8 +1,19 @@
+import sys
+from dataclasses import dataclass
 from pathlib import Path
+from types import ModuleType
 
 import pytest
 from braintrust import logger
-from braintrust.integrations.agentscope import setup_agentscope
+from braintrust.integrations.agentscope import setup_agentscope, wrap_evaluator
+from braintrust.integrations.agentscope.patchers import (
+    AgentCallPatcher,
+    MetricCallPatcher,
+    TaskEvaluatePatcher,
+    _GeneralEvaluatorRunEvaluationPatcher,
+    _GeneralEvaluatorRunPatcher,
+    _GeneralEvaluatorRunSolutionPatcher,
+)
 from braintrust.span_types import SpanTypeAttribute
 from braintrust.test_helpers import init_test_logger
 from braintrust.wrappers.test_utils import verify_autoinstrument_script
@@ -219,6 +230,245 @@ async def _stream():
     assert llm_span["metrics"]["tokens"] == 32
 
 
+@pytest.mark.vcr
+@pytest.mark.asyncio
+async def test_agentscope_general_evaluator_creates_eval_spans(memory_logger, tmp_path):
+    from agentscope.evaluate import (
+        BenchmarkBase,
+        FileEvaluatorStorage,
+        GeneralEvaluator,
+        MetricBase,
+        MetricResult,
+        MetricType,
+        SolutionOutput,
+        Task,
+    )
+    from agentscope.message import Msg
+
+    assert not memory_logger.pop()  # nothing may have been logged before the run starts
+
+    class ExactMatchMetric(MetricBase):
+        def __init__(self, ground_truth: str):
+            super().__init__(
+                name="exact_match",
+                metric_type=MetricType.NUMERICAL,
+                description="Check whether the model answer exactly matches the ground truth.",
+                categories=[],
+            )
+            self.ground_truth = ground_truth
+
+        async def __call__(self, solution: SolutionOutput) -> MetricResult:
+            is_match = solution.output == self.ground_truth
+            return MetricResult(
+                name=self.name,
+                result=1.0 if is_match else 0.0,
+                message="Correct" if is_match else "Incorrect",
+            )
+
+    class ToyBenchmark(BenchmarkBase):
+        def __init__(self, tasks):
+            super().__init__(
+                name="Toy benchmark",
+                description="A one-task benchmark for AgentScope eval instrumentation.",
+            )
+            self.tasks = tasks
+
+        def __iter__(self):
+            yield from self.tasks
+
+        def __len__(self):
+            return len(self.tasks)
+
+        def __getitem__(self, index):
+            return self.tasks[index]
+
+    task = Task(
+        id="hello-task",
+        input="Say hello in exactly two words.",
+        ground_truth="Hello there.",
+        metrics=[ExactMatchMetric("Hello there.")],
+        tags={"difficulty": "easy", "category": "greeting"},
+        metadata={"suite": "toy"},
+    )
+    evaluator = GeneralEvaluator(
+        name="Toy benchmark evaluation",
+        benchmark=ToyBenchmark([task]),
+        n_repeat=1,
+        storage=FileEvaluatorStorage(save_dir=str(tmp_path / "agentscope-eval")),
+        n_workers=1,
+    )
+
+    async def solution(eval_task: Task, pre_hook):  # the solution callable handed to evaluator.run
+        agent = _make_agent(
+            "Friday",
+            "You are a concise assistant. Answer in one sentence.",
+        )
+        if hasattr(agent, "register_instance_hook"):  # only register when the agent offers the hook API
+            agent.register_instance_hook("pre_print", "save_logging", pre_hook)
+
+        response = await agent(
+            Msg(
+                name="user",
+                content=eval_task.input,
+                role="user",
+            )
+        )
+
+        content = response.content
+        if isinstance(content, list):
+            output = next(
+                (item["text"] for item in content if isinstance(item, dict) and item.get("type") == "text"),
+                None,
+            )
+            trajectory = content
+        else:
+            output = content
+            trajectory = [content]
+
+        return SolutionOutput(
+            success=True,
+            output=output,
+            trajectory=trajectory,
+            meta={"agent": "Friday"},
+        )
+
+    await evaluator.run(solution)
+
+    spans = memory_logger.pop()
+    root_span = next(span for span in spans if span["span_attributes"]["name"] == "agentscope.evaluate.run")
+    solution_span = next(span for span in spans if span["span_attributes"]["name"] == "hello-task.solution")
+    evaluation_span = next(span for span in spans if span["span_attributes"]["name"] == "hello-task.evaluate")
+    metric_span = next(span for span in spans if span["span_attributes"]["name"] == "exact_match")
+    agent_span = next(span for span in spans if span["span_attributes"]["name"] == "Friday.reply")
+
+    assert _span_type(root_span) == "eval"
+    assert root_span["metadata"]["benchmark_name"] == "Toy benchmark"
+    assert root_span["metadata"]["task_count"] == 1
+    assert root_span["output"]["status"] == "completed"
+
+    assert _span_type(solution_span) == "task"
+    assert solution_span["input"] == "Say hello in exactly two words."
+    assert solution_span["expected"] == "Hello there."
+    assert solution_span["tags"] == ["category:greeting", "difficulty:easy"]  # dict tags, sorted by key
+    assert solution_span["metadata"]["repeat_id"] == "0"
+    assert solution_span["metadata"]["metric_names"] == ["exact_match"]
+    assert solution_span["metadata"]["task_tags"] == {"difficulty": "easy", "category": "greeting"}
+    assert solution_span["output"]["output"] == "Hello there."
+    assert solution_span["span_id"] in agent_span["span_parents"]
+
+    assert _span_type(evaluation_span) == "eval"
+    assert evaluation_span["span_id"] in metric_span["span_parents"]  # nesting: run > solution > evaluate > metric
+    assert solution_span["span_id"] in evaluation_span["span_parents"]
+    assert root_span["span_id"] in solution_span["span_parents"]
+    assert evaluation_span["output"][0]["result"] == 1.0
+    assert evaluation_span["output"][0]["message"] == "Correct"
+
+    assert _span_type(metric_span) == "score"
+    assert metric_span["scores"]["exact_match"] == 1.0
+    assert metric_span["output"]["result"] == 1.0
+    assert metric_span["output"]["message"] == "Correct"
+
+
+@dataclass
+class _FakeAgentscopeModules:  # the stub classes built by the ``fake_agentscope_modules`` fixture
+    AgentBase: type
+    GeneralEvaluator: type
+    MetricBase: type
+    Task: type
+
+
+@pytest.fixture
+def fake_agentscope_modules(monkeypatch):
+    agentscope_module = ModuleType("agentscope")
+    agentscope_module.__path__ = []  # a ``__path__`` attribute makes the stub look like a package
+    agentscope_module.__version__ = "1.0.0"
+
+    agent_module = ModuleType("agentscope.agent")
+    evaluate_module = ModuleType("agentscope.evaluate")
+
+    class AgentBase:
+        async def __call__(self, *_args, **_kwargs):
+            return "ok"
+
+    class Task:
+        async def evaluate(self, *_args, **_kwargs):
+            return []
+
+    class MetricBase:
+        async def __call__(self, *_args, **_kwargs):
+            return None
+
+    class GeneralEvaluator:
+        async def run(self, *_args, **_kwargs):
+            return None
+
+        async def run_solution(self, *_args, **_kwargs):
+            return None
+
+        async def run_evaluation(self, *_args, **_kwargs):
+            return None
+
+    agent_module.AgentBase = AgentBase
+    evaluate_module.GeneralEvaluator = GeneralEvaluator
+    evaluate_module.Task = Task
+    evaluate_module.MetricBase = MetricBase
+
+    agentscope_module.agent = agent_module
+    agentscope_module.evaluate = evaluate_module
+
+    monkeypatch.setitem(sys.modules, "agentscope", agentscope_module)  # undone by monkeypatch at teardown
+    monkeypatch.setitem(sys.modules, "agentscope.agent", agent_module)
+    monkeypatch.setitem(sys.modules, "agentscope.evaluate", evaluate_module)
+
+    return _FakeAgentscopeModules(
+        AgentBase=AgentBase,
+        GeneralEvaluator=GeneralEvaluator,
+        MetricBase=MetricBase,
+        Task=Task,
+    )
+
+
+def test_setup_agentscope_can_skip_eval_patchers(fake_agentscope_modules):
+    result = setup_agentscope(project_name=PROJECT_NAME, instrument_evals=False)  # opt out of eval tracing
+
+    assert result is True
+    assert getattr(fake_agentscope_modules.AgentBase.__call__, AgentCallPatcher.patch_marker_attr(), False)
+    assert not getattr(  # from here on: no eval patch marker may be present
+        fake_agentscope_modules.GeneralEvaluator, _GeneralEvaluatorRunPatcher.patch_marker_attr(), False
+    )
+    assert not getattr(
+        fake_agentscope_modules.GeneralEvaluator,
+        _GeneralEvaluatorRunSolutionPatcher.patch_marker_attr(),
+        False,
+    )
+    assert not getattr(
+        fake_agentscope_modules.GeneralEvaluator,
+        _GeneralEvaluatorRunEvaluationPatcher.patch_marker_attr(),
+        False,
+    )
+    assert not getattr(fake_agentscope_modules.Task, TaskEvaluatePatcher.patch_marker_attr(), False)
+    assert not getattr(fake_agentscope_modules.MetricBase, MetricCallPatcher.patch_marker_attr(), False)
+
+
+def test_wrap_evaluator_patches_evaluator_and_eval_types(fake_agentscope_modules):
+    wrapped = wrap_evaluator(fake_agentscope_modules.GeneralEvaluator)
+    wrapped_again = wrap_evaluator(fake_agentscope_modules.GeneralEvaluator)  # a second call must also work
+
+    assert wrapped is fake_agentscope_modules.GeneralEvaluator  # the class object itself is returned
+    assert wrapped_again is fake_agentscope_modules.GeneralEvaluator
+    assert getattr(fake_agentscope_modules.GeneralEvaluator, _GeneralEvaluatorRunPatcher.patch_marker_attr(), False)
+    assert getattr(
+        fake_agentscope_modules.GeneralEvaluator, _GeneralEvaluatorRunSolutionPatcher.patch_marker_attr(), False
+    )
+    assert getattr(
+        fake_agentscope_modules.GeneralEvaluator,
+        _GeneralEvaluatorRunEvaluationPatcher.patch_marker_attr(),
+        False,
+    )
+    assert getattr(fake_agentscope_modules.Task, TaskEvaluatePatcher.patch_marker_attr(), False)
+    assert getattr(fake_agentscope_modules.MetricBase, MetricCallPatcher.patch_marker_attr(), False)
+
+
 class TestAutoInstrumentAgentScope:
     def test_auto_instrument_agentscope(self):
         verify_autoinstrument_script("test_auto_agentscope.py")
diff --git a/py/src/braintrust/integrations/agentscope/tracing.py b/py/src/braintrust/integrations/agentscope/tracing.py
index efb4891d..b5e1e4ad 100644
--- a/py/src/braintrust/integrations/agentscope/tracing.py
+++ b/py/src/braintrust/integrations/agentscope/tracing.py
@@ -1,10 +1,14 @@
 """AgentScope-specific span creation and stream aggregation."""
 
 from contextlib import aclosing
+from contextvars import ContextVar
 from typing import Any
 
 from braintrust.logger import start_span
-from braintrust.span_types import SpanTypeAttribute
+from braintrust.span_types import SpanPurpose, SpanTypeAttribute
+
+
+_SUPPRESS_TASK_EVALUATE_SPAN: ContextVar[bool] = ContextVar("_SUPPRESS_TASK_EVALUATE_SPAN", default=False)
 
 
 def _clean(mapping: dict[str, Any]) -> dict[str, Any]:
@@ -149,6 +153,184 @@ def _tool_name(tool_call: Any) -> str:
     return str(getattr(tool_call, "name", "unknown_tool"))
 
 
+def _call_arg(args: Any, kwargs: dict[str, Any], index: int, key: str) -> Any:
+    """Return the argument passed as ``key``, else the positional one at ``index``, else None."""
+    has_positional = len(args) > index
+    return kwargs[key] if key in kwargs else (args[index] if has_positional else None)
+
+
+def _maybe_awaitable_name(value: Any) -> str | None:  # None when neither attribute is set and truthy
+    return getattr(value, "__name__", None) or getattr(value, "__qualname__", None)
+
+
+def _metric_name(metric: Any) -> str:  # the ``name`` attribute when truthy, else the class name
+    return str(getattr(metric, "name", None) or metric.__class__.__name__)
+
+
+def _task_id(task: Any) -> str:  # first truthy of the ``id`` / ``name`` fields, else the class name
+    return str(_field_value(task, "id") or _field_value(task, "name") or task.__class__.__name__)
+
+
+def _task_input(task: Any) -> Any:
+    """Return the first non-None input-like field of ``task``, or ``None``."""
+    field_names = ("input", "input_data", "question", "prompt")
+    # The generator is lazy, so lookups stop at the first field that has a value.
+    candidates = (_field_value(task, field_name) for field_name in field_names)
+    return next((found for found in candidates if found is not None), None)
+
+
+def _task_expected(task: Any) -> Any:
+    """Return the first non-None expected-answer field of ``task``, or ``None``."""
+    field_names = ("ground_truth", "expected", "reference", "answer")
+    # The generator is lazy, so lookups stop at the first field that has a value.
+    candidates = (_field_value(task, field_name) for field_name in field_names)
+    return next((found for found in candidates if found is not None), None)
+
+
+def _task_tags(task: Any) -> Any:
+    """Flatten dict tags into sorted ``"key:value"`` strings; return any other value as is."""
+    raw = _field_value(task, "tags")
+    is_mapping = isinstance(raw, dict)
+    return [f"{key}:{value}" for key, value in sorted(raw.items())] if is_mapping else raw
+
+
+def _task_metric_names(task: Any) -> list[str] | None:
+    """List the names of the task's metrics, or return ``None`` when it has none."""
+    task_metrics = _field_value(task, "metrics")
+    names = [_metric_name(entry) for entry in task_metrics] if task_metrics else None
+    return names
+
+
+def _task_metadata(task: Any) -> dict[str, Any]:
+    """Return the task's ``metadata`` field when it is a dict, otherwise a new empty dict."""
+    candidate = _field_value(task, "metadata")
+    is_mapping = isinstance(candidate, dict)
+    return candidate if is_mapping else {}
+
+
+def _solution_output_summary(solution_output: Any) -> Any:
+    """Reduce a solution output to a loggable dict; ``None`` and dicts pass through unchanged."""
+    if solution_output is None or isinstance(solution_output, dict):
+        return solution_output
+
+    fields = _clean(
+        {
+            "output": _field_value(solution_output, "output"),
+            "success": _field_value(solution_output, "success"),
+            "trajectory": _field_value(solution_output, "trajectory"),
+            "meta": _field_value(solution_output, "meta") or _field_value(solution_output, "metadata"),
+            "message": _field_value(solution_output, "message"),
+        }
+    )
+    # A falsy (empty) summary means nothing usable was found, so log the raw object instead.
+    return fields or solution_output
+
+
+def _metric_result_summary(result: Any) -> Any:
+    """Reduce a metric result to a loggable dict; ``None`` and dicts pass through unchanged."""
+    if result is None or isinstance(result, dict):
+        return result
+
+    fields = _clean(
+        {
+            "result": _field_value(result, "result"),
+            "message": _field_value(result, "message"),
+            "detail": _field_value(result, "detail"),
+            "metadata": _field_value(result, "metadata") or _field_value(result, "meta"),
+        }
+    )
+    # A falsy (empty) summary means nothing usable was found, so log the raw object instead.
+    return fields or result
+
+
+def _metric_score(metric: Any, result: Any) -> dict[str, float] | None:
+    """Build a ``{metric name: score}`` mapping from a boolean or numeric metric result."""
+    raw = _field_value(result, "result")
+    if not isinstance(raw, (bool, int, float)):
+        # Anything that is not a bool/int/float carries no score.
+        return None
+    return {_metric_name(metric): float(raw)}  # float(True) == 1.0, float(False) == 0.0
+
+
+def _evaluator_metadata(instance: Any, solution: Any = None) -> dict[str, Any]:
+    """Collect evaluator, benchmark and solution details as metadata for the run span."""
+    benchmark = getattr(instance, "benchmark", None)
+    storage = getattr(instance, "storage", None)
+    metadata = {
+        "evaluator_class": instance.__class__.__name__,
+        "evaluator_name": getattr(instance, "name", None),
+        "benchmark_name": _field_value(benchmark, "name"),
+        "benchmark_description": _field_value(benchmark, "description"),
+        "task_count": len(benchmark) if benchmark is not None and hasattr(benchmark, "__len__") else None,
+        "n_repeat": getattr(instance, "n_repeat", None),
+        "n_workers": getattr(instance, "n_workers", None),
+        "storage_class": storage.__class__.__name__ if storage is not None else None,  # never "NoneType"
+        "solution_name": _maybe_awaitable_name(solution),
+    }
+    return _clean(metadata)
+
+
+def _task_span_metadata(task: Any, repeat_id: str | None = None, **extra: Any) -> dict[str, Any]:
+    raw_tags = _field_value(task, "tags")
+    return _clean(
+        {
+            **_task_metadata(task),  # spread first, so the explicit keys below override same-named entries
+            "task_id": _task_id(task),
+            "repeat_id": repeat_id,
+            "metric_names": _task_metric_names(task),
+            "task_tags": raw_tags if isinstance(raw_tags, dict) else None,  # only dict-shaped tags are kept
+            **extra,  # spread last, so caller-supplied keys override everything above
+        }
+    )
+
+
+def _storage_get(storage: Any, method_name: str, *args: Any) -> Any:
+    method = getattr(storage, method_name, None)
+    if method is None:  # the storage object does not expose this accessor
+        return None
+    try:
+        return method(*args)
+    except Exception:  # best-effort lookup: any failure is reported as "no result"
+        return None
+
+
+def _stored_solution_output(instance: Any, task: Any, repeat_id: str) -> Any:
+    """Look up the solution result for this task/repeat in the evaluator's storage, if it has one."""
+    store = getattr(instance, "storage", None)
+    found = None if store is None else _storage_get(store, "get_solution_result", _task_id(task), repeat_id)
+    return found
+
+
+def _stored_evaluation_results(instance: Any, task: Any, repeat_id: str) -> list[Any] | None:
+    storage = getattr(instance, "storage", None)
+    metrics = _field_value(task, "metrics") or []
+    if storage is None or not metrics:
+        return None  # nothing to look up without both a storage and at least one metric
+
+    results = []
+    for metric in metrics:
+        result = _storage_get(storage, "get_evaluation_result", _task_id(task), repeat_id, _metric_name(metric))
+        if result is None:
+            return None  # all-or-nothing: one missing result discards the ones already collected
+        results.append(result)
+    return results  # one entry per metric, in the task's metric order
+
+
+def _log_metric_span(parent_span: Any, metric: Any, solution_output: Any, result: Any) -> None:
+    with parent_span.start_span(
+        name=_metric_name(metric),
+        type=SpanTypeAttribute.SCORE,
+        span_attributes={"purpose": SpanPurpose.SCORER.value},
+        input=_solution_output_summary(solution_output),
+        metadata=_clean({"metric_class": metric.__class__.__name__}),
+    ) as metric_span:  # started from ``parent_span``; named after the metric
+        metric_span.log(
+            output=_metric_result_summary(result),
+            metadata=_field_value(result, "metadata") or _field_value(result, "meta"),
+            scores=_metric_score(metric, result),  # None unless the result is a bool/int/float
+        )
+
+
 async def _agent_call_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: dict[str, Any]) -> Any:
     with start_span(
         name=f"{_agent_name(instance)}.reply",
@@ -165,6 +347,166 @@ async def _agent_call_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: di
             raise
 
 
+async def _general_evaluator_run_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: dict[str, Any]) -> Any:
+    solution = _call_arg(args, kwargs, 0, "solution")  # used only for span metadata; wrapped gets args unchanged
+    with start_span(  # one EVAL span around the whole awaited call
+        name="agentscope.evaluate.run",
+        type=SpanTypeAttribute.EVAL,
+        input=_clean(
+            {
+                "benchmark_name": _field_value(getattr(instance, "benchmark", None), "name"),
+                "n_repeat": getattr(instance, "n_repeat", None),
+                "n_workers": getattr(instance, "n_workers", None),
+            }
+        ),
+        metadata=_evaluator_metadata(instance, solution),
+    ) as span:
+        try:
+            result = await wrapped(*args, **kwargs)
+            span.log(output={"status": "completed"})  # only a status marker is logged, not the return value
+            return result
+        except Exception as exc:
+            span.log(error=str(exc))  # record the message on the span, then re-raise unchanged
+            raise
+
+
+async def _ray_evaluator_run_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: dict[str, Any]) -> Any:
+    solution = _call_arg(args, kwargs, 0, "solution")  # used only for span metadata; wrapped gets args unchanged
+    with start_span(  # same span name, type and input as _general_evaluator_run_wrapper
+        name="agentscope.evaluate.run",
+        type=SpanTypeAttribute.EVAL,
+        input=_clean(
+            {
+                "benchmark_name": _field_value(getattr(instance, "benchmark", None), "name"),
+                "n_repeat": getattr(instance, "n_repeat", None),
+                "n_workers": getattr(instance, "n_workers", None),
+            }
+        ),
+        metadata={**_evaluator_metadata(instance, solution), "distributed": True},  # flags the run as distributed
+    ) as span:
+        try:
+            result = await wrapped(*args, **kwargs)
+            span.log(output={"status": "completed"})  # only a status marker is logged, not the return value
+            return result
+        except Exception as exc:
+            span.log(error=str(exc))  # record the message on the span, then re-raise unchanged
+            raise
+
+
+async def _general_evaluator_run_solution_wrapper(
+    wrapped: Any,
+    instance: Any,
+    args: Any,
+    kwargs: dict[str, Any],
+) -> Any:  # Trace one solution run for a (task, repeat_id) pair in a TASK span.
+    repeat_id = str(_call_arg(args, kwargs, 0, "repeat_id"))  # str() is applied to whatever _call_arg returns
+    task = _call_arg(args, kwargs, 1, "task")
+    storage = getattr(instance, "storage", None)
+    was_cached = False
+    if storage is not None and task is not None:
+        exists = getattr(storage, "solution_result_exists", None)
+        if exists is not None:
+            try:
+                was_cached = bool(exists(_task_id(task), repeat_id))
+            except Exception:  # a failing existence check is treated as "not cached"
+                was_cached = False
+    # was_cached is computed before wrapped() runs, so it reflects storage state prior to this call.
+    with start_span(
+        name=f"{_task_id(task)}.solution",
+        type=SpanTypeAttribute.TASK,
+        input=_task_input(task),
+        expected=_task_expected(task),
+        tags=_task_tags(task),
+        metadata=_task_span_metadata(task, repeat_id, cached=was_cached),
+    ) as span:
+        try:
+            result = await wrapped(*args, **kwargs)
+            solution_output = _stored_solution_output(instance, task, repeat_id)  # read from storage; may be None
+            span.log(output=_solution_output_summary(solution_output))
+            return result
+        except Exception as exc:
+            span.log(error=str(exc))  # record the message on the span, then re-raise unchanged
+            raise
+
+
+async def _general_evaluator_run_evaluation_wrapper(
+    wrapped: Any,
+    instance: Any,
+    args: Any,
+    kwargs: dict[str, Any],
+) -> Any:  # Trace one task evaluation in an EVAL span.
+    task = _call_arg(args, kwargs, 0, "task")
+    repeat_id = str(_call_arg(args, kwargs, 1, "repeat_id"))
+    solution_output = _call_arg(args, kwargs, 2, "solution_output")
+    # Metric spans are emitted after wrapped() returns, from results read back out of instance.storage.
+    with start_span(
+        name=f"{_task_id(task)}.evaluate",
+        type=SpanTypeAttribute.EVAL,
+        input=_solution_output_summary(solution_output),
+        metadata=_task_span_metadata(task, repeat_id),
+    ) as span:
+        token = _SUPPRESS_TASK_EVALUATE_SPAN.set(True)  # makes the task/metric wrappers skip their own spans
+        try:
+            result = await wrapped(*args, **kwargs)
+            evaluation_results = _stored_evaluation_results(instance, task, repeat_id)  # None unless all are stored
+            if evaluation_results is not None:
+                metrics = _field_value(task, "metrics") or []
+                for metric, evaluation_result in zip(metrics, evaluation_results):  # one SCORE span per metric
+                    _log_metric_span(span, metric, solution_output, evaluation_result)
+                span.log(output=[_metric_result_summary(item) for item in evaluation_results])
+            return result
+        except Exception as exc:
+            span.log(error=str(exc))  # record the message on the span, then re-raise unchanged
+            raise
+        finally:
+            _SUPPRESS_TASK_EVALUATE_SPAN.reset(token)  # restore the previous flag value on success and failure
+
+
+async def _task_evaluate_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: dict[str, Any]) -> Any:
+    if _SUPPRESS_TASK_EVALUATE_SPAN.get():  # flag set by _general_evaluator_run_evaluation_wrapper: pass through
+        return await wrapped(*args, **kwargs)
+    # Flag not set: open an EVAL span named after the task (the instance) and log summarized results.
+    solution_output = _call_arg(args, kwargs, 0, "solution_output")
+    with start_span(
+        name=f"{_task_id(instance)}.evaluate",
+        type=SpanTypeAttribute.EVAL,
+        input=_solution_output_summary(solution_output),
+        metadata=_task_span_metadata(instance),
+    ) as span:
+        try:
+            result = await wrapped(*args, **kwargs)
+            span.log(output=[_metric_result_summary(item) for item in result] if result is not None else None)
+            return result
+        except Exception as exc:
+            span.log(error=str(exc))  # record the message on the span, then re-raise unchanged
+            raise
+
+
+async def _metric_call_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: dict[str, Any]) -> Any:
+    if _SUPPRESS_TASK_EVALUATE_SPAN.get():  # flag set by _general_evaluator_run_evaluation_wrapper: pass through
+        return await wrapped(*args, **kwargs)
+    # Flag not set: open a SCORE span named after the metric (the instance) and log its result and scores.
+    solution_output = _call_arg(args, kwargs, 0, "solution_output")
+    with start_span(
+        name=_metric_name(instance),
+        type=SpanTypeAttribute.SCORE,
+        span_attributes={"purpose": SpanPurpose.SCORER.value},
+        input=_solution_output_summary(solution_output),
+        metadata=_clean({"metric_class": instance.__class__.__name__}),
+    ) as span:
+        try:
+            result = await wrapped(*args, **kwargs)
+            span.log(
+                output=_metric_result_summary(result),
+                metadata=_field_value(result, "metadata") or _field_value(result, "meta"),  # falls back to "meta"
+                scores=_metric_score(instance, result),
+            )
+            return result
+        except Exception as exc:
+            span.log(error=str(exc))  # record the message on the span, then re-raise unchanged
+            raise
+
+
 async def _sequential_pipeline_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: dict[str, Any]) -> Any:
     with start_span(
         name="sequential_pipeline.run",
diff --git a/py/src/braintrust/integrations/base.py b/py/src/braintrust/integrations/base.py
index 690e6c22..7d491b96 100644
--- a/py/src/braintrust/integrations/base.py
+++ b/py/src/braintrust/integrations/base.py
@@ -337,6 +337,7 @@ def setup(
         cls,
         *,
         target: Any | None = None,
+        patchers: tuple[type[BasePatcher], ...] | None = None,
     ) -> bool:
         """Apply all applicable patchers for this integration."""
         module = _import_first_available(cls.import_names)
@@ -347,7 +348,7 @@ def setup(
             return False
 
         success = False
-        selected_patchers = cls.resolve_patchers()
+        selected_patchers = cls.resolve_patchers() if patchers is None else patchers
         for patcher in sorted(selected_patchers, key=lambda patcher: patcher.priority):
             if not patcher.applies(module, version, target=target):
                 continue