diff --git a/py/noxfile.py b/py/noxfile.py
index 61ea0aec..0f50d440 100644
--- a/py/noxfile.py
+++ b/py/noxfile.py
@@ -62,6 +62,7 @@ def _pinned_python_version():
# validate things work with or without them.
VENDOR_PACKAGES = (
"agno",
+ "agentscope",
"anthropic",
"dspy",
"openai",
@@ -89,6 +90,7 @@ def _pinned_python_version():
# Keep LATEST for newest API coverage, and pin 2.4.0 to cover the 2.4 -> 2.5 breaking change
# to internals we leverage for instrumentation.
AGNO_VERSIONS = (LATEST, "2.4.0", "2.1.0")
+AGENTSCOPE_VERSIONS = (LATEST, "1.0.0")
# pydantic_ai 1.x requires Python >= 3.10
# Two test suites with different version requirements:
# 1. wrap_openai approach: works with older versions (0.1.9+)
@@ -172,6 +174,16 @@ def test_agno(session, version):
_run_core_tests(session)
+@nox.session()
+@nox.parametrize("version", AGENTSCOPE_VERSIONS, ids=AGENTSCOPE_VERSIONS)
+def test_agentscope(session, version):
+ _install_test_deps(session)
+ _install(session, "agentscope", version)
+ _install(session, "openai")
+ _run_tests(session, f"{INTEGRATION_DIR}/agentscope/test_agentscope.py")
+ _run_core_tests(session)
+
+
@nox.session()
@nox.parametrize("version", ANTHROPIC_VERSIONS, ids=ANTHROPIC_VERSIONS)
def test_anthropic(session, version):
diff --git a/py/src/braintrust/auto.py b/py/src/braintrust/auto.py
index 25dd436a..7ac407aa 100644
--- a/py/src/braintrust/auto.py
+++ b/py/src/braintrust/auto.py
@@ -7,7 +7,13 @@
import logging
from contextlib import contextmanager
-from braintrust.integrations import ADKIntegration, AgnoIntegration, AnthropicIntegration, ClaudeAgentSDKIntegration
+from braintrust.integrations import (
+ ADKIntegration,
+ AgentScopeIntegration,
+ AgnoIntegration,
+ AnthropicIntegration,
+ ClaudeAgentSDKIntegration,
+)
__all__ = ["auto_instrument"]
@@ -34,6 +40,7 @@ def auto_instrument(
pydantic_ai: bool = True,
google_genai: bool = True,
agno: bool = True,
+ agentscope: bool = True,
claude_agent_sdk: bool = True,
dspy: bool = True,
adk: bool = True,
@@ -54,6 +61,7 @@ def auto_instrument(
pydantic_ai: Enable Pydantic AI instrumentation (default: True)
google_genai: Enable Google GenAI instrumentation (default: True)
agno: Enable Agno instrumentation (default: True)
+ agentscope: Enable AgentScope instrumentation (default: True)
claude_agent_sdk: Enable Claude Agent SDK instrumentation (default: True)
dspy: Enable DSPy instrumentation (default: True)
adk: Enable Google ADK instrumentation (default: True)
@@ -116,6 +124,8 @@ def auto_instrument(
results["google_genai"] = _instrument_google_genai()
if agno:
results["agno"] = _instrument_integration(AgnoIntegration)
+ if agentscope:
+ results["agentscope"] = _instrument_integration(AgentScopeIntegration)
if claude_agent_sdk:
results["claude_agent_sdk"] = _instrument_integration(ClaudeAgentSDKIntegration)
if dspy:
diff --git a/py/src/braintrust/conftest.py b/py/src/braintrust/conftest.py
index 0fbdf40b..2345b227 100644
--- a/py/src/braintrust/conftest.py
+++ b/py/src/braintrust/conftest.py
@@ -191,6 +191,7 @@ def get_vcr_config():
"decode_compressed_response": True,
"filter_headers": [
"authorization",
+ "Authorization",
"openai-organization",
"x-api-key",
"api-key",
diff --git a/py/src/braintrust/integrations/__init__.py b/py/src/braintrust/integrations/__init__.py
index 35324c1c..095f7f35 100644
--- a/py/src/braintrust/integrations/__init__.py
+++ b/py/src/braintrust/integrations/__init__.py
@@ -1,7 +1,14 @@
from .adk import ADKIntegration
+from .agentscope import AgentScopeIntegration
from .agno import AgnoIntegration
from .anthropic import AnthropicIntegration
from .claude_agent_sdk import ClaudeAgentSDKIntegration
-__all__ = ["ADKIntegration", "AgnoIntegration", "AnthropicIntegration", "ClaudeAgentSDKIntegration"]
+__all__ = [
+ "ADKIntegration",
+ "AgentScopeIntegration",
+ "AgnoIntegration",
+ "AnthropicIntegration",
+ "ClaudeAgentSDKIntegration",
+]
diff --git a/py/src/braintrust/integrations/adk/test_adk.py b/py/src/braintrust/integrations/adk/test_adk.py
index bed6f3e6..9d9be979 100644
--- a/py/src/braintrust/integrations/adk/test_adk.py
+++ b/py/src/braintrust/integrations/adk/test_adk.py
@@ -41,6 +41,7 @@ def before_record_request(request):
"cassette_library_dir": str(Path(__file__).parent / "cassettes"),
"filter_headers": [
"authorization",
+ "Authorization",
"x-goog-api-key",
],
"before_record_request": before_record_request,
diff --git a/py/src/braintrust/integrations/agentscope/__init__.py b/py/src/braintrust/integrations/agentscope/__init__.py
new file mode 100644
index 00000000..b78c158e
--- /dev/null
+++ b/py/src/braintrust/integrations/agentscope/__init__.py
@@ -0,0 +1,59 @@
+"""Braintrust integration for AgentScope."""
+
+from typing import Any, Optional
+
+from braintrust.logger import NOOP_SPAN, current_span, init_logger
+
+from .integration import AgentScopeIntegration
+from .patchers import (
+    GeneralEvaluatorPatcher,
+    MetricCallPatcher,
+    RayEvaluatorRunPatcher,
+    TaskEvaluatePatcher,
+)
+
+
+__all__ = ["AgentScopeIntegration", "setup_agentscope", "wrap_evaluator"]
+
+
+def setup_agentscope(
+    api_key: Optional[str] = None,
+    project_id: Optional[str] = None,
+    project_name: Optional[str] = None,
+    instrument_evals: bool = True,
+) -> bool:
+ """Setup Braintrust integration with AgentScope."""
+ if current_span() == NOOP_SPAN:
+ init_logger(project=project_name, api_key=api_key, project_id=project_id)
+
+ return AgentScopeIntegration.setup(instrument_evals=instrument_evals)
+
+
+def wrap_evaluator(Evaluator: Any) -> Any:
+ """Manually patch an AgentScope evaluator class for tracing.
+
+ This helper patches the evaluator class itself and, when available, also
+ enables task and metric tracing from the exported ``agentscope.evaluate``
+ module so ``GeneralEvaluator`` produces nested evaluation spans even when
+ global setup is not used.
+ """
+ class_name = getattr(Evaluator, "__name__", "")
+ if class_name == "RayEvaluator":
+ RayEvaluatorRunPatcher.wrap_target(Evaluator)
+ else:
+ GeneralEvaluatorPatcher.wrap_target(Evaluator)
+
+ try:
+ import agentscope.evaluate as agentscope_evaluate
+ except ImportError:
+ return Evaluator
+
+ task_cls = getattr(agentscope_evaluate, "Task", None)
+ if task_cls is not None:
+ TaskEvaluatePatcher.wrap_target(task_cls)
+
+ metric_cls = getattr(agentscope_evaluate, "MetricBase", None)
+ if metric_cls is not None:
+ MetricCallPatcher.wrap_target(metric_cls)
+
+ return Evaluator
diff --git a/py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_general_evaluator_creates_eval_spans.yaml b/py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_general_evaluator_creates_eval_spans.yaml
new file mode 100644
index 00000000..87f6358e
--- /dev/null
+++ b/py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_general_evaluator_creates_eval_spans.yaml
@@ -0,0 +1,320 @@
+interactions:
+- request:
+ body: '{"messages":[{"role":"system","name":"system","content":[{"type":"text","text":"You
+ are a concise assistant. Answer in one sentence."}]},{"role":"user","name":"user","content":[{"type":"text","text":"Say
+ hello in exactly two words."}]}],"model":"gpt-4o-mini","stream":false,"temperature":0}'
+ headers:
+ Accept:
+ - application/json
+ Accept-Encoding:
+ - gzip, deflate
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '290'
+ Content-Type:
+ - application/json
+ Host:
+ - api.openai.com
+ User-Agent:
+ - AsyncOpenAI/Python 2.29.0
+ X-Stainless-Arch:
+ - arm64
+ X-Stainless-Async:
+ - async:asyncio
+ X-Stainless-Lang:
+ - python
+ X-Stainless-OS:
+ - MacOS
+ X-Stainless-Package-Version:
+ - 2.29.0
+ X-Stainless-Runtime:
+ - CPython
+ X-Stainless-Runtime-Version:
+ - 3.13.3
+ x-stainless-read-timeout:
+ - '600'
+ x-stainless-retry-count:
+ - '0'
+ method: POST
+ uri: https://api.openai.com/v1/chat/completions
+ response:
+ body:
+ string: !!binary |
+ H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyCzNtgAAAAD//4xSsU7DMBDd+xWV56Rq
+ 05TQGYbODCyoilz7khgc29hOFYT675ydtglQJBZLvnfv/N7zPRzNkb33u02/qXTPN48AT89bkgSG
+ PrwC8xfWgmnkgRdaDTCzQD2EqauiyPMiK7Z3EWg1BxlotfFprtNWKJFmyyxPl0W6uj+zGy0YOGx7
+ wet8/hnPoFNx6LG8TC6VFpyjNWDt0oRFq2WoEOqccJ4qT5IRZFp5UFH6DqTUc9+AhcW0xULVORpk
+ qk7KCUCV0p4Gm1Hc/oycrnKkro3VB/eDSiq06ZoSU3EYET7tvDYkoic899F2980JwUGt8aXXbxCf
+ W2fDODKGPQHPmEd9clLeJDeGlRw8FdJNUiOMsgb4yBwjph0XegLMJpZ/a7k1e7AtVP2f8SPAGBhc
+ otJY4IJ99zu2WQib+FfbNeIomDiwR1yt0guw4Rs4VLSTw34Q9+E8tCX+VQ3WWDEsSWVKRtdQ8GJ1
+ qMjsNPsCAAD//wMAQQCebTIDAAA=
+ headers:
+ CF-Cache-Status:
+ - DYNAMIC
+ CF-Ray:
+ - 9e20e1608aaf6142-SJC
+ Connection:
+ - keep-alive
+ Content-Encoding:
+ - gzip
+ Content-Type:
+ - application/json
+ Date:
+ - Wed, 25 Mar 2026 21:06:37 GMT
+ Server:
+ - cloudflare
+ Strict-Transport-Security:
+ - max-age=31536000; includeSubDomains; preload
+ Transfer-Encoding:
+ - chunked
+ X-Content-Type-Options:
+ - nosniff
+ access-control-expose-headers:
+ - X-Request-ID
+ alt-svc:
+ - h3=":443"; ma=86400
+ openai-organization:
+ - braintrust-data
+ openai-processing-ms:
+ - '373'
+ openai-project:
+ - proj_vsCSXafhhByzWOThMrJcZiw9
+ openai-version:
+ - '2020-10-01'
+ set-cookie:
+ - __cf_bm=0r2bm90d_zmUe.y8EYZpgoGyTVS4QQSDoJVx.yDpJng-1774472796.2480545-1.0.1.1-onYisUL_Bju9EhfGXQnZBZkwk3gdjG7tHXVdr34BVePUh3JL0OqfVWApVIaF_KDBKfw4HIiGBvzONzv_AS91kbK7eL.FzFDwILNg8_F1h3hsPpZO.pIoeUN1dp_.acW6;
+ HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026
+ 21:36:37 GMT
+ x-openai-proxy-wasm:
+ - v0.1
+ x-ratelimit-limit-requests:
+ - '30000'
+ x-ratelimit-limit-tokens:
+ - '150000000'
+ x-ratelimit-remaining-requests:
+ - '29999'
+ x-ratelimit-remaining-tokens:
+ - '149999975'
+ x-ratelimit-reset-requests:
+ - 2ms
+ x-ratelimit-reset-tokens:
+ - 0s
+ x-request-id:
+ - req_0b44866bb8cd459db8712e04e4248889
+ status:
+ code: 200
+ message: OK
+- request:
+ body: '{"messages":[{"role":"system","name":"system","content":[{"type":"text","text":"You
+ are a concise assistant. Answer in one sentence."}]},{"role":"user","name":"user","content":[{"type":"text","text":"Say
+ hello in exactly two words."}]}],"model":"gpt-4o-mini","stream":false,"temperature":0}'
+ headers:
+ Accept:
+ - application/json
+ Accept-Encoding:
+ - gzip, deflate
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '290'
+ Content-Type:
+ - application/json
+ Host:
+ - api.openai.com
+ User-Agent:
+ - AsyncOpenAI/Python 2.29.0
+ X-Stainless-Arch:
+ - arm64
+ X-Stainless-Async:
+ - async:asyncio
+ X-Stainless-Lang:
+ - python
+ X-Stainless-OS:
+ - MacOS
+ X-Stainless-Package-Version:
+ - 2.29.0
+ X-Stainless-Runtime:
+ - CPython
+ X-Stainless-Runtime-Version:
+ - 3.13.3
+ x-stainless-read-timeout:
+ - '600'
+ x-stainless-retry-count:
+ - '0'
+ method: POST
+ uri: https://api.openai.com/v1/chat/completions
+ response:
+ body:
+ string: !!binary |
+ H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyPJOswAAAAD//4xS0WrCMBR99yskz61o
+ 7aiv7mX6MJgwtsGQEpPbNjNNQpKOjeG/76ZVWzcHewnknntuzjm5t/dsebeB9Wq/qZ7WD4/L55eU
+ piQKDL17A+ZPrAnTyAMvtOpgZoF6CFNnWZamWbJIkhaoNQcZaKXxcarjWigRJ9MkjadZPFsc2ZUW
+ DBy2veJ1PP5qz6BTcfjA8jQ6VWpwjpaAtVMTFq2WoUKoc8J5qjyJepBp5UG10lcgpR77CixMhi0W
+ isbRIFM1Ug4AqpT2NNhsxW2PyOEsR+rSWL1zP6ikQJuuyjEVhxHh085rQ1r0gOe2td1cOCE4qDY+
+ 93oP7XPzpBtH+rAH4BHzqE8OyjfRlWE5B0+FdIPUCKOsAt4z+4hpw4UeAKOB5d9ars3ubAtV/md8
+ DzAGBpcoNxa4YJd++zYLYRP/ajtH3AomDuw7rlbuBdjwDRwK2shuP4j7dB7qHP+qBGus6JakMDmj
+ c8h4NtsVZHQYfQMAAP//AwDajPLhMgMAAA==
+ headers:
+ CF-Cache-Status:
+ - DYNAMIC
+ CF-Ray:
+ - 9e20e202698a251d-SJC
+ Connection:
+ - keep-alive
+ Content-Encoding:
+ - gzip
+ Content-Type:
+ - application/json
+ Date:
+ - Wed, 25 Mar 2026 21:07:03 GMT
+ Server:
+ - cloudflare
+ Strict-Transport-Security:
+ - max-age=31536000; includeSubDomains; preload
+ Transfer-Encoding:
+ - chunked
+ X-Content-Type-Options:
+ - nosniff
+ access-control-expose-headers:
+ - X-Request-ID
+ alt-svc:
+ - h3=":443"; ma=86400
+ openai-organization:
+ - braintrust-data
+ openai-processing-ms:
+ - '334'
+ openai-project:
+ - proj_vsCSXafhhByzWOThMrJcZiw9
+ openai-version:
+ - '2020-10-01'
+ set-cookie:
+ - __cf_bm=GJt8W0lyxqfgtKXEJ4M8twcG5pNqc4RmQiPqQ_IukiU-1774472822.1494222-1.0.1.1-Jv3zKpnCjFAAQQaXBEt3RElEP.QjtEFbqrvr8BASrk5X7XSiOj1UBc4tUR3t9QbKmOM0VrcVW6R3HYLaYxbTHQqz4Dpjl7Z.Sz9BslefycjBprbfLjQ1aoOYxrSO7lkO;
+ HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026
+ 21:37:03 GMT
+ x-openai-proxy-wasm:
+ - v0.1
+ x-ratelimit-limit-requests:
+ - '30000'
+ x-ratelimit-limit-tokens:
+ - '150000000'
+ x-ratelimit-remaining-requests:
+ - '29999'
+ x-ratelimit-remaining-tokens:
+ - '149999977'
+ x-ratelimit-reset-requests:
+ - 2ms
+ x-ratelimit-reset-tokens:
+ - 0s
+ x-request-id:
+ - req_8c118a37a5da44069de28fba911081e0
+ status:
+ code: 200
+ message: OK
+- request:
+ body: '{"messages":[{"role":"system","name":"system","content":[{"type":"text","text":"You
+ are a concise assistant. Answer in one sentence."}]},{"role":"user","name":"user","content":[{"type":"text","text":"Say
+ hello in exactly two words."}]}],"model":"gpt-4o-mini","stream":false,"temperature":0}'
+ headers:
+ Accept:
+ - application/json
+ Accept-Encoding:
+ - gzip, deflate
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '290'
+ Content-Type:
+ - application/json
+ Host:
+ - api.openai.com
+ User-Agent:
+ - AsyncOpenAI/Python 2.29.0
+ X-Stainless-Arch:
+ - arm64
+ X-Stainless-Async:
+ - async:asyncio
+ X-Stainless-Lang:
+ - python
+ X-Stainless-OS:
+ - MacOS
+ X-Stainless-Package-Version:
+ - 2.29.0
+ X-Stainless-Runtime:
+ - CPython
+ X-Stainless-Runtime-Version:
+ - 3.13.3
+ x-stainless-read-timeout:
+ - '600'
+ x-stainless-retry-count:
+ - '0'
+ method: POST
+ uri: https://api.openai.com/v1/chat/completions
+ response:
+ body:
+ string: !!binary |
+ H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyDbKMgIAAAD//4xSQU7DMBC89xWVz0mV
+ pKHhDALBDSFxQlXk2pvE4NiW7dAi1L+zTtomhSJxseSdnfXMeO+zuxuWbp9vt4/pU/WyonrHk45E
+ gaE3b8D8kbVgGnnghVYDzCxQD2FqWhR5XmTXq1UPtJqDDLTa+DjXcSuUiLMky+OkiNPrA7vRgoHD
+ tle8zudf/Rl0Kg47LCfRsdKCc7QGrB2bsGi1DBVCnRPOU+VJNIJMKw+ql/4AUuq5b8DCYtpioeoc
+ DTJVJ+UEoEppT4PNXtz6gOxPcqSujdUb94NKKrTpmhJTcRgRPu28NqRH93iue9vdmROCg1rjS6/f
+ oX9umQ3jyBj2BDxgHvXJSfkqujCs5OCpkG6SGmGUNcBH5hgx7bjQE2A2sfxby6XZg22h6v+MHwHG
+ wOASlcYCF+zc79hmIWziX22niHvBxIH9wNUqvQAbvoFDRTs57Adxn85DW+Jf1WCNFcOSVKZkdAkF
+ L9JNRWb72TcAAAD//wMAHqai9DIDAAA=
+ headers:
+ CF-Cache-Status:
+ - DYNAMIC
+ CF-Ray:
+ - 9e20e3168f481d99-SJC
+ Connection:
+ - keep-alive
+ Content-Encoding:
+ - gzip
+ Content-Type:
+ - application/json
+ Date:
+ - Wed, 25 Mar 2026 21:07:46 GMT
+ Server:
+ - cloudflare
+ Strict-Transport-Security:
+ - max-age=31536000; includeSubDomains; preload
+ Transfer-Encoding:
+ - chunked
+ X-Content-Type-Options:
+ - nosniff
+ access-control-expose-headers:
+ - X-Request-ID
+ alt-svc:
+ - h3=":443"; ma=86400
+ openai-organization:
+ - braintrust-data
+ openai-processing-ms:
+ - '370'
+ openai-project:
+ - proj_vsCSXafhhByzWOThMrJcZiw9
+ openai-version:
+ - '2020-10-01'
+ set-cookie:
+ - __cf_bm=QmJ_mn8jkQXEiES_f8MtNfFrG.TdUhMi9F3CsYesW.Y-1774472866.3273165-1.0.1.1-9jM9NCSYxoeVuM4GWc_rXbAFSxA2USepp4rz5niKNzmDK.TtF1V7PSKWLir8akfbpuyXvva5QQRFgDCcMT3P0xa_1Tzm9mW9uidzQIoQBbq5O6t0XUQzY8bQC4ddKj1i;
+ HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026
+ 21:37:46 GMT
+ x-openai-proxy-wasm:
+ - v0.1
+ x-ratelimit-limit-requests:
+ - '30000'
+ x-ratelimit-limit-tokens:
+ - '150000000'
+ x-ratelimit-remaining-requests:
+ - '29999'
+ x-ratelimit-remaining-tokens:
+ - '149999977'
+ x-ratelimit-reset-requests:
+ - 2ms
+ x-ratelimit-reset-tokens:
+ - 0s
+ x-request-id:
+ - req_2ac33c8b77a343ea881d0fe0fbeccc14
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_sequential_pipeline_creates_parent_span.yaml b/py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_sequential_pipeline_creates_parent_span.yaml
new file mode 100644
index 00000000..1ceea647
--- /dev/null
+++ b/py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_sequential_pipeline_creates_parent_span.yaml
@@ -0,0 +1,545 @@
+interactions:
+- request:
+ body: '{"messages":[{"role":"system","content":[{"type":"text","text":"You rewrite
+ the input as a short title."}]},{"role":"user","content":[{"type":"text","text":"#
+ Conversation History\nThe content between tags contains
+ your conversation history\n\nuser: Summarize why tests should use real
+ recorded traffic.\n"}]}],"model":"gpt-4o-mini","stream":false,"temperature":0}'
+ headers:
+ Accept:
+ - application/json
+ Accept-Encoding:
+ - gzip, deflate
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '403'
+ Content-Type:
+ - application/json
+ Host:
+ - api.openai.com
+ User-Agent:
+ - AsyncOpenAI/Python 2.29.0
+ X-Stainless-Arch:
+ - arm64
+ X-Stainless-Async:
+ - async:asyncio
+ X-Stainless-Lang:
+ - python
+ X-Stainless-OS:
+ - MacOS
+ X-Stainless-Package-Version:
+ - 2.29.0
+ X-Stainless-Runtime:
+ - CPython
+ X-Stainless-Runtime-Version:
+ - 3.13.3
+ x-stainless-read-timeout:
+ - '600'
+ x-stainless-retry-count:
+ - '0'
+ method: POST
+ uri: https://api.openai.com/v1/chat/completions
+ response:
+ body:
+ string: !!binary |
+ H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyCzzdwMAAAD//41SwW7bMAy95ysMneMi
+ yZwou/a2a5EehqIwFImytcmiIcpbhyH/PspOarfrgF0EmI/v+fGR9+0ed1V1+Hl4HI5f2oevzekU
+ BrHODDx/A51urDuNzIPkMEywjqASZNWtlFUld/KzHIEODfhMa/pUVlh2Lrhyt9lV5UaW2+OV3aLT
+ QNz2xJ9F8Xt8s89g4IXLm/Wt0gGRaoBrtyYuRvS5IhSRo6RCEusZ1BgShNH6PQSwLlGBtngkF5ri
+ AZTnR2M0YIpTVNY6XbhQnIASNyyFItiBVB4mDN4vABUCJpXDGEd4viKXV9Memz7imd5RheUwqK05
+ O+Ig2SAl7MWIXvh9HsMZ3swrWKjrU53wO4y/28tJTswrmcHjFUvsz8/lw379gVhtICnnaZGt0Eq3
+ YGbmvAg1GIcLYLUY+W8vH2lPY3PI/yM/A1pDz6dW9xGM02/nndsi5Hv9V9trxKNhQRB/8AHWyUHM
+ azBg1eCnKxL0ixJ0Ne+qgdhHN52S7WutPoE0cnu2YnVZ/QE7MxO8WAMAAA==
+ headers:
+ CF-Cache-Status:
+ - DYNAMIC
+ CF-Ray:
+ - 9e20e166fbae67b2-SJC
+ Connection:
+ - keep-alive
+ Content-Encoding:
+ - gzip
+ Content-Type:
+ - application/json
+ Date:
+ - Wed, 25 Mar 2026 21:06:38 GMT
+ Server:
+ - cloudflare
+ Strict-Transport-Security:
+ - max-age=31536000; includeSubDomains; preload
+ Transfer-Encoding:
+ - chunked
+ X-Content-Type-Options:
+ - nosniff
+ access-control-expose-headers:
+ - X-Request-ID
+ alt-svc:
+ - h3=":443"; ma=86400
+ openai-organization:
+ - braintrust-data
+ openai-processing-ms:
+ - '664'
+ openai-project:
+ - proj_vsCSXafhhByzWOThMrJcZiw9
+ openai-version:
+ - '2020-10-01'
+ set-cookie:
+ - __cf_bm=0vN_GTd.Taalah9PDsZg_Ru.1_PcZ_NBP9qkR2MCsFE-1774472797.273874-1.0.1.1-_HsrwKoaPPyMDTMtccbmEvGb.WDYukiKlNhKyLTp32aZR8vwDwqATyzmrwTg82HAg9bVn2GQnmrENihz.LTaMxGxvJCORGScpnet2yitftoFB0LwZa12LFWkWMzlprHK;
+ HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026
+ 21:36:38 GMT
+ x-openai-proxy-wasm:
+ - v0.1
+ x-ratelimit-limit-requests:
+ - '30000'
+ x-ratelimit-limit-tokens:
+ - '150000000'
+ x-ratelimit-remaining-requests:
+ - '29999'
+ x-ratelimit-remaining-tokens:
+ - '149999940'
+ x-ratelimit-reset-requests:
+ - 2ms
+ x-ratelimit-reset-tokens:
+ - 0s
+ x-request-id:
+ - req_bcc5d231b4f24f049b2e0a0ba5d880a9
+ status:
+ code: 200
+ message: OK
+- request:
+ body: '{"messages":[{"role":"system","content":[{"type":"text","text":"You rewrite
+ the input as a short title."}]},{"role":"user","content":[{"type":"text","text":"#
+ Conversation History\nThe content between tags contains
+ your conversation history\n\nuser: Summarize why tests should use real
+ recorded traffic.\n"}]}],"model":"gpt-4o-mini","stream":false,"temperature":0}'
+ headers:
+ Accept:
+ - application/json
+ Accept-Encoding:
+ - gzip, deflate
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '403'
+ Content-Type:
+ - application/json
+ Host:
+ - api.openai.com
+ User-Agent:
+ - AsyncOpenAI/Python 2.29.0
+ X-Stainless-Arch:
+ - arm64
+ X-Stainless-Async:
+ - async:asyncio
+ X-Stainless-Lang:
+ - python
+ X-Stainless-OS:
+ - MacOS
+ X-Stainless-Package-Version:
+ - 2.29.0
+ X-Stainless-Runtime:
+ - CPython
+ X-Stainless-Runtime-Version:
+ - 3.13.3
+ x-stainless-read-timeout:
+ - '600'
+ x-stainless-retry-count:
+ - '0'
+ method: POST
+ uri: https://api.openai.com/v1/chat/completions
+ response:
+ body:
+ string: !!binary |
+ H4sIAAAAAAAA/4xSwW7bMAy95ysEneMhcdI56G3FTus2DFt3GgpDkShHjSwJIj2sKPLvo+2kdrcO
+ 2EWA+fieHx/5tBBCOiOvhdQHRbpNvnj/+cvDR328uXUfNv7WfTuGlSrfVZ9Sh4+tXPaMuH8ATRfW
+ Gx2ZB+RiGGGdQRH0quuq2m6rclduBqCNBnxPaxIV21i0LriiXJXbYlUV692ZfYhOA3LbD/4U4ml4
+ e5/BwC8ur5aXSguIqgGuXZq4mKPvK1IhOiQVSC4nUMdAEAbrNxDAOkIRrfiOLjTiKyjPj47ZgBF3
+ WVnrtHBB3AESN8yFMtgOVT9M6LyfASqESKoPYxjh/oycnk372KQc9/gHVVoOAw81Z4ccJBtEikkO
+ 6Inf+yGc7sW8koXaRDXFIwy/u6pGOTmtZAJ3Z4zYn5/Kb6+Wr4jVBkg5j7NspVb6AGZiTotQnXFx
+ BixmI//t5TXtcWwO+X/kJ0BrSHxqdcpgnH4579SWob/Xf7U9RzwYlgj5Jx9gTQ5yvwYDVnV+vCKJ
+ j0jQ1ryrBnLKbjwlm2qtNlCZar23cnFa/AYAAP//AwB2CCbyWAMAAA==
+ headers:
+ CF-Cache-Status:
+ - DYNAMIC
+ CF-Ray:
+ - 9e20e208d9766142-SJC
+ Connection:
+ - keep-alive
+ Content-Encoding:
+ - gzip
+ Content-Type:
+ - application/json
+ Date:
+ - Wed, 25 Mar 2026 21:07:03 GMT
+ Server:
+ - cloudflare
+ Strict-Transport-Security:
+ - max-age=31536000; includeSubDomains; preload
+ Transfer-Encoding:
+ - chunked
+ X-Content-Type-Options:
+ - nosniff
+ access-control-expose-headers:
+ - X-Request-ID
+ alt-svc:
+ - h3=":443"; ma=86400
+ openai-organization:
+ - braintrust-data
+ openai-processing-ms:
+ - '652'
+ openai-project:
+ - proj_vsCSXafhhByzWOThMrJcZiw9
+ openai-version:
+ - '2020-10-01'
+ set-cookie:
+ - __cf_bm=nPA03jdSYpPIt1MCtq5c3dH__SdRu5fP0HUZaKsaPsk-1774472823.1741066-1.0.1.1-QeI7TdvczvDfZcpacbJMilgyA_s79AH1EgKxSLO_1Z_BUh6jqZue4vjSpv9Sr.ihxNrRzZkdyq7EzAFHuIxCU9THwR_hF6iuXTWOT0BzCGX_rHDteSnXa7BlEfiRqaBH;
+ HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026
+ 21:37:03 GMT
+ x-openai-proxy-wasm:
+ - v0.1
+ x-ratelimit-limit-requests:
+ - '30000'
+ x-ratelimit-limit-tokens:
+ - '150000000'
+ x-ratelimit-remaining-requests:
+ - '29999'
+ x-ratelimit-remaining-tokens:
+ - '149999940'
+ x-ratelimit-reset-requests:
+ - 2ms
+ x-ratelimit-reset-tokens:
+ - 0s
+ x-request-id:
+ - req_09e84f9d413d4a8d9495fef9e4133024
+ status:
+ code: 200
+ message: OK
+- request:
+ body: '{"messages":[{"role":"system","content":[{"type":"text","text":"You answer
+ the previous message in one sentence."}]},{"role":"user","content":[{"type":"text","text":"#
+ Conversation History\nThe content between tags contains
+ your conversation history\n\nAlice: Benefits of Using Real Recorded
+ Traffic in Testing\n"}]}],"model":"gpt-4o-mini","stream":false,"temperature":0}'
+ headers:
+ Accept:
+ - application/json
+ Accept-Encoding:
+ - gzip, deflate
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '410'
+ Content-Type:
+ - application/json
+ Host:
+ - api.openai.com
+ User-Agent:
+ - AsyncOpenAI/Python 2.29.0
+ X-Stainless-Arch:
+ - arm64
+ X-Stainless-Async:
+ - async:asyncio
+ X-Stainless-Lang:
+ - python
+ X-Stainless-OS:
+ - MacOS
+ X-Stainless-Package-Version:
+ - 2.29.0
+ X-Stainless-Runtime:
+ - CPython
+ X-Stainless-Runtime-Version:
+ - 3.13.3
+ x-stainless-read-timeout:
+ - '600'
+ x-stainless-retry-count:
+ - '0'
+ method: POST
+ uri: https://api.openai.com/v1/chat/completions
+ response:
+ body:
+ string: !!binary |
+ H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyPKtcAYAAAD//4xTy27bMBC8+ysInqXA
+ dpQqPRfIMUBb9FQEAk2upG35ApcyYhT+9y6lOHLaFOiFgHZ2docz1OeTg+bT1y/mMH54GB7H5+ZW
+ pV5WhREOP0DnC+tGB+ZBxuAXWCdQGcrUXds2Tbu/3zcz4IIBW2hDzHUTaoce6/1239Tbtt7dv7DH
+ gBqI277zpxC/5rPo9AaeubytLhUHRGoArl2auJiCLRWpiJCy8llWK6iDz+Bn6d8I/SBYquVDh2TA
+ iJxU36MW6EUGyqUB/Kg86xFK6ykpfaoEupjCkUsRUh+SKzhTCIcxUyWUN2IEG0mg4V3Yn0QMZSvy
+ KiSamMkLlHAhwSyAdfJS8EdMwTvuvLnWnKCfSBXf/GTtFaC8D1kV32e3nl6Q86s/Ngys9EB/UGXP
+ vtPY8WrizNgLyiHKGT3z+TTnML2xVvIgF3OXw0+Y193dLePkmv4KLnkzmFmgXevtx+qdaZ2BrNDS
+ VY5SKz2CWZlr6GoyGK6AzdWd/xbz3uzl3hzu/4xfAa0h8rPuYgKD+u2F17YE5d/4V9urx7NgSZCO
+ /Ni7jJBKDgZ6NdnlxUo6UQbXcVgDpJhwebZ97LS6hda0u0MvN+fNbwAAAP//AwDs2pZ8xAMAAA==
+ headers:
+ CF-Cache-Status:
+ - DYNAMIC
+ CF-Ray:
+ - 9e20e20db9eddfce-SJC
+ Connection:
+ - keep-alive
+ Content-Encoding:
+ - gzip
+ Content-Type:
+ - application/json
+ Date:
+ - Wed, 25 Mar 2026 21:07:04 GMT
+ Server:
+ - cloudflare
+ Strict-Transport-Security:
+ - max-age=31536000; includeSubDomains; preload
+ Transfer-Encoding:
+ - chunked
+ X-Content-Type-Options:
+ - nosniff
+ access-control-expose-headers:
+ - X-Request-ID
+ alt-svc:
+ - h3=":443"; ma=86400
+ openai-organization:
+ - braintrust-data
+ openai-processing-ms:
+ - '669'
+ openai-project:
+ - proj_vsCSXafhhByzWOThMrJcZiw9
+ openai-version:
+ - '2020-10-01'
+ set-cookie:
+ - __cf_bm=HayDHNMCRsqHCSCg5YnFHqR8EGszTInfOwZSaeO5eyg-1774472823.957494-1.0.1.1-T1qVHFP.Enq2BHk4byqWQwdinEXG5pjJnQ6C55kaXNumTZcQVGLUbKoXNUrcL8OlUtndtdfReMfEtfZ5GDvmyZ8zIPnM28rC8KnQNIAYwe3oTr24xzHY4xyLaaAQfXw_;
+ HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026
+ 21:37:04 GMT
+ x-openai-proxy-wasm:
+ - v0.1
+ x-ratelimit-limit-requests:
+ - '30000'
+ x-ratelimit-limit-tokens:
+ - '150000000'
+ x-ratelimit-remaining-requests:
+ - '29999'
+ x-ratelimit-remaining-tokens:
+ - '149999940'
+ x-ratelimit-reset-requests:
+ - 2ms
+ x-ratelimit-reset-tokens:
+ - 0s
+ x-request-id:
+ - req_074bde6fdf2a4c4caf77d081e3a5af31
+ status:
+ code: 200
+ message: OK
+- request:
+ body: '{"messages":[{"role":"system","content":[{"type":"text","text":"You rewrite
+ the input as a short title."}]},{"role":"user","content":[{"type":"text","text":"#
+ Conversation History\nThe content between tags contains
+ your conversation history\n\nuser: Summarize why tests should use real
+ recorded traffic.\n"}]}],"model":"gpt-4o-mini","stream":false,"temperature":0}'
+ headers:
+ Accept:
+ - application/json
+ Accept-Encoding:
+ - gzip, deflate
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '403'
+ Content-Type:
+ - application/json
+ Host:
+ - api.openai.com
+ User-Agent:
+ - AsyncOpenAI/Python 2.29.0
+ X-Stainless-Arch:
+ - arm64
+ X-Stainless-Async:
+ - async:asyncio
+ X-Stainless-Lang:
+ - python
+ X-Stainless-OS:
+ - MacOS
+ X-Stainless-Package-Version:
+ - 2.29.0
+ X-Stainless-Runtime:
+ - CPython
+ X-Stainless-Runtime-Version:
+ - 3.13.3
+ x-stainless-read-timeout:
+ - '600'
+ x-stainless-retry-count:
+ - '0'
+ method: POST
+ uri: https://api.openai.com/v1/chat/completions
+ response:
+ body:
+ string: !!binary |
+ H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyDb29QAAAAD//4xSTW/bMAy951cYOsdD
+ vlrl3AI99DJsaIGiQ2EoEuWolSVPpLsMQ/77KDup3a0DdhFgPr7nx0c+SL99PHxZP94crg/XPz6v
+ v6/tw62YZ0bcPYOmM+uTjswDcjEMsE6gCLLqUsrNRq62l7IHmmjAZ1rdUrmJZeOCK1eL1aZcyHK5
+ PbH30WlAbvvGn0Xxq3+zz2DgwOXF/FxpAFHVwLVzExdT9LkiFKJDUoHEfAR1DASht34FAawjLKIt
+ 7tGFuvgKyvOjYzJgirukrHW6cKG4AyRumAolsB2qPEzovJ8AKoRIKofRj/B0Qo5vpn2s2xR3+AdV
+ WA4D9xVnhxwkG0SKrejRI79PfTjdu3kFCzUtVRRfoP/dhRzkxLiSEdyeMGJ/fixfXsw/EKsMkHIe
+ J9kKrfQezMgcF6E64+IEmE1G/tvLR9rD2Bzy/8iPgNbQ8qlVbQLj9Pt5x7YE+V7/1fYWcW9YIKRX
+ PsCKHKS8BgNWdX64IoE/kaCpeFc1pDa54ZRsW2m1BmnkcmfF7Dj7DQAA//8DANl/ZwFYAwAA
+ headers:
+ CF-Cache-Status:
+ - DYNAMIC
+ CF-Ray:
+ - 9e20e31aaae4cf8f-SJC
+ Connection:
+ - keep-alive
+ Content-Encoding:
+ - gzip
+ Content-Type:
+ - application/json
+ Date:
+ - Wed, 25 Mar 2026 21:07:47 GMT
+ Server:
+ - cloudflare
+ Strict-Transport-Security:
+ - max-age=31536000; includeSubDomains; preload
+ Transfer-Encoding:
+ - chunked
+ X-Content-Type-Options:
+ - nosniff
+ access-control-expose-headers:
+ - X-Request-ID
+ alt-svc:
+ - h3=":443"; ma=86400
+ openai-organization:
+ - braintrust-data
+ openai-processing-ms:
+ - '724'
+ openai-project:
+ - proj_vsCSXafhhByzWOThMrJcZiw9
+ openai-version:
+ - '2020-10-01'
+ set-cookie:
+ - __cf_bm=Mzctd7Vf4Pj8hLREk5uV3xfhGAJqHTBdS7Z68xdqJ3E-1774472866.9835687-1.0.1.1-9060awKZeUAS__HpRhOh3ZDNpHvhVrftE8gP2f5h5qVQ7SUhdLjp1QfnUBiOHckFloYERDcS4nSTfy3q7RPbo.rO9ak4DB2RBS7.iNAo4m7Yek2xS4nBLaBUTUBc7VfU;
+ HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026
+ 21:37:47 GMT
+ x-openai-proxy-wasm:
+ - v0.1
+ x-ratelimit-limit-requests:
+ - '30000'
+ x-ratelimit-limit-tokens:
+ - '150000000'
+ x-ratelimit-remaining-requests:
+ - '29999'
+ x-ratelimit-remaining-tokens:
+ - '149999940'
+ x-ratelimit-reset-requests:
+ - 2ms
+ x-ratelimit-reset-tokens:
+ - 0s
+ x-request-id:
+ - req_923040bc8f624770996dc8d06bd77fef
+ status:
+ code: 200
+ message: OK
+- request:
+ body: '{"messages":[{"role":"system","content":[{"type":"text","text":"You answer
+ the previous message in one sentence."}]},{"role":"user","content":[{"type":"text","text":"#
+ Conversation History\nThe content between tags contains
+ your conversation history\n\nAlice: Benefits of Using Real Recorded
+ Traffic in Testing\n"}]}],"model":"gpt-4o-mini","stream":false,"temperature":0}'
+ headers:
+ Accept:
+ - application/json
+ Accept-Encoding:
+ - gzip, deflate
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '410'
+ Content-Type:
+ - application/json
+ Host:
+ - api.openai.com
+ User-Agent:
+ - AsyncOpenAI/Python 2.29.0
+ X-Stainless-Arch:
+ - arm64
+ X-Stainless-Async:
+ - async:asyncio
+ X-Stainless-Lang:
+ - python
+ X-Stainless-OS:
+ - MacOS
+ X-Stainless-Package-Version:
+ - 2.29.0
+ X-Stainless-Runtime:
+ - CPython
+ X-Stainless-Runtime-Version:
+ - 3.13.3
+ x-stainless-read-timeout:
+ - '600'
+ x-stainless-retry-count:
+ - '0'
+ method: POST
+ uri: https://api.openai.com/v1/chat/completions
+ response:
+ body:
+ string: !!binary |
+ H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyDbOSAcAAAD//4xTwW7bMAy95ysEnZ0i
+ yZy6uw/tYehp6y5DYSgSbbOVJUGUswZD/n2UndTp2gG7CDAfH/n0nvz19un2W7i//rK6t9u777vD
+ rx/q4UUWmeF3T6DTmXWlPfMgoXcTrCOoBHnquqrKstrcXFcj0HsDNtPakJalX/bocLlZbcrlqlqu
+ b07szqMG4raf/CnE7/HMOp2BFy6vinOlByLVAtfOTVyM3uaKVERISbkkixnU3iVwo/QHQtcKlmr5
+ 0D4aMCJF1TSoBTqRgFJuANcpx3qE0nqISh8KgX2Ifs+lALHxsc84UwjbLlEhlDOiAxtIoOFd2BxE
+ 8Hkr8iokGpjJC5TofYRRAOvkpeD2GL3rufPqUnOEZiCVfXODtReAcs4nlX0f3Xo8IcdXf6xvWemO
+ /qLKhn2nrubVxJmxF5R8kCN65PNxzGF4Y63kQX1IdfLPMK7bbqdxck5/BjflCUws0M716nPxwbTa
+ QFJo6SJHqZXuwMzMOXQ1GPQXwOLizu/FfDR7ujeH+z/jZ0BrCPys6xDBoH574bktQv43/tX26vEo
+ WBLEPT/2OiHEnIOBRg12erGSDpSgrzmsFmKIOD3bJtRafYLKVOtdIxfHxR8AAAD//wMAQ8dY3cQD
+ AAA=
+ headers:
+ CF-Cache-Status:
+ - DYNAMIC
+ CF-Ray:
+ - 9e20e31fec3dc132-SJC
+ Connection:
+ - keep-alive
+ Content-Encoding:
+ - gzip
+ Content-Type:
+ - application/json
+ Date:
+ - Wed, 25 Mar 2026 21:07:48 GMT
+ Server:
+ - cloudflare
+ Strict-Transport-Security:
+ - max-age=31536000; includeSubDomains; preload
+ Transfer-Encoding:
+ - chunked
+ X-Content-Type-Options:
+ - nosniff
+ access-control-expose-headers:
+ - X-Request-ID
+ alt-svc:
+ - h3=":443"; ma=86400
+ openai-organization:
+ - braintrust-data
+ openai-processing-ms:
+ - '768'
+ openai-project:
+ - proj_vsCSXafhhByzWOThMrJcZiw9
+ openai-version:
+ - '2020-10-01'
+ set-cookie:
+ - __cf_bm=JYxrn1QLzqghAM1PJW6V89UMbmiLatd1atmmQw6NPr0-1774472867.8239858-1.0.1.1-hwGgJY4XYCiJtbeBfm6fdTUACpklHSNTd64qYVFPn23d73s8.NBRfBlLx6nUV4d3.tfIInsBJq50FlZh7Wv9iTFtj7HY1hVkAYRHdnNzeq_4_piEn49lFlc_GkLVdmE0;
+ HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026
+ 21:37:48 GMT
+ x-openai-proxy-wasm:
+ - v0.1
+ x-ratelimit-limit-requests:
+ - '30000'
+ x-ratelimit-limit-tokens:
+ - '150000000'
+ x-ratelimit-remaining-requests:
+ - '29999'
+ x-ratelimit-remaining-tokens:
+ - '149999940'
+ x-ratelimit-reset-requests:
+ - 2ms
+ x-ratelimit-reset-tokens:
+ - 0s
+ x-request-id:
+ - req_13f99272933545f8bac789ebd64bb0f1
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_simple_agent_run.yaml b/py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_simple_agent_run.yaml
new file mode 100644
index 00000000..87f6358e
--- /dev/null
+++ b/py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_simple_agent_run.yaml
@@ -0,0 +1,320 @@
+interactions:
+- request:
+ body: '{"messages":[{"role":"system","name":"system","content":[{"type":"text","text":"You
+ are a concise assistant. Answer in one sentence."}]},{"role":"user","name":"user","content":[{"type":"text","text":"Say
+ hello in exactly two words."}]}],"model":"gpt-4o-mini","stream":false,"temperature":0}'
+ headers:
+ Accept:
+ - application/json
+ Accept-Encoding:
+ - gzip, deflate
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '290'
+ Content-Type:
+ - application/json
+ Host:
+ - api.openai.com
+ User-Agent:
+ - AsyncOpenAI/Python 2.29.0
+ X-Stainless-Arch:
+ - arm64
+ X-Stainless-Async:
+ - async:asyncio
+ X-Stainless-Lang:
+ - python
+ X-Stainless-OS:
+ - MacOS
+ X-Stainless-Package-Version:
+ - 2.29.0
+ X-Stainless-Runtime:
+ - CPython
+ X-Stainless-Runtime-Version:
+ - 3.13.3
+ x-stainless-read-timeout:
+ - '600'
+ x-stainless-retry-count:
+ - '0'
+ method: POST
+ uri: https://api.openai.com/v1/chat/completions
+ response:
+ body:
+ string: !!binary |
+ H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyCzNtgAAAAD//4xSsU7DMBDd+xWV56Rq
+ 05TQGYbODCyoilz7khgc29hOFYT675ydtglQJBZLvnfv/N7zPRzNkb33u02/qXTPN48AT89bkgSG
+ PrwC8xfWgmnkgRdaDTCzQD2EqauiyPMiK7Z3EWg1BxlotfFprtNWKJFmyyxPl0W6uj+zGy0YOGx7
+ wet8/hnPoFNx6LG8TC6VFpyjNWDt0oRFq2WoEOqccJ4qT5IRZFp5UFH6DqTUc9+AhcW0xULVORpk
+ qk7KCUCV0p4Gm1Hc/oycrnKkro3VB/eDSiq06ZoSU3EYET7tvDYkoic899F2980JwUGt8aXXbxCf
+ W2fDODKGPQHPmEd9clLeJDeGlRw8FdJNUiOMsgb4yBwjph0XegLMJpZ/a7k1e7AtVP2f8SPAGBhc
+ otJY4IJ99zu2WQib+FfbNeIomDiwR1yt0guw4Rs4VLSTw34Q9+E8tCX+VQ3WWDEsSWVKRtdQ8GJ1
+ qMjsNPsCAAD//wMAQQCebTIDAAA=
+ headers:
+ CF-Cache-Status:
+ - DYNAMIC
+ CF-Ray:
+ - 9e20e1608aaf6142-SJC
+ Connection:
+ - keep-alive
+ Content-Encoding:
+ - gzip
+ Content-Type:
+ - application/json
+ Date:
+ - Wed, 25 Mar 2026 21:06:37 GMT
+ Server:
+ - cloudflare
+ Strict-Transport-Security:
+ - max-age=31536000; includeSubDomains; preload
+ Transfer-Encoding:
+ - chunked
+ X-Content-Type-Options:
+ - nosniff
+ access-control-expose-headers:
+ - X-Request-ID
+ alt-svc:
+ - h3=":443"; ma=86400
+ openai-organization:
+ - braintrust-data
+ openai-processing-ms:
+ - '373'
+ openai-project:
+ - proj_vsCSXafhhByzWOThMrJcZiw9
+ openai-version:
+ - '2020-10-01'
+ set-cookie:
+ - __cf_bm=0r2bm90d_zmUe.y8EYZpgoGyTVS4QQSDoJVx.yDpJng-1774472796.2480545-1.0.1.1-onYisUL_Bju9EhfGXQnZBZkwk3gdjG7tHXVdr34BVePUh3JL0OqfVWApVIaF_KDBKfw4HIiGBvzONzv_AS91kbK7eL.FzFDwILNg8_F1h3hsPpZO.pIoeUN1dp_.acW6;
+ HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026
+ 21:36:37 GMT
+ x-openai-proxy-wasm:
+ - v0.1
+ x-ratelimit-limit-requests:
+ - '30000'
+ x-ratelimit-limit-tokens:
+ - '150000000'
+ x-ratelimit-remaining-requests:
+ - '29999'
+ x-ratelimit-remaining-tokens:
+ - '149999975'
+ x-ratelimit-reset-requests:
+ - 2ms
+ x-ratelimit-reset-tokens:
+ - 0s
+ x-request-id:
+ - req_0b44866bb8cd459db8712e04e4248889
+ status:
+ code: 200
+ message: OK
+- request:
+ body: '{"messages":[{"role":"system","name":"system","content":[{"type":"text","text":"You
+ are a concise assistant. Answer in one sentence."}]},{"role":"user","name":"user","content":[{"type":"text","text":"Say
+ hello in exactly two words."}]}],"model":"gpt-4o-mini","stream":false,"temperature":0}'
+ headers:
+ Accept:
+ - application/json
+ Accept-Encoding:
+ - gzip, deflate
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '290'
+ Content-Type:
+ - application/json
+ Host:
+ - api.openai.com
+ User-Agent:
+ - AsyncOpenAI/Python 2.29.0
+ X-Stainless-Arch:
+ - arm64
+ X-Stainless-Async:
+ - async:asyncio
+ X-Stainless-Lang:
+ - python
+ X-Stainless-OS:
+ - MacOS
+ X-Stainless-Package-Version:
+ - 2.29.0
+ X-Stainless-Runtime:
+ - CPython
+ X-Stainless-Runtime-Version:
+ - 3.13.3
+ x-stainless-read-timeout:
+ - '600'
+ x-stainless-retry-count:
+ - '0'
+ method: POST
+ uri: https://api.openai.com/v1/chat/completions
+ response:
+ body:
+ string: !!binary |
+ H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyPJOswAAAAD//4xS0WrCMBR99yskz61o
+ 7aiv7mX6MJgwtsGQEpPbNjNNQpKOjeG/76ZVWzcHewnknntuzjm5t/dsebeB9Wq/qZ7WD4/L55eU
+ piQKDL17A+ZPrAnTyAMvtOpgZoF6CFNnWZamWbJIkhaoNQcZaKXxcarjWigRJ9MkjadZPFsc2ZUW
+ DBy2veJ1PP5qz6BTcfjA8jQ6VWpwjpaAtVMTFq2WoUKoc8J5qjyJepBp5UG10lcgpR77CixMhi0W
+ isbRIFM1Ug4AqpT2NNhsxW2PyOEsR+rSWL1zP6ikQJuuyjEVhxHh085rQ1r0gOe2td1cOCE4qDY+
+ 93oP7XPzpBtH+rAH4BHzqE8OyjfRlWE5B0+FdIPUCKOsAt4z+4hpw4UeAKOB5d9ars3ubAtV/md8
+ DzAGBpcoNxa4YJd++zYLYRP/ajtH3AomDuw7rlbuBdjwDRwK2shuP4j7dB7qHP+qBGus6JakMDmj
+ c8h4NtsVZHQYfQMAAP//AwDajPLhMgMAAA==
+ headers:
+ CF-Cache-Status:
+ - DYNAMIC
+ CF-Ray:
+ - 9e20e202698a251d-SJC
+ Connection:
+ - keep-alive
+ Content-Encoding:
+ - gzip
+ Content-Type:
+ - application/json
+ Date:
+ - Wed, 25 Mar 2026 21:07:03 GMT
+ Server:
+ - cloudflare
+ Strict-Transport-Security:
+ - max-age=31536000; includeSubDomains; preload
+ Transfer-Encoding:
+ - chunked
+ X-Content-Type-Options:
+ - nosniff
+ access-control-expose-headers:
+ - X-Request-ID
+ alt-svc:
+ - h3=":443"; ma=86400
+ openai-organization:
+ - braintrust-data
+ openai-processing-ms:
+ - '334'
+ openai-project:
+ - proj_vsCSXafhhByzWOThMrJcZiw9
+ openai-version:
+ - '2020-10-01'
+ set-cookie:
+ - __cf_bm=GJt8W0lyxqfgtKXEJ4M8twcG5pNqc4RmQiPqQ_IukiU-1774472822.1494222-1.0.1.1-Jv3zKpnCjFAAQQaXBEt3RElEP.QjtEFbqrvr8BASrk5X7XSiOj1UBc4tUR3t9QbKmOM0VrcVW6R3HYLaYxbTHQqz4Dpjl7Z.Sz9BslefycjBprbfLjQ1aoOYxrSO7lkO;
+ HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026
+ 21:37:03 GMT
+ x-openai-proxy-wasm:
+ - v0.1
+ x-ratelimit-limit-requests:
+ - '30000'
+ x-ratelimit-limit-tokens:
+ - '150000000'
+ x-ratelimit-remaining-requests:
+ - '29999'
+ x-ratelimit-remaining-tokens:
+ - '149999977'
+ x-ratelimit-reset-requests:
+ - 2ms
+ x-ratelimit-reset-tokens:
+ - 0s
+ x-request-id:
+ - req_8c118a37a5da44069de28fba911081e0
+ status:
+ code: 200
+ message: OK
+- request:
+ body: '{"messages":[{"role":"system","name":"system","content":[{"type":"text","text":"You
+ are a concise assistant. Answer in one sentence."}]},{"role":"user","name":"user","content":[{"type":"text","text":"Say
+ hello in exactly two words."}]}],"model":"gpt-4o-mini","stream":false,"temperature":0}'
+ headers:
+ Accept:
+ - application/json
+ Accept-Encoding:
+ - gzip, deflate
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '290'
+ Content-Type:
+ - application/json
+ Host:
+ - api.openai.com
+ User-Agent:
+ - AsyncOpenAI/Python 2.29.0
+ X-Stainless-Arch:
+ - arm64
+ X-Stainless-Async:
+ - async:asyncio
+ X-Stainless-Lang:
+ - python
+ X-Stainless-OS:
+ - MacOS
+ X-Stainless-Package-Version:
+ - 2.29.0
+ X-Stainless-Runtime:
+ - CPython
+ X-Stainless-Runtime-Version:
+ - 3.13.3
+ x-stainless-read-timeout:
+ - '600'
+ x-stainless-retry-count:
+ - '0'
+ method: POST
+ uri: https://api.openai.com/v1/chat/completions
+ response:
+ body:
+ string: !!binary |
+ H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyDbKMgIAAAD//4xSQU7DMBC89xWVz0mV
+ pKHhDALBDSFxQlXk2pvE4NiW7dAi1L+zTtomhSJxseSdnfXMeO+zuxuWbp9vt4/pU/WyonrHk45E
+ gaE3b8D8kbVgGnnghVYDzCxQD2FqWhR5XmTXq1UPtJqDDLTa+DjXcSuUiLMky+OkiNPrA7vRgoHD
+ tle8zudf/Rl0Kg47LCfRsdKCc7QGrB2bsGi1DBVCnRPOU+VJNIJMKw+ql/4AUuq5b8DCYtpioeoc
+ DTJVJ+UEoEppT4PNXtz6gOxPcqSujdUb94NKKrTpmhJTcRgRPu28NqRH93iue9vdmROCg1rjS6/f
+ oX9umQ3jyBj2BDxgHvXJSfkqujCs5OCpkG6SGmGUNcBH5hgx7bjQE2A2sfxby6XZg22h6v+MHwHG
+ wOASlcYCF+zc79hmIWziX22niHvBxIH9wNUqvQAbvoFDRTs57Adxn85DW+Jf1WCNFcOSVKZkdAkF
+ L9JNRWb72TcAAAD//wMAHqai9DIDAAA=
+ headers:
+ CF-Cache-Status:
+ - DYNAMIC
+ CF-Ray:
+ - 9e20e3168f481d99-SJC
+ Connection:
+ - keep-alive
+ Content-Encoding:
+ - gzip
+ Content-Type:
+ - application/json
+ Date:
+ - Wed, 25 Mar 2026 21:07:46 GMT
+ Server:
+ - cloudflare
+ Strict-Transport-Security:
+ - max-age=31536000; includeSubDomains; preload
+ Transfer-Encoding:
+ - chunked
+ X-Content-Type-Options:
+ - nosniff
+ access-control-expose-headers:
+ - X-Request-ID
+ alt-svc:
+ - h3=":443"; ma=86400
+ openai-organization:
+ - braintrust-data
+ openai-processing-ms:
+ - '370'
+ openai-project:
+ - proj_vsCSXafhhByzWOThMrJcZiw9
+ openai-version:
+ - '2020-10-01'
+ set-cookie:
+ - __cf_bm=QmJ_mn8jkQXEiES_f8MtNfFrG.TdUhMi9F3CsYesW.Y-1774472866.3273165-1.0.1.1-9jM9NCSYxoeVuM4GWc_rXbAFSxA2USepp4rz5niKNzmDK.TtF1V7PSKWLir8akfbpuyXvva5QQRFgDCcMT3P0xa_1Tzm9mW9uidzQIoQBbq5O6t0XUQzY8bQC4ddKj1i;
+ HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026
+ 21:37:46 GMT
+ x-openai-proxy-wasm:
+ - v0.1
+ x-ratelimit-limit-requests:
+ - '30000'
+ x-ratelimit-limit-tokens:
+ - '150000000'
+ x-ratelimit-remaining-requests:
+ - '29999'
+ x-ratelimit-remaining-tokens:
+ - '149999977'
+ x-ratelimit-reset-requests:
+ - 2ms
+ x-ratelimit-reset-tokens:
+ - 0s
+ x-request-id:
+ - req_2ac33c8b77a343ea881d0fe0fbeccc14
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_tool_use_creates_tool_span.yaml b/py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_tool_use_creates_tool_span.yaml
new file mode 100644
index 00000000..8b272df9
--- /dev/null
+++ b/py/src/braintrust/integrations/agentscope/cassettes/test_agentscope_tool_use_creates_tool_span.yaml
@@ -0,0 +1,451 @@
+interactions:
+- request:
+ body: '{"messages":[{"role":"system","name":"system","content":[{"type":"text","text":"You
+ are a helpful assistant. Use tools when required and keep answers brief."}]},{"role":"user","name":"user","content":[{"type":"text","text":"Use
+ Python to compute 6 * 7 and return just the result."}]}],"model":"gpt-4o-mini","stream":false,"temperature":0,"tools":[{"type":"function","function":{"name":"execute_python_code","parameters":{"properties":{"code":{"description":"The
+ Python code to be executed.","type":"string"},"timeout":{"default":300,"description":"The
+ maximum time (in seconds) allowed for the code to run.","type":"number"}},"required":["code"],"type":"object"},"description":"Execute
+ the given python code in a temp file and capture the return\ncode, standard
+ output and error. Note you must `print` the output to get\nthe result, and the
+ tmp file will be removed right after the execution."}}]}'
+ headers:
+ Accept:
+ - application/json
+ Accept-Encoding:
+ - gzip, deflate
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '897'
+ Content-Type:
+ - application/json
+ Host:
+ - api.openai.com
+ User-Agent:
+ - AsyncOpenAI/Python 2.29.0
+ X-Stainless-Arch:
+ - arm64
+ X-Stainless-Async:
+ - async:asyncio
+ X-Stainless-Lang:
+ - python
+ X-Stainless-OS:
+ - MacOS
+ X-Stainless-Package-Version:
+ - 2.29.0
+ X-Stainless-Runtime:
+ - CPython
+ X-Stainless-Runtime-Version:
+ - 3.13.3
+ x-stainless-read-timeout:
+ - '600'
+ x-stainless-retry-count:
+ - '0'
+ method: POST
+ uri: https://api.openai.com/v1/chat/completions
+ response:
+ body:
+ string: !!binary |
+ H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyKxINAYAAAD//41TXW+bMBR951cgP21T
+ mICSklTawzZtUvuwTZ32sI7Kcs0NeDM2tU3TKMp/n20SIGkmjQcE99xzfD+ObxT9unj88PPmS5as
+ 85q+v9usSYVmjiEffgM1B9ZbKi0PDJOih6kCYsCpJnmeZXmaL5ceaGQJ3NGq1kSZjBomWJTGaRbF
+ eZQs9uxaMgrapv2yv2G49W9Xpyjh2Ybj2SHSgNakAhs7JNmgktxFENGaaUOEQbMRpFIYEK500XE+
+ AYyUHFPC+Xhw/2wn3+OwbCI2Fx8vmutvn0hbPz3yz3fXvLldf/+xmJzXS29aX9CqE3QY0gQf4lcn
+ h1lMkMZz4RloZwC3G1NLgakd5ImMTSaq6hrbnWsBbQvksgp0VSAFuuMmfBdehm/CvChEq5gwr/rw
+ 6wLt0JHULjj3fT+ZloJVpwl/OUYihDTEdePneL9HdsPKuKxaJR/0CRWtrBV0ja1ztJ/EdCHBoRBf
+ AuqOdo6sXNMabOQf8Icm87RXRaMvRzSd70Fj6+QTVp7PzujhEgxh3hSDDymhNZQjdfQj6UomJ0Aw
+ 6f1lNee0+/6ZqP5HfgQohdbeONwqKBk97nhMU+Cu7b/Shin7gpEG9WTvITYMlNtHCSti3dLfUb3R
+ Bhpsl1aB8mbyBm9xHM/LyyRe5gsU7IK/crsEFF8EAAA=
+ headers:
+ CF-Cache-Status:
+ - DYNAMIC
+ CF-Ray:
+ - 9e20e16f0b9aaf0d-SJC
+ Connection:
+ - keep-alive
+ Content-Encoding:
+ - gzip
+ Content-Type:
+ - application/json
+ Date:
+ - Wed, 25 Mar 2026 21:06:40 GMT
+ Server:
+ - cloudflare
+ Strict-Transport-Security:
+ - max-age=31536000; includeSubDomains; preload
+ Transfer-Encoding:
+ - chunked
+ X-Content-Type-Options:
+ - nosniff
+ access-control-expose-headers:
+ - X-Request-ID
+ alt-svc:
+ - h3=":443"; ma=86400
+ openai-organization:
+ - braintrust-data
+ openai-processing-ms:
+ - '1239'
+ openai-project:
+ - proj_vsCSXafhhByzWOThMrJcZiw9
+ openai-version:
+ - '2020-10-01'
+ set-cookie:
+ - __cf_bm=ncH1zypNLZZs6ohCgRXU6EAQ0iJm0PqJ2cW8rY7HVI8-1774472798.5665667-1.0.1.1-k0DS.3SU2tAkF.r3ZjBzLnG4fwZB2Fu5meB0y3aUWpZtAKeGqa66nGagt.hDaY2vSKIEbmMrwf7bozk5KFrU29xqoOc0X32xTuik1N0lOr3jOPFP1_u5ceUvxEjzvGTM;
+ HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026
+ 21:36:40 GMT
+ x-openai-proxy-wasm:
+ - v0.1
+ x-ratelimit-limit-requests:
+ - '30000'
+ x-ratelimit-limit-tokens:
+ - '150000000'
+ x-ratelimit-remaining-requests:
+ - '29999'
+ x-ratelimit-remaining-tokens:
+ - '149999962'
+ x-ratelimit-reset-requests:
+ - 2ms
+ x-ratelimit-reset-tokens:
+ - 0s
+ x-request-id:
+ - req_02a17794efcb4607a4ef7c7281e6d187
+ status:
+ code: 200
+ message: OK
+- request:
+ body: '{"messages":[{"role":"system","name":"system","content":[{"type":"text","text":"You
+ are a helpful assistant. Use tools when required and keep answers brief."}]},{"role":"user","name":"user","content":[{"type":"text","text":"Use
+ Python to compute 6 * 7 and return just the result."}]}],"model":"gpt-4o-mini","stream":false,"temperature":0,"tools":[{"type":"function","function":{"name":"execute_python_code","parameters":{"properties":{"code":{"description":"The
+ Python code to be executed.","type":"string"},"timeout":{"default":300,"description":"The
+ maximum time (in seconds) allowed for the code to run.","type":"number"}},"required":["code"],"type":"object"},"description":"Execute
+ the given python code in a temp file and capture the return\ncode, standard
+ output and error. Note you must `print` the output to get\nthe result, and the
+ tmp file will be removed right after the execution."}}]}'
+ headers:
+ Accept:
+ - application/json
+ Accept-Encoding:
+ - gzip, deflate
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '897'
+ Content-Type:
+ - application/json
+ Host:
+ - api.openai.com
+ User-Agent:
+ - AsyncOpenAI/Python 2.29.0
+ X-Stainless-Arch:
+ - arm64
+ X-Stainless-Async:
+ - async:asyncio
+ X-Stainless-Lang:
+ - python
+ X-Stainless-OS:
+ - MacOS
+ X-Stainless-Package-Version:
+ - 2.29.0
+ X-Stainless-Runtime:
+ - CPython
+ X-Stainless-Runtime-Version:
+ - 3.13.3
+ x-stainless-read-timeout:
+ - '600'
+ x-stainless-retry-count:
+ - '0'
+ method: POST
+ uri: https://api.openai.com/v1/chat/completions
+ response:
+ body:
+ string: !!binary |
+ H4sIAAAAAAAA/4xTXWvbMBR9968wetpGPBzj1ElgD2NlHSsrhY21tC5ClW9sdbJkJDkkDfnvk+Qk
+ dtIM5gdj33PP0f042gRhiFiB5iGiFTG0bnh0eXP78uP2Cyw+x79urq713e9LU94v5fr+q6jRyDHk
+ 8wtQs2d9pNLywDApOpgqIAac6jjL0jRLpknqgVoWwB2tbEyUyqhmgkVJnKRRnEXj6Y5dSUZB27RH
+ +xuGG/92dYoCVjYcj/aRGrQmJdjYPskGleQugojWTBsiDBr1IJXCgHCli5bzAWCk5JgSzvuDu2cz
+ +O6HZRPx3Wv6IK+vxvKbjOPlagbVKjM/Z98H53XS68YXtGgFPQxpgB/i85PDLCZI7bmwAtoawM3a
+ VFJgagd5ImOTiSrb2nbnWkCbHLmsHM1zpEC33ISfwovwQ5jluWgUE+ZdF36foy06ktoG576fBtNS
+ sGg14W/HSISQhrhu/Byfdsj2sDIuy0bJZ31CRQtrBV1h6xztJzFcSLAvxJeA2qOdIytXNwYb+Qf8
+ oeNJ0qmi3pc9mkx2oLF18gEry0Zn9HABhjBvioMPKaEVFD219yNpCyYHQDDo/W0157S7/pko/0e+
+ ByiFxt443CgoGD3uuE9T4K7tv9IOU/YFIw1qae8hNgyU20cBC2Ld0t1RvdYGamyXVoLyZvIGb3Ac
+ T4qLcTzLpijYBn8BAAD//wMAHPLnAF8EAAA=
+ headers:
+ CF-Cache-Status:
+ - DYNAMIC
+ CF-Ray:
+ - 9e20e212f84f7e56-SJC
+ Connection:
+ - keep-alive
+ Content-Encoding:
+ - gzip
+ Content-Type:
+ - application/json
+ Date:
+ - Wed, 25 Mar 2026 21:07:05 GMT
+ Server:
+ - cloudflare
+ Strict-Transport-Security:
+ - max-age=31536000; includeSubDomains; preload
+ Transfer-Encoding:
+ - chunked
+ X-Content-Type-Options:
+ - nosniff
+ access-control-expose-headers:
+ - X-Request-ID
+ alt-svc:
+ - h3=":443"; ma=86400
+ openai-organization:
+ - braintrust-data
+ openai-processing-ms:
+ - '760'
+ openai-project:
+ - proj_vsCSXafhhByzWOThMrJcZiw9
+ openai-version:
+ - '2020-10-01'
+ set-cookie:
+ - __cf_bm=cEkxxhx5PI02Ywh8nf.Xs6jrFBlPOsqdtqnmu2dQoUs-1774472824.7966123-1.0.1.1-l8rKWW5YXcEa4qiqAmWX7gYkTwxcPBKKDdUiau.bKkThNBJdZMRBk8E6aelB55XL8mGvLIuOpvyEL1u_F6R5238q7PZg1iu7hVZPFcKKIeqC1cnWzWuetLrt0ahyQJcC;
+ HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026
+ 21:37:05 GMT
+ x-openai-proxy-wasm:
+ - v0.1
+ x-ratelimit-limit-requests:
+ - '30000'
+ x-ratelimit-limit-tokens:
+ - '150000000'
+ x-ratelimit-remaining-requests:
+ - '29999'
+ x-ratelimit-remaining-tokens:
+ - '149999965'
+ x-ratelimit-reset-requests:
+ - 2ms
+ x-ratelimit-reset-tokens:
+ - 0s
+ x-request-id:
+ - req_5e3d6b5e29b545dc85b5ddedeb732996
+ status:
+ code: 200
+ message: OK
+- request:
+ body: '{"messages":[{"role":"system","name":"system","content":[{"type":"text","text":"You
+ are a helpful assistant. Use tools when required and keep answers brief."}]},{"role":"user","name":"user","content":[{"type":"text","text":"Use
+ Python to compute 6 * 7 and return just the result."}]}],"model":"gpt-4o-mini","stream":false,"temperature":0,"tools":[{"type":"function","function":{"name":"execute_python_code","parameters":{"properties":{"code":{"description":"The
+ Python code to be executed.","type":"string"},"timeout":{"default":300,"description":"The
+ maximum time (in seconds) allowed for the code to run.","type":"number"}},"required":["code"],"type":"object"},"description":"Execute
+ the given python code in a temp file and capture the return\ncode, standard
+ output and error. Note you must `print` the output to get\nthe result, and the
+ tmp file will be removed right after the execution."}}]}'
+ headers:
+ Accept:
+ - application/json
+ Accept-Encoding:
+ - gzip, deflate
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '897'
+ Content-Type:
+ - application/json
+ Host:
+ - api.openai.com
+ User-Agent:
+ - AsyncOpenAI/Python 2.29.0
+ X-Stainless-Arch:
+ - arm64
+ X-Stainless-Async:
+ - async:asyncio
+ X-Stainless-Lang:
+ - python
+ X-Stainless-OS:
+ - MacOS
+ X-Stainless-Package-Version:
+ - 2.29.0
+ X-Stainless-Runtime:
+ - CPython
+ X-Stainless-Runtime-Version:
+ - 3.13.3
+ x-stainless-read-timeout:
+ - '600'
+ x-stainless-retry-count:
+ - '0'
+ method: POST
+ uri: https://api.openai.com/v1/chat/completions
+ response:
+ body:
+ string: !!binary |
+ H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyDYtDwEAAAD//4xTXW+bMBR951cgP21T
+ mADR0FTaw6ZuL6m2h+WlHZXlmBvi1tjMNmmzKP99tkmApJk0HhDcc8/x/Tguk+dGP8hNJedPt2Jz
+ //B1/rKYo4ljyOUTUHNkfaTS8sAwKTqYKiAGnGqS51mWp9fTmQdqWQJ3tKoxUSajmgkWpXGaRXEe
+ JdcH9loyCtqm/bK/Ybjzb1enKOHVhuPJMVKD1qQCGzsm2aCS3EUQ0ZppQ4RBkwGkUhgQrnTRcj4C
+ jJQcU8L5cHD37Ebfw7BsIs7n2Uv14/Zukc9+t1/ufork23fx+c9idF4nvW18QatW0H5II7yP35wd
+ ZjFBas+FV6CtAdxszVoKTO0gz2RsMlFVW9vuXAtoVyCXVaCbAinQLTfhp3AafgjzohCNYsK868Lv
+ C7RHJ1L74NL342haClatJvztGIkQ0hDXjZ/j4wHZ9yvjsmqUXOozKlpZK+g1ts7RfhLjhQTHQnwJ
+ qD3ZObJydWOwkc/gD02u0k4VDb4c0PTqABpbJx+x8nxyQQ+XYAjzpuh9SAldQzlQBz+StmRyBASj
+ 3t9Wc0m765+J6n/kB4BSaOyNw42CktHTjoc0Be7a/iutn7IvGGlQG3sPsWGg3D5KWBHrlu6O6q02
+ UGO7tAqUN5M3eIOzlKbTeJnHMxTsg78AAAD//wMAxy9FZF8EAAA=
+ headers:
+ CF-Cache-Status:
+ - DYNAMIC
+ CF-Ray:
+ - 9e20e3264eb7f005-SJC
+ Connection:
+ - keep-alive
+ Content-Encoding:
+ - gzip
+ Content-Type:
+ - application/json
+ Date:
+ - Wed, 25 Mar 2026 21:07:49 GMT
+ Server:
+ - cloudflare
+ Strict-Transport-Security:
+ - max-age=31536000; includeSubDomains; preload
+ Transfer-Encoding:
+ - chunked
+ X-Content-Type-Options:
+ - nosniff
+ access-control-expose-headers:
+ - X-Request-ID
+ alt-svc:
+ - h3=":443"; ma=86400
+ openai-organization:
+ - braintrust-data
+ openai-processing-ms:
+ - '724'
+ openai-project:
+ - proj_vsCSXafhhByzWOThMrJcZiw9
+ openai-version:
+ - '2020-10-01'
+ set-cookie:
+ - __cf_bm=0XpUc1fNMZh7X5an8o2lKE5Me2U0kr5yZMBg0xANHr0-1774472868.8462937-1.0.1.1-dprcxP4hyQ0IPDL_vk1NK7FsJ11DyabA3P8I942JNvTn7zKho4A0pRDev9WYlxrW1LwDshBeG3MLMUsQs5Y9hxWVr.Wu3JoHqUF6i2Ho7_hr0NaKJDRn8f.qyG5mv6hc;
+ HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026
+ 21:37:49 GMT
+ x-openai-proxy-wasm:
+ - v0.1
+ x-ratelimit-limit-requests:
+ - '30000'
+ x-ratelimit-limit-tokens:
+ - '150000000'
+ x-ratelimit-remaining-requests:
+ - '29999'
+ x-ratelimit-remaining-tokens:
+ - '149999962'
+ x-ratelimit-reset-requests:
+ - 2ms
+ x-ratelimit-reset-tokens:
+ - 0s
+ x-request-id:
+ - req_4413081d046145c59f7df6e62731b407
+ status:
+ code: 200
+ message: OK
+- request:
+ body: '{"messages":[{"role":"system","name":"system","content":[{"type":"text","text":"You
+ are a helpful assistant. Use tools when required and keep answers brief."}]},{"role":"user","name":"user","content":[{"type":"text","text":"Use
+ Python to compute 6 * 7 and return just the result."}]},{"role":"assistant","name":"Jarvis","content":null,"tool_calls":[{"id":"call_7K4wgODLT79quBLSn1FNnAzT","type":"function","function":{"name":"execute_python_code","arguments":"{\"code\":
+ \"result = 6 * 7\\nprint(result)\"}"}}]},{"role":"tool","tool_call_id":"call_7K4wgODLT79quBLSn1FNnAzT","content":"042\n","name":"execute_python_code"}],"model":"gpt-4o-mini","stream":false,"temperature":0,"tools":[{"type":"function","function":{"name":"execute_python_code","parameters":{"properties":{"code":{"description":"The
+ Python code to be executed.","type":"string"},"timeout":{"default":300,"description":"The
+ maximum time (in seconds) allowed for the code to run.","type":"number"}},"required":["code"],"type":"object"},"description":"Execute
+ the given python code in a temp file and capture the return\ncode, standard
+ output and error. Note you must `print` the output to get\nthe result, and the
+ tmp file will be removed right after the execution."}}]}'
+ headers:
+ Accept:
+ - application/json
+ Accept-Encoding:
+ - gzip, deflate
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '1293'
+ Content-Type:
+ - application/json
+ Cookie:
+ - __cf_bm=0XpUc1fNMZh7X5an8o2lKE5Me2U0kr5yZMBg0xANHr0-1774472868.8462937-1.0.1.1-dprcxP4hyQ0IPDL_vk1NK7FsJ11DyabA3P8I942JNvTn7zKho4A0pRDev9WYlxrW1LwDshBeG3MLMUsQs5Y9hxWVr.Wu3JoHqUF6i2Ho7_hr0NaKJDRn8f.qyG5mv6hc
+ Host:
+ - api.openai.com
+ User-Agent:
+ - AsyncOpenAI/Python 2.29.0
+ X-Stainless-Arch:
+ - arm64
+ X-Stainless-Async:
+ - async:asyncio
+ X-Stainless-Lang:
+ - python
+ X-Stainless-OS:
+ - MacOS
+ X-Stainless-Package-Version:
+ - 2.29.0
+ X-Stainless-Runtime:
+ - CPython
+ X-Stainless-Runtime-Version:
+ - 3.13.3
+ x-stainless-read-timeout:
+ - '600'
+ x-stainless-retry-count:
+ - '0'
+ method: POST
+ uri: https://api.openai.com/v1/chat/completions
+ response:
+ body:
+ string: !!binary |
+ H4sIAAAAAAAA/6rmUlBQykxRslJQSs5ILEnOLcjRdfELyDZzdwUAAAD//41SwW6cMBC971dYPqXS
+ EgGiC7lGinLLKbcmQo4ZwAnYyDO0Sav9947NJpB2K/Vi7c6b93jzZp4r3/rsZ3t9c3Orf3RV22d3
+ RSH3geGenkHTO+tSO+YBGWcXWHtQBEE1K8uiKPOqTCMwugaGQOsmSgqXjMaaJE/zIknLJKtO7N4Z
+ Dcht3/ivEL/iG3zaBl65HLViZQRE1QHX3pu46N0QKlIhGiRlSe5XUDtLYKP1+x6EB5wHEq4VDw8X
+ 4sAvGRYVJf/6IgyKIr/c0j20M6owgp2HYQMoax2pEEE0/nhCjh9WB9dN3j3hH1TZcgTY15wYcnxs
+ C8lNMqJHfh9jJPOnKSULjRPV5F4gfi5PrxY9uW5iRbPyBBI7HDas/LA/o1c3QMoMuAlVaqV7aFbq
+ ugE1N8ZtgN1m6r/dnNNeJje2+x/5FdAaJr6xevLQGP154rXNQzjUf7V9pBwNSwT/nS+vJgM+bKKB
+ VvF1LFeJb0gw1ryuDvzkzXJD7VSn6dfmkKVXZSV3x91vPJeasFEDAAA=
+ headers:
+ CF-Cache-Status:
+ - DYNAMIC
+ CF-Ray:
+ - 9e20e32d6800f95b-SJC
+ Connection:
+ - keep-alive
+ Content-Encoding:
+ - gzip
+ Content-Type:
+ - application/json
+ Date:
+ - Wed, 25 Mar 2026 21:07:51 GMT
+ Server:
+ - cloudflare
+ Strict-Transport-Security:
+ - max-age=31536000; includeSubDomains; preload
+ Transfer-Encoding:
+ - chunked
+ X-Content-Type-Options:
+ - nosniff
+ access-control-expose-headers:
+ - X-Request-ID
+ alt-svc:
+ - h3=":443"; ma=86400
+ openai-organization:
+ - braintrust-data
+ openai-processing-ms:
+ - '642'
+ openai-project:
+ - proj_vsCSXafhhByzWOThMrJcZiw9
+ openai-version:
+ - '2020-10-01'
+ x-openai-proxy-wasm:
+ - v0.1
+ x-ratelimit-limit-requests:
+ - '30000'
+ x-ratelimit-limit-tokens:
+ - '150000000'
+ x-ratelimit-remaining-requests:
+ - '29999'
+ x-ratelimit-remaining-tokens:
+ - '149999947'
+ x-ratelimit-reset-requests:
+ - 2ms
+ x-ratelimit-reset-tokens:
+ - 0s
+ x-request-id:
+ - req_962b361c03444494ac60f59571e1d91c
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/py/src/braintrust/integrations/agentscope/cassettes/test_auto_agentscope.yaml b/py/src/braintrust/integrations/agentscope/cassettes/test_auto_agentscope.yaml
new file mode 100644
index 00000000..221dcabb
--- /dev/null
+++ b/py/src/braintrust/integrations/agentscope/cassettes/test_auto_agentscope.yaml
@@ -0,0 +1,122 @@
+interactions:
+- request:
+ body: '{"messages":[{"role":"system","name":"system","content":[{"type":"text","text":"You
+ are a helpful assistant. Be brief."}]},{"role":"user","name":"user","content":[{"type":"text","text":"Say
+ hi in two words."}]}],"model":"gpt-4o-mini","stream":true,"stream_options":{"include_usage":true},"temperature":0}'
+ headers:
+ Accept:
+ - application/json
+ Accept-Encoding:
+ - gzip, deflate
+ Connection:
+ - keep-alive
+ Content-Length:
+ - '304'
+ Content-Type:
+ - application/json
+ Host:
+ - api.openai.com
+ User-Agent:
+ - AsyncOpenAI/Python 2.29.0
+ X-Stainless-Arch:
+ - arm64
+ X-Stainless-Async:
+ - async:asyncio
+ X-Stainless-Lang:
+ - python
+ X-Stainless-OS:
+ - MacOS
+ X-Stainless-Package-Version:
+ - 2.29.0
+ X-Stainless-Raw-Response:
+ - 'true'
+ X-Stainless-Runtime:
+ - CPython
+ X-Stainless-Runtime-Version:
+ - 3.13.3
+ x-stainless-read-timeout:
+ - '600'
+ x-stainless-retry-count:
+ - '0'
+ method: POST
+ uri: https://api.openai.com/v1/chat/completions
+ response:
+ body:
+ string: 'data: {"id":"chatcmpl-DNPizxiSOmMQFp69qki8RpeoxlQWv","object":"chat.completion.chunk","created":1774472801,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"role":"assistant","content":"","refusal":null},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"bSxoCuDli"}
+
+
+ data: {"id":"chatcmpl-DNPizxiSOmMQFp69qki8RpeoxlQWv","object":"chat.completion.chunk","created":1774472801,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"Hello"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"RR3fPq"}
+
+
+ data: {"id":"chatcmpl-DNPizxiSOmMQFp69qki8RpeoxlQWv","object":"chat.completion.chunk","created":1774472801,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"
+ there"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"Ks8Wg"}
+
+
+ data: {"id":"chatcmpl-DNPizxiSOmMQFp69qki8RpeoxlQWv","object":"chat.completion.chunk","created":1774472801,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{"content":"!"},"logprobs":null,"finish_reason":null}],"usage":null,"obfuscation":"OTuTCk6Yhl"}
+
+
+ data: {"id":"chatcmpl-DNPizxiSOmMQFp69qki8RpeoxlQWv","object":"chat.completion.chunk","created":1774472801,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[{"index":0,"delta":{},"logprobs":null,"finish_reason":"stop"}],"usage":null,"obfuscation":"Ky1jT"}
+
+
+ data: {"id":"chatcmpl-DNPizxiSOmMQFp69qki8RpeoxlQWv","object":"chat.completion.chunk","created":1774472801,"model":"gpt-4o-mini-2024-07-18","service_tier":"default","system_fingerprint":"fp_ca3e7d71bf","choices":[],"usage":{"prompt_tokens":29,"completion_tokens":3,"total_tokens":32,"prompt_tokens_details":{"cached_tokens":0,"audio_tokens":0},"completion_tokens_details":{"reasoning_tokens":0,"audio_tokens":0,"accepted_prediction_tokens":0,"rejected_prediction_tokens":0}},"obfuscation":"tz0KSnmcPPt"}
+
+
+ data: [DONE]
+
+
+ '
+ headers:
+ CF-Cache-Status:
+ - DYNAMIC
+ CF-Ray:
+ - 9e20e1806b8d67a6-SJC
+ Connection:
+ - keep-alive
+ Content-Type:
+ - text/event-stream; charset=utf-8
+ Date:
+ - Wed, 25 Mar 2026 21:06:43 GMT
+ Server:
+ - cloudflare
+ Strict-Transport-Security:
+ - max-age=31536000; includeSubDomains; preload
+ Transfer-Encoding:
+ - chunked
+ X-Content-Type-Options:
+ - nosniff
+ access-control-expose-headers:
+ - X-Request-ID
+ alt-svc:
+ - h3=":443"; ma=86400
+ openai-organization:
+ - braintrust-data
+ openai-processing-ms:
+ - '1276'
+ openai-project:
+ - proj_vsCSXafhhByzWOThMrJcZiw9
+ openai-version:
+ - '2020-10-01'
+ set-cookie:
+ - __cf_bm=26gJQ7Ja3taZFTBb7A3G23kqPcIKnacz5qIbjguQkYs-1774472801.3417854-1.0.1.1-9JqXxOO8Hh_qhFlPUB0VZAqRq_.bnwDdhOr_sD9UVAZbZxHIG013WyWO1wxnxzpoHB2eF6tlQndU7CKalttwp.wptRHYq2G6erRwpDHPPQiZU_8r.6r_TsmfH2ya11Un;
+ HttpOnly; Secure; Path=/; Domain=api.openai.com; Expires=Wed, 25 Mar 2026
+ 21:36:43 GMT
+ x-openai-proxy-wasm:
+ - v0.1
+ x-ratelimit-limit-requests:
+ - '30000'
+ x-ratelimit-limit-tokens:
+ - '150000000'
+ x-ratelimit-remaining-requests:
+ - '29999'
+ x-ratelimit-remaining-tokens:
+ - '149999982'
+ x-ratelimit-reset-requests:
+ - 2ms
+ x-ratelimit-reset-tokens:
+ - 0s
+ x-request-id:
+ - req_369a94628d4547e69137ec894aa584f3
+ status:
+ code: 200
+ message: OK
+version: 1
diff --git a/py/src/braintrust/integrations/agentscope/integration.py b/py/src/braintrust/integrations/agentscope/integration.py
new file mode 100644
index 00000000..20b5f203
--- /dev/null
+++ b/py/src/braintrust/integrations/agentscope/integration.py
@@ -0,0 +1,51 @@
+"""AgentScope integration orchestration."""
+
+from braintrust.integrations.base import BaseIntegration
+
+from .patchers import (
+ AgentCallPatcher,
+ ChatModelPatcher,
+ FanoutPipelinePatcher,
+ GeneralEvaluatorPatcher,
+ MetricCallPatcher,
+ RayEvaluatorRunPatcher,
+ SequentialPipelinePatcher,
+ TaskEvaluatePatcher,
+ ToolkitCallToolFunctionPatcher,
+)
+
+
+class AgentScopeIntegration(BaseIntegration):
+ """Braintrust instrumentation for AgentScope. Requires AgentScope v1.0.0 or higher."""
+
+ name = "agentscope"
+ import_names = ("agentscope",)
+ min_version = "1.0.0"
+ patchers = (
+ AgentCallPatcher,
+ SequentialPipelinePatcher,
+ FanoutPipelinePatcher,
+ ToolkitCallToolFunctionPatcher,
+ ChatModelPatcher,
+ GeneralEvaluatorPatcher,
+ RayEvaluatorRunPatcher,
+ TaskEvaluatePatcher,
+ MetricCallPatcher,
+ )
+
+ eval_patchers = (
+ GeneralEvaluatorPatcher,
+ RayEvaluatorRunPatcher,
+ TaskEvaluatePatcher,
+ MetricCallPatcher,
+ )
+
+ @classmethod
+ def setup(
+ cls,
+ *,
+ target=None,
+ instrument_evals: bool = True,
+ ) -> bool:
+ patchers = cls.patchers if instrument_evals else tuple(p for p in cls.patchers if p not in cls.eval_patchers)
+ return super().setup(target=target, patchers=patchers)
diff --git a/py/src/braintrust/integrations/agentscope/patchers.py b/py/src/braintrust/integrations/agentscope/patchers.py
new file mode 100644
index 00000000..8d4eb314
--- /dev/null
+++ b/py/src/braintrust/integrations/agentscope/patchers.py
@@ -0,0 +1,174 @@
+"""AgentScope patchers."""
+
+from braintrust.integrations.base import CompositeFunctionWrapperPatcher, FunctionWrapperPatcher
+
+from .tracing import (
+ _agent_call_wrapper,
+ _fanout_pipeline_wrapper,
+ _general_evaluator_run_evaluation_wrapper,
+ _general_evaluator_run_solution_wrapper,
+ _general_evaluator_run_wrapper,
+ _metric_call_wrapper,
+ _model_call_wrapper,
+ _ray_evaluator_run_wrapper,
+ _sequential_pipeline_wrapper,
+ _task_evaluate_wrapper,
+ _toolkit_call_tool_function_wrapper,
+)
+
+
class AgentCallPatcher(FunctionWrapperPatcher):
    """Patch AgentScope agent execution.

    Wraps ``AgentBase.__call__`` with ``_agent_call_wrapper`` so every agent
    reply is traced as a span.
    """

    name = "agentscope.agent.call"
    target_module = "agentscope.agent"
    target_path = "AgentBase.__call__"
    wrapper = _agent_call_wrapper
+
+
class SequentialPipelinePatcher(FunctionWrapperPatcher):
    """Patch AgentScope sequential pipeline execution.

    Wraps the module-level ``sequential_pipeline`` function so the agents it
    runs share a common parent span.
    """

    name = "agentscope.pipeline.sequential"
    target_module = "agentscope.pipeline"
    target_path = "sequential_pipeline"
    wrapper = _sequential_pipeline_wrapper
+
+
class FanoutPipelinePatcher(FunctionWrapperPatcher):
    """Patch AgentScope fanout pipeline execution.

    Wraps the module-level ``fanout_pipeline`` function.
    """

    name = "agentscope.pipeline.fanout"
    target_module = "agentscope.pipeline"
    target_path = "fanout_pipeline"
    wrapper = _fanout_pipeline_wrapper
+
+
class ToolkitCallToolFunctionPatcher(FunctionWrapperPatcher):
    """Patch AgentScope toolkit execution.

    Wraps ``Toolkit.call_tool_function`` so each tool invocation is traced.
    """

    name = "agentscope.tool.call_tool_function"
    target_module = "agentscope.tool"
    target_path = "Toolkit.call_tool_function"
    wrapper = _toolkit_call_tool_function_wrapper
+
+
class _OpenAIChatModelPatcher(FunctionWrapperPatcher):
    """Patch ``OpenAIChatModel.__call__`` with the shared model-call wrapper."""

    name = "agentscope.model.openai"
    target_module = "agentscope.model"
    target_path = "OpenAIChatModel.__call__"
    wrapper = _model_call_wrapper


class _DashScopeChatModelPatcher(FunctionWrapperPatcher):
    """Patch ``DashScopeChatModel.__call__`` with the shared model-call wrapper."""

    name = "agentscope.model.dashscope"
    target_module = "agentscope.model"
    target_path = "DashScopeChatModel.__call__"
    wrapper = _model_call_wrapper


class _AnthropicChatModelPatcher(FunctionWrapperPatcher):
    """Patch ``AnthropicChatModel.__call__`` with the shared model-call wrapper."""

    name = "agentscope.model.anthropic"
    target_module = "agentscope.model"
    target_path = "AnthropicChatModel.__call__"
    wrapper = _model_call_wrapper


class _OllamaChatModelPatcher(FunctionWrapperPatcher):
    """Patch ``OllamaChatModel.__call__`` with the shared model-call wrapper."""

    name = "agentscope.model.ollama"
    target_module = "agentscope.model"
    target_path = "OllamaChatModel.__call__"
    wrapper = _model_call_wrapper


class _GeminiChatModelPatcher(FunctionWrapperPatcher):
    """Patch ``GeminiChatModel.__call__`` with the shared model-call wrapper."""

    name = "agentscope.model.gemini"
    target_module = "agentscope.model"
    target_path = "GeminiChatModel.__call__"
    wrapper = _model_call_wrapper


class _TrinityChatModelPatcher(FunctionWrapperPatcher):
    """Patch ``TrinityChatModel.__call__`` with the shared model-call wrapper."""

    name = "agentscope.model.trinity"
    target_module = "agentscope.model"
    target_path = "TrinityChatModel.__call__"
    wrapper = _model_call_wrapper
+
+
class ChatModelPatcher(CompositeFunctionWrapperPatcher):
    """Patch the built-in AgentScope chat model implementations.

    Composite of one sub-patcher per bundled provider class; each applies
    ``_model_call_wrapper`` to that provider's ``__call__``.
    """

    name = "agentscope.model"
    sub_patchers = (
        _OpenAIChatModelPatcher,
        _DashScopeChatModelPatcher,
        _AnthropicChatModelPatcher,
        _OllamaChatModelPatcher,
        _GeminiChatModelPatcher,
        _TrinityChatModelPatcher,
    )
+
+
class _GeneralEvaluatorRunPatcher(FunctionWrapperPatcher):
    """Patch AgentScope GeneralEvaluator root execution (``GeneralEvaluator.run``)."""

    name = "agentscope.evaluate.general.run"
    target_module = "agentscope.evaluate"
    target_path = "GeneralEvaluator.run"
    wrapper = _general_evaluator_run_wrapper


class _GeneralEvaluatorRunSolutionPatcher(FunctionWrapperPatcher):
    """Patch AgentScope GeneralEvaluator solution execution (one task/repeat run)."""

    name = "agentscope.evaluate.general.run_solution"
    target_module = "agentscope.evaluate"
    target_path = "GeneralEvaluator.run_solution"
    wrapper = _general_evaluator_run_solution_wrapper


class _GeneralEvaluatorRunEvaluationPatcher(FunctionWrapperPatcher):
    """Patch AgentScope GeneralEvaluator evaluation execution (metric scoring)."""

    name = "agentscope.evaluate.general.run_evaluation"
    target_module = "agentscope.evaluate"
    target_path = "GeneralEvaluator.run_evaluation"
    wrapper = _general_evaluator_run_evaluation_wrapper
+
+
class GeneralEvaluatorPatcher(CompositeFunctionWrapperPatcher):
    """Patch AgentScope GeneralEvaluator for Braintrust eval tracing.

    Composite over the run / run_solution / run_evaluation sub-patchers.
    """

    name = "agentscope.evaluate.general"
    sub_patchers = (
        _GeneralEvaluatorRunPatcher,
        _GeneralEvaluatorRunSolutionPatcher,
        _GeneralEvaluatorRunEvaluationPatcher,
    )
+
+
class RayEvaluatorRunPatcher(FunctionWrapperPatcher):
    """Patch AgentScope RayEvaluator root execution (``RayEvaluator.run``)."""

    name = "agentscope.evaluate.ray"
    target_module = "agentscope.evaluate"
    target_path = "RayEvaluator.run"
    wrapper = _ray_evaluator_run_wrapper


class TaskEvaluatePatcher(FunctionWrapperPatcher):
    """Patch AgentScope task evaluation (``Task.evaluate``)."""

    name = "agentscope.evaluate.task"
    target_module = "agentscope.evaluate"
    target_path = "Task.evaluate"
    wrapper = _task_evaluate_wrapper


class MetricCallPatcher(FunctionWrapperPatcher):
    """Patch AgentScope metric execution (``MetricBase.__call__``)."""

    name = "agentscope.evaluate.metric"
    target_module = "agentscope.evaluate"
    target_path = "MetricBase.__call__"
    wrapper = _metric_call_wrapper
diff --git a/py/src/braintrust/integrations/agentscope/test_agentscope.py b/py/src/braintrust/integrations/agentscope/test_agentscope.py
new file mode 100644
index 00000000..3688fcd7
--- /dev/null
+++ b/py/src/braintrust/integrations/agentscope/test_agentscope.py
@@ -0,0 +1,474 @@
+import sys
+from dataclasses import dataclass
+from pathlib import Path
+from types import ModuleType
+
+import pytest
+from braintrust import logger
+from braintrust.integrations.agentscope import setup_agentscope, wrap_evaluator
+from braintrust.integrations.agentscope.patchers import (
+ AgentCallPatcher,
+ MetricCallPatcher,
+ TaskEvaluatePatcher,
+ _GeneralEvaluatorRunEvaluationPatcher,
+ _GeneralEvaluatorRunPatcher,
+ _GeneralEvaluatorRunSolutionPatcher,
+)
+from braintrust.span_types import SpanTypeAttribute
+from braintrust.test_helpers import init_test_logger
+from braintrust.wrappers.test_utils import verify_autoinstrument_script
+
+
# Project used for every span produced by this module.
PROJECT_NAME = "test_agentscope"

# Install the instrumentation at import time so agentscope classes are patched
# before any test constructs agents, models, or evaluators.
setup_agentscope(project_name=PROJECT_NAME)
+
+
@pytest.fixture(scope="module")
def vcr_config():
    """Point VCR at the cassettes directory that sits next to this module."""
    cassette_dir = Path(__file__).parent / "cassettes"
    return {"cassette_library_dir": str(cassette_dir)}
+
+
@pytest.fixture
def memory_logger():
    """Yield an in-memory background logger scoped to the test project."""
    init_test_logger(PROJECT_NAME)
    with logger._internal_with_memory_background_logger() as background:
        yield background
+
+
+def _span_type(span):
+ span_type = span["span_attributes"]["type"]
+ return span_type.value if hasattr(span_type, "value") else span_type
+
+
def _make_model(*, stream: bool = False):
    """Build a deterministic (temperature=0) gpt-4o-mini chat model for tests."""
    from agentscope.model import OpenAIChatModel

    return OpenAIChatModel(
        model_name="gpt-4o-mini",
        generate_kwargs={"temperature": 0},
        stream=stream,
    )
+
+
def _make_agent(name: str, sys_prompt: str, *, toolkit=None, multi_agent: bool = False):
    """Construct a ReActAgent with console output silenced for quiet test runs."""
    from agentscope.agent import ReActAgent
    from agentscope.formatter import OpenAIChatFormatter, OpenAIMultiAgentFormatter
    from agentscope.memory import InMemoryMemory
    from agentscope.tool import Toolkit

    formatter = OpenAIMultiAgentFormatter() if multi_agent else OpenAIChatFormatter()
    agent = ReActAgent(
        name=name,
        sys_prompt=sys_prompt,
        model=_make_model(),
        formatter=formatter,
        toolkit=toolkit or Toolkit(),
        memory=InMemoryMemory(),
    )
    # The console-silencing API differs across agentscope versions; try both.
    if hasattr(agent, "set_console_output_enabled"):
        agent.set_console_output_enabled(False)
    elif hasattr(agent, "disable_console_output"):
        agent.disable_console_output()
    return agent
+
+
@pytest.mark.vcr
@pytest.mark.asyncio
async def test_agentscope_simple_agent_run(memory_logger):
    """A single agent reply yields a task span with a child LLM span."""
    from agentscope.message import Msg

    assert not memory_logger.pop()

    agent = _make_agent(
        "Friday",
        "You are a concise assistant. Answer in one sentence.",
    )

    response = await agent(
        Msg(
            name="user",
            content="Say hello in exactly two words.",
            role="user",
        )
    )

    assert response is not None

    spans = memory_logger.pop()
    # Agent calls are traced as "<agent name>.reply"; the model call nests below.
    agent_span = next(span for span in spans if span["span_attributes"]["name"] == "Friday.reply")
    llm_spans = [span for span in spans if _span_type(span) == SpanTypeAttribute.LLM]

    assert _span_type(agent_span) == "task"
    assert llm_spans
    assert llm_spans[0]["metadata"]["model"] == "gpt-4o-mini"
    # LLM input is normalized to structured messages, not raw positional args.
    assert "args" not in llm_spans[0]["input"]
    assert llm_spans[0]["input"]["messages"][0]["role"] == "system"
    assert llm_spans[0]["input"]["messages"][1]["role"] == "user"
    assert llm_spans[0]["input"]["messages"][1]["content"][0]["text"] == "Say hello in exactly two words."
    assert llm_spans[0]["output"]["role"] == "assistant"
    # Expected text comes from the recorded cassette for this prompt.
    assert llm_spans[0]["output"]["content"][0]["text"] == "Hello there."
    # Token usage is surfaced as span metrics, not left in the output payload.
    assert "usage" not in llm_spans[0]["output"]
    assert agent_span["span_id"] in llm_spans[0]["span_parents"]
+
+
@pytest.mark.vcr
@pytest.mark.asyncio
async def test_agentscope_sequential_pipeline_creates_parent_span(memory_logger):
    """sequential_pipeline wraps both agents' reply spans under one pipeline span."""
    from agentscope.message import Msg
    from agentscope.pipeline import sequential_pipeline

    assert not memory_logger.pop()

    agents = [
        _make_agent("Alice", "You rewrite the input as a short title.", multi_agent=True),
        _make_agent("Bob", "You answer the previous message in one sentence.", multi_agent=True),
    ]

    result = await sequential_pipeline(
        agents=agents,
        msg=Msg(
            name="user",
            content="Summarize why tests should use real recorded traffic.",
            role="user",
        ),
    )

    assert result is not None

    spans = memory_logger.pop()
    pipeline_span = next(span for span in spans if span["span_attributes"]["name"] == "sequential_pipeline.run")
    alice_span = next(span for span in spans if span["span_attributes"]["name"] == "Alice.reply")
    bob_span = next(span for span in spans if span["span_attributes"]["name"] == "Bob.reply")

    assert _span_type(pipeline_span) == "task"
    # Both agent spans must be children of the pipeline span.
    assert pipeline_span["span_id"] in alice_span["span_parents"]
    assert pipeline_span["span_id"] in bob_span["span_parents"]
+
+
@pytest.mark.vcr
@pytest.mark.asyncio
async def test_agentscope_tool_use_creates_tool_span(memory_logger):
    """A tool-using agent produces a tool span plus a tool_use LLM output."""
    from agentscope.message import Msg
    from agentscope.tool import Toolkit, execute_python_code

    assert not memory_logger.pop()

    toolkit = Toolkit()
    toolkit.register_tool_function(execute_python_code)
    agent = _make_agent(
        "Jarvis",
        "You are a helpful assistant. Use tools when required and keep answers brief.",
        toolkit=toolkit,
    )

    response = await agent(
        Msg(
            name="user",
            content="Use Python to compute 6 * 7 and return just the result.",
            role="user",
        )
    )

    assert response is not None

    spans = memory_logger.pop()
    tool_spans = [span for span in spans if _span_type(span) == "tool"]

    assert tool_spans
    # Tool spans are named "<tool name>.execute".
    assert tool_spans[0]["span_attributes"]["name"] == "execute_python_code.execute"
    assert tool_spans[0]["input"]["tool_name"] == "execute_python_code"
    assert tool_spans[0]["output"]["content"]

    llm_spans = [span for span in spans if _span_type(span) == SpanTypeAttribute.LLM]
    assert llm_spans
    assert llm_spans[0]["output"]["role"] == "assistant"
    # First model turn is a tool call, per the recorded cassette.
    assert llm_spans[0]["output"]["content"][0]["type"] == "tool_use"
    assert "usage" not in llm_spans[0]["output"]
+
+
@pytest.mark.asyncio
async def test_model_call_wrapper_stream_logs_final_output_and_metrics(memory_logger):
    """Streaming model calls log only the final chunk plus token metrics."""
    from braintrust.integrations.agentscope.tracing import _model_call_wrapper

    assert not memory_logger.pop()

    # Minimal stand-in: only model_name is read by the wrapper's metadata path.
    class FakeOpenAIChatModel:
        model_name = "gpt-4o-mini"

    async def wrapped(*_args, **_kwargs):
        async def _stream():
            # Cumulative chunks, with usage attached to the final one.
            yield {"content": [{"type": "text", "text": "Hello"}]}
            yield {
                "content": [{"type": "text", "text": "Hello there!"}],
                "usage": {"prompt_tokens": 29, "completion_tokens": 3, "total_tokens": 32},
            }

        return _stream()

    stream = await _model_call_wrapper(
        wrapped,
        FakeOpenAIChatModel(),
        args=([{"role": "user", "content": "Say hi in two words."}],),
        kwargs={},
    )

    chunks = [chunk async for chunk in stream]

    assert chunks[-1]["content"][0]["text"] == "Hello there!"

    spans = memory_logger.pop()
    assert len(spans) == 1
    llm_span = spans[0]

    assert _span_type(llm_span) == SpanTypeAttribute.LLM
    # Only the final aggregated chunk is logged as the span output.
    assert llm_span["output"]["role"] == "assistant"
    assert llm_span["output"]["content"][0]["text"] == "Hello there!"
    assert llm_span["metrics"]["prompt_tokens"] == 29
    assert llm_span["metrics"]["completion_tokens"] == 3
    assert llm_span["metrics"]["tokens"] == 32
+
+
@pytest.mark.vcr
@pytest.mark.asyncio
async def test_agentscope_general_evaluator_creates_eval_spans(memory_logger, tmp_path):
    """GeneralEvaluator.run yields the full eval span tree:
    run -> solution -> (agent reply, evaluate -> metric score)."""
    from agentscope.evaluate import (
        BenchmarkBase,
        FileEvaluatorStorage,
        GeneralEvaluator,
        MetricBase,
        MetricResult,
        MetricType,
        SolutionOutput,
        Task,
    )
    from agentscope.message import Msg

    assert not memory_logger.pop()

    # Numeric metric: 1.0 when the model output equals the ground truth.
    class ExactMatchMetric(MetricBase):
        def __init__(self, ground_truth: str):
            super().__init__(
                name="exact_match",
                metric_type=MetricType.NUMERICAL,
                description="Check whether the model answer exactly matches the ground truth.",
                categories=[],
            )
            self.ground_truth = ground_truth

        async def __call__(self, solution: SolutionOutput) -> MetricResult:
            is_match = solution.output == self.ground_truth
            return MetricResult(
                name=self.name,
                result=1.0 if is_match else 0.0,
                message="Correct" if is_match else "Incorrect",
            )

    # Single-task benchmark so the span tree stays small and deterministic.
    class ToyBenchmark(BenchmarkBase):
        def __init__(self, tasks):
            super().__init__(
                name="Toy benchmark",
                description="A one-task benchmark for AgentScope eval instrumentation.",
            )
            self.tasks = tasks

        def __iter__(self):
            yield from self.tasks

        def __len__(self):
            return len(self.tasks)

        def __getitem__(self, index):
            return self.tasks[index]

    task = Task(
        id="hello-task",
        input="Say hello in exactly two words.",
        ground_truth="Hello there.",
        metrics=[ExactMatchMetric("Hello there.")],
        tags={"difficulty": "easy", "category": "greeting"},
        metadata={"suite": "toy"},
    )
    evaluator = GeneralEvaluator(
        name="Toy benchmark evaluation",
        benchmark=ToyBenchmark([task]),
        n_repeat=1,
        storage=FileEvaluatorStorage(save_dir=str(tmp_path / "agentscope-eval")),
        n_workers=1,
    )

    async def solution(eval_task: Task, pre_hook):
        agent = _make_agent(
            "Friday",
            "You are a concise assistant. Answer in one sentence.",
        )
        if hasattr(agent, "register_instance_hook"):
            agent.register_instance_hook("pre_print", "save_logging", pre_hook)

        response = await agent(
            Msg(
                name="user",
                content=eval_task.input,
                role="user",
            )
        )

        # Flatten structured content down to the first text block.
        content = response.content
        if isinstance(content, list):
            output = next(
                (item["text"] for item in content if isinstance(item, dict) and item.get("type") == "text"),
                None,
            )
            trajectory = content
        else:
            output = content
            trajectory = [content]

        return SolutionOutput(
            success=True,
            output=output,
            trajectory=trajectory,
            meta={"agent": "Friday"},
        )

    await evaluator.run(solution)

    spans = memory_logger.pop()
    root_span = next(span for span in spans if span["span_attributes"]["name"] == "agentscope.evaluate.run")
    solution_span = next(span for span in spans if span["span_attributes"]["name"] == "hello-task.solution")
    evaluation_span = next(span for span in spans if span["span_attributes"]["name"] == "hello-task.evaluate")
    metric_span = next(span for span in spans if span["span_attributes"]["name"] == "exact_match")
    agent_span = next(span for span in spans if span["span_attributes"]["name"] == "Friday.reply")

    assert _span_type(root_span) == "eval"
    assert root_span["metadata"]["benchmark_name"] == "Toy benchmark"
    assert root_span["metadata"]["task_count"] == 1
    assert root_span["output"]["status"] == "completed"

    assert _span_type(solution_span) == "task"
    assert solution_span["input"] == "Say hello in exactly two words."
    assert solution_span["expected"] == "Hello there."
    # Dict tags are flattened to sorted "key:value" strings on the span.
    assert solution_span["tags"] == ["category:greeting", "difficulty:easy"]
    assert solution_span["metadata"]["repeat_id"] == "0"
    assert solution_span["metadata"]["metric_names"] == ["exact_match"]
    assert solution_span["metadata"]["task_tags"] == {"difficulty": "easy", "category": "greeting"}
    assert solution_span["output"]["output"] == "Hello there."
    assert solution_span["span_id"] in agent_span["span_parents"]

    # Parent/child relationships across the eval span tree.
    assert _span_type(evaluation_span) == "eval"
    assert evaluation_span["span_id"] in metric_span["span_parents"]
    assert solution_span["span_id"] in evaluation_span["span_parents"]
    assert root_span["span_id"] in solution_span["span_parents"]
    assert evaluation_span["output"][0]["result"] == 1.0
    assert evaluation_span["output"][0]["message"] == "Correct"

    assert _span_type(metric_span) == "score"
    assert metric_span["scores"]["exact_match"] == 1.0
    assert metric_span["output"]["result"] == 1.0
    assert metric_span["output"]["message"] == "Correct"
+
+
@dataclass
class _FakeAgentscopeModules:
    """Handles to the fake agentscope classes installed in ``sys.modules``."""

    # Each field holds the fake class so tests can inspect patch markers on it.
    AgentBase: type
    GeneralEvaluator: type
    MetricBase: type
    Task: type
+
+
@pytest.fixture
def fake_agentscope_modules(monkeypatch):
    """Install minimal fake agentscope modules so patchers run without the real package."""
    agentscope_module = ModuleType("agentscope")
    # Mark as a package and advertise a version satisfying the integration's minimum.
    agentscope_module.__path__ = []
    agentscope_module.__version__ = "1.0.0"

    agent_module = ModuleType("agentscope.agent")
    evaluate_module = ModuleType("agentscope.evaluate")

    # Bare async stand-ins for the classes the patchers target.
    class AgentBase:
        async def __call__(self, *_args, **_kwargs):
            return "ok"

    class Task:
        async def evaluate(self, *_args, **_kwargs):
            return []

    class MetricBase:
        async def __call__(self, *_args, **_kwargs):
            return None

    class GeneralEvaluator:
        async def run(self, *_args, **_kwargs):
            return None

        async def run_solution(self, *_args, **_kwargs):
            return None

        async def run_evaluation(self, *_args, **_kwargs):
            return None

    agent_module.AgentBase = AgentBase
    evaluate_module.GeneralEvaluator = GeneralEvaluator
    evaluate_module.Task = Task
    evaluate_module.MetricBase = MetricBase

    agentscope_module.agent = agent_module
    agentscope_module.evaluate = evaluate_module

    # monkeypatch restores sys.modules after the test, keeping patching isolated.
    monkeypatch.setitem(sys.modules, "agentscope", agentscope_module)
    monkeypatch.setitem(sys.modules, "agentscope.agent", agent_module)
    monkeypatch.setitem(sys.modules, "agentscope.evaluate", evaluate_module)

    return _FakeAgentscopeModules(
        AgentBase=AgentBase,
        GeneralEvaluator=GeneralEvaluator,
        MetricBase=MetricBase,
        Task=Task,
    )
+
+
def test_setup_agentscope_can_skip_eval_patchers(fake_agentscope_modules):
    """With instrument_evals=False agents get patched but eval types do not."""
    result = setup_agentscope(project_name=PROJECT_NAME, instrument_evals=False)

    assert result is True
    # Agent patching still happens...
    assert getattr(fake_agentscope_modules.AgentBase.__call__, AgentCallPatcher.patch_marker_attr(), False)
    # ...but none of the eval-related classes carry a patch marker.
    assert not getattr(
        fake_agentscope_modules.GeneralEvaluator, _GeneralEvaluatorRunPatcher.patch_marker_attr(), False
    )
    assert not getattr(
        fake_agentscope_modules.GeneralEvaluator,
        _GeneralEvaluatorRunSolutionPatcher.patch_marker_attr(),
        False,
    )
    assert not getattr(
        fake_agentscope_modules.GeneralEvaluator,
        _GeneralEvaluatorRunEvaluationPatcher.patch_marker_attr(),
        False,
    )
    assert not getattr(fake_agentscope_modules.Task, TaskEvaluatePatcher.patch_marker_attr(), False)
    assert not getattr(fake_agentscope_modules.MetricBase, MetricCallPatcher.patch_marker_attr(), False)
+
+
def test_wrap_evaluator_patches_evaluator_and_eval_types(fake_agentscope_modules):
    """wrap_evaluator patches in place, returns its argument, and is idempotent."""
    wrapped = wrap_evaluator(fake_agentscope_modules.GeneralEvaluator)
    # Second call must be a no-op (already-patched classes are returned as-is).
    wrapped_again = wrap_evaluator(fake_agentscope_modules.GeneralEvaluator)

    assert wrapped is fake_agentscope_modules.GeneralEvaluator
    assert wrapped_again is fake_agentscope_modules.GeneralEvaluator
    assert getattr(fake_agentscope_modules.GeneralEvaluator, _GeneralEvaluatorRunPatcher.patch_marker_attr(), False)
    assert getattr(
        fake_agentscope_modules.GeneralEvaluator, _GeneralEvaluatorRunSolutionPatcher.patch_marker_attr(), False
    )
    assert getattr(
        fake_agentscope_modules.GeneralEvaluator,
        _GeneralEvaluatorRunEvaluationPatcher.patch_marker_attr(),
        False,
    )
    # Wrapping the evaluator also patches the associated Task and Metric types.
    assert getattr(fake_agentscope_modules.Task, TaskEvaluatePatcher.patch_marker_attr(), False)
    assert getattr(fake_agentscope_modules.MetricBase, MetricCallPatcher.patch_marker_attr(), False)
+
+
class TestAutoInstrumentAgentScope:
    """End-to-end check that auto_instrument picks up agentscope."""

    def test_auto_instrument_agentscope(self):
        # Runs a standalone script through the shared auto-instrument harness.
        verify_autoinstrument_script("test_auto_agentscope.py")
diff --git a/py/src/braintrust/integrations/agentscope/tracing.py b/py/src/braintrust/integrations/agentscope/tracing.py
new file mode 100644
index 00000000..b5e1e4ad
--- /dev/null
+++ b/py/src/braintrust/integrations/agentscope/tracing.py
@@ -0,0 +1,611 @@
+"""AgentScope-specific span creation and stream aggregation."""
+
+from contextlib import aclosing
+from contextvars import ContextVar
+from typing import Any
+
+from braintrust.logger import start_span
+from braintrust.span_types import SpanPurpose, SpanTypeAttribute
+
+
# NOTE(review): set True around the evaluator's inner run_evaluation call (see
# _general_evaluator_run_evaluation_wrapper); presumably read by the Task.evaluate
# wrapper to avoid opening a duplicate span — the consumer is not visible here.
_SUPPRESS_TASK_EVALUATE_SPAN: ContextVar[bool] = ContextVar("_SUPPRESS_TASK_EVALUATE_SPAN", default=False)
+
+
+def _clean(mapping: dict[str, Any]) -> dict[str, Any]:
+ return {key: value for key, value in mapping.items() if value is not None}
+
+
+def _args_kwargs_input(args: Any, kwargs: dict[str, Any]) -> dict[str, Any]:
+ return _clean(
+ {
+ "args": list(args) if args else None,
+ "kwargs": kwargs if kwargs else None,
+ }
+ )
+
+
+def _agent_name(instance: Any) -> str:
+ return getattr(instance, "name", None) or instance.__class__.__name__
+
+
+def _pipeline_metadata(args: Any, kwargs: dict[str, Any]) -> dict[str, Any]:
+ agents = kwargs.get("agents")
+ if agents is None and args:
+ agents = args[0]
+
+ agent_names = None
+ if agents:
+ agent_names = [getattr(agent, "name", agent.__class__.__name__) for agent in agents]
+
+ return _clean({"agent_names": agent_names})
+
+
def _extract_metrics(*candidates: Any) -> dict[str, float] | None:
    """Normalize token-usage fields from the first candidate that has any.

    Accepts provider-specific spellings (input_tokens/output_tokens/total_tokens)
    and maps them onto Braintrust metric names.
    """
    key_map = {
        "prompt_tokens": "prompt_tokens",
        "input_tokens": "prompt_tokens",
        "completion_tokens": "completion_tokens",
        "output_tokens": "completion_tokens",
        "total_tokens": "tokens",
        "tokens": "tokens",
    }

    for candidate in candidates:
        # Prefer a nested "usage" payload, falling back to the candidate itself.
        data = _field_value(candidate, "usage") or candidate
        metrics = {
            target: float(value)
            for source, target in key_map.items()
            if isinstance(value := _field_value(data, source), (int, float))
        }
        if metrics:
            return metrics

    return None
+
+
+def _model_provider_name(instance: Any) -> str:
+ class_name = instance.__class__.__name__
+ if class_name.endswith("Model"):
+ return class_name[: -len("Model")]
+ return class_name
+
+
def _model_metadata(instance: Any) -> dict[str, Any]:
    """Base metadata describing the chat-model instance behind a call."""
    described = {
        "model": getattr(instance, "model_name", None),
        "provider": _model_provider_name(instance),
        "model_class": instance.__class__.__name__,
    }
    return {key: value for key, value in described.items() if value is not None}
+
+
+def _model_call_input(args: Any, kwargs: dict[str, Any]) -> dict[str, Any]:
+ messages = kwargs.get("messages")
+ if messages is None and args:
+ messages = args[0]
+
+ tools = kwargs.get("tools")
+ if tools is None and len(args) > 1:
+ tools = args[1]
+
+ tool_choice = kwargs.get("tool_choice")
+ if tool_choice is None and len(args) > 2:
+ tool_choice = args[2]
+
+ structured_model = kwargs.get("structured_model")
+ if structured_model is None and len(args) > 3:
+ structured_model = args[3]
+
+ return _clean(
+ {
+ "messages": messages,
+ "tools": tools,
+ "tool_choice": tool_choice,
+ "structured_model": structured_model,
+ }
+ )
+
+
def _model_call_metadata(instance: Any, kwargs: dict[str, Any]) -> dict[str, Any]:
    """Merge model metadata with any extra generation kwargs (temperature, etc.)."""
    reserved = {"messages", "tools", "tool_choice", "structured_model"}
    merged = dict(_model_metadata(instance))
    for key, value in kwargs.items():
        if key not in reserved and value is not None:
            merged[key] = value
    return merged
+
+
def _model_call_output(result: Any) -> Any:
    """Normalize a chat-model result into a {role, content, metadata} dict for span output.

    Returns the raw result unchanged when it exposes neither "content" nor "metadata".
    """
    if isinstance(result, dict):
        data = result
    elif _field_value(result, "content") is not None or _field_value(result, "metadata") is not None:
        # ChatResponse-like object: project just the fields worth logging.
        data = {
            "content": _field_value(result, "content"),
            "metadata": _field_value(result, "metadata"),
        }
    else:
        return result

    normalized = _clean(
        {
            # Only tag a role when there is actual content to attribute to the assistant.
            "role": "assistant" if data.get("content") is not None else None,
            "content": data.get("content"),
            "metadata": data.get("metadata"),
        }
    )
    return normalized or data
+
+
+def _field_value(data: Any, key: str) -> Any:
+ if isinstance(data, dict):
+ return data.get(key)
+ try:
+ return getattr(data, key, None)
+ except Exception:
+ return None
+
+
+def _tool_name(tool_call: Any) -> str:
+ if isinstance(tool_call, dict):
+ return str(tool_call.get("name") or "unknown_tool")
+ return str(getattr(tool_call, "name", "unknown_tool"))
+
+
+def _call_arg(args: Any, kwargs: dict[str, Any], index: int, key: str) -> Any:
+ if key in kwargs:
+ return kwargs[key]
+ return args[index] if len(args) > index else None
+
+
+def _maybe_awaitable_name(value: Any) -> str | None:
+ return getattr(value, "__name__", None) or getattr(value, "__qualname__", None)
+
+
+def _metric_name(metric: Any) -> str:
+ return str(getattr(metric, "name", None) or metric.__class__.__name__)
+
+
def _task_id(task: Any) -> str:
    """Stable task identifier: id, then name, then the class name."""
    for key in ("id", "name"):
        value = _field_value(task, key)
        if value:
            return str(value)
    return str(task.__class__.__name__)
+
+
def _task_input(task: Any) -> Any:
    """First non-None of the common task input fields."""
    candidates = ("input", "input_data", "question", "prompt")
    return next(
        (value for key in candidates if (value := _field_value(task, key)) is not None),
        None,
    )
+
+
def _task_expected(task: Any) -> Any:
    """First non-None of the common task ground-truth fields."""
    candidates = ("ground_truth", "expected", "reference", "answer")
    return next(
        (value for key in candidates if (value := _field_value(task, key)) is not None),
        None,
    )
+
+
def _task_tags(task: Any) -> Any:
    """Task tags, flattening a dict of tags into sorted "key:value" strings."""
    tags = _field_value(task, "tags")
    if not isinstance(tags, dict):
        return tags
    return [f"{key}:{value}" for key, value in sorted(tags.items())]
+
+
def _task_metric_names(task: Any) -> list[str] | None:
    """Names of the metrics attached to the task, or None when there are none."""
    metrics = _field_value(task, "metrics")
    return [_metric_name(metric) for metric in metrics] if metrics else None
+
+
def _task_metadata(task: Any) -> dict[str, Any]:
    """Task-provided metadata dict; empty dict when absent or not a dict."""
    metadata = _field_value(task, "metadata")
    return metadata if isinstance(metadata, dict) else {}
+
+
def _solution_output_summary(solution_output: Any) -> Any:
    """Project a SolutionOutput-like object down to the fields worth logging.

    Dicts and None pass through; objects with none of the known fields are
    returned unchanged.
    """
    if solution_output is None:
        return None
    if isinstance(solution_output, dict):
        return solution_output

    fields = {
        "output": _field_value(solution_output, "output"),
        "success": _field_value(solution_output, "success"),
        "trajectory": _field_value(solution_output, "trajectory"),
        "meta": _field_value(solution_output, "meta") or _field_value(solution_output, "metadata"),
        "message": _field_value(solution_output, "message"),
    }
    summary = {key: value for key, value in fields.items() if value is not None}
    return summary or solution_output
+
+
def _metric_result_summary(result: Any) -> Any:
    """Project a MetricResult-like object down to the fields worth logging.

    Dicts and None pass through; objects with none of the known fields are
    returned unchanged.
    """
    if result is None:
        return None
    if isinstance(result, dict):
        return result

    fields = {
        "result": _field_value(result, "result"),
        "message": _field_value(result, "message"),
        "detail": _field_value(result, "detail"),
        "metadata": _field_value(result, "metadata") or _field_value(result, "meta"),
    }
    summary = {key: value for key, value in fields.items() if value is not None}
    return summary or result
+
+
def _metric_score(metric: Any, result: Any) -> dict[str, float] | None:
    """Convert a metric result into a Braintrust score, when it is numeric.

    bool is checked before int/float so True/False map to 1.0/0.0.
    """
    value = _field_value(result, "result")
    if isinstance(value, bool):
        score = 1.0 if value else 0.0
    elif isinstance(value, (int, float)):
        score = float(value)
    else:
        return None
    return {_metric_name(metric): score}
+
+
def _evaluator_metadata(instance: Any, solution: Any = None) -> dict[str, Any]:
    """Describe an evaluator run for span metadata.

    Args:
        instance: The evaluator (e.g. GeneralEvaluator/RayEvaluator) being run.
        solution: The solution callable passed to ``run``, if available.

    Fix: when the evaluator has no storage, the previous
    ``getattr(..., "__class__", type(None)).__name__`` expression produced the
    literal string "NoneType", which survived ``_clean``; report None instead
    so the key is omitted.
    """
    benchmark = getattr(instance, "benchmark", None)
    task_count = len(benchmark) if benchmark is not None and hasattr(benchmark, "__len__") else None
    storage = getattr(instance, "storage", None)
    return _clean(
        {
            "evaluator_class": instance.__class__.__name__,
            "evaluator_name": getattr(instance, "name", None),
            "benchmark_name": _field_value(benchmark, "name"),
            "benchmark_description": _field_value(benchmark, "description"),
            "task_count": task_count,
            "n_repeat": getattr(instance, "n_repeat", None),
            "n_workers": getattr(instance, "n_workers", None),
            "storage_class": storage.__class__.__name__ if storage is not None else None,
            "solution_name": _maybe_awaitable_name(solution),
        }
    )
+
+
def _task_span_metadata(task: Any, repeat_id: str | None = None, **extra: Any) -> dict[str, Any]:
    """Combine task-provided metadata with the identifiers logged on eval spans."""
    raw_tags = _field_value(task, "tags")
    combined: dict[str, Any] = dict(_task_metadata(task))
    combined.update(
        {
            "task_id": _task_id(task),
            "repeat_id": repeat_id,
            "metric_names": _task_metric_names(task),
            # Keep the original dict form alongside the flattened span tags.
            "task_tags": raw_tags if isinstance(raw_tags, dict) else None,
        }
    )
    combined.update(extra)
    return {key: value for key, value in combined.items() if value is not None}
+
+
+def _storage_get(storage: Any, method_name: str, *args: Any) -> Any:
+ method = getattr(storage, method_name, None)
+ if method is None:
+ return None
+ try:
+ return method(*args)
+ except Exception:
+ return None
+
+
def _stored_solution_output(instance: Any, task: Any, repeat_id: str) -> Any:
    """Fetch the persisted solution result for (task, repeat) from evaluator storage."""
    storage = getattr(instance, "storage", None)
    if storage is None:
        return None
    return _storage_get(storage, "get_solution_result", _task_id(task), repeat_id)
+
+
def _stored_evaluation_results(instance: Any, task: Any, repeat_id: str) -> list[Any] | None:
    """Fetch all persisted metric results for (task, repeat).

    Returns None when storage or metrics are missing, or when any single
    metric result cannot be found (stops at the first miss).
    """
    storage = getattr(instance, "storage", None)
    metrics = _field_value(task, "metrics") or []
    if storage is None or not metrics:
        return None

    collected: list[Any] = []
    for metric in metrics:
        fetched = _storage_get(storage, "get_evaluation_result", _task_id(task), repeat_id, _metric_name(metric))
        if fetched is None:
            return None
        collected.append(fetched)
    return collected
+
+
def _log_metric_span(parent_span: Any, metric: Any, solution_output: Any, result: Any) -> None:
    """Record one metric evaluation as a score span under parent_span."""
    with parent_span.start_span(
        name=_metric_name(metric),
        type=SpanTypeAttribute.SCORE,
        span_attributes={"purpose": SpanPurpose.SCORER.value},
        input=_solution_output_summary(solution_output),
        metadata=_clean({"metric_class": metric.__class__.__name__}),
    ) as metric_span:
        metric_span.log(
            output=_metric_result_summary(result),
            metadata=_field_value(result, "metadata") or _field_value(result, "meta"),
            # Numeric/bool results also become a Braintrust score keyed by metric name.
            scores=_metric_score(metric, result),
        )
+
+
async def _agent_call_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: dict[str, Any]) -> Any:
    """Trace an agent ``__call__`` as a task span named "<agent>.reply"."""
    with start_span(
        name=f"{_agent_name(instance)}.reply",
        type=SpanTypeAttribute.TASK,
        input=_args_kwargs_input(args, kwargs),
        metadata=_clean({"agent_class": instance.__class__.__name__}),
    ) as span:
        try:
            result = await wrapped(*args, **kwargs)
            span.log(output=result)
            return result
        except Exception as exc:
            # Record the failure on the span, then let it propagate to the caller.
            span.log(error=str(exc))
            raise
+
+
async def _general_evaluator_run_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: dict[str, Any]) -> Any:
    """Trace ``GeneralEvaluator.run`` as the root eval span for the whole benchmark."""
    solution = _call_arg(args, kwargs, 0, "solution")
    with start_span(
        name="agentscope.evaluate.run",
        type=SpanTypeAttribute.EVAL,
        input=_clean(
            {
                "benchmark_name": _field_value(getattr(instance, "benchmark", None), "name"),
                "n_repeat": getattr(instance, "n_repeat", None),
                "n_workers": getattr(instance, "n_workers", None),
            }
        ),
        metadata=_evaluator_metadata(instance, solution),
    ) as span:
        try:
            result = await wrapped(*args, **kwargs)
            # Log a completion marker rather than the raw return value.
            span.log(output={"status": "completed"})
            return result
        except Exception as exc:
            span.log(error=str(exc))
            raise
+
+
async def _ray_evaluator_run_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: dict[str, Any]) -> Any:
    """Trace ``RayEvaluator.run`` as the root eval span; same shape as the
    GeneralEvaluator wrapper, plus a ``distributed: True`` metadata flag."""
    solution = _call_arg(args, kwargs, 0, "solution")
    with start_span(
        name="agentscope.evaluate.run",
        type=SpanTypeAttribute.EVAL,
        input=_clean(
            {
                "benchmark_name": _field_value(getattr(instance, "benchmark", None), "name"),
                "n_repeat": getattr(instance, "n_repeat", None),
                "n_workers": getattr(instance, "n_workers", None),
            }
        ),
        metadata={**_evaluator_metadata(instance, solution), "distributed": True},
    ) as span:
        try:
            result = await wrapped(*args, **kwargs)
            # Log a completion marker rather than the raw return value.
            span.log(output={"status": "completed"})
            return result
        except Exception as exc:
            span.log(error=str(exc))
            raise
+
+
+async def _general_evaluator_run_solution_wrapper(
+ wrapped: Any,
+ instance: Any,
+ args: Any,
+ kwargs: dict[str, Any],
+) -> Any:
+ repeat_id = str(_call_arg(args, kwargs, 0, "repeat_id"))
+ task = _call_arg(args, kwargs, 1, "task")
+ storage = getattr(instance, "storage", None)
+ was_cached = False
+ if storage is not None and task is not None:
+ exists = getattr(storage, "solution_result_exists", None)
+ if exists is not None:
+ try:
+ was_cached = bool(exists(_task_id(task), repeat_id))
+ except Exception:
+ was_cached = False
+
+ with start_span(
+ name=f"{_task_id(task)}.solution",
+ type=SpanTypeAttribute.TASK,
+ input=_task_input(task),
+ expected=_task_expected(task),
+ tags=_task_tags(task),
+ metadata=_task_span_metadata(task, repeat_id, cached=was_cached),
+ ) as span:
+ try:
+ result = await wrapped(*args, **kwargs)
+ solution_output = _stored_solution_output(instance, task, repeat_id)
+ span.log(output=_solution_output_summary(solution_output))
+ return result
+ except Exception as exc:
+ span.log(error=str(exc))
+ raise
+
+
+async def _general_evaluator_run_evaluation_wrapper(
+ wrapped: Any,
+ instance: Any,
+ args: Any,
+ kwargs: dict[str, Any],
+) -> Any:
+ task = _call_arg(args, kwargs, 0, "task")
+ repeat_id = str(_call_arg(args, kwargs, 1, "repeat_id"))
+ solution_output = _call_arg(args, kwargs, 2, "solution_output")
+
+ with start_span(
+ name=f"{_task_id(task)}.evaluate",
+ type=SpanTypeAttribute.EVAL,
+ input=_solution_output_summary(solution_output),
+ metadata=_task_span_metadata(task, repeat_id),
+ ) as span:
+ token = _SUPPRESS_TASK_EVALUATE_SPAN.set(True)
+ try:
+ result = await wrapped(*args, **kwargs)
+ evaluation_results = _stored_evaluation_results(instance, task, repeat_id)
+ if evaluation_results is not None:
+ metrics = _field_value(task, "metrics") or []
+ for metric, evaluation_result in zip(metrics, evaluation_results):
+ _log_metric_span(span, metric, solution_output, evaluation_result)
+ span.log(output=[_metric_result_summary(item) for item in evaluation_results])
+ return result
+ except Exception as exc:
+ span.log(error=str(exc))
+ raise
+ finally:
+ _SUPPRESS_TASK_EVALUATE_SPAN.reset(token)
+
+
+async def _task_evaluate_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: dict[str, Any]) -> Any:
+ if _SUPPRESS_TASK_EVALUATE_SPAN.get():
+ return await wrapped(*args, **kwargs)
+
+ solution_output = _call_arg(args, kwargs, 0, "solution_output")
+ with start_span(
+ name=f"{_task_id(instance)}.evaluate",
+ type=SpanTypeAttribute.EVAL,
+ input=_solution_output_summary(solution_output),
+ metadata=_task_span_metadata(instance),
+ ) as span:
+ try:
+ result = await wrapped(*args, **kwargs)
+ span.log(output=[_metric_result_summary(item) for item in result] if result is not None else None)
+ return result
+ except Exception as exc:
+ span.log(error=str(exc))
+ raise
+
+
+async def _metric_call_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: dict[str, Any]) -> Any:
+ if _SUPPRESS_TASK_EVALUATE_SPAN.get():
+ return await wrapped(*args, **kwargs)
+
+ solution_output = _call_arg(args, kwargs, 0, "solution_output")
+ with start_span(
+ name=_metric_name(instance),
+ type=SpanTypeAttribute.SCORE,
+ span_attributes={"purpose": SpanPurpose.SCORER.value},
+ input=_solution_output_summary(solution_output),
+ metadata=_clean({"metric_class": instance.__class__.__name__}),
+ ) as span:
+ try:
+ result = await wrapped(*args, **kwargs)
+ span.log(
+ output=_metric_result_summary(result),
+ metadata=_field_value(result, "metadata") or _field_value(result, "meta"),
+ scores=_metric_score(instance, result),
+ )
+ return result
+ except Exception as exc:
+ span.log(error=str(exc))
+ raise
+
+
+async def _sequential_pipeline_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: dict[str, Any]) -> Any:
+ with start_span(
+ name="sequential_pipeline.run",
+ type=SpanTypeAttribute.TASK,
+ input=_args_kwargs_input(args, kwargs),
+ metadata=_pipeline_metadata(args, kwargs),
+ ) as span:
+ try:
+ result = await wrapped(*args, **kwargs)
+ span.log(output=result)
+ return result
+ except Exception as exc:
+ span.log(error=str(exc))
+ raise
+
+
+async def _fanout_pipeline_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: dict[str, Any]) -> Any:
+ with start_span(
+ name="fanout_pipeline.run",
+ type=SpanTypeAttribute.TASK,
+ input=_args_kwargs_input(args, kwargs),
+ metadata=_pipeline_metadata(args, kwargs),
+ ) as span:
+ try:
+ result = await wrapped(*args, **kwargs)
+ span.log(output=result)
+ return result
+ except Exception as exc:
+ span.log(error=str(exc))
+ raise
+
+
+async def _toolkit_call_tool_function_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: dict[str, Any]) -> Any:
+ tool_call = args[0] if args else kwargs.get("tool_call")
+ tool_name = _tool_name(tool_call)
+ with start_span(
+ name=f"{tool_name}.execute",
+ type=SpanTypeAttribute.TOOL,
+ input=_clean(
+ {
+ "tool_name": tool_name,
+ "tool_call": tool_call,
+ }
+ ),
+ metadata=_clean({"toolkit_class": instance.__class__.__name__}),
+ ) as span:
+ try:
+ result = await wrapped(*args, **kwargs)
+ if _is_async_iterator(result):
+
+ async def _trace():
+ last_chunk = None
+ async with aclosing(result) as agen:
+ async for chunk in agen:
+ last_chunk = chunk
+ yield chunk
+ if last_chunk is not None:
+ span.log(output=last_chunk)
+
+ return _trace()
+
+ span.log(output=result)
+ return result
+ except Exception as exc:
+ span.log(error=str(exc))
+ raise
+
+
+def _is_async_iterator(value: Any) -> bool:
+ try:
+ return getattr(value, "__aiter__", None) is not None and getattr(value, "__anext__", None) is not None
+ except Exception:
+ return False
+
+
+async def _model_call_wrapper(wrapped: Any, instance: Any, args: Any, kwargs: dict[str, Any]) -> Any:
+ with start_span(
+ name=f"{_model_provider_name(instance)}.call",
+ type=SpanTypeAttribute.LLM,
+ input=_model_call_input(args, kwargs),
+ metadata=_model_call_metadata(instance, kwargs),
+ ) as span:
+ try:
+ result = await wrapped(*args, **kwargs)
+ if _is_async_iterator(result):
+
+ async def _trace():
+ last_chunk = None
+ async with aclosing(result) as agen:
+ async for chunk in agen:
+ last_chunk = chunk
+ yield chunk
+ if last_chunk is not None:
+ span.log(output=_model_call_output(last_chunk), metrics=_extract_metrics(last_chunk))
+
+ return _trace()
+
+ span.log(output=_model_call_output(result), metrics=_extract_metrics(result))
+ return result
+ except Exception as exc:
+ span.log(error=str(exc))
+ raise
diff --git a/py/src/braintrust/integrations/auto_test_scripts/test_auto_agentscope.py b/py/src/braintrust/integrations/auto_test_scripts/test_auto_agentscope.py
new file mode 100644
index 00000000..2cc545a9
--- /dev/null
+++ b/py/src/braintrust/integrations/auto_test_scripts/test_auto_agentscope.py
@@ -0,0 +1,83 @@
+"""Test auto_instrument for AgentScope."""
+
+import os
+from pathlib import Path
+
+
+os.environ["BRAINTRUST_CASSETTES_DIR"] = str(Path(__file__).resolve().parent.parent / "agentscope" / "cassettes")
+
+from braintrust.auto import auto_instrument
+from braintrust.wrappers.test_utils import autoinstrument_test_context
+
+
+results = auto_instrument()
+assert results.get("agentscope") is True, "auto_instrument should return True for agentscope"
+
+results2 = auto_instrument()
+assert results2.get("agentscope") is True, "auto_instrument should still return True on second call"
+
+from agentscope.agent import AgentBase, ReActAgent
+from agentscope.formatter import OpenAIChatFormatter
+from agentscope.memory import InMemoryMemory
+from agentscope.message import Msg
+from agentscope.model import OpenAIChatModel
+from agentscope.pipeline import sequential_pipeline
+from agentscope.tool import Toolkit
+
+
+try:
+ from agentscope.pipeline import fanout_pipeline
+except ImportError:
+ fanout_pipeline = None
+
+
+assert hasattr(AgentBase.__call__, "__wrapped__"), "AgentBase.__call__ should be wrapped"
+assert hasattr(sequential_pipeline, "__wrapped__"), "sequential_pipeline should be wrapped"
+if fanout_pipeline is not None:
+ assert hasattr(fanout_pipeline, "__wrapped__"), "fanout_pipeline should be wrapped"
+assert hasattr(Toolkit.call_tool_function, "__wrapped__"), "Toolkit.call_tool_function should be wrapped"
+assert hasattr(OpenAIChatModel.__call__, "__wrapped__"), "OpenAIChatModel.__call__ should be wrapped"
+
+
+with autoinstrument_test_context("test_auto_agentscope") as memory_logger:
+ agent = ReActAgent(
+ name="Test Agent",
+ sys_prompt="You are a helpful assistant. Be brief.",
+ model=OpenAIChatModel(
+ model_name="gpt-4o-mini",
+ generate_kwargs={"temperature": 0},
+ ),
+ formatter=OpenAIChatFormatter(),
+ toolkit=Toolkit(),
+ memory=InMemoryMemory(),
+ )
+ if hasattr(agent, "set_console_output_enabled"):
+ agent.set_console_output_enabled(False)
+ elif hasattr(agent, "disable_console_output"):
+ agent.disable_console_output()
+
+ response = agent(
+ Msg(
+ name="user",
+ content="Say hi in two words.",
+ role="user",
+ )
+ )
+
+ import asyncio
+
+ result = asyncio.run(response)
+ assert result is not None
+
+ spans = memory_logger.pop()
+ assert len(spans) >= 2, f"Expected at least 2 spans (agent + model), got {len(spans)}"
+
+ agent_span = next(span for span in spans if span["span_attributes"]["name"] == "Test Agent.reply")
+ llm_spans = [span for span in spans if span["span_attributes"]["type"].value == "llm"]
+
+ assert agent_span["span_attributes"]["type"].value == "task"
+ assert llm_spans, "Should have at least one LLM span"
+ assert llm_spans[0]["metadata"]["model"] == "gpt-4o-mini"
+ assert agent_span["span_id"] in llm_spans[0]["span_parents"]
+
+print("SUCCESS")
diff --git a/py/src/braintrust/integrations/base.py b/py/src/braintrust/integrations/base.py
index 690e6c22..7d491b96 100644
--- a/py/src/braintrust/integrations/base.py
+++ b/py/src/braintrust/integrations/base.py
@@ -337,6 +337,7 @@ def setup(
cls,
*,
target: Any | None = None,
+ patchers: tuple[type[BasePatcher], ...] | None = None,
) -> bool:
"""Apply all applicable patchers for this integration."""
module = _import_first_available(cls.import_names)
@@ -347,7 +348,7 @@ def setup(
return False
success = False
- selected_patchers = cls.resolve_patchers()
+ selected_patchers = cls.resolve_patchers() if patchers is None else patchers
for patcher in sorted(selected_patchers, key=lambda patcher: patcher.priority):
if not patcher.applies(module, version, target=target):
continue
diff --git a/py/src/braintrust/wrappers/test_google_genai.py b/py/src/braintrust/wrappers/test_google_genai.py
index 73a31e71..7839ed0c 100644
--- a/py/src/braintrust/wrappers/test_google_genai.py
+++ b/py/src/braintrust/wrappers/test_google_genai.py
@@ -31,6 +31,7 @@ def before_record_request(request):
"record_mode": record_mode,
"filter_headers": [
"authorization",
+ "Authorization",
"x-api-key",
"x-goog-api-key",
],