braintrustdata · Abhijeet Prasad (AbhiPrasad) · Mar 25, 2026 · Mar 25, 2026 · Mar 26, 2026
diff --git a/py/noxfile.py b/py/noxfile.py
@@ -62,6 +62,7 @@ def _pinned_python_version():
 # validate things work with or without them.
 VENDOR_PACKAGES = (
     "agno",
+    "agentscope",
     "anthropic",
     "dspy",
     "openai",
@@ -89,6 +90,7 @@ def _pinned_python_version():
 # Keep LATEST for newest API coverage, and pin 2.4.0 to cover the 2.4 -> 2.5 breaking change
 # to internals we leverage for instrumentation.
 AGNO_VERSIONS = (LATEST, "2.4.0", "2.1.0")
+AGENTSCOPE_VERSIONS = (LATEST, "1.0.0")
 # pydantic_ai 1.x requires Python >= 3.10
 # Two test suites with different version requirements:
 # 1. wrap_openai approach: works with older versions (0.1.9+)
@@ -172,6 +174,16 @@ def test_agno(session, version):
     _run_core_tests(session)
 
 
+@nox.session()
+@nox.parametrize("version", AGENTSCOPE_VERSIONS, ids=AGENTSCOPE_VERSIONS)
+def test_agentscope(session, version):
+    _install_test_deps(session)
+    _install(session, "agentscope", version)  # one session per AGENTSCOPE_VERSIONS entry
+    _install(session, "openai")  # NOTE(review): presumably needed by the tests' model client - confirm
+    _run_tests(session, f"{INTEGRATION_DIR}/agentscope/test_agentscope.py")
+    _run_core_tests(session)  # core suite runs after the integration tests, as in test_agno
+
+
 @nox.session()
 @nox.parametrize("version", ANTHROPIC_VERSIONS, ids=ANTHROPIC_VERSIONS)
 def test_anthropic(session, version):

diff --git a/py/src/braintrust/auto.py b/py/src/braintrust/auto.py
@@ -7,7 +7,13 @@
 import logging
 from contextlib import contextmanager
 
-from braintrust.integrations import ADKIntegration, AgnoIntegration, AnthropicIntegration, ClaudeAgentSDKIntegration
+from braintrust.integrations import (
+    ADKIntegration,
+    AgentScopeIntegration,
+    AgnoIntegration,
+    AnthropicIntegration,
+    ClaudeAgentSDKIntegration,
+)
 
 
 __all__ = ["auto_instrument"]
@@ -34,6 +40,7 @@ def auto_instrument(
     pydantic_ai: bool = True,
     google_genai: bool = True,
     agno: bool = True,
+    agentscope: bool = True,
     claude_agent_sdk: bool = True,
     dspy: bool = True,
     adk: bool = True,
@@ -54,6 +61,7 @@ def auto_instrument(
         pydantic_ai: Enable Pydantic AI instrumentation (default: True)
         google_genai: Enable Google GenAI instrumentation (default: True)
         agno: Enable Agno instrumentation (default: True)
+        agentscope: Enable AgentScope instrumentation (default: True)
         claude_agent_sdk: Enable Claude Agent SDK instrumentation (default: True)
         dspy: Enable DSPy instrumentation (default: True)
         adk: Enable Google ADK instrumentation (default: True)
@@ -116,6 +124,8 @@ def auto_instrument(
         results["google_genai"] = _instrument_google_genai()
     if agno:
         results["agno"] = _instrument_integration(AgnoIntegration)
+    if agentscope:
+        results["agentscope"] = _instrument_integration(AgentScopeIntegration)
     if claude_agent_sdk:
         results["claude_agent_sdk"] = _instrument_integration(ClaudeAgentSDKIntegration)
     if dspy:

diff --git a/py/src/braintrust/conftest.py b/py/src/braintrust/conftest.py
@@ -191,6 +191,7 @@ def get_vcr_config():
         "decode_compressed_response": True,
         "filter_headers": [
             "authorization",
+            "Authorization",
             "openai-organization",
             "x-api-key",
             "api-key",

diff --git a/py/src/braintrust/integrations/__init__.py b/py/src/braintrust/integrations/__init__.py
@@ -1,7 +1,14 @@
 from .adk import ADKIntegration
+from .agentscope import AgentScopeIntegration
 from .agno import AgnoIntegration
 from .anthropic import AnthropicIntegration
 from .claude_agent_sdk import ClaudeAgentSDKIntegration
 
 
-__all__ = ["ADKIntegration", "AgnoIntegration", "AnthropicIntegration", "ClaudeAgentSDKIntegration"]
+__all__ = [
+    "ADKIntegration",
+    "AgentScopeIntegration",
+    "AgnoIntegration",
+    "AnthropicIntegration",
+    "ClaudeAgentSDKIntegration",
+]
diff --git a/py/src/braintrust/integrations/adk/test_adk.py b/py/src/braintrust/integrations/adk/test_adk.py
@@ -41,6 +41,7 @@ def before_record_request(request):
         "cassette_library_dir": str(Path(__file__).parent / "cassettes"),
         "filter_headers": [
             "authorization",
+            "Authorization",
             "x-goog-api-key",
         ],
         "before_record_request": before_record_request,

diff --git a/py/src/braintrust/integrations/agentscope/__init__.py b/py/src/braintrust/integrations/agentscope/__init__.py
@@ -0,0 +1,59 @@
+"""Braintrust integration for AgentScope."""
+
+from typing import Any
+
+from braintrust.logger import NOOP_SPAN, current_span, init_logger
+
+from .integration import AgentScopeIntegration
+from .patchers import (
+    GeneralEvaluatorPatcher,
+    MetricCallPatcher,
+    RayEvaluatorRunPatcher,
+    TaskEvaluatePatcher,
+)
+
+
+__all__ = ["AgentScopeIntegration", "setup_agentscope", "wrap_evaluator"]
+
+
+def setup_agentscope(
+    api_key: str | None = None,
+    project_id: str | None = None,
+    project_name: str | None = None,
+    instrument_evals: bool = True,
+) -> bool:
+    """Set up the Braintrust AgentScope integration; returns ``AgentScopeIntegration.setup(...)``."""
+    if current_span() == NOOP_SPAN:  # only init a logger when the current span is the no-op placeholder
+        init_logger(project=project_name, api_key=api_key, project_id=project_id)
+
+    return AgentScopeIntegration.setup(instrument_evals=instrument_evals)
+
+
+def wrap_evaluator(Evaluator: Any) -> Any:
+    """Manually patch an AgentScope evaluator class for tracing.
+
+    Classes named ``RayEvaluator`` get ``RayEvaluatorRunPatcher``; all others get
+    ``GeneralEvaluatorPatcher``. If ``agentscope.evaluate`` imports, its ``Task``
+    and ``MetricBase`` classes (when present) are patched too, so nested spans
+    appear without global setup. Returns the same ``Evaluator`` object.
+    """
+    class_name = getattr(Evaluator, "__name__", "")  # "" when the object has no __name__
+    if class_name == "RayEvaluator":  # exact-name match; any other name takes the else branch
+        RayEvaluatorRunPatcher.wrap_target(Evaluator)
+    else:
+        GeneralEvaluatorPatcher.wrap_target(Evaluator)
+
+    try:
+        import agentscope.evaluate as agentscope_evaluate
+    except ImportError:
+        return Evaluator  # evaluate module unavailable: skip task/metric patching
+
+    task_cls = getattr(agentscope_evaluate, "Task", None)
+    if task_cls is not None:
+        TaskEvaluatePatcher.wrap_target(task_cls)
+
+    metric_cls = getattr(agentscope_evaluate, "MetricBase", None)
+    if metric_cls is not None:
+        MetricCallPatcher.wrap_target(metric_cls)
+
+    return Evaluator