Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions py/noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ def _pinned_python_version():
# validate things work with or without them.
VENDOR_PACKAGES = (
"agno",
"agentscope",
"anthropic",
"dspy",
"openai",
Expand Down Expand Up @@ -89,6 +90,7 @@ def _pinned_python_version():
# Keep LATEST for newest API coverage, and pin 2.4.0 to cover the 2.4 -> 2.5 breaking change
# to internals we leverage for instrumentation.
AGNO_VERSIONS = (LATEST, "2.4.0", "2.1.0")
AGENTSCOPE_VERSIONS = (LATEST, "1.0.0")
# pydantic_ai 1.x requires Python >= 3.10
# Two test suites with different version requirements:
# 1. wrap_openai approach: works with older versions (0.1.9+)
Expand Down Expand Up @@ -172,6 +174,16 @@ def test_agno(session, version):
_run_core_tests(session)


@nox.session()
@nox.parametrize("version", AGENTSCOPE_VERSIONS, ids=AGENTSCOPE_VERSIONS)
def test_agentscope(session, version):
    """Run the AgentScope integration tests, then the core tests, for one version.

    The session is parametrized over ``AGENTSCOPE_VERSIONS``; each run installs
    the shared test dependencies, ``agentscope`` at ``version``, and ``openai``
    with no explicit version argument.
    """
    _install_test_deps(session)

    # Each tuple holds the positional arguments forwarded to ``_install`` after
    # the session; ``openai`` deliberately carries no version argument.
    install_args = (
        ("agentscope", version),
        ("openai",),
    )
    for args in install_args:
        _install(session, *args)

    agentscope_test_file = f"{INTEGRATION_DIR}/agentscope/test_agentscope.py"
    _run_tests(session, agentscope_test_file)
    _run_core_tests(session)


@nox.session()
@nox.parametrize("version", ANTHROPIC_VERSIONS, ids=ANTHROPIC_VERSIONS)
def test_anthropic(session, version):
Expand Down
12 changes: 11 additions & 1 deletion py/src/braintrust/auto.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,13 @@
import logging
from contextlib import contextmanager

from braintrust.integrations import ADKIntegration, AgnoIntegration, AnthropicIntegration, ClaudeAgentSDKIntegration
from braintrust.integrations import (
ADKIntegration,
AgentScopeIntegration,
AgnoIntegration,
AnthropicIntegration,
ClaudeAgentSDKIntegration,
)


__all__ = ["auto_instrument"]
Expand All @@ -34,6 +40,7 @@ def auto_instrument(
pydantic_ai: bool = True,
google_genai: bool = True,
agno: bool = True,
agentscope: bool = True,
claude_agent_sdk: bool = True,
dspy: bool = True,
adk: bool = True,
Expand All @@ -54,6 +61,7 @@ def auto_instrument(
pydantic_ai: Enable Pydantic AI instrumentation (default: True)
google_genai: Enable Google GenAI instrumentation (default: True)
agno: Enable Agno instrumentation (default: True)
agentscope: Enable AgentScope instrumentation (default: True)
claude_agent_sdk: Enable Claude Agent SDK instrumentation (default: True)
dspy: Enable DSPy instrumentation (default: True)
adk: Enable Google ADK instrumentation (default: True)
Expand Down Expand Up @@ -116,6 +124,8 @@ def auto_instrument(
results["google_genai"] = _instrument_google_genai()
if agno:
results["agno"] = _instrument_integration(AgnoIntegration)
if agentscope:
results["agentscope"] = _instrument_integration(AgentScopeIntegration)
if claude_agent_sdk:
results["claude_agent_sdk"] = _instrument_integration(ClaudeAgentSDKIntegration)
if dspy:
Expand Down
1 change: 1 addition & 0 deletions py/src/braintrust/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,7 @@ def get_vcr_config():
"decode_compressed_response": True,
"filter_headers": [
"authorization",
"Authorization",
"openai-organization",
"x-api-key",
"api-key",
Expand Down
9 changes: 8 additions & 1 deletion py/src/braintrust/integrations/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,14 @@
from .adk import ADKIntegration
from .agentscope import AgentScopeIntegration
from .agno import AgnoIntegration
from .anthropic import AnthropicIntegration
from .claude_agent_sdk import ClaudeAgentSDKIntegration


__all__ = ["ADKIntegration", "AgnoIntegration", "AnthropicIntegration", "ClaudeAgentSDKIntegration"]
__all__ = [
"ADKIntegration",
"AgentScopeIntegration",
"AgnoIntegration",
"AnthropicIntegration",
"ClaudeAgentSDKIntegration",
]
1 change: 1 addition & 0 deletions py/src/braintrust/integrations/adk/test_adk.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ def before_record_request(request):
"cassette_library_dir": str(Path(__file__).parent / "cassettes"),
"filter_headers": [
"authorization",
"Authorization",
"x-goog-api-key",
],
"before_record_request": before_record_request,
Expand Down
59 changes: 59 additions & 0 deletions py/src/braintrust/integrations/agentscope/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
"""Braintrust integration for AgentScope."""

from typing import Any

from braintrust.logger import NOOP_SPAN, current_span, init_logger

from .integration import AgentScopeIntegration
from .patchers import (
GeneralEvaluatorPatcher,
MetricCallPatcher,
RayEvaluatorRunPatcher,
TaskEvaluatePatcher,
)


__all__ = ["AgentScopeIntegration", "setup_agentscope", "wrap_evaluator"]


def setup_agentscope(
    api_key: str | None = None,
    project_id: str | None = None,
    project_name: str | None = None,
    instrument_evals: bool = True,
) -> bool:
    """Set up the Braintrust integration for AgentScope.

    When the current span is the no-op span, a logger is initialized first
    from ``project_name``, ``api_key`` and ``project_id``. The call then
    delegates to ``AgentScopeIntegration.setup``.

    Args:
        api_key: Forwarded to ``init_logger`` as ``api_key``.
        project_id: Forwarded to ``init_logger`` as ``project_id``.
        project_name: Forwarded to ``init_logger`` as ``project``.
        instrument_evals: Forwarded to ``AgentScopeIntegration.setup``.

    Returns:
        The result of ``AgentScopeIntegration.setup``.
    """
    no_active_span = current_span() == NOOP_SPAN
    if no_active_span:
        # Only initialize a logger when nothing is currently being traced.
        init_logger(
            project=project_name,
            api_key=api_key,
            project_id=project_id,
        )

    setup_result = AgentScopeIntegration.setup(instrument_evals=instrument_evals)
    return setup_result


def wrap_evaluator(Evaluator: Any) -> Any:
    """Manually patch an AgentScope evaluator class for tracing.

    The evaluator class itself is patched first: a class named
    ``RayEvaluator`` goes through ``RayEvaluatorRunPatcher``, anything else
    through ``GeneralEvaluatorPatcher``. If ``agentscope.evaluate`` can be
    imported, its ``Task`` and ``MetricBase`` classes (when present) are
    patched as well, so task and metric tracing is enabled even when global
    setup is not used.

    Args:
        Evaluator: The evaluator class to patch.

    Returns:
        The same ``Evaluator`` object that was passed in.
    """
    is_ray_evaluator = getattr(Evaluator, "__name__", "") == "RayEvaluator"
    evaluator_patcher = RayEvaluatorRunPatcher if is_ray_evaluator else GeneralEvaluatorPatcher
    evaluator_patcher.wrap_target(Evaluator)

    try:
        import agentscope.evaluate as agentscope_evaluate
    except ImportError:
        # Without the evaluate module only the evaluator class gets patched.
        return Evaluator

    # Optional classes exported by agentscope.evaluate, paired with the
    # patcher applied to each; a missing attribute is skipped.
    optional_targets = (
        ("Task", TaskEvaluatePatcher),
        ("MetricBase", MetricCallPatcher),
    )
    for attr_name, patcher in optional_targets:
        target_cls = getattr(agentscope_evaluate, attr_name, None)
        if target_cls is None:
            continue
        patcher.wrap_target(target_cls)

    return Evaluator
Loading
Loading