Skip to content

Commit e34c6a8

Browse files
authored
ref: Extract out common integration utils (#228)
1 parent 94cfbdd commit e34c6a8

16 files changed

Lines changed: 825 additions & 556 deletions

File tree

.agents/skills/sdk-integrations/SKILL.md

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -105,12 +105,14 @@ Do not start by wiring patchers and only later asking what the logged span shoul
105105

106106
Keep provider-local code inside `py/src/braintrust/integrations/<provider>/`.
107107

108+
If tracing or normalization logic is genuinely shared across multiple integrations, prefer adding it to `py/src/braintrust/integrations/utils.py` instead of copying it into each provider package. Avoid duplicating code between integrations unless there is a clear provider-specific reason the behavior must diverge.
109+
108110
Typical file ownership:
109111

110112
- `__init__.py`: export the integration class, `setup_<provider>()`, and public `wrap_*()` helpers
111113
- `integration.py`: define the `BaseIntegration` subclass and register patchers
112114
- `patchers.py`: define patchers and manual `wrap_*()` helpers
113-
- `tracing.py`: keep provider-specific tracing, stream handling, normalization, and metadata extraction
115+
- `tracing.py`: keep provider-specific tracing, stream handling, normalization, and metadata extraction; move cross-integration helpers to `py/src/braintrust/integrations/utils.py`
114116
- `test_*.py`: keep provider behavior tests next to the integration
115117
- `cassettes/`: keep VCR recordings next to the integration tests when the provider uses HTTP
116118

AGENTS.md

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -75,6 +75,12 @@ Root `Makefile` exists as a convenience wrapper. The authoritative SDK workflow
7575

7676
`py/noxfile.py` is the source of truth for compatibility coverage.
7777

78+
Testing preferences:
79+
80+
- Prefer VCR-backed integration tests with checked-in cassettes whenever practical.
81+
- Avoid mocks, fakes, and heavily synthetic tests unless there is no reasonable cassette-based alternative or the code under test is truly internal or purely local.
82+
- When fixing a bug or issue, default to a red/green workflow: first add or update a test that reproduces the problem and fails, then implement the fix, unless the user explicitly asks for a different approach.
83+
7884
Key facts:
7985

8086
- `test_core` runs without optional vendor packages.
@@ -87,6 +93,8 @@ When changing behavior, run the narrowest affected session first, then expand on
8793

8894
## VCR
8995

96+
VCR/cassette coverage is the default and preferred testing strategy for provider and integration behavior in this repo. Reach for cassette-backed tests before introducing mocks or fakes, and keep new coverage aligned with the existing VCR patterns unless there is a strong reason not to.
97+
9098
VCR cassette directories:
9199

92100
- `py/src/braintrust/cassettes/`
@@ -162,6 +170,7 @@ Avoid editing `py/src/braintrust/version.py` while also running build commands.
162170

163171
- Keep tests near the code they cover.
164172
- Reuse existing fixtures and cassette patterns.
173+
- Prefer extending an existing cassette-backed test over adding a new mock-heavy test.
165174
- If a change affects examples or integrations, update the nearest example or focused test.
166175
- For CLI/devserver changes, consider whether wheel-mode behavior also needs coverage.
167176
- Do **not** add `from __future__ import annotations` unless it is absolutely required (e.g., a genuine forward-reference that cannot be resolved any other way). This import changes annotation evaluation semantics at runtime and can silently break `get_type_hints()`, Pydantic models, and other runtime introspection. Prefer quoted string literals (`"MyClass"`) or `TYPE_CHECKING` guards for forward references instead.

py/noxfile.py

Lines changed: 17 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
nox -h Get help.
1111
"""
1212

13+
import functools
1314
import glob
1415
import os
1516
import pathlib
@@ -476,14 +477,29 @@ def _get_braintrust_wheel():
476477
return wheels[0]
477478

478479

480+
@functools.cache
481+
def _integration_subdirs_to_ignore() -> list[str]:
482+
"""Return integration subdirectories that require dedicated sessions.
483+
484+
Top-level tests in ``src/braintrust/integrations/`` (e.g. shared utils and
485+
versioning tests) should still run in ``test_core``.
486+
"""
487+
integrations_root = pathlib.Path("src") / INTEGRATION_DIR
488+
return [
489+
f"{INTEGRATION_DIR}/{child.name}"
490+
for child in integrations_root.iterdir()
491+
if child.is_dir() and child.name != "__pycache__"
492+
]
493+
494+
479495
def _run_core_tests(session):
480496
"""Run all tests which don't require optional dependencies."""
481497
_run_tests(
482498
session,
483499
SRC_DIR,
484500
ignore_paths=[
485501
WRAPPER_DIR,
486-
INTEGRATION_DIR,
502+
*_integration_subdirs_to_ignore(),
487503
CONTRIB_DIR,
488504
DEVSERVER_DIR,
489505
],

py/src/braintrust/integrations/agentscope/tracing.py

Lines changed: 9 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -6,14 +6,11 @@
66

77
from braintrust.logger import start_span
88
from braintrust.span_types import SpanTypeAttribute
9-
10-
11-
def _clean(mapping: dict[str, Any]) -> dict[str, Any]:
12-
return {key: value for key, value in mapping.items() if value is not None}
9+
from braintrust.util import clean_nones
1310

1411

1512
def _args_kwargs_input(args: Any, kwargs: dict[str, Any]) -> dict[str, Any]:
16-
return _clean(
13+
return clean_nones(
1714
{
1815
"args": list(args) if args else None,
1916
"kwargs": kwargs if kwargs else None,
@@ -34,7 +31,7 @@ def _pipeline_metadata(args: Any, kwargs: dict[str, Any]) -> dict[str, Any]:
3431
if agents:
3532
agent_names = [getattr(agent, "name", agent.__class__.__name__) for agent in agents]
3633

37-
return _clean({"agent_names": agent_names})
34+
return clean_nones({"agent_names": agent_names})
3835

3936

4037
def _extract_metrics(*candidates: Any) -> dict[str, float] | None:
@@ -69,7 +66,7 @@ def _model_provider_name(instance: Any) -> str:
6966

7067

7168
def _model_metadata(instance: Any) -> dict[str, Any]:
72-
return _clean(
69+
return clean_nones(
7370
{
7471
"model": getattr(instance, "model_name", None),
7572
"provider": _model_provider_name(instance),
@@ -95,7 +92,7 @@ def _model_call_input(args: Any, kwargs: dict[str, Any]) -> dict[str, Any]:
9592
if structured_model is None and len(args) > 3:
9693
structured_model = args[3]
9794

98-
return _clean(
95+
return clean_nones(
9996
{
10097
"messages": messages,
10198
"tools": tools,
@@ -125,7 +122,7 @@ def _model_call_output(result: Any) -> Any:
125122
else:
126123
return result
127124

128-
normalized = _clean(
125+
normalized = clean_nones(
129126
{
130127
"role": "assistant" if data.get("content") is not None else None,
131128
"content": data.get("content"),
@@ -178,7 +175,7 @@ async def _wrapper(wrapped: Any, instance: Any, args: Any, kwargs: dict[str, Any
178175

179176
_agent_call_wrapper = _make_task_wrapper(
180177
name_fn=lambda instance, _a, _k: f"{_agent_name(instance)}.reply",
181-
metadata_fn=lambda instance, _a, _k: _clean({"agent_class": instance.__class__.__name__}),
178+
metadata_fn=lambda instance, _a, _k: clean_nones({"agent_class": instance.__class__.__name__}),
182179
)
183180

184181
_sequential_pipeline_wrapper = _make_task_wrapper(
@@ -224,13 +221,13 @@ async def _toolkit_call_tool_function_wrapper(wrapped: Any, instance: Any, args:
224221
start_span(
225222
name=f"{tool_name}.execute",
226223
type=SpanTypeAttribute.TOOL,
227-
input=_clean(
224+
input=clean_nones(
228225
{
229226
"tool_name": tool_name,
230227
"tool_call": tool_call,
231228
}
232229
),
233-
metadata=_clean({"toolkit_class": instance.__class__.__name__}),
230+
metadata=clean_nones({"toolkit_class": instance.__class__.__name__}),
234231
)
235232
)
236233
try:

py/src/braintrust/integrations/agno/tracing.py

Lines changed: 1 addition & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22
from inspect import isawaitable
33
from typing import Any
44

5+
from braintrust.integrations.utils import _try_to_dict
56
from braintrust.logger import start_span
67
from braintrust.span_types import SpanTypeAttribute
78
from braintrust.util import is_numeric
@@ -24,28 +25,6 @@ def get_args_kwargs(args: list[str], kwargs: dict[str, Any], keys: list[str]):
2425
return {k: args[i] if args else kwargs.get(k) for i, k in enumerate(keys)}, omit(kwargs, keys)
2526

2627

27-
def _try_to_dict(obj: Any) -> Any:
28-
"""Convert object to dict, handling different object types like OpenAI wrapper."""
29-
if isinstance(obj, dict):
30-
return obj
31-
if hasattr(obj, "model_dump") and callable(obj.model_dump):
32-
try:
33-
return obj.model_dump()
34-
except Exception:
35-
pass
36-
if hasattr(obj, "dict") and callable(obj.dict):
37-
try:
38-
return obj.dict()
39-
except Exception:
40-
pass
41-
if hasattr(obj, "__dict__"):
42-
try:
43-
return obj.__dict__.copy()
44-
except Exception:
45-
pass
46-
return obj
47-
48-
4928
def is_sync_iterator(result: Any) -> bool:
5029
return hasattr(result, "__iter__") and hasattr(result, "__next__")
5130

py/src/braintrust/integrations/anthropic/_utils.py

Lines changed: 8 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
from typing import Any
44

5+
from braintrust.integrations.utils import _try_to_dict as _shared_try_to_dict
56
from braintrust.util import is_numeric
67

78

@@ -36,27 +37,14 @@ def __getattr__(self, name: str) -> Any:
3637

3738

3839
def _try_to_dict(obj: Any) -> dict[str, Any] | None:
39-
if isinstance(obj, dict):
40-
return obj
41-
42-
if hasattr(obj, "model_dump"):
43-
try:
44-
candidate = obj.model_dump(mode="python")
45-
except TypeError:
46-
candidate = obj.model_dump()
47-
return candidate if isinstance(candidate, dict) else None
48-
49-
if hasattr(obj, "to_dict"):
50-
candidate = obj.to_dict()
51-
return candidate if isinstance(candidate, dict) else None
52-
53-
if hasattr(obj, "dict"):
54-
candidate = obj.dict()
55-
return candidate if isinstance(candidate, dict) else None
56-
57-
if hasattr(obj, "__dict__"):
58-
return vars(obj)
40+
"""Anthropic-flavoured object→dict conversion.
5941
42+
Delegates to the shared ``_try_to_dict`` first, then returns ``None``
43+
(instead of the original object) when conversion fails.
44+
"""
45+
result = _shared_try_to_dict(obj)
46+
if isinstance(result, dict):
47+
return result
6048
return None
6149

6250

py/src/braintrust/integrations/anthropic/test_anthropic.py

Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -166,6 +166,55 @@ def test_extract_anthropic_usage_includes_server_tool_use_metrics_from_objects()
166166
assert metadata == {}
167167

168168

169+
def test_extract_anthropic_usage_supports_to_dict_only_objects():
170+
class ToDictOnly:
171+
__slots__ = ("_payload",)
172+
173+
def __init__(self, payload):
174+
self._payload = payload
175+
176+
def to_dict(self):
177+
return self._payload
178+
179+
usage = ToDictOnly(
180+
{
181+
"input_tokens": 11,
182+
"output_tokens": 7,
183+
"cache_read_input_tokens": 3,
184+
"cache_creation": ToDictOnly(
185+
{
186+
"ephemeral_5m_input_tokens": 2,
187+
"ephemeral_1h_input_tokens": 5,
188+
}
189+
),
190+
"server_tool_use": ToDictOnly(
191+
{
192+
"web_search_requests": 2,
193+
"web_fetch_requests": 1,
194+
}
195+
),
196+
"service_tier": "standard",
197+
}
198+
)
199+
200+
metrics, metadata = extract_anthropic_usage(usage)
201+
202+
assert metrics == {
203+
"prompt_tokens": 21.0,
204+
"completion_tokens": 7.0,
205+
"prompt_cached_tokens": 3.0,
206+
"prompt_cache_creation_tokens": 7.0,
207+
"server_tool_use_web_search_requests": 2.0,
208+
"server_tool_use_web_fetch_requests": 1.0,
209+
"tokens": 28.0,
210+
}
211+
assert metadata == {
212+
"cache_creation_ephemeral_5m_input_tokens": 2,
213+
"cache_creation_ephemeral_1h_input_tokens": 5,
214+
"usage_service_tier": "standard",
215+
}
216+
217+
169218
@pytest.mark.vcr(match_on=["method", "scheme", "host", "port", "path"])
170219
def test_anthropic_messages_create_with_image_attachment_input(memory_logger):
171220
assert not memory_logger.pop()

0 commit comments

Comments
 (0)