Skip to content

Commit 07eae20

Browse files
authored
fix(anthropic): log stop reason in span output (#185)
Include the Anthropic message's model, stop_reason, and stop_sequence in the logged span output so that spans preserve the stop metadata returned by the API. Output fields are now omitted only when they are None, rather than whenever they are falsy. Add a focused regression test for _log_message_to_span and extend an existing VCR-backed integration test to assert the new model and stop_reason fields. Closes #175.
1 parent 2c68b88 commit 07eae20

2 files changed

Lines changed: 49 additions & 2 deletions

File tree

py/src/braintrust/integrations/anthropic/test_anthropic.py

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,11 +5,13 @@
55
import time
66
import unittest.mock
77
from pathlib import Path
8+
from types import SimpleNamespace
89

910
import anthropic
1011
import pytest
1112
from braintrust import logger
1213
from braintrust.integrations.anthropic import AnthropicIntegration, wrap_anthropic
14+
from braintrust.integrations.anthropic.tracing import _log_message_to_span
1315
from braintrust.test_helpers import init_test_logger
1416

1517

@@ -37,6 +39,43 @@ def memory_logger():
3739
yield bgl
3840

3941

42+
def test_log_message_to_span_includes_stop_reason_and_stop_sequence():
    """Check that ``_log_message_to_span`` forwards stop metadata to the span.

    A stand-in message carrying ``model``, ``stop_reason`` and
    ``stop_sequence`` is logged against a mocked span. The span must receive
    exactly one ``log`` call whose ``output`` contains those three fields next
    to the role and content, and whose ``metrics`` match the token counts and
    the supplied time-to-first-token.
    """
    # Expected payloads are spelled out independently of the input message so
    # the assertion compares against literals rather than shared objects.
    expected_output = {
        "role": "assistant",
        "content": [{"type": "text", "text": "done"}],
        "model": MODEL,
        "stop_reason": "stop_sequence",
        "stop_sequence": "DONE",
    }
    expected_metrics = {
        "prompt_tokens": 11.0,
        "completion_tokens": 7.0,
        "prompt_cached_tokens": 0.0,
        "prompt_cache_creation_tokens": 0.0,
        "tokens": 18.0,
        "time_to_first_token": 0.123,
    }

    # Usage is provided as a plain dict on an attribute-only stand-in object.
    token_usage = {
        "input_tokens": 11,
        "output_tokens": 7,
        "cache_read_input_tokens": 0,
        "cache_creation_input_tokens": 0,
    }
    fake_message = SimpleNamespace(
        role="assistant",
        content=[{"type": "text", "text": "done"}],
        model=MODEL,
        stop_reason="stop_sequence",
        stop_sequence="DONE",
        usage=token_usage,
    )
    mock_span = unittest.mock.MagicMock()

    _log_message_to_span(fake_message, mock_span, time_to_first_token=0.123)

    mock_span.log.assert_called_once_with(
        output=expected_output,
        metrics=expected_metrics,
    )
77+
78+
4079
@pytest.mark.vcr
4180
def test_anthropic_messages_create_stream_true(memory_logger):
4281
assert not memory_logger.pop()
@@ -351,6 +390,8 @@ def test_anthropic_messages_sync(memory_logger):
351390
metrics = log["metrics"]
352391
_assert_metrics_are_valid(metrics, start, end)
353392
assert log["metadata"]["model"] == MODEL
393+
assert log["output"]["model"] == msg.model
394+
assert log["output"]["stop_reason"] == msg.stop_reason
354395

355396

356397
def _assert_metrics_are_valid(metrics, start, end):

py/src/braintrust/integrations/anthropic/tracing.py

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -452,8 +452,14 @@ def _log_message_to_span(message, span, time_to_first_token: float | None = None
452452

453453
output = {
454454
k: v
455-
for k, v in {"role": getattr(message, "role", None), "content": getattr(message, "content", None)}.items()
456-
if v
455+
for k, v in {
456+
"role": getattr(message, "role", None),
457+
"content": getattr(message, "content", None),
458+
"model": getattr(message, "model", None),
459+
"stop_reason": getattr(message, "stop_reason", None),
460+
"stop_sequence": getattr(message, "stop_sequence", None),
461+
}.items()
462+
if v is not None
457463
} or None
458464

459465
span.log(output=output, metrics=metrics)

0 commit comments

Comments
 (0)