Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions py/src/braintrust/integrations/anthropic/test_anthropic.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,13 @@
import time
import unittest.mock
from pathlib import Path
from types import SimpleNamespace

import anthropic
import pytest
from braintrust import logger
from braintrust.integrations.anthropic import AnthropicIntegration, wrap_anthropic
from braintrust.integrations.anthropic.tracing import _log_message_to_span
from braintrust.test_helpers import init_test_logger


Expand Down Expand Up @@ -37,6 +39,43 @@ def memory_logger():
yield bgl


def test_log_message_to_span_includes_stop_reason_and_stop_sequence():
    """Check that a logged message carries model, stop_reason and stop_sequence.

    A stand-in message object is passed to ``_log_message_to_span`` together
    with a mock span; the test then verifies the single ``span.log`` call
    received the expected ``output`` and ``metrics`` keyword arguments.
    """
    fake_span = unittest.mock.MagicMock()

    # Token accounting attached to the stand-in message.
    usage_payload = {
        "input_tokens": 11,
        "output_tokens": 7,
        "cache_read_input_tokens": 0,
        "cache_creation_input_tokens": 0,
    }
    fake_message = SimpleNamespace(
        role="assistant",
        content=[{"type": "text", "text": "done"}],
        model=MODEL,
        stop_reason="stop_sequence",
        stop_sequence="DONE",
        usage=usage_payload,
    )

    _log_message_to_span(fake_message, fake_span, time_to_first_token=0.123)

    # Built from fresh literals (not the objects handed to the function) so
    # the comparison is independent of anything the call might touch.
    expected_output = {
        "role": "assistant",
        "content": [{"type": "text", "text": "done"}],
        "model": MODEL,
        "stop_reason": "stop_sequence",
        "stop_sequence": "DONE",
    }
    expected_metrics = {
        "prompt_tokens": 11.0,
        "completion_tokens": 7.0,
        "prompt_cached_tokens": 0.0,
        "prompt_cache_creation_tokens": 0.0,
        "tokens": 18.0,
        "time_to_first_token": 0.123,
    }
    fake_span.log.assert_called_once_with(
        output=expected_output,
        metrics=expected_metrics,
    )


@pytest.mark.vcr
def test_anthropic_messages_create_stream_true(memory_logger):
assert not memory_logger.pop()
Expand Down Expand Up @@ -351,6 +390,8 @@ def test_anthropic_messages_sync(memory_logger):
metrics = log["metrics"]
_assert_metrics_are_valid(metrics, start, end)
assert log["metadata"]["model"] == MODEL
assert log["output"]["model"] == msg.model
assert log["output"]["stop_reason"] == msg.stop_reason


def _assert_metrics_are_valid(metrics, start, end):
Expand Down
10 changes: 8 additions & 2 deletions py/src/braintrust/integrations/anthropic/tracing.py
Original file line number Diff line number Diff line change
Expand Up @@ -452,8 +452,14 @@ def _log_message_to_span(message, span, time_to_first_token: float | None = None

output = {
k: v
for k, v in {"role": getattr(message, "role", None), "content": getattr(message, "content", None)}.items()
if v
for k, v in {
"role": getattr(message, "role", None),
"content": getattr(message, "content", None),
"model": getattr(message, "model", None),
"stop_reason": getattr(message, "stop_reason", None),
"stop_sequence": getattr(message, "stop_sequence", None),
}.items()
if v is not None
} or None

span.log(output=output, metrics=metrics)
Expand Down
Loading