Skip to content

Commit 493c64a

Browse files
committed
Add tests for adapters
1 parent fc15602 commit 493c64a

File tree

4 files changed

+1013
-10
lines changed

4 files changed

+1013
-10
lines changed
Lines changed: 374 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,374 @@
1+
import json
2+
import os
3+
from types import SimpleNamespace
4+
from typing import Any, Dict, List
5+
from unittest.mock import Mock
6+
7+
import pytest
8+
import requests
9+
10+
from eval_protocol.adapters.braintrust import BraintrustAdapter
11+
from eval_protocol.models import Message
12+
13+
14+
class MockResponse:
    """Minimal stand-in for the response object returned by ``requests.post``.

    Only the members used in this module are provided: ``status_code``,
    ``json()`` and ``raise_for_status()``.

    Args:
        json_data: Payload returned verbatim by ``json()``.
        status_code: HTTP status to simulate. Values of 400 and above make
            ``raise_for_status()`` raise.
    """

    def __init__(self, json_data: Dict[str, Any], status_code: int = 200):
        self.json_data = json_data
        self.status_code = status_code

    def json(self) -> Dict[str, Any]:  # noqa: F811
        """Return the canned payload supplied at construction time."""
        return self.json_data

    def raise_for_status(self) -> None:
        """Raise ``requests.HTTPError`` when the simulated status is >= 400.

        Raises:
            requests.HTTPError: With message ``"HTTP <status>"`` and this
                object attached as ``response``.
        """
        if self.status_code >= 400:
            # Attach the response like the real ``raise_for_status`` does, so
            # handlers that read ``exc.response.status_code`` keep working.
            raise requests.HTTPError(f"HTTP {self.status_code}", response=self)
27+
28+
29+
@pytest.fixture
def mock_requests_post(monkeypatch):
    """Patch ``requests.post`` with a stub that returns one simple trace.

    The stubbed response holds a single trace (id ``"trace1"``) made of one
    user message and one assistant reply.

    Returns:
        The stub function that was installed in place of ``requests.post``.
    """

    def fake_post(url: str, headers=None, json_data=None, **kwargs):
        # ``requests.post`` takes its body as ``data=`` / ``json=`` and also
        # accepts options such as ``timeout=``. Swallow those keywords so the
        # stub does not raise TypeError whichever of them the caller passes.
        # Return a simplified response for basic tests
        return MockResponse(
            {
                "data": [
                    {
                        "id": "trace1",
                        "input": [{"role": "user", "content": "Hello"}],
                        "output": [{"message": {"role": "assistant", "content": "Hi there!"}}],
                    }
                ]
            }
        )

    monkeypatch.setattr("requests.post", fake_post)
    return fake_post
49+
50+
51+
def test_basic_btql_query_returns_evaluation_rows(mock_requests_post):
    """A BTQL query against the stubbed API yields one row with the user/assistant exchange."""
    query = "select: * from: project_logs('test_project') traces limit: 1"
    adapter = BraintrustAdapter(api_key="test_key", project_id="test_project")

    result = adapter.get_evaluation_rows(query)

    # Exactly one trace is served by the fixture, so exactly one row is expected.
    assert len(result) == 1

    conversation = result[0].messages
    assert len(conversation) == 2

    first, second = conversation[0], conversation[1]
    assert first.role == "user"
    assert first.content == "Hello"
    assert second.role == "assistant"
    assert second.content == "Hi there!"
64+
65+
66+
def test_trace_with_tool_calls_preserved(monkeypatch):
    """Test that tool calls are properly preserved in converted messages.

    The stubbed trace has an assistant output whose ``content`` is ``None``
    and which carries a single function tool call.
    """

    def mock_post(url: str, headers=None, json_data=None, **kwargs):
        # ``**kwargs`` absorbs the keywords ``requests.post`` really accepts
        # (``json=``, ``data=``, ``timeout=``, ...) so the stub cannot fail
        # with TypeError on the caller's choice of keyword.
        return MockResponse(
            {
                "data": [
                    {
                        "id": "trace_with_tools",
                        "input": [{"role": "user", "content": "Get reservation details for 7KJ2PL"}],
                        "output": [
                            {
                                "message": {
                                    "role": "assistant",
                                    "content": None,
                                    "tool_calls": [
                                        {
                                            "id": "call_123",
                                            "type": "function",
                                            "function": {
                                                "name": "get_reservation_details",
                                                "arguments": '{"reservation_id": "7KJ2PL"}',
                                            },
                                        }
                                    ],
                                }
                            }
                        ],
                    }
                ]
            }
        )

    monkeypatch.setattr("requests.post", mock_post)

    adapter = BraintrustAdapter(api_key="test_key", project_id="test_project")
    rows = adapter.get_evaluation_rows("test query")

    assert len(rows) == 1
    msgs = rows[0].messages

    # Find assistant message with tool calls
    assistant_msgs = [m for m in msgs if m.role == "assistant" and m.tool_calls]
    assert len(assistant_msgs) == 1

    # Redundant at runtime after the filter above, but narrows the Optional
    # for type checkers before indexing.
    assert assistant_msgs[0].tool_calls is not None
    tool_call = assistant_msgs[0].tool_calls[0]
    assert tool_call.id == "call_123"
    assert tool_call.function.name == "get_reservation_details"
    assert '{"reservation_id": "7KJ2PL"}' in tool_call.function.arguments
116+
117+
118+
def test_trace_with_tool_response_messages(monkeypatch):
    """Test that tool response messages are properly handled.

    The stubbed trace's input already contains a user turn, an assistant
    tool call and the matching ``tool`` message; the output is the final
    assistant reply.
    """

    def mock_post(url: str, headers=None, json_data=None, **kwargs):
        # ``**kwargs`` absorbs the keywords ``requests.post`` really accepts
        # (``json=``, ``data=``, ``timeout=``, ...) so the stub cannot fail
        # with TypeError on the caller's choice of keyword.
        return MockResponse(
            {
                "data": [
                    {
                        "id": "trace_with_tool_response",
                        "input": [
                            {"role": "user", "content": "Check reservation"},
                            {
                                "role": "assistant",
                                "content": None,
                                "tool_calls": [
                                    {
                                        "id": "call_456",
                                        "type": "function",
                                        "function": {
                                            "name": "get_reservation_details",
                                            "arguments": '{"reservation_id": "ABC123"}',
                                        },
                                    }
                                ],
                            },
                            {
                                "role": "tool",
                                "tool_call_id": "call_456",
                                "content": '{"reservation_id": "ABC123", "status": "confirmed"}',
                            },
                        ],
                        "output": [
                            {"message": {"role": "assistant", "content": "Your reservation ABC123 is confirmed."}}
                        ],
                    }
                ]
            }
        )

    monkeypatch.setattr("requests.post", mock_post)

    adapter = BraintrustAdapter(api_key="test_key", project_id="test_project")
    rows = adapter.get_evaluation_rows("test query")

    assert len(rows) == 1
    msgs = rows[0].messages

    # Should have user, assistant with tool_calls, tool response, and final assistant
    roles = [m.role for m in msgs]
    assert "user" in roles
    assert "tool" in roles
    assert roles.count("assistant") == 2  # One with tool_calls, one final response

    # Check tool message
    tool_msgs = [m for m in msgs if m.role == "tool"]
    assert len(tool_msgs) == 1
    assert tool_msgs[0].tool_call_id == "call_456"
    assert tool_msgs[0].content is not None
    assert "ABC123" in tool_msgs[0].content
177+
178+
179+
def test_tools_extracted_from_metadata_variants(monkeypatch):
    """Test that tools are extracted from different metadata locations.

    This case places the tool definitions directly under
    ``metadata["tools"]`` of the stubbed trace.
    """

    def mock_post_with_tools_in_metadata(url: str, headers=None, json_data=None, **kwargs):
        # ``**kwargs`` absorbs the keywords ``requests.post`` really accepts
        # (``json=``, ``data=``, ``timeout=``, ...) so the stub cannot fail
        # with TypeError on the caller's choice of keyword.
        return MockResponse(
            {
                "data": [
                    {
                        "id": "trace_with_metadata_tools",
                        "input": [{"role": "user", "content": "Test"}],
                        "output": [{"message": {"role": "assistant", "content": "Response"}}],
                        "metadata": {
                            "tools": [
                                {
                                    "type": "function",
                                    "function": {"name": "get_weather", "description": "Get weather info"},
                                }
                            ]
                        },
                    }
                ]
            }
        )

    monkeypatch.setattr("requests.post", mock_post_with_tools_in_metadata)

    adapter = BraintrustAdapter(api_key="test_key", project_id="test_project")
    rows = adapter.get_evaluation_rows("test query")

    assert len(rows) == 1
    assert rows[0].tools is not None
    assert len(rows[0].tools) == 1
    assert rows[0].tools[0]["function"]["name"] == "get_weather"
212+
213+
214+
def test_tools_extracted_from_hidden_params(monkeypatch):
    """Test that tools are extracted from nested hidden_params location.

    The tool definitions sit under
    ``metadata["hidden_params"]["optional_params"]["tools"]`` of the stubbed
    trace.
    """

    def mock_post_with_hidden_tools(url: str, headers=None, json_data=None, **kwargs):
        # ``**kwargs`` absorbs the keywords ``requests.post`` really accepts
        # (``json=``, ``data=``, ``timeout=``, ...) so the stub cannot fail
        # with TypeError on the caller's choice of keyword.
        return MockResponse(
            {
                "data": [
                    {
                        "id": "trace_with_hidden_tools",
                        "input": [{"role": "user", "content": "Test"}],
                        "output": [{"message": {"role": "assistant", "content": "Response"}}],
                        "metadata": {
                            "hidden_params": {
                                "optional_params": {
                                    "tools": [
                                        {
                                            "type": "function",
                                            "function": {
                                                "name": "transfer_to_human_agents",
                                                "description": "Transfer to human",
                                            },
                                        }
                                    ]
                                }
                            }
                        },
                    }
                ]
            }
        )

    monkeypatch.setattr("requests.post", mock_post_with_hidden_tools)

    adapter = BraintrustAdapter(api_key="test_key", project_id="test_project")
    rows = adapter.get_evaluation_rows("test query")

    assert len(rows) == 1
    assert rows[0].tools is not None
    assert len(rows[0].tools) == 1
    assert rows[0].tools[0]["function"]["name"] == "transfer_to_human_agents"
254+
255+
256+
def test_empty_btql_response_returns_empty_list(monkeypatch):
    """Test that empty BTQL response returns empty list."""

    def mock_empty_post(url: str, headers=None, json_data=None, **kwargs):
        # ``**kwargs`` absorbs the keywords ``requests.post`` really accepts
        # (``json=``, ``data=``, ``timeout=``, ...) so the stub cannot fail
        # with TypeError on the caller's choice of keyword.
        return MockResponse({"data": []})

    monkeypatch.setattr("requests.post", mock_empty_post)

    adapter = BraintrustAdapter(api_key="test_key", project_id="test_project")
    rows = adapter.get_evaluation_rows("test query")

    assert len(rows) == 0
268+
269+
270+
def test_trace_without_meaningful_conversation_skipped(monkeypatch):
    """Test that traces without input or output are skipped.

    The stub serves two traces: one with ``input=None`` and an empty
    ``output``, and one complete trace. Only the complete one should come
    back as a row.
    """

    def mock_post_incomplete_trace(url: str, headers=None, json_data=None, **kwargs):
        # ``**kwargs`` absorbs the keywords ``requests.post`` really accepts
        # (``json=``, ``data=``, ``timeout=``, ...) so the stub cannot fail
        # with TypeError on the caller's choice of keyword.
        return MockResponse(
            {
                "data": [
                    {"id": "incomplete_trace", "input": None, "output": []},
                    {
                        "id": "valid_trace",
                        "input": [{"role": "user", "content": "Hello"}],
                        "output": [{"message": {"role": "assistant", "content": "Hi"}}],
                    },
                ]
            }
        )

    monkeypatch.setattr("requests.post", mock_post_incomplete_trace)

    adapter = BraintrustAdapter(api_key="test_key", project_id="test_project")
    rows = adapter.get_evaluation_rows("test query")

    # Should only get the valid trace
    assert len(rows) == 1
    assert rows[0].input_metadata is not None
    assert rows[0].input_metadata.session_data is not None
    assert rows[0].input_metadata.session_data["braintrust_trace_id"] == "valid_trace"
297+
298+
299+
def test_custom_converter_used_when_provided(monkeypatch):
    """Test that custom converter is used when provided.

    The converter ignores the trace contents and always returns a fixed row,
    so the assertions can only pass if its output reaches the caller.
    """

    def mock_post(url: str, headers=None, json_data=None, **kwargs):
        # ``**kwargs`` absorbs the keywords ``requests.post`` really accepts
        # (``json=``, ``data=``, ``timeout=``, ...) so the stub cannot fail
        # with TypeError on the caller's choice of keyword.
        return MockResponse(
            {
                "data": [
                    {
                        "id": "custom_trace",
                        "input": [{"role": "user", "content": "Test"}],
                        "output": [{"message": {"role": "assistant", "content": "Response"}}],
                    }
                ]
            }
        )

    monkeypatch.setattr("requests.post", mock_post)

    def custom_converter(trace: Dict[str, Any], include_tool_calls: bool):
        # Custom converter that adds a special message
        from eval_protocol.models import EvaluationRow, InputMetadata

        return EvaluationRow(
            messages=[Message(role="system", content="Custom converted message")],
            input_metadata=InputMetadata(session_data={"custom": True}),
        )

    adapter = BraintrustAdapter(api_key="test_key", project_id="test_project")
    rows = adapter.get_evaluation_rows("test query", converter=custom_converter)

    assert len(rows) == 1
    assert rows[0].messages[0].role == "system"
    assert rows[0].messages[0].content == "Custom converted message"
    assert rows[0].input_metadata is not None
    assert rows[0].input_metadata.session_data is not None
    assert rows[0].input_metadata.session_data["custom"] is True
335+
336+
337+
def test_api_authentication_error_handling(monkeypatch):
    """Test that API authentication errors are handled properly.

    The stub answers with HTTP 401; the test expects that to surface as
    ``requests.HTTPError`` from ``get_evaluation_rows``.
    """

    def mock_auth_error(url: str, headers=None, json_data=None, **kwargs):
        # ``**kwargs`` absorbs the keywords ``requests.post`` really accepts
        # (``json=``, ``data=``, ``timeout=``, ...). Without it, an unexpected
        # keyword would raise TypeError instead of the HTTPError under test.
        return MockResponse({}, status_code=401)

    monkeypatch.setattr("requests.post", mock_auth_error)

    adapter = BraintrustAdapter(api_key="invalid_key", project_id="test_project")

    with pytest.raises(requests.HTTPError):
        adapter.get_evaluation_rows("test query")
349+
350+
351+
def test_session_data_includes_trace_id(mock_requests_post):
    """The converted row records the originating Braintrust trace ID in its session data."""
    result = BraintrustAdapter(api_key="test_key", project_id="test_project").get_evaluation_rows("test query")

    assert len(result) == 1

    metadata = result[0].input_metadata
    assert metadata is not None

    session = metadata.session_data
    assert session is not None
    # "trace1" is the id of the single trace served by the fixture.
    assert session["braintrust_trace_id"] == "trace1"
360+
361+
362+
def test_missing_required_env_vars(monkeypatch):
    """Test that missing required environment variables raise errors.

    Each constructor call omits one credential and expects a ``ValueError``
    whose message names the corresponding environment variable.
    """
    # Remove the variables outright (rather than setting them to "") so they
    # are really missing and cannot be picked up from the surrounding
    # environment. ``raising=False`` tolerates them already being unset.
    for env_name in ("BRAINTRUST_API_KEY", "BRAINTRUST_PROJECT_ID"):
        monkeypatch.delenv(env_name, raising=False)

    # Test missing API key
    with pytest.raises(ValueError, match="BRAINTRUST_API_KEY"):
        BraintrustAdapter(api_key=None, project_id="test_project")

    # Test missing project ID
    with pytest.raises(ValueError, match="BRAINTRUST_PROJECT_ID"):
        BraintrustAdapter(api_key="test_key", project_id=None)

0 commit comments

Comments
 (0)