diff --git a/internal/golden/README.md b/internal/golden/README.md index d5fe1f73..bd7cf9b1 100644 --- a/internal/golden/README.md +++ b/internal/golden/README.md @@ -4,8 +4,6 @@ These test files validate the Braintrust SDK's integration with different AI pro ## Test Files -- `genai-py-v1/google_genai.py` - Tests for Google Generative AI integration - Each test suite validates: - Basic and multi-turn completions diff --git a/internal/golden/adk-py-v1/google_adk.py b/internal/golden/adk-py-v1/google_adk.py deleted file mode 100644 index a0b82811..00000000 --- a/internal/golden/adk-py-v1/google_adk.py +++ /dev/null @@ -1,680 +0,0 @@ -# pyright: reportUnknownMemberType=none -# pyright: reportUnknownVariableType=none -# pyright: reportUnknownParameterType=none -# pyright: reportUnknownArgumentType=none -import asyncio -from pathlib import Path - -import braintrust -from braintrust_adk import setup_adk -from google.adk import Agent -from google.adk.planners import BuiltInPlanner -from google.adk.runners import Runner -from google.adk.sessions import InMemorySessionService -from google.genai import types - - -setup_adk(project_name="golden-py-adk") - -FIXTURES_DIR = Path(__file__).parent.parent / "fixtures" - -# Session configuration -APP_NAME = "golden_test_app" -USER_ID = "test-user" - - -async def get_session_runner(agent: Agent, session_id: str) -> Runner: - """Helper to create a runner with session setup.""" - session_service = InMemorySessionService() - await session_service.create_session(app_name=APP_NAME, user_id=USER_ID, session_id=session_id) - return Runner(agent=agent, app_name=APP_NAME, session_service=session_service) - - -# Test 1: Basic completion -async def test_basic_completion(): - with braintrust.start_span(name="test_basic_completion"): - print("\n=== Test 1: Basic Completion ===") - agent = Agent( - name="basic_agent", - model="gemini-2.0-flash-exp", - instruction="You are a helpful assistant.", - generate_content_config=types.GenerateContentConfig( - max_output_tokens=100, - ), - ) - - runner = await get_session_runner(agent, "session-basic") - - user_msg = types.Content(role="user", parts=[types.Part(text="What is the capital of France?")]) - - responses = [] - async for event in runner.run_async(user_id=USER_ID, session_id="session-basic", new_message=user_msg): - if event.is_final_response(): - responses.append(event) - - if responses: - print(responses[0].content.parts[0].text) - return responses - - -# Test 2: Multi-turn conversation -async def test_multi_turn(): - with braintrust.start_span(name="test_multi_turn"): - print("\n=== Test 2: Multi-turn Conversation ===") - agent = Agent( - name="conversation_agent", - model="gemini-2.5-flash", - instruction="You are a helpful assistant with good memory.", - generate_content_config=types.GenerateContentConfig( - max_output_tokens=200, - ), - ) - - runner = await get_session_runner(agent, "session-multi-turn") - - # First message - msg1 = types.Content(role="user", parts=[types.Part(text="Hi, my name is Alice.")]) - async for event in runner.run_async(user_id=USER_ID, session_id="session-multi-turn", new_message=msg1): - if event.is_final_response(): - print(f"Response 1: {event.content.parts[0].text}") - - # Second message - msg2 = types.Content(role="user", parts=[types.Part(text="What did I just tell you my name was?")]) - responses = [] - async for event in runner.run_async(user_id=USER_ID, session_id="session-multi-turn", new_message=msg2): - if event.is_final_response(): - responses.append(event) - print(f"Response 2: {event.content.parts[0].text}") - - return responses - - -# Test 3: System prompt -async def test_system_prompt(): - with braintrust.start_span(name="test_system_prompt"): - print("\n=== Test 3: System Prompt ===") - agent = Agent( - name="pirate_agent", - model="gemini-2.0-flash-exp", - instruction="You are a pirate. Always respond in pirate speak.", - generate_content_config=types.GenerateContentConfig( - max_output_tokens=150, - ), - ) - - runner = await get_session_runner(agent, "session-pirate") - - user_msg = types.Content(role="user", parts=[types.Part(text="Tell me about the weather.")]) - - responses = [] - async for event in runner.run_async(user_id=USER_ID, session_id="session-pirate", new_message=user_msg): - if event.is_final_response(): - responses.append(event) - - if responses and responses[0].content and responses[0].content.parts: - print(responses[0].content.parts[0].text) - return responses - - -# Test 4: Streaming response -async def test_streaming(): - with braintrust.start_span(name="test_streaming"): - print("\n=== Test 4: Streaming ===") - agent = Agent( - name="counting_agent", - model="gemini-2.0-flash-exp", - instruction="You are a helpful assistant.", - generate_content_config=types.GenerateContentConfig( - max_output_tokens=200, - ), - ) - - runner = await get_session_runner(agent, "session-streaming") - - user_msg = types.Content(role="user", parts=[types.Part(text="Count from 1 to 10 slowly.")]) - - full_text = "" - async for event in runner.run_async(user_id=USER_ID, session_id="session-streaming", new_message=user_msg): - if event.content and event.content.parts: - text = event.content.parts[0].text - if text: - print(text, end="") - full_text += text - - print("\n") - return full_text - - -# Test 5: Image input -async def test_image_input(): - with braintrust.start_span(name="test_image_input"): - print("\n=== Test 5: Image Input ===") - image_path = FIXTURES_DIR / "test-image.png" - - if not image_path.exists(): - print("Skipping: Image file not found") - return None - - agent = Agent( - name="vision_agent", - model="gemini-2.5-flash", - instruction="You are a helpful vision assistant that can analyze images.", - generate_content_config=types.GenerateContentConfig( - max_output_tokens=150, - ), - ) - - runner = await get_session_runner(agent, "session-vision") - - with open(image_path, "rb") as f: - image_data = f.read() - - user_msg = types.Content( - role="user", - parts=[ - types.Part.from_bytes(data=image_data, mime_type="image/png"), - types.Part(text="What color is this image?"), - ], - ) - - responses = [] - async for event in runner.run_async(user_id=USER_ID, session_id="session-vision", new_message=user_msg): - if event.is_final_response(): - responses.append(event) - - if responses and responses[0].content and responses[0].content.parts: - print(responses[0].content.parts[0].text) - return responses - - -# Test 6: Document input -async def test_document_input(): - with braintrust.start_span(name="test_document_input"): - print("\n=== Test 6: Document Input ===") - pdf_path = FIXTURES_DIR / "test-document.pdf" - - if not pdf_path.exists(): - print("Skipping: PDF file not found") - return None - - agent = Agent( - name="doc_agent", - model="gemini-2.0-flash-exp", - instruction="You are a document analysis assistant.", - generate_content_config=types.GenerateContentConfig( - max_output_tokens=150, - ), - ) - - runner = await get_session_runner(agent, "session-document") - - with open(pdf_path, "rb") as f: - pdf_data = f.read() - - user_msg = types.Content( - role="user", - parts=[ - types.Part.from_bytes(data=pdf_data, mime_type="application/pdf"), - types.Part(text="What is in this document?"), - ], - ) - - responses = [] - async for event in runner.run_async(user_id=USER_ID, session_id="session-document", new_message=user_msg): - if event.is_final_response(): - responses.append(event) - - if responses and responses[0].content and responses[0].content.parts: - print(responses[0].content.parts[0].text) - return responses - - -# Test 7: Temperature variations -async def test_temperature_variations(): - with braintrust.start_span(name="test_temperature_variations"): - print("\n=== Test 7: Temperature Variations ===") - - configs = [ - {"temperature": 0.0, "top_p": 1.0}, - {"temperature": 1.0, "top_p": 0.9}, - {"temperature": 0.7, "top_p": 0.95}, - ] - - responses = [] - for i, config in enumerate(configs): - print(f"\nConfig: temp={config['temperature']}, top_p={config['top_p']}") - - # Create a unique agent and session for each iteration to avoid state leakage - agent = Agent( - name=f"agent_temp_{str(config['temperature']).replace('.', '_')}", - model="gemini-2.0-flash-exp", - instruction="You are a creative storyteller.", - generate_content_config=types.GenerateContentConfig( - temperature=config["temperature"], - top_p=config["top_p"], - max_output_tokens=50, - ), - ) - - # Use unique session ID with iteration counter to ensure complete isolation - session_id = f"session-temp-{config['temperature']}-{i}" - runner = await get_session_runner(agent, session_id) - - user_msg = types.Content(role="user", parts=[types.Part(text="Say something creative.")]) - - accumulated_text = "" - async for event in runner.run_async(user_id=USER_ID, session_id=session_id, new_message=user_msg): - # Collect content from any event that has it - if event.content and event.content.parts: - for part in event.content.parts: - if hasattr(part, "text") and part.text: - accumulated_text += part.text - - if event.is_final_response(): - responses.append(event) - - # Print accumulated text if available - if accumulated_text: - print(accumulated_text) - - return responses - - -# Test 8: Stop sequences -async def test_stop_sequences(): - with braintrust.start_span(name="test_stop_sequences"): - print("\n=== Test 8: Stop Sequences ===") - agent = Agent( - name="story_agent", - model="gemini-2.0-flash-exp", - instruction="You are a creative writer.", - generate_content_config=types.GenerateContentConfig( - max_output_tokens=500, - stop_sequences=["END", "\n\n"], - ), - ) - - runner = await get_session_runner(agent, "session-stop") - - user_msg = types.Content(role="user", parts=[types.Part(text="Write a short story about a robot.")]) - - responses = [] - async for event in runner.run_async(user_id=USER_ID, session_id="session-stop", new_message=user_msg): - if event.is_final_response(): - responses.append(event) - - if responses and responses[0].content and responses[0].content.parts: - print(responses[0].content.parts[0].text) - return responses - - -# Test 9: Metadata -async def test_metadata(): - with braintrust.start_span(name="test_metadata"): - print("\n=== Test 9: Metadata ===") - agent = Agent( - name="basic_agent", - model="gemini-2.0-flash-exp", - instruction="You are a helpful assistant.", - generate_content_config=types.GenerateContentConfig( - max_output_tokens=100, - labels={ - "user_id": "test_user_123", - "environment": "testing", - "feature": "metadata_test", - }, - ), - ) - - runner = await get_session_runner(agent, "session-metadata") - - user_msg = types.Content(role="user", parts=[types.Part(text="Hello!")]) - - responses = [] - async for event in runner.run_async(user_id=USER_ID, session_id="session-metadata", new_message=user_msg): - if event.is_final_response(): - responses.append(event) - - if responses and responses[0].content and responses[0].content.parts: - print(responses[0].content.parts[0].text) - return responses - - -# Test 10: Long context -async def test_long_context(): - with braintrust.start_span(name="test_long_context"): - print("\n=== Test 10: Long Context ===") - agent = Agent( - name="analysis_agent", - model="gemini-2.0-flash-exp", - instruction="You are a text analysis assistant.", - generate_content_config=types.GenerateContentConfig( - max_output_tokens=100, - ), - ) - - runner = await get_session_runner(agent, "session-long") - - long_text = "The quick brown fox jumps over the lazy dog. " * 100 - user_msg = types.Content( - role="user", - parts=[ - types.Part(text=f"Here is a long text:\n\n{long_text}\n\nHow many times does the word 'fox' appear?") - ], - ) - - responses = [] - async for event in runner.run_async(user_id=USER_ID, session_id="session-long", new_message=user_msg): - if event.is_final_response(): - responses.append(event) - - if responses and responses[0].content and responses[0].content.parts: - print(responses[0].content.parts[0].text) - return responses - - -# Test 11: Mixed content types -async def test_mixed_content(): - with braintrust.start_span(name="test_mixed_content"): - print("\n=== Test 11: Mixed Content Types ===") - image_path = FIXTURES_DIR / "test-image.png" - - if not image_path.exists(): - print("Skipping: Image file not found") - return None - - agent = Agent( - name="vision_agent", - model="gemini-2.0-flash-exp", - instruction="You are a helpful vision assistant.", - generate_content_config=types.GenerateContentConfig( - max_output_tokens=200, - ), - ) - - runner = await get_session_runner(agent, "session-mixed") - - with open(image_path, "rb") as f: - image_data = f.read() - - user_msg = types.Content( - role="user", - parts=[ - types.Part(text="First, look at this image:"), - types.Part.from_bytes(data=image_data, mime_type="image/png"), - types.Part(text="Now describe what you see and explain why it matters."), - ], - ) - - responses = [] - async for event in runner.run_async(user_id=USER_ID, session_id="session-mixed", new_message=user_msg): - if event.is_final_response(): - responses.append(event) - - if responses and responses[0].content and responses[0].content.parts: - print(responses[0].content.parts[0].text) - return responses - - -# Test 12: Empty assistant message (prefill) -async def test_prefill(): - with braintrust.start_span(name="test_prefill"): - print("\n=== Test 12: Prefill ===") - agent = Agent( - name="haiku_agent", - model="gemini-2.0-flash-exp", - instruction="You are a helpful assistant.", - generate_content_config=types.GenerateContentConfig( - max_output_tokens=200, - ), - ) - - runner = await get_session_runner(agent, "session-prefill") - - # First send the user message - msg1 = types.Content(role="user", parts=[types.Part(text="Write a haiku about coding.")]) - async for event in runner.run_async(user_id=USER_ID, session_id="session-prefill", new_message=msg1): - if event.is_final_response(): - print(f"Response 1: {event.content.parts[0].text}") - - # Then send a prefill message - msg2 = types.Content(role="user", parts=[types.Part(text="Here is a haiku:")]) - responses = [] - async for event in runner.run_async(user_id=USER_ID, session_id="session-prefill", new_message=msg2): - if event.is_final_response(): - responses.append(event) - print(f"Response 2: {event.content.parts[0].text}") - - return responses - - -# Test 13: Very short max_tokens -async def test_short_max_tokens(): - with braintrust.start_span(name="test_short_max_tokens"): - print("\n=== Test 13: Very Short Max Tokens ===") - agent = Agent( - name="brief_agent", - model="gemini-2.0-flash-exp", - instruction="You are a helpful assistant.", - generate_content_config=types.GenerateContentConfig( - max_output_tokens=5, - ), - ) - - runner = await get_session_runner(agent, "session-brief") - - user_msg = types.Content(role="user", parts=[types.Part(text="What is AI?")]) - - responses = [] - async for event in runner.run_async(user_id=USER_ID, session_id="session-brief", new_message=user_msg): - if event.is_final_response(): - responses.append(event) - - if responses and responses[0].content and responses[0].content.parts: - print(responses[0].content.parts[0].text) - return responses - - -# Test 14: Tool use -async def test_tool_use(): - with braintrust.start_span(name="test_tool_use"): - print("\n=== Test 14: Tool Use ===") - - def get_weather(city_and_state: str, unit: str = "celsius"): - """Get the current weather for a location. - - Args: - city_and_state: The city and state, e.g. San Francisco, CA - unit: The unit of temperature (celsius or fahrenheit). Default to fahrenheit. - """ - return f"22 degrees {unit} and sunny in {city_and_state}" - - agent = Agent( - name="weather_agent", - model="gemini-2.0-flash-exp", - instruction="You are a helpful weather assistant. Use the get_weather tool to answer questions.", - tools=[get_weather], - generate_content_config=types.GenerateContentConfig( - max_output_tokens=500, - ), - ) - - runner = await get_session_runner(agent, "session-weather") - - user_msg = types.Content(role="user", parts=[types.Part(text="What is the weather like in Paris, France?")]) - - responses = [] - async for event in runner.run_async(user_id=USER_ID, session_id="session-weather", new_message=user_msg): - if event.is_final_response(): - responses.append(event) - print("Response content:") - if event.content and event.content.parts: - for i, part in enumerate(event.content.parts): - if hasattr(part, "function_call") and part.function_call: - print(f"Tool use block {i}:") - print(f" Tool: {part.function_call.name}") - print(f" Input: {part.function_call.args}") - elif hasattr(part, "text") and part.text: - print(f"Text: {part.text}") - - return responses - - -# Test 15: Tool use with result (multi-turn) -async def test_tool_use_with_result(): - with braintrust.start_span(name="test_tool_use_with_result"): - print("\n=== Test 15: Tool Use With Result ===") - - def calculate(operation: str, a: float, b: float): - """Perform a mathematical calculation. - - Args: - operation: The mathematical operation (add, subtract, multiply, divide) - a: First number - b: Second number - """ - ops = { - "add": a + b, - "subtract": a - b, - "multiply": a * b, - "divide": a / b if b != 0 else "Error: Division by zero", - } - return ops.get(operation, "Invalid operation") - - agent = Agent( - name="math_agent", - model="gemini-2.0-flash-exp", - instruction="You are a helpful math assistant. Use the calculate tool to perform calculations.", - tools=[calculate], - generate_content_config=types.GenerateContentConfig( - max_output_tokens=500, - ), - ) - - runner = await get_session_runner(agent, "session-calculator") - - user_msg = types.Content(role="user", parts=[types.Part(text="What is 127 multiplied by 49?")]) - - print("First response:") - async for event in runner.run_async(user_id=USER_ID, session_id="session-calculator", new_message=user_msg): - if event.is_final_response(): - if event.content and event.content.parts: - for part in event.content.parts: - if hasattr(part, "function_call") and part.function_call: - print(f"Tool called: {part.function_call.name}") - print(f"Input: {part.function_call.args}") - - # Note: In a real scenario, the agent would automatically execute the tool and continue - # For this test, we're just demonstrating the tool call initiation - responses = [] - return responses - - -# Test 16: Reasoning tokens generation and follow-up -async def test_reasoning(): - with braintrust.start_span(name="test_reasoning"): - print("\n=== Test 16: Reasoning Tokens & Follow-up ===") - - # First request: Analyze pattern and derive formula - print("\n--- First request (generate reasoning) ---") - agent = Agent( - name="reasoning_agent", - model="gemini-2.5-flash", - instruction="You are a mathematical reasoning assistant.", - generate_content_config=types.GenerateContentConfig( - max_output_tokens=2048, - ), - planner=BuiltInPlanner(thinking_config=types.ThinkingConfig(include_thoughts=True, thinking_budget=1024)), - ) - - runner = await get_session_runner(agent, "session-reasoning") - - user_msg = types.Content( - role="user", - parts=[ - types.Part( - text="Look at this sequence: 2, 6, 12, 20, 30. What is the pattern and what would be the formula for the nth term?" - ) - ], - ) - - print("First response:") - async for event in runner.run_async(user_id=USER_ID, session_id="session-reasoning", new_message=user_msg): - if event.is_final_response(): - if event.content and event.content.parts: - print(event.content.parts[0].text) - - # Second request: Apply the discovered pattern to solve a new problem - print("\n--- Follow-up request (using reasoning context) ---") - follow_up_msg = types.Content( - role="user", - parts=[ - types.Part( - text="Using the pattern you discovered, what would be the 10th term? And can you find the sum of the first 10 terms?" - ) - ], - ) - - responses = [] - print("Follow-up response:") - async for event in runner.run_async( - user_id=USER_ID, session_id="session-reasoning", new_message=follow_up_msg - ): - if event.is_final_response(): - responses.append(event) - if event.content and event.content.parts: - print(event.content.parts[0].text) - - return responses - - -async def run_async_tests(): - """Run all asynchronous tests.""" - tests = [ - test_basic_completion, - test_multi_turn, - test_system_prompt, - test_streaming, - test_image_input, - test_document_input, - test_temperature_variations, - test_stop_sequences, - test_metadata, - test_long_context, - test_mixed_content, - test_prefill, - test_short_max_tokens, - test_tool_use, - test_tool_use_with_result, - test_reasoning, - ] - - for test in tests: - try: - await test() - # Rate limiting - await asyncio.sleep(3) - except Exception as e: - print(f"Test {test.__name__} failed: {e}") - import traceback - - traceback.print_exc() - - -async def main(): - """Run all tests.""" - print("=" * 60) - print("Google ADK Golden Tests with Braintrust") - print("=" * 60) - - # Run all async tests - print("\n### Running ADK Agent Tests ###") - await run_async_tests() - - print("\n" + "=" * 60) - print("All tests completed!") - print("=" * 60) - - -if __name__ == "__main__": - asyncio.run(main()) diff --git a/internal/golden/adk-py-v1/pyproject.toml b/internal/golden/adk-py-v1/pyproject.toml deleted file mode 100644 index 7df1d400..00000000 --- a/internal/golden/adk-py-v1/pyproject.toml +++ /dev/null @@ -1,15 +0,0 @@ -[project] -name = "golden" -version = "0.1.0" -description = "Add your description here" -readme = "README.md" -requires-python = ">=3.11" -dependencies = [ - "braintrust", - "braintrust-adk", - "google-adk>=1.14.1", -] - -[tool.uv.sources] -braintrust = { path = "../../py", editable = true } -braintrust-adk = { path = "../../integrations/adk-py", editable = true } diff --git a/py/src/braintrust/integrations/adk/cassettes/test_adk_multi_turn_history_is_logged.yaml b/py/src/braintrust/integrations/adk/cassettes/test_adk_multi_turn_history_is_logged.yaml new file mode 100644 index 00000000..ebd7197a --- /dev/null +++ b/py/src/braintrust/integrations/adk/cassettes/test_adk_multi_turn_history_is_logged.yaml @@ -0,0 +1,118 @@ +interactions: +- request: + body: '{"contents": [{"parts": [{"text": "Hi, my name is Alice."}], "role": "user"}], + "systemInstruction": {"parts": [{"text": "You are a concise assistant. When + the user says their name, acknowledge it briefly. When later asked to recall + it, answer with just the name.\n\nYou are an agent. Your internal name is \"conversation_agent\"."}], + "role": "user"}, "generationConfig": {}}' + headers: + Content-Type: + - application/json + user-agent: + - google-genai-sdk/1.66.0 gl-python/3.13.3 google-adk/1.14.1 gl-python/3.13.3 + x-goog-api-client: + - google-genai-sdk/1.66.0 gl-python/3.13.3 google-adk/1.14.1 gl-python/3.13.3 + method: POST + uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent + response: + body: + string: "{\n \"candidates\": [\n {\n \"content\": {\n \"parts\": + [\n {\n \"text\": \"Okay, Alice.\\n\"\n }\n ],\n + \ \"role\": \"model\"\n },\n \"finishReason\": \"STOP\",\n + \ \"avgLogprobs\": -0.025065460801124574\n }\n ],\n \"usageMetadata\": + {\n \"promptTokenCount\": 52,\n \"candidatesTokenCount\": 5,\n \"totalTokenCount\": + 57,\n \"promptTokensDetails\": [\n {\n \"modality\": \"TEXT\",\n + \ \"tokenCount\": 52\n }\n ],\n \"candidatesTokensDetails\": + [\n {\n \"modality\": \"TEXT\",\n \"tokenCount\": 5\n }\n + \ ]\n },\n \"modelVersion\": \"gemini-2.0-flash\",\n \"responseId\": + \"FxTDaZQUjb3-4w-GqOO4Bg\"\n}\n" + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=UTF-8 + Date: + - Tue, 24 Mar 2026 22:45:43 GMT + Server: + - scaffolding on HTTPServer2 + Server-Timing: + - gfet4t7; dur=491 + Transfer-Encoding: + - chunked + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-Gemini-Service-Tier: + - standard + X-XSS-Protection: + - '0' + status: + code: 200 + message: OK +- request: + body: '{"contents": [{"parts": [{"text": "Hi, my name is Alice."}], "role": "user"}, + {"parts": [{"text": "Okay, Alice.\n"}], "role": "model"}, {"parts": [{"text": + "What name did I tell you?"}], "role": "user"}], "systemInstruction": {"parts": + [{"text": "You are a concise assistant. When the user says their name, acknowledge + it briefly. When later asked to recall it, answer with just the name.\n\nYou + are an agent. Your internal name is \"conversation_agent\"."}], "role": "user"}, + "generationConfig": {}}' + headers: + Content-Type: + - application/json + user-agent: + - google-genai-sdk/1.66.0 gl-python/3.13.3 google-adk/1.14.1 gl-python/3.13.3 + x-goog-api-client: + - google-genai-sdk/1.66.0 gl-python/3.13.3 google-adk/1.14.1 gl-python/3.13.3 + method: POST + uri: https://generativelanguage.googleapis.com/v1beta/models/gemini-2.0-flash:generateContent + response: + body: + string: "{\n \"candidates\": [\n {\n \"content\": {\n \"parts\": + [\n {\n \"text\": \"Alice.\\n\"\n }\n ],\n + \ \"role\": \"model\"\n },\n \"finishReason\": \"STOP\",\n + \ \"avgLogprobs\": -0.0751932164033254\n }\n ],\n \"usageMetadata\": + {\n \"promptTokenCount\": 64,\n \"candidatesTokenCount\": 3,\n \"totalTokenCount\": + 67,\n \"promptTokensDetails\": [\n {\n \"modality\": \"TEXT\",\n + \ \"tokenCount\": 64\n }\n ],\n \"candidatesTokensDetails\": + [\n {\n \"modality\": \"TEXT\",\n \"tokenCount\": 3\n }\n + \ ]\n },\n \"modelVersion\": \"gemini-2.0-flash\",\n \"responseId\": + \"FxTDae3LJ5O9_uMPwcTkwAQ\"\n}\n" + headers: + Alt-Svc: + - h3=":443"; ma=2592000,h3-29=":443"; ma=2592000 + Content-Encoding: + - gzip + Content-Type: + - application/json; charset=UTF-8 + Date: + - Tue, 24 Mar 2026 22:45:43 GMT + Server: + - scaffolding on HTTPServer2 + Server-Timing: + - gfet4t7; dur=438 + Transfer-Encoding: + - chunked + Vary: + - Origin + - X-Origin + - Referer + X-Content-Type-Options: + - nosniff + X-Frame-Options: + - SAMEORIGIN + X-Gemini-Service-Tier: + - standard + X-XSS-Protection: + - '0' + status: + code: 200 + message: OK +version: 1 diff --git a/py/src/braintrust/integrations/adk/test_adk.py b/py/src/braintrust/integrations/adk/test_adk.py index cff3eeaa..bed6f3e6 100644 --- a/py/src/braintrust/integrations/adk/test_adk.py +++ b/py/src/braintrust/integrations/adk/test_adk.py @@ -21,6 +21,7 @@ PROJECT_NAME = "test_adk" +FIXTURES_DIR = Path(__file__).parent.parent.parent.parent.parent.parent / "internal" / "golden" / "fixtures" setup_adk(project_name=PROJECT_NAME) @@ -53,6 +54,22 @@ def memory_logger(): yield bgl +async def _create_runner(agent: Agent, *, app_name: str, user_id: str, session_id: str) -> Runner: + session_service = InMemorySessionService() + await session_service.create_session(app_name=app_name, user_id=user_id, session_id=session_id) + return Runner(agent=agent, app_name=app_name, session_service=session_service) + + +def _extract_text_parts(contents): + texts = [] + for content in contents or []: + for part in content.get("parts", []): + text = part.get("text") + if text is not None: + texts.append(text) + return texts + + def test_adk_thread_context_propagation(memory_logger): """Runner.run should preserve Braintrust context across its thread bridge.""" import asyncio @@ -129,6 +146,192 @@ def target(): assert call_count == 1 +@pytest.mark.vcr +@pytest.mark.asyncio +async def test_adk_multi_turn_history_is_logged(memory_logger): + """Multi-turn session history should be visible in traced LLM requests.""" + assert not memory_logger.pop() + + app_name = "conversation_app" + user_id = "test-user" + session_id = "test-session-conversation" + agent = Agent( + name="conversation_agent", + model="gemini-2.0-flash", + instruction=( + "You are a concise assistant. " + "When the user says their name, acknowledge it briefly. " + "When later asked to recall it, answer with just the name." + ), + ) + runner = await _create_runner(agent, app_name=app_name, user_id=user_id, session_id=session_id) + + async def run_message(text: str) -> str: + responses = [] + user_msg = types.Content(role="user", parts=[types.Part(text=text)]) + async for event in runner.run_async(user_id=user_id, session_id=session_id, new_message=user_msg): + if event.is_final_response(): + responses.append(event) + assert responses + return responses[0].content.parts[0].text + + first_response_text = await run_message("Hi, my name is Alice.") + second_response_text = await run_message("What name did I tell you?") + + memory_logger.flush() + spans = memory_logger.pop() + + invocation_spans = [row for row in spans if row["span_attributes"]["name"] == f"invocation [{app_name}]"] + assert len(invocation_spans) == 2 + assert {span["metadata"]["session_id"] for span in invocation_spans} == {session_id} + assert {span["input"]["new_message"]["parts"][0]["text"] for span in invocation_spans} == { + "Hi, my name is Alice.", + "What name did I tell you?", + } + + llm_spans = [row for row in spans if row["span_attributes"]["type"] == "llm"] + assert len(llm_spans) == 2 + + follow_up_span = next( + span for span in llm_spans if "What name did I tell you?" in _extract_text_parts(span["input"]["contents"]) + ) + follow_up_texts = _extract_text_parts(follow_up_span["input"]["contents"]) + + assert "Hi, my name is Alice." in follow_up_texts + assert "What name did I tell you?" in follow_up_texts + assert first_response_text in follow_up_texts + assert "alice" in second_response_text.lower() + + +@pytest.mark.asyncio +async def test_adk_generation_config_is_logged(memory_logger): + """Sampling and stop-sequence config should be captured in the LLM span input.""" + from google.adk.models.base_llm import BaseLlm + from google.adk.models.llm_request import LlmRequest + from google.adk.models.llm_response import LlmResponse + from google.adk.models.registry import LLMRegistry + + assert not memory_logger.pop() + + class ConfigCaptureLlm(BaseLlm): + @classmethod + def supported_models(cls) -> list[str]: + return [r"test-llm-config-capture"] + + async def generate_content_async(self, llm_request: LlmRequest, stream: bool = False): + yield LlmResponse(content=types.Content(role="model", parts=[types.Part(text="configured")])) + + LLMRegistry.register(ConfigCaptureLlm) + + app_name = "config_app" + user_id = "test-user" + session_id = "test-session-config" + agent = LlmAgent( + name="config_agent", + model="test-llm-config-capture", + instruction="Reply with the word configured.", + generate_content_config=types.GenerateContentConfig( + max_output_tokens=23, + temperature=0.7, + top_p=0.9, + stop_sequences=["END", "\n\n"], + ), + ) + runner = await _create_runner(agent, app_name=app_name, user_id=user_id, session_id=session_id) + + user_msg = types.Content(role="user", parts=[types.Part(text="Please answer.")]) + responses = [] + async for event in runner.run_async(user_id=user_id, session_id=session_id, new_message=user_msg): + if event.is_final_response(): + responses.append(event) + + assert responses + + spans = memory_logger.pop() + llm_spans = [row for row in spans if row["span_attributes"]["type"] == "llm"] + assert llm_spans + + config = llm_spans[0]["input"]["config"] + assert config["max_output_tokens"] == 23 + assert config["temperature"] == 0.7 + assert config["top_p"] == 0.9 + assert config["stop_sequences"] == ["END", "\n\n"] + + +@pytest.mark.asyncio +async def test_adk_document_inline_data_attachment_conversion(memory_logger): + """Document bytes should be logged as attachment references, not raw payloads.""" + from google.adk.models.base_llm import BaseLlm + from google.adk.models.llm_request import LlmRequest + from google.adk.models.llm_response import LlmResponse + from google.adk.models.registry import LLMRegistry + + assert not memory_logger.pop() + + class DocumentCaptureLlm(BaseLlm): + @classmethod + def supported_models(cls) -> list[str]: + return [r"test-llm-document-capture"] + + async def generate_content_async(self, llm_request: LlmRequest, stream: bool = False): + yield LlmResponse(content=types.Content(role="model", parts=[types.Part(text="document received")])) + + LLMRegistry.register(DocumentCaptureLlm) + + app_name = "document_app" + user_id = "test-user" + session_id = "test-session-document" + agent = LlmAgent( + name="document_agent", + model="test-llm-document-capture", + instruction="Acknowledge the uploaded document.", + ) + runner = await _create_runner(agent, app_name=app_name, user_id=user_id, session_id=session_id) + + pdf_path = FIXTURES_DIR / "test-document.pdf" + with open(pdf_path, "rb") as f: + pdf_data = f.read() + + user_msg = types.Content( + role="user", + parts=[ + types.Part(inline_data=types.Blob(mime_type="application/pdf", data=pdf_data)), + types.Part(text="Summarize this document."), + ], + ) + + responses = [] + async for event in runner.run_async(user_id=user_id, session_id=session_id, new_message=user_msg): + if event.is_final_response(): + responses.append(event) + + assert responses + + spans = memory_logger.pop() + invocation_span = next(row for row in spans if row["span_attributes"]["name"] == f"invocation [{app_name}]") + new_message = invocation_span["input"]["new_message"] + assert len(new_message["parts"]) == 2 + + document_part = new_message["parts"][0] + assert "image_url" in document_part + attachment = document_part["image_url"]["url"] + assert isinstance(attachment, Attachment) + assert attachment.reference["content_type"] == "application/pdf" + assert attachment.reference["filename"] == "file.pdf" + + text_part = new_message["parts"][1] + assert text_part == {"text": "Summarize this document."} + + logged_payload = str(invocation_span).lower() + assert pdf_data[:8].hex() not in logged_payload + + llm_span = next(row for row in spans if row["span_attributes"]["type"] == "llm") + llm_contents = llm_span["input"]["contents"] + llm_document_part = llm_contents[0]["parts"][0] + assert isinstance(llm_document_part["image_url"]["url"], Attachment) + assert llm_document_part["image_url"]["url"].reference["content_type"] == "application/pdf" + + @pytest.mark.vcr @pytest.mark.asyncio async def test_adk_braintrust_integration(memory_logger):