diff --git a/contributing/samples/interactions_api/agent.py b/contributing/samples/interactions_api/agent.py index 908a8539482..5561bfbd898 100644 --- a/contributing/samples/interactions_api/agent.py +++ b/contributing/samples/interactions_api/agent.py @@ -12,19 +12,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -"""Agent definition for testing the Interactions API integration. - -NOTE: The Interactions API does NOT support mixing custom function calling tools -with built-in tools in the same agent. To work around this limitation, we use -bypass_multi_tools_limit=True on GoogleSearchTool, which converts the built-in -google_search to a function calling tool (via GoogleSearchAgentTool). - -The bypass is only triggered when len(agent.tools) > 1, so we include multiple -tools in the agent (GoogleSearchTool + get_current_weather). - -With bypass_multi_tools_limit=True and multiple tools, all tools become function -calling tools, which allows mixing google_search with custom function tools. -""" +"""Agent definition for testing the Interactions API integration.""" from google.adk.agents.llm_agent import Agent from google.adk.models.google_llm import Gemini @@ -74,10 +62,7 @@ def get_current_weather(city: str) -> dict: } -# Main agent with google_search (via bypass) and custom function tools -# Using bypass_multi_tools_limit=True converts google_search to a function calling tool. -# We need len(tools) > 1 to trigger the bypass, so we include get_current_weather directly. -# This allows mixing google_search with custom function tools via the Interactions API. +# Main agent with google_search built-in tool and custom function tools # # NOTE: code_executor is not compatible with function calling mode because the model # tries to call a function (e.g., run_code) instead of outputting code in markdown. @@ -99,7 +84,7 @@ def get_current_weather(city: str) -> dict: Be concise and helpful in your responses. 
Always confirm what you did. """, tools=[ - GoogleSearchTool(bypass_multi_tools_limit=True), + GoogleSearchTool(), get_current_weather, ], ) diff --git a/contributing/samples/interactions_api/main.py b/contributing/samples/interactions_api/main.py index a776f31ea94..8b40c3c12ea 100644 --- a/contributing/samples/interactions_api/main.py +++ b/contributing/samples/interactions_api/main.py @@ -16,17 +16,11 @@ This script tests the following features: 1. Basic text generation -2. Google Search tool (via bypass_multi_tools_limit) +2. Google Search tool 3. Multi-turn conversations with stateful interactions 4. Google Search tool (additional coverage) 5. Custom function tool (get_current_weather) -NOTE: The Interactions API does NOT support mixing custom function calling tools -with built-in tools. To work around this, we use bypass_multi_tools_limit=True -on GoogleSearchTool, which converts it to a function calling tool (via -GoogleSearchAgentTool). The bypass only triggers when len(agent.tools) > 1, -so we include both GoogleSearchTool and get_current_weather in the agent. - NOTE: Code execution via UnsafeLocalCodeExecutor is not compatible with function calling mode because the model tries to call a function instead of outputting code in markdown. @@ -41,7 +35,6 @@ import logging from pathlib import Path import time -from typing import Optional from dotenv import load_dotenv from google.adk.agents.run_config import RunConfig @@ -49,6 +42,7 @@ from google.adk.runners import InMemoryRunner from google.adk.runners import Runner from google.genai import types +import httpx from .agent import root_agent @@ -67,7 +61,8 @@ async def call_agent_async( prompt: str, agent_name: str = "", show_interaction_id: bool = True, -) -> tuple[str, Optional[str]]: + additional_parts: list[types.Part] | None = None, +) -> tuple[str, str | None]: """Call the agent asynchronously with the user's prompt. 
async def test_pdf_summarization(runner: Runner, session_id: str) -> str | None:
  """Test PDF summarization using the Interactions API.

  Downloads a sample PDF over HTTP, sends it to the agent as an inline
  ``application/pdf`` part next to a text prompt, and checks that the reply
  mentions "gemini" or "multimodal" (case-insensitive).

  Args:
    runner: The runner used to execute the agent.
    session_id: The session to send the request in.

  Returns:
    The interaction ID returned by ``call_agent_async`` (may be None).

  Raises:
    httpx.HTTPStatusError: If the PDF download returns an error status.
    AssertionError: If the reply is empty or lacks the expected keywords.
  """
  print("\n" + "=" * 60)
  print("TEST 6: PDF Summarization")
  print("=" * 60)

  url = "https://storage.googleapis.com/cloud-samples-data/generative-ai/pdf/2403.05530.pdf"
  print(f"Downloading {url}...")
  async with httpx.AsyncClient() as client:
    # Keep the HTTP response under its own name so it is never confused with
    # the agent's textual reply further down.
    http_response = await client.get(
        url, headers={"User-Agent": "Mozilla/5.0"}, follow_redirects=True
    )
    http_response.raise_for_status()
    pdf_bytes = http_response.content

  pdf_part = types.Part.from_bytes(data=pdf_bytes, mime_type="application/pdf")
  summary, interaction_id = await call_agent_async(
      runner,
      USER_ID,
      session_id,
      "Please summarize the attached PDF document.",
      additional_parts=[pdf_part],
  )

  # A truthy string is necessarily non-empty, so one check is enough.
  assert summary, "Expected a non-empty response"
  summary_lower = summary.lower()
  assert (
      "gemini" in summary_lower or "multimodal" in summary_lower
  ), f"Expected summary of PDF in response: {summary}"
  print("PASSED: PDF Summarization works")
  return interaction_id
@@ -311,6 +342,7 @@ async def run_all_tests(): await test_multi_turn_conversation(runner, session.id) await test_google_search_tool(runner, session.id) await test_custom_function_tool(runner, session.id) + await test_pdf_summarization(runner, session.id) print("\n" + "=" * 60) print("ALL TESTS PASSED (Interactions API)") diff --git a/pyproject.toml b/pyproject.toml index 0e7c449b9f8..8a7048d10a7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,7 +51,7 @@ dependencies = [ "google-cloud-spanner>=3.56,<4", # For Spanner database "google-cloud-speech>=2.30,<3", # For Audio Transcription "google-cloud-storage>=2.18,<4", # For GCS Artifact service - "google-genai>=1.72,<2", # Google GenAI SDK + "google-genai>=2.9,<3", # Google GenAI SDK "graphviz>=0.20.2,<1", # Graphviz for graph rendering "httpx>=0.27,<1", # HTTP client library "jsonschema>=4.23,<5", # Agent Builder config validation diff --git a/src/google/adk/models/interactions_utils.py b/src/google/adk/models/interactions_utils.py index 89ffe6be71b..044081bb759 100644 --- a/src/google/adk/models/interactions_utils.py +++ b/src/google/adk/models/interactions_utils.py @@ -35,21 +35,51 @@ import logging from typing import Any from typing import AsyncGenerator -from typing import Optional from typing import TYPE_CHECKING from google.genai import types +from google.genai.interactions import AudioContentParam +from google.genai.interactions import CodeExecutionCallStep +from google.genai.interactions import CodeExecutionCallStepParam +from google.genai.interactions import CodeExecutionResultStep +from google.genai.interactions import CodeExecutionResultStepParam +from google.genai.interactions import ContentParam +from google.genai.interactions import DocumentContentParam +from google.genai.interactions import ErrorEvent +from google.genai.interactions import FunctionCallStep +from google.genai.interactions import FunctionCallStepParam +from google.genai.interactions import FunctionParam +from 
google.genai.interactions import FunctionResultStep +from google.genai.interactions import FunctionResultStepParam +from google.genai.interactions import GenerationConfigParam +from google.genai.interactions import GoogleSearchResultStep +from google.genai.interactions import ImageContentParam +from google.genai.interactions import Interaction +from google.genai.interactions import InteractionCompletedEvent +from google.genai.interactions import InteractionCreatedEvent +from google.genai.interactions import InteractionSSEEvent +from google.genai.interactions import InteractionStatusUpdate +from google.genai.interactions import ModelOutputStep +from google.genai.interactions import ModelOutputStepParam +from google.genai.interactions import Step +from google.genai.interactions import StepDelta +from google.genai.interactions import StepParam +from google.genai.interactions import StepStart +from google.genai.interactions import StepStop +from google.genai.interactions import TextContentParam +from google.genai.interactions import ThoughtStep +from google.genai.interactions import ThoughtStepParam +from google.genai.interactions import ToolParam +from google.genai.interactions import UserInputStepParam +from google.genai.interactions import VideoContentParam +from pydantic import BaseModel +from typing_extensions import deprecated if TYPE_CHECKING: from google.genai import Client - from google.genai._interactions.types.interaction import Output - from google.genai._interactions.types.tool_param import ToolParam - from google.genai._interactions.types.turn_param import TurnParam - from google.genai.interactions_types import Interaction - from google.genai.interactions_types import InteractionSSEEvent - from .llm_request import LlmRequest - from .llm_response import LlmResponse +from .llm_request import LlmRequest +from .llm_response import LlmResponse logger = logging.getLogger('google_adk.' 
+ __name__) @@ -57,8 +87,8 @@ def _extract_stream_interaction_id( - event: 'InteractionSSEEvent', -) -> Optional[str]: + event: InteractionSSEEvent, +) -> str | None: """Extract the interaction ID from an Interactions SSE event. Different SSE lifecycle events expose the interaction ID on different @@ -67,26 +97,37 @@ def _extract_stream_interaction_id( google-genai builds may also yield a legacy ``interaction`` event with a top-level ``id``. """ - from google.genai._interactions.types.interaction_complete_event import InteractionCompleteEvent - from google.genai._interactions.types.interaction_start_event import InteractionStartEvent - from google.genai._interactions.types.interaction_status_update import InteractionStatusUpdate - if isinstance(event, InteractionStatusUpdate): return event.interaction_id - if isinstance(event, (InteractionStartEvent, InteractionCompleteEvent)): + if isinstance(event, (InteractionCreatedEvent, InteractionCompletedEvent)): return event.interaction.id - try: - if event.event_type == 'interaction': - return event.id - except AttributeError: - pass + if isinstance(event, Interaction): + return event.id return None -def convert_part_to_interaction_content(part: types.Part) -> Optional[dict]: +def _encode_base64_string(data: bytes) -> str: + """Encode bytes to a base64 string.""" + return base64.b64encode(data).decode('utf-8') + + +def _wrap_content_param_in_step( + content_param: ContentParam, role: str +) -> StepParam: + """Wraps a ContentParam into a UserInputStepParam or ModelOutputStepParam.""" + if role == 'model': + return ModelOutputStepParam(type='model_output', content=[content_param]) + return UserInputStepParam(type='user_input', content=[content_param]) + + +@deprecated( + 'convert_part_to_interaction_content is deprecated and will be removed in' + ' future versions' +) +def convert_part_to_interaction_content(part: types.Part) -> dict | None: """Convert a types.Part to an interaction content dict. 
def _build_media_content_param(mime_type: str, **source: Any) -> ContentParam:
  """Build the media content param whose kind matches ``mime_type``.

  Args:
    mime_type: MIME type of the payload. An 'image/', 'audio/' or 'video/'
      prefix selects the matching content type; anything else (including an
      empty string) is sent as a document.
    **source: Where the payload lives, forwarded unchanged: either
      ``data=<value>`` for inline data or ``uri=<value>`` for a file
      reference.

  Returns:
    The image, audio, video or document content param dict.
  """
  if mime_type.startswith('image/'):
    return ImageContentParam(type='image', **source, mime_type=mime_type)
  if mime_type.startswith('audio/'):
    return AudioContentParam(type='audio', **source, mime_type=mime_type)
  if mime_type.startswith('video/'):
    return VideoContentParam(type='video', **source, mime_type=mime_type)
  return DocumentContentParam(type='document', **source, mime_type=mime_type)


def _convert_part_to_interaction_content(
    part: types.Part,
    role: str = 'user',
) -> StepParam | None:
  """Convert a types.Part to an Interactions API step param.

  Text and media parts are wrapped in a step chosen by ``role``. Function
  calls, function responses, thoughts and code-execution parts map to their
  own step types and do not use ``role``. Fields are checked in a fixed order
  and the first populated one wins.

  Args:
    part: The Part object to convert.
    role: The role to wrap the content in ('user' or 'model').

  Returns:
    A StepParam dict representing the part, or None if the part type is not
    supported.
  """
  if part.text is not None:
    return _wrap_content_param_in_step(
        TextContentParam(type='text', text=part.text), role
    )

  if part.function_call is not None:
    return FunctionCallStepParam(
        type='function_call',
        id=part.function_call.id or '',
        name=part.function_call.name or '',
        arguments=part.function_call.args or {},
    )

  if part.function_response is not None:
    func_resp = part.function_response.response
    # genai.types.FunctionResponse specifies that an error response should be
    # inside an 'error' key. This is decided before any stringification below.
    is_error = isinstance(func_resp, dict) and 'error' in func_resp

    # Pass the function response through to the interactions API.
    # Dict and list values are passed directly — the Interactions API handles
    # JSON serialization internally. Pre-serializing with json.dumps() would
    # cause double-escaping.
    if not isinstance(func_resp, (dict, str, list)):
      func_resp = str(func_resp)
    logger.debug(
        'Converting function_response: name=%s, call_id=%s',
        part.function_response.name,
        part.function_response.id,
    )
    return FunctionResultStepParam(
        type='function_result',
        name=part.function_response.name or '',
        call_id=part.function_response.id or '',
        result=func_resp,
        is_error=is_error,
    )

  if part.inline_data is not None:
    # The interactions API requires inline data to be a base64 encoded string
    # when serialized to JSON, otherwise openapi_dumps will raise a TypeError.
    data = part.inline_data.data
    if isinstance(data, bytes):
      data = _encode_base64_string(data)
    return _wrap_content_param_in_step(
        _build_media_content_param(
            part.inline_data.mime_type or '', data=data
        ),
        role,
    )

  if part.file_data is not None:
    return _wrap_content_param_in_step(
        _build_media_content_param(
            part.file_data.mime_type or '', uri=part.file_data.file_uri
        ),
        role,
    )

  if part.thought:
    # part.thought is a boolean indicating this is a thought part.
    # ThoughtStepParam carries the signature as a base64 encoded string.
    thought_step = ThoughtStepParam(type='thought')
    if part.thought_signature is not None:
      thought_step['signature'] = _encode_base64_string(
          part.thought_signature
      )
    return thought_step

  if part.code_execution_result is not None:
    is_error = part.code_execution_result.outcome in (
        types.Outcome.OUTCOME_FAILED,
        types.Outcome.OUTCOME_DEADLINE_EXCEEDED,
    )
    return CodeExecutionResultStepParam(
        type='code_execution_result',
        call_id='',
        result=part.code_execution_result.output or '',
        is_error=is_error,
    )

  if part.executable_code is not None:
    return CodeExecutionCallStepParam(
        type='code_execution_call',
        id='',
        arguments={
            'code': part.executable_code.code,
            'language': part.executable_code.language,
        },
    )

  return None
""" - contents = [] + steps: list[StepParam] = [] + + role = content.role or 'user' if content.parts: for part in content.parts: - interaction_content = convert_part_to_interaction_content(part) + interaction_content = _convert_part_to_interaction_content(part, role) if interaction_content: - contents.append(interaction_content) + steps.append(interaction_content) - return { - 'role': content.role or 'user', - 'content': contents, - } + return steps -def convert_contents_to_turns( +def _convert_contents_to_steps( contents: list[types.Content], -) -> list[TurnParam]: +) -> list[StepParam]: """Convert a list of Content objects to interactions API input format. Args: contents: The list of Content objects to convert. Returns: - A list of TurnParam dictionaries for the interactions API. + A list of StepParam dictionaries for the interactions API. """ - turns = [] - for content in contents: - turn = convert_content_to_turn(content) - if turn['content']: # Only add turns with content - turns.append(turn) - return turns + return [ + step for content in contents for step in _convert_content_to_step(content) + ] def convert_tools_config_to_interactions_format( @@ -276,7 +452,7 @@ def convert_tools_config_to_interactions_format( # Handle function declarations if tool.function_declarations: for func_decl in tool.function_declarations: - func_tool: dict[str, Any] = { + func_tool: FunctionParam = { 'type': 'function', 'name': func_decl.name, } @@ -288,14 +464,14 @@ def convert_tools_config_to_interactions_format( props = {} for k, v in func_decl.parameters.properties.items(): props[k] = v.model_dump(exclude_none=True) - func_tool['parameters'] = { + + params_dict: dict[str, object] = { 'type': 'object', 'properties': props, } if func_decl.parameters.required: - func_tool['parameters']['required'] = list( - func_decl.parameters.required - ) + params_dict['required'] = list(func_decl.parameters.required) + func_tool['parameters'] = params_dict elif 
func_decl.parameters_json_schema: func_tool['parameters'] = func_decl.parameters_json_schema interaction_tools.append(func_tool) @@ -319,115 +495,127 @@ def convert_tools_config_to_interactions_format( return interaction_tools -def convert_interaction_output_to_part(output: Output) -> Optional[types.Part]: - """Convert an interaction output content to a types.Part. +def _function_result_to_response( + result: BaseModel | dict[str, Any] | list[Any] | str, +) -> dict[str, Any]: + """Convert a FunctionResultStep result into a FunctionResponse dict. + + The Interactions API types the result as a model, a list of content blocks, + or a plain string, but types.FunctionResponse.response requires a dict. A + dict is returned as-is; other non-dict shapes are wrapped under a 'result' + key. + """ + if isinstance(result, dict): + return result + if isinstance(result, BaseModel): + return result.model_dump() + if isinstance(result, list): + items: list[Any] = [] + for item in result: + if isinstance(item, BaseModel): + items.append(item.model_dump()) + else: + items.append(item) + return {'result': items} + return {'result': result} + + +def _convert_interaction_step_to_parts(step: Step) -> list[types.Part]: + """Convert an interaction output content to a list of types.Part. Args: output: The interaction output object to convert. Returns: - A types.Part object, or None if the output type is not supported. + A list of types.Part objects. 
""" - if not hasattr(output, 'type'): - return None - - output_type = output.type - - if output_type == 'text': - return types.Part.from_text(text=output.text or '') - elif output_type == 'function_call': + if isinstance(step, ModelOutputStep): + if not step.content: + return [] + + parts = [] + for content in step.content: + if content.type == 'text': + parts.append(types.Part.from_text(text=content.text)) + elif content.type in ['image', 'audio', 'document', 'video']: + if content.data: + parts.append( + types.Part( + inline_data=types.Blob( + data=content.data, + mime_type=content.mime_type, + ) + ) + ) + elif content.uri: + parts.append( + types.Part( + file_data=types.FileData( + file_uri=content.uri, + mime_type=content.mime_type, + ) + ) + ) + return parts + elif isinstance(step, FunctionCallStep): logger.debug( 'Converting function_call output: name=%s, id=%s', - output.name, - output.id, + step.name, + step.id, ) - thought_signature = None - thought_sig_value = getattr(output, 'thought_signature', None) - if thought_sig_value and isinstance(thought_sig_value, str): - # Decode base64 string back to bytes - thought_signature = base64.b64decode(thought_sig_value) - return types.Part( - function_call=types.FunctionCall( - id=output.id, - name=output.name, - args=output.arguments or {}, - ), - thought_signature=thought_signature, - ) - elif output_type == 'function_result': - result = output.result - # Handle different result formats - if isinstance(result, str): - result_value = result - elif hasattr(result, 'items'): - result_value = result.items - else: - result_value = result - return types.Part( - function_response=types.FunctionResponse( - id=output.call_id, - response=result_value, + return [ + types.Part( + function_call=types.FunctionCall( + id=step.id, + name=step.name, + args=step.arguments or {}, + ), ) - ) - elif output_type == 'image': - if output.data: - return types.Part( - inline_data=types.Blob( - data=output.data, - 
mime_type=output.mime_type, - ) - ) - elif output.uri: - return types.Part( - file_data=types.FileData( - file_uri=output.uri, - mime_type=output.mime_type, - ) - ) - elif output_type == 'audio': - if output.data: - return types.Part( - inline_data=types.Blob( - data=output.data, - mime_type=output.mime_type, - ) - ) - elif output.uri: - return types.Part( - file_data=types.FileData( - file_uri=output.uri, - mime_type=output.mime_type, - ) - ) - elif output_type == 'thought': + ] + elif isinstance(step, FunctionResultStep): + return [ + types.Part( + function_response=types.FunctionResponse( + id=step.call_id or '', + response=_function_result_to_response(step.result), + ) + ) + ] + elif isinstance(step, ThoughtStep): # ThoughtContent has a 'signature' attribute, not 'thought' # These are internal model reasoning and typically not exposed as Parts # Skip thought outputs for now - return None - elif output_type == 'code_execution_result': - return types.Part( - code_execution_result=types.CodeExecutionResult( - output=output.result or '', - outcome=types.Outcome.OUTCOME_FAILED - if output.is_error - else types.Outcome.OUTCOME_OK, + return [] + elif isinstance(step, CodeExecutionResultStep): + return [ + types.Part( + code_execution_result=types.CodeExecutionResult( + output=step.result or '', + outcome=types.Outcome.OUTCOME_FAILED + if step.is_error + else types.Outcome.OUTCOME_OK, + ) ) - ) - elif output_type == 'code_execution_call': - args = output.arguments or {} - return types.Part( - executable_code=types.ExecutableCode( - code=args.get('code', ''), - language=args.get('language', 'PYTHON'), + ] + elif isinstance(step, CodeExecutionCallStep): + args = step.arguments + return [ + types.Part( + executable_code=types.ExecutableCode( + code=args.code, + language=types.Language.PYTHON + if args.language and args.language.lower() == 'python' + else types.Language.LANGUAGE_UNSPECIFIED, + ) ) - ) - elif output_type == 'google_search_result': + ] + elif 
isinstance(step, GoogleSearchResultStep): # For google search results, we create a text part with the results - if output.result: - results_text = '\n'.join(str(r) for r in output.result if r) - return types.Part.from_text(text=results_text) + if step.result: + results_text = '\n'.join(str(r) for r in step.result if r) + return [types.Part.from_text(text=results_text)] - return None + return [] def convert_interaction_to_llm_response( @@ -443,13 +631,15 @@ def convert_interaction_to_llm_response( """ from .llm_response import LlmResponse - # Check for errors + # Check for errors. Lifecycle SSE events carry a partial interaction + # (InteractionSseEventInteraction) that has no 'error' attribute. if interaction.status == 'failed': error_msg = 'Unknown error' error_code = 'UNKNOWN_ERROR' - if interaction.error: - error_msg = interaction.error.message or error_msg - error_code = interaction.error.code or error_code + error = getattr(interaction, 'error', None) + if error: + error_msg = error.message or error_msg + error_code = error.code or error_code return LlmResponse( error_code=error_code, error_message=error_msg, @@ -458,11 +648,11 @@ def convert_interaction_to_llm_response( # Convert outputs to Content parts parts = [] - if interaction.outputs: - for output in interaction.outputs: - part = convert_interaction_output_to_part(output) - if part: - parts.append(part) + if interaction.steps: + for step in interaction.steps: + step_parts = _convert_interaction_step_to_parts(step) + if step_parts: + parts.extend(step_parts) content = None if parts: @@ -502,8 +692,8 @@ def convert_interaction_to_llm_response( def convert_interaction_event_to_llm_response( event: InteractionSSEEvent, aggregated_parts: list[types.Part], - interaction_id: Optional[str] = None, -) -> Optional[LlmResponse]: + interaction_id: str | None = None, +) -> LlmResponse | None: """Convert an InteractionSSEEvent to an LlmResponse for streaming. 
Args: @@ -514,19 +704,34 @@ def convert_interaction_event_to_llm_response( Returns: LlmResponse if this event produces one, None otherwise. """ - from .llm_response import LlmResponse - event_type = getattr(event, 'event_type', None) + if isinstance(event, StepStart): + + # Streaming function calls follow a sequence of events (https://ai.google.dev/gemini-api/docs/interactions-breaking-changes-may-2026#streaming): + # 1. StepStart: Delivers the function id and name. + # 2. StepDelta (multiple): Streams arguments as raw JSON strings via arguments. + # 3. StepStop: Signals the end of the step, where arguments are finalized and parsed. + if isinstance(event.step, FunctionCallStep): + fc = types.FunctionCall( + id=event.step.id, + name=event.step.name, + partial_args=[], + ) + part = types.Part(function_call=fc) + aggregated_parts.append(part) - if event_type == 'content.delta': - delta = event.delta - if delta is None: - return None + return LlmResponse( + content=types.Content(role='model', parts=[part]), + partial=True, + turn_complete=False, + interaction_id=interaction_id, + ) - delta_type = getattr(delta, 'type', None) + elif isinstance(event, StepDelta): + delta = event.delta - if delta_type == 'text': - text = delta.text or '' + if delta.type == 'text': + text = delta.text if text: part = types.Part.from_text(text=text) aggregated_parts.append(part) @@ -537,93 +742,121 @@ def convert_interaction_event_to_llm_response( interaction_id=interaction_id, ) - elif delta_type == 'function_call': - # Function calls are typically sent as complete units - # DON'T yield immediately - add to aggregated_parts only. - # The function_call will be yielded in the final response which has - # the correct interaction_id. If we yield here, interaction_id may be - # None because SSE streams the id later in the 'interaction' event. 
- if delta.name: - thought_signature = None - thought_sig_value = getattr(delta, 'thought_signature', None) - if thought_sig_value and isinstance(thought_sig_value, str): - # Decode base64 string back to bytes - thought_signature = base64.b64decode(thought_sig_value) - part = types.Part( - function_call=types.FunctionCall( - id=delta.id or '', - name=delta.name, - args=delta.arguments or {}, - ), - thought_signature=thought_signature, - ) - aggregated_parts.append(part) - # Return None - function_call will be in the final aggregated response - return None - - elif delta_type == 'image': - if delta.data or delta.uri: - if delta.data: + elif delta.type == 'image': + data = delta.data + uri = delta.uri + mime_type = delta.mime_type + if data or uri: + if data: part = types.Part( inline_data=types.Blob( - data=delta.data, - mime_type=delta.mime_type, + data=data, + mime_type=mime_type, ) ) else: part = types.Part( file_data=types.FileData( - file_uri=delta.uri, - mime_type=delta.mime_type, + file_uri=uri, + mime_type=mime_type, ) ) aggregated_parts.append(part) return LlmResponse( content=types.Content(role='model', parts=[part]), - partial=False, + partial=True, turn_complete=False, interaction_id=interaction_id, ) - elif event_type == 'content.stop': - # Content streaming finished, return aggregated content - if aggregated_parts: - return LlmResponse( - content=types.Content(role='model', parts=list(aggregated_parts)), - partial=False, - turn_complete=False, - interaction_id=interaction_id, - ) + elif delta.type == 'arguments_delta': + if aggregated_parts: + last_part = aggregated_parts[-1] + if last_part.function_call: + delta_args = delta.arguments + if ( + delta_args is not None + and last_part.function_call.partial_args is not None + ): + last_part.function_call.partial_args.append( + types.PartialArg(string_value=delta_args) + ) + + chunk_part = types.Part( + function_call=types.FunctionCall( + name=last_part.function_call.name, + 
partial_args=[types.PartialArg(string_value=delta_args)], + ) + ) + return LlmResponse( + content=types.Content(role='model', parts=[chunk_part]), + partial=True, + turn_complete=False, + interaction_id=interaction_id, + ) + + elif isinstance(event, StepStop): + if aggregated_parts and aggregated_parts[-1].function_call: + fc = aggregated_parts[-1].function_call + if fc.partial_args is not None: + arg_str = ''.join(pa.string_value or '' for pa in fc.partial_args) + + args = {} + if arg_str: + try: + args = json.loads(arg_str) + except json.JSONDecodeError as e: + logger.error( + 'Failed to parse function call args: %s. arg_str: %s', + e, + arg_str, + ) + fc.args = args + fc.partial_args = None + return LlmResponse( + error_code='JSON_PARSE_ERROR', + error_message='Failed to parse function call arguments', + turn_complete=True, + finish_reason=types.FinishReason.STOP, + interaction_id=interaction_id, + ) + + fc.args = args + fc.partial_args = None - elif event_type == 'interaction': - # Final interaction event with complete data - return convert_interaction_to_llm_response(event) + return None - elif event_type == 'interaction.status_update': - status = getattr(event, 'status', None) - if status in ('completed', 'requires_action'): + elif isinstance(event, InteractionCompletedEvent): + # Final aggregated response + if aggregated_parts: return LlmResponse( - content=types.Content(role='model', parts=list(aggregated_parts)) - if aggregated_parts - else None, + content=types.Content(role='model', parts=aggregated_parts), partial=False, turn_complete=True, finish_reason=types.FinishReason.STOP, interaction_id=interaction_id, ) - elif status == 'failed': - error = getattr(event, 'error', None) + # If no streaming parts were collected, convert the final interaction directly + return convert_interaction_to_llm_response(event.interaction) + + elif isinstance(event, Interaction): + # Fallback for legacy interaction events without lifecycle + return 
convert_interaction_to_llm_response(event) + + elif isinstance(event, InteractionStatusUpdate): + if event.status == 'failed': return LlmResponse( - error_code=error.code if error else 'UNKNOWN_ERROR', - error_message=error.message if error else 'Unknown error', + error_code='UNKNOWN_ERROR', + error_message='Unknown error', turn_complete=True, interaction_id=interaction_id, ) - elif event_type == 'error': + elif isinstance(event, ErrorEvent): + error = event.error return LlmResponse( - error_code=getattr(event, 'code', 'UNKNOWN_ERROR'), - error_message=getattr(event, 'message', 'Unknown error'), + error_code=error.code if error else 'UNKNOWN_ERROR', + error_message=error.message if error else 'Unknown error', turn_complete=True, interaction_id=interaction_id, ) @@ -633,7 +866,7 @@ def convert_interaction_event_to_llm_response( def build_generation_config( config: types.GenerateContentConfig, -) -> dict[str, Any]: +) -> GenerationConfigParam: """Build generation config dict for interactions API. Args: @@ -642,7 +875,7 @@ def build_generation_config( Returns: A dictionary containing generation configuration parameters. """ - generation_config: dict[str, Any] = {} + generation_config: GenerationConfigParam = {} if config.temperature is not None: generation_config['temperature'] = config.temperature if config.top_p is not None: @@ -662,7 +895,7 @@ def build_generation_config( def extract_system_instruction( config: types.GenerateContentConfig, -) -> Optional[str]: +) -> str | None: """Extract system instruction as a string from config. 
Args: @@ -679,9 +912,10 @@ def extract_system_instruction( elif isinstance(config.system_instruction, types.Content): # Extract text from Content texts = [] - for part in config.system_instruction.parts: - if part.text: - texts.append(part.text) + if config.system_instruction.parts: + for part in config.system_instruction.parts: + if part.text: + texts.append(part.text) return '\n'.join(texts) if texts else None return None @@ -707,18 +941,18 @@ def _build_tool_log(tool: ToolParam) -> str: def build_interactions_request_log( model: str, - input_turns: list[TurnParam], - system_instruction: Optional[str], - tools: Optional[list[ToolParam]], - generation_config: Optional[dict[str, Any]], - previous_interaction_id: Optional[str], + input_steps: list[StepParam], + system_instruction: str | None, + tools: list[ToolParam] | None, + generation_config: dict[str, object] | None, + previous_interaction_id: str | None, stream: bool, ) -> str: """Build a log string for an interactions API request. Args: model: The model name. - input_turns: The input turns to send. + input_steps: The input steps to send. system_instruction: The system instruction. tools: The tools configuration. generation_config: The generation config. @@ -728,11 +962,11 @@ def build_interactions_request_log( Returns: A formatted log string describing the request. 
""" - # Format input turns for logging - turns_logs = [] - for turn in input_turns: - role = turn.get('role', 'unknown') - contents = turn.get('content', []) + # Format input steps for logging + steps_logs = [] + for step in input_steps: + role = step.get('role', 'unknown') + contents = step.get('content', []) content_strs = [] for content in contents: content_type = content.get('type', 'unknown') @@ -755,7 +989,7 @@ def build_interactions_request_log( content_strs.append(f'function_result[{call_id}]: {result}') else: content_strs.append(f'{content_type}: ...') - turns_logs.append(f' [{role}]: {", ".join(content_strs)}') + steps_logs.append(f' [{role}]: {", ".join(content_strs)}') # Format tools for logging tools_logs = [] @@ -781,8 +1015,8 @@ def build_interactions_request_log( Generation Config: {config_str} ----------------------------------------------------------- -Input Turns: -{_NEW_LINE.join(turns_logs) if turns_logs else '(none)'} +Input Steps: +{_NEW_LINE.join(steps_logs) if steps_logs else '(none)'} ----------------------------------------------------------- Tools: {_NEW_LINE.join(tools_logs) if tools_logs else '(none)'} @@ -805,17 +1039,17 @@ def build_interactions_response_log(interaction: Interaction) -> str: # Extract outputs outputs_logs = [] - if hasattr(interaction, 'outputs') and interaction.outputs: - for output in interaction.outputs: - output_type = getattr(output, 'type', 'unknown') + if hasattr(interaction, 'steps') and interaction.steps: + for step in interaction.steps: + output_type = getattr(step, 'type', 'unknown') if output_type == 'text': - text = getattr(output, 'text', '') + text = getattr(step, 'text', '') if len(text) > 300: text = text[:300] + '...' 
outputs_logs.append(f' text: "{text}"') elif output_type == 'function_call': - name = getattr(output, 'name', '') - args = getattr(output, 'arguments', {}) + name = getattr(step, 'name', '') + args = getattr(step, 'arguments', {}) outputs_logs.append(f' function_call: {name}({json.dumps(args)})') else: outputs_logs.append(f' {output_type}: ...') @@ -868,7 +1102,7 @@ def build_interactions_event_log(event: InteractionSSEEvent) -> str: details = [] - if event_type == 'content.delta': + if event_type == 'step.delta': delta = getattr(event, 'delta', None) if delta: delta_type = getattr(delta, 'type', 'unknown') @@ -884,11 +1118,11 @@ def build_interactions_event_log(event: InteractionSSEEvent) -> str: else: details.append(f'{delta_type}: ...') - elif event_type == 'interaction.status_update': + elif event_type in ('interaction.completed', 'interaction.requires_action'): status = getattr(event, 'status', 'unknown') details.append(f'status: {status}') - elif event_type == 'error': + elif event_type == 'interaction.error': code = getattr(event, 'code', 'unknown') message = getattr(event, 'message', 'unknown') details.append(f'error: {code} - {message}') @@ -906,12 +1140,8 @@ def _get_latest_user_contents( For interactions API with previous_interaction_id, we only need to send the current turn's messages since prior history is maintained by - the interaction chain. - - Special handling for function_result: When the user content contains a - function_result (response to a model's function_call), we must also include - the preceding model content with the function_call. The Interactions API - needs both the function_call and function_result to properly match call_ids. + the interaction chain. The preceding model turn with the function_call + is already encapsulated in the previous_interaction_id state. Args: contents: The full list of content messages. 
@@ -923,41 +1153,16 @@ def _get_latest_user_contents( return [] # Find the latest continuous user messages from the end - latest_user_contents = [] - for content in reversed(contents): + latest_user_contents: list[types.Content] = [] + for i in range(len(contents) - 1, -1, -1): + content = contents[i] if content.role == 'user': - latest_user_contents.insert(0, content) + latest_user_contents.append(content) else: # Stop when we hit a non-user message break - # Check if the user contents contain a function_result - has_function_result = False - for content in latest_user_contents: - if content.parts: - for part in content.parts: - if part.function_response is not None: - has_function_result = True - break - if has_function_result: - break - - # If we have a function_result, we also need the preceding model content - # with the function_call so the API can match the call_id - if has_function_result and len(contents) > len(latest_user_contents): - # Get the index where user contents start - user_start_idx = len(contents) - len(latest_user_contents) - if user_start_idx > 0: - # Check if the content before user contents is a model turn with - # function_call - preceding_content = contents[user_start_idx - 1] - if preceding_content.role == 'model' and preceding_content.parts: - for part in preceding_content.parts: - if part.function_call is not None: - # Include the model's function_call turn before user's - # function_result - return [preceding_content] + latest_user_contents - + latest_user_contents.reverse() return latest_user_contents @@ -983,7 +1188,6 @@ async def generate_content_via_interactions( Yields: LlmResponse objects converted from interaction responses. 
""" - from .llm_response import LlmResponse # When previous_interaction_id is set, only send the latest continuous # user messages (the current turn) instead of full conversation history @@ -992,7 +1196,7 @@ async def generate_content_via_interactions( contents = _get_latest_user_contents(contents) # Convert contents to interactions API format - input_turns = convert_contents_to_turns(contents) + input_steps = _convert_contents_to_steps(contents) interaction_tools = convert_tools_config_to_interactions_format( llm_request.config ) @@ -1013,8 +1217,8 @@ async def generate_content_via_interactions( logger.debug( build_interactions_request_log( - model=llm_request.model, - input_turns=input_turns, + model=llm_request.model or '', + input_steps=input_steps, system_instruction=system_instruction, tools=interaction_tools if interaction_tools else None, generation_config=generation_config if generation_config else None, @@ -1024,13 +1228,13 @@ async def generate_content_via_interactions( ) # Track the current interaction ID from responses - current_interaction_id: Optional[str] = None + current_interaction_id: str | None = None if stream: # Streaming mode responses = await api_client.aio.interactions.create( model=llm_request.model, - input=input_turns, + input=input_steps, stream=True, system_instruction=system_instruction, tools=interaction_tools if interaction_tools else None, @@ -1052,21 +1256,11 @@ async def generate_content_via_interactions( if llm_response: yield llm_response - # Final aggregated response - if aggregated_parts: - yield LlmResponse( - content=types.Content(role='model', parts=aggregated_parts), - partial=False, - turn_complete=True, - finish_reason=types.FinishReason.STOP, - interaction_id=current_interaction_id, - ) - else: # Non-streaming mode interaction = await api_client.aio.interactions.create( model=llm_request.model, - input=input_turns, + input=input_steps, stream=False, system_instruction=system_instruction, tools=interaction_tools if 
interaction_tools else None, diff --git a/tests/unittests/integrations/crewai/test_crewai_tool.py b/tests/unittests/integrations/crewai/test_crewai_tool.py index f7f9bfe0bd6..eda884da600 100644 --- a/tests/unittests/integrations/crewai/test_crewai_tool.py +++ b/tests/unittests/integrations/crewai/test_crewai_tool.py @@ -16,9 +16,11 @@ import pytest -# Skip entire module if Python < 3.10 (must be before crewai_tool import) +# Skip the module when the optional crewai dependency is not installed. Guard on +# the third-party dep itself rather than the adk wrapper, so a real import bug in +# crewai_tool surfaces as a failure instead of being silently skipped. pytest.importorskip( - "google.adk.integrations.crewai.crewai_tool", reason="Requires Python 3.10+" + "crewai.tools", reason="Requires crewai (google-adk[extensions])" ) from google.adk.agents.context import Context diff --git a/tests/unittests/models/test_interactions_utils.py b/tests/unittests/models/test_interactions_utils.py index 118a925ab6e..65019b1c9eb 100644 --- a/tests/unittests/models/test_interactions_utils.py +++ b/tests/unittests/models/test_interactions_utils.py @@ -20,16 +20,27 @@ from datetime import datetime from datetime import timezone import json -from types import SimpleNamespace from unittest.mock import MagicMock from google.adk.models import interactions_utils from google.adk.models.llm_request import LlmRequest +from google.genai import interactions from google.genai import types -from google.genai._interactions.types.interaction import Interaction -from google.genai._interactions.types.interaction_complete_event import InteractionCompleteEvent -from google.genai._interactions.types.interaction_start_event import InteractionStartEvent -from google.genai._interactions.types.interaction_status_update import InteractionStatusUpdate +from google.genai.interactions import CodeExecutionResultStep +from google.genai.interactions import FunctionCallStep +from google.genai.interactions import 
FunctionResultStep +from google.genai.interactions import ImageContent +from google.genai.interactions import Interaction +from google.genai.interactions import InteractionCompletedEvent +from google.genai.interactions import InteractionCreatedEvent +from google.genai.interactions import InteractionSseEventInteraction +from google.genai.interactions import ModelOutputStep +from google.genai.interactions import StepDelta +from google.genai.interactions import StepStart +from google.genai.interactions import StepStop +from google.genai.interactions import TextContent +from google.genai.interactions import ThoughtStep +from google.genai.interactions import Usage import pytest @@ -73,21 +84,6 @@ def __init__(self, events: list[object]): self.aio = _FakeAio(events) -def _build_function_call_delta_event( - *, function_id: str, name: str, arguments: dict[str, object] -) -> SimpleNamespace: - """Build a version-agnostic content.delta event for a function call.""" - return SimpleNamespace( - event_type='content.delta', - delta=SimpleNamespace( - type='function_call', - id=function_id, - name=name, - arguments=arguments, - ), - ) - - def _build_llm_request() -> LlmRequest: """Build a minimal request for interactions streaming tests.""" return LlmRequest( @@ -102,69 +98,75 @@ def _build_llm_request() -> LlmRequest: ) -def _build_lifecycle_streamed_events() -> list[object]: +@pytest.fixture +def fc_step() -> FunctionCallStep: + """Fixture providing a basic FunctionCallStep.""" + return FunctionCallStep( + type='function_call', + id='call_1', + name='get_weather', + arguments={'city': 'Tokyo'}, + ) + + +def _build_lifecycle_streamed_events(fc_step: FunctionCallStep) -> list[object]: """Build streamed events with lifecycle updates carrying the ID.""" - now = datetime.now(timezone.utc) + now = datetime.now(timezone.utc).isoformat() + + interaction = InteractionSseEventInteraction( + id='interaction_123', + created=now, + updated=now, + status='requires_action', + steps=[fc_step], 
+ ) + return [ - InteractionStartEvent( - event_type='interaction.start', - interaction=Interaction( - id='interaction_123', - created=now, - updated=now, - status='in_progress', - ), + InteractionCreatedEvent( + event_type='interaction.created', + interaction=interaction, ), - _build_function_call_delta_event( - function_id='call_1', - name='get_weather', - arguments={'city': 'Tokyo'}, - ), - InteractionStatusUpdate( - event_type='interaction.status_update', - interaction_id='interaction_123', - status='requires_action', + InteractionCompletedEvent( + event_type='interaction.completed', + interaction=interaction, ), ] -def _build_complete_streamed_events() -> list[object]: +def _build_complete_streamed_events(fc_step: FunctionCallStep) -> list[object]: """Build streamed events with the ID on an interaction.complete event.""" - now = datetime.now(timezone.utc) + now = datetime.now(timezone.utc).isoformat() + + interaction = InteractionSseEventInteraction( + id='interaction_complete_123', + created=now, + updated=now, + status='requires_action', + steps=[fc_step], + ) + return [ - _build_function_call_delta_event( - function_id='call_1', - name='get_weather', - arguments={'city': 'Tokyo'}, - ), - InteractionCompleteEvent( - event_type='interaction.complete', - interaction=Interaction( - id='interaction_complete_123', - created=now, - updated=now, - status='requires_action', - ), + InteractionCompletedEvent( + event_type='interaction.completed', + interaction=interaction, ), ] -def _build_legacy_streamed_events() -> list[object]: +def _build_legacy_streamed_events(fc_step: FunctionCallStep) -> list[object]: """Build streamed events with the ID on the legacy interaction event.""" + now = datetime.now(timezone.utc).isoformat() + + interaction = Interaction( + id='interaction_legacy_123', + created=now, + updated=now, + status='requires_action', + steps=[fc_step], + ) + return [ - _build_function_call_delta_event( - function_id='call_1', - name='get_weather', - 
arguments={'city': 'Tokyo'}, - ), - SimpleNamespace( - event_type='interaction', - id='interaction_legacy_123', - status='requires_action', - error=None, - outputs=None, - usage=None, - ), + interaction, ] @@ -194,13 +196,27 @@ async def _collect_function_call_interaction_ids( class TestConvertPartToInteractionContent: - """Tests for convert_part_to_interaction_content.""" + """Tests for _convert_part_to_interaction_content.""" def test_text_part(self): """Test converting a text Part.""" part = types.Part(text='Hello, world!') - result = interactions_utils.convert_part_to_interaction_content(part) - assert result == {'type': 'text', 'text': 'Hello, world!'} + result = interactions_utils._convert_part_to_interaction_content(part) + assert result == { + 'type': 'user_input', + 'content': [{'type': 'text', 'text': 'Hello, world!'}], + } + + def test_text_part_model_role(self): + """Test converting a text Part for model role.""" + part = types.Part(text='Hello, user!') + result = interactions_utils._convert_part_to_interaction_content( + part, role='model' + ) + assert result == { + 'type': 'model_output', + 'content': [{'type': 'text', 'text': 'Hello, user!'}], + } def test_function_call_part(self): """Test converting a function call Part.""" @@ -211,7 +227,7 @@ def test_function_call_part(self): args={'city': 'London'}, ) ) - result = interactions_utils.convert_part_to_interaction_content(part) + result = interactions_utils._convert_part_to_interaction_content(part) assert result == { 'type': 'function_call', 'id': 'call_123', @@ -227,12 +243,12 @@ def test_function_call_part_no_id(self): args={'city': 'London'}, ) ) - result = interactions_utils.convert_part_to_interaction_content(part) + result = interactions_utils._convert_part_to_interaction_content(part) assert result['id'] == '' assert result['name'] == 'get_weather' - def test_function_call_part_with_thought_signature(self): - """Test converting a function call Part with thought_signature.""" + def 
test_function_call_part_thought_signature_dropped(self): + """Thought signatures are not sent on interactions function call steps.""" part = types.Part( function_call=types.FunctionCall( id='call_456', @@ -241,17 +257,14 @@ def test_function_call_part_with_thought_signature(self): ), thought_signature=b'test_signature_bytes', ) - result = interactions_utils.convert_part_to_interaction_content(part) - assert result['type'] == 'function_call' - assert result['id'] == 'call_456' - assert result['name'] == 'my_tool' - assert result['arguments'] == {'doc': 'content'} - # thought_signature should be base64 encoded - assert 'thought_signature' in result - - assert ( - base64.b64decode(result['thought_signature']) == b'test_signature_bytes' - ) + result = interactions_utils._convert_part_to_interaction_content(part) + assert result == { + 'type': 'function_call', + 'id': 'call_456', + 'name': 'my_tool', + 'arguments': {'doc': 'content'}, + } + assert 'signature' not in result def test_function_call_part_without_thought_signature(self): """Test converting a function call Part without thought_signature.""" @@ -262,10 +275,10 @@ def test_function_call_part_without_thought_signature(self): args={}, ) ) - result = interactions_utils.convert_part_to_interaction_content(part) + result = interactions_utils._convert_part_to_interaction_content(part) assert result['type'] == 'function_call' - # thought_signature should not be present - assert 'thought_signature' not in result + # signature should not be present + assert 'signature' not in result def test_function_response_dict(self): """Test converting a function response Part with dict response.""" @@ -276,13 +289,15 @@ def test_function_response_dict(self): response={'temperature': 20, 'condition': 'sunny'}, ) ) - result = interactions_utils.convert_part_to_interaction_content(part) + result = interactions_utils._convert_part_to_interaction_content(part) assert result['type'] == 'function_result' assert result['call_id'] == 
'call_123' assert result['name'] == 'get_weather' - # Dict should be passed through directly (not JSON-serialized). - assert result['result'] == {'temperature': 20, 'condition': 'sunny'} - assert isinstance(result['result'], dict) + # Dict should be passed through directly (not JSON-serialized) + assert result['result'] == { + 'temperature': 20, + 'condition': 'sunny', + } def test_function_response_simple(self): """Test converting a function response Part with simple response.""" @@ -293,13 +308,30 @@ def test_function_response_simple(self): response={'message': 'Weather is sunny'}, ) ) - result = interactions_utils.convert_part_to_interaction_content(part) + result = interactions_utils._convert_part_to_interaction_content(part) assert result['type'] == 'function_result' assert result['call_id'] == 'call_123' assert result['name'] == 'check_weather' - # Dict should be passed through directly (not JSON-serialized). + # Dict should be JSON serialized assert result['result'] == {'message': 'Weather is sunny'} + def test_convert_part_to_interaction_content_function_response_error(self): + part = types.Part( + function_response=types.FunctionResponse( + name='my_function', + id='call_123', + response={'error': 'something went wrong'}, + ) + ) + result = interactions_utils._convert_part_to_interaction_content(part) + assert result == interactions.FunctionResultStepParam( + type='function_result', + name='my_function', + call_id='call_123', + result={'error': 'something went wrong'}, + is_error=True, + ) + def test_function_response_dict_not_double_serialized(self): """Regression test: avoid double-serializing bash tool outputs. @@ -320,7 +352,7 @@ def test_function_response_dict_not_double_serialized(self): response=bash_response, ) ) - result = interactions_utils.convert_part_to_interaction_content(part) + result = interactions_utils._convert_part_to_interaction_content(part) # The result value must be the dict itself, NOT a JSON string. 
assert isinstance(result['result'], dict) assert result['result'] == bash_response @@ -337,11 +369,16 @@ def test_inline_data_image(self): mime_type='image/png', ) ) - result = interactions_utils.convert_part_to_interaction_content(part) + result = interactions_utils._convert_part_to_interaction_content(part) assert result == { - 'type': 'image', - 'data': b'image_data', - 'mime_type': 'image/png', + 'type': 'user_input', + 'content': [{ + 'type': 'image', + 'data': ( + 'aW1hZ2VfZGF0YQ==' + ), # base64.b64encode(b'image_data').decode('utf-8') + 'mime_type': 'image/png', + }], } def test_inline_data_audio(self): @@ -352,11 +389,16 @@ def test_inline_data_audio(self): mime_type='audio/mp3', ) ) - result = interactions_utils.convert_part_to_interaction_content(part) + result = interactions_utils._convert_part_to_interaction_content(part) assert result == { - 'type': 'audio', - 'data': b'audio_data', - 'mime_type': 'audio/mp3', + 'type': 'user_input', + 'content': [{ + 'type': 'audio', + 'data': ( + 'YXVkaW9fZGF0YQ==' + ), # base64.b64encode(b'audio_data').decode('utf-8') + 'mime_type': 'audio/mp3', + }], } def test_inline_data_video(self): @@ -367,11 +409,16 @@ def test_inline_data_video(self): mime_type='video/mp4', ) ) - result = interactions_utils.convert_part_to_interaction_content(part) + result = interactions_utils._convert_part_to_interaction_content(part) assert result == { - 'type': 'video', - 'data': b'video_data', - 'mime_type': 'video/mp4', + 'type': 'user_input', + 'content': [{ + 'type': 'video', + 'data': ( + 'dmlkZW9fZGF0YQ==' + ), # base64.b64encode(b'video_data').decode('utf-8') + 'mime_type': 'video/mp4', + }], } def test_inline_data_document(self): @@ -382,11 +429,16 @@ def test_inline_data_document(self): mime_type='application/pdf', ) ) - result = interactions_utils.convert_part_to_interaction_content(part) + result = interactions_utils._convert_part_to_interaction_content(part) assert result == { - 'type': 'document', - 'data': b'doc_data', - 
'mime_type': 'application/pdf', + 'type': 'user_input', + 'content': [{ + 'type': 'document', + 'data': ( + 'ZG9jX2RhdGE=' + ), # base64.b64encode(b'doc_data').decode('utf-8') + 'mime_type': 'application/pdf', + }], } def test_file_data_image(self): @@ -397,11 +449,14 @@ def test_file_data_image(self): mime_type='image/png', ) ) - result = interactions_utils.convert_part_to_interaction_content(part) + result = interactions_utils._convert_part_to_interaction_content(part) assert result == { - 'type': 'image', - 'uri': 'gs://bucket/image.png', - 'mime_type': 'image/png', + 'type': 'user_input', + 'content': [{ + 'type': 'image', + 'uri': 'gs://bucket/image.png', + 'mime_type': 'image/png', + }], } def test_text_with_thought_flag(self): @@ -410,22 +465,25 @@ def test_text_with_thought_flag(self): # When text is present, the convert function returns text type (not thought) # because text check comes before thought check in the implementation part = types.Part(text='Let me think about this...', thought=True) - result = interactions_utils.convert_part_to_interaction_content(part) + result = interactions_utils._convert_part_to_interaction_content(part) # Text content is returned as-is (thought flag not represented in output) - assert result == {'type': 'text', 'text': 'Let me think about this...'} + assert result == { + 'type': 'user_input', + 'content': [{'type': 'text', 'text': 'Let me think about this...'}], + } def test_thought_only_part(self): """Test converting a thought-only Part with signature.""" signature_bytes = b'test-thought-signature' part = types.Part(thought=True, thought_signature=signature_bytes) - result = interactions_utils.convert_part_to_interaction_content(part) + result = interactions_utils._convert_part_to_interaction_content(part) expected_signature = base64.b64encode(signature_bytes).decode('utf-8') assert result == {'type': 'thought', 'signature': expected_signature} def test_thought_only_part_without_signature(self): """Test converting a 
thought-only Part without signature.""" part = types.Part(thought=True) - result = interactions_utils.convert_part_to_interaction_content(part) + result = interactions_utils._convert_part_to_interaction_content(part) assert result == {'type': 'thought'} def test_code_execution_result(self): @@ -436,7 +494,7 @@ def test_code_execution_result(self): outcome=types.Outcome.OUTCOME_OK, ) ) - result = interactions_utils.convert_part_to_interaction_content(part) + result = interactions_utils._convert_part_to_interaction_content(part) assert result == { 'type': 'code_execution_result', 'call_id': '', @@ -452,7 +510,7 @@ def test_code_execution_result_with_error(self): outcome=types.Outcome.OUTCOME_FAILED, ) ) - result = interactions_utils.convert_part_to_interaction_content(part) + result = interactions_utils._convert_part_to_interaction_content(part) assert result == { 'type': 'code_execution_result', 'call_id': '', @@ -468,7 +526,7 @@ def test_code_execution_result_deadline_exceeded(self): outcome=types.Outcome.OUTCOME_DEADLINE_EXCEEDED, ) ) - result = interactions_utils.convert_part_to_interaction_content(part) + result = interactions_utils._convert_part_to_interaction_content(part) assert result == { 'type': 'code_execution_result', 'call_id': '', @@ -484,7 +542,7 @@ def test_executable_code(self): language='PYTHON', ) ) - result = interactions_utils.convert_part_to_interaction_content(part) + result = interactions_utils._convert_part_to_interaction_content(part) assert result == { 'type': 'code_execution_call', 'id': '', @@ -497,12 +555,12 @@ def test_executable_code(self): def test_empty_part(self): """Test converting an empty Part returns None.""" part = types.Part() - result = interactions_utils.convert_part_to_interaction_content(part) + result = interactions_utils._convert_part_to_interaction_content(part) assert result is None -class TestConvertContentToTurn: - """Tests for convert_content_to_turn.""" +class TestConvertContentToStep: + """Tests for 
_convert_content_to_step.""" def test_user_content(self): """Test converting user content.""" @@ -510,11 +568,11 @@ def test_user_content(self): role='user', parts=[types.Part(text='Hello!')], ) - result = interactions_utils.convert_content_to_turn(content) - assert result == { - 'role': 'user', + result = interactions_utils._convert_content_to_step(content) + assert result == [{ + 'type': 'user_input', 'content': [{'type': 'text', 'text': 'Hello!'}], - } + }] def test_model_content(self): """Test converting model content.""" @@ -522,11 +580,11 @@ def test_model_content(self): role='model', parts=[types.Part(text='Hi there!')], ) - result = interactions_utils.convert_content_to_turn(content) - assert result == { - 'role': 'model', + result = interactions_utils._convert_content_to_step(content) + assert result == [{ + 'type': 'model_output', 'content': [{'type': 'text', 'text': 'Hi there!'}], - } + }] def test_multiple_parts(self): """Test converting content with multiple parts.""" @@ -539,30 +597,60 @@ def test_multiple_parts(self): ), ], ) - result = interactions_utils.convert_content_to_turn(content) - assert result['role'] == 'user' - assert len(result['content']) == 2 - assert result['content'][0] == {'type': 'text', 'text': 'Look at this:'} - assert result['content'][1]['type'] == 'image' + result = interactions_utils._convert_content_to_step(content) + assert len(result) == 2 + assert result[0]['type'] == 'user_input' + assert result[0]['content'][0] == {'type': 'text', 'text': 'Look at this:'} + assert result[1]['type'] == 'user_input' + assert result[1]['content'][0]['type'] == 'image' + + def test_interleaved_parts(self): + """Test converting content with interleaved text and media parts.""" + content = types.Content( + role='user', + parts=[ + types.Part(text='First:'), + types.Part( + inline_data=types.Blob(data=b'img1', mime_type='image/png') + ), + types.Part(text='Second:'), + types.Part( + inline_data=types.Blob(data=b'img2', mime_type='image/jpeg') 
+ ), + types.Part(text='End'), + ], + ) + result = interactions_utils._convert_content_to_step(content) + assert len(result) == 5 + assert result[0]['type'] == 'user_input' + assert result[0]['content'][0] == {'type': 'text', 'text': 'First:'} + assert result[1]['type'] == 'user_input' + assert result[1]['content'][0]['type'] == 'image' + assert result[2]['type'] == 'user_input' + assert result[2]['content'][0] == {'type': 'text', 'text': 'Second:'} + assert result[3]['type'] == 'user_input' + assert result[3]['content'][0]['type'] == 'image' + assert result[4]['type'] == 'user_input' + assert result[4]['content'][0] == {'type': 'text', 'text': 'End'} def test_default_role(self): """Test that default role is 'user' when not specified.""" content = types.Content(parts=[types.Part(text='Hi')]) - result = interactions_utils.convert_content_to_turn(content) - assert result['role'] == 'user' + result = interactions_utils._convert_content_to_step(content) + assert result[0]['type'] == 'user_input' -class TestConvertContentsToTurns: - """Tests for convert_contents_to_turns.""" +class TestConvertContentsToSteps: + """Tests for convert_contents_to_steps.""" def test_single_content(self): """Test converting a list with single content.""" contents = [ types.Content(role='user', parts=[types.Part(text='What is 2+2?')]), ] - result = interactions_utils.convert_contents_to_turns(contents) + result = interactions_utils._convert_contents_to_steps(contents) assert len(result) == 1 - assert result[0]['role'] == 'user' + assert result[0]['type'] == 'user_input' assert result[0]['content'][0]['text'] == 'What is 2+2?' 
def test_multi_turn_conversation(self): @@ -572,11 +660,11 @@ def test_multi_turn_conversation(self): types.Content(role='model', parts=[types.Part(text='Hello!')]), types.Content(role='user', parts=[types.Part(text='How are you?')]), ] - result = interactions_utils.convert_contents_to_turns(contents) + result = interactions_utils._convert_contents_to_steps(contents) assert len(result) == 3 - assert result[0]['role'] == 'user' - assert result[1]['role'] == 'model' - assert result[2]['role'] == 'user' + assert result[0]['type'] == 'user_input' + assert result[1]['type'] == 'model_output' + assert result[2]['type'] == 'user_input' def test_empty_content_skipped(self): """Test that empty contents are skipped.""" @@ -584,13 +672,13 @@ def test_empty_content_skipped(self): types.Content(role='user', parts=[types.Part(text='Hi')]), types.Content(role='model', parts=[]), # Empty parts ] - result = interactions_utils.convert_contents_to_turns(contents) + result = interactions_utils._convert_contents_to_steps(contents) # Only the first content should be included assert len(result) == 1 class TestConvertToolsConfig: - """Tests for convert_tools_config_to_interactions_format.""" + """Tests for _convert_tools_config_to_interactions_format.""" def test_function_declaration(self): """Test converting function declarations.""" @@ -651,133 +739,184 @@ def test_no_tools(self): assert result == [] -class TestConvertInteractionOutputToPart: - """Tests for convert_interaction_output_to_part.""" +class TestConvertInteractionOutputToParts: + """Tests for _convert_interaction_step_to_parts.""" def test_text_output(self): """Test converting text output.""" - output = MagicMock() - output.type = 'text' - output.text = 'Hello!' 
- result = interactions_utils.convert_interaction_output_to_part(output) + output = ModelOutputStep( + type='model_output', content=[TextContent(type='text', text='Hello!')] + ) + result_list = interactions_utils._convert_interaction_step_to_parts(output) + result = result_list[0] if result_list else None assert result.text == 'Hello!' def test_function_call_output(self): """Test converting function call output.""" - output = MagicMock() - output.type = 'function_call' - output.id = 'call_123' - output.name = 'get_weather' - output.arguments = {'city': 'London'} - result = interactions_utils.convert_interaction_output_to_part(output) + output = FunctionCallStep( + type='function_call', + id='call_123', + name='get_weather', + arguments={'city': 'London'}, + ) + result_list = interactions_utils._convert_interaction_step_to_parts(output) + result = result_list[0] if result_list else None assert result.function_call.id == 'call_123' assert result.function_call.name == 'get_weather' assert result.function_call.args == {'city': 'London'} - def test_function_call_output_with_thought_signature(self): - """Test converting function call output with thought_signature.""" - output = MagicMock( - spec=['type', 'id', 'name', 'arguments', 'thought_signature'] - ) - output.type = 'function_call' - output.id = 'call_sig_123' - output.name = 'gemini3_tool' - output.arguments = {'content': 'hello'} - # thought_signature is base64 encoded in the output - output.thought_signature = base64.b64encode(b'gemini3_signature').decode( - 'utf-8' - ) - result = interactions_utils.convert_interaction_output_to_part(output) - assert result.function_call.id == 'call_sig_123' - assert result.function_call.name == 'gemini3_tool' - assert result.function_call.args == {'content': 'hello'} - # thought_signature should be decoded back to bytes - assert result.thought_signature == b'gemini3_signature' - def test_function_call_output_without_thought_signature(self): """Test converting function call 
output without thought_signature.""" - output = MagicMock(spec=['type', 'id', 'name', 'arguments']) - output.type = 'function_call' - output.id = 'call_no_sig' - output.name = 'regular_tool' - output.arguments = {} - result = interactions_utils.convert_interaction_output_to_part(output) + output = FunctionCallStep( + type='function_call', + id='call_no_sig', + name='regular_tool', + arguments={}, + ) + result_list = interactions_utils._convert_interaction_step_to_parts(output) + result = result_list[0] if result_list else None assert result.function_call.id == 'call_no_sig' assert result.function_call.name == 'regular_tool' # thought_signature should be None assert result.thought_signature is None - def test_function_result_output_with_items_list(self): - """Test converting function result output with items list. - - The implementation handles the case where result has an 'items' attribute - that returns a list-like structure. This test validates that path. - """ - output = MagicMock() - output.type = 'function_result' - output.call_id = 'call_123' - # Create a mock that has .items returning a dict (for FunctionResponse) - output.result = MagicMock() - output.result.items = {'weather': 'Sunny'} # items attribute returns dict - result = interactions_utils.convert_interaction_output_to_part(output) + def test_function_result_output(self): + """Test converting function result output.""" + output = FunctionResultStep( + type='function_result', + call_id='call_123', + result={'weather': 'Sunny'}, + ) + result_list = interactions_utils._convert_interaction_step_to_parts(output) + result = result_list[0] if result_list else None assert result.function_response.id == 'call_123' assert result.function_response.response == {'weather': 'Sunny'} + def test_function_result_output_preserves_none_values(self): + """None values in a dict result must not be dropped.""" + output = FunctionResultStep( + type='function_result', + call_id='call_none', + result={'data': None, 'ok': 
True}, + ) + result_list = interactions_utils._convert_interaction_step_to_parts(output) + result = result_list[0] if result_list else None + assert result.function_response.response == {'data': None, 'ok': True} + + def test_function_result_output_string(self): + """A plain string result is wrapped under a 'result' key.""" + output = FunctionResultStep( + type='function_result', + call_id='call_str', + result='plain text', + ) + result_list = interactions_utils._convert_interaction_step_to_parts(output) + result = result_list[0] if result_list else None + assert result.function_response.response == {'result': 'plain text'} + + def test_function_result_output_list(self): + """A list result of content blocks is wrapped under a 'result' key.""" + output = FunctionResultStep( + type='function_result', + call_id='call_list', + result=[{'type': 'text', 'text': 'hi'}], + ) + result_list = interactions_utils._convert_interaction_step_to_parts(output) + result = result_list[0] if result_list else None + wrapped = result.function_response.response['result'] + assert wrapped[0]['type'] == 'text' + assert wrapped[0]['text'] == 'hi' + def test_image_output_with_data(self): """Test converting image output with inline data.""" - output = MagicMock() - output.type = 'image' - output.data = b'image_bytes' - output.uri = None - output.mime_type = 'image/png' - result = interactions_utils.convert_interaction_output_to_part(output) + output = ModelOutputStep( + type='model_output', + content=[ + ImageContent( + type='image', + data=base64.b64encode(b'image_bytes').decode('utf-8'), + mime_type='image/png', + ) + ], + ) + result_list = interactions_utils._convert_interaction_step_to_parts(output) + result = result_list[0] if result_list else None assert result.inline_data.data == b'image_bytes' assert result.inline_data.mime_type == 'image/png' def test_image_output_with_uri(self): """Test converting image output with URI.""" - output = MagicMock() - output.type = 'image' - output.data 
= None - output.uri = 'gs://bucket/image.png' - output.mime_type = 'image/png' - result = interactions_utils.convert_interaction_output_to_part(output) + output = ModelOutputStep( + type='model_output', + content=[ + ImageContent( + type='image', + uri='gs://bucket/image.png', + mime_type='image/png', + ) + ], + ) + result_list = interactions_utils._convert_interaction_step_to_parts(output) + result = result_list[0] if result_list else None assert result.file_data.file_uri == 'gs://bucket/image.png' assert result.file_data.mime_type == 'image/png' def test_code_execution_result_output(self): """Test converting code execution result output.""" - output = MagicMock() - output.type = 'code_execution_result' - output.result = 'Output from code' - output.is_error = False # Indicate successful execution - result = interactions_utils.convert_interaction_output_to_part(output) + output = CodeExecutionResultStep( + type='code_execution_result', + call_id='', + result='Output from code', + is_error=False, + ) + result_list = interactions_utils._convert_interaction_step_to_parts(output) + result = result_list[0] if result_list else None assert result.code_execution_result.output == 'Output from code' assert result.code_execution_result.outcome == types.Outcome.OUTCOME_OK def test_code_execution_result_error_output(self): """Test converting code execution result output with error.""" - output = MagicMock() - output.type = 'code_execution_result' - output.result = 'Error: division by zero' - output.is_error = True # Indicate failed execution - result = interactions_utils.convert_interaction_output_to_part(output) + output = CodeExecutionResultStep( + type='code_execution_result', + call_id='', + result='Error: division by zero', + is_error=True, + ) + result_list = interactions_utils._convert_interaction_step_to_parts(output) + result = result_list[0] if result_list else None assert result.code_execution_result.output == 'Error: division by zero' assert 
result.code_execution_result.outcome == types.Outcome.OUTCOME_FAILED - def test_thought_output_returns_none(self): - """Test that thought output returns None (not exposed as Part).""" - output = MagicMock() - output.type = 'thought' - output.signature = 'thinking...' - result = interactions_utils.convert_interaction_output_to_part(output) - assert result is None + def test_thought_output_returns_empty(self): + """Test that thought output returns empty list (not exposed as Part).""" + output = ThoughtStep(type='thought', signature='thinking...') + result = interactions_utils._convert_interaction_step_to_parts(output) + assert result == [] def test_no_type_attribute(self): """Test handling output without type attribute.""" output = MagicMock(spec=[]) # No 'type' attribute - result = interactions_utils.convert_interaction_output_to_part(output) - assert result is None + result = interactions_utils._convert_interaction_step_to_parts(output) + assert result == [] + + def test_code_execution_call_output_uppercase_python(self): + """Test converting code execution call output with uppercase PYTHON.""" + from google.genai.interactions import CodeExecutionCallStep + + mock_args = MagicMock() + mock_args.code = 'print("hello")' + mock_args.language = 'PYTHON' + + output = CodeExecutionCallStep.model_construct( + type='code_execution_call', + id='', + arguments=mock_args, + ) + result_list = interactions_utils._convert_interaction_step_to_parts(output) + result = result_list[0] if result_list else None + assert result is not None + assert result.executable_code.code == 'print("hello")' + assert result.executable_code.language == types.Language.PYTHON class TestConvertInteractionToLlmResponse: @@ -785,18 +924,19 @@ class TestConvertInteractionToLlmResponse: def test_successful_text_response(self): """Test converting a successful text response.""" - interaction = MagicMock() - interaction.id = 'interaction_123' - interaction.status = 'completed' - text_output = MagicMock() - 
text_output.type = 'text' - text_output.text = 'The answer is 4.' - interaction.outputs = [text_output] - interaction.usage = MagicMock() - interaction.usage.total_input_tokens = 10 - interaction.usage.total_output_tokens = 5 - interaction.error = None - + interaction = Interaction( + id='interaction_123', + status='completed', + created=datetime.now(timezone.utc).isoformat(), + updated=datetime.now(timezone.utc).isoformat(), + steps=[ + ModelOutputStep( + type='model_output', + content=[TextContent(type='text', text='The answer is 4.')], + ) + ], + usage=Usage(total_input_tokens=10, total_output_tokens=5), + ) result = interactions_utils.convert_interaction_to_llm_response(interaction) assert result.interaction_id == 'interaction_123' @@ -808,13 +948,14 @@ def test_successful_text_response(self): def test_failed_response(self): """Test converting a failed response.""" - interaction = MagicMock() - interaction.id = 'interaction_123' - interaction.status = 'failed' - interaction.outputs = [] - interaction.error = MagicMock() - interaction.error.code = 'INVALID_REQUEST' - interaction.error.message = 'Bad request' + interaction = Interaction( + id='interaction_123', + status='failed', + created=datetime.now(timezone.utc).isoformat(), + updated=datetime.now(timezone.utc).isoformat(), + steps=[], + ) + interaction.error = MagicMock(code='INVALID_REQUEST', message='Bad request') result = interactions_utils.convert_interaction_to_llm_response(interaction) @@ -824,18 +965,20 @@ def test_failed_response(self): def test_requires_action_response(self): """Test converting a requires_action response (function call).""" - interaction = MagicMock() - interaction.id = 'interaction_123' - interaction.status = 'requires_action' - fc_output = MagicMock() - fc_output.type = 'function_call' - fc_output.id = 'call_1' - fc_output.name = 'get_weather' - fc_output.arguments = {'city': 'Paris'} - interaction.outputs = [fc_output] - interaction.usage = None - interaction.error = None - + 
interaction = Interaction( + id='interaction_123', + status='requires_action', + created=datetime.now(timezone.utc).isoformat(), + updated=datetime.now(timezone.utc).isoformat(), + steps=[ + FunctionCallStep( + type='function_call', + id='call_1', + name='get_weather', + arguments={'city': 'Paris'}, + ) + ], + ) result = interactions_utils.convert_interaction_to_llm_response(interaction) assert result.interaction_id == 'interaction_123' @@ -1030,12 +1173,11 @@ class TestConvertInteractionEventToLlmResponse: def test_text_delta_event(self): """Test converting a text delta event.""" - event = MagicMock() - event.event_type = 'content.delta' - event.delta = MagicMock() - event.delta.type = 'text' - event.delta.text = 'Hello world' - + event = StepDelta( + event_type='step.delta', + index=0, + delta={'type': 'text', 'text': 'Hello world'}, + ) aggregated_parts = [] result = interactions_utils.convert_interaction_event_to_llm_response( event, aggregated_parts, interaction_id='int_123' @@ -1047,111 +1189,172 @@ def test_text_delta_event(self): assert result.interaction_id == 'int_123' assert len(aggregated_parts) == 1 - def test_function_call_delta_with_thought_signature(self): - """Test converting a function call delta with thought_signature.""" - event = MagicMock() - event.event_type = 'content.delta' - event.delta = MagicMock( - spec=['type', 'id', 'name', 'arguments', 'thought_signature'] - ) - event.delta.type = 'function_call' - event.delta.id = 'fc_delta_123' - event.delta.name = 'streaming_tool' - event.delta.arguments = {'param': 'value'} - # thought_signature is base64 encoded in the delta - event.delta.thought_signature = base64.b64encode(b'delta_signature').decode( - 'utf-8' + def test_image_delta_with_data(self): + """Test converting an image delta with inline data.""" + event = StepDelta( + event_type='step.delta', + index=0, + delta={ + 'type': 'image', + 'data': base64.b64encode(b'image_bytes').decode('utf-8'), + 'mime_type': 'image/png', + }, ) - 
aggregated_parts = [] result = interactions_utils.convert_interaction_event_to_llm_response( - event, aggregated_parts, interaction_id='int_456' + event, aggregated_parts, interaction_id='int_img' ) - # Function calls return None (added to aggregated_parts only) - assert result is None + assert result is not None + assert result.partial + assert result.content.parts[0].inline_data.data == b'image_bytes' assert len(aggregated_parts) == 1 - fc_part = aggregated_parts[0] - assert fc_part.function_call.id == 'fc_delta_123' - assert fc_part.function_call.name == 'streaming_tool' - assert fc_part.function_call.args == {'param': 'value'} - # thought_signature should be decoded back to bytes - assert fc_part.thought_signature == b'delta_signature' - - def test_function_call_delta_without_thought_signature(self): - """Test converting a function call delta without thought_signature.""" + + def test_unknown_event_type_returns_none(self): + """Test that unknown event types return None.""" event = MagicMock() - event.event_type = 'content.delta' - event.delta = MagicMock(spec=['type', 'id', 'name', 'arguments']) - event.delta.type = 'function_call' - event.delta.id = 'fc_no_sig' - event.delta.name = 'regular_tool' - event.delta.arguments = {} + event.event_type = 'some_unknown_event' # Unknown event type aggregated_parts = [] result = interactions_utils.convert_interaction_event_to_llm_response( - event, aggregated_parts, interaction_id='int_789' + event, aggregated_parts, interaction_id='int_other' ) - # Function calls return None assert result is None - assert len(aggregated_parts) == 1 - fc_part = aggregated_parts[0] - assert fc_part.function_call.name == 'regular_tool' - # thought_signature should be None - assert fc_part.thought_signature is None - - def test_function_call_delta_without_name_skipped(self): - """Test that function call delta without name is skipped.""" - event = MagicMock() - event.event_type = 'content.delta' - event.delta = MagicMock(spec=['type', 'id', 
'name', 'arguments']) - event.delta.type = 'function_call' - event.delta.id = 'fc_no_name' - event.delta.name = None # No name - event.delta.arguments = {} + assert not aggregated_parts - aggregated_parts = [] + def test_completed_event_failed_partial_interaction(self): + """A failed lifecycle event with a partial interaction does not crash.""" + event = InteractionCompletedEvent( + event_type='interaction.completed', + interaction=InteractionSseEventInteraction( + id='int_failed', + status='failed', + steps=[], + ), + ) result = interactions_utils.convert_interaction_event_to_llm_response( - event, aggregated_parts, interaction_id='int_000' + event, aggregated_parts=[], interaction_id='int_failed' + ) + assert result is not None + assert result.error_code == 'UNKNOWN_ERROR' + assert result.interaction_id == 'int_failed' + + def test_function_call_streaming_flow(self): + """Test the complete streaming flow for function calls (Start, Delta, Stop).""" + # 1. StepStart + start_event = StepStart( + event_type='step.start', + index=0, + step=FunctionCallStep( + type='function_call', + id='call_1', + name='get_weather', + arguments={}, + ), + ) + aggregated_parts: list[types.Part] = [] + result1 = interactions_utils.convert_interaction_event_to_llm_response( + start_event, aggregated_parts, interaction_id='int_123' ) - # Should be skipped (no name) - assert result is None - assert not aggregated_parts + assert result1 is not None + assert result1.partial is True + assert len(aggregated_parts) == 1 + fc = aggregated_parts[-1].function_call + assert fc + assert fc.name == 'get_weather' + assert fc.id == 'call_1' + assert fc.partial_args == [] + + # 2. 
StepDelta + delta_event1 = StepDelta( + event_type='step.delta', + index=0, + delta={'type': 'arguments_delta', 'arguments': '{"city": '}, + ) + result2 = interactions_utils.convert_interaction_event_to_llm_response( + delta_event1, aggregated_parts, interaction_id='int_123' + ) - def test_image_delta_with_data(self): - """Test converting an image delta with inline data.""" - event = MagicMock() - event.event_type = 'content.delta' - event.delta = MagicMock() - event.delta.type = 'image' - event.delta.data = b'image_bytes' - event.delta.uri = None - event.delta.mime_type = 'image/png' + assert result2 is not None + assert result2.partial is True + assert ( + result2.content.parts[0].function_call.partial_args[0].string_value + == '{"city": ' + ) - aggregated_parts = [] - result = interactions_utils.convert_interaction_event_to_llm_response( - event, aggregated_parts, interaction_id='int_img' + delta_event2 = StepDelta( + event_type='step.delta', + index=0, + delta={'type': 'arguments_delta', 'arguments': '"Paris"}'}, + ) + result3 = interactions_utils.convert_interaction_event_to_llm_response( + delta_event2, aggregated_parts, interaction_id='int_123' ) - assert result is not None - assert not result.partial - assert result.content.parts[0].inline_data.data == b'image_bytes' - assert len(aggregated_parts) == 1 + assert result3 is not None + assert len(aggregated_parts[0].function_call.partial_args) == 2 - def test_unknown_event_type_returns_none(self): - """Test that unknown event types return None.""" - event = MagicMock() - event.event_type = 'some_unknown_event' # Unknown event type + # 3. 
StepStop + stop_event = StepStop( + event_type='step.stop', + index=0, + ) + result4 = interactions_utils.convert_interaction_event_to_llm_response( + stop_event, aggregated_parts, interaction_id='int_123' + ) + assert result4 is None + assert aggregated_parts[0].function_call.args == {'city': 'Paris'} + assert aggregated_parts[0].function_call.partial_args is None + + def test_function_call_streaming_json_parse_error(self, caplog): + """Test function call streaming returns an error response on JSON parse error.""" + # 1. StepStart + start_event = StepStart( + event_type='step.start', + index=0, + step=FunctionCallStep( + type='function_call', + id='call_err', + name='bad_json_tool', + arguments={}, + ), + ) aggregated_parts = [] + interactions_utils.convert_interaction_event_to_llm_response( + start_event, aggregated_parts, interaction_id='int_err' + ) + + # 2. StepDelta (invalid JSON) + delta_event = StepDelta( + event_type='step.delta', + index=0, + delta={'type': 'arguments_delta', 'arguments': '{"broken": "json'}, + ) + interactions_utils.convert_interaction_event_to_llm_response( + delta_event, aggregated_parts, interaction_id='int_err' + ) + + # 3. StepStop + stop_event = StepStop( + event_type='step.stop', + index=0, + ) result = interactions_utils.convert_interaction_event_to_llm_response( - event, aggregated_parts, interaction_id='int_other' + stop_event, aggregated_parts, interaction_id='int_err' ) - assert result is None - assert not aggregated_parts + # Assert an error LlmResponse is returned + assert result is not None + assert result.error_code == 'JSON_PARSE_ERROR' + assert result.error_message == 'Failed to parse function call arguments' + assert result.turn_complete is True + assert result.interaction_id == 'int_err' + + # The logging check can remain to ensure the raw exception is still logged. 
+ assert 'Failed to parse function call args' in caplog.text @pytest.mark.parametrize( @@ -1159,7 +1362,7 @@ def test_unknown_event_type_returns_none(self): [ pytest.param( _build_lifecycle_streamed_events, - ['interaction_123', 'interaction_123'], + ['interaction_123'], id='lifecycle-events', ), pytest.param( @@ -1175,11 +1378,12 @@ def test_unknown_event_type_returns_none(self): ], ) def test_generate_content_via_interactions_stream_extracts_interaction_id( - streamed_events_factory: Callable[[], list[object]], + streamed_events_factory: Callable[[FunctionCallStep], list[object]], expected_ids: list[str], + fc_step: FunctionCallStep, ): """Streamed interaction IDs should be preserved across event variants.""" - streamed_events = streamed_events_factory() + streamed_events = streamed_events_factory(fc_step) assert ( asyncio.run(_collect_function_call_interaction_ids(streamed_events))