Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
21 changes: 14 additions & 7 deletions eval_protocol/pytest/default_agent_rollout_processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,14 +74,9 @@ async def call_agent(self) -> str:

# Add all tool results to messages (they will be in the same order as tool_calls)
for tool_call, (tool_call_id, content) in zip(message.tool_calls, tool_results):
tool_message_content = self._format_tool_message_content(content)
self.append_message_and_log(
Message(
role="tool",
content=[
ChatCompletionContentPartTextParam(text=content.text, type="text") for content in content
],
tool_call_id=tool_call_id,
)
Message(role="tool", content=tool_message_content, tool_call_id=tool_call_id)
)
return await self.call_agent()
return message.content
Expand Down Expand Up @@ -114,6 +109,18 @@ def _get_content_from_tool_result(self, tool_result: CallToolResult) -> List[Tex
raise NotImplementedError("Non-text content is not supported yet")
return tool_result.content

def _format_tool_message_content(
    self, content: List[TextContent]
) -> Union[str, List[ChatCompletionContentPartTextParam]]:
    """Shape a tool result's text items into the ``content`` of a tool message.

    A list holding exactly one ``TextContent`` collapses to that item's
    ``text`` string (the plain-string form of tool message content). Any
    other input yields a list with one text part per item, in the same
    order as ``content``.
    """
    # NOTE(review): the return annotation needs `Union` from `typing` in
    # scope; the module's import block is not visible here — confirm it.
    is_lone_text = len(content) == 1 and isinstance(content[0], TextContent)
    if not is_lone_text:
        return [ChatCompletionContentPartTextParam(text=item.text, type="text") for item in content]
    return content[0].text


async def default_agent_rollout_processor(
rows: List[EvaluationRow], config: RolloutProcessorConfig
Expand Down
40 changes: 40 additions & 0 deletions tests/pytest/test_tool_response_single_string.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import asyncio
from typing import List, Optional

from mcp.types import TextContent
from openai.types.chat.chat_completion_message import (
ChatCompletionMessageToolCall,
FunctionCall,
)

from eval_protocol.models import EvaluationRow, Message
from eval_protocol.pytest.default_agent_rollout_processor import Agent


class NoOpLogger:
    """Stand-in logger for tests: stores nothing and has nothing to read back."""

    def log(self, row: EvaluationRow) -> None:
        """Accept ``row`` and discard it; always returns ``None``."""

    def read(self, row_id: Optional[str] = None) -> List[EvaluationRow]:
        """Return a fresh empty list, whatever ``row_id`` is."""
        return list()


def test_tool_result_single_text_becomes_string():
    """Check how ``Agent._format_tool_message_content`` shapes tool output.

    One text item must come back as a bare string; several text items must
    come back as a list whose entries expose their text under ``"text"``.
    """
    # Build the agent around a one-message row and the NoOpLogger stand-in.
    agent = Agent(
        model="dummy",
        row=EvaluationRow(messages=[Message(role="user", content="use the tool")]),
        config_path="",
        logger=NoOpLogger(),
    )

    # Case 1: a lone text item collapses to a plain string.
    lone_result = agent._format_tool_message_content(
        [TextContent(type="text", text="single result")]
    )
    assert isinstance(lone_result, str)
    assert lone_result == "single result"

    # Case 2: several text items stay a list of parts, order preserved.
    expected_texts = ["first", "second"]
    parts = agent._format_tool_message_content(
        [TextContent(type="text", text=value) for value in expected_texts]
    )
    assert isinstance(parts, list)
    assert [part["text"] for part in parts] == expected_texts
Loading