Skip to content

Commit 92a321f

Browse files
committed
single string tool response should just be string
1 parent 54333cf commit 92a321f

File tree

2 files changed

+54
-7
lines changed

2 files changed

+54
-7
lines changed

eval_protocol/pytest/default_agent_rollout_processor.py

Lines changed: 14 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -74,14 +74,9 @@ async def call_agent(self) -> str:
7474

7575
# Add all tool results to messages (they will be in the same order as tool_calls)
7676
for tool_call, (tool_call_id, content) in zip(message.tool_calls, tool_results):
77+
tool_message_content = self._format_tool_message_content(content)
7778
self.append_message_and_log(
78-
Message(
79-
role="tool",
80-
content=[
81-
ChatCompletionContentPartTextParam(text=content.text, type="text") for content in content
82-
],
83-
tool_call_id=tool_call_id,
84-
)
79+
Message(role="tool", content=tool_message_content, tool_call_id=tool_call_id)
8580
)
8681
return await self.call_agent()
8782
return message.content
@@ -114,6 +109,18 @@ def _get_content_from_tool_result(self, tool_result: CallToolResult) -> List[Tex
114109
raise NotImplementedError("Non-text content is not supported yet")
115110
return tool_result.content
116111

112+
def _format_tool_message_content(
    self, content: List[TextContent]
) -> Union[str, List[ChatCompletionContentPartTextParam]]:
    """Convert tool result content into the payload of a tool message.

    A list holding exactly one ``TextContent`` collapses to that item's bare
    text. Any other input is returned as a list of text parts, one per item
    and in the same order as ``content``.
    """
    is_lone_text = len(content) == 1 and isinstance(content[0], TextContent)
    if not is_lone_text:
        return [ChatCompletionContentPartTextParam(text=item.text, type="text") for item in content]
    return content[0].text
123+
117124

118125
async def default_agent_rollout_processor(
119126
rows: List[EvaluationRow], config: RolloutProcessorConfig
Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
import asyncio
2+
from typing import List, Optional
3+
4+
from mcp.types import TextContent
5+
from openai.types.chat.chat_completion_message import (
6+
ChatCompletionMessageToolCall,
7+
FunctionCall,
8+
)
9+
10+
from eval_protocol.models import EvaluationRow, Message
11+
from eval_protocol.pytest.default_agent_rollout_processor import Agent
12+
13+
14+
class NoOpLogger:
    """Logger stand-in that records nothing and has nothing to read back."""

    def log(self, row: EvaluationRow) -> None:
        """Accept ``row`` and discard it."""
        return

    def read(self, row_id: Optional[str] = None) -> List[EvaluationRow]:
        """Return an empty list regardless of ``row_id``."""
        no_rows: List[EvaluationRow] = []
        return no_rows
20+
21+
22+
def test_tool_result_single_text_becomes_string():
    """Check both output shapes of ``Agent._format_tool_message_content``."""
    # The formatter is called on a throwaway agent wrapping a one-message row.
    user_turn = Message(role="user", content="use the tool")
    agent = Agent(
        model="dummy",
        row=EvaluationRow(messages=[user_turn]),
        config_path="",
        logger=NoOpLogger(),
    )

    # One text item -> bare string.
    lone_result = agent._format_tool_message_content(
        [TextContent(type="text", text="single result")]
    )
    assert isinstance(lone_result, str)
    assert lone_result == "single result"

    # Two text items -> list of text parts, order preserved.
    pair = [TextContent(type="text", text=label) for label in ("first", "second")]
    parts = agent._format_tool_message_content(pair)
    assert isinstance(parts, list)
    assert [part["text"] for part in parts] == ["first", "second"]

0 commit comments

Comments
 (0)