Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions eval_protocol/mcp/execution/base_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,20 @@ def __init__(
# Initialize conversation state tracking for proper OpenAI trajectories
self.initialized = False

def _supports_reasoning_details(self) -> bool:
    """Return True if this policy targets a provider that expects reasoning_details.

    Some provider/model combinations (e.g., Gemini 3 via OpenRouter) expect the
    top-level ``reasoning_details`` field of an assistant message to be
    preserved when the conversation is sent back.

    The decision is based on two optional attributes of the instance:

    * ``model_id`` -- True when it contains ``"openrouter"``.
    * ``base_url`` -- True when it contains ``"openrouter.ai"``.

    Both attributes may be absent, ``None``, or not strings; in those cases
    they simply do not match. Matching is case-insensitive.

    Returns:
        bool: True when either attribute identifies OpenRouter, else False.
    """
    # `or ""` normalizes None (and other falsy values) to an empty string.
    model_id = getattr(self, "model_id", "") or ""
    base_url = getattr(self, "base_url", "") or ""

    # Lower-case before matching: URL host names are case-insensitive, and a
    # mixed-case model prefix should not silently disable the feature.
    if isinstance(model_id, str) and "openrouter" in model_id.lower():
        return True
    return isinstance(base_url, str) and "openrouter.ai" in base_url.lower()

@abstractmethod
async def _make_llm_call(self, messages: List[Dict], tools: List[Dict]) -> Dict:
"""
Expand Down Expand Up @@ -199,6 +213,9 @@ async def _generate_live_tool_calls(
if message.get("tool_calls"):
assistant_message_for_history["tool_calls"] = message["tool_calls"]

if message.get("reasoning_details") and self._supports_reasoning_details():
assistant_message_for_history["reasoning_details"] = message["reasoning_details"]
Comment thread
xzrderek marked this conversation as resolved.
Outdated
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Bug: Field name mismatch: reasoning_details vs reasoning_content

The new code uses reasoning_details as the field name, but the rest of the codebase (including the Message model in eval_protocol/models.py and the langchain adapter) uses reasoning_content. LiteLLM's documentation also uses reasoning_content. If LiteLLM returns the reasoning in a field called reasoning_content via provider_specific_fields, message.get("reasoning_details") would return None and the reasoning content would not be captured. Similarly, adding reasoning_details to allowed_fields would not preserve the actual reasoning_content field.

Additional Locations (1)

Fix in Cursor Fix in Web


# Add to actual conversation history
conversation_history.append(assistant_message_for_history)

Expand Down
53 changes: 31 additions & 22 deletions eval_protocol/mcp/execution/policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,6 +148,9 @@ def _clean_messages_for_api(self, messages: List[Dict]) -> List[Dict]:
# Standard OpenAI message fields
allowed_fields = {"role", "content", "tool_calls", "tool_call_id", "name"}

if self._supports_reasoning_details():
allowed_fields.add("reasoning_details")

clean_messages = []
for msg in messages:
# Only keep allowed fields
Expand Down Expand Up @@ -217,31 +220,37 @@ async def _make_llm_call(self, messages: List[Dict[str, Any]], tools: List[Dict[
logger.debug(f"🔄 API call for model: {self.model_id}")

# LiteLLM already returns OpenAI-compatible format
message_obj = getattr(response.choices[0], "message", object())

message_dict: Dict[str, Any] = {
"role": getattr(message_obj, "role", "assistant"),
"content": getattr(message_obj, "content", None),
"tool_calls": (
[
{
"id": getattr(tc, "id", None),
"type": getattr(tc, "type", "function"),
"function": {
"name": getattr(getattr(tc, "function", None), "name", "tool"),
"arguments": getattr(getattr(tc, "function", None), "arguments", "{}"),
},
}
for tc in (getattr(message_obj, "tool_calls", []) or [])
]
if getattr(message_obj, "tool_calls", None)
else []
),
}

if self._supports_reasoning_details():
rd = getattr(message_obj, "reasoning_details", None)
if rd is not None:
message_dict["reasoning_details"] = rd

return {
"choices": [
{
"message": {
"role": getattr(getattr(response.choices[0], "message", object()), "role", "assistant"),
"content": getattr(getattr(response.choices[0], "message", object()), "content", None),
"tool_calls": (
[
{
"id": getattr(tc, "id", None),
"type": getattr(tc, "type", "function"),
"function": {
"name": getattr(getattr(tc, "function", None), "name", "tool"),
"arguments": getattr(getattr(tc, "function", None), "arguments", "{}"),
},
}
for tc in (
getattr(getattr(response.choices[0], "message", object()), "tool_calls", [])
or []
)
]
if getattr(getattr(response.choices[0], "message", object()), "tool_calls", None)
else []
),
},
"message": message_dict,
"finish_reason": getattr(response.choices[0], "finish_reason", None),
}
],
Expand Down
Loading