Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 53 additions & 46 deletions ols/src/tools/tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
DO_NOT_RETRY_REMINDER = "Do not retry this exact tool call."


def _is_retryable_tool_error(error: Exception) -> bool:
"""Return true if a tool execution error is likely transient."""
def _is_transient_error(error: Exception) -> bool:
"""Return true if the error is likely transient and worth retrying automatically."""
if isinstance(error, (TimeoutError, asyncio.TimeoutError, ConnectionError)):
return True
error_text = str(error).lower()
Expand Down Expand Up @@ -152,54 +152,61 @@ async def _execute_single_tool_call(
was_truncated = False
structured_content: dict | None = None

if tool_name is None:
tool_output = (
f"Error: Tool name is missing from tool call. {DO_NOT_RETRY_REMINDER}"
)
# Scenario 1: precondition failure — tool name missing, tool unknown, or sensitive args.
# The LLM must not retry; no argument change can make this call valid.
try:
if tool_name is None:
raise ValueError("Tool name is missing from tool call")
raise_for_sensitive_tool_args(tool_args)
get_tool_by_name(tool_name, all_mcp_tools) # raise for unknown tool
except Exception as e:
tool_output = f"Error: {e}. {DO_NOT_RETRY_REMINDER}"
status = "error"
logger.error("Tool call missing name: %s", tool_call)
logger.error("%s, Tool call: %s", tool_output, tool_call)
else:
try:
raise_for_sensitive_tool_args(tool_args)
attempts = MAX_TOOL_CALL_RETRIES + 1
last_error_text = "unknown error"
for attempt in range(attempts):
try:
tool_output, was_truncated, structured_content = (
await execute_tool_call(
tool_name, tool_args, all_mcp_tools, max_tokens
)
attempts = MAX_TOOL_CALL_RETRIES + 1
last_error_text = "unknown error"
for attempt in range(attempts):
try:
tool_output, was_truncated, structured_content = (
await execute_tool_call(
tool_name, tool_args, all_mcp_tools, max_tokens
)
status = "success"
break
except Exception as error:
last_error_text = str(error)
if attempt < MAX_TOOL_CALL_RETRIES and _is_retryable_tool_error(
error
):
logger.warning(
"Retrying tool '%s' after transient error on attempt %d/%d: %s",
tool_name,
attempt + 1,
attempts,
error,
)
await asyncio.sleep(RETRY_BACKOFF_SECONDS * (2**attempt))
continue
tool_output = (
f"Tool '{tool_name}' execution failed after {attempt + 1} "
f"attempt(s): {last_error_text}. {DO_NOT_RETRY_REMINDER}"
)
status = "success"
break
except Exception as error:
last_error_text = str(error)
# Scenario 2: transient infrastructure error — retry automatically with same args.
if attempt < MAX_TOOL_CALL_RETRIES and _is_transient_error(error):
logger.warning(
"Retrying tool '%s' after transient error on attempt %d/%d: %s",
tool_name,
attempt + 1,
attempts,
error,
)
status = "error"
logger.exception(tool_output)
break
except Exception as e:
tool_output = (
f"Error executing tool '{tool_name}' with args {tool_args}: {e}"
)
tool_output = f"{tool_output}. {DO_NOT_RETRY_REMINDER}"
status = "error"
logger.exception(tool_output)
await asyncio.sleep(RETRY_BACKOFF_SECONDS * (2**attempt))
continue
# Scenario 3a: non-transient error on the very first attempt — the LLM likely
# sent bad args. No reminder: the LLM is free to retry with corrected args.
# Scenario 3b: anything else (retries exhausted, or non-transient after a prior
# transient retry) — retrying in any form is unlikely to help.
is_first_attempt_non_transient = (
attempt == 0 and not _is_transient_error(error)
)
retry_note = (
""
if is_first_attempt_non_transient
else f" {DO_NOT_RETRY_REMINDER}"
)
tool_output = (
f"Tool '{tool_name}' execution failed after {attempt + 1} "
f"attempt(s): {last_error_text}.{retry_note}"
)
status = "error"
logger.exception(tool_output)
break

additional_kwargs: dict = {"truncated": was_truncated}
if structured_content is not None:
Expand Down
12 changes: 7 additions & 5 deletions tests/unit/tools/test_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,16 +242,17 @@ async def test_execute_tool_calls_mixed_success_and_failure():
assert tool_messages[0].status == "success"
assert tool_messages[0].content == "fake_output_from_success_tool"

# Second call should fail due to tool raising exception
# Second call should fail due to tool raising non-transient exception (Scenario 3a).
# No DO_NOT_RETRY_REMINDER: LLM may retry with corrected arguments.
assert tool_messages[1].status == "error"
assert "execution failed after 1 attempt(s)" in tool_messages[1].content
assert "Tool execution failed" in tool_messages[1].content
assert DO_NOT_RETRY_REMINDER in tool_messages[1].content
assert DO_NOT_RETRY_REMINDER not in tool_messages[1].content

# Third call should fail due to nonexistent tool
# Third call should fail due to nonexistent tool (Scenario 1 precondition).
# DO_NOT_RETRY_REMINDER is always included: no argument change can make this valid.
assert tool_messages[2].status == "error"
assert "Tool 'nonexistent_tool' not found" in tool_messages[2].content
assert "execution failed after 1 attempt(s)" in tool_messages[2].content
assert DO_NOT_RETRY_REMINDER in tool_messages[2].content


Expand Down Expand Up @@ -343,7 +344,8 @@ async def _non_retryable_failure(**kwargs: Any) -> tuple:
assert call_count == 1
assert tool_messages[0].status == "error"
assert "execution failed after 1 attempt(s)" in tool_messages[0].content
assert DO_NOT_RETRY_REMINDER in tool_messages[0].content
# Scenario 3a: non-transient error, first attempt — LLM may retry with corrected args.
assert DO_NOT_RETRY_REMINDER not in tool_messages[0].content


@pytest.mark.asyncio
Expand Down