diff --git a/ols/src/tools/tools.py b/ols/src/tools/tools.py index 99573c93f..4c720f3f4 100644 --- a/ols/src/tools/tools.py +++ b/ols/src/tools/tools.py @@ -20,8 +20,8 @@ DO_NOT_RETRY_REMINDER = "Do not retry this exact tool call." -def _is_retryable_tool_error(error: Exception) -> bool: - """Return true if a tool execution error is likely transient.""" +def _is_transient_error(error: Exception) -> bool: + """Return true if the error is likely transient and worth retrying automatically.""" if isinstance(error, (TimeoutError, asyncio.TimeoutError, ConnectionError)): return True error_text = str(error).lower() @@ -152,54 +152,61 @@ async def _execute_single_tool_call( was_truncated = False structured_content: dict | None = None - if tool_name is None: - tool_output = ( - f"Error: Tool name is missing from tool call. {DO_NOT_RETRY_REMINDER}" - ) + # Scenario 1: precondition failure — tool name missing, tool unknown, or sensitive args. + # The LLM must not retry; no argument change can make this call valid. + try: + if tool_name is None: + raise ValueError("Tool name is missing from tool call") + raise_for_sensitive_tool_args(tool_args) + get_tool_by_name(tool_name, all_mcp_tools) # raise for unknown tool + except Exception as e: + tool_output = f"Error: {e}. {DO_NOT_RETRY_REMINDER}" status = "error" - logger.error("Tool call missing name: %s", tool_call) + logger.error("%s, Tool call: %s", tool_output, tool_call) else: - try: - raise_for_sensitive_tool_args(tool_args) - attempts = MAX_TOOL_CALL_RETRIES + 1 - last_error_text = "unknown error" - for attempt in range(attempts): - try: - tool_output, was_truncated, structured_content = ( - await execute_tool_call( - tool_name, tool_args, all_mcp_tools, max_tokens - ) + attempts = MAX_TOOL_CALL_RETRIES + 1 + last_error_text = "unknown error" + for attempt in range(attempts): + try: + tool_output, was_truncated, structured_content = ( + await execute_tool_call( + tool_name, tool_args, all_mcp_tools, max_tokens ) - status = "success" - break - except Exception as error: - last_error_text = str(error) - if attempt < MAX_TOOL_CALL_RETRIES and _is_retryable_tool_error( - error - ): - logger.warning( - "Retrying tool '%s' after transient error on attempt %d/%d: %s", - tool_name, - attempt + 1, - attempts, - error, - ) - await asyncio.sleep(RETRY_BACKOFF_SECONDS * (2**attempt)) - continue - tool_output = ( - f"Tool '{tool_name}' execution failed after {attempt + 1} " - f"attempt(s): {last_error_text}. {DO_NOT_RETRY_REMINDER}" + ) + status = "success" + break + except Exception as error: + last_error_text = str(error) + # Scenario 2: transient infrastructure error — retry automatically with same args. + if attempt < MAX_TOOL_CALL_RETRIES and _is_transient_error(error): + logger.warning( + "Retrying tool '%s' after transient error on attempt %d/%d: %s", + tool_name, + attempt + 1, + attempts, + error, ) - status = "error" - logger.exception(tool_output) - break - except Exception as e: - tool_output = ( - f"Error executing tool '{tool_name}' with args {tool_args}: {e}" - ) - tool_output = f"{tool_output}. {DO_NOT_RETRY_REMINDER}" - status = "error" - logger.exception(tool_output) + await asyncio.sleep(RETRY_BACKOFF_SECONDS * (2**attempt)) + continue + # Scenario 3a: non-transient error on the very first attempt — the LLM likely + # sent bad args. No reminder: the LLM is free to retry with corrected args. + # Scenario 3b: anything else (retries exhausted, or non-transient after a prior + # transient retry) — retrying in any form is unlikely to help. + is_first_attempt_non_transient = ( + attempt == 0 and not _is_transient_error(error) + ) + retry_note = ( + "" + if is_first_attempt_non_transient + else f" {DO_NOT_RETRY_REMINDER}" + ) + tool_output = ( + f"Tool '{tool_name}' execution failed after {attempt + 1} " + f"attempt(s): {last_error_text}.{retry_note}" + ) + status = "error" + logger.exception(tool_output) + break additional_kwargs: dict = {"truncated": was_truncated} if structured_content is not None: diff --git a/tests/unit/tools/test_tools.py b/tests/unit/tools/test_tools.py index 9c0de61dd..4c7523399 100644 --- a/tests/unit/tools/test_tools.py +++ b/tests/unit/tools/test_tools.py @@ -242,16 +242,17 @@ async def test_execute_tool_calls_mixed_success_and_failure(): assert tool_messages[0].status == "success" assert tool_messages[0].content == "fake_output_from_success_tool" - # Second call should fail due to tool raising exception + # Second call should fail due to tool raising non-transient exception (Scenario 3a). + # No DO_NOT_RETRY_REMINDER: LLM may retry with corrected arguments. assert tool_messages[1].status == "error" assert "execution failed after 1 attempt(s)" in tool_messages[1].content assert "Tool execution failed" in tool_messages[1].content - assert DO_NOT_RETRY_REMINDER in tool_messages[1].content + assert DO_NOT_RETRY_REMINDER not in tool_messages[1].content - # Third call should fail due to nonexistent tool + # Third call should fail due to nonexistent tool (Scenario 1 precondition). + # DO_NOT_RETRY_REMINDER is always included: no argument change can make this valid. assert tool_messages[2].status == "error" assert "Tool 'nonexistent_tool' not found" in tool_messages[2].content - assert "execution failed after 1 attempt(s)" in tool_messages[2].content assert DO_NOT_RETRY_REMINDER in tool_messages[2].content @@ -343,7 +344,8 @@ async def _non_retryable_failure(**kwargs: Any) -> tuple: assert call_count == 1 assert tool_messages[0].status == "error" assert "execution failed after 1 attempt(s)" in tool_messages[0].content - assert DO_NOT_RETRY_REMINDER in tool_messages[0].content + # Scenario 3a: non-transient error, first attempt — LLM may retry with corrected args. + assert DO_NOT_RETRY_REMINDER not in tool_messages[0].content @pytest.mark.asyncio