From fb4c17afb267d4db58146b1b7a62e4b51fe1c412 Mon Sep 17 00:00:00 2001 From: TerminallyLazy Date: Sun, 21 Jun 2026 06:28:57 -0400 Subject: [PATCH 1/2] fix: harden local model tool calls --- agent.py | 337 +++++++++++++++- extensions/python/system_prompt/AGENTS.md | 2 +- .../_10_small_local_model_prompt.py | 87 +++++ helpers/extract_tools.py | 222 ++++++++++- helpers/extract_tools.py.dox.md | 10 +- helpers/litellm_transport.py | 84 ++++ helpers/litellm_transport.py.dox.md | 36 ++ models.py | 15 + plugins/_text_editor/AGENTS.md | 1 + plugins/_text_editor/helpers/file_ops.py | 2 + .../agent.system.main.small_local_model.md | 15 + prompts/fw.msg_misformat.md | 2 +- prompts/fw.msg_reasoning_only.md | 1 + prompts/fw.msg_reasoning_only_failed.md | 1 + prompts/fw.msg_repeat.md | 2 +- prompts/fw.msg_repeat_completed.md | 3 + prompts/fw.msg_repeat_failed.md | 3 + prompts/fw.msg_tool_completed.md | 1 + tests/test_small_local_model_prompt.py | 369 ++++++++++++++++++ tests/test_stream_tool_early_stop.py | 128 ++++++ tests/test_text_editor_context_patch.py | 10 + tests/test_tool_request_normalization.py | 94 ++++- 22 files changed, 1403 insertions(+), 22 deletions(-) create mode 100644 extensions/python/system_prompt/_10_small_local_model_prompt.py create mode 100644 helpers/litellm_transport.py.dox.md create mode 100644 prompts/agent.system.main.small_local_model.md create mode 100644 prompts/fw.msg_reasoning_only.md create mode 100644 prompts/fw.msg_reasoning_only_failed.md create mode 100644 prompts/fw.msg_repeat_completed.md create mode 100644 prompts/fw.msg_repeat_failed.md create mode 100644 prompts/fw.msg_tool_completed.md create mode 100644 tests/test_small_local_model_prompt.py diff --git a/agent.py b/agent.py index 8eef2906bb..5de9be9fbb 100644 --- a/agent.py +++ b/agent.py @@ -19,6 +19,7 @@ ) from helpers import extension from helpers.print_style import PrintStyle +from helpers.strings import truncate_text from langchain_core.prompts import ( ChatPromptTemplate, @@ -42,6 +43,13 @@ from helpers.litellm_transport import ResponsesTransport from helpers.responses_tools import build_responses_function_tools, original_tool_name +REASONING_ONLY_REPAIR_ATTEMPTS_KEY = "_reasoning_only_repair_attempts" +MAX_REASONING_ONLY_REPAIR_ATTEMPTS = 2 +REPEATED_RESPONSE_REPAIR_ATTEMPTS_KEY = "_repeated_response_repair_attempts" +MAX_REPEATED_RESPONSE_REPAIR_ATTEMPTS = 2 +LAST_SUCCESSFUL_TOOL_REQUEST_KEY = "_last_successful_tool_request" + + class AgentContextType(Enum): USER = "user" TASK = "task" @@ -500,26 +508,29 @@ async def stream_callback(chunk: str, full: str): await self.handle_intervention(agent_response) + reasoning_only_handled, reasoning_only_final = ( + self._handle_reasoning_only_result(llm_result) + ) + if reasoning_only_handled: + if reasoning_only_final: + return reasoning_only_final + continue + if ( self.loop_data.last_response == agent_response ): # if assistant_response is the same as last message in history, let him know - # Append the assistant's response to the history - log_item = self.loop_data.params_temporary.get("log_item_generating") - assistant_message = self.hist_add_ai_response( - agent_response, - id=log_item.id if log_item else "", - llm_result=llm_result, + repeated_handled, repeated_final = ( + self._handle_repeated_response_result(llm_result) ) - self._remember_llm_result_state(llm_result, assistant_message) - # Append warning message to the history - warning_msg = self.read_prompt("fw.msg_repeat.md") - wmsg = self.hist_add_warning(message=warning_msg) - PrintStyle(font_color="orange", padding=True).print( - warning_msg - ) - self.context.log.log(type="warning", content=warning_msg, id=wmsg.id) + if repeated_handled: + if repeated_final: + return repeated_final + continue else: # otherwise proceed with tool + self.loop_data.params_persistent.pop( + REPEATED_RESPONSE_REPAIR_ATTEMPTS_KEY, None + ) # Append the assistant's response to the history log_item = self.loop_data.params_temporary.get("log_item_generating") assistant_message = self.hist_add_ai_response( @@ -1108,7 +1119,7 @@ async def process_llm_result_tools(self, llm_result: LLMResult): if ( llm_result.mode == "responses" and llm_result.response - and extract_tools.json_parse_dirty(llm_result.response) is None + and extract_tools.extract_tool_request(llm_result.response) is None ): return llm_result.response return await self.process_tools(llm_result.response) @@ -1122,6 +1133,13 @@ async def _execute_tool_request( responses_item_factory: Callable[[Any], dict[str, Any]] | None = None, ): raw_tool_name = raw_tool_name or tool_name + tool_args = extract_tools.sanitize_tool_args(tool_args or {}) + duplicate_final = self._handle_duplicate_completed_tool_request( + tool_name, tool_args + ) + if duplicate_final: + return duplicate_final + tool_method = None tool = None @@ -1197,6 +1215,23 @@ async def _execute_tool_request( await tool.after_execution(response) await self.handle_intervention() + if not self._is_error_tool_response(response): + self._record_successful_tool_request(tool_name, tool_args, response) + completed_message = self._tool_completion_final_message( + tool_name, tool_args, response + ) + if completed_message: + PrintStyle(font_color="green", padding=True).print( + completed_message + ) + self.context.log.log( + type="response", + heading=f"{self.agent_name}: Tool action completed", + content=completed_message, + ) + self._clear_responses_pending_state() + return completed_message + if response.break_loop: self._clear_responses_pending_state() return response.message @@ -1387,10 +1422,254 @@ def _clear_responses_pending_state(self) -> None: state.pop("previous_response_id", None) self.set_data(Agent.DATA_NAME_RESPONSES_STATE, state) + def _handle_reasoning_only_result( + self, llm_result: LLMResult + ) -> tuple[bool, str | None]: + if not self._is_reasoning_only_result(llm_result): + return False, None + + attempts = int( + self.loop_data.params_persistent.get(REASONING_ONLY_REPAIR_ATTEMPTS_KEY, 0) + ) + 1 + self.loop_data.params_persistent[REASONING_ONLY_REPAIR_ATTEMPTS_KEY] = attempts + + if attempts > MAX_REASONING_ONLY_REPAIR_ATTEMPTS: + final_message = self.read_prompt( + "fw.msg_reasoning_only_failed.md", + attempts=MAX_REASONING_ONLY_REPAIR_ATTEMPTS, + ) + PrintStyle(font_color="red", padding=True).print(final_message) + self.context.log.log( + type="response", + heading=f"{self.agent_name}: Local model tool-call failure", + content=final_message, + ) + return True, final_message + + warning_msg = self.read_prompt("fw.msg_reasoning_only.md") + wmsg = self.hist_add_warning(message=warning_msg) + PrintStyle(font_color="orange", padding=True).print(warning_msg) + self.context.log.log(type="warning", content=warning_msg, id=wmsg.id) + return True, None + + def _is_reasoning_only_result(self, llm_result: LLMResult) -> bool: + return ( + not (llm_result.response or "").strip() + and bool((llm_result.reasoning or "").strip()) + and not llm_result.function_calls + and not llm_result.builtin_items + ) + + def _handle_repeated_response_result( + self, llm_result: LLMResult + ) -> tuple[bool, str | None]: + last_tool_result = self._last_successful_tool_result_summary() + if last_tool_result: + final_message = self.read_prompt( + "fw.msg_repeat_completed.md", + last_tool_result=last_tool_result, + ) + PrintStyle(font_color="green", padding=True).print(final_message) + self.context.log.log( + type="response", + heading=f"{self.agent_name}: Completed repeated action", + content=final_message, + ) + return True, final_message + + attempts = int( + self.loop_data.params_persistent.get(REPEATED_RESPONSE_REPAIR_ATTEMPTS_KEY, 0) + ) + 1 + self.loop_data.params_persistent[REPEATED_RESPONSE_REPAIR_ATTEMPTS_KEY] = attempts + + if attempts > MAX_REPEATED_RESPONSE_REPAIR_ATTEMPTS: + final_message = self.read_prompt( + "fw.msg_repeat_failed.md", + attempts=MAX_REPEATED_RESPONSE_REPAIR_ATTEMPTS, + last_tool_result=( + last_tool_result + or "No successful tool result was available before the repeated output." + ), + ) + PrintStyle(font_color="red", padding=True).print(final_message) + self.context.log.log( + type="response", + heading=f"{self.agent_name}: Repeated response stopped", + content=final_message, + ) + return True, final_message + + warning_msg = self.read_prompt("fw.msg_repeat.md") + wmsg = self.hist_add_warning(message=warning_msg) + PrintStyle(font_color="orange", padding=True).print(warning_msg) + self.context.log.log(type="warning", content=warning_msg, id=wmsg.id) + return True, None + + def _last_successful_tool_result_summary(self) -> str | None: + for message in reversed(self.history.all_messages()): + if message.ai: + continue + if not isinstance(message.content, dict): + return None + if "user_message" in message.content: + return None + tool_name = str(message.content.get("tool_name") or "").strip() + tool_result = str(message.content.get("tool_result") or "").strip() + if not tool_name or not tool_result: + continue + if tool_result.lower().startswith("error"): + continue + return ( + "Last completed tool result:\n" + f"{tool_name}: {truncate_text(tool_result, 1000)}" + ) + return None + + def _handle_duplicate_completed_tool_request( + self, tool_name: str, tool_args: dict + ) -> str | None: + if not self._is_duplicate_completed_tool_request(tool_name, tool_args): + return None + + last = self.loop_data.params_persistent.get(LAST_SUCCESSFUL_TOOL_REQUEST_KEY) + last_tool_result = "" + if isinstance(last, dict): + last_tool_result = str(last.get("last_tool_result") or "") + if not last_tool_result: + last_tool_result = ( + self._last_successful_tool_result_summary() + or "The previous tool action completed successfully." + ) + final_message = self.read_prompt( + "fw.msg_repeat_completed.md", + last_tool_result=last_tool_result, + ) + PrintStyle(font_color="green", padding=True).print(final_message) + self.context.log.log( + type="response", + heading=f"{self.agent_name}: Duplicate tool action stopped", + content=final_message, + ) + self._clear_responses_pending_state() + return final_message + + def _record_successful_tool_request( + self, tool_name: str, tool_args: dict, response: Any + ) -> None: + last_tool_result = self._last_successful_tool_result_summary() or ( + "Last completed tool result:\n" + f"{tool_name}: {truncate_text(str(getattr(response, 'message', '')), 1000)}" + ) + self.loop_data.params_persistent[LAST_SUCCESSFUL_TOOL_REQUEST_KEY] = { + "tool_name": tool_name, + "fingerprint": self._tool_request_fingerprint(tool_name, tool_args), + "completion_key": self._tool_completion_key(tool_name, tool_args, response), + "open_in_canvas": self._tool_open_in_canvas(tool_args, response), + "last_tool_result": last_tool_result, + } + + def _is_duplicate_completed_tool_request(self, tool_name: str, tool_args: dict) -> bool: + last = self.loop_data.params_persistent.get(LAST_SUCCESSFUL_TOOL_REQUEST_KEY) + if not isinstance(last, dict): + return False + if tool_name != str(last.get("tool_name") or ""): + return False + + if self._tool_request_fingerprint(tool_name, tool_args) == last.get( + "fingerprint" + ): + return True + + completion_key = self._tool_completion_key(tool_name, tool_args) + if not completion_key or completion_key != last.get("completion_key"): + return False + return bool(last.get("open_in_canvas")) or self._tool_open_in_canvas(tool_args) + + def _tool_completion_final_message( + self, tool_name: str, tool_args: dict, response: Any + ) -> str | None: + if not self._tool_open_in_canvas(tool_args, response): + return None + completion_key = self._tool_completion_key(tool_name, tool_args, response) + if not completion_key: + return None + if tool_name == "text_editor": + path = self._tool_path(tool_args, response) + return self.read_prompt( + "fw.msg_tool_completed.md", + message=f"The document was saved and opened in the canvas: {path}", + ) + return None + + def _tool_request_fingerprint(self, tool_name: str, tool_args: dict) -> str: + try: + serialized_args = json.dumps( + extract_tools.sanitize_tool_args(tool_args or {}), + sort_keys=True, + ensure_ascii=False, + default=str, + ) + except Exception: + serialized_args = str(tool_args) + return f"{tool_name}:{serialized_args}" + + def _tool_completion_key( + self, tool_name: str, tool_args: dict, response: Any | None = None + ) -> str: + action = self._tool_action(tool_args, response) + path = self._tool_path(tool_args, response) + if tool_name == "text_editor" and action in {"write", "patch"} and path: + return f"{tool_name}:{action}:{path}" + return "" + + def _tool_action(self, tool_args: dict, response: Any | None = None) -> str: + additional = getattr(response, "additional", None) if response else None + if not isinstance(additional, dict): + additional = {} + action = additional.get("action") or tool_args.get("action") or tool_args.get( + "method" + ) + return str(action or "").strip() + + def _tool_path(self, tool_args: dict, response: Any | None = None) -> str: + additional = getattr(response, "additional", None) if response else None + if not isinstance(additional, dict): + additional = {} + path = additional.get("path") or tool_args.get("path") or "" + return str(path or "").strip() + + def _tool_open_in_canvas( + self, tool_args: dict, response: Any | None = None + ) -> bool: + additional = getattr(response, "additional", None) if response else None + if not isinstance(additional, dict): + additional = {} + value = ( + additional.get("open_in_canvas") + if "open_in_canvas" in additional + else tool_args.get("open_in_canvas") + ) + if value is None: + value = tool_args.get("open_canvas") or tool_args.get("open_document") + return self._truthy(value) + + def _truthy(self, value: Any) -> bool: + if isinstance(value, bool): + return value + if value is None: + return False + if isinstance(value, (int, float)): + return value != 0 + return str(value).strip().lower() in {"1", "true", "yes", "y", "on"} + + def _is_error_tool_response(self, response: Any) -> bool: + message = str(getattr(response, "message", "") or "").strip().lower() + return message.startswith("error") + @extension.extensible async def process_tools(self, msg: str): # search for tool usage requests in agent message - tool_request = extract_tools.json_parse_dirty(msg) + tool_request = extract_tools.extract_tool_request(msg) raw_tool_name = "" tool_args = {} @@ -1407,6 +1686,12 @@ async def process_tools(self, msg: str): if tool_request is not None: tool_name = raw_tool_name # Initialize tool_name with raw_tool_name + duplicate_final = self._handle_duplicate_completed_tool_request( + tool_name, tool_args + ) + if duplicate_final: + return duplicate_final + tool_method = None # Initialize tool_method tool = None # Initialize tool to None @@ -1470,7 +1755,27 @@ async def process_tools(self, msg: str): await tool.after_execution(response) await self.handle_intervention() + if not self._is_error_tool_response(response): + self._record_successful_tool_request( + tool_name, tool_args, response + ) + completed_message = self._tool_completion_final_message( + tool_name, tool_args, response + ) + if completed_message: + PrintStyle(font_color="green", padding=True).print( + completed_message + ) + self.context.log.log( + type="response", + heading=f"{self.agent_name}: Tool action completed", + content=completed_message, + ) + self._clear_responses_pending_state() + return completed_message + if response.break_loop: + self._clear_responses_pending_state() return response.message finally: self.loop_data.current_tool = None diff --git a/extensions/python/system_prompt/AGENTS.md b/extensions/python/system_prompt/AGENTS.md index bc4ee64483..e1fb03684d 100644 --- a/extensions/python/system_prompt/AGENTS.md +++ b/extensions/python/system_prompt/AGENTS.md @@ -6,7 +6,7 @@ ## Ownership -- Ordered Python files own main, tools, MCP, secrets, skills, and project prompt sections. +- Ordered Python files own main, small-local-model, tools, MCP, secrets, skills, and project prompt sections. ## Local Contracts diff --git a/extensions/python/system_prompt/_10_small_local_model_prompt.py b/extensions/python/system_prompt/_10_small_local_model_prompt.py new file mode 100644 index 0000000000..e9c40b0827 --- /dev/null +++ b/extensions/python/system_prompt/_10_small_local_model_prompt.py @@ -0,0 +1,87 @@ +import re +from typing import Any +from urllib.parse import urlparse + +from helpers.extension import Extension, extensible +from agent import Agent, LoopData + + +LOCAL_PROVIDER_IDS = {"ollama", "lm_studio"} +LOCAL_HOSTS = {"localhost", "127.0.0.1", "0.0.0.0", "::1", "host.docker.internal"} +THINKING_MODEL_MARKERS = ( + "qwen3", + "qwen-3", + "qwq", + "deepseek-r1", + "deepseek_r1", + "gpt-oss", + "gemma4", + "gemma-4", +) +SMALL_LOCAL_MAX_B = 14.0 +MODEL_SIZE_RE = re.compile(r"(? str: + try: + from plugins._model_config.helpers.model_config import get_chat_model_config + + chat_cfg = get_chat_model_config(agent) + except Exception: + return "" + + if should_include_small_local_prompt(chat_cfg): + return agent.read_prompt("agent.system.main.small_local_model.md") + return "" + + +def should_include_small_local_prompt(chat_cfg: dict[str, Any] | None) -> bool: + if not isinstance(chat_cfg, dict): + return False + + provider = str(chat_cfg.get("provider") or "").lower().strip() + name = str(chat_cfg.get("name") or "").lower().strip() + api_base = str(chat_cfg.get("api_base") or "").lower().strip() + + is_local = provider in LOCAL_PROVIDER_IDS or _is_local_api_base(api_base) + if not is_local: + return False + + if any(marker in name for marker in THINKING_MODEL_MARKERS): + return True + + size_b = _model_size_billions(name) + return size_b is not None and size_b <= SMALL_LOCAL_MAX_B + + +def _is_local_api_base(api_base: str) -> bool: + if not api_base: + return False + parsed = urlparse(api_base if "://" in api_base else f"http://{api_base}") + host = (parsed.hostname or "").lower() + return host in LOCAL_HOSTS + + +def _model_size_billions(model_name: str) -> float | None: + match = MODEL_SIZE_RE.search(model_name) + if not match: + return None + try: + return float(match.group(1)) + except ValueError: + return None diff --git a/helpers/extract_tools.py b/helpers/extract_tools.py index 39e8378553..28db4e460f 100644 --- a/helpers/extract_tools.py +++ b/helpers/extract_tools.py @@ -2,8 +2,60 @@ from .dirty_json import DirtyJson import regex, re from helpers.modules import load_classes_from_file, load_classes_from_folder # keep here for backwards compatibility +from helpers.strings import sanitize_string from typing import Any +TOOL_NAME_KEYS = ("tool_name", "tool", "toolName", "function_name", "functionName") +TOOL_ARGS_KEYS = ("tool_args", "args", "arguments", "parameters", "input") +META_KEYS = { + "thought", + "thoughts", + "headline", + "reasoning", + "analysis", + "plan", + "intent", + "title", +} +RESPONSE_TEXT_KEYS = ( + "text", + "message", + "response", + "answer", + "final", + "final_answer", + "content", +) +RESPONSE_INTENT_MARKERS = ( + "response tool", + "final answer", + "respond", + "reply", + "answer the user", + "acknowledg", + "greeting", + "offer of assistance", +) +ACTION_INTENT_MARKERS = ( + "text_editor", + "text editor", + "code_execution", + "terminal", + "shell command", + "run command", + "search_engine", + "browser", + "computer_use", + "vision_load", + "call_subordinate", + "write file", + "read file", + "edit file", + "create file", + "open in canvas", +) + + def json_parse_dirty(json: str) -> dict[str, Any] | None: if not json or not isinstance(json, str): return None @@ -20,6 +72,95 @@ def json_parse_dirty(json: str) -> dict[str, Any] | None: return None +def extract_tool_request(content: str) -> dict[str, Any] | None: + """Extract and repair the first executable tool request from model text.""" + + for tool_request in iter_json_dicts(content): + try: + tool_name, tool_args = normalize_tool_request(tool_request) + return {"tool_name": tool_name, "tool_args": tool_args} + except ValueError: + repaired = repair_tool_request(tool_request) + if not repaired: + continue + try: + tool_name, tool_args = normalize_tool_request(repaired) + except ValueError: + continue + return {"tool_name": tool_name, "tool_args": tool_args} + return None + + +def iter_json_dicts(content: str) -> list[dict[str, Any]]: + if not content or not isinstance(content, str): + return [] + + result: list[dict[str, Any]] = [] + for candidate in iter_json_object_strings(content): + try: + data = DirtyJson.parse_string(candidate) + except Exception: + continue + if isinstance(data, dict): + result.append(data) + return result + + +def iter_json_object_strings(content: str) -> list[str]: + if not content or not isinstance(content, str): + return [] + + strings: list[str] = [] + index = 0 + while index < len(content): + start = content.find("{", index) + if start == -1: + break + candidate = extract_json_root_string(content[start:]) + if candidate: + strings.append(candidate) + index = start + len(candidate) + else: + index = start + 1 + return strings + + +def repair_tool_request(tool_request: Any) -> dict[str, Any] | None: + if not isinstance(tool_request, dict): + return None + + tool_name = _first_string_value(tool_request, TOOL_NAME_KEYS) + raw_args = _first_value(tool_request, TOOL_ARGS_KEYS) + tool_args = _normalize_repaired_args(raw_args, tool_name=tool_name) + + if tool_name: + if tool_args is None: + tool_args = _root_args(tool_request) + if tool_name == "response": + response_text = _response_text(tool_request, tool_args) + if response_text: + tool_args = {**tool_args, "text": response_text} + if not tool_args and tool_name == "response": + response_text = _synthesized_response_text(tool_request) + if response_text: + tool_args = {"text": response_text} + if isinstance(tool_args, dict): + return {"tool_name": tool_name, "tool_args": sanitize_tool_args(tool_args)} + return None + + if _looks_like_final_response_intent(tool_request): + response_text = _response_text(tool_request, {}) or _synthesized_response_text( + tool_request + ) + if response_text: + return { + "tool_name": "response", + "tool_args": sanitize_tool_args({"text": response_text}), + } + + return None + + def normalize_tool_request(tool_request: Any) -> tuple[str, dict]: if not isinstance(tool_request, dict): raise ValueError("Tool request must be a dictionary") @@ -33,7 +174,7 @@ def normalize_tool_request(tool_request: Any) -> tuple[str, dict]: tool_args = tool_request.get("args") if not isinstance(tool_args, dict): raise ValueError("Tool request must have a tool_args (type dictionary) field") - tool_args = dict(tool_args) + tool_args = sanitize_tool_args(dict(tool_args)) if ":" in tool_name: tool_name, action = tool_name.split(":", 1) if not tool_name or not action: @@ -45,6 +186,85 @@ def normalize_tool_request(tool_request: Any) -> tuple[str, dict]: return tool_name, tool_args +def sanitize_tool_args(value: Any) -> Any: + if isinstance(value, str): + return sanitize_string(value) + if isinstance(value, dict): + return { + sanitize_string(key) if isinstance(key, str) else key: sanitize_tool_args(item) + for key, item in value.items() + } + if isinstance(value, list): + return [sanitize_tool_args(item) for item in value] + if isinstance(value, tuple): + return tuple(sanitize_tool_args(item) for item in value) + return value + + +def _first_value(data: dict[str, Any], keys: tuple[str, ...]) -> Any: + for key in keys: + if key in data: + return data.get(key) + return None + + +def _first_string_value(data: dict[str, Any], keys: tuple[str, ...]) -> str: + value = _first_value(data, keys) + return value.strip() if isinstance(value, str) else "" + + +def _normalize_repaired_args(raw_args: Any, *, tool_name: str) -> dict[str, Any] | None: + if isinstance(raw_args, dict): + return dict(raw_args) + if isinstance(raw_args, str) and tool_name == "response": + text = raw_args.strip() + return {"text": text} if text else {} + if raw_args is None: + return None + return None + + +def _root_args(data: dict[str, Any]) -> dict[str, Any]: + excluded = set(TOOL_NAME_KEYS) | set(TOOL_ARGS_KEYS) | META_KEYS + return {key: value for key, value in data.items() if key not in excluded} + + +def _response_text(data: dict[str, Any], tool_args: dict[str, Any]) -> str: + for source in (tool_args, data): + value = _first_value(source, RESPONSE_TEXT_KEYS) + if isinstance(value, str) and value.strip(): + return value.strip() + return "" + + +def _synthesized_response_text(data: dict[str, Any]) -> str: + text = _intent_text(data) + lowered = text.lower() + if any(marker in lowered for marker in ("hi", "hello", "greeting")): + return "Hi. How can I help?" + if "thank" in lowered: + return "You're welcome." + return "" + + +def _looks_like_final_response_intent(data: dict[str, Any]) -> bool: + text = _intent_text(data) + lowered = text.lower() + if any(marker in lowered for marker in ACTION_INTENT_MARKERS): + return False + return any(marker in lowered for marker in RESPONSE_INTENT_MARKERS) + + +def _intent_text(value: Any) -> str: + if isinstance(value, dict): + return " ".join(_intent_text(item) for item in value.values()) + if isinstance(value, list): + return " ".join(_intent_text(item) for item in value) + if value is None: + return "" + return str(value) + + def extract_json_root_string(content: str) -> str | None: if not content or not isinstance(content, str): return None diff --git a/helpers/extract_tools.py.dox.md b/helpers/extract_tools.py.dox.md index 9c65b8ced7..b621335c3d 100644 --- a/helpers/extract_tools.py.dox.md +++ b/helpers/extract_tools.py.dox.md @@ -12,7 +12,12 @@ - `extract_tools.py.dox.md` owns durable notes about responsibilities, contracts, side effects, and verification for that implementation. - Top-level functions: - `json_parse_dirty(json: str) -> dict[str, Any] | None` +- `extract_tool_request(content: str) -> dict[str, Any] | None` +- `iter_json_dicts(content: str) -> list[dict[str, Any]]` +- `iter_json_object_strings(content: str) -> list[str]` +- `repair_tool_request(tool_request: Any) -> dict[str, Any] | None` - `normalize_tool_request(tool_request: Any) -> tuple[str, dict]` +- `sanitize_tool_args(value: Any) -> Any` - `extract_json_root_string(content: str) -> str | None` - `extract_json_object_string(content)` - `extract_json_string(content)` @@ -21,13 +26,16 @@ ## Runtime Contracts - Helper modules own reusable framework APIs and must preserve public callers unless all callers, tests, and docs are updated together. +- `extract_tool_request` is the executable-call entrypoint for model text; it may repair common local-model shapes into canonical `{"tool_name": ..., "tool_args": ...}` requests. +- Repair is intentionally narrow: preserve explicit tool names, move root-level args into `tool_args`, accept common argument aliases, and synthesize final `response` calls only for clear non-action response intent. +- Normalized tool arguments are sanitized recursively so invalid Unicode surrogates from model output do not crash UTF-8 file writes, logs, or tool execution. - Update this file whenever public functions, classes, persistence behavior, path/security assumptions, side effects, or cross-module contracts change. - Observed side-effect areas: settings/state persistence. - Imported dependency areas include: `dirty_json`, `helpers.modules`, `re`, `regex`, `typing`. ## Key Concepts -- Important called helpers/classes observed in the source: `extract_json_object_string`, `content.find`, `DirtyJson`, `content.rfind`, `regex.search`, `re.sub`, `json.strip`, `ValueError`, `tool_name.split`, `parser.parse`, `match.group`, `match.group.replace`, `DirtyJson.parse_string`. +- Important called helpers/classes observed in the source: `extract_json_object_string`, `extract_json_root_string`, `content.find`, `DirtyJson`, `content.rfind`, `regex.search`, `re.sub`, `json.strip`, `ValueError`, `tool_name.split`, `parser.parse`, `match.group`, `match.group.replace`, `DirtyJson.parse_string`. - Keep request/response, tool, or helper semantics documented here at the same time as source changes. ## Work Guidance diff --git a/helpers/litellm_transport.py b/helpers/litellm_transport.py index de5a2b884a..c56079ccd0 100644 --- a/helpers/litellm_transport.py +++ b/helpers/litellm_transport.py @@ -457,7 +457,13 @@ def prepare_kwargs( explicit_prompt_caching: bool = False, ) -> dict[str, Any]: chat_kwargs = dict(kwargs) + response_function_tools = chat_kwargs.pop("a0_responses_function_tools", None) _drop_internal_transport_kwargs(chat_kwargs) + chat_tools = ChatCompletionsTransport.merge_chat_tools( + chat_kwargs.get("tools"), response_function_tools + ) + if _has_tools(chat_tools): + chat_kwargs["tools"] = chat_tools if not _has_tools(chat_kwargs.get("tools")): chat_kwargs.pop("tool_choice", None) chat_kwargs.pop("parallel_tool_calls", None) @@ -488,8 +494,86 @@ def parse(chunk: Any) -> ChatChunk: reasoning_delta = _get_value(delta, "reasoning_content") or _get_value( message, "reasoning_content" ) or "" + tool_calls = _get_value(delta, "tool_calls") or _get_value( + message, "tool_calls" + ) + if tool_calls and not response_delta: + response_delta = ChatCompletionsTransport.tool_calls_text(tool_calls) return {"reasoning_delta": reasoning_delta, "response_delta": response_delta} + @staticmethod + def merge_chat_tools(existing_tools: Any, response_tools: Any) -> Any: + tools: list[Any] = [] + if isinstance(existing_tools, list): + tools.extend(existing_tools) + elif existing_tools: + tools.append(existing_tools) + tools.extend(ChatCompletionsTransport.chat_tools_from_response(response_tools)) + return tools + + @staticmethod + def chat_tools_from_response(tools: Any) -> list[Any]: + chat_tools: list[Any] = [] + for tool in _as_list(tools): + if not isinstance(tool, dict): + chat_tools.append(tool) + continue + if tool.get("type") == "function" and "function" not in tool: + function = { + "name": tool.get("name", ""), + "description": tool.get("description", ""), + "parameters": tool.get("parameters", {}), + } + if "strict" in tool: + function["strict"] = tool["strict"] + chat_tools.append({"type": "function", "function": function}) + else: + chat_tools.append(dict(tool)) + return chat_tools + + @staticmethod + def tool_calls_text(tool_calls: Any) -> str: + calls = [ + ChatCompletionsTransport.tool_call_object(tool_call) + for tool_call in _as_list(tool_calls) + ] + calls = [call for call in calls if call] + if not calls: + return "" + if len(calls) == 1: + return json.dumps(calls[0]) + return json.dumps( + {"tool_name": "parallel_tool_calls", "tool_args": {"calls": calls}} + ) + + @staticmethod + def tool_call_object(tool_call: Any) -> dict[str, Any]: + if not isinstance(tool_call, dict): + tool_call = _object_to_dict(tool_call) + function = _get_value(tool_call, "function") or {} + if not isinstance(function, dict): + function = _object_to_dict(function) + name = _get_value(function, "name") or _get_value(tool_call, "name") + if not name: + return {} + raw_arguments = _get_value(function, "arguments") or _get_value( + tool_call, "arguments" + ) + if isinstance(raw_arguments, str): + try: + args = json.loads(raw_arguments or "{}") + except Exception: + args = {"arguments": raw_arguments} + elif isinstance(raw_arguments, dict): + args = dict(raw_arguments) + elif raw_arguments is None: + args = {} + else: + args = {"arguments": raw_arguments} + if not isinstance(args, dict): + args = {"arguments": args} + return {"tool_name": str(name), "tool_args": args} + class ResponsesTransport: @classmethod diff --git a/helpers/litellm_transport.py.dox.md b/helpers/litellm_transport.py.dox.md new file mode 100644 index 0000000000..787ebdb009 --- /dev/null +++ b/helpers/litellm_transport.py.dox.md @@ -0,0 +1,36 @@ +# litellm_transport.py DOX + +## Purpose + +- Own the LiteLLM transport layer that normalizes Agent Zero chat and Responses API calls. +- Provide fallback between Responses and chat-completions modes while preserving A0 tool-call semantics. + +## Ownership + +- `LiteLLMTransport` selects the active transport, performs sync/async complete and stream calls, and exposes the last `LLMResult`. +- `TransportPolicy` owns mode selection, Responses fallback, and provider capability cache behavior. +- `ChatCompletionsTransport` owns chat-completions request preparation and response parsing. +- `ResponsesTransport` owns Responses API request preparation, output parsing, and conversion between chat messages and Responses input items. +- `ResponsesEventParser` owns stateful parsing of streamed Responses events. + +## Runtime Contracts + +- A0-only kwargs such as `a0_api_mode`, `a0_responses_function_tools`, and Responses state fields must not leak to LiteLLM provider calls. +- Explicit `chat_completions` mode may receive A0-generated function tools through `a0_responses_function_tools`; these must be converted into standard chat-completions `tools`. +- Chat-completions tool calls are converted back into canonical A0 JSON tool requests so the existing tool executor can process them. +- Responses fallback must keep provider-state, local-state, and unsupported-capability caches bounded to transport decisions only. + +## Work Guidance + +- Keep request conversion and response parsing symmetric: if a tool schema is converted into provider format, returned tool calls must convert back into A0 format. +- Avoid broad provider-specific branches unless LiteLLM cannot normalize the behavior. +- Keep streaming changes explicit; chat-completions streaming tool-call deltas require stateful accumulation before they can replace non-stream tool-call turns. + +## Verification + +- Run focused transport tests in `tests/test_stream_tool_early_stop.py` after changing request preparation, parsing, fallback, or cache behavior. +- For local Ollama tool-call changes, verify both final-response and executable-tool prompts when the model is available. + +## Child DOX Index + +No child DOX files. diff --git a/models.py b/models.py index 811fa31e0c..3b0588affc 100644 --- a/models.py +++ b/models.py @@ -266,6 +266,19 @@ def output(self) -> ChatChunk: return ChatChunk(response_delta=response, reasoning_delta=reasoning) +def _force_non_streaming_chat_tools(call_kwargs: dict[str, Any]) -> bool: + mode = str(call_kwargs.get("a0_api_mode") or "").lower().strip() + if mode not in { + "chat", + "chat_completion", + "chat_completions", + "completion", + "completions", + }: + return False + return bool(call_kwargs.get("a0_responses_function_tools")) + + rate_limiters: dict[str, RateLimiter] = {} api_keys_round_robin: dict[str, int] = {} @@ -685,6 +698,8 @@ async def unified_turn( or response_callback is not None or tokens_callback is not None ) + if _force_non_streaming_chat_tools(call_kwargs): + stream = False transport = LiteLLMTransport( model=self.model_name, messages=msgs_conv, diff --git a/plugins/_text_editor/AGENTS.md b/plugins/_text_editor/AGENTS.md index f18d552d31..96e2d07c3e 100644 --- a/plugins/_text_editor/AGENTS.md +++ b/plugins/_text_editor/AGENTS.md @@ -16,6 +16,7 @@ - Preserve stale-read protection before patch operations. - Validate patch structures before applying edits. - Read back changed regions after writes or patches where the tool contract requires confirmation. +- Sanitize invalid Unicode surrogate characters before writing text content to UTF-8 files. ## Work Guidance diff --git a/plugins/_text_editor/helpers/file_ops.py b/plugins/_text_editor/helpers/file_ops.py index 972bf9da24..a5dbb28207 100644 --- a/plugins/_text_editor/helpers/file_ops.py +++ b/plugins/_text_editor/helpers/file_ops.py @@ -10,6 +10,7 @@ from typing import TypedDict from helpers import tokens +from helpers.strings import sanitize_string from plugins._text_editor.helpers.context_patch import ( apply_context_patch_with_metadata, ) @@ -187,6 +188,7 @@ def write_file(path: str, content: str | None) -> WriteResult: """Create or overwrite a file.""" if content is None: content = "" + content = sanitize_string(content) path = os.path.expanduser(path) try: os.makedirs(os.path.dirname(path) or ".", exist_ok=True) diff --git a/prompts/agent.system.main.small_local_model.md b/prompts/agent.system.main.small_local_model.md new file mode 100644 index 0000000000..25989e7262 --- /dev/null +++ b/prompts/agent.system.main.small_local_model.md @@ -0,0 +1,15 @@ +## Small local model tool-call guardrail + +/no_think + +You may reason internally, but your visible assistant response must be exactly one minimal JSON object. + +- Do not include `thoughts`, `headline`, `analysis`, `reasoning`, or `` in the visible response. +- Every visible JSON object must include exactly the executable fields `tool_name` and `tool_args`. +- Put all final user-facing text inside `tool_args.text` on the `response` tool. +- If you see a warning about repeated, reasoning-only, or misformatted output, do not explain the warning. Immediately output a corrected JSON tool request. + +Minimal final-answer example: +~~~json +{"tool_name":"response","tool_args":{"text":"Hi. How can I help?"}} +~~~ diff --git a/prompts/fw.msg_misformat.md b/prompts/fw.msg_misformat.md index 23ae0eff16..a26cdb9ed6 100644 --- a/prompts/fw.msg_misformat.md +++ b/prompts/fw.msg_misformat.md @@ -1 +1 @@ -You have misformatted your message. Follow system prompt instructions on JSON message formatting precisely. \ No newline at end of file +You have misformatted your message. Output exactly one JSON object with `tool_name` and `tool_args`. For a final user answer, use `tool_name`: `response`. Do not output reasoning-only text, markdown fences, or a JSON object that only has `thoughts` and `headline`. diff --git a/prompts/fw.msg_reasoning_only.md b/prompts/fw.msg_reasoning_only.md new file mode 100644 index 0000000000..290cb86301 --- /dev/null +++ b/prompts/fw.msg_reasoning_only.md @@ -0,0 +1 @@ +Your previous output was reasoning only. Now output exactly one valid JSON object in the assistant response, not in the reasoning stream. The JSON object must include `tool_name` and `tool_args`. For a final answer, use `tool_name`: `response` and `tool_args`: {"text":"..."}. Do not repeat prior reasoning or explain this warning. diff --git a/prompts/fw.msg_reasoning_only_failed.md b/prompts/fw.msg_reasoning_only_failed.md new file mode 100644 index 0000000000..66091fdff4 --- /dev/null +++ b/prompts/fw.msg_reasoning_only_failed.md @@ -0,0 +1 @@ +The selected model produced reasoning-only output {{attempts}} times and did not emit a valid tool request. Select a model with stronger JSON/tool-call reliability or disable thinking for this local model, then try again. diff --git a/prompts/fw.msg_repeat.md b/prompts/fw.msg_repeat.md index 74da5835f6..96111d7f23 100644 --- a/prompts/fw.msg_repeat.md +++ b/prompts/fw.msg_repeat.md @@ -1 +1 @@ -You have sent the same message again. You have to do something else! \ No newline at end of file +You repeated the exact same already-processed response. Do not call the same tool with the same arguments again. If the previous tool result completed the task, immediately use `tool_name`: `response` with a brief final answer. If more work is needed, output a different valid JSON tool request with different `tool_args`. diff --git a/prompts/fw.msg_repeat_completed.md b/prompts/fw.msg_repeat_completed.md new file mode 100644 index 0000000000..b53fde50e2 --- /dev/null +++ b/prompts/fw.msg_repeat_completed.md @@ -0,0 +1,3 @@ +Done. The previous tool action already completed, so Agent Zero stopped the duplicate tool call. + +{{last_tool_result}} diff --git a/prompts/fw.msg_repeat_failed.md b/prompts/fw.msg_repeat_failed.md new file mode 100644 index 0000000000..020c69804e --- /dev/null +++ b/prompts/fw.msg_repeat_failed.md @@ -0,0 +1,3 @@ +The selected model repeated the exact same already-processed response {{attempts}} times, so Agent Zero stopped the loop instead of repeating the action again. + +{{last_tool_result}} diff --git a/prompts/fw.msg_tool_completed.md b/prompts/fw.msg_tool_completed.md new file mode 100644 index 0000000000..aef1c8eb3e --- /dev/null +++ b/prompts/fw.msg_tool_completed.md @@ -0,0 +1 @@ +Done. {{message}} diff --git a/tests/test_small_local_model_prompt.py b/tests/test_small_local_model_prompt.py new file mode 100644 index 0000000000..154036d371 --- /dev/null +++ b/tests/test_small_local_model_prompt.py @@ -0,0 +1,369 @@ +import sys +import types +from pathlib import Path + +import pytest + + +PROJECT_ROOT = Path(__file__).resolve().parents[1] +if str(PROJECT_ROOT) not in sys.path: + sys.path.insert(0, str(PROJECT_ROOT)) + +import agent as agent_module +from agent import Agent, LoopData +from extensions.python.system_prompt import _10_small_local_model_prompt as small_prompt +from helpers import history +from helpers.llm_result import LLMResult + + +def test_small_local_prompt_detects_qwen_ollama(): + assert small_prompt.should_include_small_local_prompt( + { + "provider": "ollama", + "name": "qwen3.5:9b", + "api_base": "http://host.docker.internal:11434", + } + ) + + +def test_small_local_prompt_detects_local_small_model_by_size(): + assert small_prompt.should_include_small_local_prompt( + { + "provider": "other", + "name": "llama3.2:8b", + "api_base": "http://127.0.0.1:1234/v1", + } + ) + + +def test_small_local_prompt_ignores_cloud_and_large_local_models(): + assert not small_prompt.should_include_small_local_prompt( + { + "provider": "openrouter", + "name": "qwen3.5:9b", + "api_base": "", + } + ) + assert not small_prompt.should_include_small_local_prompt( + { + "provider": "ollama", + "name": "llama3.3:70b", + "api_base": "http://localhost:11434", + } + ) + + +def test_small_local_prompt_forbids_visible_planning_fields(): + prompt = (PROJECT_ROOT / "prompts" / "agent.system.main.small_local_model.md").read_text( + encoding="utf-8" + ) + + assert "Do not include `thoughts`, `headline`" in prompt + assert "exactly the executable fields `tool_name` and `tool_args`" in prompt + + +@pytest.mark.asyncio +async def test_small_local_prompt_builds_from_model_config(monkeypatch): + from plugins._model_config.helpers import model_config + + class DummyAgent: + def read_prompt(self, file: str, **kwargs): + return f"prompt:{file}" + + monkeypatch.setattr( + model_config, + "get_chat_model_config", + lambda agent: { + "provider": "ollama", + "name": "qwen3.5:9b", + "api_base": "http://host.docker.internal:11434", + }, + ) + + assert await small_prompt.build_prompt(DummyAgent()) == ( + "prompt:agent.system.main.small_local_model.md" + ) + + +def test_reasoning_only_guard_retries_then_fails(monkeypatch): + monkeypatch.setattr( + agent_module, + "PrintStyle", + lambda *args, **kwargs: types.SimpleNamespace(print=lambda message: None), + ) + + log_entries = [] + + class DummyLog: + def log(self, **kwargs): + log_entries.append(kwargs) + return types.SimpleNamespace(id=f"log-{len(log_entries)}") + + test_agent = object.__new__(Agent) + test_agent.loop_data = LoopData() + test_agent.context = types.SimpleNamespace(log=DummyLog()) + test_agent.agent_name = "A0" + + warnings = [] + + def read_prompt(file: str, **kwargs): + if file == "fw.msg_reasoning_only.md": + return "repair tool request" + if file == "fw.msg_reasoning_only_failed.md": + return f"failed after {kwargs['attempts']}" + return file + + def hist_add_warning(message: str): + warnings.append(message) + return types.SimpleNamespace(id=f"warn-{len(warnings)}") + + test_agent.read_prompt = read_prompt + test_agent.hist_add_warning = hist_add_warning + + result = LLMResult.from_chat(response="", reasoning="thinking but no tool") + + assert Agent._handle_reasoning_only_result(test_agent, result) == (True, None) + assert Agent._handle_reasoning_only_result(test_agent, result) == (True, None) + assert Agent._handle_reasoning_only_result(test_agent, result) == ( + True, + "failed after 2", + ) + assert warnings == ["repair tool request", "repair tool request"] + assert log_entries[-1]["type"] == "response" + + +def test_reasoning_only_guard_ignores_valid_response(): + test_agent = object.__new__(Agent) + test_agent.loop_data = LoopData() + + result = LLMResult.from_chat( + response='{"tool_name":"response","tool_args":{"text":"ok"}}', + reasoning="thinking before valid response", + ) + + assert Agent._handle_reasoning_only_result(test_agent, result) == (False, None) + + +def test_repeated_response_guard_finalizes_completed_duplicate_tool_call(monkeypatch): + monkeypatch.setattr( + agent_module, + "PrintStyle", + lambda *args, **kwargs: types.SimpleNamespace(print=lambda message: None), + ) + + log_entries = [] + + class DummyLog: + def log(self, **kwargs): + log_entries.append(kwargs) + return types.SimpleNamespace(id=f"log-{len(log_entries)}") + + test_agent = object.__new__(Agent) + test_agent.loop_data = LoopData() + test_agent.context = types.SimpleNamespace(log=DummyLog()) + test_agent.agent_name = "A0" + test_agent.history = history.History(test_agent) + test_agent.history.add_message( + False, + { + "tool_name": "text_editor", + "tool_result": "/a0/usr/workdir/todos.md written 9 lines", + }, + ) + + warnings = [] + + def read_prompt(file: str, **kwargs): + if file == "fw.msg_repeat_completed.md": + return f"completed: {kwargs['last_tool_result']}" + if file == "fw.msg_repeat.md": + return "repeat repair" + if file == "fw.msg_repeat_failed.md": + return f"failed after {kwargs['attempts']}: {kwargs['last_tool_result']}" + return file + + def hist_add_warning(message: str): + warnings.append(message) + return types.SimpleNamespace(id=f"warn-{len(warnings)}") + + test_agent.read_prompt = read_prompt + test_agent.hist_add_warning = hist_add_warning + + result = LLMResult.from_chat( + response='{"tool_name":"text_editor","tool_args":{"action":"write"}}' + ) + + handled, final = Agent._handle_repeated_response_result(test_agent, result) + + assert handled is True + assert final is not None + assert "completed:" in final + assert "text_editor: /a0/usr/workdir/todos.md written 9 lines" in final + assert warnings == [] + assert log_entries[-1]["type"] == "response" + + +def test_repeated_response_guard_retries_then_stops_without_completed_tool(monkeypatch): + monkeypatch.setattr( + agent_module, + "PrintStyle", + lambda *args, **kwargs: types.SimpleNamespace(print=lambda message: None), + ) + + log_entries = [] + + class DummyLog: + def log(self, **kwargs): + log_entries.append(kwargs) + return types.SimpleNamespace(id=f"log-{len(log_entries)}") + + test_agent = object.__new__(Agent) + test_agent.loop_data = LoopData() + test_agent.context = types.SimpleNamespace(log=DummyLog()) + test_agent.agent_name = "A0" + test_agent.history = history.History(test_agent) + + warnings = [] + + def read_prompt(file: str, **kwargs): + if file == "fw.msg_repeat.md": + return "repeat repair" + if file == "fw.msg_repeat_failed.md": + return f"failed after {kwargs['attempts']}: {kwargs['last_tool_result']}" + return file + + def hist_add_warning(message: str): + warnings.append(message) + return types.SimpleNamespace(id=f"warn-{len(warnings)}") + + test_agent.read_prompt = read_prompt + test_agent.hist_add_warning = hist_add_warning + + result = LLMResult.from_chat(response="same non-tool text") + + assert Agent._handle_repeated_response_result(test_agent, result) == (True, None) + assert Agent._handle_repeated_response_result(test_agent, result) == (True, None) + handled, final = Agent._handle_repeated_response_result(test_agent, result) + + assert handled is True + assert final is not None + assert "failed after 2" in final + assert "No successful tool result was available" in final + assert warnings == ["repeat repair", "repeat repair"] + assert log_entries[-1]["type"] == "response" + + +def test_successful_canvas_text_editor_write_records_semantic_duplicate(monkeypatch): + monkeypatch.setattr( + agent_module, + "PrintStyle", + lambda *args, **kwargs: types.SimpleNamespace(print=lambda message: None), + ) + + log_entries = [] + + class DummyLog: + def log(self, **kwargs): + log_entries.append(kwargs) + return types.SimpleNamespace(id=f"log-{len(log_entries)}") + + test_agent = object.__new__(Agent) + test_agent.loop_data = LoopData() + test_agent.context = types.SimpleNamespace(log=DummyLog()) + test_agent.agent_name = "A0" + test_agent.history = history.History(test_agent) + + def read_prompt(file: str, **kwargs): + if file == "fw.msg_repeat_completed.md": + return f"completed: {kwargs['last_tool_result']}" + if file == "fw.msg_tool_completed.md": + return f"done: {kwargs['message']}" + return file + + test_agent.read_prompt = read_prompt + test_agent.get_data = lambda name: {} + test_agent.set_data = lambda name, value: None + + original_args = { + "action": "write", + "path": "/a0/usr/workdir/TODO.md", + "content": "# My Tasks\n", + "open_in_canvas": True, + } + response = types.SimpleNamespace( + message="/a0/usr/workdir/TODO.md written 1 lines", + additional={ + "_tool_name": "text_editor", + "action": "write", + "path": "/a0/usr/workdir/TODO.md", + "open_in_canvas": True, + }, + ) + + test_agent.history.add_message( + False, + { + "tool_name": "text_editor", + "tool_result": "/a0/usr/workdir/TODO.md written 1 lines", + }, + ) + + Agent._record_successful_tool_request( + test_agent, "text_editor", original_args, response + ) + + repeated_args_with_different_content = { + "action": "write", + "path": "/a0/usr/workdir/TODO.md", + "content": "# My Tasks\n\n- [ ] Different generated text\n", + "open_in_canvas": True, + } + + assert Agent._is_duplicate_completed_tool_request( + test_agent, "text_editor", repeated_args_with_different_content + ) + assert Agent._handle_duplicate_completed_tool_request( + test_agent, "text_editor", repeated_args_with_different_content + ) == "completed: Last completed tool result:\ntext_editor: /a0/usr/workdir/TODO.md written 1 lines" + assert log_entries[-1]["heading"] == "A0: Duplicate tool action stopped" + + +def test_canvas_text_editor_completion_finalizes_turn(monkeypatch): + monkeypatch.setattr( + agent_module, + "PrintStyle", + lambda *args, **kwargs: types.SimpleNamespace(print=lambda message: None), + ) + + test_agent = object.__new__(Agent) + test_agent.loop_data = LoopData() + + def read_prompt(file: str, **kwargs): + if file == "fw.msg_tool_completed.md": + return f"done: {kwargs['message']}" + return file + + test_agent.read_prompt = read_prompt + + final = Agent._tool_completion_final_message( + test_agent, + "text_editor", + { + "action": "write", + "path": "/a0/usr/workdir/TODO.md", + "content": "# My Tasks\n", + "open_in_canvas": True, + }, + types.SimpleNamespace( + additional={ + "action": "write", + "path": "/a0/usr/workdir/TODO.md", + "open_in_canvas": True, + } + ), + ) + + assert final == ( + "done: The document was saved and opened in the canvas: " + "/a0/usr/workdir/TODO.md" + ) diff --git a/tests/test_stream_tool_early_stop.py b/tests/test_stream_tool_early_stop.py index a1d0ac63ea..0b74ac756f 100644 --- a/tests/test_stream_tool_early_stop.py +++ b/tests/test_stream_tool_early_stop.py @@ -306,6 +306,73 @@ async def response_callback(chunk: str, full: str): assert calls == ["chat"] +@pytest.mark.asyncio +async def test_chat_completions_with_a0_tools_uses_non_stream_acompletion(monkeypatch): + calls: list[dict] = [] + + async def fake_acompletion(*args, **kwargs): + calls.append(kwargs) + return { + "choices": [ + { + "message": { + "content": "", + "tool_calls": [ + { + "id": "call_1", + "type": "function", + "function": { + "name": "response", + "arguments": '{"text":"ok"}', + }, + } + ], + } + } + ] + } + + async def fake_aresponses(*args, **kwargs): + raise AssertionError("Responses path should not be used") + + async def fake_rate_limiter(*args, **kwargs): + return None + + monkeypatch.setattr(litellm_transport, "acompletion", fake_acompletion) + monkeypatch.setattr(litellm_transport, "aresponses", fake_aresponses) + monkeypatch.setattr(models, "apply_rate_limiter", fake_rate_limiter) + + wrapper = models.LiteLLMChatWrapper( + model="test-model", + provider="ollama", + model_config=None, + a0_api_mode="chat_completions", + ) + + async def response_callback(chunk: str, full: str): + raise AssertionError("tool-call chat completion should not stream") + + result = await wrapper.unified_turn( + messages=[], + response_callback=response_callback, + a0_responses_function_tools=[ + { + "type": "function", + "name": "response", + "description": "Final answer", + "parameters": {"type": "object"}, + } + ], + ) + + assert extract_tools.extract_tool_request(result.response) == { + "tool_name": "response", + "tool_args": {"text": "ok"}, + } + assert calls[0]["stream"] is False + assert calls[0]["tools"][0]["function"]["name"] == "response" + + @pytest.mark.asyncio async def test_unified_call_retries_responses_with_high_reasoning(monkeypatch): validation_error = ValueError( @@ -743,6 +810,67 @@ def test_chat_kwargs_add_openai_prompt_cache_key_for_chat_completions(): assert kwargs["max_tokens"] == 10 +def test_chat_kwargs_convert_a0_response_tools_to_chat_tools(): + kwargs = litellm_transport.ChatCompletionsTransport.prepare_kwargs( + { + "a0_responses_function_tools": [ + { + "type": "function", + "name": "text_editor", + "description": "Edit text files", + "parameters": {"type": "object"}, + } + ], + "tool_choice": "auto", + "parallel_tool_calls": True, + }, + model="ollama/qwen3.5:9b", + ) + + assert kwargs["tools"] == [ + { + "type": "function", + "function": { + "name": "text_editor", + "description": "Edit text files", + "parameters": {"type": "object"}, + }, + } + ] + assert kwargs["tool_choice"] == "auto" + assert kwargs["parallel_tool_calls"] is True + assert "a0_responses_function_tools" not in kwargs + + +def test_chat_parse_turns_tool_calls_into_a0_tool_json(): + parsed = litellm_transport.ChatCompletionsTransport.parse( + { + "choices": [ + { + "message": { + "content": "", + "tool_calls": [ + { + "id": "call_1", + "type": "function", + "function": { + "name": "text_editor", + "arguments": '{"action":"write","path":"todos.md"}', + }, + } + ], + } + } + ] + } + ) + + assert extract_tools.extract_tool_request(parsed["response_delta"]) == { + "tool_name": "text_editor", + "tool_args": {"action": "write", "path": "todos.md"}, + } + + def test_chat_messages_strip_cache_control_for_openai_prompt_cache(): messages = [ { diff --git a/tests/test_text_editor_context_patch.py b/tests/test_text_editor_context_patch.py index 0cbb260ce7..7ddb5e41ed 100644 --- a/tests/test_text_editor_context_patch.py +++ b/tests/test_text_editor_context_patch.py @@ -17,6 +17,7 @@ from plugins._text_editor.helpers.file_ops import ( apply_context_patch_file, apply_exact_replace_file, + write_file, ) from plugins._text_editor.helpers.patch_request import ( exact_replace_to_patch_text, @@ -32,6 +33,15 @@ ) +def test_text_editor_write_file_sanitizes_surrogate_content(tmp_path: Path) -> None: + target = tmp_path / "todo.md" + + result = write_file(str(target), "# \ud83d\udcdd My Tasks\n") + + assert result["error"] == "" + assert target.read_text(encoding="utf-8") == "# ?? My Tasks\n" + + def test_context_patch_chains_after_line_shift(tmp_path: Path) -> None: target = tmp_path / "sample.txt" target.write_text("alpha\nbeta\ngamma\n", encoding="utf-8") diff --git a/tests/test_tool_request_normalization.py b/tests/test_tool_request_normalization.py index 81472c561b..6cda3f33d4 100644 --- a/tests/test_tool_request_normalization.py +++ b/tests/test_tool_request_normalization.py @@ -9,7 +9,7 @@ if str(PROJECT_ROOT) not in sys.path: sys.path.insert(0, str(PROJECT_ROOT)) -from helpers.extract_tools import normalize_tool_request +from helpers.extract_tools import extract_tool_request, normalize_tool_request def test_normalize_tool_request_accepts_canonical_keys() -> None: @@ -65,3 +65,95 @@ def test_normalize_tool_request_preserves_explicit_action_over_method() -> None: def test_normalize_tool_request_rejects_missing_args() -> None: with pytest.raises(ValueError, match="tool_args"): normalize_tool_request({"tool_name": "response"}) + + +def test_extract_tool_request_repairs_root_level_tool_args() -> None: + assert extract_tool_request( + """ + { + "thoughts": ["Need to write the todo file."], + "headline": "Creating TODO list", + "tool_name": "text_editor", + "action": "write", + "path": "/a0/usr/workdir/todos.md", + "content": "# My Tasks" + } + """ + ) == { + "tool_name": "text_editor", + "tool_args": { + "action": "write", + "path": "/a0/usr/workdir/todos.md", + "content": "# My Tasks", + }, + } + + +def test_extract_tool_request_accepts_argument_aliases() -> None: + assert extract_tool_request( + '{"function_name":"response","arguments":{"text":"done"}}' + ) == {"tool_name": "response", "tool_args": {"text": "done"}} + + +def test_extract_tool_request_repairs_response_string_args() -> None: + assert extract_tool_request('{"tool_name":"response","tool_args":"hello"}') == { + "tool_name": "response", + "tool_args": {"text": "hello"}, + } + + +def test_extract_tool_request_sanitizes_surrogate_tool_args() -> None: + request = extract_tool_request( + r'{"tool_name":"text_editor","tool_args":{"content":"# \ud83d\udcdd My Tasks"}}' + ) + + assert request == { + "tool_name": "text_editor", + "tool_args": {"content": "# ?? My Tasks"}, + } + request["tool_args"]["content"].encode("utf-8") + + +def test_extract_tool_request_recovers_thoughts_only_greeting_response() -> None: + assert extract_tool_request( + """ + Reasoning: + { + "thoughts": [ + "The user has sent a simple 'hi' greeting.", + "I need to respond with the appropriate JSON format using the response tool.", + "A friendly acknowledgment and offer of assistance would be good." + ], + "headline": "Acknowledging user's greeting and offering help" + } + """ + ) == { + "tool_name": "response", + "tool_args": {"text": "Hi. How can I help?"}, + } + + +def test_extract_tool_request_does_not_convert_action_planning_to_response() -> None: + assert ( + extract_tool_request( + """ + { + "thoughts": [ + "The user wants a TODO list in the canvas.", + "I should use text_editor with action write." + ], + "headline": "Creating TODO list locally" + } + """ + ) + is None + ) + + +def test_extract_tool_request_prefers_later_valid_tool_object() -> None: + assert extract_tool_request( + """ + Reasoning: {"thoughts":["I need to use text_editor."],"headline":"Plan"} + {"tool_name":"response","tool_args":{"text":"done"}} + """ + ) == {"tool_name": "response", "tool_args": {"text": "done"}} From 1dd5787bdd8485cbf80724dd23d47c10ede93c03 Mon Sep 17 00:00:00 2001 From: TerminallyLazy Date: Sun, 21 Jun 2026 07:40:04 -0400 Subject: [PATCH 2/2] fix: recover incomplete response tool output --- helpers/extract_tools.py | 87 +++++++++++++++++++ helpers/extract_tools.py.dox.md | 1 + plugins/_memory/AGENTS.md | 1 + .../_memory/helpers/memory_consolidation.py | 40 ++++++++- tests/test_memory_consolidation.py | 49 +++++++++++ tests/test_tool_request_normalization.py | 19 ++++ 6 files changed, 195 insertions(+), 2 deletions(-) create mode 100644 tests/test_memory_consolidation.py diff --git a/helpers/extract_tools.py b/helpers/extract_tools.py index 28db4e460f..13b032b130 100644 --- a/helpers/extract_tools.py +++ b/helpers/extract_tools.py @@ -1,4 +1,6 @@ +import json + from .dirty_json import DirtyJson import regex, re from helpers.modules import load_classes_from_file, load_classes_from_folder # keep here for backwards compatibility @@ -88,9 +90,28 @@ def extract_tool_request(content: str) -> dict[str, Any] | None: except ValueError: continue return {"tool_name": tool_name, "tool_args": tool_args} + partial_response = extract_partial_response_tool_request(content) + if partial_response: + return partial_response return None +def extract_partial_response_tool_request(content: str) -> dict[str, Any] | None: + if not content or not isinstance(content, str): + return None + if not _contains_response_tool_name(content): + return None + + text_value = _partial_json_string_value(content, RESPONSE_TEXT_KEYS) + if not text_value: + return None + + return { + "tool_name": "response", + "tool_args": sanitize_tool_args({"text": text_value}), + } + + def iter_json_dicts(content: str) -> list[dict[str, Any]]: if not content or not isinstance(content, str): return [] @@ -265,6 +286,72 @@ def _intent_text(value: Any) -> str: return str(value) +def _contains_response_tool_name(content: str) -> bool: + for key in TOOL_NAME_KEYS: + pattern = rf'"{re.escape(key)}"\s*:\s*"response"' + if re.search(pattern, content): + return True + return False + + +def _partial_json_string_value(content: str, keys: tuple[str, ...]) -> str: + for key in keys: + pattern = rf'"{re.escape(key)}"\s*:\s*"' + match = re.search(pattern, content) + if not match: + continue + return _decode_json_string_prefix(content[match.end():]).strip() + return "" + + +def _decode_json_string_prefix(content: str) -> str: + chars: list[str] = [] + index = 0 + while index < len(content): + char = content[index] + if char == '"': + break + if char != "\\": + chars.append(char) + index += 1 + continue + + index += 1 + if index >= len(content): + chars.append("\\") + break + + escaped = content[index] + if escaped in {'"', "\\", "/"}: + chars.append(escaped) + elif escaped == "b": + chars.append("\b") + elif escaped == "f": + chars.append("\f") + elif escaped == "n": + chars.append("\n") + elif escaped == "r": + chars.append("\r") + elif escaped == "t": + chars.append("\t") + elif escaped == "u": + hex_value = content[index + 1 : index + 5] + if len(hex_value) == 4 and all(c in "0123456789abcdefABCDEF" for c in hex_value): + chars.append(chr(int(hex_value, 16))) + index += 4 + else: + chars.append("\\u") + else: + chars.append(escaped) + index += 1 + + decoded = "".join(chars) + try: + return json.loads(json.dumps(decoded)) + except Exception: + return decoded + + def extract_json_root_string(content: str) -> str | None: if not content or not isinstance(content, str): return None diff --git a/helpers/extract_tools.py.dox.md b/helpers/extract_tools.py.dox.md index b621335c3d..9d8639dd3b 100644 --- a/helpers/extract_tools.py.dox.md +++ b/helpers/extract_tools.py.dox.md @@ -28,6 +28,7 @@ - Helper modules own reusable framework APIs and must preserve public callers unless all callers, tests, and docs are updated together. - `extract_tool_request` is the executable-call entrypoint for model text; it may repair common local-model shapes into canonical `{"tool_name": ..., "tool_args": ...}` requests. - Repair is intentionally narrow: preserve explicit tool names, move root-level args into `tool_args`, accept common argument aliases, and synthesize final `response` calls only for clear non-action response intent. +- Incomplete `response` tool JSON may be recovered only when an explicit response tool name and a `text`-like string value are present; non-response tool calls still require complete parseable JSON before execution. - Normalized tool arguments are sanitized recursively so invalid Unicode surrogates from model output do not crash UTF-8 file writes, logs, or tool execution. - Update this file whenever public functions, classes, persistence behavior, path/security assumptions, side effects, or cross-module contracts change. - Observed side-effect areas: settings/state persistence. diff --git a/plugins/_memory/AGENTS.md b/plugins/_memory/AGENTS.md index 622026f574..c3e1d5fc92 100644 --- a/plugins/_memory/AGENTS.md +++ b/plugins/_memory/AGENTS.md @@ -17,6 +17,7 @@ - Keep memory scoped by configured subdirectory/context. - Preserve embedding metadata needed to rebuild indexes safely. - Avoid storing transient action-history noise as durable memory. +- If LLM-based consolidation times out, insert the new memory directly instead of dropping it. ## Work Guidance diff --git a/plugins/_memory/helpers/memory_consolidation.py b/plugins/_memory/helpers/memory_consolidation.py index 227fb11e82..e074133f65 100644 --- a/plugins/_memory/helpers/memory_consolidation.py +++ b/plugins/_memory/helpers/memory_consolidation.py @@ -102,13 +102,49 @@ async def process_new_memory( return result except asyncio.TimeoutError: - PrintStyle().error(f"Memory consolidation timeout for area {area}") - return {"success": False, "memory_ids": []} + PrintStyle().warning( + f"Memory consolidation timeout for area {area}; inserting memory without consolidation" + ) + return await self._insert_memory_directly( + new_memory, + metadata, + log_item, + result="Memory inserted after consolidation timeout", + consolidation_action="timeout_direct_insert", + ) except Exception as e: PrintStyle().error(f"Memory consolidation error for area {area}: {str(e)}") return {"success": False, "memory_ids": []} + async def _insert_memory_directly( + self, + new_memory: str, + metadata: Dict[str, Any], + log_item: Optional[LogItem] = None, + *, + result: str = "Memory inserted successfully", + consolidation_action: str = "direct_insert", + ) -> dict: + try: + db = await Memory.get(self.agent) + insert_metadata = dict(metadata) + if 'timestamp' not in insert_metadata: + insert_metadata['timestamp'] = self._get_timestamp() + memory_id = await db.insert_text(new_memory, insert_metadata) + if log_item: + log_item.update( + result=result, + memory_ids=[memory_id], + consolidation_action=consolidation_action + ) + return {"success": True, "memory_ids": [memory_id]} + except Exception as e: + PrintStyle().error(f"Direct memory insertion failed: {str(e)}") + if log_item: + log_item.update(result=f"Memory insertion failed: {str(e)}") + return {"success": False, "memory_ids": []} + async def _process_memory_with_consolidation( self, new_memory: str, diff --git a/tests/test_memory_consolidation.py b/tests/test_memory_consolidation.py new file mode 100644 index 0000000000..25eb45cfea --- /dev/null +++ b/tests/test_memory_consolidation.py @@ -0,0 +1,49 @@ +import asyncio +import types + +import pytest + +from plugins._memory.helpers import memory_consolidation + + +@pytest.mark.asyncio +async def test_memory_consolidation_timeout_inserts_directly(monkeypatch): + inserted = [] + updates = [] + + class FakeMemory: + async def insert_text(self, text, metadata): + inserted.append((text, metadata)) + return "memory-1" + + async def fake_memory_get(agent): + return FakeMemory() + + async def never_finishes(self, new_memory, area, metadata, log_item=None): + await asyncio.sleep(10) + + monkeypatch.setattr(memory_consolidation.Memory, "get", fake_memory_get) + monkeypatch.setattr( + memory_consolidation.MemoryConsolidator, + "_process_memory_with_consolidation", + never_finishes, + ) + + consolidator = memory_consolidation.MemoryConsolidator( + agent=object(), + config=memory_consolidation.ConsolidationConfig( + processing_timeout_seconds=0.01 + ), + ) + result = await consolidator.process_new_memory( + "important finding", + "solutions", + {"source": "test"}, + types.SimpleNamespace(update=lambda **kwargs: updates.append(kwargs)), + ) + + assert result == {"success": True, "memory_ids": ["memory-1"]} + assert inserted[0][0] == "important finding" + assert inserted[0][1]["source"] == "test" + assert inserted[0][1]["timestamp"] + assert updates[-1]["consolidation_action"] == "timeout_direct_insert" diff --git a/tests/test_tool_request_normalization.py b/tests/test_tool_request_normalization.py index 531fb1f685..dbe4f7f951 100644 --- a/tests/test_tool_request_normalization.py +++ b/tests/test_tool_request_normalization.py @@ -103,6 +103,25 @@ def test_extract_tool_request_repairs_response_string_args() -> None: } +def test_extract_tool_request_recovers_incomplete_response_text() -> None: + assert extract_tool_request( + '{"tool_name":"response","tool_args":{"text":"### Report\\n\\nFinding: \\"quoted' + ) == { + "tool_name": "response", + "tool_args": {"text": '### Report\n\nFinding: "quoted'}, + } + + +def test_extract_tool_request_does_not_recover_incomplete_action_tool() -> None: + assert ( + extract_tool_request( + '{"tool_name":"text_editor","tool_args":{"content":"# My Tasks' + ) + is None + ) + assert extract_tool_request('{"') is None + + def test_extract_tool_request_sanitizes_surrogate_tool_args() -> None: request = extract_tool_request( r'{"tool_name":"text_editor","tool_args":{"content":"# \ud83d\udcdd My Tasks"}}'