From fb4c17afb267d4db58146b1b7a62e4b51fe1c412 Mon Sep 17 00:00:00 2001
From: TerminallyLazy <inf3ctious007@gmail.com>
Date: Sun, 21 Jun 2026 06:28:57 -0400
Subject: [PATCH 1/2] fix: harden local model tool calls

---
 agent.py                                      | 337 +++++++++++++++-
 extensions/python/system_prompt/AGENTS.md     |   2 +-
 .../_10_small_local_model_prompt.py           |  87 +++++
 helpers/extract_tools.py                      | 222 ++++++++++-
 helpers/extract_tools.py.dox.md               |  10 +-
 helpers/litellm_transport.py                  |  84 ++++
 helpers/litellm_transport.py.dox.md           |  36 ++
 models.py                                     |  15 +
 plugins/_text_editor/AGENTS.md                |   1 +
 plugins/_text_editor/helpers/file_ops.py      |   2 +
 .../agent.system.main.small_local_model.md    |  15 +
 prompts/fw.msg_misformat.md                   |   2 +-
 prompts/fw.msg_reasoning_only.md              |   1 +
 prompts/fw.msg_reasoning_only_failed.md       |   1 +
 prompts/fw.msg_repeat.md                      |   2 +-
 prompts/fw.msg_repeat_completed.md            |   3 +
 prompts/fw.msg_repeat_failed.md               |   3 +
 prompts/fw.msg_tool_completed.md              |   1 +
 tests/test_small_local_model_prompt.py        | 369 ++++++++++++++++++
 tests/test_stream_tool_early_stop.py          | 128 ++++++
 tests/test_text_editor_context_patch.py       |  10 +
 tests/test_tool_request_normalization.py      |  94 ++++-
 22 files changed, 1403 insertions(+), 22 deletions(-)
 create mode 100644 extensions/python/system_prompt/_10_small_local_model_prompt.py
 create mode 100644 helpers/litellm_transport.py.dox.md
 create mode 100644 prompts/agent.system.main.small_local_model.md
 create mode 100644 prompts/fw.msg_reasoning_only.md
 create mode 100644 prompts/fw.msg_reasoning_only_failed.md
 create mode 100644 prompts/fw.msg_repeat_completed.md
 create mode 100644 prompts/fw.msg_repeat_failed.md
 create mode 100644 prompts/fw.msg_tool_completed.md
 create mode 100644 tests/test_small_local_model_prompt.py

diff --git a/agent.py b/agent.py
index 8eef2906bb..5de9be9fbb 100644
--- a/agent.py
+++ b/agent.py
@@ -19,6 +19,7 @@
 )
 from helpers import extension
 from helpers.print_style import PrintStyle
+from helpers.strings import truncate_text
 
 from langchain_core.prompts import (
     ChatPromptTemplate,
@@ -42,6 +43,13 @@
 from helpers.litellm_transport import ResponsesTransport
 from helpers.responses_tools import build_responses_function_tools, original_tool_name
 
+REASONING_ONLY_REPAIR_ATTEMPTS_KEY = "_reasoning_only_repair_attempts"
+MAX_REASONING_ONLY_REPAIR_ATTEMPTS = 2
+REPEATED_RESPONSE_REPAIR_ATTEMPTS_KEY = "_repeated_response_repair_attempts"
+MAX_REPEATED_RESPONSE_REPAIR_ATTEMPTS = 2
+LAST_SUCCESSFUL_TOOL_REQUEST_KEY = "_last_successful_tool_request"
+
+
 class AgentContextType(Enum):
     USER = "user"
     TASK = "task"
@@ -500,26 +508,29 @@ async def stream_callback(chunk: str, full: str):
 
                         await self.handle_intervention(agent_response)
 
+                        reasoning_only_handled, reasoning_only_final = (
+                            self._handle_reasoning_only_result(llm_result)
+                        )
+                        if reasoning_only_handled:
+                            if reasoning_only_final:
+                                return reasoning_only_final
+                            continue
+
                         if (
                             self.loop_data.last_response == agent_response
                         ):  # if assistant_response is the same as last message in history, let him know
-                            # Append the assistant's response to the history
-                            log_item = self.loop_data.params_temporary.get("log_item_generating")
-                            assistant_message = self.hist_add_ai_response(
-                                agent_response,
-                                id=log_item.id if log_item else "",
-                                llm_result=llm_result,
+                            repeated_handled, repeated_final = (
+                                self._handle_repeated_response_result(llm_result)
                             )
-                            self._remember_llm_result_state(llm_result, assistant_message)
-                            # Append warning message to the history
-                            warning_msg = self.read_prompt("fw.msg_repeat.md")
-                            wmsg = self.hist_add_warning(message=warning_msg)
-                            PrintStyle(font_color="orange", padding=True).print(
-                                warning_msg
-                            )
-                            self.context.log.log(type="warning", content=warning_msg, id=wmsg.id)
+                            if repeated_handled:
+                                if repeated_final:
+                                    return repeated_final
+                                continue
 
                         else:  # otherwise proceed with tool
+                            self.loop_data.params_persistent.pop(
+                                REPEATED_RESPONSE_REPAIR_ATTEMPTS_KEY, None
+                            )
                             # Append the assistant's response to the history
                             log_item = self.loop_data.params_temporary.get("log_item_generating")
                             assistant_message = self.hist_add_ai_response(
@@ -1108,7 +1119,7 @@ async def process_llm_result_tools(self, llm_result: LLMResult):
         if (
             llm_result.mode == "responses"
             and llm_result.response
-            and extract_tools.json_parse_dirty(llm_result.response) is None
+            and extract_tools.extract_tool_request(llm_result.response) is None
         ):
             return llm_result.response
         return await self.process_tools(llm_result.response)
@@ -1122,6 +1133,13 @@ async def _execute_tool_request(
         responses_item_factory: Callable[[Any], dict[str, Any]] | None = None,
     ):
         raw_tool_name = raw_tool_name or tool_name
+        tool_args = extract_tools.sanitize_tool_args(tool_args or {})
+        duplicate_final = self._handle_duplicate_completed_tool_request(
+            tool_name, tool_args
+        )
+        if duplicate_final:
+            return duplicate_final
+
         tool_method = None
         tool = None
 
@@ -1197,6 +1215,23 @@ async def _execute_tool_request(
             await tool.after_execution(response)
             await self.handle_intervention()
 
+            if not self._is_error_tool_response(response):
+                self._record_successful_tool_request(tool_name, tool_args, response)
+                completed_message = self._tool_completion_final_message(
+                    tool_name, tool_args, response
+                )
+                if completed_message:
+                    PrintStyle(font_color="green", padding=True).print(
+                        completed_message
+                    )
+                    self.context.log.log(
+                        type="response",
+                        heading=f"{self.agent_name}: Tool action completed",
+                        content=completed_message,
+                    )
+                    self._clear_responses_pending_state()
+                    return completed_message
+
             if response.break_loop:
                 self._clear_responses_pending_state()
                 return response.message
@@ -1387,10 +1422,254 @@ def _clear_responses_pending_state(self) -> None:
             state.pop("previous_response_id", None)
             self.set_data(Agent.DATA_NAME_RESPONSES_STATE, state)
 
+    def _handle_reasoning_only_result(
+        self, llm_result: LLMResult
+    ) -> tuple[bool, str | None]:
+        if not self._is_reasoning_only_result(llm_result):
+            return False, None
+
+        attempts = int(
+            self.loop_data.params_persistent.get(REASONING_ONLY_REPAIR_ATTEMPTS_KEY, 0)
+        ) + 1
+        self.loop_data.params_persistent[REASONING_ONLY_REPAIR_ATTEMPTS_KEY] = attempts
+
+        if attempts > MAX_REASONING_ONLY_REPAIR_ATTEMPTS:
+            final_message = self.read_prompt(
+                "fw.msg_reasoning_only_failed.md",
+                attempts=MAX_REASONING_ONLY_REPAIR_ATTEMPTS,
+            )
+            PrintStyle(font_color="red", padding=True).print(final_message)
+            self.context.log.log(
+                type="response",
+                heading=f"{self.agent_name}: Local model tool-call failure",
+                content=final_message,
+            )
+            return True, final_message
+
+        warning_msg = self.read_prompt("fw.msg_reasoning_only.md")
+        wmsg = self.hist_add_warning(message=warning_msg)
+        PrintStyle(font_color="orange", padding=True).print(warning_msg)
+        self.context.log.log(type="warning", content=warning_msg, id=wmsg.id)
+        return True, None
+
+    def _is_reasoning_only_result(self, llm_result: LLMResult) -> bool:
+        return (
+            not (llm_result.response or "").strip()
+            and bool((llm_result.reasoning or "").strip())
+            and not llm_result.function_calls
+            and not llm_result.builtin_items
+        )
+
+    def _handle_repeated_response_result(
+        self, llm_result: LLMResult
+    ) -> tuple[bool, str | None]:
+        last_tool_result = self._last_successful_tool_result_summary()
+        if last_tool_result:
+            final_message = self.read_prompt(
+                "fw.msg_repeat_completed.md",
+                last_tool_result=last_tool_result,
+            )
+            PrintStyle(font_color="green", padding=True).print(final_message)
+            self.context.log.log(
+                type="response",
+                heading=f"{self.agent_name}: Completed repeated action",
+                content=final_message,
+            )
+            return True, final_message
+
+        attempts = int(
+            self.loop_data.params_persistent.get(REPEATED_RESPONSE_REPAIR_ATTEMPTS_KEY, 0)
+        ) + 1
+        self.loop_data.params_persistent[REPEATED_RESPONSE_REPAIR_ATTEMPTS_KEY] = attempts
+
+        if attempts > MAX_REPEATED_RESPONSE_REPAIR_ATTEMPTS:
+            final_message = self.read_prompt(
+                "fw.msg_repeat_failed.md",
+                attempts=MAX_REPEATED_RESPONSE_REPAIR_ATTEMPTS,
+                last_tool_result=(
+                    last_tool_result
+                    or "No successful tool result was available before the repeated output."
+                ),
+            )
+            PrintStyle(font_color="red", padding=True).print(final_message)
+            self.context.log.log(
+                type="response",
+                heading=f"{self.agent_name}: Repeated response stopped",
+                content=final_message,
+            )
+            return True, final_message
+
+        warning_msg = self.read_prompt("fw.msg_repeat.md")
+        wmsg = self.hist_add_warning(message=warning_msg)
+        PrintStyle(font_color="orange", padding=True).print(warning_msg)
+        self.context.log.log(type="warning", content=warning_msg, id=wmsg.id)
+        return True, None
+
+    def _last_successful_tool_result_summary(self) -> str | None:
+        for message in reversed(self.history.all_messages()):
+            if message.ai:
+                continue
+            if not isinstance(message.content, dict):
+                return None
+            if "user_message" in message.content:
+                return None
+            tool_name = str(message.content.get("tool_name") or "").strip()
+            tool_result = str(message.content.get("tool_result") or "").strip()
+            if not tool_name or not tool_result:
+                continue
+            if tool_result.lower().startswith("error"):
+                continue
+            return (
+                "Last completed tool result:\n"
+                f"{tool_name}: {truncate_text(tool_result, 1000)}"
+            )
+        return None
+
+    def _handle_duplicate_completed_tool_request(
+        self, tool_name: str, tool_args: dict
+    ) -> str | None:
+        if not self._is_duplicate_completed_tool_request(tool_name, tool_args):
+            return None
+
+        last = self.loop_data.params_persistent.get(LAST_SUCCESSFUL_TOOL_REQUEST_KEY)
+        last_tool_result = ""
+        if isinstance(last, dict):
+            last_tool_result = str(last.get("last_tool_result") or "")
+        if not last_tool_result:
+            last_tool_result = (
+                self._last_successful_tool_result_summary()
+                or "The previous tool action completed successfully."
+            )
+        final_message = self.read_prompt(
+            "fw.msg_repeat_completed.md",
+            last_tool_result=last_tool_result,
+        )
+        PrintStyle(font_color="green", padding=True).print(final_message)
+        self.context.log.log(
+            type="response",
+            heading=f"{self.agent_name}: Duplicate tool action stopped",
+            content=final_message,
+        )
+        self._clear_responses_pending_state()
+        return final_message
+
+    def _record_successful_tool_request(
+        self, tool_name: str, tool_args: dict, response: Any
+    ) -> None:
+        last_tool_result = self._last_successful_tool_result_summary() or (
+            "Last completed tool result:\n"
+            f"{tool_name}: {truncate_text(str(getattr(response, 'message', '')), 1000)}"
+        )
+        self.loop_data.params_persistent[LAST_SUCCESSFUL_TOOL_REQUEST_KEY] = {
+            "tool_name": tool_name,
+            "fingerprint": self._tool_request_fingerprint(tool_name, tool_args),
+            "completion_key": self._tool_completion_key(tool_name, tool_args, response),
+            "open_in_canvas": self._tool_open_in_canvas(tool_args, response),
+            "last_tool_result": last_tool_result,
+        }
+
+    def _is_duplicate_completed_tool_request(self, tool_name: str, tool_args: dict) -> bool:
+        last = self.loop_data.params_persistent.get(LAST_SUCCESSFUL_TOOL_REQUEST_KEY)
+        if not isinstance(last, dict):
+            return False
+        if tool_name != str(last.get("tool_name") or ""):
+            return False
+
+        if self._tool_request_fingerprint(tool_name, tool_args) == last.get(
+            "fingerprint"
+        ):
+            return True
+
+        completion_key = self._tool_completion_key(tool_name, tool_args)
+        if not completion_key or completion_key != last.get("completion_key"):
+            return False
+        return bool(last.get("open_in_canvas")) or self._tool_open_in_canvas(tool_args)
+
+    def _tool_completion_final_message(
+        self, tool_name: str, tool_args: dict, response: Any
+    ) -> str | None:
+        if not self._tool_open_in_canvas(tool_args, response):
+            return None
+        completion_key = self._tool_completion_key(tool_name, tool_args, response)
+        if not completion_key:
+            return None
+        if tool_name == "text_editor":
+            path = self._tool_path(tool_args, response)
+            return self.read_prompt(
+                "fw.msg_tool_completed.md",
+                message=f"The document was saved and opened in the canvas: {path}",
+            )
+        return None
+
+    def _tool_request_fingerprint(self, tool_name: str, tool_args: dict) -> str:
+        try:
+            serialized_args = json.dumps(
+                extract_tools.sanitize_tool_args(tool_args or {}),
+                sort_keys=True,
+                ensure_ascii=False,
+                default=str,
+            )
+        except Exception:
+            serialized_args = str(tool_args)
+        return f"{tool_name}:{serialized_args}"
+
+    def _tool_completion_key(
+        self, tool_name: str, tool_args: dict, response: Any | None = None
+    ) -> str:
+        action = self._tool_action(tool_args, response)
+        path = self._tool_path(tool_args, response)
+        if tool_name == "text_editor" and action in {"write", "patch"} and path:
+            return f"{tool_name}:{action}:{path}"
+        return ""
+
+    def _tool_action(self, tool_args: dict, response: Any | None = None) -> str:
+        additional = getattr(response, "additional", None) if response else None
+        if not isinstance(additional, dict):
+            additional = {}
+        action = additional.get("action") or tool_args.get("action") or tool_args.get(
+            "method"
+        )
+        return str(action or "").strip()
+
+    def _tool_path(self, tool_args: dict, response: Any | None = None) -> str:
+        additional = getattr(response, "additional", None) if response else None
+        if not isinstance(additional, dict):
+            additional = {}
+        path = additional.get("path") or tool_args.get("path") or ""
+        return str(path or "").strip()
+
+    def _tool_open_in_canvas(
+        self, tool_args: dict, response: Any | None = None
+    ) -> bool:
+        additional = getattr(response, "additional", None) if response else None
+        if not isinstance(additional, dict):
+            additional = {}
+        value = (
+            additional.get("open_in_canvas")
+            if "open_in_canvas" in additional
+            else tool_args.get("open_in_canvas")
+        )
+        if value is None:
+            value = tool_args.get("open_canvas") or tool_args.get("open_document")
+        return self._truthy(value)
+
+    def _truthy(self, value: Any) -> bool:
+        if isinstance(value, bool):
+            return value
+        if value is None:
+            return False
+        if isinstance(value, (int, float)):
+            return value != 0
+        return str(value).strip().lower() in {"1", "true", "yes", "y", "on"}
+
+    def _is_error_tool_response(self, response: Any) -> bool:
+        message = str(getattr(response, "message", "") or "").strip().lower()
+        return message.startswith("error")
+
     @extension.extensible
     async def process_tools(self, msg: str):
         # search for tool usage requests in agent message
-        tool_request = extract_tools.json_parse_dirty(msg)
+        tool_request = extract_tools.extract_tool_request(msg)
 
         raw_tool_name = ""
         tool_args = {}
@@ -1407,6 +1686,12 @@ async def process_tools(self, msg: str):
 
         if tool_request is not None:
             tool_name = raw_tool_name  # Initialize tool_name with raw_tool_name
+            duplicate_final = self._handle_duplicate_completed_tool_request(
+                tool_name, tool_args
+            )
+            if duplicate_final:
+                return duplicate_final
+
             tool_method = None  # Initialize tool_method
 
             tool = None  # Initialize tool to None
@@ -1470,7 +1755,27 @@ async def process_tools(self, msg: str):
                     await tool.after_execution(response)
                     await self.handle_intervention()
 
+                    if not self._is_error_tool_response(response):
+                        self._record_successful_tool_request(
+                            tool_name, tool_args, response
+                        )
+                        completed_message = self._tool_completion_final_message(
+                            tool_name, tool_args, response
+                        )
+                        if completed_message:
+                            PrintStyle(font_color="green", padding=True).print(
+                                completed_message
+                            )
+                            self.context.log.log(
+                                type="response",
+                                heading=f"{self.agent_name}: Tool action completed",
+                                content=completed_message,
+                            )
+                            self._clear_responses_pending_state()
+                            return completed_message
+
                     if response.break_loop:
+                        self._clear_responses_pending_state()
                         return response.message
                 finally:
                     self.loop_data.current_tool = None
diff --git a/extensions/python/system_prompt/AGENTS.md b/extensions/python/system_prompt/AGENTS.md
index bc4ee64483..e1fb03684d 100644
--- a/extensions/python/system_prompt/AGENTS.md
+++ b/extensions/python/system_prompt/AGENTS.md
@@ -6,7 +6,7 @@
 
 ## Ownership
 
-- Ordered Python files own main, tools, MCP, secrets, skills, and project prompt sections.
+- Ordered Python files own main, small-local-model, tools, MCP, secrets, skills, and project prompt sections.
 
 ## Local Contracts
 
diff --git a/extensions/python/system_prompt/_10_small_local_model_prompt.py b/extensions/python/system_prompt/_10_small_local_model_prompt.py
new file mode 100644
index 0000000000..e9c40b0827
--- /dev/null
+++ b/extensions/python/system_prompt/_10_small_local_model_prompt.py
@@ -0,0 +1,87 @@
+import re
+from typing import Any
+from urllib.parse import urlparse
+
+from helpers.extension import Extension, extensible
+from agent import Agent, LoopData
+
+
+LOCAL_PROVIDER_IDS = {"ollama", "lm_studio"}
+LOCAL_HOSTS = {"localhost", "127.0.0.1", "0.0.0.0", "::1", "host.docker.internal"}
+THINKING_MODEL_MARKERS = (
+    "qwen3",
+    "qwen-3",
+    "qwq",
+    "deepseek-r1",
+    "deepseek_r1",
+    "gpt-oss",
+    "gemma4",
+    "gemma-4",
+)
+SMALL_LOCAL_MAX_B = 14.0
+MODEL_SIZE_RE = re.compile(r"(?<!\d)(\d+(?:\.\d+)?)[-_ ]?b(?![a-z])", re.IGNORECASE)
+
+
+class SmallLocalModelPrompt(Extension):
+    async def execute(
+        self,
+        system_prompt: list[str] = [],
+        loop_data: LoopData = LoopData(),
+        **kwargs: Any,
+    ):
+        if not self.agent:
+            return
+        prompt = await build_prompt(self.agent)
+        if prompt:
+            system_prompt.append(prompt)
+
+
+@extensible
+async def build_prompt(agent: Agent) -> str:
+    try:
+        from plugins._model_config.helpers.model_config import get_chat_model_config
+
+        chat_cfg = get_chat_model_config(agent)
+    except Exception:
+        return ""
+
+    if should_include_small_local_prompt(chat_cfg):
+        return agent.read_prompt("agent.system.main.small_local_model.md")
+    return ""
+
+
+def should_include_small_local_prompt(chat_cfg: dict[str, Any] | None) -> bool:
+    if not isinstance(chat_cfg, dict):
+        return False
+
+    provider = str(chat_cfg.get("provider") or "").lower().strip()
+    name = str(chat_cfg.get("name") or "").lower().strip()
+    api_base = str(chat_cfg.get("api_base") or "").lower().strip()
+
+    is_local = provider in LOCAL_PROVIDER_IDS or _is_local_api_base(api_base)
+    if not is_local:
+        return False
+
+    if any(marker in name for marker in THINKING_MODEL_MARKERS):
+        return True
+
+    size_b = _model_size_billions(name)
+    return size_b is not None and size_b <= SMALL_LOCAL_MAX_B
+
+
+def _is_local_api_base(api_base: str) -> bool:
+    if not api_base:
+        return False
+    parsed = urlparse(api_base if "://" in api_base else f"http://{api_base}")
+    host = (parsed.hostname or "").lower()
+    return host in LOCAL_HOSTS
+
+
+def _model_size_billions(model_name: str) -> float | None:
+    match = MODEL_SIZE_RE.search(model_name)
+    if not match:
+        return None
+    try:
+        return float(match.group(1))
+    except ValueError:
+        return None
diff --git a/helpers/extract_tools.py b/helpers/extract_tools.py
index 39e8378553..28db4e460f 100644
--- a/helpers/extract_tools.py
+++ b/helpers/extract_tools.py
@@ -2,8 +2,60 @@
 from .dirty_json import DirtyJson
 import regex, re
 from helpers.modules import load_classes_from_file, load_classes_from_folder # keep here for backwards compatibility
+from helpers.strings import sanitize_string
 from typing import Any
 
+TOOL_NAME_KEYS = ("tool_name", "tool", "toolName", "function_name", "functionName")
+TOOL_ARGS_KEYS = ("tool_args", "args", "arguments", "parameters", "input")
+META_KEYS = {
+    "thought",
+    "thoughts",
+    "headline",
+    "reasoning",
+    "analysis",
+    "plan",
+    "intent",
+    "title",
+}
+RESPONSE_TEXT_KEYS = (
+    "text",
+    "message",
+    "response",
+    "answer",
+    "final",
+    "final_answer",
+    "content",
+)
+RESPONSE_INTENT_MARKERS = (
+    "response tool",
+    "final answer",
+    "respond",
+    "reply",
+    "answer the user",
+    "acknowledg",
+    "greeting",
+    "offer of assistance",
+)
+ACTION_INTENT_MARKERS = (
+    "text_editor",
+    "text editor",
+    "code_execution",
+    "terminal",
+    "shell command",
+    "run command",
+    "search_engine",
+    "browser",
+    "computer_use",
+    "vision_load",
+    "call_subordinate",
+    "write file",
+    "read file",
+    "edit file",
+    "create file",
+    "open in canvas",
+)
+
+
 def json_parse_dirty(json: str) -> dict[str, Any] | None:
     if not json or not isinstance(json, str):
         return None
@@ -20,6 +72,95 @@ def json_parse_dirty(json: str) -> dict[str, Any] | None:
     return None
 
 
+def extract_tool_request(content: str) -> dict[str, Any] | None:
+    """Extract and repair the first executable tool request from model text."""
+
+    for tool_request in iter_json_dicts(content):
+        try:
+            tool_name, tool_args = normalize_tool_request(tool_request)
+            return {"tool_name": tool_name, "tool_args": tool_args}
+        except ValueError:
+            repaired = repair_tool_request(tool_request)
+            if not repaired:
+                continue
+            try:
+                tool_name, tool_args = normalize_tool_request(repaired)
+            except ValueError:
+                continue
+            return {"tool_name": tool_name, "tool_args": tool_args}
+    return None
+
+
+def iter_json_dicts(content: str) -> list[dict[str, Any]]:
+    if not content or not isinstance(content, str):
+        return []
+
+    result: list[dict[str, Any]] = []
+    for candidate in iter_json_object_strings(content):
+        try:
+            data = DirtyJson.parse_string(candidate)
+        except Exception:
+            continue
+        if isinstance(data, dict):
+            result.append(data)
+    return result
+
+
+def iter_json_object_strings(content: str) -> list[str]:
+    if not content or not isinstance(content, str):
+        return []
+
+    strings: list[str] = []
+    index = 0
+    while index < len(content):
+        start = content.find("{", index)
+        if start == -1:
+            break
+        candidate = extract_json_root_string(content[start:])
+        if candidate:
+            strings.append(candidate)
+            index = start + len(candidate)
+        else:
+            index = start + 1
+    return strings
+
+
+def repair_tool_request(tool_request: Any) -> dict[str, Any] | None:
+    if not isinstance(tool_request, dict):
+        return None
+
+    tool_name = _first_string_value(tool_request, TOOL_NAME_KEYS)
+    raw_args = _first_value(tool_request, TOOL_ARGS_KEYS)
+    tool_args = _normalize_repaired_args(raw_args, tool_name=tool_name)
+
+    if tool_name:
+        if tool_args is None:
+            tool_args = _root_args(tool_request)
+        if tool_name == "response":
+            response_text = _response_text(tool_request, tool_args)
+            if response_text:
+                tool_args = {**tool_args, "text": response_text}
+        if not tool_args and tool_name == "response":
+            response_text = _synthesized_response_text(tool_request)
+            if response_text:
+                tool_args = {"text": response_text}
+        if isinstance(tool_args, dict):
+            return {"tool_name": tool_name, "tool_args": sanitize_tool_args(tool_args)}
+        return None
+
+    if _looks_like_final_response_intent(tool_request):
+        response_text = _response_text(tool_request, {}) or _synthesized_response_text(
+            tool_request
+        )
+        if response_text:
+            return {
+                "tool_name": "response",
+                "tool_args": sanitize_tool_args({"text": response_text}),
+            }
+
+    return None
+
+
 def normalize_tool_request(tool_request: Any) -> tuple[str, dict]:
     if not isinstance(tool_request, dict):
         raise ValueError("Tool request must be a dictionary")
@@ -33,7 +174,7 @@ def normalize_tool_request(tool_request: Any) -> tuple[str, dict]:
         tool_args = tool_request.get("args")
     if not isinstance(tool_args, dict):
         raise ValueError("Tool request must have a tool_args (type dictionary) field")
-    tool_args = dict(tool_args)
+    tool_args = sanitize_tool_args(dict(tool_args))
     if ":" in tool_name:
         tool_name, action = tool_name.split(":", 1)
         if not tool_name or not action:
@@ -45,6 +186,85 @@ def normalize_tool_request(tool_request: Any) -> tuple[str, dict]:
     return tool_name, tool_args
 
 
+def sanitize_tool_args(value: Any) -> Any:
+    if isinstance(value, str):
+        return sanitize_string(value)
+    if isinstance(value, dict):
+        return {
+            sanitize_string(key) if isinstance(key, str) else key: sanitize_tool_args(item)
+            for key, item in value.items()
+        }
+    if isinstance(value, list):
+        return [sanitize_tool_args(item) for item in value]
+    if isinstance(value, tuple):
+        return tuple(sanitize_tool_args(item) for item in value)
+    return value
+
+
+def _first_value(data: dict[str, Any], keys: tuple[str, ...]) -> Any:
+    for key in keys:
+        if key in data:
+            return data.get(key)
+    return None
+
+
+def _first_string_value(data: dict[str, Any], keys: tuple[str, ...]) -> str:
+    value = _first_value(data, keys)
+    return value.strip() if isinstance(value, str) else ""
+
+
+def _normalize_repaired_args(raw_args: Any, *, tool_name: str) -> dict[str, Any] | None:
+    if isinstance(raw_args, dict):
+        return dict(raw_args)
+    if isinstance(raw_args, str) and tool_name == "response":
+        text = raw_args.strip()
+        return {"text": text} if text else {}
+    if raw_args is None:
+        return None
+    return None
+
+
+def _root_args(data: dict[str, Any]) -> dict[str, Any]:
+    excluded = set(TOOL_NAME_KEYS) | set(TOOL_ARGS_KEYS) | META_KEYS
+    return {key: value for key, value in data.items() if key not in excluded}
+
+
+def _response_text(data: dict[str, Any], tool_args: dict[str, Any]) -> str:
+    for source in (tool_args, data):
+        value = _first_value(source, RESPONSE_TEXT_KEYS)
+        if isinstance(value, str) and value.strip():
+            return value.strip()
+    return ""
+
+
+def _synthesized_response_text(data: dict[str, Any]) -> str:
+    text = _intent_text(data)
+    lowered = text.lower()
+    if any(marker in lowered for marker in ("hi", "hello", "greeting")):
+        return "Hi. How can I help?"
+    if "thank" in lowered:
+        return "You're welcome."
+    return ""
+
+
+def _looks_like_final_response_intent(data: dict[str, Any]) -> bool:
+    text = _intent_text(data)
+    lowered = text.lower()
+    if any(marker in lowered for marker in ACTION_INTENT_MARKERS):
+        return False
+    return any(marker in lowered for marker in RESPONSE_INTENT_MARKERS)
+
+
+def _intent_text(value: Any) -> str:
+    if isinstance(value, dict):
+        return " ".join(_intent_text(item) for item in value.values())
+    if isinstance(value, list):
+        return " ".join(_intent_text(item) for item in value)
+    if value is None:
+        return ""
+    return str(value)
+
+
 def extract_json_root_string(content: str) -> str | None:
     if not content or not isinstance(content, str):
         return None
diff --git a/helpers/extract_tools.py.dox.md b/helpers/extract_tools.py.dox.md
index 9c65b8ced7..b621335c3d 100644
--- a/helpers/extract_tools.py.dox.md
+++ b/helpers/extract_tools.py.dox.md
@@ -12,7 +12,12 @@
 - `extract_tools.py.dox.md` owns durable notes about responsibilities, contracts, side effects, and verification for that implementation.
 - Top-level functions:
 - `json_parse_dirty(json: str) -> dict[str, Any] | None`
+- `extract_tool_request(content: str) -> dict[str, Any] | None`
+- `iter_json_dicts(content: str) -> list[dict[str, Any]]`
+- `iter_json_object_strings(content: str) -> list[str]`
+- `repair_tool_request(tool_request: Any) -> dict[str, Any] | None`
 - `normalize_tool_request(tool_request: Any) -> tuple[str, dict]`
+- `sanitize_tool_args(value: Any) -> Any`
 - `extract_json_root_string(content: str) -> str | None`
 - `extract_json_object_string(content)`
 - `extract_json_string(content)`
@@ -21,13 +26,16 @@
 ## Runtime Contracts
 
 - Helper modules own reusable framework APIs and must preserve public callers unless all callers, tests, and docs are updated together.
+- `extract_tool_request` is the executable-call entrypoint for model text; it may repair common local-model shapes into canonical `{"tool_name": ..., "tool_args": ...}` requests.
+- Repair is intentionally narrow: preserve explicit tool names, move root-level args into `tool_args`, accept common argument aliases, and synthesize final `response` calls only for clear non-action response intent.
+- Normalized tool arguments are sanitized recursively so invalid Unicode surrogates from model output do not crash UTF-8 file writes, logs, or tool execution.
 - Update this file whenever public functions, classes, persistence behavior, path/security assumptions, side effects, or cross-module contracts change.
 - Observed side-effect areas: settings/state persistence.
 - Imported dependency areas include: `dirty_json`, `helpers.modules`, `re`, `regex`, `typing`.
 
 ## Key Concepts
 
-- Important called helpers/classes observed in the source: `extract_json_object_string`, `content.find`, `DirtyJson`, `content.rfind`, `regex.search`, `re.sub`, `json.strip`, `ValueError`, `tool_name.split`, `parser.parse`, `match.group`, `match.group.replace`, `DirtyJson.parse_string`.
+- Important called helpers/classes observed in the source: `extract_json_object_string`, `extract_json_root_string`, `content.find`, `DirtyJson`, `content.rfind`, `regex.search`, `re.sub`, `json.strip`, `ValueError`, `tool_name.split`, `parser.parse`, `match.group`, `match.group.replace`, `DirtyJson.parse_string`.
 - Keep request/response, tool, or helper semantics documented here at the same time as source changes.
 
 ## Work Guidance
diff --git a/helpers/litellm_transport.py b/helpers/litellm_transport.py
index de5a2b884a..c56079ccd0 100644
--- a/helpers/litellm_transport.py
+++ b/helpers/litellm_transport.py
@@ -457,7 +457,13 @@ def prepare_kwargs(
         explicit_prompt_caching: bool = False,
     ) -> dict[str, Any]:
         chat_kwargs = dict(kwargs)
+        response_function_tools = chat_kwargs.pop("a0_responses_function_tools", None)
         _drop_internal_transport_kwargs(chat_kwargs)
+        chat_tools = ChatCompletionsTransport.merge_chat_tools(
+            chat_kwargs.get("tools"), response_function_tools
+        )
+        if _has_tools(chat_tools):
+            chat_kwargs["tools"] = chat_tools
         if not _has_tools(chat_kwargs.get("tools")):
             chat_kwargs.pop("tool_choice", None)
             chat_kwargs.pop("parallel_tool_calls", None)
@@ -488,8 +494,86 @@ def parse(chunk: Any) -> ChatChunk:
         reasoning_delta = _get_value(delta, "reasoning_content") or _get_value(
             message, "reasoning_content"
         ) or ""
+        tool_calls = _get_value(delta, "tool_calls") or _get_value(
+            message, "tool_calls"
+        )
+        if tool_calls and not response_delta:
+            response_delta = ChatCompletionsTransport.tool_calls_text(tool_calls)
         return {"reasoning_delta": reasoning_delta, "response_delta": response_delta}
 
+    @staticmethod
+    def merge_chat_tools(existing_tools: Any, response_tools: Any) -> Any:
+        tools: list[Any] = []
+        if isinstance(existing_tools, list):
+            tools.extend(existing_tools)
+        elif existing_tools:
+            tools.append(existing_tools)
+        tools.extend(ChatCompletionsTransport.chat_tools_from_response(response_tools))
+        return tools
+
+    @staticmethod
+    def chat_tools_from_response(tools: Any) -> list[Any]:
+        chat_tools: list[Any] = []
+        for tool in _as_list(tools):
+            if not isinstance(tool, dict):
+                chat_tools.append(tool)
+                continue
+            if tool.get("type") == "function" and "function" not in tool:
+                function = {
+                    "name": tool.get("name", ""),
+                    "description": tool.get("description", ""),
+                    "parameters": tool.get("parameters", {}),
+                }
+                if "strict" in tool:
+                    function["strict"] = tool["strict"]
+                chat_tools.append({"type": "function", "function": function})
+            else:
+                chat_tools.append(dict(tool))
+        return chat_tools
+
+    @staticmethod
+    def tool_calls_text(tool_calls: Any) -> str:
+        calls = [
+            ChatCompletionsTransport.tool_call_object(tool_call)
+            for tool_call in _as_list(tool_calls)
+        ]
+        calls = [call for call in calls if call]
+        if not calls:
+            return ""
+        if len(calls) == 1:
+            return json.dumps(calls[0])
+        return json.dumps(
+            {"tool_name": "parallel_tool_calls", "tool_args": {"calls": calls}}
+        )
+
+    @staticmethod
+    def tool_call_object(tool_call: Any) -> dict[str, Any]:
+        if not isinstance(tool_call, dict):
+            tool_call = _object_to_dict(tool_call)
+        function = _get_value(tool_call, "function") or {}
+        if not isinstance(function, dict):
+            function = _object_to_dict(function)
+        name = _get_value(function, "name") or _get_value(tool_call, "name")
+        if not name:
+            return {}
+        raw_arguments = _get_value(function, "arguments") or _get_value(
+            tool_call, "arguments"
+        )
+        if isinstance(raw_arguments, str):
+            try:
+                args = json.loads(raw_arguments or "{}")
+            except Exception:
+                args = {"arguments": raw_arguments}
+        elif isinstance(raw_arguments, dict):
+            args = dict(raw_arguments)
+        elif raw_arguments is None:
+            args = {}
+        else:
+            args = {"arguments": raw_arguments}
+        if not isinstance(args, dict):
+            args = {"arguments": args}
+        return {"tool_name": str(name), "tool_args": args}
+
 
 class ResponsesTransport:
     @classmethod
diff --git a/helpers/litellm_transport.py.dox.md b/helpers/litellm_transport.py.dox.md
new file mode 100644
index 0000000000..787ebdb009
--- /dev/null
+++ b/helpers/litellm_transport.py.dox.md
@@ -0,0 +1,36 @@
+# litellm_transport.py DOX
+
+## Purpose
+
+- Own the LiteLLM transport layer that normalizes Agent Zero chat and Responses API calls.
+- Provide fallback between Responses and chat-completions modes while preserving A0 tool-call semantics.
+
+## Ownership
+
+- `LiteLLMTransport` selects the active transport, performs sync/async complete and stream calls, and exposes the last `LLMResult`.
+- `TransportPolicy` owns mode selection, Responses fallback, and provider capability cache behavior.
+- `ChatCompletionsTransport` owns chat-completions request preparation and response parsing.
+- `ResponsesTransport` owns Responses API request preparation, output parsing, and conversion between chat messages and Responses input items.
+- `ResponsesEventParser` owns stateful parsing of streamed Responses events.
+
+## Runtime Contracts
+
+- A0-only kwargs such as `a0_api_mode`, `a0_responses_function_tools`, and Responses state fields must not leak to LiteLLM provider calls.
+- Explicit `chat_completions` mode may receive A0-generated function tools through `a0_responses_function_tools`; these must be converted into standard chat-completions `tools`.
+- Chat-completions tool calls are converted back into canonical A0 JSON tool requests so the existing tool executor can process them.
+- Responses fallback must keep provider-state, local-state, and unsupported-capability caches bounded to transport decisions only.
+
+## Work Guidance
+
+- Keep request conversion and response parsing symmetric: if a tool schema is converted into provider format, returned tool calls must convert back into A0 format.
+- Avoid broad provider-specific branches unless LiteLLM cannot normalize the behavior.
+- Keep streaming changes explicit; chat-completions streaming tool-call deltas require stateful accumulation before they can replace non-stream tool-call turns.
+
+## Verification
+
+- Run focused transport tests in `tests/test_stream_tool_early_stop.py` after changing request preparation, parsing, fallback, or cache behavior.
+- For local Ollama tool-call changes, verify both final-response and executable-tool prompts when the model is available.
+
+## Child DOX Index
+
+No child DOX files.
diff --git a/models.py b/models.py
index 811fa31e0c..3b0588affc 100644
--- a/models.py
+++ b/models.py
@@ -266,6 +266,19 @@ def output(self) -> ChatChunk:
         return ChatChunk(response_delta=response, reasoning_delta=reasoning)
 
 
+def _force_non_streaming_chat_tools(call_kwargs: dict[str, Any]) -> bool:
+    mode = str(call_kwargs.get("a0_api_mode") or "").lower().strip()
+    if mode not in {
+        "chat",
+        "chat_completion",
+        "chat_completions",
+        "completion",
+        "completions",
+    }:
+        return False
+    return bool(call_kwargs.get("a0_responses_function_tools"))
+
+
 rate_limiters: dict[str, RateLimiter] = {}
 api_keys_round_robin: dict[str, int] = {}
 
@@ -685,6 +698,8 @@ async def unified_turn(
             or response_callback is not None
             or tokens_callback is not None
         )
+        if _force_non_streaming_chat_tools(call_kwargs):
+            stream = False
         transport = LiteLLMTransport(
             model=self.model_name,
             messages=msgs_conv,
diff --git a/plugins/_text_editor/AGENTS.md b/plugins/_text_editor/AGENTS.md
index f18d552d31..96e2d07c3e 100644
--- a/plugins/_text_editor/AGENTS.md
+++ b/plugins/_text_editor/AGENTS.md
@@ -16,6 +16,7 @@
 - Preserve stale-read protection before patch operations.
 - Validate patch structures before applying edits.
 - Read back changed regions after writes or patches where the tool contract requires confirmation.
+- Sanitize invalid Unicode surrogate characters before writing text content to UTF-8 files.
 
 ## Work Guidance
 
diff --git a/plugins/_text_editor/helpers/file_ops.py b/plugins/_text_editor/helpers/file_ops.py
index 972bf9da24..a5dbb28207 100644
--- a/plugins/_text_editor/helpers/file_ops.py
+++ b/plugins/_text_editor/helpers/file_ops.py
@@ -10,6 +10,7 @@
 from typing import TypedDict
 
 from helpers import tokens
+from helpers.strings import sanitize_string
 from plugins._text_editor.helpers.context_patch import (
     apply_context_patch_with_metadata,
 )
@@ -187,6 +188,7 @@ def write_file(path: str, content: str | None) -> WriteResult:
     """Create or overwrite a file."""
     if content is None:
         content = ""
+    content = sanitize_string(content)
     path = os.path.expanduser(path)
     try:
         os.makedirs(os.path.dirname(path) or ".", exist_ok=True)
diff --git a/prompts/agent.system.main.small_local_model.md b/prompts/agent.system.main.small_local_model.md
new file mode 100644
index 0000000000..25989e7262
--- /dev/null
+++ b/prompts/agent.system.main.small_local_model.md
@@ -0,0 +1,15 @@
+## Small local model tool-call guardrail
+
+/no_think
+
+You may reason internally, but your visible assistant response must be exactly one minimal JSON object.
+
+- Do not include `thoughts`, `headline`, `analysis`, `reasoning`, or `<think>` in the visible response.
+- Every visible JSON object must include exactly the executable fields `tool_name` and `tool_args`.
+- Put all final user-facing text inside `tool_args.text` on the `response` tool.
+- If you see a warning about repeated, reasoning-only, or misformatted output, do not explain the warning. Immediately output a corrected JSON tool request.
+
+Minimal final-answer example:
+~~~json
+{"tool_name":"response","tool_args":{"text":"Hi. How can I help?"}}
+~~~
diff --git a/prompts/fw.msg_misformat.md b/prompts/fw.msg_misformat.md
index 23ae0eff16..a26cdb9ed6 100644
--- a/prompts/fw.msg_misformat.md
+++ b/prompts/fw.msg_misformat.md
@@ -1 +1 @@
-You have misformatted your message. Follow system prompt instructions on JSON message formatting precisely.
\ No newline at end of file
+You have misformatted your message. Output exactly one JSON object with `tool_name` and `tool_args`. For a final user answer, use `tool_name`: `response`. Do not output reasoning-only text, markdown fences, or a JSON object that only has `thoughts` and `headline`.
diff --git a/prompts/fw.msg_reasoning_only.md b/prompts/fw.msg_reasoning_only.md
new file mode 100644
index 0000000000..290cb86301
--- /dev/null
+++ b/prompts/fw.msg_reasoning_only.md
@@ -0,0 +1 @@
+Your previous output was reasoning only. Now output exactly one valid JSON object in the assistant response, not in the reasoning stream. The JSON object must include `tool_name` and `tool_args`. For a final answer, use `tool_name`: `response` and `tool_args`: {"text":"..."}. Do not repeat prior reasoning or explain this warning.
diff --git a/prompts/fw.msg_reasoning_only_failed.md b/prompts/fw.msg_reasoning_only_failed.md
new file mode 100644
index 0000000000..66091fdff4
--- /dev/null
+++ b/prompts/fw.msg_reasoning_only_failed.md
@@ -0,0 +1 @@
+The selected model produced reasoning-only output {{attempts}} times and did not emit a valid tool request. Select a model with stronger JSON/tool-call reliability or disable thinking for this local model, then try again.
diff --git a/prompts/fw.msg_repeat.md b/prompts/fw.msg_repeat.md
index 74da5835f6..96111d7f23 100644
--- a/prompts/fw.msg_repeat.md
+++ b/prompts/fw.msg_repeat.md
@@ -1 +1 @@
-You have sent the same message again. You have to do something else!
\ No newline at end of file
+You repeated the exact same already-processed response. Do not call the same tool with the same arguments again. If the previous tool result completed the task, immediately use `tool_name`: `response` with a brief final answer. If more work is needed, output a different valid JSON tool request with different `tool_args`.
diff --git a/prompts/fw.msg_repeat_completed.md b/prompts/fw.msg_repeat_completed.md
new file mode 100644
index 0000000000..b53fde50e2
--- /dev/null
+++ b/prompts/fw.msg_repeat_completed.md
@@ -0,0 +1,3 @@
+Done. The previous tool action already completed, so Agent Zero stopped the duplicate tool call.
+
+{{last_tool_result}}
diff --git a/prompts/fw.msg_repeat_failed.md b/prompts/fw.msg_repeat_failed.md
new file mode 100644
index 0000000000..020c69804e
--- /dev/null
+++ b/prompts/fw.msg_repeat_failed.md
@@ -0,0 +1,3 @@
+The selected model repeated the exact same already-processed response {{attempts}} times, so Agent Zero stopped the loop instead of repeating the action again.
+
+{{last_tool_result}}
diff --git a/prompts/fw.msg_tool_completed.md b/prompts/fw.msg_tool_completed.md
new file mode 100644
index 0000000000..aef1c8eb3e
--- /dev/null
+++ b/prompts/fw.msg_tool_completed.md
@@ -0,0 +1 @@
+Done. {{message}}
diff --git a/tests/test_small_local_model_prompt.py b/tests/test_small_local_model_prompt.py
new file mode 100644
index 0000000000..154036d371
--- /dev/null
+++ b/tests/test_small_local_model_prompt.py
@@ -0,0 +1,369 @@
+import sys
+import types
+from pathlib import Path
+
+import pytest
+
+
+PROJECT_ROOT = Path(__file__).resolve().parents[1]
+if str(PROJECT_ROOT) not in sys.path:
+    sys.path.insert(0, str(PROJECT_ROOT))
+
+import agent as agent_module
+from agent import Agent, LoopData
+from extensions.python.system_prompt import _10_small_local_model_prompt as small_prompt
+from helpers import history
+from helpers.llm_result import LLMResult
+
+
+def test_small_local_prompt_detects_qwen_ollama():
+    assert small_prompt.should_include_small_local_prompt(
+        {
+            "provider": "ollama",
+            "name": "qwen3.5:9b",
+            "api_base": "http://host.docker.internal:11434",
+        }
+    )
+
+
+def test_small_local_prompt_detects_local_small_model_by_size():
+    assert small_prompt.should_include_small_local_prompt(
+        {
+            "provider": "other",
+            "name": "llama3.2:8b",
+            "api_base": "http://127.0.0.1:1234/v1",
+        }
+    )
+
+
+def test_small_local_prompt_ignores_cloud_and_large_local_models():
+    assert not small_prompt.should_include_small_local_prompt(
+        {
+            "provider": "openrouter",
+            "name": "qwen3.5:9b",
+            "api_base": "",
+        }
+    )
+    assert not small_prompt.should_include_small_local_prompt(
+        {
+            "provider": "ollama",
+            "name": "llama3.3:70b",
+            "api_base": "http://localhost:11434",
+        }
+    )
+
+
+def test_small_local_prompt_forbids_visible_planning_fields():
+    prompt = (PROJECT_ROOT / "prompts" / "agent.system.main.small_local_model.md").read_text(
+        encoding="utf-8"
+    )
+
+    assert "Do not include `thoughts`, `headline`" in prompt
+    assert "exactly the executable fields `tool_name` and `tool_args`" in prompt
+
+
+@pytest.mark.asyncio
+async def test_small_local_prompt_builds_from_model_config(monkeypatch):
+    from plugins._model_config.helpers import model_config
+
+    class DummyAgent:
+        def read_prompt(self, file: str, **kwargs):
+            return f"prompt:{file}"
+
+    monkeypatch.setattr(
+        model_config,
+        "get_chat_model_config",
+        lambda agent: {
+            "provider": "ollama",
+            "name": "qwen3.5:9b",
+            "api_base": "http://host.docker.internal:11434",
+        },
+    )
+
+    assert await small_prompt.build_prompt(DummyAgent()) == (
+        "prompt:agent.system.main.small_local_model.md"
+    )
+
+
+def test_reasoning_only_guard_retries_then_fails(monkeypatch):
+    monkeypatch.setattr(
+        agent_module,
+        "PrintStyle",
+        lambda *args, **kwargs: types.SimpleNamespace(print=lambda message: None),
+    )
+
+    log_entries = []
+
+    class DummyLog:
+        def log(self, **kwargs):
+            log_entries.append(kwargs)
+            return types.SimpleNamespace(id=f"log-{len(log_entries)}")
+
+    test_agent = object.__new__(Agent)
+    test_agent.loop_data = LoopData()
+    test_agent.context = types.SimpleNamespace(log=DummyLog())
+    test_agent.agent_name = "A0"
+
+    warnings = []
+
+    def read_prompt(file: str, **kwargs):
+        if file == "fw.msg_reasoning_only.md":
+            return "repair tool request"
+        if file == "fw.msg_reasoning_only_failed.md":
+            return f"failed after {kwargs['attempts']}"
+        return file
+
+    def hist_add_warning(message: str):
+        warnings.append(message)
+        return types.SimpleNamespace(id=f"warn-{len(warnings)}")
+
+    test_agent.read_prompt = read_prompt
+    test_agent.hist_add_warning = hist_add_warning
+
+    result = LLMResult.from_chat(response="", reasoning="thinking but no tool")
+
+    assert Agent._handle_reasoning_only_result(test_agent, result) == (True, None)
+    assert Agent._handle_reasoning_only_result(test_agent, result) == (True, None)
+    assert Agent._handle_reasoning_only_result(test_agent, result) == (
+        True,
+        "failed after 2",
+    )
+    assert warnings == ["repair tool request", "repair tool request"]
+    assert log_entries[-1]["type"] == "response"
+
+
+def test_reasoning_only_guard_ignores_valid_response():
+    test_agent = object.__new__(Agent)
+    test_agent.loop_data = LoopData()
+
+    result = LLMResult.from_chat(
+        response='{"tool_name":"response","tool_args":{"text":"ok"}}',
+        reasoning="thinking before valid response",
+    )
+
+    assert Agent._handle_reasoning_only_result(test_agent, result) == (False, None)
+
+
+def test_repeated_response_guard_finalizes_completed_duplicate_tool_call(monkeypatch):
+    monkeypatch.setattr(
+        agent_module,
+        "PrintStyle",
+        lambda *args, **kwargs: types.SimpleNamespace(print=lambda message: None),
+    )
+
+    log_entries = []
+
+    class DummyLog:
+        def log(self, **kwargs):
+            log_entries.append(kwargs)
+            return types.SimpleNamespace(id=f"log-{len(log_entries)}")
+
+    test_agent = object.__new__(Agent)
+    test_agent.loop_data = LoopData()
+    test_agent.context = types.SimpleNamespace(log=DummyLog())
+    test_agent.agent_name = "A0"
+    test_agent.history = history.History(test_agent)
+    test_agent.history.add_message(
+        False,
+        {
+            "tool_name": "text_editor",
+            "tool_result": "/a0/usr/workdir/todos.md written 9 lines",
+        },
+    )
+
+    warnings = []
+
+    def read_prompt(file: str, **kwargs):
+        if file == "fw.msg_repeat_completed.md":
+            return f"completed: {kwargs['last_tool_result']}"
+        if file == "fw.msg_repeat.md":
+            return "repeat repair"
+        if file == "fw.msg_repeat_failed.md":
+            return f"failed after {kwargs['attempts']}: {kwargs['last_tool_result']}"
+        return file
+
+    def hist_add_warning(message: str):
+        warnings.append(message)
+        return types.SimpleNamespace(id=f"warn-{len(warnings)}")
+
+    test_agent.read_prompt = read_prompt
+    test_agent.hist_add_warning = hist_add_warning
+
+    result = LLMResult.from_chat(
+        response='{"tool_name":"text_editor","tool_args":{"action":"write"}}'
+    )
+
+    handled, final = Agent._handle_repeated_response_result(test_agent, result)
+
+    assert handled is True
+    assert final is not None
+    assert "completed:" in final
+    assert "text_editor: /a0/usr/workdir/todos.md written 9 lines" in final
+    assert warnings == []
+    assert log_entries[-1]["type"] == "response"
+
+
+def test_repeated_response_guard_retries_then_stops_without_completed_tool(monkeypatch):
+    monkeypatch.setattr(
+        agent_module,
+        "PrintStyle",
+        lambda *args, **kwargs: types.SimpleNamespace(print=lambda message: None),
+    )
+
+    log_entries = []
+
+    class DummyLog:
+        def log(self, **kwargs):
+            log_entries.append(kwargs)
+            return types.SimpleNamespace(id=f"log-{len(log_entries)}")
+
+    test_agent = object.__new__(Agent)
+    test_agent.loop_data = LoopData()
+    test_agent.context = types.SimpleNamespace(log=DummyLog())
+    test_agent.agent_name = "A0"
+    test_agent.history = history.History(test_agent)
+
+    warnings = []
+
+    def read_prompt(file: str, **kwargs):
+        if file == "fw.msg_repeat.md":
+            return "repeat repair"
+        if file == "fw.msg_repeat_failed.md":
+            return f"failed after {kwargs['attempts']}: {kwargs['last_tool_result']}"
+        return file
+
+    def hist_add_warning(message: str):
+        warnings.append(message)
+        return types.SimpleNamespace(id=f"warn-{len(warnings)}")
+
+    test_agent.read_prompt = read_prompt
+    test_agent.hist_add_warning = hist_add_warning
+
+    result = LLMResult.from_chat(response="same non-tool text")
+
+    assert Agent._handle_repeated_response_result(test_agent, result) == (True, None)
+    assert Agent._handle_repeated_response_result(test_agent, result) == (True, None)
+    handled, final = Agent._handle_repeated_response_result(test_agent, result)
+
+    assert handled is True
+    assert final is not None
+    assert "failed after 2" in final
+    assert "No successful tool result was available" in final
+    assert warnings == ["repeat repair", "repeat repair"]
+    assert log_entries[-1]["type"] == "response"
+
+
+def test_successful_canvas_text_editor_write_records_semantic_duplicate(monkeypatch):
+    monkeypatch.setattr(
+        agent_module,
+        "PrintStyle",
+        lambda *args, **kwargs: types.SimpleNamespace(print=lambda message: None),
+    )
+
+    log_entries = []
+
+    class DummyLog:
+        def log(self, **kwargs):
+            log_entries.append(kwargs)
+            return types.SimpleNamespace(id=f"log-{len(log_entries)}")
+
+    test_agent = object.__new__(Agent)
+    test_agent.loop_data = LoopData()
+    test_agent.context = types.SimpleNamespace(log=DummyLog())
+    test_agent.agent_name = "A0"
+    test_agent.history = history.History(test_agent)
+
+    def read_prompt(file: str, **kwargs):
+        if file == "fw.msg_repeat_completed.md":
+            return f"completed: {kwargs['last_tool_result']}"
+        if file == "fw.msg_tool_completed.md":
+            return f"done: {kwargs['message']}"
+        return file
+
+    test_agent.read_prompt = read_prompt
+    test_agent.get_data = lambda name: {}
+    test_agent.set_data = lambda name, value: None
+
+    original_args = {
+        "action": "write",
+        "path": "/a0/usr/workdir/TODO.md",
+        "content": "# My Tasks\n",
+        "open_in_canvas": True,
+    }
+    response = types.SimpleNamespace(
+        message="/a0/usr/workdir/TODO.md written 1 lines",
+        additional={
+            "_tool_name": "text_editor",
+            "action": "write",
+            "path": "/a0/usr/workdir/TODO.md",
+            "open_in_canvas": True,
+        },
+    )
+
+    test_agent.history.add_message(
+        False,
+        {
+            "tool_name": "text_editor",
+            "tool_result": "/a0/usr/workdir/TODO.md written 1 lines",
+        },
+    )
+
+    Agent._record_successful_tool_request(
+        test_agent, "text_editor", original_args, response
+    )
+
+    repeated_args_with_different_content = {
+        "action": "write",
+        "path": "/a0/usr/workdir/TODO.md",
+        "content": "# My Tasks\n\n- [ ] Different generated text\n",
+        "open_in_canvas": True,
+    }
+
+    assert Agent._is_duplicate_completed_tool_request(
+        test_agent, "text_editor", repeated_args_with_different_content
+    )
+    assert Agent._handle_duplicate_completed_tool_request(
+        test_agent, "text_editor", repeated_args_with_different_content
+    ) == "completed: Last completed tool result:\ntext_editor: /a0/usr/workdir/TODO.md written 1 lines"
+    assert log_entries[-1]["heading"] == "A0: Duplicate tool action stopped"
+
+
+def test_canvas_text_editor_completion_finalizes_turn(monkeypatch):
+    monkeypatch.setattr(
+        agent_module,
+        "PrintStyle",
+        lambda *args, **kwargs: types.SimpleNamespace(print=lambda message: None),
+    )
+
+    test_agent = object.__new__(Agent)
+    test_agent.loop_data = LoopData()
+
+    def read_prompt(file: str, **kwargs):
+        if file == "fw.msg_tool_completed.md":
+            return f"done: {kwargs['message']}"
+        return file
+
+    test_agent.read_prompt = read_prompt
+
+    final = Agent._tool_completion_final_message(
+        test_agent,
+        "text_editor",
+        {
+            "action": "write",
+            "path": "/a0/usr/workdir/TODO.md",
+            "content": "# My Tasks\n",
+            "open_in_canvas": True,
+        },
+        types.SimpleNamespace(
+            additional={
+                "action": "write",
+                "path": "/a0/usr/workdir/TODO.md",
+                "open_in_canvas": True,
+            }
+        ),
+    )
+
+    assert final == (
+        "done: The document was saved and opened in the canvas: "
+        "/a0/usr/workdir/TODO.md"
+    )
diff --git a/tests/test_stream_tool_early_stop.py b/tests/test_stream_tool_early_stop.py
index a1d0ac63ea..0b74ac756f 100644
--- a/tests/test_stream_tool_early_stop.py
+++ b/tests/test_stream_tool_early_stop.py
@@ -306,6 +306,73 @@ async def response_callback(chunk: str, full: str):
     assert calls == ["chat"]
 
 
+@pytest.mark.asyncio
+async def test_chat_completions_with_a0_tools_uses_non_stream_acompletion(monkeypatch):
+    calls: list[dict] = []
+
+    async def fake_acompletion(*args, **kwargs):
+        calls.append(kwargs)
+        return {
+            "choices": [
+                {
+                    "message": {
+                        "content": "",
+                        "tool_calls": [
+                            {
+                                "id": "call_1",
+                                "type": "function",
+                                "function": {
+                                    "name": "response",
+                                    "arguments": '{"text":"ok"}',
+                                },
+                            }
+                        ],
+                    }
+                }
+            ]
+        }
+
+    async def fake_aresponses(*args, **kwargs):
+        raise AssertionError("Responses path should not be used")
+
+    async def fake_rate_limiter(*args, **kwargs):
+        return None
+
+    monkeypatch.setattr(litellm_transport, "acompletion", fake_acompletion)
+    monkeypatch.setattr(litellm_transport, "aresponses", fake_aresponses)
+    monkeypatch.setattr(models, "apply_rate_limiter", fake_rate_limiter)
+
+    wrapper = models.LiteLLMChatWrapper(
+        model="test-model",
+        provider="ollama",
+        model_config=None,
+        a0_api_mode="chat_completions",
+    )
+
+    async def response_callback(chunk: str, full: str):
+        raise AssertionError("tool-call chat completion should not stream")
+
+    result = await wrapper.unified_turn(
+        messages=[],
+        response_callback=response_callback,
+        a0_responses_function_tools=[
+            {
+                "type": "function",
+                "name": "response",
+                "description": "Final answer",
+                "parameters": {"type": "object"},
+            }
+        ],
+    )
+
+    assert extract_tools.extract_tool_request(result.response) == {
+        "tool_name": "response",
+        "tool_args": {"text": "ok"},
+    }
+    assert calls[0]["stream"] is False
+    assert calls[0]["tools"][0]["function"]["name"] == "response"
+
+
 @pytest.mark.asyncio
 async def test_unified_call_retries_responses_with_high_reasoning(monkeypatch):
     validation_error = ValueError(
@@ -743,6 +810,67 @@ def test_chat_kwargs_add_openai_prompt_cache_key_for_chat_completions():
     assert kwargs["max_tokens"] == 10
 
 
+def test_chat_kwargs_convert_a0_response_tools_to_chat_tools():
+    kwargs = litellm_transport.ChatCompletionsTransport.prepare_kwargs(
+        {
+            "a0_responses_function_tools": [
+                {
+                    "type": "function",
+                    "name": "text_editor",
+                    "description": "Edit text files",
+                    "parameters": {"type": "object"},
+                }
+            ],
+            "tool_choice": "auto",
+            "parallel_tool_calls": True,
+        },
+        model="ollama/qwen3.5:9b",
+    )
+
+    assert kwargs["tools"] == [
+        {
+            "type": "function",
+            "function": {
+                "name": "text_editor",
+                "description": "Edit text files",
+                "parameters": {"type": "object"},
+            },
+        }
+    ]
+    assert kwargs["tool_choice"] == "auto"
+    assert kwargs["parallel_tool_calls"] is True
+    assert "a0_responses_function_tools" not in kwargs
+
+
+def test_chat_parse_turns_tool_calls_into_a0_tool_json():
+    parsed = litellm_transport.ChatCompletionsTransport.parse(
+        {
+            "choices": [
+                {
+                    "message": {
+                        "content": "",
+                        "tool_calls": [
+                            {
+                                "id": "call_1",
+                                "type": "function",
+                                "function": {
+                                    "name": "text_editor",
+                                    "arguments": '{"action":"write","path":"todos.md"}',
+                                },
+                            }
+                        ],
+                    }
+                }
+            ]
+        }
+    )
+
+    assert extract_tools.extract_tool_request(parsed["response_delta"]) == {
+        "tool_name": "text_editor",
+        "tool_args": {"action": "write", "path": "todos.md"},
+    }
+
+
 def test_chat_messages_strip_cache_control_for_openai_prompt_cache():
     messages = [
         {
diff --git a/tests/test_text_editor_context_patch.py b/tests/test_text_editor_context_patch.py
index 0cbb260ce7..7ddb5e41ed 100644
--- a/tests/test_text_editor_context_patch.py
+++ b/tests/test_text_editor_context_patch.py
@@ -17,6 +17,7 @@
 from plugins._text_editor.helpers.file_ops import (
     apply_context_patch_file,
     apply_exact_replace_file,
+    write_file,
 )
 from plugins._text_editor.helpers.patch_request import (
     exact_replace_to_patch_text,
@@ -32,6 +33,15 @@
 )
 
 
+def test_text_editor_write_file_sanitizes_surrogate_content(tmp_path: Path) -> None:
+    target = tmp_path / "todo.md"
+
+    result = write_file(str(target), "# \ud83d\udcdd My Tasks\n")
+
+    assert result["error"] == ""
+    assert target.read_text(encoding="utf-8") == "# ?? My Tasks\n"
+
+
 def test_context_patch_chains_after_line_shift(tmp_path: Path) -> None:
     target = tmp_path / "sample.txt"
     target.write_text("alpha\nbeta\ngamma\n", encoding="utf-8")
diff --git a/tests/test_tool_request_normalization.py b/tests/test_tool_request_normalization.py
index 81472c561b..6cda3f33d4 100644
--- a/tests/test_tool_request_normalization.py
+++ b/tests/test_tool_request_normalization.py
@@ -9,7 +9,7 @@
 if str(PROJECT_ROOT) not in sys.path:
     sys.path.insert(0, str(PROJECT_ROOT))
 
-from helpers.extract_tools import normalize_tool_request
+from helpers.extract_tools import extract_tool_request, normalize_tool_request
 
 
 def test_normalize_tool_request_accepts_canonical_keys() -> None:
@@ -65,3 +65,95 @@ def test_normalize_tool_request_preserves_explicit_action_over_method() -> None:
 def test_normalize_tool_request_rejects_missing_args() -> None:
     with pytest.raises(ValueError, match="tool_args"):
         normalize_tool_request({"tool_name": "response"})
+
+
+def test_extract_tool_request_repairs_root_level_tool_args() -> None:
+    assert extract_tool_request(
+        """
+        {
+          "thoughts": ["Need to write the todo file."],
+          "headline": "Creating TODO list",
+          "tool_name": "text_editor",
+          "action": "write",
+          "path": "/a0/usr/workdir/todos.md",
+          "content": "# My Tasks"
+        }
+        """
+    ) == {
+        "tool_name": "text_editor",
+        "tool_args": {
+            "action": "write",
+            "path": "/a0/usr/workdir/todos.md",
+            "content": "# My Tasks",
+        },
+    }
+
+
+def test_extract_tool_request_accepts_argument_aliases() -> None:
+    assert extract_tool_request(
+        '{"function_name":"response","arguments":{"text":"done"}}'
+    ) == {"tool_name": "response", "tool_args": {"text": "done"}}
+
+
+def test_extract_tool_request_repairs_response_string_args() -> None:
+    assert extract_tool_request('{"tool_name":"response","tool_args":"hello"}') == {
+        "tool_name": "response",
+        "tool_args": {"text": "hello"},
+    }
+
+
+def test_extract_tool_request_sanitizes_surrogate_tool_args() -> None:
+    request = extract_tool_request(
+        r'{"tool_name":"text_editor","tool_args":{"content":"# \ud83d\udcdd My Tasks"}}'
+    )
+
+    assert request == {
+        "tool_name": "text_editor",
+        "tool_args": {"content": "# ?? My Tasks"},
+    }
+    request["tool_args"]["content"].encode("utf-8")
+
+
+def test_extract_tool_request_recovers_thoughts_only_greeting_response() -> None:
+    assert extract_tool_request(
+        """
+        Reasoning:
+        {
+          "thoughts": [
+            "The user has sent a simple 'hi' greeting.",
+            "I need to respond with the appropriate JSON format using the response tool.",
+            "A friendly acknowledgment and offer of assistance would be good."
+          ],
+          "headline": "Acknowledging user's greeting and offering help"
+        }
+        """
+    ) == {
+        "tool_name": "response",
+        "tool_args": {"text": "Hi. How can I help?"},
+    }
+
+
+def test_extract_tool_request_does_not_convert_action_planning_to_response() -> None:
+    assert (
+        extract_tool_request(
+            """
+            {
+              "thoughts": [
+                "The user wants a TODO list in the canvas.",
+                "I should use text_editor with action write."
+              ],
+              "headline": "Creating TODO list locally"
+            }
+            """
+        )
+        is None
+    )
+
+
+def test_extract_tool_request_prefers_later_valid_tool_object() -> None:
+    assert extract_tool_request(
+        """
+        Reasoning: {"thoughts":["I need to use text_editor."],"headline":"Plan"}
+        {"tool_name":"response","tool_args":{"text":"done"}}
+        """
+    ) == {"tool_name": "response", "tool_args": {"text": "done"}}

From 1dd5787bdd8485cbf80724dd23d47c10ede93c03 Mon Sep 17 00:00:00 2001
From: TerminallyLazy <inf3ctious007@gmail.com>
Date: Sun, 21 Jun 2026 07:40:04 -0400
Subject: [PATCH 2/2] fix: recover incomplete response tool output

---
 helpers/extract_tools.py                      | 87 +++++++++++++++++++
 helpers/extract_tools.py.dox.md               |  1 +
 plugins/_memory/AGENTS.md                     |  1 +
 .../_memory/helpers/memory_consolidation.py   | 40 ++++++++-
 tests/test_memory_consolidation.py            | 49 +++++++++++
 tests/test_tool_request_normalization.py      | 19 ++++
 6 files changed, 195 insertions(+), 2 deletions(-)
 create mode 100644 tests/test_memory_consolidation.py

diff --git a/helpers/extract_tools.py b/helpers/extract_tools.py
index 28db4e460f..13b032b130 100644
--- a/helpers/extract_tools.py
+++ b/helpers/extract_tools.py
@@ -1,4 +1,6 @@
 
+import json
+
 from .dirty_json import DirtyJson
 import regex, re
 from helpers.modules import load_classes_from_file, load_classes_from_folder # keep here for backwards compatibility
@@ -88,9 +90,28 @@ def extract_tool_request(content: str) -> dict[str, Any] | None:
             except ValueError:
                 continue
             return {"tool_name": tool_name, "tool_args": tool_args}
+    partial_response = extract_partial_response_tool_request(content)
+    if partial_response:
+        return partial_response
     return None
 
 
+def extract_partial_response_tool_request(content: str) -> dict[str, Any] | None:
+    if not content or not isinstance(content, str):
+        return None
+    if not _contains_response_tool_name(content):
+        return None
+
+    text_value = _partial_json_string_value(content, RESPONSE_TEXT_KEYS)
+    if not text_value:
+        return None
+
+    return {
+        "tool_name": "response",
+        "tool_args": sanitize_tool_args({"text": text_value}),
+    }
+
+
 def iter_json_dicts(content: str) -> list[dict[str, Any]]:
     if not content or not isinstance(content, str):
         return []
@@ -265,6 +286,72 @@ def _intent_text(value: Any) -> str:
     return str(value)
 
 
+def _contains_response_tool_name(content: str) -> bool:
+    for key in TOOL_NAME_KEYS:
+        pattern = rf'"{re.escape(key)}"\s*:\s*"response"'
+        if re.search(pattern, content):
+            return True
+    return False
+
+
+def _partial_json_string_value(content: str, keys: tuple[str, ...]) -> str:
+    for key in keys:
+        pattern = rf'"{re.escape(key)}"\s*:\s*"'
+        match = re.search(pattern, content)
+        if not match:
+            continue
+        return _decode_json_string_prefix(content[match.end():]).strip()
+    return ""
+
+
+def _decode_json_string_prefix(content: str) -> str:
+    chars: list[str] = []
+    index = 0
+    while index < len(content):
+        char = content[index]
+        if char == '"':
+            break
+        if char != "\\":
+            chars.append(char)
+            index += 1
+            continue
+
+        index += 1
+        if index >= len(content):
+            chars.append("\\")
+            break
+
+        escaped = content[index]
+        if escaped in {'"', "\\", "/"}:
+            chars.append(escaped)
+        elif escaped == "b":
+            chars.append("\b")
+        elif escaped == "f":
+            chars.append("\f")
+        elif escaped == "n":
+            chars.append("\n")
+        elif escaped == "r":
+            chars.append("\r")
+        elif escaped == "t":
+            chars.append("\t")
+        elif escaped == "u":
+            hex_value = content[index + 1 : index + 5]
+            if len(hex_value) == 4 and all(c in "0123456789abcdefABCDEF" for c in hex_value):
+                chars.append(chr(int(hex_value, 16)))
+                index += 4
+            else:
+                chars.append("\\u")
+        else:
+            chars.append(escaped)
+        index += 1
+
+    decoded = "".join(chars)
+    try:
+        return json.loads(json.dumps(decoded))
+    except Exception:
+        return decoded
+
+
 def extract_json_root_string(content: str) -> str | None:
     if not content or not isinstance(content, str):
         return None
diff --git a/helpers/extract_tools.py.dox.md b/helpers/extract_tools.py.dox.md
index b621335c3d..9d8639dd3b 100644
--- a/helpers/extract_tools.py.dox.md
+++ b/helpers/extract_tools.py.dox.md
@@ -28,6 +28,7 @@
 - Helper modules own reusable framework APIs and must preserve public callers unless all callers, tests, and docs are updated together.
 - `extract_tool_request` is the executable-call entrypoint for model text; it may repair common local-model shapes into canonical `{"tool_name": ..., "tool_args": ...}` requests.
 - Repair is intentionally narrow: preserve explicit tool names, move root-level args into `tool_args`, accept common argument aliases, and synthesize final `response` calls only for clear non-action response intent.
+- Incomplete `response` tool JSON may be recovered only when an explicit response tool name and a `text`-like string value are present; non-response tool calls still require complete parseable JSON before execution.
 - Normalized tool arguments are sanitized recursively so invalid Unicode surrogates from model output do not crash UTF-8 file writes, logs, or tool execution.
 - Update this file whenever public functions, classes, persistence behavior, path/security assumptions, side effects, or cross-module contracts change.
 - Observed side-effect areas: settings/state persistence.
diff --git a/plugins/_memory/AGENTS.md b/plugins/_memory/AGENTS.md
index 622026f574..c3e1d5fc92 100644
--- a/plugins/_memory/AGENTS.md
+++ b/plugins/_memory/AGENTS.md
@@ -17,6 +17,7 @@
 - Keep memory scoped by configured subdirectory/context.
 - Preserve embedding metadata needed to rebuild indexes safely.
 - Avoid storing transient action-history noise as durable memory.
+- If LLM-based consolidation times out, insert the new memory directly instead of dropping it.
 
 ## Work Guidance
 
diff --git a/plugins/_memory/helpers/memory_consolidation.py b/plugins/_memory/helpers/memory_consolidation.py
index 227fb11e82..e074133f65 100644
--- a/plugins/_memory/helpers/memory_consolidation.py
+++ b/plugins/_memory/helpers/memory_consolidation.py
@@ -102,13 +102,49 @@ async def process_new_memory(
             return result
 
         except asyncio.TimeoutError:
-            PrintStyle().error(f"Memory consolidation timeout for area {area}")
-            return {"success": False, "memory_ids": []}
+            PrintStyle().warning(
+                f"Memory consolidation timeout for area {area}; inserting memory without consolidation"
+            )
+            return await self._insert_memory_directly(
+                new_memory,
+                metadata,
+                log_item,
+                result="Memory inserted after consolidation timeout",
+                consolidation_action="timeout_direct_insert",
+            )
 
         except Exception as e:
             PrintStyle().error(f"Memory consolidation error for area {area}: {str(e)}")
             return {"success": False, "memory_ids": []}
 
+    async def _insert_memory_directly(
+        self,
+        new_memory: str,
+        metadata: Dict[str, Any],
+        log_item: Optional[LogItem] = None,
+        *,
+        result: str = "Memory inserted successfully",
+        consolidation_action: str = "direct_insert",
+    ) -> dict:
+        try:
+            db = await Memory.get(self.agent)
+            insert_metadata = dict(metadata)
+            if 'timestamp' not in insert_metadata:
+                insert_metadata['timestamp'] = self._get_timestamp()
+            memory_id = await db.insert_text(new_memory, insert_metadata)
+            if log_item:
+                log_item.update(
+                    result=result,
+                    memory_ids=[memory_id],
+                    consolidation_action=consolidation_action
+                )
+            return {"success": True, "memory_ids": [memory_id]}
+        except Exception as e:
+            PrintStyle().error(f"Direct memory insertion failed: {str(e)}")
+            if log_item:
+                log_item.update(result=f"Memory insertion failed: {str(e)}")
+            return {"success": False, "memory_ids": []}
+
     async def _process_memory_with_consolidation(
         self,
         new_memory: str,
diff --git a/tests/test_memory_consolidation.py b/tests/test_memory_consolidation.py
new file mode 100644
index 0000000000..25eb45cfea
--- /dev/null
+++ b/tests/test_memory_consolidation.py
@@ -0,0 +1,49 @@
+import asyncio
+import types
+
+import pytest
+
+from plugins._memory.helpers import memory_consolidation
+
+
+@pytest.mark.asyncio
+async def test_memory_consolidation_timeout_inserts_directly(monkeypatch):
+    inserted = []
+    updates = []
+
+    class FakeMemory:
+        async def insert_text(self, text, metadata):
+            inserted.append((text, metadata))
+            return "memory-1"
+
+    async def fake_memory_get(agent):
+        return FakeMemory()
+
+    async def never_finishes(self, new_memory, area, metadata, log_item=None):
+        await asyncio.sleep(10)
+
+    monkeypatch.setattr(memory_consolidation.Memory, "get", fake_memory_get)
+    monkeypatch.setattr(
+        memory_consolidation.MemoryConsolidator,
+        "_process_memory_with_consolidation",
+        never_finishes,
+    )
+
+    consolidator = memory_consolidation.MemoryConsolidator(
+        agent=object(),
+        config=memory_consolidation.ConsolidationConfig(
+            processing_timeout_seconds=0.01
+        ),
+    )
+    result = await consolidator.process_new_memory(
+        "important finding",
+        "solutions",
+        {"source": "test"},
+        types.SimpleNamespace(update=lambda **kwargs: updates.append(kwargs)),
+    )
+
+    assert result == {"success": True, "memory_ids": ["memory-1"]}
+    assert inserted[0][0] == "important finding"
+    assert inserted[0][1]["source"] == "test"
+    assert inserted[0][1]["timestamp"]
+    assert updates[-1]["consolidation_action"] == "timeout_direct_insert"
diff --git a/tests/test_tool_request_normalization.py b/tests/test_tool_request_normalization.py
index 531fb1f685..dbe4f7f951 100644
--- a/tests/test_tool_request_normalization.py
+++ b/tests/test_tool_request_normalization.py
@@ -103,6 +103,25 @@ def test_extract_tool_request_repairs_response_string_args() -> None:
     }
 
 
+def test_extract_tool_request_recovers_incomplete_response_text() -> None:
+    assert extract_tool_request(
+        '{"tool_name":"response","tool_args":{"text":"### Report\\n\\nFinding: \\"quoted'
+    ) == {
+        "tool_name": "response",
+        "tool_args": {"text": '### Report\n\nFinding: "quoted'},
+    }
+
+
+def test_extract_tool_request_does_not_recover_incomplete_action_tool() -> None:
+    assert (
+        extract_tool_request(
+            '{"tool_name":"text_editor","tool_args":{"content":"# My Tasks'
+        )
+        is None
+    )
+    assert extract_tool_request('{"') is None
+
+
 def test_extract_tool_request_sanitizes_surrogate_tool_args() -> None:
     request = extract_tool_request(
         r'{"tool_name":"text_editor","tool_args":{"content":"# \ud83d\udcdd My Tasks"}}'