From 082ed8b1ae497c58879ea8bcb84b8ab3dd31486d Mon Sep 17 00:00:00 2001
From: dmytryk-here <6403375+dmytryk-here@users.noreply.github.com>
Date: Tue, 16 Jun 2026 08:52:10 +0800
Subject: [PATCH] Sanitize dirty JSON tool responses
---
helpers/extract_tools.py | 31 +++++++++++++++++++++++++-
tests/test_extract_tools_json_parse.py | 23 +++++++++++++++++++
2 files changed, 53 insertions(+), 1 deletion(-)
create mode 100644 tests/test_extract_tools_json_parse.py
diff --git a/helpers/extract_tools.py b/helpers/extract_tools.py
index 39e8378553..3fd767c8d1 100644
--- a/helpers/extract_tools.py
+++ b/helpers/extract_tools.py
@@ -4,11 +4,40 @@
from helpers.modules import load_classes_from_file, load_classes_from_folder # keep here for backwards compatibility
from typing import Any
+
+def _sanitize_for_json_parsing(content: str) -> str:
+    """Strip LLM wrapper artifacts around a JSON object, leaving the object untouched.
+
+    Text before the first ``{`` (fence openers, ``<invoke ...>`` tags,
+    ``functions.name:id`` prefixes, stray prose) is dropped, and closing markdown
+    fences / ``</invoke>`` tags plus whitespace are trimmed from the end.
+    Nothing in between is rewritten, so string values that merely contain such
+    markers survive intact.
+
+    Args:
+        content: Raw model output that may wrap a JSON object.
+
+    Returns:
+        The text from the first ``{`` onward with trailing wrappers removed;
+        ``""`` for non-string input. Input without ``{`` only gets the trailing trim.
+    """
+    if not isinstance(content, str):
+        return ""
+
+    # Cut at the first brace instead of regex-scrubbing the whole text: global
+    # substitutions would also hit fence/tag/prefix look-alikes inside JSON strings.
+    first_brace = content.find('{')
+    payload = content[first_brace:] if first_brace > 0 else content
+    # Anchored at the very end so only wrappers that close the reply are removed.
+    return re.sub(r'(?:(?:```|<\s*/\s*invoke\s*>)\s*)+\Z', '', payload).rstrip()
+
+
def json_parse_dirty(json: str) -> dict[str, Any] | None:
if not json or not isinstance(json, str):
return None
- ext_json = extract_json_object_string(json.strip())
+ sanitized = _sanitize_for_json_parsing(json)
+ ext_json = extract_json_object_string(sanitized.strip())
if ext_json:
try:
data = DirtyJson.parse_string(ext_json)
diff --git a/tests/test_extract_tools_json_parse.py b/tests/test_extract_tools_json_parse.py
new file mode 100644
index 0000000000..9456113706
--- /dev/null
+++ b/tests/test_extract_tools_json_parse.py
@@ -0,0 +1,23 @@
+from __future__ import annotations
+
+import sys
+from pathlib import Path
+
+PROJECT_ROOT = Path(__file__).resolve().parents[1]  # directory that contains tests/ (two levels up from this file)
+if str(PROJECT_ROOT) not in sys.path:  # avoid adding a duplicate entry
+ sys.path.insert(0, str(PROJECT_ROOT))  # put it first so the `helpers` import below resolves against this checkout
+
+from helpers.extract_tools import json_parse_dirty
+
+
+def test_json_parse_dirty_sanitizes_common_llm_wrappers() -> None:
+    """A fenced reply with stray prose and a functions.* prefix parses to the tool call."""
+    expected = {"tool_name": "search", "tool_args": {"query": "agent zero"}}
+    raw_reply = """```json
+discard this wrapper artifact
+functions.search:123
+{"tool_name":"search","tool_args":{"query":"agent zero"}}
+```"""
+
+    parsed = json_parse_dirty(raw_reply)
+    assert parsed == expected