Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 30 additions & 1 deletion helpers/extract_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,11 +4,40 @@
from helpers.modules import load_classes_from_file, load_classes_from_folder # keep here for backwards compatibility
from typing import Any


def _sanitize_for_json_parsing(content: str) -> str:
"""
Remove common LLM output artifacts that break JSON parsing:
markdown fences, XML-like wrappers, text outside JSON, and leading/trailing whitespace.
"""
if not isinstance(content, str):
return ""

# 1. Strip markdown code blocks (```json ... ``` and ``` ... ```)
cleaned = re.sub(r'^```[a-zA-Z]*\s*\n?', '', content, flags=re.MULTILINE)
cleaned = re.sub(r'\n?```\s*$', '', cleaned, flags=re.MULTILINE)

# 2. Strip XML-style wrappers (<invoke>...</invoke>)
cleaned = re.sub(r'<\s*invoke\b[^>]*>.*?</\s*invoke\s*>', '', cleaned, flags=re.DOTALL)
cleaned = re.sub(r'<\s*/\s*invoke\s*>', '', cleaned, flags=re.DOTALL)

# 3. Remove function-style wrappers (functions.tool_name:123)
cleaned = re.sub(r'functions\.\w+\s*:\s*\w+\s*', '', cleaned)

# 4. Remove leading text before the first '{' if any
first_brace = cleaned.find('{')
if first_brace > 0:
cleaned = cleaned[first_brace:]

return cleaned


def json_parse_dirty(json: str) -> dict[str, Any] | None:
if not json or not isinstance(json, str):
return None

ext_json = extract_json_object_string(json.strip())
sanitized = _sanitize_for_json_parsing(json)
ext_json = extract_json_object_string(sanitized.strip())
if ext_json:
try:
data = DirtyJson.parse_string(ext_json)
Expand Down
23 changes: 23 additions & 0 deletions tests/test_extract_tools_json_parse.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
from __future__ import annotations

import sys
from pathlib import Path

PROJECT_ROOT = Path(__file__).resolve().parents[1]
if str(PROJECT_ROOT) not in sys.path:
sys.path.insert(0, str(PROJECT_ROOT))

from helpers.extract_tools import json_parse_dirty


def test_json_parse_dirty_sanitizes_common_llm_wrappers() -> None:
payload = """```json
<invoke>discard this wrapper artifact</invoke>
functions.search:123
{"tool_name":"search","tool_args":{"query":"agent zero"}}
```"""

assert json_parse_dirty(payload) == {
"tool_name": "search",
"tool_args": {"query": "agent zero"},
}