From 158431d1a0ffd6f5dc1b5a6a0c95890857e98ac0 Mon Sep 17 00:00:00 2001 From: Poseidon Date: Sat, 27 Jun 2026 19:28:24 +0200 Subject: [PATCH] fix(sglang): parse tool_call function arguments before applying the chat template OpenAI wire format carries `function.arguments` as a JSON-encoded string, but chat templates (e.g. Qwen3-Coder) iterate over it as a mapping. The vllm backend already parses arguments before applying the chat template (PR #10256); this mirrors that fix in the sglang backend. Without this fix the second turn of any tool-using session (assistant returns tool_calls, user posts `role:"tool"` result, model is invoked with arguments still as a string) crashes inside transformers' Jinja chat-template rendering with: TypeError: Can only get item pairs from a mapping. File ".../transformers/utils/chat_template_utils.py", in render_jinja_template File ".../jinja2/filters.py", in do_items raise TypeError("Can only get item pairs from a mapping.") Reproduced on `lmsysorg/sglang:v0.5.14` via LocalAI v4.5.4 with `saricles/Qwen3-Coder-Next-NVFP4-GB10` (W4A4 NVFP4 / compressed-tensors) on NVIDIA DGX Spark (GB10, sm_121). After the patch, a tool-call roundtrip (assistant tool_calls -> tool result -> assistant final answer) returns http=200 with the expected follow-up content; no behaviour change on requests that don't carry tool_calls. --- backend/python/sglang/backend.py | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) diff --git a/backend/python/sglang/backend.py b/backend/python/sglang/backend.py index 8b48d23233dc..0d38c6b7d2c9 100644 --- a/backend/python/sglang/backend.py +++ b/backend/python/sglang/backend.py @@ -147,9 +147,25 @@ def _messages_to_dicts(self, messages) -> List[dict]: d["reasoning_content"] = msg.reasoning_content if msg.tool_calls: try: - d["tool_calls"] = json.loads(msg.tool_calls) + tool_calls = json.loads(msg.tool_calls) except json.JSONDecodeError: pass + else: + # OpenAI wire format carries function.arguments as a + # JSON-encoded string, but chat templates (e.g. Qwen3) + # iterate over it as a mapping. The vllm backend + # already parses arguments before applying the chat + # template (PR #10256); mirror that here so the + # sglang backend works with the same wire format. + if isinstance(tool_calls, list): + for tc in tool_calls: + func = tc.get("function") if isinstance(tc, dict) else None + if isinstance(func, dict) and isinstance(func.get("arguments"), str): + try: + func["arguments"] = json.loads(func["arguments"]) + except json.JSONDecodeError: + pass + d["tool_calls"] = tool_calls result.append(d) return result