diff --git a/backend/python/sglang/backend.py b/backend/python/sglang/backend.py index 8b48d23233dc..0d38c6b7d2c9 100644 --- a/backend/python/sglang/backend.py +++ b/backend/python/sglang/backend.py @@ -147,9 +147,25 @@ def _messages_to_dicts(self, messages) -> List[dict]: d["reasoning_content"] = msg.reasoning_content if msg.tool_calls: try: - d["tool_calls"] = json.loads(msg.tool_calls) + tool_calls = json.loads(msg.tool_calls) except json.JSONDecodeError: pass + else: + # OpenAI wire format carries function.arguments as a + # JSON-encoded string, but chat templates (e.g. Qwen3) + # iterate over it as a mapping. The vllm backend + # already parses arguments before applying the chat + # template (PR #10256); mirror that here so the + # sglang backend works with the same wire format. + if isinstance(tool_calls, list): + for tc in tool_calls: + func = tc.get("function") if isinstance(tc, dict) else None + if isinstance(func, dict) and isinstance(func.get("arguments"), str): + try: + func["arguments"] = json.loads(func["arguments"]) + except json.JSONDecodeError: + pass + d["tool_calls"] = tool_calls result.append(d) return result