From 516d13246e2dba534f3e5a7b5e4b9c1f2cc3b975 Mon Sep 17 00:00:00 2001
From: raychen <815315825@qq.com>
Date: Tue, 12 May 2026 10:34:50 +0800
Subject: [PATCH] feature: add OpenAI-compatible model adapters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add an openai_adapter package that splits the DeepSeek and Hunyuan specific
  adaptation logic by model
- Keep the main OpenAIModel flow compatible instead of continuing to pile
  model-specific logic onto the main implementation
- Adapt DeepSeek v4 handling of thinking, response_format, reasoning_content,
  and token usage
- Add ToolPrompt text tool-call parsing and streaming output filtering for
  hy3-preview
- Fix ToolPrompt streaming parsing keeping only the last tool call when the
  model emits several tool calls
- Improve the thinking example: enable add_tools_to_prompt only for
  hy3-preview, and show thinking, tool calls, and the final reply separately
- Update the example prompt so the model does not fabricate results before a
  tool returns and does not over-expand its reasoning
---
 .../llmagent_with_thinking/agent/agent.py     |  12 +-
 .../llmagent_with_thinking/agent/prompts.py   |  12 +-
 examples/llmagent_with_thinking/run_agent.py  |  67 ++++++-
 trpc_agent_sdk/models/_openai_model.py        | 137 +++++++++++--
 .../models/openai_adapter/__init__.py         |  36 ++++
 trpc_agent_sdk/models/openai_adapter/_base.py | 183 ++++++++++++++++++
 .../models/openai_adapter/_deepseek.py        |  81 ++++++++
 .../models/openai_adapter/_hunyuan.py         |  79 ++++++++
 8 files changed, 575 insertions(+), 32 deletions(-)
 create mode 100644 trpc_agent_sdk/models/openai_adapter/__init__.py
 create mode 100644 trpc_agent_sdk/models/openai_adapter/_base.py
 create mode 100644 trpc_agent_sdk/models/openai_adapter/_deepseek.py
 create mode 100644 trpc_agent_sdk/models/openai_adapter/_hunyuan.py

diff --git a/examples/llmagent_with_thinking/agent/agent.py b/examples/llmagent_with_thinking/agent/agent.py
index 7e0691e..b91de51 100644
--- a/examples/llmagent_with_thinking/agent/agent.py
+++ b/examples/llmagent_with_thinking/agent/agent.py
@@ -11,7 +11,7 @@
 from trpc_agent_sdk.tools import FunctionTool
 from trpc_agent_sdk.types import GenerateContentConfig
 from trpc_agent_sdk.types import ThinkingConfig
-
+from trpc_agent_sdk.types import HttpOptions
 from .config import get_model_config
 from .prompts import INSTRUCTION
 from .tools import get_weather_forecast
@@ -32,8 +32,9 @@ def _create_model() -> LLMModel:
         # if the LLM model service fails to return the JSON format of tool calls, you can also enable ToolPrompt.
         # This will prompt the LLM model to output the special text for tool calling in the main content,
         # thereby increasing the probability of successful tool invocation.
-        # You can uncomment the code below to use ToolPrompt.
-        # add_tools_to_prompt=True,
+        # Thinking models may emit tool calls as text. ToolPrompt lets the
+        # framework parse those text calls back into executable FunctionCalls.
+ add_tools_to_prompt=model_name.lower() == "hy3-preview", # Enable ToolPrompt for Hy3-preview model ) return model @@ -45,6 +46,9 @@ def create_agent(): weather_tool = FunctionTool(get_weather_report) forecast_tool = FunctionTool(get_weather_forecast) + # Set reasoning effort to high for Hy3-preview model + http_options=HttpOptions(extra_body={"chat_template_kwargs": {"reasoning_effort": "high"}}) + return LlmAgent( name="weather_agent", description= @@ -54,7 +58,7 @@ def create_agent(): instruction=INSTRUCTION, tools=[weather_tool, forecast_tool], # Note: thinking_budget must be less than max_output_tokens - generate_content_config=GenerateContentConfig(max_output_tokens=10240, ), + generate_content_config=GenerateContentConfig(max_output_tokens=10240, http_options=http_options), # The model must be a thinking model to use this Planner; this configuration will not take effect for non-thinking models. planner=BuiltInPlanner(thinking_config=ThinkingConfig( include_thoughts=True, diff --git a/examples/llmagent_with_thinking/agent/prompts.py b/examples/llmagent_with_thinking/agent/prompts.py index 05f25c5..8e0ce0d 100644 --- a/examples/llmagent_with_thinking/agent/prompts.py +++ b/examples/llmagent_with_thinking/agent/prompts.py @@ -18,13 +18,21 @@ - Provide clear, useful weather information and suggestions **Available tools:** -1. `get_weather`: Get current weather information +1. `get_weather_report`: Get current weather information 2. `get_weather_forecast`: Get multi-day weather forecast **Tool usage guide:** -- When the user asks about the current weather, use `get_weather` +- When the user asks about the current weather, use `get_weather_report` - When the user asks about the weather for the next few days, use `get_weather_forecast` - If the query is not clear, you can use both tools at the same time +- Do not answer with weather data before the required tool result is available +- Do not guess, simulate, or invent tool results +- If a tool is needed, call the tool first and wait for the tool result before giving the final answer + +**Thinking guidance:** +- Keep reasoning concise and focused on choosing the right tool and city +- Do not repeat the tool usage rules or tool schema in your reasoning +- Do not draft the final answer in reasoning; use reasoning only to decide the next action **Reply format:** - Provide accurate weather information diff --git a/examples/llmagent_with_thinking/run_agent.py b/examples/llmagent_with_thinking/run_agent.py index 4aece79..4878e63 100644 --- a/examples/llmagent_with_thinking/run_agent.py +++ b/examples/llmagent_with_thinking/run_agent.py @@ -30,7 +30,7 @@ async def run_weather_agent(): demo_queries = [ "What's the weather like today?", "What's the current weather in Guangzhou?", - "What will the weather be like in Shanghai for the next three days?", + "Please check both the current weather in Guangzhou and the three-day weather forecast for Shanghai.", ] for query in demo_queries: @@ -51,7 +51,47 @@ async def run_weather_agent(): user_content = Content(parts=[Part.from_text(text=query)]) - print("🤖 Assistant: ", end="", flush=True) + printed_thinking = False + printed_assistant = False + in_thinking = False + thinking_line_start = False + assistant_text_started = False + + def print_assistant_header() -> None: + nonlocal printed_assistant + if printed_assistant: + return + if printed_thinking: + print("\n") + print("🤖 Assistant: ", end="", flush=True) + printed_assistant = True + + def print_thinking_header() -> None: + nonlocal in_thinking, 
printed_thinking, thinking_line_start + if in_thinking: + return + print("\n 💭 Thinking: ", end="", flush=True) + in_thinking = True + printed_thinking = True + thinking_line_start = False + + def print_thinking_text(text: str) -> None: + nonlocal thinking_line_start + for line in text.splitlines(keepends=True): + if thinking_line_start: + print(" ", end="", flush=True) + print(line, end="", flush=True) + thinking_line_start = line.endswith("\n") + + def close_thinking_section() -> None: + nonlocal in_thinking, thinking_line_start + if in_thinking: + if not thinking_line_start: + print() + print(" 💭 End Thinking") + in_thinking = False + thinking_line_start = False + async for event in runner.run_async(user_id=user_id, session_id=current_session_id, new_message=user_content): if not event.content or not event.content.parts: continue @@ -59,15 +99,30 @@ async def run_weather_agent(): if event.partial: for part in event.content.parts: if part.text: - print(part.text, end="", flush=True) + if part.thought: + if assistant_text_started: + continue + print_thinking_header() + print_thinking_text(part.text) + else: + close_thinking_section() + print_assistant_header() + assistant_text_started = True + print(part.text, end="", flush=True) continue for part in event.content.parts: - if part.thought: - continue - if part.function_call: + if part.thought and part.text and not printed_thinking and not assistant_text_started: + print_thinking_header() + print_thinking_text(part.text) + elif part.function_call: + close_thinking_section() + print_assistant_header() print(f"\n🔧 [Invoke Tool:: {part.function_call.name}({part.function_call.args})]") elif part.function_response: + close_thinking_section() + printed_thinking = False + print_assistant_header() print(f"📊 [Tool Result: {part.function_response.response}]") # elif part.text: # print(f"\n✅ {part.text}") diff --git a/trpc_agent_sdk/models/_openai_model.py b/trpc_agent_sdk/models/_openai_model.py index e5ad05d..5fbc059 100644 --- a/trpc_agent_sdk/models/_openai_model.py +++ b/trpc_agent_sdk/models/_openai_model.py @@ -40,9 +40,10 @@ from ._llm_request import LlmRequest from ._llm_response import LlmResponse from ._registry import register_model +from .openai_adapter import get_openai_adapter from .tool_prompt import ToolPromptFactory from .tool_prompt import get_factory -from .tool_prompt._base import ToolPrompt +from .tool_prompt import ToolPrompt class ToolCall(BaseModel): @@ -102,7 +103,7 @@ class ApiParamsKey(str, Enum): PARALLEL_TOOL_CALLS = "parallel_tool_calls" -@register_model(model_name="OpenAIModel", supported_models=[r"gpt-.*", r"o1-.*", r"deepseek-.*"]) +@register_model(model_name="OpenAIModel", supported_models=[r"gpt-.*", r"o1-.*", r"deepseek-.*", r"hy3-.*"]) class OpenAIModel(LLMModel): """OpenAI model implementation using the abstract model interface. 
@@ -162,6 +163,7 @@ def __init__( **kwargs, ): super().__init__(model_name, filters_name, **kwargs) + self._adapter = get_openai_adapter(self._model_name, self._base_url) # Extract OpenAI-specific config self.organization: str = kwargs.get(const.ORGANIZATION, "") @@ -188,6 +190,20 @@ def __init__( elif not (isinstance(self.tool_prompt, type) and issubclass(self.tool_prompt, ToolPrompt)): raise ValueError(f"tool_prompt must be a string or ToolPrompt class, got {type(self.tool_prompt)}") + def _refresh_adapter(self) -> None: + """Refresh provider adapter after model or endpoint changes.""" + self._adapter = get_openai_adapter(self._model_name, self._base_url) + + @override + def set_base_url(self, value: str) -> None: + super().set_base_url(value) + self._refresh_adapter() + + @override + def set_model_name(self, value: str) -> None: + super().set_model_name(value) + self._refresh_adapter() + def _create_async_client(self): """Create a new async client instance.""" @@ -286,7 +302,10 @@ def _format_messages(self, request: LlmRequest) -> List[Dict[str, Any]]: # Handle different content structures if all(conditions_iter): # Simple text message - formatted_messages.append({const.ROLE: role, const.CONTENT: parts[0].text}) + message = {const.ROLE: role, const.CONTENT: parts[0].text} + if self._adapter.should_backfill_reasoning_content(role, message): + message[const.REASONING_CONTENT] = "" + formatted_messages.append(message) else: # Complex message with multiple parts or function calls/responses # Separate function responses from other content @@ -297,6 +316,8 @@ def _format_messages(self, request: LlmRequest) -> List[Dict[str, Any]]: for part in parts: # type: ignore if part.text: + if part.thought: + continue text_parts.append(part.text) elif part.inline_data and part.inline_data.mime_type: # Handle image data - convert to OpenAI vision format @@ -315,8 +336,9 @@ def _format_messages(self, request: LlmRequest) -> List[Dict[str, Any]]: "arguments": (part.function_call.args if isinstance(part.function_call.args, str) else json.dumps(part.function_call.args, ensure_ascii=False)), }, - "thought_signature": self._get_part_thought_signature(part), } + if self._adapter.should_include_thought_signature(): + tool_call["thought_signature"] = self._get_part_thought_signature(part) tool_calls.append(tool_call) # If add_tools_to_prompt is enabled, skip tool calls (they're handled via text prompts) elif part.function_response: @@ -399,6 +421,9 @@ def _format_messages(self, request: LlmRequest) -> List[Dict[str, Any]]: if tool_calls and not self.add_tools_to_prompt: message[const.TOOL_CALLS] = tool_calls + if self._adapter.should_backfill_reasoning_content(role, message): + message[const.REASONING_CONTENT] = "" + formatted_messages.append(message) # Validate and fix message sequence for OpenAI compatibility @@ -571,6 +596,9 @@ def _set_thinking(self, request: LlmRequest, http_options: dict): thinking_config = request.config.thinking_config + if self._adapter.apply_thinking(request, http_options): + return + # Only set thinking parameters if include_thoughts is True if not thinking_config.include_thoughts: return @@ -682,9 +710,11 @@ def _process_usage(self, chunk_dict: dict) -> Optional[GenerateContentResponseUs usage_data = chunk_dict.get(const.USAGE) if usage_data is None: return None + completion_details = usage_data.get("completion_tokens_details") or {} return GenerateContentResponseUsageMetadata( prompt_token_count=usage_data.get("prompt_tokens", 0), 
candidates_token_count=usage_data.get("completion_tokens", 0), + thoughts_token_count=completion_details.get("reasoning_tokens"), total_token_count=usage_data.get("total_tokens", 0), ) @@ -942,9 +972,11 @@ def _process_usage_from_response(self, response_dict: dict) -> Optional[Generate return None usage_data: dict[str, int] = response_dict[const.USAGE] + completion_details = usage_data.get("completion_tokens_details") or {} return GenerateContentResponseUsageMetadata( prompt_token_count=usage_data.get("prompt_tokens", 0), candidates_token_count=usage_data.get("completion_tokens", 0), + thoughts_token_count=completion_details.get("reasoning_tokens"), total_token_count=usage_data.get("total_tokens", 0), ) @@ -974,6 +1006,7 @@ def _create_response_with_content(self, response_dict: dict) -> LlmResponse: # Extract content text_content = message.get(const.CONTENT, "") + reasoning_content = message.get(const.REASONING_CONTENT) # Check for tool calls tool_calls = self._process_tool_calls_from_message(message) @@ -982,7 +1015,7 @@ def _create_response_with_content(self, response_dict: dict) -> LlmResponse: if self.add_tools_to_prompt and text_content and not tool_calls: try: tool_prompt = self._create_tool_prompt() - parsed_function_calls = tool_prompt.parse_function(text_content) + parsed_function_calls = self._adapter.parse_tool_prompt_function_calls(text_content, tool_prompt) if parsed_function_calls: # Convert FunctionCall objects to ToolCall objects tool_calls = [] @@ -997,8 +1030,13 @@ def _create_response_with_content(self, response_dict: dict) -> LlmResponse: parts = [] + if reasoning_content: + content_part = Part.from_text(text=reasoning_content) + content_part.thought = True + parts.append(content_part) + # Add text content if present - if text_content: + if text_content and not (tool_calls and self._adapter.should_suppress_tool_prompt_text()): content_part = Part.from_text(text=text_content) content_part.thought = False # Regular text content is not thought parts.append(content_part) @@ -1218,6 +1256,10 @@ def _build_response_format(self, config: GenerateContentConfig) -> Optional[Dict """ # Handle response_mime_type and response_schema if config.response_mime_type == "application/json": + handled, response_format = self._adapter.build_response_format(config) + if handled: + return response_format + if config.response_schema: # response_schema must be pydantic.BaseModel if not isinstance(config.response_schema, type(BaseModel)): @@ -1374,6 +1416,9 @@ async def _generate_async_impl(self, # Update request with merged config request.config = merged_config + if (request.config and request.config.tools and self._adapter.requires_add_tools_to_prompt() + and not self.add_tools_to_prompt): + raise ValueError(f"{self._model_name} requires add_tools_to_prompt=True when tools are used.") # Prepare OpenAI API parameters messages = self._format_messages(request) @@ -1393,11 +1438,14 @@ async def _generate_async_impl(self, # Log warnings for unsupported configuration options self._log_unsupported_config_options(request.config) if request.config.max_output_tokens: - # Use max_completion_tokens for newer models (preferred), fallback to max_tokens - api_params[ApiParamsKey.MAX_COMPLETION_TOKENS] = request.config.max_output_tokens - # Keep max_tokens for backward compatibility (skip for gpt models) - if "gpt-5" not in self._model_name.lower(): + if self._adapter.use_max_tokens_only(): api_params[ApiParamsKey.MAX_TOKENS] = request.config.max_output_tokens + else: + # Use max_completion_tokens for newer 
models (preferred), fallback to max_tokens + api_params[ApiParamsKey.MAX_COMPLETION_TOKENS] = request.config.max_output_tokens + # Keep max_tokens for backward compatibility (skip for gpt models) + if "gpt-5" not in self._model_name.lower(): + api_params[ApiParamsKey.MAX_TOKENS] = request.config.max_output_tokens if request.config.temperature is not None: api_params[ApiParamsKey.TEMPERATURE] = request.config.temperature if request.config.top_p is not None: @@ -1406,15 +1454,18 @@ async def _generate_async_impl(self, api_params[ApiParamsKey.STOP] = request.config.stop_sequences # Additional OpenAI-specific parameters - if request.config.frequency_penalty is not None: + if (request.config.frequency_penalty is not None + and not self._adapter.should_skip_config_param("frequency_penalty")): api_params[ApiParamsKey.FREQUENCY_PENALTY] = request.config.frequency_penalty - if request.config.presence_penalty is not None: + if (request.config.presence_penalty is not None + and not self._adapter.should_skip_config_param("presence_penalty")): api_params[ApiParamsKey.PRESENCE_PENALTY] = request.config.presence_penalty - if request.config.seed is not None: + if request.config.seed is not None and not self._adapter.should_skip_config_param("seed"): api_params[ApiParamsKey.SEED] = request.config.seed # Handle candidate count (maps to OpenAI's 'n' parameter) - if request.config.candidate_count is not None and request.config.candidate_count > 0: + if (request.config.candidate_count is not None and request.config.candidate_count > 0 + and not self._adapter.should_skip_config_param("candidate_count")): api_params[ApiParamsKey.N] = request.config.candidate_count # Handle logprobs configuration @@ -1481,8 +1532,13 @@ async def _generate_stream(self, # Create tool prompt instance for streaming if needed tool_prompt = None + streaming_text_filter_state = None if self.add_tools_to_prompt: tool_prompt = self._create_tool_prompt() + streaming_text_filter_state = { + "content": self._adapter.create_streaming_text_filter_state(), + "reasoning": self._adapter.create_streaming_text_filter_state(), + } client = self._create_async_client() try: @@ -1547,11 +1603,20 @@ async def _generate_stream(self, if delta.get(const.REASONING_CONTENT): reasoning_content = delta.get(const.REASONING_CONTENT) if reasoning_content is not None: + partial_text = reasoning_content + if (tool_prompt and streaming_text_filter_state is not None + and self._adapter.should_filter_reasoning_text()): + reasoning_filter_state = streaming_text_filter_state["reasoning"] + partial_text = self._adapter.filter_streaming_text(reasoning_content, + reasoning_filter_state) + if not partial_text: + continue + # Reasoning content is always thinking content - thought_content += reasoning_content + thought_content += partial_text # Set thought flag to True for reasoning content - content_part = Part.from_text(text=reasoning_content) + content_part = Part.from_text(text=partial_text) content_part.thought = True partial_content = Content(parts=[content_part], role=const.MODEL) @@ -1569,8 +1634,15 @@ async def _generate_stream(self, else: thought_content += content + partial_text = content + if tool_prompt and streaming_text_filter_state is not None: + content_filter_state = streaming_text_filter_state["content"] + partial_text = self._adapter.filter_streaming_text(content, content_filter_state) + if not partial_text: + continue + # Set thought flag based on current thinking state - content_part = Part.from_text(text=content) + content_part = 
Part.from_text(text=partial_text) content_part.thought = is_thinking partial_content = Content(parts=[content_part], role=const.MODEL) @@ -1584,6 +1656,30 @@ async def _generate_stream(self, if usage: last_usage = usage + if tool_prompt and streaming_text_filter_state is not None: + if self._adapter.should_filter_reasoning_text(): + flushed_reasoning_text = self._adapter.flush_streaming_text( + streaming_text_filter_state["reasoning"]) + if flushed_reasoning_text: + thought_content += flushed_reasoning_text + content_part = Part.from_text(text=flushed_reasoning_text) + content_part.thought = True + partial_content = Content(parts=[content_part], role=const.MODEL) + yield LlmResponse(content=partial_content, + partial=True, + response_id=response_id, + custom_metadata={"stream_filter_flushed": "reasoning"}) + + flushed_content_text = self._adapter.flush_streaming_text(streaming_text_filter_state["content"]) + if flushed_content_text: + content_part = Part.from_text(text=flushed_content_text) + content_part.thought = is_thinking + partial_content = Content(parts=[content_part], role=const.MODEL) + yield LlmResponse(content=partial_content, + partial=True, + response_id=response_id, + custom_metadata={"stream_filter_flushed": "content"}) + # Yield final complete response final_content = None @@ -1599,7 +1695,8 @@ async def _generate_stream(self, complete_tool_calls = self._create_complete_tool_calls(accumulated_tool_calls) if tool_prompt and accumulated_content and not complete_tool_calls: try: - parsed_function_calls = tool_prompt.parse_function(accumulated_content) + parsed_function_calls = self._adapter.parse_tool_prompt_function_calls( + accumulated_content, tool_prompt) if parsed_function_calls: # Convert FunctionCall objects to ToolCall objects complete_tool_calls = [] @@ -1607,14 +1704,14 @@ async def _generate_stream(self, tool_call = ToolCall(id=f"call_{uuid.uuid4().hex[:24]}", name=func_call.name, arguments=func_call.args) - complete_tool_calls.append(tool_call) + complete_tool_calls.append(tool_call) logger.debug("Parsed %s function calls from final accumulated content", len(complete_tool_calls)) except Exception as ex: # pylint: disable=broad-except logger.warning("Failed to parse function calls from final accumulated content: %s", ex) # Add text content if present - if accumulated_content: + if accumulated_content and not complete_tool_calls: logger.debug("Final accumulated regular content: %s...", accumulated_content[:200]) content_part = Part.from_text(text=accumulated_content) content_part.thought = False # Final accumulated content represents the answer, not thinking diff --git a/trpc_agent_sdk/models/openai_adapter/__init__.py b/trpc_agent_sdk/models/openai_adapter/__init__.py new file mode 100644 index 0000000..b41e7c1 --- /dev/null +++ b/trpc_agent_sdk/models/openai_adapter/__init__.py @@ -0,0 +1,36 @@ +# Tencent is pleased to support the open source community by making tRPC-Agent-Python available. +# +# Copyright (C) 2026 Tencent. All rights reserved. +# +# tRPC-Agent-Python is licensed under Apache-2.0. 
+"""Adapters for OpenAI-compatible model providers.""" + +from __future__ import annotations + +from typing import Optional + +from ._base import DefaultOpenAIAdapter +from ._base import OpenAIAdapter +from ._base import ToolPromptTextFilterMixin +from ._deepseek import DeepSeekAdapter +from ._hunyuan import HunyuanHy3PreviewAdapter + + +def get_openai_adapter(model_name: str, base_url: Optional[str] = None) -> OpenAIAdapter: + """Return the provider adapter for an OpenAI-compatible model.""" + model_name_lower = model_name.lower() + if model_name_lower == "hy3-preview": + return HunyuanHy3PreviewAdapter(model_name=model_name, base_url=base_url) + if model_name_lower.startswith("deepseek-"): + return DeepSeekAdapter(model_name=model_name, base_url=base_url) + return DefaultOpenAIAdapter(model_name=model_name, base_url=base_url) + + +__all__ = [ + "DefaultOpenAIAdapter", + "DeepSeekAdapter", + "HunyuanHy3PreviewAdapter", + "OpenAIAdapter", + "ToolPromptTextFilterMixin", + "get_openai_adapter", +] diff --git a/trpc_agent_sdk/models/openai_adapter/_base.py b/trpc_agent_sdk/models/openai_adapter/_base.py new file mode 100644 index 0000000..8d042ad --- /dev/null +++ b/trpc_agent_sdk/models/openai_adapter/_base.py @@ -0,0 +1,183 @@ +# Tencent is pleased to support the open source community by making tRPC-Agent-Python available. +# +# Copyright (C) 2026 Tencent. All rights reserved. +# +# tRPC-Agent-Python is licensed under Apache-2.0. +"""Base adapter for OpenAI-compatible model provider differences.""" + +from __future__ import annotations + +from typing import Any +from typing import List +from typing import Optional + +from trpc_agent_sdk.types import FunctionCall + +from .. import _constants as const + +_TOOL_PROMPT_MARKERS = ( + "", + "", + "", + "", + "", + "", + "", + "", +} +_TOOL_PROMPT_MARKER_LOOKBEHIND = max( + max(len(marker) for marker in _TOOL_PROMPT_MARKERS), + max(len(marker) for marker in _TOOL_PROMPT_MARKER_ENDS.values()), +) - 1 + + +class OpenAIAdapter: + """Adapter hook points for provider-specific OpenAI-compatible behavior.""" + + def __init__(self, model_name: str, base_url: Optional[str] = None): + self.model_name = model_name + self.base_url = base_url + + def use_max_tokens_only(self) -> bool: + """Whether max_output_tokens should map only to max_tokens.""" + return False + + def should_skip_config_param(self, param_name: str) -> bool: + """Whether a GenerateContentConfig field should be skipped for this provider.""" + return False + + def should_include_thought_signature(self) -> bool: + """Whether tool call history should include thought_signature.""" + return True + + def should_backfill_reasoning_content(self, role: str, message: dict[str, Any]) -> bool: + """Whether assistant history should include an empty reasoning_content field.""" + return False + + def build_response_format(self, config: Any) -> tuple[bool, Optional[dict[str, Any]]]: + """Return provider-specific response_format. + + The first tuple item indicates whether the adapter handled the config. + """ + return False, None + + def apply_thinking(self, request: Any, http_options: dict[str, Any]) -> bool: + """Apply provider-specific thinking options. + + Returns True when the adapter handled thinking and the default OpenAI + thinking mapping should be skipped. 
+ """ + return False + + def parse_tool_prompt_function_calls(self, content: str, tool_prompt: Any) -> List[FunctionCall]: + """Parse text-form tool calls emitted by a provider.""" + return tool_prompt.parse_function(content) + + def requires_add_tools_to_prompt(self) -> bool: + """Whether this adapter requires ToolPrompt mode when tools are used.""" + return False + + def should_suppress_tool_prompt_text(self) -> bool: + """Whether parsed text-form tool calls should be hidden from final text.""" + return False + + def should_filter_reasoning_text(self) -> bool: + """Whether ToolPrompt filtering should also apply to reasoning_content.""" + return False + + def create_streaming_text_filter_state(self) -> dict[str, Any]: + """Create per-stream state for filtering provider-specific text chunks.""" + return {} + + def filter_streaming_text(self, text: str, state: dict[str, Any]) -> str: + """Filter a streaming text chunk before yielding it to users.""" + return text + + def flush_streaming_text(self, state: dict[str, Any]) -> str: + """Flush any buffered streaming text after the stream ends.""" + return "" + + +class DefaultOpenAIAdapter(OpenAIAdapter): + """Default OpenAI-compatible adapter with no provider overrides.""" + + pass + + +class ToolPromptTextFilterMixin: + """Opt-in filtering for models that emit ToolPrompt XML as streamed text.""" + + def should_suppress_tool_prompt_text(self) -> bool: + return True + + def create_streaming_text_filter_state(self) -> dict[str, Any]: + return { + "buffer": "", + "suppress": False, + "suppress_until": "", + } + + def filter_streaming_text(self, text: str, state: dict[str, Any]) -> str: + if state.get("suppress"): + buffer = f"{state.get('buffer', '')}{text}" + suppress_until = state.get("suppress_until") or "" + marker_start = buffer.find(suppress_until) if suppress_until else -1 + if marker_start < 0: + state["buffer"] = buffer[-_TOOL_PROMPT_MARKER_LOOKBEHIND:] + return "" + + resume_at = marker_start + len(suppress_until) + state["buffer"] = "" + state["suppress"] = False + state["suppress_until"] = "" + return self.filter_streaming_text(buffer[resume_at:], state) + + buffer = f"{state.get('buffer', '')}{text}" + marker_start, marker = self._find_first_tool_prompt_marker(buffer) + if marker: + state["buffer"] = "" + state["suppress"] = True + state["suppress_until"] = _TOOL_PROMPT_MARKER_ENDS[marker] + return buffer[:marker_start] + self.filter_streaming_text(buffer[marker_start:], state) + + if len(buffer) <= _TOOL_PROMPT_MARKER_LOOKBEHIND: + state["buffer"] = buffer + return "" + + split_at = len(buffer) - _TOOL_PROMPT_MARKER_LOOKBEHIND + state["buffer"] = buffer[split_at:] + return buffer[:split_at] + + def flush_streaming_text(self, state: dict[str, Any]) -> str: + if state.get("suppress"): + return "" + + buffer = state.get("buffer", "") + state["buffer"] = "" + marker_start, marker = self._find_first_tool_prompt_marker(buffer) + if marker: + state["suppress"] = True + state["suppress_until"] = _TOOL_PROMPT_MARKER_ENDS[marker] + return buffer[:marker_start] + return buffer + + def _find_first_tool_prompt_marker(self, text: str) -> tuple[int, Optional[str]]: + marker_positions = [(text.find(marker), marker) for marker in _TOOL_PROMPT_MARKERS if marker in text] + if not marker_positions: + return -1, None + return min(marker_positions, key=lambda item: item[0]) + + +def has_reasoning_content(message: dict[str, Any]) -> bool: + """Return whether message already includes reasoning_content.""" + return const.REASONING_CONTENT in message diff 
--git a/trpc_agent_sdk/models/openai_adapter/_deepseek.py b/trpc_agent_sdk/models/openai_adapter/_deepseek.py new file mode 100644 index 0000000..64c82ba --- /dev/null +++ b/trpc_agent_sdk/models/openai_adapter/_deepseek.py @@ -0,0 +1,81 @@ +# Tencent is pleased to support the open source community by making tRPC-Agent-Python available. +# +# Copyright (C) 2026 Tencent. All rights reserved. +# +# tRPC-Agent-Python is licensed under Apache-2.0. +"""DeepSeek adapter for OpenAI-compatible chat completions.""" + +from __future__ import annotations + +from typing import Any +from typing import Optional + +from trpc_agent_sdk.log import logger + +from .. import _constants as const +from ._base import OpenAIAdapter +from ._base import has_reasoning_content + + +class DeepSeekAdapter(OpenAIAdapter): + """Provider-specific behavior for DeepSeek's OpenAI-compatible API.""" + + def __init__(self, model_name: str, base_url: Optional[str] = None): + super().__init__(model_name=model_name, base_url=base_url) + self._model_name_lower = model_name.lower() + + def is_v4_model(self) -> bool: + """Return whether the current model uses DeepSeek v4 chat completions.""" + return self._model_name_lower.startswith("deepseek-v4-") + + def use_max_tokens_only(self) -> bool: + return True + + def should_skip_config_param(self, param_name: str) -> bool: + return param_name in { + "frequency_penalty", + "presence_penalty", + "seed", + "candidate_count", + } + + def should_include_thought_signature(self) -> bool: + return False + + def should_backfill_reasoning_content(self, role: str, message: dict[str, Any]) -> bool: + if not self.is_v4_model() or role != const.ASSISTANT: + return False + if has_reasoning_content(message): + return False + return bool(message.get(const.CONTENT) or message.get(const.TOOL_CALLS)) + + def build_response_format(self, config: Any) -> tuple[bool, Optional[dict[str, Any]]]: + if config.response_mime_type != "application/json": + return False, None + if config.response_schema or config.response_json_schema: + logger.warning("DeepSeek only supports JSON object response_format; response schema is ignored.") + return True, {"type": "json_object"} + + def apply_thinking(self, request: Any, http_options: dict[str, Any]) -> bool: + if not self.is_v4_model(): + return False + if not request.config or not request.config.thinking_config: + return False + + thinking_config = request.config.thinking_config + if "extra_body" not in http_options: + http_options["extra_body"] = {} + processed_extra_body = http_options["extra_body"] + thinking_body = dict(processed_extra_body.get("thinking") or {}) + + if thinking_config.include_thoughts and thinking_config.thinking_budget != 0: + thinking_body["type"] = "enabled" + thinking_body.setdefault( + "reasoning_effort", + "max" if thinking_config.thinking_budget and thinking_config.thinking_budget > 0 else "high", + ) + else: + thinking_body["type"] = "disabled" + + processed_extra_body["thinking"] = thinking_body + return True diff --git a/trpc_agent_sdk/models/openai_adapter/_hunyuan.py b/trpc_agent_sdk/models/openai_adapter/_hunyuan.py new file mode 100644 index 0000000..d7e1348 --- /dev/null +++ b/trpc_agent_sdk/models/openai_adapter/_hunyuan.py @@ -0,0 +1,79 @@ +# Tencent is pleased to support the open source community by making tRPC-Agent-Python available. +# +# Copyright (C) 2026 Tencent. All rights reserved. +# +# tRPC-Agent-Python is licensed under Apache-2.0. 
+"""Hunyuan adapter for OpenAI-compatible chat completions.""" + +from __future__ import annotations + +import json +import re +from typing import Any +from typing import List +from typing import Optional + +from trpc_agent_sdk.types import FunctionCall + +from ._base import OpenAIAdapter +from ._base import ToolPromptTextFilterMixin + + +class HunyuanHy3PreviewAdapter(ToolPromptTextFilterMixin, OpenAIAdapter): + """Provider-specific behavior for the hy3-preview model.""" + + def __init__(self, model_name: str, base_url: Optional[str] = None): + super().__init__(model_name=model_name, base_url=base_url) + + def parse_tool_prompt_function_calls(self, content: str, tool_prompt: Any) -> List[FunctionCall]: + function_calls = self._parse_hunyuan_tool_calls(content) + if function_calls: + return function_calls + return tool_prompt.parse_function(content) + + def requires_add_tools_to_prompt(self) -> bool: + return True + + def should_filter_reasoning_text(self) -> bool: + return True + + def _parse_hunyuan_tool_calls(self, content: str) -> List[FunctionCall]: + function_calls = [] + matches = re.findall(r"(.*?)", content, re.DOTALL) + + for match in matches: + if "" not in match: + continue + + tool_name, params_content = match.split("", 1) + args = self._parse_hunyuan_tool_args(params_content) + function_calls.append(FunctionCall(name=tool_name.strip(), args=args)) + + return function_calls + + def _parse_hunyuan_tool_args(self, params_content: str) -> dict[str, Any]: + args: dict[str, Any] = {} + param_matches = re.findall( + r"(.*?)\s*(.*?)", + params_content, + re.DOTALL, + ) + if param_matches: + for key, value in param_matches: + args[key.strip()] = self._parse_arg_value(value.strip()) + return args + + params_content = params_content.strip() + if not params_content: + return args + + parsed_value = self._parse_arg_value(params_content) + if isinstance(parsed_value, dict): + return parsed_value + return {"value": parsed_value} + + def _parse_arg_value(self, value: str) -> Any: + try: + return json.loads(value) + except json.JSONDecodeError: + return value