From 516d13246e2dba534f3e5a7b5e4b9c1f2cc3b975 Mon Sep 17 00:00:00 2001
From: raychen <815315825@qq.com>
Date: Tue, 12 May 2026 10:34:50 +0800
Subject: [PATCH] feature: add OpenAI-compatible model adapters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Add an openai_adapter package that splits the DeepSeek and Hunyuan specific
  adaptation logic by model
- Keep the main OpenAIModel flow compatible instead of continuing to pile
  model-specific logic onto the main implementation
- Adapt DeepSeek v4 handling of thinking, response_format, reasoning_content,
  and token usage
- Add ToolPrompt text tool-call parsing and streaming output filtering for
  hy3-preview
- Fix ToolPrompt streaming parsing keeping only the last tool call when the
  model emits several tool calls
- Improve the thinking example: enable add_tools_to_prompt only for
  hy3-preview, and show thinking, tool calls, and the final reply separately
- Update the example prompt so the model does not fabricate results before a
  tool returns and does not over-expand its reasoning
---
 .../llmagent_with_thinking/agent/agent.py     |  12 +-
 .../llmagent_with_thinking/agent/prompts.py   |  12 +-
 examples/llmagent_with_thinking/run_agent.py  |  67 ++++++-
 trpc_agent_sdk/models/_openai_model.py        | 137 +++++++++++--
 .../models/openai_adapter/__init__.py         |  36 ++++
 trpc_agent_sdk/models/openai_adapter/_base.py | 183 ++++++++++++++++++
 .../models/openai_adapter/_deepseek.py        |  81 ++++++++
 .../models/openai_adapter/_hunyuan.py         |  79 ++++++++
 8 files changed, 575 insertions(+), 32 deletions(-)
 create mode 100644 trpc_agent_sdk/models/openai_adapter/__init__.py
 create mode 100644 trpc_agent_sdk/models/openai_adapter/_base.py
 create mode 100644 trpc_agent_sdk/models/openai_adapter/_deepseek.py
 create mode 100644 trpc_agent_sdk/models/openai_adapter/_hunyuan.py

diff --git a/examples/llmagent_with_thinking/agent/agent.py b/examples/llmagent_with_thinking/agent/agent.py
index 7e0691e..b91de51 100644
--- a/examples/llmagent_with_thinking/agent/agent.py
+++ b/examples/llmagent_with_thinking/agent/agent.py
@@ -11,7 +11,7 @@
 from trpc_agent_sdk.tools import FunctionTool
 from trpc_agent_sdk.types import GenerateContentConfig
 from trpc_agent_sdk.types import ThinkingConfig
-
+from trpc_agent_sdk.types import HttpOptions
 from .config import get_model_config
 from .prompts import INSTRUCTION
 from .tools import get_weather_forecast
@@ -32,8 +32,9 @@ def _create_model() -> LLMModel:
         # if the LLM model service fails to return the JSON format of tool calls, you can also enable ToolPrompt.
         # This will prompt the LLM model to output the special text for tool calling in the main content,
         # thereby increasing the probability of successful tool invocation.
-        # You can uncomment the code below to use ToolPrompt.
-        # add_tools_to_prompt=True,
+        # Thinking models may emit tool calls as text. ToolPrompt lets the
+        # framework parse those text calls back into executable FunctionCalls.
+ add_tools_to_prompt=model_name.lower() == "hy3-preview", # Enable ToolPrompt for Hy3-preview model ) return model @@ -45,6 +46,9 @@ def create_agent(): weather_tool = FunctionTool(get_weather_report) forecast_tool = FunctionTool(get_weather_forecast) + # Set reasoning effort to high for Hy3-preview model + http_options=HttpOptions(extra_body={"chat_template_kwargs": {"reasoning_effort": "high"}}) + return LlmAgent( name="weather_agent", description= @@ -54,7 +58,7 @@ def create_agent(): instruction=INSTRUCTION, tools=[weather_tool, forecast_tool], # Note: thinking_budget must be less than max_output_tokens - generate_content_config=GenerateContentConfig(max_output_tokens=10240, ), + generate_content_config=GenerateContentConfig(max_output_tokens=10240, http_options=http_options), # The model must be a thinking model to use this Planner; this configuration will not take effect for non-thinking models. planner=BuiltInPlanner(thinking_config=ThinkingConfig( include_thoughts=True, diff --git a/examples/llmagent_with_thinking/agent/prompts.py b/examples/llmagent_with_thinking/agent/prompts.py index 05f25c5..8e0ce0d 100644 --- a/examples/llmagent_with_thinking/agent/prompts.py +++ b/examples/llmagent_with_thinking/agent/prompts.py @@ -18,13 +18,21 @@ - Provide clear, useful weather information and suggestions **Available tools:** -1. `get_weather`: Get current weather information +1. `get_weather_report`: Get current weather information 2. `get_weather_forecast`: Get multi-day weather forecast **Tool usage guide:** -- When the user asks about the current weather, use `get_weather` +- When the user asks about the current weather, use `get_weather_report` - When the user asks about the weather for the next few days, use `get_weather_forecast` - If the query is not clear, you can use both tools at the same time +- Do not answer with weather data before the required tool result is available +- Do not guess, simulate, or invent tool results +- If a tool is needed, call the tool first and wait for the tool result before giving the final answer + +**Thinking guidance:** +- Keep reasoning concise and focused on choosing the right tool and city +- Do not repeat the tool usage rules or tool schema in your reasoning +- Do not draft the final answer in reasoning; use reasoning only to decide the next action **Reply format:** - Provide accurate weather information diff --git a/examples/llmagent_with_thinking/run_agent.py b/examples/llmagent_with_thinking/run_agent.py index 4aece79..4878e63 100644 --- a/examples/llmagent_with_thinking/run_agent.py +++ b/examples/llmagent_with_thinking/run_agent.py @@ -30,7 +30,7 @@ async def run_weather_agent(): demo_queries = [ "What's the weather like today?", "What's the current weather in Guangzhou?", - "What will the weather be like in Shanghai for the next three days?", + "Please check both the current weather in Guangzhou and the three-day weather forecast for Shanghai.", ] for query in demo_queries: @@ -51,7 +51,47 @@ async def run_weather_agent(): user_content = Content(parts=[Part.from_text(text=query)]) - print("🤖 Assistant: ", end="", flush=True) + printed_thinking = False + printed_assistant = False + in_thinking = False + thinking_line_start = False + assistant_text_started = False + + def print_assistant_header() -> None: + nonlocal printed_assistant + if printed_assistant: + return + if printed_thinking: + print("\n") + print("🤖 Assistant: ", end="", flush=True) + printed_assistant = True + + def print_thinking_header() -> None: + nonlocal in_thinking, 
printed_thinking, thinking_line_start + if in_thinking: + return + print("\n 💭 Thinking: ", end="", flush=True) + in_thinking = True + printed_thinking = True + thinking_line_start = False + + def print_thinking_text(text: str) -> None: + nonlocal thinking_line_start + for line in text.splitlines(keepends=True): + if thinking_line_start: + print(" ", end="", flush=True) + print(line, end="", flush=True) + thinking_line_start = line.endswith("\n") + + def close_thinking_section() -> None: + nonlocal in_thinking, thinking_line_start + if in_thinking: + if not thinking_line_start: + print() + print(" 💭 End Thinking") + in_thinking = False + thinking_line_start = False + async for event in runner.run_async(user_id=user_id, session_id=current_session_id, new_message=user_content): if not event.content or not event.content.parts: continue @@ -59,15 +99,30 @@ async def run_weather_agent(): if event.partial: for part in event.content.parts: if part.text: - print(part.text, end="", flush=True) + if part.thought: + if assistant_text_started: + continue + print_thinking_header() + print_thinking_text(part.text) + else: + close_thinking_section() + print_assistant_header() + assistant_text_started = True + print(part.text, end="", flush=True) continue for part in event.content.parts: - if part.thought: - continue - if part.function_call: + if part.thought and part.text and not printed_thinking and not assistant_text_started: + print_thinking_header() + print_thinking_text(part.text) + elif part.function_call: + close_thinking_section() + print_assistant_header() print(f"\n🔧 [Invoke Tool:: {part.function_call.name}({part.function_call.args})]") elif part.function_response: + close_thinking_section() + printed_thinking = False + print_assistant_header() print(f"📊 [Tool Result: {part.function_response.response}]") # elif part.text: # print(f"\n✅ {part.text}") diff --git a/trpc_agent_sdk/models/_openai_model.py b/trpc_agent_sdk/models/_openai_model.py index e5ad05d..5fbc059 100644 --- a/trpc_agent_sdk/models/_openai_model.py +++ b/trpc_agent_sdk/models/_openai_model.py @@ -40,9 +40,10 @@ from ._llm_request import LlmRequest from ._llm_response import LlmResponse from ._registry import register_model +from .openai_adapter import get_openai_adapter from .tool_prompt import ToolPromptFactory from .tool_prompt import get_factory -from .tool_prompt._base import ToolPrompt +from .tool_prompt import ToolPrompt class ToolCall(BaseModel): @@ -102,7 +103,7 @@ class ApiParamsKey(str, Enum): PARALLEL_TOOL_CALLS = "parallel_tool_calls" -@register_model(model_name="OpenAIModel", supported_models=[r"gpt-.*", r"o1-.*", r"deepseek-.*"]) +@register_model(model_name="OpenAIModel", supported_models=[r"gpt-.*", r"o1-.*", r"deepseek-.*", r"hy3-.*"]) class OpenAIModel(LLMModel): """OpenAI model implementation using the abstract model interface. 
@@ -162,6 +163,7 @@ def __init__( **kwargs, ): super().__init__(model_name, filters_name, **kwargs) + self._adapter = get_openai_adapter(self._model_name, self._base_url) # Extract OpenAI-specific config self.organization: str = kwargs.get(const.ORGANIZATION, "") @@ -188,6 +190,20 @@ def __init__( elif not (isinstance(self.tool_prompt, type) and issubclass(self.tool_prompt, ToolPrompt)): raise ValueError(f"tool_prompt must be a string or ToolPrompt class, got {type(self.tool_prompt)}") + def _refresh_adapter(self) -> None: + """Refresh provider adapter after model or endpoint changes.""" + self._adapter = get_openai_adapter(self._model_name, self._base_url) + + @override + def set_base_url(self, value: str) -> None: + super().set_base_url(value) + self._refresh_adapter() + + @override + def set_model_name(self, value: str) -> None: + super().set_model_name(value) + self._refresh_adapter() + def _create_async_client(self): """Create a new async client instance.""" @@ -286,7 +302,10 @@ def _format_messages(self, request: LlmRequest) -> List[Dict[str, Any]]: # Handle different content structures if all(conditions_iter): # Simple text message - formatted_messages.append({const.ROLE: role, const.CONTENT: parts[0].text}) + message = {const.ROLE: role, const.CONTENT: parts[0].text} + if self._adapter.should_backfill_reasoning_content(role, message): + message[const.REASONING_CONTENT] = "" + formatted_messages.append(message) else: # Complex message with multiple parts or function calls/responses # Separate function responses from other content @@ -297,6 +316,8 @@ def _format_messages(self, request: LlmRequest) -> List[Dict[str, Any]]: for part in parts: # type: ignore if part.text: + if part.thought: + continue text_parts.append(part.text) elif part.inline_data and part.inline_data.mime_type: # Handle image data - convert to OpenAI vision format @@ -315,8 +336,9 @@ def _format_messages(self, request: LlmRequest) -> List[Dict[str, Any]]: "arguments": (part.function_call.args if isinstance(part.function_call.args, str) else json.dumps(part.function_call.args, ensure_ascii=False)), }, - "thought_signature": self._get_part_thought_signature(part), } + if self._adapter.should_include_thought_signature(): + tool_call["thought_signature"] = self._get_part_thought_signature(part) tool_calls.append(tool_call) # If add_tools_to_prompt is enabled, skip tool calls (they're handled via text prompts) elif part.function_response: @@ -399,6 +421,9 @@ def _format_messages(self, request: LlmRequest) -> List[Dict[str, Any]]: if tool_calls and not self.add_tools_to_prompt: message[const.TOOL_CALLS] = tool_calls + if self._adapter.should_backfill_reasoning_content(role, message): + message[const.REASONING_CONTENT] = "" + formatted_messages.append(message) # Validate and fix message sequence for OpenAI compatibility @@ -571,6 +596,9 @@ def _set_thinking(self, request: LlmRequest, http_options: dict): thinking_config = request.config.thinking_config + if self._adapter.apply_thinking(request, http_options): + return + # Only set thinking parameters if include_thoughts is True if not thinking_config.include_thoughts: return @@ -682,9 +710,11 @@ def _process_usage(self, chunk_dict: dict) -> Optional[GenerateContentResponseUs usage_data = chunk_dict.get(const.USAGE) if usage_data is None: return None + completion_details = usage_data.get("completion_tokens_details") or {} return GenerateContentResponseUsageMetadata( prompt_token_count=usage_data.get("prompt_tokens", 0), 
candidates_token_count=usage_data.get("completion_tokens", 0), + thoughts_token_count=completion_details.get("reasoning_tokens"), total_token_count=usage_data.get("total_tokens", 0), ) @@ -942,9 +972,11 @@ def _process_usage_from_response(self, response_dict: dict) -> Optional[Generate return None usage_data: dict[str, int] = response_dict[const.USAGE] + completion_details = usage_data.get("completion_tokens_details") or {} return GenerateContentResponseUsageMetadata( prompt_token_count=usage_data.get("prompt_tokens", 0), candidates_token_count=usage_data.get("completion_tokens", 0), + thoughts_token_count=completion_details.get("reasoning_tokens"), total_token_count=usage_data.get("total_tokens", 0), ) @@ -974,6 +1006,7 @@ def _create_response_with_content(self, response_dict: dict) -> LlmResponse: # Extract content text_content = message.get(const.CONTENT, "") + reasoning_content = message.get(const.REASONING_CONTENT) # Check for tool calls tool_calls = self._process_tool_calls_from_message(message) @@ -982,7 +1015,7 @@ def _create_response_with_content(self, response_dict: dict) -> LlmResponse: if self.add_tools_to_prompt and text_content and not tool_calls: try: tool_prompt = self._create_tool_prompt() - parsed_function_calls = tool_prompt.parse_function(text_content) + parsed_function_calls = self._adapter.parse_tool_prompt_function_calls(text_content, tool_prompt) if parsed_function_calls: # Convert FunctionCall objects to ToolCall objects tool_calls = [] @@ -997,8 +1030,13 @@ def _create_response_with_content(self, response_dict: dict) -> LlmResponse: parts = [] + if reasoning_content: + content_part = Part.from_text(text=reasoning_content) + content_part.thought = True + parts.append(content_part) + # Add text content if present - if text_content: + if text_content and not (tool_calls and self._adapter.should_suppress_tool_prompt_text()): content_part = Part.from_text(text=text_content) content_part.thought = False # Regular text content is not thought parts.append(content_part) @@ -1218,6 +1256,10 @@ def _build_response_format(self, config: GenerateContentConfig) -> Optional[Dict """ # Handle response_mime_type and response_schema if config.response_mime_type == "application/json": + handled, response_format = self._adapter.build_response_format(config) + if handled: + return response_format + if config.response_schema: # response_schema must be pydantic.BaseModel if not isinstance(config.response_schema, type(BaseModel)): @@ -1374,6 +1416,9 @@ async def _generate_async_impl(self, # Update request with merged config request.config = merged_config + if (request.config and request.config.tools and self._adapter.requires_add_tools_to_prompt() + and not self.add_tools_to_prompt): + raise ValueError(f"{self._model_name} requires add_tools_to_prompt=True when tools are used.") # Prepare OpenAI API parameters messages = self._format_messages(request) @@ -1393,11 +1438,14 @@ async def _generate_async_impl(self, # Log warnings for unsupported configuration options self._log_unsupported_config_options(request.config) if request.config.max_output_tokens: - # Use max_completion_tokens for newer models (preferred), fallback to max_tokens - api_params[ApiParamsKey.MAX_COMPLETION_TOKENS] = request.config.max_output_tokens - # Keep max_tokens for backward compatibility (skip for gpt models) - if "gpt-5" not in self._model_name.lower(): + if self._adapter.use_max_tokens_only(): api_params[ApiParamsKey.MAX_TOKENS] = request.config.max_output_tokens + else: + # Use max_completion_tokens for newer 
models (preferred), fallback to max_tokens + api_params[ApiParamsKey.MAX_COMPLETION_TOKENS] = request.config.max_output_tokens + # Keep max_tokens for backward compatibility (skip for gpt models) + if "gpt-5" not in self._model_name.lower(): + api_params[ApiParamsKey.MAX_TOKENS] = request.config.max_output_tokens if request.config.temperature is not None: api_params[ApiParamsKey.TEMPERATURE] = request.config.temperature if request.config.top_p is not None: @@ -1406,15 +1454,18 @@ async def _generate_async_impl(self, api_params[ApiParamsKey.STOP] = request.config.stop_sequences # Additional OpenAI-specific parameters - if request.config.frequency_penalty is not None: + if (request.config.frequency_penalty is not None + and not self._adapter.should_skip_config_param("frequency_penalty")): api_params[ApiParamsKey.FREQUENCY_PENALTY] = request.config.frequency_penalty - if request.config.presence_penalty is not None: + if (request.config.presence_penalty is not None + and not self._adapter.should_skip_config_param("presence_penalty")): api_params[ApiParamsKey.PRESENCE_PENALTY] = request.config.presence_penalty - if request.config.seed is not None: + if request.config.seed is not None and not self._adapter.should_skip_config_param("seed"): api_params[ApiParamsKey.SEED] = request.config.seed # Handle candidate count (maps to OpenAI's 'n' parameter) - if request.config.candidate_count is not None and request.config.candidate_count > 0: + if (request.config.candidate_count is not None and request.config.candidate_count > 0 + and not self._adapter.should_skip_config_param("candidate_count")): api_params[ApiParamsKey.N] = request.config.candidate_count # Handle logprobs configuration @@ -1481,8 +1532,13 @@ async def _generate_stream(self, # Create tool prompt instance for streaming if needed tool_prompt = None + streaming_text_filter_state = None if self.add_tools_to_prompt: tool_prompt = self._create_tool_prompt() + streaming_text_filter_state = { + "content": self._adapter.create_streaming_text_filter_state(), + "reasoning": self._adapter.create_streaming_text_filter_state(), + } client = self._create_async_client() try: @@ -1547,11 +1603,20 @@ async def _generate_stream(self, if delta.get(const.REASONING_CONTENT): reasoning_content = delta.get(const.REASONING_CONTENT) if reasoning_content is not None: + partial_text = reasoning_content + if (tool_prompt and streaming_text_filter_state is not None + and self._adapter.should_filter_reasoning_text()): + reasoning_filter_state = streaming_text_filter_state["reasoning"] + partial_text = self._adapter.filter_streaming_text(reasoning_content, + reasoning_filter_state) + if not partial_text: + continue + # Reasoning content is always thinking content - thought_content += reasoning_content + thought_content += partial_text # Set thought flag to True for reasoning content - content_part = Part.from_text(text=reasoning_content) + content_part = Part.from_text(text=partial_text) content_part.thought = True partial_content = Content(parts=[content_part], role=const.MODEL) @@ -1569,8 +1634,15 @@ async def _generate_stream(self, else: thought_content += content + partial_text = content + if tool_prompt and streaming_text_filter_state is not None: + content_filter_state = streaming_text_filter_state["content"] + partial_text = self._adapter.filter_streaming_text(content, content_filter_state) + if not partial_text: + continue + # Set thought flag based on current thinking state - content_part = Part.from_text(text=content) + content_part = 
Part.from_text(text=partial_text) content_part.thought = is_thinking partial_content = Content(parts=[content_part], role=const.MODEL) @@ -1584,6 +1656,30 @@ async def _generate_stream(self, if usage: last_usage = usage + if tool_prompt and streaming_text_filter_state is not None: + if self._adapter.should_filter_reasoning_text(): + flushed_reasoning_text = self._adapter.flush_streaming_text( + streaming_text_filter_state["reasoning"]) + if flushed_reasoning_text: + thought_content += flushed_reasoning_text + content_part = Part.from_text(text=flushed_reasoning_text) + content_part.thought = True + partial_content = Content(parts=[content_part], role=const.MODEL) + yield LlmResponse(content=partial_content, + partial=True, + response_id=response_id, + custom_metadata={"stream_filter_flushed": "reasoning"}) + + flushed_content_text = self._adapter.flush_streaming_text(streaming_text_filter_state["content"]) + if flushed_content_text: + content_part = Part.from_text(text=flushed_content_text) + content_part.thought = is_thinking + partial_content = Content(parts=[content_part], role=const.MODEL) + yield LlmResponse(content=partial_content, + partial=True, + response_id=response_id, + custom_metadata={"stream_filter_flushed": "content"}) + # Yield final complete response final_content = None @@ -1599,7 +1695,8 @@ async def _generate_stream(self, complete_tool_calls = self._create_complete_tool_calls(accumulated_tool_calls) if tool_prompt and accumulated_content and not complete_tool_calls: try: - parsed_function_calls = tool_prompt.parse_function(accumulated_content) + parsed_function_calls = self._adapter.parse_tool_prompt_function_calls( + accumulated_content, tool_prompt) if parsed_function_calls: # Convert FunctionCall objects to ToolCall objects complete_tool_calls = [] @@ -1607,14 +1704,14 @@ async def _generate_stream(self, tool_call = ToolCall(id=f"call_{uuid.uuid4().hex[:24]}", name=func_call.name, arguments=func_call.args) - complete_tool_calls.append(tool_call) + complete_tool_calls.append(tool_call) logger.debug("Parsed %s function calls from final accumulated content", len(complete_tool_calls)) except Exception as ex: # pylint: disable=broad-except logger.warning("Failed to parse function calls from final accumulated content: %s", ex) # Add text content if present - if accumulated_content: + if accumulated_content and not complete_tool_calls: logger.debug("Final accumulated regular content: %s...", accumulated_content[:200]) content_part = Part.from_text(text=accumulated_content) content_part.thought = False # Final accumulated content represents the answer, not thinking diff --git a/trpc_agent_sdk/models/openai_adapter/__init__.py b/trpc_agent_sdk/models/openai_adapter/__init__.py new file mode 100644 index 0000000..b41e7c1 --- /dev/null +++ b/trpc_agent_sdk/models/openai_adapter/__init__.py @@ -0,0 +1,36 @@ +# Tencent is pleased to support the open source community by making tRPC-Agent-Python available. +# +# Copyright (C) 2026 Tencent. All rights reserved. +# +# tRPC-Agent-Python is licensed under Apache-2.0. 
+"""Adapters for OpenAI-compatible model providers.""" + +from __future__ import annotations + +from typing import Optional + +from ._base import DefaultOpenAIAdapter +from ._base import OpenAIAdapter +from ._base import ToolPromptTextFilterMixin +from ._deepseek import DeepSeekAdapter +from ._hunyuan import HunyuanHy3PreviewAdapter + + +def get_openai_adapter(model_name: str, base_url: Optional[str] = None) -> OpenAIAdapter: + """Return the provider adapter for an OpenAI-compatible model.""" + model_name_lower = model_name.lower() + if model_name_lower == "hy3-preview": + return HunyuanHy3PreviewAdapter(model_name=model_name, base_url=base_url) + if model_name_lower.startswith("deepseek-"): + return DeepSeekAdapter(model_name=model_name, base_url=base_url) + return DefaultOpenAIAdapter(model_name=model_name, base_url=base_url) + + +__all__ = [ + "DefaultOpenAIAdapter", + "DeepSeekAdapter", + "HunyuanHy3PreviewAdapter", + "OpenAIAdapter", + "ToolPromptTextFilterMixin", + "get_openai_adapter", +] diff --git a/trpc_agent_sdk/models/openai_adapter/_base.py b/trpc_agent_sdk/models/openai_adapter/_base.py new file mode 100644 index 0000000..8d042ad --- /dev/null +++ b/trpc_agent_sdk/models/openai_adapter/_base.py @@ -0,0 +1,183 @@ +# Tencent is pleased to support the open source community by making tRPC-Agent-Python available. +# +# Copyright (C) 2026 Tencent. All rights reserved. +# +# tRPC-Agent-Python is licensed under Apache-2.0. +"""Base adapter for OpenAI-compatible model provider differences.""" + +from __future__ import annotations + +from typing import Any +from typing import List +from typing import Optional + +from trpc_agent_sdk.types import FunctionCall + +from .. import _constants as const + +_TOOL_PROMPT_MARKERS = ( + "", + "", + "", + "", + "", + "", + "", + "", +} +_TOOL_PROMPT_MARKER_LOOKBEHIND = max( + max(len(marker) for marker in _TOOL_PROMPT_MARKERS), + max(len(marker) for marker in _TOOL_PROMPT_MARKER_ENDS.values()), +) - 1 + + +class OpenAIAdapter: + """Adapter hook points for provider-specific OpenAI-compatible behavior.""" + + def __init__(self, model_name: str, base_url: Optional[str] = None): + self.model_name = model_name + self.base_url = base_url + + def use_max_tokens_only(self) -> bool: + """Whether max_output_tokens should map only to max_tokens.""" + return False + + def should_skip_config_param(self, param_name: str) -> bool: + """Whether a GenerateContentConfig field should be skipped for this provider.""" + return False + + def should_include_thought_signature(self) -> bool: + """Whether tool call history should include thought_signature.""" + return True + + def should_backfill_reasoning_content(self, role: str, message: dict[str, Any]) -> bool: + """Whether assistant history should include an empty reasoning_content field.""" + return False + + def build_response_format(self, config: Any) -> tuple[bool, Optional[dict[str, Any]]]: + """Return provider-specific response_format. + + The first tuple item indicates whether the adapter handled the config. + """ + return False, None + + def apply_thinking(self, request: Any, http_options: dict[str, Any]) -> bool: + """Apply provider-specific thinking options. + + Returns True when the adapter handled thinking and the default OpenAI + thinking mapping should be skipped. 
+ """ + return False + + def parse_tool_prompt_function_calls(self, content: str, tool_prompt: Any) -> List[FunctionCall]: + """Parse text-form tool calls emitted by a provider.""" + return tool_prompt.parse_function(content) + + def requires_add_tools_to_prompt(self) -> bool: + """Whether this adapter requires ToolPrompt mode when tools are used.""" + return False + + def should_suppress_tool_prompt_text(self) -> bool: + """Whether parsed text-form tool calls should be hidden from final text.""" + return False + + def should_filter_reasoning_text(self) -> bool: + """Whether ToolPrompt filtering should also apply to reasoning_content.""" + return False + + def create_streaming_text_filter_state(self) -> dict[str, Any]: + """Create per-stream state for filtering provider-specific text chunks.""" + return {} + + def filter_streaming_text(self, text: str, state: dict[str, Any]) -> str: + """Filter a streaming text chunk before yielding it to users.""" + return text + + def flush_streaming_text(self, state: dict[str, Any]) -> str: + """Flush any buffered streaming text after the stream ends.""" + return "" + + +class DefaultOpenAIAdapter(OpenAIAdapter): + """Default OpenAI-compatible adapter with no provider overrides.""" + + pass + + +class ToolPromptTextFilterMixin: + """Opt-in filtering for models that emit ToolPrompt XML as streamed text.""" + + def should_suppress_tool_prompt_text(self) -> bool: + return True + + def create_streaming_text_filter_state(self) -> dict[str, Any]: + return { + "buffer": "", + "suppress": False, + "suppress_until": "", + } + + def filter_streaming_text(self, text: str, state: dict[str, Any]) -> str: + if state.get("suppress"): + buffer = f"{state.get('buffer', '')}{text}" + suppress_until = state.get("suppress_until") or "" + marker_start = buffer.find(suppress_until) if suppress_until else -1 + if marker_start < 0: + state["buffer"] = buffer[-_TOOL_PROMPT_MARKER_LOOKBEHIND:] + return "" + + resume_at = marker_start + len(suppress_until) + state["buffer"] = "" + state["suppress"] = False + state["suppress_until"] = "" + return self.filter_streaming_text(buffer[resume_at:], state) + + buffer = f"{state.get('buffer', '')}{text}" + marker_start, marker = self._find_first_tool_prompt_marker(buffer) + if marker: + state["buffer"] = "" + state["suppress"] = True + state["suppress_until"] = _TOOL_PROMPT_MARKER_ENDS[marker] + return buffer[:marker_start] + self.filter_streaming_text(buffer[marker_start:], state) + + if len(buffer) <= _TOOL_PROMPT_MARKER_LOOKBEHIND: + state["buffer"] = buffer + return "" + + split_at = len(buffer) - _TOOL_PROMPT_MARKER_LOOKBEHIND + state["buffer"] = buffer[split_at:] + return buffer[:split_at] + + def flush_streaming_text(self, state: dict[str, Any]) -> str: + if state.get("suppress"): + return "" + + buffer = state.get("buffer", "") + state["buffer"] = "" + marker_start, marker = self._find_first_tool_prompt_marker(buffer) + if marker: + state["suppress"] = True + state["suppress_until"] = _TOOL_PROMPT_MARKER_ENDS[marker] + return buffer[:marker_start] + return buffer + + def _find_first_tool_prompt_marker(self, text: str) -> tuple[int, Optional[str]]: + marker_positions = [(text.find(marker), marker) for marker in _TOOL_PROMPT_MARKERS if marker in text] + if not marker_positions: + return -1, None + return min(marker_positions, key=lambda item: item[0]) + + +def has_reasoning_content(message: dict[str, Any]) -> bool: + """Return whether message already includes reasoning_content.""" + return const.REASONING_CONTENT in message diff 
--git a/trpc_agent_sdk/models/openai_adapter/_deepseek.py b/trpc_agent_sdk/models/openai_adapter/_deepseek.py new file mode 100644 index 0000000..64c82ba --- /dev/null +++ b/trpc_agent_sdk/models/openai_adapter/_deepseek.py @@ -0,0 +1,81 @@ +# Tencent is pleased to support the open source community by making tRPC-Agent-Python available. +# +# Copyright (C) 2026 Tencent. All rights reserved. +# +# tRPC-Agent-Python is licensed under Apache-2.0. +"""DeepSeek adapter for OpenAI-compatible chat completions.""" + +from __future__ import annotations + +from typing import Any +from typing import Optional + +from trpc_agent_sdk.log import logger + +from .. import _constants as const +from ._base import OpenAIAdapter +from ._base import has_reasoning_content + + +class DeepSeekAdapter(OpenAIAdapter): + """Provider-specific behavior for DeepSeek's OpenAI-compatible API.""" + + def __init__(self, model_name: str, base_url: Optional[str] = None): + super().__init__(model_name=model_name, base_url=base_url) + self._model_name_lower = model_name.lower() + + def is_v4_model(self) -> bool: + """Return whether the current model uses DeepSeek v4 chat completions.""" + return self._model_name_lower.startswith("deepseek-v4-") + + def use_max_tokens_only(self) -> bool: + return True + + def should_skip_config_param(self, param_name: str) -> bool: + return param_name in { + "frequency_penalty", + "presence_penalty", + "seed", + "candidate_count", + } + + def should_include_thought_signature(self) -> bool: + return False + + def should_backfill_reasoning_content(self, role: str, message: dict[str, Any]) -> bool: + if not self.is_v4_model() or role != const.ASSISTANT: + return False + if has_reasoning_content(message): + return False + return bool(message.get(const.CONTENT) or message.get(const.TOOL_CALLS)) + + def build_response_format(self, config: Any) -> tuple[bool, Optional[dict[str, Any]]]: + if config.response_mime_type != "application/json": + return False, None + if config.response_schema or config.response_json_schema: + logger.warning("DeepSeek only supports JSON object response_format; response schema is ignored.") + return True, {"type": "json_object"} + + def apply_thinking(self, request: Any, http_options: dict[str, Any]) -> bool: + if not self.is_v4_model(): + return False + if not request.config or not request.config.thinking_config: + return False + + thinking_config = request.config.thinking_config + if "extra_body" not in http_options: + http_options["extra_body"] = {} + processed_extra_body = http_options["extra_body"] + thinking_body = dict(processed_extra_body.get("thinking") or {}) + + if thinking_config.include_thoughts and thinking_config.thinking_budget != 0: + thinking_body["type"] = "enabled" + thinking_body.setdefault( + "reasoning_effort", + "max" if thinking_config.thinking_budget and thinking_config.thinking_budget > 0 else "high", + ) + else: + thinking_body["type"] = "disabled" + + processed_extra_body["thinking"] = thinking_body + return True diff --git a/trpc_agent_sdk/models/openai_adapter/_hunyuan.py b/trpc_agent_sdk/models/openai_adapter/_hunyuan.py new file mode 100644 index 0000000..d7e1348 --- /dev/null +++ b/trpc_agent_sdk/models/openai_adapter/_hunyuan.py @@ -0,0 +1,79 @@ +# Tencent is pleased to support the open source community by making tRPC-Agent-Python available. +# +# Copyright (C) 2026 Tencent. All rights reserved. +# +# tRPC-Agent-Python is licensed under Apache-2.0. 
+"""Hunyuan adapter for OpenAI-compatible chat completions.""" + +from __future__ import annotations + +import json +import re +from typing import Any +from typing import List +from typing import Optional + +from trpc_agent_sdk.types import FunctionCall + +from ._base import OpenAIAdapter +from ._base import ToolPromptTextFilterMixin + + +class HunyuanHy3PreviewAdapter(ToolPromptTextFilterMixin, OpenAIAdapter): + """Provider-specific behavior for the hy3-preview model.""" + + def __init__(self, model_name: str, base_url: Optional[str] = None): + super().__init__(model_name=model_name, base_url=base_url) + + def parse_tool_prompt_function_calls(self, content: str, tool_prompt: Any) -> List[FunctionCall]: + function_calls = self._parse_hunyuan_tool_calls(content) + if function_calls: + return function_calls + return tool_prompt.parse_function(content) + + def requires_add_tools_to_prompt(self) -> bool: + return True + + def should_filter_reasoning_text(self) -> bool: + return True + + def _parse_hunyuan_tool_calls(self, content: str) -> List[FunctionCall]: + function_calls = [] + matches = re.findall(r"(.*?)", content, re.DOTALL) + + for match in matches: + if "" not in match: + continue + + tool_name, params_content = match.split("", 1) + args = self._parse_hunyuan_tool_args(params_content) + function_calls.append(FunctionCall(name=tool_name.strip(), args=args)) + + return function_calls + + def _parse_hunyuan_tool_args(self, params_content: str) -> dict[str, Any]: + args: dict[str, Any] = {} + param_matches = re.findall( + r"(.*?)\s*(.*?)", + params_content, + re.DOTALL, + ) + if param_matches: + for key, value in param_matches: + args[key.strip()] = self._parse_arg_value(value.strip()) + return args + + params_content = params_content.strip() + if not params_content: + return args + + parsed_value = self._parse_arg_value(params_content) + if isinstance(parsed_value, dict): + return parsed_value + return {"value": parsed_value} + + def _parse_arg_value(self, value: str) -> Any: + try: + return json.loads(value) + except json.JSONDecodeError: + return value