From fb7464d34999a580ec2fb44cada3b183e57edf0b Mon Sep 17 00:00:00 2001 From: Null <1708213363@qq.com> Date: Sat, 4 Jul 2026 15:32:51 +0800 Subject: [PATCH 1/2] feat(search): add firecrawl search provider Add configurable web search priority and Firecrawl search support for web_agent. Keep disabled search tools hidden from the web_agent tool schema and document the new config/WebUI fields. Tests: uv run pytest tests/; uv run ruff check .; uv run ruff format --check .; uv run mypy . Co-authored-by: GPT-5 --- ARCHITECTURE.md | 2 +- config.toml.example | 20 +- docs/configuration.md | 19 +- docs/usage.md | 4 +- src/Undefined/ai/prompts/system_context.py | 33 ++- src/Undefined/config/config_class.py | 4 + src/Undefined/config/env_registry.py | 4 + src/Undefined/config/load_sections/network.py | 35 +++ src/Undefined/config/search.py | 37 +++ src/Undefined/skills/agents/README.md | 5 +- src/Undefined/skills/agents/runner/context.py | 61 +++++ src/Undefined/skills/agents/runner/tools.py | 14 +- .../skills/agents/web_agent/README.md | 7 +- .../skills/agents/web_agent/config.json | 2 +- .../skills/agents/web_agent/prompt.md | 5 +- .../tools/firecrawl_search/config.json | 21 ++ .../tools/firecrawl_search/handler.py | 143 +++++++++++ .../web_agent/tools/grok_search/config.json | 2 +- .../web_agent/tools/web_search/config.json | 2 +- tests/test_config_env_registry.py | 9 + tests/test_firecrawl_search_tool.py | 242 ++++++++++++++++++ tests/test_search_config.py | 103 ++++++++ 22 files changed, 752 insertions(+), 22 deletions(-) create mode 100644 src/Undefined/config/search.py create mode 100644 src/Undefined/skills/agents/web_agent/tools/firecrawl_search/config.json create mode 100644 src/Undefined/skills/agents/web_agent/tools/firecrawl_search/handler.py create mode 100644 tests/test_firecrawl_search_tool.py create mode 100644 tests/test_search_config.py diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md index c7eb8db3..dbbb80f5 100644 --- a/ARCHITECTURE.md +++ b/ARCHITECTURE.md @@ -114,7 +114,7 @@ graph TB subgraph IntelligentAgents["智能体 Agents (skills/agents/, 7个)"] A_Info["info_agent
信息查询助手
(18个工具)
• weather_query
• *hot 热搜
• bilibili_*
• arxiv_search
• whois"] - A_Web["web_agent
网络搜索助手
(3个工具 + MCP)
• web_search
• crawl_webpage
• Playwright MCP"] + A_Web["web_agent
网络搜索助手
(4个工具 + MCP)
• grok_search
• firecrawl_search
• web_search
• crawl_webpage
• Playwright MCP"] A_File["file_analysis_agent
文件分析助手
• extract_* (PDF/Word/Excel/PPT)
• describe_pdf_page
• analyze_code
• analyze_multimodal"] A_Naga["naga_code_analysis_agent
NagaAgent 代码分析
(7个工具)
• read_file / glob
• search_file_content"] A_Self["undefined_self_code_agent
Undefined 自身代码查阅
(4个工具)
• read_file / list_directory
• glob / search_file_content"] diff --git a/config.toml.example b/config.toml.example index 05d0ace9..edc866de 100644 --- a/config.toml.example +++ b/config.toml.example @@ -1002,13 +1002,29 @@ show_uptime = true # zh: 搜索服务配置。 # en: Search service config. [search] +# zh: web_agent 搜索工具优先级。关闭的工具会从可用工具中隐藏;开启后模型按该顺序优先考虑,但不会被代码硬性路由。 +# en: Search tool priority for web_agent. Disabled tools are hidden; enabled tools are preferred in this order by prompt guidance, not hard routing. +priority = ["grok_search", "firecrawl_search", "web_search"] # zh: SearxNG 搜索服务地址,例如 http://127.0.0.1:8849。 # en: SearxNG service URL, e.g. http://127.0.0.1:8849. searxng_url = "" -# zh: 是否在 web_agent 中启用 grok_search。启用后该工具会优先于 web_search 暴露给模型。 -# en: Enable grok_search in web_agent. When enabled, this tool is exposed with higher priority than web_search. +# zh: 是否在 web_agent 中启用 grok_search。关闭时该工具会从 web_agent 工具列表中隐藏。 +# en: Enable grok_search in web_agent. When disabled, this tool is hidden from the web_agent tool list. grok_search_enabled = false +# zh: Firecrawl 搜索服务配置。 +# en: Firecrawl search service config. +[search.firecrawl] +# zh: 是否在 web_agent 中启用 firecrawl_search。默认关闭;关闭时该工具会隐藏。 +# en: Enable firecrawl_search in web_agent. Disabled by default; when disabled, this tool is hidden. +enabled = false +# zh: Firecrawl API Key。为空时使用 Firecrawl keyless 搜索;填写后会发送 Authorization: Bearer。 +# en: Firecrawl API key. Leave empty for Firecrawl keyless search; when set, Authorization: Bearer is sent. +api_key = "" +# zh: Firecrawl API 基础地址。 +# en: Firecrawl API base URL. +base_url = "https://api.firecrawl.dev" + # zh: 代理设置(可选)。 # en: Proxy settings (optional). [proxy] diff --git a/docs/configuration.md b/docs/configuration.md index caf0c401..7a047afa 100644 --- a/docs/configuration.md +++ b/docs/configuration.md @@ -648,12 +648,22 @@ Prompt caching 补充: | 字段 | 默认值 | 说明 | |---|---:|---| +| `priority` | `["grok_search", "firecrawl_search", "web_search"]` | `web_agent` 搜索工具优先级;关闭的工具会隐藏,开启后仅通过提示词引导选择 | | `searxng_url` | `""` | SearXNG 地址;为空则禁用搜索包装器 | -| `grok_search_enabled` | `false` | 是否在 `web_agent` 中暴露 `grok_search`;启用后该工具优先于 `web_search` | +| `grok_search_enabled` | `false` | 是否在 `web_agent` 中暴露 `grok_search`;关闭时隐藏该工具 | + +#### `search.firecrawl` + +| 字段 | 默认值 | 说明 | +|---|---:|---| +| `enabled` | `false` | 是否在 `web_agent` 中暴露 `firecrawl_search`;关闭时隐藏该工具 | +| `api_key` | `""` | Firecrawl API Key;为空时使用 keyless 搜索 | +| `base_url` | `"https://api.firecrawl.dev"` | Firecrawl API 基础地址 | 补充: - `searxng_url` 可热更新,运行时会重建搜索客户端。 -- `grok_search_enabled` 不需要重建客户端;它只影响 `web_agent` 的工具暴露。 +- `grok_search_enabled`、`search.firecrawl.*`、`priority` 不需要重建客户端;它们影响 `web_agent` 的工具暴露和提示词优先级。 +- `firecrawl_search` 调用 Firecrawl `POST /v2/search`;配置 `api_key` 时发送 `Authorization: Bearer`,为空则走 Firecrawl keyless。 --- @@ -1087,6 +1097,7 @@ Prompt caching 补充: - `render.browser_max_concurrency` 会在当前渲染任务空闲后重建渲染并发信号量。 - `skills.intro_autogen_*`(Agent intro 生成器配置刷新) - `search.searxng_url`(搜索客户端刷新) +- `search.priority` / `search.firecrawl.*` / `search.grok_search_enabled` 会随运行时配置更新,用于后续 `web_agent` 工具暴露和提示词优先级;无需重启。 - `skills.hot_reload*`(技能热重载任务重启) - `skills.hot_reload_interval/debounce`(配置热更新监听器自身重启) @@ -1293,7 +1304,11 @@ Prompt caching 补充: | TOML 路径 | 环境变量 | |-----------|----------| +| `search.priority` | `SEARCH_PRIORITY` | | `search.searxng_url` | `SEARXNG_URL` | +| `search.firecrawl.enabled` | `FIRECRAWL_SEARCH_ENABLED` | +| `search.firecrawl.api_key` | `FIRECRAWL_API_KEY` | +| `search.firecrawl.base_url` | `FIRECRAWL_BASE_URL` | #### `skills` diff --git a/docs/usage.md b/docs/usage.md index 3df41c5b..87268fd5 100644 --- a/docs/usage.md +++ b/docs/usage.md @@ -58,10 +58,12 @@ Undefined 搭载了基于 ChromaDB 向量数据库的后台认知系统,无需 负责网页搜索和网页内容爬取,能够获取互联网上的实时最新信息。 -**子工具**:`grok_search`(Grok 搜索)、`web_search`(通用搜索)、`crawl_webpage`(网页内容提取) +**子工具**:`grok_search`(Grok 搜索)、`firecrawl_search`(Firecrawl 搜索)、`web_search`(SearXNG 搜索)、`crawl_webpage`(网页内容提取) 启用 `grok_search` 后,工具会在调用 Grok 模型时注入检索约束:以服务端提供的当前时间为准,先调用搜索能力,使用多组搜索查询或多个搜索工具进行交叉检索,禁止编造,并在输出中给出来源。 +搜索工具优先级由 `[search].priority` 配置并注入 `web_agent` 提示词;关闭的搜索工具会从工具列表中隐藏。`firecrawl_search` 支持 Firecrawl API Key,未配置 Key 时使用 keyless 搜索。 + **示例:** > *"请搜索最近三天关于 DeepSeek 的最新动态并生成摘要。"* > *"帮我爬取这个网页的主要内容并整理成结构化笔记。"* diff --git a/src/Undefined/ai/prompts/system_context.py b/src/Undefined/ai/prompts/system_context.py index 7dbf2840..987d1993 100644 --- a/src/Undefined/ai/prompts/system_context.py +++ b/src/Undefined/ai/prompts/system_context.py @@ -4,6 +4,13 @@ from typing import Any +from Undefined.config.search import ( + DEFAULT_SEARCH_PRIORITY, + SEARCH_TOOL_FIRECRAWL, + SEARCH_TOOL_GROK, + SEARCH_TOOL_SEARXNG, +) + def select_system_prompt_path( *, @@ -79,8 +86,30 @@ def build_model_config_info(runtime_config: Any) -> str: knowledge_enabled = bool(getattr(runtime_config, "knowledge_enabled", False)) parts.append(f"- 知识库: {'已启用' if knowledge_enabled else '未启用'}") - grok_search_enabled = bool(getattr(runtime_config, "grok_search_enabled", False)) - parts.append(f"- 联网搜索: {'已启用' if grok_search_enabled else '未启用'}") + search_priority = list( + getattr(runtime_config, "search_priority", []) or DEFAULT_SEARCH_PRIORITY + ) + enabled_search_tools: list[str] = [] + if bool(getattr(runtime_config, "grok_search_enabled", False)): + enabled_search_tools.append(SEARCH_TOOL_GROK) + if bool(getattr(runtime_config, "firecrawl_search_enabled", False)): + enabled_search_tools.append(SEARCH_TOOL_FIRECRAWL) + if str(getattr(runtime_config, "searxng_url", "") or "").strip(): + enabled_search_tools.append(SEARCH_TOOL_SEARXNG) + ordered_enabled_search_tools = [ + name for name in search_priority if name in enabled_search_tools + ] + ordered_enabled_search_tools.extend( + name + for name in enabled_search_tools + if name not in ordered_enabled_search_tools + ) + if ordered_enabled_search_tools: + parts.append( + f"- 联网搜索: 已启用(优先级={' > '.join(ordered_enabled_search_tools)})" + ) + else: + parts.append("- 联网搜索: 未启用") memes = getattr(runtime_config, "memes", None) if memes is not None: diff --git a/src/Undefined/config/config_class.py b/src/Undefined/config/config_class.py index f49ede09..d81c885f 100644 --- a/src/Undefined/config/config_class.py +++ b/src/Undefined/config/config_class.py @@ -110,8 +110,12 @@ class Config: agent_intro_autogen_queue_interval: float agent_intro_autogen_max_tokens: int agent_intro_hash_path: str + search_priority: list[str] searxng_url: str grok_search_enabled: bool + firecrawl_search_enabled: bool + firecrawl_api_key: str + firecrawl_base_url: str use_proxy: bool http_proxy: str https_proxy: str diff --git a/src/Undefined/config/env_registry.py b/src/Undefined/config/env_registry.py index 99d10ea5..b3894d88 100644 --- a/src/Undefined/config/env_registry.py +++ b/src/Undefined/config/env_registry.py @@ -159,6 +159,10 @@ ("onebot", "token"): "ONEBOT_TOKEN", ("onebot", "ws_url"): "ONEBOT_WS_URL", ("proxy", "use_proxy"): "USE_PROXY", + ("search", "firecrawl", "api_key"): "FIRECRAWL_API_KEY", + ("search", "firecrawl", "base_url"): "FIRECRAWL_BASE_URL", + ("search", "firecrawl", "enabled"): "FIRECRAWL_SEARCH_ENABLED", + ("search", "priority"): "SEARCH_PRIORITY", ("search", "searxng_url"): "SEARXNG_URL", ("skills", "hot_reload"): "SKILLS_HOT_RELOAD", ("skills", "intro_hash_path"): "AGENT_INTRO_HASH_PATH", diff --git a/src/Undefined/config/load_sections/network.py b/src/Undefined/config/load_sections/network.py index 818b5555..0e413027 100644 --- a/src/Undefined/config/load_sections/network.py +++ b/src/Undefined/config/load_sections/network.py @@ -18,6 +18,7 @@ _normalize_base_url, _warn_env_fallback, ) +from ..search import normalize_search_priority logger = logging.getLogger(__name__) @@ -25,6 +26,9 @@ def load_network( data: dict[str, Any], *, config_path: Optional[Path] = None ) -> dict[str, Any]: + search_priority = normalize_search_priority( + _get_value(data, ("search", "priority"), "SEARCH_PRIORITY") + ) searxng_url = _coerce_str( _get_value(data, ("search", "searxng_url"), "SEARXNG_URL"), "" ) @@ -36,6 +40,33 @@ def load_network( ), False, ) + firecrawl_search_enabled = _coerce_bool( + _get_value( + data, + ("search", "firecrawl", "enabled"), + "FIRECRAWL_SEARCH_ENABLED", + ), + False, + ) + firecrawl_api_key = _coerce_str( + _get_value( + data, + ("search", "firecrawl", "api_key"), + "FIRECRAWL_API_KEY", + ), + "", + ) + firecrawl_base_url = _normalize_base_url( + _coerce_str( + _get_value( + data, + ("search", "firecrawl", "base_url"), + "FIRECRAWL_BASE_URL", + ), + "https://api.firecrawl.dev", + ), + "https://api.firecrawl.dev", + ) use_proxy = _coerce_bool( _get_value(data, ("proxy", "use_proxy"), "USE_PROXY"), True @@ -143,8 +174,12 @@ def load_network( # Bilibili 配置 return { + "search_priority": search_priority, "searxng_url": searxng_url, "grok_search_enabled": grok_search_enabled, + "firecrawl_search_enabled": firecrawl_search_enabled, + "firecrawl_api_key": firecrawl_api_key, + "firecrawl_base_url": firecrawl_base_url, "use_proxy": use_proxy, "http_proxy": http_proxy, "https_proxy": https_proxy, diff --git a/src/Undefined/config/search.py b/src/Undefined/config/search.py new file mode 100644 index 00000000..303c5dbb --- /dev/null +++ b/src/Undefined/config/search.py @@ -0,0 +1,37 @@ +"""Search tool configuration helpers.""" + +from __future__ import annotations + +from typing import Any, Final + +from .coercers import _coerce_str_list + +SEARCH_TOOL_GROK: Final = "grok_search" +SEARCH_TOOL_FIRECRAWL: Final = "firecrawl_search" +SEARCH_TOOL_SEARXNG: Final = "web_search" + +DEFAULT_SEARCH_PRIORITY: Final[tuple[str, ...]] = ( + SEARCH_TOOL_GROK, + SEARCH_TOOL_FIRECRAWL, + SEARCH_TOOL_SEARXNG, +) +KNOWN_SEARCH_TOOLS: Final[frozenset[str]] = frozenset(DEFAULT_SEARCH_PRIORITY) + + +def normalize_search_priority(value: Any) -> list[str]: + """Return a stable ordered search tool list from TOML/env input.""" + + raw_items = _coerce_str_list(value) + normalized: list[str] = [] + for item in raw_items: + if item not in KNOWN_SEARCH_TOOLS or item in normalized: + continue + normalized.append(item) + + if not normalized: + return list(DEFAULT_SEARCH_PRIORITY) + + for item in DEFAULT_SEARCH_PRIORITY: + if item not in normalized: + normalized.append(item) + return normalized diff --git a/src/Undefined/skills/agents/README.md b/src/Undefined/skills/agents/README.md index a0941365..700b88be 100644 --- a/src/Undefined/skills/agents/README.md +++ b/src/Undefined/skills/agents/README.md @@ -263,8 +263,9 @@ mv skills/tools/my_tool skills/agents/my_agent/tools/ - **功能**:联网搜索、网页阅读、来源核验和最新信息获取。 - **适用场景**:新闻/公告/资料搜索、指定 URL 摘要、多来源对比、时效性问题核验。 - **不适用**:天气、金价、热搜、Whois、B 站、arXiv 检索等结构化查询;用户附件或文件解析。 -- **子工具**:`grok_search`, `web_search`, `crawl_webpage`。 -- **grok_search 参数**:优先使用 `search_request`,用自然语言完整叙述搜索要求,不要只传关键词。 +- **子工具**:`grok_search`, `firecrawl_search`, `web_search`, `crawl_webpage`。 +- **搜索优先级**:由 `[search].priority` 注入提示词引导,关闭的搜索工具会从 `web_agent` 工具列表中隐藏。 +- **grok_search 参数**:使用 `search_request`,用自然语言完整叙述搜索要求,不要只传关键词。 ### file_analysis_agent(文件分析助手) - **功能**:分析用户提供的附件、内部 UID、URL、legacy file_id、arXiv 论文标识或 Bilibili 视频标识,提取文件内容。 diff --git a/src/Undefined/skills/agents/runner/context.py b/src/Undefined/skills/agents/runner/context.py index 3e4e399e..93c01ada 100644 --- a/src/Undefined/skills/agents/runner/context.py +++ b/src/Undefined/skills/agents/runner/context.py @@ -8,6 +8,7 @@ import aiofiles from Undefined.config.models import AgentModelConfig +from Undefined.config.search import DEFAULT_SEARCH_PRIORITY, KNOWN_SEARCH_TOOLS from Undefined.skills.agents.agent_tool_registry import AgentToolRegistry from Undefined.skills.anthropic_skills import AnthropicSkillRegistry @@ -25,6 +26,60 @@ async def load_prompt_text(agent_dir: Path, default_prompt: str) -> str: return default_prompt +def _tool_names(tools: list[dict[str, Any]]) -> set[str]: + names: set[str] = set() + for tool in tools: + function = tool.get("function") if isinstance(tool, dict) else None + name = function.get("name") if isinstance(function, dict) else None + if isinstance(name, str) and name: + names.add(name) + return names + + +def _build_web_agent_search_priority_prompt( + runtime_config: Any | None, + tools: list[dict[str, Any]], +) -> str: + available_names = _tool_names(tools) + priority = list(getattr(runtime_config, "search_priority", []) or []) + if not priority: + priority = list(DEFAULT_SEARCH_PRIORITY) + + ordered = [name for name in priority if name in available_names] + ordered.extend( + name + for name in DEFAULT_SEARCH_PRIORITY + if name in available_names and name not in ordered + ) + if not ordered: + return "" + + return "\n".join( + [ + "【搜索工具优先级】", + f"- 当前可用搜索工具优先级:{' > '.join(ordered)}。", + "- 搜索类任务优先考虑排在前面的工具;当前一个工具不可用、不适合、结果不足或需要交叉验证时,再使用后面的工具。", + "- 关闭的搜索工具不会出现在可用工具列表中;不要提议或假装调用未提供的工具。", + ] + ) + + +def _append_web_agent_runtime_prompt( + agent_name: str, + system_prompt: str, + runtime_config: Any | None, + tools: list[dict[str, Any]], +) -> str: + if agent_name != "web_agent": + return system_prompt + if not (_tool_names(tools) & KNOWN_SEARCH_TOOLS): + return system_prompt + priority_prompt = _build_web_agent_search_priority_prompt(runtime_config, tools) + if not priority_prompt: + return system_prompt + return f"{system_prompt.rstrip()}\n\n{priority_prompt}" + + @dataclass # 类:PreparedAgentRun class PreparedAgentRun: @@ -92,6 +147,12 @@ async def prepare_agent_run( global_enabled=global_enabled, ) system_prompt = await load_prompt_text(agent_dir, default_prompt) + system_prompt = _append_web_agent_runtime_prompt( + agent_name, + system_prompt, + runtime_config, + tools, + ) if agent_skill_registry and agent_skill_registry.has_skills(): skills_xml = agent_skill_registry.build_metadata_xml() diff --git a/src/Undefined/skills/agents/runner/tools.py b/src/Undefined/skills/agents/runner/tools.py index e7a01b0f..85c70256 100644 --- a/src/Undefined/skills/agents/runner/tools.py +++ b/src/Undefined/skills/agents/runner/tools.py @@ -12,6 +12,7 @@ webchat_agent_path, webchat_depth, ) +from Undefined.config.search import SEARCH_TOOL_FIRECRAWL, SEARCH_TOOL_GROK from Undefined.utils.tool_calls import parse_tool_arguments @@ -37,18 +38,21 @@ def _filter_tools_for_runtime_config( tools: list[dict[str, Any]], runtime_config: Any | None, ) -> list[dict[str, Any]]: - # web_agent 在 grok 未启用时从 schema 中剔除 grok_search + # web_agent 在搜索服务未启用时从 schema 中剔除对应工具。 if agent_name != "web_agent" or runtime_config is None: return tools - if bool(getattr(runtime_config, "grok_search_enabled", False)): - return tools - filtered: list[dict[str, Any]] = [] for tool in tools: function = tool.get("function") if isinstance(tool, dict) else None name = function.get("name") if isinstance(function, dict) else None - if name == "grok_search": + if name == SEARCH_TOOL_GROK and not bool( + getattr(runtime_config, "grok_search_enabled", False) + ): + continue + if name == SEARCH_TOOL_FIRECRAWL and not bool( + getattr(runtime_config, "firecrawl_search_enabled", False) + ): continue filtered.append(tool) return filtered diff --git a/src/Undefined/skills/agents/web_agent/README.md b/src/Undefined/skills/agents/web_agent/README.md index d8c04c82..eb3270b7 100644 --- a/src/Undefined/skills/agents/web_agent/README.md +++ b/src/Undefined/skills/agents/web_agent/README.md @@ -2,10 +2,13 @@ 用于网络搜索与网页抓取,支持结合 MCP 的浏览器能力。 默认子工具包括: -- `grok_search`:优先级最高的联网搜索工具(需显式启用),调用时使用 `search_request` 自然语言完整叙述搜索要求;工具会向 Grok 模型注入当前服务端时间、必须先搜索、交叉检索、禁止编造和必须给来源的约束 -- `web_search`:基于 SearXNG 的后备搜索工具 +- `grok_search`:Grok 联网搜索工具(需显式启用),调用时使用 `search_request` 自然语言完整叙述搜索要求;工具会向 Grok 模型注入当前服务端时间、必须先搜索、交叉检索、禁止编造和必须给来源的约束 +- `firecrawl_search`:Firecrawl Search 工具(需显式启用),支持 API Key;未填写 Key 时使用 Firecrawl keyless 搜索 +- `web_search`:基于 SearXNG 的搜索工具 - `crawl_webpage`:读取网页正文 +搜索工具选择由 `[search].priority` 注入提示词引导;关闭的搜索工具会从 `web_agent` 工具列表中隐藏。 + 目录结构: - `config.json`:智能体定义 - `intro.md`:能力说明 diff --git a/src/Undefined/skills/agents/web_agent/config.json b/src/Undefined/skills/agents/web_agent/config.json index c66441dd..f9183df6 100644 --- a/src/Undefined/skills/agents/web_agent/config.json +++ b/src/Undefined/skills/agents/web_agent/config.json @@ -2,7 +2,7 @@ "type": "function", "function": { "name": "web_agent", - "description": "网络搜索助手,提供优先级最高的 grok_search、SearXNG 搜索和网页内容获取功能,用于获取互联网上的最新信息。", + "description": "网络搜索助手,提供按配置优先级选择的 grok_search、firecrawl_search、SearXNG 搜索和网页内容获取功能,用于获取互联网上的最新信息。", "parameters": { "type": "object", "properties": { diff --git a/src/Undefined/skills/agents/web_agent/prompt.md b/src/Undefined/skills/agents/web_agent/prompt.md index 0002b58e..280299c7 100644 --- a/src/Undefined/skills/agents/web_agent/prompt.md +++ b/src/Undefined/skills/agents/web_agent/prompt.md @@ -6,8 +6,9 @@ - 不负责用户附件、PDF/Office/图片等文件解析;这些应交给 `file_analysis_agent`。 工具使用原则: -- 搜索类任务优先考虑 `grok_search`。调用它时用 `search_request` 写完整自然语言检索要求,包含用户明确提出的时间、地区、站点、排除项和回答形式;不要把用户没说的限制硬塞进去。 -- `grok_search` 不可用或不适合时,再使用 `web_search`。 +- 搜索类任务按系统注入的“搜索工具优先级”选择工具;排在前面的工具不可用、不适合、结果不足或需要交叉验证时,再使用后面的工具。 +- 调用 `grok_search` 时用 `search_request` 写完整自然语言检索要求,包含用户明确提出的时间、地区、站点、排除项和回答形式;不要把用户没说的限制硬塞进去。 +- 调用 `firecrawl_search` 或 `web_search` 时使用明确搜索查询,保留用户给出的限定条件,不要主动添加未要求的硬性范围。 - 用户给出具体 URL 时,可以直接读取网页;如果 URL 与问题目标不匹配,先说明再决定是否补充搜索。 - 涉及“今天、现在、最新、近期”等相对时间时,如有当前时间工具,先校准日期。 diff --git a/src/Undefined/skills/agents/web_agent/tools/firecrawl_search/config.json b/src/Undefined/skills/agents/web_agent/tools/firecrawl_search/config.json new file mode 100644 index 00000000..ea85ab21 --- /dev/null +++ b/src/Undefined/skills/agents/web_agent/tools/firecrawl_search/config.json @@ -0,0 +1,21 @@ +{ + "type": "function", + "function": { + "name": "firecrawl_search", + "description": "使用 Firecrawl Search 进行网页搜索。适合需要通用网页结果、可追溯链接和快速补充检索的场景。是否优先使用取决于当前 search.priority 配置;若排在 grok_search 之后,应在 grok_search 不可用、不适合或需要补充交叉检索时使用。", + "parameters": { + "type": "object", + "properties": { + "query": { + "type": "string", + "description": "搜索查询。保留用户明确要求的时间、地区、站点、排除项和关键词,不要主动添加用户未要求的硬性范围。" + }, + "num_results": { + "type": "integer", + "description": "返回结果数量(默认为 5,范围 1-10)" + } + }, + "required": ["query"] + } + } +} diff --git a/src/Undefined/skills/agents/web_agent/tools/firecrawl_search/handler.py b/src/Undefined/skills/agents/web_agent/tools/firecrawl_search/handler.py new file mode 100644 index 00000000..e7482e5a --- /dev/null +++ b/src/Undefined/skills/agents/web_agent/tools/firecrawl_search/handler.py @@ -0,0 +1,143 @@ +from __future__ import annotations + +import json +import logging +from typing import Any + +import httpx + +from Undefined.skills.http_client import request_with_retry +from Undefined.skills.http_config import build_url + +logger = logging.getLogger(__name__) + + +def _coerce_result_limit(value: Any) -> int: + try: + limit = int(value) + except (TypeError, ValueError): + limit = 5 + return min(10, max(1, limit)) + + +def _get_runtime_config(context: dict[str, Any]) -> Any | None: + runtime_config = context.get("runtime_config") + if runtime_config is not None: + return runtime_config + ai_client = context.get("ai_client") + return getattr(ai_client, "runtime_config", None) if ai_client is not None else None + + +def _build_headers(api_key: str) -> dict[str, str]: + headers = {"Content-Type": "application/json"} + if api_key: + headers["Authorization"] = f"Bearer {api_key}" + return headers + + +def _string_value(value: Any) -> str: + return str(value or "").strip() + + +def _format_firecrawl_results(items: list[Any], limit: int) -> str: + lines: list[str] = ["Firecrawl 搜索结果:"] + count = 0 + for index, item in enumerate(items[:limit], start=1): + if not isinstance(item, dict): + continue + title = _string_value(item.get("title")) or "无标题" + url = _string_value(item.get("url")) + description = _string_value(item.get("description")) + category = _string_value(item.get("category")) + position = item.get("position", index) + + lines.append(f"{index}. {title}") + if url: + lines.append(f" URL: {url}") + if description: + lines.append(f" 摘要: {description}") + if category: + lines.append(f" 分类: {category}") + if position: + lines.append(f" 排名: {position}") + count += 1 + + if count == 0: + return "Firecrawl 搜索未返回结果" + return "\n".join(lines) + + +def _extract_error_message(payload: Any) -> str: + if not isinstance(payload, dict): + return "" + for key in ("error", "message"): + value = payload.get(key) + if isinstance(value, str) and value.strip(): + return value.strip() + return "" + + +async def execute(args: dict[str, Any], context: dict[str, Any]) -> str: + query = _string_value(args.get("query")) + if not query: + return "搜索关键词不能为空" + + runtime_config = _get_runtime_config(context) + if runtime_config is None: + return "Firecrawl 搜索功能不可用(缺少运行时配置)" + if not bool(getattr(runtime_config, "firecrawl_search_enabled", False)): + return "Firecrawl 搜索功能未启用(search.firecrawl.enabled=false)" + + base_url = _string_value( + getattr(runtime_config, "firecrawl_base_url", "https://api.firecrawl.dev") + ) + if not base_url: + return "Firecrawl 搜索配置不完整:缺少 search.firecrawl.base_url" + + api_key = _string_value(getattr(runtime_config, "firecrawl_api_key", "")) + limit = _coerce_result_limit(args.get("num_results", 5)) + request_url = build_url(base_url, "/v2/search") + payload = {"query": query, "limit": limit} + + try: + response = await request_with_retry( + "POST", + request_url, + json_data=payload, + headers=_build_headers(api_key), + default_timeout=30.0, + context=context, + ) + data = response.json() + except json.JSONDecodeError: + logger.exception("[firecrawl_search] 响应不是合法 JSON") + return "Firecrawl 搜索失败:响应格式异常" + except httpx.TimeoutException: + return "Firecrawl 搜索请求超时,请稍后重试" + except httpx.HTTPStatusError as exc: + status_code = exc.response.status_code + logger.warning("[firecrawl_search] HTTP 错误: status=%s", status_code) + if status_code in {401, 403}: + return "Firecrawl 搜索认证失败,请检查 search.firecrawl.api_key 或 keyless 配额" + if status_code == 429: + return "Firecrawl 搜索达到限流或 keyless 配额,请稍后重试或配置 API Key" + return "Firecrawl 搜索失败:上游服务返回错误" + except httpx.RequestError: + logger.exception("[firecrawl_search] 网络请求失败") + return "Firecrawl 搜索失败:网络请求错误" + except Exception: + logger.exception("[firecrawl_search] 搜索失败") + return "Firecrawl 搜索失败,请稍后重试" + + if not isinstance(data, dict): + return "Firecrawl 搜索失败:响应格式异常" + if data.get("success") is False: + message = _extract_error_message(data) + return f"Firecrawl 搜索失败:{message}" if message else "Firecrawl 搜索失败" + + result_data = data.get("data") + web_results = result_data.get("web") if isinstance(result_data, dict) else None + if not isinstance(web_results, list) or not web_results: + return "Firecrawl 搜索未返回结果" + + return _format_firecrawl_results(web_results, limit) diff --git a/src/Undefined/skills/agents/web_agent/tools/grok_search/config.json b/src/Undefined/skills/agents/web_agent/tools/grok_search/config.json index 129b4937..6733b35f 100644 --- a/src/Undefined/skills/agents/web_agent/tools/grok_search/config.json +++ b/src/Undefined/skills/agents/web_agent/tools/grok_search/config.json @@ -2,7 +2,7 @@ "type": "function", "function": { "name": "grok_search", - "description": "最优先使用的联网搜索工具,适用于获取最新信息、开放式互联网检索和高质量综合答案。调用时必须使用 search_request,用自然语言详细说明要搜索的内容和回答要求;不要只给关键词,也不要主动把范围写死到用户未要求的限制里。若用户明确给出时间、地区、站点、排除项、输出格式或比较维度等约束,再一并写入。", + "description": "Grok 联网搜索工具,适用于获取最新信息、开放式互联网检索和高质量综合答案。是否优先使用取决于当前 search.priority 配置。调用时必须使用 search_request,用自然语言详细说明要搜索的内容和回答要求;不要只给关键词,也不要主动把范围写死到用户未要求的限制里。若用户明确给出时间、地区、站点、排除项、输出格式或比较维度等约束,再一并写入。", "parameters": { "type": "object", "properties": { diff --git a/src/Undefined/skills/agents/web_agent/tools/web_search/config.json b/src/Undefined/skills/agents/web_agent/tools/web_search/config.json index f7338cb5..f565142d 100644 --- a/src/Undefined/skills/agents/web_agent/tools/web_search/config.json +++ b/src/Undefined/skills/agents/web_agent/tools/web_search/config.json @@ -2,7 +2,7 @@ "type": "function", "function": { "name": "web_search", - "description": "使用 SearXNG 搜索引擎进行网页搜索。它是 grok_search 不可用时的后备联网搜索工具,适用于回答需要最新信息或你不确定的问题。", + "description": "使用 SearXNG 搜索引擎进行网页搜索。是否优先使用取决于当前 search.priority 配置;适用于回答需要最新信息、通用网页结果或补充交叉检索的问题。", "parameters": { "type": "object", "properties": { diff --git a/tests/test_config_env_registry.py b/tests/test_config_env_registry.py index 385f3351..a4ab836f 100644 --- a/tests/test_config_env_registry.py +++ b/tests/test_config_env_registry.py @@ -32,3 +32,12 @@ def test_alternate_env_keys_documented() -> None: def test_registry_has_model_context_window_entries() -> None: assert ("models", "chat", "context_window_tokens") in ENV_REGISTRY + + +def test_registry_has_search_firecrawl_entries() -> None: + assert ENV_REGISTRY[("search", "priority")] == "SEARCH_PRIORITY" + assert ( + ENV_REGISTRY[("search", "firecrawl", "enabled")] == "FIRECRAWL_SEARCH_ENABLED" + ) + assert ENV_REGISTRY[("search", "firecrawl", "api_key")] == "FIRECRAWL_API_KEY" + assert ENV_REGISTRY[("search", "firecrawl", "base_url")] == "FIRECRAWL_BASE_URL" diff --git a/tests/test_firecrawl_search_tool.py b/tests/test_firecrawl_search_tool.py new file mode 100644 index 00000000..e359bb03 --- /dev/null +++ b/tests/test_firecrawl_search_tool.py @@ -0,0 +1,242 @@ +from __future__ import annotations + +import json +from pathlib import Path +from types import SimpleNamespace +from typing import Any + +import httpx +import pytest + +from Undefined.config.search import ( + SEARCH_TOOL_FIRECRAWL, + SEARCH_TOOL_GROK, + SEARCH_TOOL_SEARXNG, +) +from Undefined.skills.agents.runner import _filter_tools_for_runtime_config +from Undefined.skills.agents.runner.context import ( + _build_web_agent_search_priority_prompt, +) +from Undefined.skills.agents.web_agent.tools.firecrawl_search import ( + handler as firecrawl_handler, +) + + +def _runtime_config(**overrides: Any) -> SimpleNamespace: + data: dict[str, Any] = { + "firecrawl_search_enabled": True, + "firecrawl_api_key": "", + "firecrawl_base_url": "https://api.firecrawl.dev", + "search_priority": [ + SEARCH_TOOL_GROK, + SEARCH_TOOL_FIRECRAWL, + SEARCH_TOOL_SEARXNG, + ], + } + data.update(overrides) + return SimpleNamespace(**data) + + +def _response(method: str, url: str, payload: dict[str, Any]) -> httpx.Response: + return httpx.Response(200, json=payload, request=httpx.Request(method, url)) + + +def test_firecrawl_search_schema_uses_query_and_num_results() -> None: + config_path = ( + Path("src") + / "Undefined" + / "skills" + / "agents" + / "web_agent" + / "tools" + / "firecrawl_search" + / "config.json" + ) + schema = json.loads(config_path.read_text(encoding="utf-8")) + parameters = schema["function"]["parameters"] + + assert schema["function"]["name"] == SEARCH_TOOL_FIRECRAWL + assert parameters["required"] == ["query"] + assert "query" in parameters["properties"] + assert "num_results" in parameters["properties"] + + +@pytest.mark.asyncio +async def test_firecrawl_search_requires_query() -> None: + result = await firecrawl_handler.execute( + {}, + {"runtime_config": _runtime_config()}, + ) + + assert result == "搜索关键词不能为空" + + +@pytest.mark.asyncio +async def test_firecrawl_search_returns_disabled_when_switch_is_off() -> None: + result = await firecrawl_handler.execute( + {"query": "example search"}, + {"runtime_config": _runtime_config(firecrawl_search_enabled=False)}, + ) + + assert result == "Firecrawl 搜索功能未启用(search.firecrawl.enabled=false)" + + +@pytest.mark.asyncio +async def test_firecrawl_search_keyless_request_and_formats_results( + monkeypatch: pytest.MonkeyPatch, +) -> None: + seen: dict[str, Any] = {} + + async def fake_request_with_retry( + method: str, + url: str, + **kwargs: Any, + ) -> httpx.Response: + seen["method"] = method + seen["url"] = url + seen["kwargs"] = kwargs + return _response( + method, + url, + { + "success": True, + "data": { + "web": [ + { + "url": "https://example.com/a", + "title": "Example A", + "description": "First result", + "position": 1, + } + ] + }, + }, + ) + + monkeypatch.setattr( + firecrawl_handler, + "request_with_retry", + fake_request_with_retry, + ) + + result = await firecrawl_handler.execute( + {"query": "example search", "num_results": 3}, + {"runtime_config": _runtime_config(), "request_id": "req-1"}, + ) + + assert seen["method"] == "POST" + assert seen["url"] == "https://api.firecrawl.dev/v2/search" + assert seen["kwargs"]["json_data"] == {"query": "example search", "limit": 3} + assert seen["kwargs"]["headers"]["Content-Type"] == "application/json" + assert "Authorization" not in seen["kwargs"]["headers"] + assert "Example A" in result + assert "https://example.com/a" in result + assert "First result" in result + + +@pytest.mark.asyncio +async def test_firecrawl_search_sends_bearer_when_api_key_is_configured( + monkeypatch: pytest.MonkeyPatch, +) -> None: + seen: dict[str, Any] = {} + + async def fake_request_with_retry( + method: str, + url: str, + **kwargs: Any, + ) -> httpx.Response: + seen["headers"] = kwargs["headers"] + return _response(method, url, {"success": True, "data": {"web": []}}) + + monkeypatch.setattr( + firecrawl_handler, + "request_with_retry", + fake_request_with_retry, + ) + + result = await firecrawl_handler.execute( + {"query": "example search", "num_results": 99}, + { + "runtime_config": _runtime_config( + firecrawl_api_key="fc-test", + firecrawl_base_url="https://firecrawl.internal/", + ) + }, + ) + + assert seen["headers"]["Authorization"] == "Bearer fc-test" + assert result == "Firecrawl 搜索未返回结果" + + +@pytest.mark.asyncio +async def test_firecrawl_search_reports_rate_limit( + monkeypatch: pytest.MonkeyPatch, +) -> None: + async def fake_request_with_retry( + method: str, + url: str, + **_kwargs: Any, + ) -> httpx.Response: + response = httpx.Response(429, request=httpx.Request(method, url)) + raise httpx.HTTPStatusError( + "too many requests", + request=response.request, + response=response, + ) + + monkeypatch.setattr( + firecrawl_handler, + "request_with_retry", + fake_request_with_retry, + ) + + result = await firecrawl_handler.execute( + {"query": "example search"}, + {"runtime_config": _runtime_config()}, + ) + + assert "限流" in result + + +def test_runner_filters_firecrawl_search_for_web_agent_when_disabled() -> None: + tools = [ + {"function": {"name": SEARCH_TOOL_GROK}}, + {"function": {"name": SEARCH_TOOL_FIRECRAWL}}, + {"function": {"name": SEARCH_TOOL_SEARXNG}}, + ] + + filtered = _filter_tools_for_runtime_config( + "web_agent", + tools, + SimpleNamespace( + grok_search_enabled=True, + firecrawl_search_enabled=False, + ), + ) + + assert [tool["function"]["name"] for tool in filtered] == [ + SEARCH_TOOL_GROK, + SEARCH_TOOL_SEARXNG, + ] + + +def test_web_agent_priority_prompt_uses_available_enabled_tools_only() -> None: + tools = [ + {"function": {"name": SEARCH_TOOL_FIRECRAWL}}, + {"function": {"name": SEARCH_TOOL_SEARXNG}}, + {"function": {"name": "crawl_webpage"}}, + ] + + prompt = _build_web_agent_search_priority_prompt( + SimpleNamespace( + search_priority=[ + SEARCH_TOOL_GROK, + SEARCH_TOOL_FIRECRAWL, + SEARCH_TOOL_SEARXNG, + ] + ), + tools, + ) + + assert "firecrawl_search > web_search" in prompt + assert "grok_search >" not in prompt diff --git a/tests/test_search_config.py b/tests/test_search_config.py new file mode 100644 index 00000000..c60f33db --- /dev/null +++ b/tests/test_search_config.py @@ -0,0 +1,103 @@ +from __future__ import annotations + +from pathlib import Path + +import pytest + +from Undefined.config import Config +from Undefined.config.search import ( + SEARCH_TOOL_FIRECRAWL, + SEARCH_TOOL_GROK, + SEARCH_TOOL_SEARXNG, +) + + +_MINIMAL_MAPPING = { + "onebot": {"ws_url": "ws://127.0.0.1:3001"}, + "models": { + "chat": {"api_url": "u", "api_key": "k", "model_name": "chat"}, + "vision": {"api_url": "u", "api_key": "k", "model_name": "vision"}, + "agent": {"api_url": "u", "api_key": "k", "model_name": "agent"}, + }, +} + + +def test_search_config_defaults() -> None: + cfg = Config.from_mapping(_MINIMAL_MAPPING, strict=False) + + assert cfg.search_priority == [ + SEARCH_TOOL_GROK, + SEARCH_TOOL_FIRECRAWL, + SEARCH_TOOL_SEARXNG, + ] + assert cfg.firecrawl_search_enabled is False + assert cfg.firecrawl_api_key == "" + assert cfg.firecrawl_base_url == "https://api.firecrawl.dev" + + +def test_search_config_loads_firecrawl_and_priority(tmp_path: Path) -> None: + config_path = tmp_path / "config.toml" + config_path.write_text( + """ +[onebot] +ws_url = "ws://127.0.0.1:3001" + +[models.chat] +api_url = "u" +api_key = "k" +model_name = "chat" + +[models.vision] +api_url = "u" +api_key = "k" +model_name = "vision" + +[models.agent] +api_url = "u" +api_key = "k" +model_name = "agent" + +[search] +priority = ["web_search", "firecrawl_search", "web_search", "unknown"] +grok_search_enabled = true + +[search.firecrawl] +enabled = true +api_key = "fc-test" +base_url = "https://firecrawl.internal/" +""", + encoding="utf-8", + ) + + cfg = Config.load(config_path, strict=False) + + assert cfg.search_priority == [ + SEARCH_TOOL_SEARXNG, + SEARCH_TOOL_FIRECRAWL, + SEARCH_TOOL_GROK, + ] + assert cfg.grok_search_enabled is True + assert cfg.firecrawl_search_enabled is True + assert cfg.firecrawl_api_key == "fc-test" + assert cfg.firecrawl_base_url == "https://firecrawl.internal" + + +def test_search_config_env_vars(monkeypatch: pytest.MonkeyPatch) -> None: + monkeypatch.setenv( + "SEARCH_PRIORITY", + "firecrawl_search,web_search,invalid,firecrawl_search", + ) + monkeypatch.setenv("FIRECRAWL_SEARCH_ENABLED", "true") + monkeypatch.setenv("FIRECRAWL_API_KEY", "fc-env") + monkeypatch.setenv("FIRECRAWL_BASE_URL", "https://firecrawl.env") + + cfg = Config.from_mapping(_MINIMAL_MAPPING, strict=False) + + assert cfg.search_priority == [ + SEARCH_TOOL_FIRECRAWL, + SEARCH_TOOL_SEARXNG, + SEARCH_TOOL_GROK, + ] + assert cfg.firecrawl_search_enabled is True + assert cfg.firecrawl_api_key == "fc-env" + assert cfg.firecrawl_base_url == "https://firecrawl.env" From 980a415644486a828bb3bade033f6bbb6cb6c9cc Mon Sep 17 00:00:00 2001 From: Null <1708213363@qq.com> Date: Sat, 4 Jul 2026 15:49:47 +0800 Subject: [PATCH 2/2] fix(search): centralize search priority helpers Expose a public string-list coercion helper and reuse shared search priority ordering in prompt builders. Tests: uv run pytest tests/test_search_config.py tests/test_firecrawl_search_tool.py tests/test_prompt_builder_message_order.py; uv run ruff check .; uv run ruff format --check .; uv run mypy . Co-authored-by: GPT-5 --- src/Undefined/ai/prompts/system_context.py | 11 ++++------ src/Undefined/config/coercers.py | 6 +++++- src/Undefined/config/search.py | 20 +++++++++++++++++-- src/Undefined/skills/agents/runner/context.py | 12 ++--------- tests/test_search_config.py | 14 +++++++++++++ 5 files changed, 43 insertions(+), 20 deletions(-) diff --git a/src/Undefined/ai/prompts/system_context.py b/src/Undefined/ai/prompts/system_context.py index 987d1993..16954a50 100644 --- a/src/Undefined/ai/prompts/system_context.py +++ b/src/Undefined/ai/prompts/system_context.py @@ -9,6 +9,7 @@ SEARCH_TOOL_FIRECRAWL, SEARCH_TOOL_GROK, SEARCH_TOOL_SEARXNG, + order_by_priority, ) @@ -96,13 +97,9 @@ def build_model_config_info(runtime_config: Any) -> str: enabled_search_tools.append(SEARCH_TOOL_FIRECRAWL) if str(getattr(runtime_config, "searxng_url", "") or "").strip(): enabled_search_tools.append(SEARCH_TOOL_SEARXNG) - ordered_enabled_search_tools = [ - name for name in search_priority if name in enabled_search_tools - ] - ordered_enabled_search_tools.extend( - name - for name in enabled_search_tools - if name not in ordered_enabled_search_tools + ordered_enabled_search_tools = order_by_priority( + search_priority, + set(enabled_search_tools), ) if ordered_enabled_search_tools: parts.append( diff --git a/src/Undefined/config/coercers.py b/src/Undefined/config/coercers.py index 4edff34a..d3ebd268 100644 --- a/src/Undefined/config/coercers.py +++ b/src/Undefined/config/coercers.py @@ -110,7 +110,7 @@ def _coerce_int_list(value: Any) -> list[int]: return [] -def _coerce_str_list(value: Any) -> list[str]: +def coerce_str_list(value: Any) -> list[str]: if value is None: return [] if isinstance(value, list): @@ -120,6 +120,10 @@ def _coerce_str_list(value: Any) -> list[str]: return [] +def _coerce_str_list(value: Any) -> list[str]: + return coerce_str_list(value) + + def _coerce_request_params(value: Any) -> dict[str, Any]: return normalize_request_params(value) diff --git a/src/Undefined/config/search.py b/src/Undefined/config/search.py index 303c5dbb..a3db7530 100644 --- a/src/Undefined/config/search.py +++ b/src/Undefined/config/search.py @@ -4,7 +4,7 @@ from typing import Any, Final -from .coercers import _coerce_str_list +from .coercers import coerce_str_list SEARCH_TOOL_GROK: Final = "grok_search" SEARCH_TOOL_FIRECRAWL: Final = "firecrawl_search" @@ -21,7 +21,7 @@ def normalize_search_priority(value: Any) -> list[str]: """Return a stable ordered search tool list from TOML/env input.""" - raw_items = _coerce_str_list(value) + raw_items = coerce_str_list(value) normalized: list[str] = [] for item in raw_items: if item not in KNOWN_SEARCH_TOOLS or item in normalized: @@ -35,3 +35,19 @@ def normalize_search_priority(value: Any) -> list[str]: if item not in normalized: normalized.append(item) return normalized + + +def order_by_priority( + priority: list[str] | tuple[str, ...], + available: set[str], +) -> list[str]: + """Order available search tools by configured priority, then append leftovers.""" + + configured = list(priority or DEFAULT_SEARCH_PRIORITY) + ordered = [name for name in configured if name in available] + ordered.extend( + name + for name in DEFAULT_SEARCH_PRIORITY + if name in available and name not in ordered + ) + return ordered diff --git a/src/Undefined/skills/agents/runner/context.py b/src/Undefined/skills/agents/runner/context.py index 93c01ada..fbbb46f7 100644 --- a/src/Undefined/skills/agents/runner/context.py +++ b/src/Undefined/skills/agents/runner/context.py @@ -8,7 +8,7 @@ import aiofiles from Undefined.config.models import AgentModelConfig -from Undefined.config.search import DEFAULT_SEARCH_PRIORITY, KNOWN_SEARCH_TOOLS +from Undefined.config.search import KNOWN_SEARCH_TOOLS, order_by_priority from Undefined.skills.agents.agent_tool_registry import AgentToolRegistry from Undefined.skills.anthropic_skills import AnthropicSkillRegistry @@ -42,15 +42,7 @@ def _build_web_agent_search_priority_prompt( ) -> str: available_names = _tool_names(tools) priority = list(getattr(runtime_config, "search_priority", []) or []) - if not priority: - priority = list(DEFAULT_SEARCH_PRIORITY) - - ordered = [name for name in priority if name in available_names] - ordered.extend( - name - for name in DEFAULT_SEARCH_PRIORITY - if name in available_names and name not in ordered - ) + ordered = order_by_priority(priority, available_names) if not ordered: return "" diff --git a/tests/test_search_config.py b/tests/test_search_config.py index c60f33db..4802c2c5 100644 --- a/tests/test_search_config.py +++ b/tests/test_search_config.py @@ -9,6 +9,7 @@ SEARCH_TOOL_FIRECRAWL, SEARCH_TOOL_GROK, SEARCH_TOOL_SEARXNG, + order_by_priority, ) @@ -35,6 +36,19 @@ def test_search_config_defaults() -> None: assert cfg.firecrawl_base_url == "https://api.firecrawl.dev" +def test_order_by_priority_filters_and_appends_default_order() -> None: + ordered = order_by_priority( + [SEARCH_TOOL_SEARXNG, SEARCH_TOOL_FIRECRAWL], + {SEARCH_TOOL_GROK, SEARCH_TOOL_FIRECRAWL, SEARCH_TOOL_SEARXNG}, + ) + + assert ordered == [ + SEARCH_TOOL_SEARXNG, + SEARCH_TOOL_FIRECRAWL, + SEARCH_TOOL_GROK, + ] + + def test_search_config_loads_firecrawl_and_priority(tmp_path: Path) -> None: config_path = tmp_path / "config.toml" config_path.write_text(